Skip to content

variants

Phonetic and morphological variant generation for proactive typosquatting detection.

phonemenal.variants

Generate sound-alike spelling variants of a word.

Used for reverse scanning: given a known name, what sound-alike alternatives exist? Generates candidates by applying common phonetic substitutions, double/single letter swaps, and separator permutations.

generate(name: str, *, include_separators: bool = True) -> set[str]

Generate phonetically similar variant spellings of a name.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| name | str | Input name (separators stripped internally). | required |
| include_separators | bool | If True, also generate hyphen/underscore variants. | True |

Returns a set of variant strings (the input itself is excluded).

Source code in phonemenal/variants.py
def generate(name: str, *, include_separators: bool = True) -> set[str]:
    """Generate phonetically similar variant spellings of a name.

    The name is lowercased and runs of ``-``, ``_`` and ``.`` are removed
    before any variant is built, so every returned string is lowercase.

    Three families of variants are produced:

    * one variant per ``(old, new)`` pair in ``SUBSTITUTIONS`` whose ``old``
      occurs in the name (only the first occurrence is replaced);
    * for each character, either the name with that character doubled or,
      when it repeats the previous character, the name with it dropped;
    * if ``include_separators`` is true, the name with ``-`` or ``_``
      inserted at each interior position.

    Args:
        name: Input name (separators stripped internally).
        include_separators: If True, also generate hyphen/underscore variants.

    Returns:
        Set of variant strings. Neither the separator-stripped name nor the
        lowercased input itself is included.
    """
    lowered = name.lower()
    clean = re.sub(r"[-_.]+", "", lowered)
    results: set[str] = set()

    # Phonetic substitutions (first occurrence of each pattern only)
    for old, new in SUBSTITUTIONS:
        if old in clean:
            results.add(clean.replace(old, new, 1))

    # Double/single letter variants
    prev = ""
    for i, ch in enumerate(clean):
        if ch == prev:
            # Drop one of a double letter
            results.add(clean[:i] + clean[i + 1 :])
        else:
            # Double a single letter
            results.add(clean[:i] + ch + clean[i:])
        prev = ch

    # Separator permutations (common in multi-word names)
    if include_separators and len(clean) > 2:
        # No word-boundary detection is attempted: a separator is inserted at
        # every position that leaves at least two characters on each side.
        for sep in ["-", "_"]:
            for i in range(2, len(clean) - 1):
                results.add(clean[:i] + sep + clean[i:])

    results.discard(clean)
    # The separator loop can rebuild the caller's own spelling (for example
    # "my-package" from "mypackage"); that is the input, not a variant of it.
    results.discard(lowered)
    return results

generate_morphological(name: str) -> set[str]

Generate morphological variants (suffix swaps, additions, and removals).

These aren't phonetic but are common morphological near-misses: packaging → packages, packaged, packager, etc.

Source code in phonemenal/variants.py
def generate_morphological(name: str) -> set[str]:
    """Generate morphological variants (suffix swaps, additions, removals).

    These aren't phonetic but are common morphological near-misses:
    packaging → packages, packaged, packager, etc.

    The name is lowercased and runs of ``-``, ``_`` and ``.`` are removed
    before any variant is built.

    Args:
        name: Input name (separators stripped internally).

    Returns:
        Set of variant strings, excluding the separator-stripped name itself.
        Empty if nothing remains of ``name`` after separators are stripped.
    """
    clean = re.sub(r"[-_.]+", "", name.lower())
    if not clean:
        # Empty or separator-only input: there is no stem to vary, and
        # indexing clean[-1] below would raise IndexError.
        return set()

    results: set[str] = set()

    # (suffix the name currently ends with, suffixes to put in its place)
    suffixes_to_try = [
        ("ing", ["ed", "er", "es", "tion", "ment"]),
        ("ed", ["ing", "er", "es"]),
        ("er", ["ing", "ed", "or"]),
        ("es", ["ed", "ing", "er"]),
        ("tion", ["sion", "ment", "ting"]),
        ("sion", ["tion", "ment"]),
        ("ment", ["tion", "ments"]),
        ("ly", ["ful", "less", "ness"]),
        ("ful", ["ly", "less"]),
        ("less", ["ly", "ful"]),
        ("ness", ["ly"]),
        ("py", ["pie", "pi"]),
        ("pi", ["py", "pie"]),
    ]

    for existing, replacements in suffixes_to_try:
        if clean.endswith(existing):
            base = clean[: -len(existing)]
            for replacement in replacements:
                results.add(base + replacement)

    # Also try just adding common suffixes
    for suffix in ["s", "py", "lib", "2", "3", "x", "js", "io"]:
        results.add(clean + suffix)

    # And removing trailing digits/common suffixes.
    # A one-character name is left alone so "" is never offered as a variant.
    if len(clean) > 1 and clean[-1].isdigit():
        results.add(clean[:-1])
    for suffix in ["py", "lib", "2", "3", "x", "js", "io", "s"]:
        # Require at least two characters to remain after stripping.
        if clean.endswith(suffix) and len(clean) > len(suffix) + 1:
            results.add(clean[: -len(suffix)])

    results.discard(clean)
    return results