Skip to content

fallback

Fast phonetic key encoder that works on any string without CMU dict lookup. Simplified Metaphone-inspired encoding tuned for package names.

phonemenal.fallback

Fast phonetic key encoder — works on any string without CMU dict lookup.

Simplified Metaphone-inspired encoding that handles arbitrary strings. Useful as a fallback when the CMU dict doesn't cover a word (non-English, neologisms, coined terms like "numpy", "pytorch", "fastapi").

Also provides an LCS-based similarity ratio for comparing phonetic keys.

phonetic_key(name: str) -> str

Generate a coarse phonetic key for a name.

Applies digraph replacements, vowel normalization, and character collapsing to produce a phonetic skeleton. Sound-alike names produce the same or similar keys.

Examples:

phonetic_key("phone") → "fAn"
phonetic_key("fone") → "fAn"
phonetic_key("flask") → "flAsk"
phonetic_key("phlask") → "flAsk"

Source code in phonemenal/fallback.py
def phonetic_key(name: str) -> str:
    """Generate a coarse phonetic key for a name.

    The name is lowercased and stripped of ``-``, ``_`` and ``.`` separators,
    then reduced to a phonetic skeleton by digraph replacement, vowel
    normalization, and collapsing of repeated characters. Sound-alike names
    produce the same or similar keys.

    Args:
        name: Arbitrary name to encode (for example a package name).

    Returns:
        The phonetic key, or ``""`` when nothing is left once separators are
        removed. The vowels ``a e i o u`` all become ``"A"`` and ``y`` becomes
        ``"Y"``; all other characters are left as they are after lowercasing.
        The first character gets no special treatment: a leading vowel is
        normalized like any other.

    Examples:
        phonetic_key("phone")  → "fAn"
        phonetic_key("fone")   → "fAn"
        phonetic_key("flask")  → "flAsk"
        phonetic_key("phlask") → "flAsk"
        phonetic_key("apple")  → "Apl"
    """
    # Computed once: needed both as the starting point of the key and for the
    # silent-e check at the end.
    normalized = re.sub(r"[-_.]+", "", name.lower())
    if not normalized:
        return ""

    # Digraph replacements (order matters — longer patterns first). They are
    # applied sequentially, so the output of an earlier rule can be rewritten
    # by a later one (e.g. "tion" → "shn" → "sn" via the "sh" rule).
    replacements = (
        ("tion", "shn"),
        ("sion", "shn"),
        ("tch", "c"),
        ("dge", "j"),
        ("ph", "f"),
        ("ck", "k"),
        ("gh", "g"),
        ("wh", "w"),
        ("wr", "r"),
        ("kn", "n"),
        ("gn", "n"),
        ("mb", "m"),
        ("th", "t"),
        ("sh", "s"),
        ("ch", "c"),
        ("qu", "kw"),
    )
    key = normalized
    for old, new in replacements:
        key = key.replace(old, new)

    # Vowel normalization: every plain vowel maps to "A", "y" to "Y".
    key = key.translate(str.maketrans("aeiouy", "AAAAAY"))

    # Collapse runs of identical characters by keeping a character only when
    # it differs from the one immediately before it.
    key = "".join(
        ch for idx, ch in enumerate(key) if idx == 0 or ch != key[idx - 1]
    )

    # Drop trailing silent-e: only strip if the original word ended with 'e'.
    # NOTE: runs were collapsed above, so a final vowel cluster ending in "e"
    # has already merged into one "A" and is removed whole ("tree" → "tr").
    if len(key) > 2 and key.endswith("A") and normalized.endswith("e"):
        key = key[:-1]

    return key

similarity(key1: str, key2: str) -> float

LCS-based similarity ratio between two phonetic keys.

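Returns a value between 0.0 and 1.0 reflecting phonetic structure overlap: twice the length of the longest common subsequence divided by the combined length of the two keys. If either key is empty the result is 0.0.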

Source code in phonemenal/fallback.py
def similarity(key1: str, key2: str) -> float:
    """Score how much two phonetic keys overlap, using their LCS.

    The score is ``2 * LCS(key1, key2) / (len(key1) + len(key2))``, where LCS
    is the length of the longest common subsequence of the two keys.

    Args:
        key1: First phonetic key.
        key2: Second phonetic key.

    Returns:
        A float from 0.0 to 1.0. An empty key on either side (including both
        sides) gives 0.0; identical non-empty keys give 1.0.
    """
    if not (key1 and key2):
        return 0.0
    if key1 == key2:
        return 1.0

    # Row-by-row LCS table: only the previous row is needed to fill the
    # current one, so just two rows are kept at any time.
    above = [0] * (len(key2) + 1)
    for left_ch in key1:
        current = [0]
        for col, right_ch in enumerate(key2, start=1):
            if left_ch == right_ch:
                current.append(above[col - 1] + 1)
            else:
                current.append(max(above[col], current[col - 1]))
        above = current

    lcs_len = above[-1]
    return (2.0 * lcs_len) / (len(key1) + len(key2))

compare(name1: str, name2: str) -> dict

Compare two names using the fallback phonetic encoder.

Returns a dict containing both input names, their phonetic keys, the similarity score (rounded to four decimal places), and whether the two keys are identical.

Source code in phonemenal/fallback.py
def compare(name1: str, name2: str) -> dict:
    """Compare two names with the fallback phonetic encoder.

    Args:
        name1: First name to compare.
        name2: Second name to compare.

    Returns:
        A dict with these entries, in this order:

        - ``"name1"`` / ``"name2"``: the inputs, unchanged.
        - ``"key1"`` / ``"key2"``: the ``phonetic_key`` of each input.
        - ``"score"``: ``similarity`` of the two keys, rounded to 4 decimals.
        - ``"exact_match"``: ``True`` when the two keys are equal.
    """
    key_a, key_b = phonetic_key(name1), phonetic_key(name2)

    report = {"name1": name1, "name2": name2, "key1": key_a, "key2": key_b}
    report["score"] = round(similarity(key_a, key_b), 4)
    # Equality of the keys is reported separately from the score.
    report["exact_match"] = key_a == key_b
    return report