Wiki Home

Metaphone Implementation - Soundex Alternative


Namespace: VFP
Soundex is not always the best thing to use when trying to produce reliable Sounds-Like functionality in your application. Here's an alternative...
*********************************
*!* Example of use
*********************************
LOCAL lnSoundex1, lnSoundex2, lcMetaphone1, lcMetaphone2, lcFirstWord, lcSecondWord

lnSoundex1 = SOUNDEX("FONE")
lnSoundex2 = SOUNDEX("PHONE")
lcMetaphone1 = METAPHONE("FONE")
lcMetaphone2 = METAPHONE("PHONE")
MESSAGEBOX(["FONE" compared to "PHONE"] + CHR(13)+ CHR(13) ;
    +"SOUNDEX RETURNS: " + CHR(13)+ CHR(9) ;
        + TRANSFORM(lnSoundex1) + " = " + TRANSFORM(lnSoundex2) + " (" + IIF(lnSoundex1 = lnSoundex2,"TRUE", "FALSE") + ")" + CHR(13)+ CHR(13) ;
    +"METAPHONE RETURNS: " + CHR(13)+ CHR(9) ;
        + lcMetaphone1 + " = " + lcMetaphone2 + " (" + IIF(lcMetaphone1 = lcMetaphone2,"TRUE", "FALSE") + ")",0,"EXAMPLE 1 OF 2")

lnSoundex1 = SOUNDEX("KREG")
lnSoundex2 = SOUNDEX("CRAIG")
lcMetaphone1 = METAPHONE("KREG")
lcMetaphone2 = METAPHONE("CRAIG")
MESSAGEBOX(["KREG" compared to "CRAIG"] + CHR(13)+ CHR(13) ;
            +"SOUNDEX RETURNS: " + CHR(13)+ CHR(9) ;
         + TRANSFORM(lnSoundex1) + " = " + TRANSFORM(lnSoundex2) + " (" + IIF(lnSoundex1 = lnSoundex2,"TRUE", "FALSE") + ")" + CHR(13)+ CHR(13) ;
            +"METAPHONE RETURNS: " + CHR(13)+ CHR(9) ;
         + lcMetaphone1 + " = " + lcMetaphone2 + " (" + IIF(lcMetaphone1 = lcMetaphone2,"TRUE", "FALSE") + ")",0,"EXAMPLE 2 OF 2")

DO WHILE .T.
    IF MESSAGEBOX("Would you like to try a comparison of your own?",36,"GIVE IT A TRY") = 6
        lcFirstWord = ALLTRIM(INPUTBOX("Enter a name or word:", "FIRST WORD TO COMPARE"))
        IF EMPTY(lcFirstWord)
            MESSAGEBOX("You must enter both words.", 16, "UNABLE TO RUN COMPARISON")
            LOOP
        ENDIF
        lcSecondWord =  ALLTRIM(INPUTBOX("Enter another name or word:", "SECOND WORD TO COMPARE"))
        IF EMPTY(lcSecondWord)
            MESSAGEBOX("You must enter both words.", 16, "UNABLE TO RUN COMPARISON")
            LOOP
        ENDIF
        lnSoundex1 = SOUNDEX(lcFirstWord)
        lnSoundex2 = SOUNDEX(lcSecondWord)
        lcMetaphone1 = METAPHONE(lcFirstWord)
        lcMetaphone2 = METAPHONE(lcSecondWord)
        MESSAGEBOX(["]+lcFirstWord+[" compared to "]+lcSecondWord+["] + CHR(13)+ CHR(13) ;
            +"SOUNDEX RETURNS: " + CHR(13)+ CHR(9) ;
                 + TRANSFORM(lnSoundex1) + " = " + TRANSFORM(lnSoundex2) + " (" + IIF(lnSoundex1 = lnSoundex2,"TRUE", "FALSE") + ")" + CHR(13)+ CHR(13) ;
            +"METAPHONE RETURNS: " + CHR(13)+ CHR(9) ;
                 + lcMetaphone1 + " = " + lcMetaphone2 + " (" + IIF(lcMetaphone1 = lcMetaphone2,"TRUE", "FALSE") + ")",0,"COMPARISON RESULTS")
    ELSE
        EXIT
    ENDIF
ENDDO
*********************************

*!* Original C version by Michael Kuhn 
*!* http://aspell.sourceforge.net/metaphone/metaphone-kuhn.txt
*!* Metaphone algorithm translated from C to Delphi by Tom White 
*!* Translated to Visual Basic by Dave White 9/10/01
*!* Translated to Visual Foxpro by Craig Boyd (Slighthaze) 10-21-2003 craig1442@mchsi.com

*********************************
FUNCTION Metaphone (lcWord)
*********************************
#DEFINE VOWELS        "AEIOU"
#DEFINE FRONTV        "EIY"
#DEFINE VARSON        "CSPTG"
#DEFINE DBL            "."
#DEFINE EXCPPAIR    "AGKPW"
#DEFINE NXTLTR        "ENNNR"
#DEFINE ALPHACHR    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#DEFINE SIGNIFICANTCHARS    5 &&Can be set higher or lower to affect results, common to use 4 or 5
LOCAL lcB, lcC
LOCAL lcInp, lcOutp
LOCAL lnIi, lnJj
LOCAL lcCurltr, lcPrevltr, lcNextltr, lcNextltr2, lcNextltr3
LOCAL lnVowelafter, llVowelbefore, llFrontvafter, llSilent, llHard
IF PCOUNT()=0
    RETURN ""
ELSE
    IF TYPE("lcWord") != "C"
        RETURN ""
    ENDIF
ENDIF

lcInp = UPPER(lcWord)
lcInp = CHRTRAN(lcInp,CHRTRAN(lcInp,ALPHACHR,""),"") &&Remove all non-alpha characters
lcOutp = ""

IF LEN(lcInp) = 0
    RETURN ""
ENDIF

*!*--Check rules at beginning of word
IF LEN(lcInp) > 1
    lcB = SUBSTR(lcInp, 1, 1)
    lcC = SUBSTR(lcInp, 2, 1)
    lnIi = ATC(lcB, EXCPPAIR)
    lnJj = ATC(lcC, NXTLTR)
    IF lnIi = lnJj AND lnIi > 0
        lcInp = SUBSTR(lcInp, 2, LEN(lcInp) - 1)
    ENDIF
ENDIF
IF SUBSTR(lcInp, 1, 1) = "X"
    lcInp = STUFF(lcInp, 1, 1, "S")
ENDIF
IF SUBSTR(lcInp, 1, 2) = "WH"
    lcInp = "W" + SUBSTR(lcInp, 3)
ENDIF
IF RIGHT(lcInp, 1) = "S"
    lcInp = LEFT(lcInp, LEN(lcInp) - 1)
ENDIF
lnIi = 0
DO WHILE (lnIi <= LEN(lcInp))
    lnIi = lnIi + 1
*!*--Main LOOP!
    llSilent = .F.
    llHard = .F.
    lcCurltr = SUBSTR(lcInp, lnIi, 1)
    llVowelbefore = .F.
    lcPrevltr = " "
    IF lnIi > 1
        lcPrevltr = SUBSTR(lcInp, lnIi - 1, 1)
        IF InStrC(lcPrevltr, VOWELS) > 0
            llVowelbefore = .T.
        ENDIF
    ENDIF
    IF ((lnIi = 1) AND (InStrC(lcCurltr, VOWELS) > 0))
        lcOutp = lcOutp + lcCurltr
        LOOP
    ENDIF
    llVowelafter = .F.
    llFrontvafter = .F.
    lcNextltr = " "
    IF lnIi < LEN(lcInp)
        lcNextltr = SUBSTR(lcInp, lnIi + 1, 1)
        IF InStrC(lcNextltr, VOWELS) > 0
            llVowelafter = .T.
        ENDIF
        IF InStrC(lcNextltr, FRONTV) > 0
            llFrontvafter = .T.
        ENDIF
    ENDIF
*!*--Skip double letters EXCEPT ones in variable double
    IF InStrC(lcCurltr, DBL) = 0
        IF lcCurltr = lcNextltr
            LOOP
        ENDIF
    ENDIF
    lcNextltr2 = " "
    IF LEN(lcInp) - lnIi > 1
        lcNextltr2 = SUBSTR(lcInp, lnIi + 2, 1)
    ENDIF
    lcNextltr3 = " "
    IF (LEN(lcInp) - lnIi) > 2
        lcNextltr3 = SUBSTR(lcInp, lnIi + 3, 1)
    ENDIF
    DO CASE
    CASE lcCurltr = "B"
        llSilent = .F.
        IF (lnIi = LEN(lcInp)) AND (lcPrevltr = "M")
            llSilent = .T.
        ENDIF
        IF NOT (llSilent)
            lcOutp = lcOutp + lcCurltr
        ENDIF
    CASE lcCurltr = "C"
        IF NOT ((lnIi > 2) AND (lcPrevltr = "S") AND llFrontvafter)
            IF ((lnIi > 1) AND (lcNextltr = "I") AND (lcNextltr2 = "A"))
                lcOutp = lcOutp + "X"
            ELSE
                IF llFrontvafter
                    lcOutp = lcOutp + "S"
                ELSE
                    IF ((lnIi > 2) AND (lcPrevltr = "S") AND (lcNextltr = "H"))
                        lcOutp = lcOutp + "K"
                    ELSE
                        IF lcNextltr = "H"
                            IF ((lnIi = 1) AND (InStrC(lcNextltr2, VOWELS) = 0))
                                lcOutp = lcOutp + "K"
                            ELSE
                                lcOutp = lcOutp + "X"
                            ENDIF
                        ELSE
                            IF lcPrevltr = "C"
                                lcOutp = lcOutp + "C"
                            ELSE
                                lcOutp = lcOutp + "K"
                            ENDIF
                        ENDIF
                    ENDIF
                ENDIF
            ENDIF
        ENDIF
    CASE lcCurltr = "D"
        IF ((lcNextltr = "G") AND (InStrC(lcNextltr2, FRONTV) > 0))
            lcOutp = lcOutp + "J"
        ELSE
            lcOutp = lcOutp + "T"
        ENDIF
    CASE lcCurltr = "G"
        llSilent = .F.
        IF ((lnIi < LEN(lcInp)) AND (lcNextltr = "H") AND (InStrC(lcNextltr2, VOWELS) = 0))
            llSilent = .T.
        ENDIF
        DO CASE
        CASE ((lnIi = LEN(lcInp) - 4) AND (lcNextltr = "N") AND (lcNextltr2 = "E") AND (lcNextltr3 = "D"))
            llSilent = .T.
        CASE ((lnIi = LEN(lcInp) - 2) AND (lcNextltr = "N"))
            llSilent = .T.
        ENDCASE
        IF (lcPrevltr = "D") AND llFrontvafter
            llSilent = .T.
        ENDIF
        IF lcPrevltr = "G"
            llHard = .T.
        ENDIF
        IF NOT (llSilent)
            IF llFrontvafter AND (NOT (llHard))
                lcOutp = lcOutp + "J"
            ELSE
                lcOutp = lcOutp + "K"
            ENDIF
        ENDIF
    CASE lcCurltr = "H"
        llSilent = .F.
        IF InStrC(lcPrevltr, VARSON) > 0
            llSilent = .T.
        ENDIF
        IF llVowelbefore AND (NOT (llVowelafter))
            llSilent = .T.
        ENDIF
        IF NOT llSilent
            lcOutp = lcOutp + lcCurltr
        ENDIF
    CASE lcCurltr = "F" OR lcCurltr =  "J" OR lcCurltr =  "L" OR lcCurltr =  "M" OR lcCurltr =  "N" OR lcCurltr =  "R"
        lcOutp = lcOutp + lcCurltr
    CASE lcCurltr = "K"
        IF lcPrevltr <> "C"
            lcOutp = lcOutp + lcCurltr
        endif
    CASE lcCurltr = "P"
        IF lcNextltr = "H"
            lcOutp = lcOutp + "F"
        ELSE
            lcOutp = lcOutp + "P"
        ENDIF
    CASE lcCurltr = "Q"
        lcOutp = lcOutp + "K"
    CASE lcCurltr = "S"
        IF ((lnIi > 2) AND (lcNextltr = "I") AND ((lcNextltr2 = "O") OR (lcNextltr2 = "A")))
            lcOutp = lcOutp + "X"
        ENDIF
        IF (lcNextltr = "H")
            lcOutp = lcOutp + "X"
        ELSE
            lcOutp = lcOutp + "S"
        ENDIF
    CASE lcCurltr = "T"
        IF ((lnIi > 0) AND (lcNextltr = "I") AND ((lcNextltr2 = "O") OR (lcNextltr2 = "A")))
            lcOutp = lcOutp + "X"
        ENDIF
        DO CASE
        CASE lcNextltr = "H"
            IF ((lnIi > 1) OR (InStrC(lcNextltr2, VOWELS) > 0))
                lcOutp = lcOutp + "0"
            ELSE
                lcOutp = lcOutp + "T"
            ENDIF
        CASE NOT ((lnIi < LEN(lcInp) - 3) AND (lcNextltr = "C") AND (lcNextltr2 = "H"))
            lcOutp = lcOutp + "T"
        ENDCASE
    CASE lcCurltr = "V"
        lcOutp = lcOutp + "F"
    CASE lcCurltr = "W" OR lcCurltr =  "Y"
        IF (lnIi < LEN(lcInp) - 1) AND llVowelafter
            lcOutp = lcOutp + lcCurltr
        ENDIF
    CASE lcCurltr = "X"
        lcOutp = lcOutp + "KS"
    CASE lcCurltr = "Z"
        lcOutp = lcOutp + "S"
    ENDCASE
ENDDO
RETURN LEFT(lcOutp, SIGNIFICANTCHARS)
ENDFUNC

*********************************
FUNCTION InStrC (lcSearchIn, lcSoughtCharacters)
*********************************
*!*--- Returns the position of the first character in lcSearchIn that is contained
*!*--- in the string lcSoughtCharacters.  Returns 0 if none found.
LOCAL i, lnReturn
lnReturn = 0
lcSoughtCharacters = UPPER(lcSoughtCharacters)
FOR i = 1 TO LEN(lcSearchIn)
    IF ATC(SUBSTR(lcSearchIn, i, 1), lcSoughtCharacters) > 0
        lnReturn = i
        EXIT
    ENDIF
ENDFOR
RETURN lnReturn
ENDFUNC

-- Craig SBoyd
Category Code Samples