Wiki Home

Double Metaphone - Soundex Alternative

(Updated: 2010.06.04 12:18:58 PM)
Namespace: VFP
I have used this in a project recently and it works quite nicely. I came up with the following implementation to make certain I got full Rushmore Optimization when the users did a metaphone search. To that end I added a metaphone field and an index to my table. When the surname is added/modified I update that field by using Double Meta Phone - Soundex Alternative.

LOCAL lcResult, lcPrimary, lcSecondary
STORE "" TO lcResult, lcPrimary, lcSecondary
IF DoubleMetaPhone(SURNAMEFIELD,@lcPrimary,@lcSecondary)
	lcResult = PADR(m.lcPrimary,5," ")
	REPLACE MetaPhoneField with m.lcResult IN mytable
ENDIF


When the user wants to search, take their search value and convert it using doublemetaphone and then search the metaphone field.

LOCAL lcResult, lcPrimary, lcSecondary
STORE "" TO lcResult, lcPrimary, lcSecondary
IF DoubleMetaPhone(THISFORM.textbox.value,@lcPrimary,@lcSecondary)
	lcResult = PADR(m.lcPrimary,5," ")
	SELECT * FROM MYTABLE WHERE MetaPhoneField = m.lcResult
ENDIF

-- Mike Yearwood

***************************************************************
*!* DOUBLE Metaphone (c) 1998, 1999 BY Lawrence Philips
*!*
*!* Slightly modified BY Kevin Atkinson TO fix several bugs AND
*!* TO ALLOW it TO give BACK more than 4 characters.
*!*
*!* Atkinson's C++ version Translated to Visual Foxpro
*!* by Craig Boyd (Slighthaze) 10-23-2003
*!* From http://aspell.sourceforge.net/metaphone/dmetaph.cpp
*!* Also added SIGNIFICANTCHARS constant so developer
*!* can control number of characters returned
***************************************************************

*********************************
*!* Matches are as follows
*********************************
*!* STRONG MATCH
*!* Primary = Primary
*!*
*!* NORMAL MATCH
*!* Primary = Secondary
*!* Secondary = Primary
*!*
*!* WEAK MATCH
*!* Alternate = Alternate
*********************************


*********************************
*!* Example of use
*********************************

LOCAL lnSoundex1, lnSoundex2
LOCAL lcPrimary1, lcSecondary1
LOCAL lcPrimary2, lcSecondary2
LOCAL lcFirstWord, lcSecondWord

lnSoundex1 = SOUNDEX("CEASE")
lnSoundex2 = SOUNDEX("SEAS")
=DOUBLEMETAPHONE("CEASE", @lcPrimary1, @lcSecondary1)
=DOUBLEMETAPHONE("SEAS", @lcPrimary2, @lcSecondary2)

MESSAGEBOX(["CEASE" compared to "SEAS"] + CHR(13)+ CHR(13) ;
    +"SOUNDEX RETURNS: " + CHR(13)+ CHR(9) ;
        + TRANSFORM(lnSoundex1) + " = " + TRANSFORM(lnSoundex2) + " (" + IIF(lnSoundex1 = lnSoundex2,"TRUE", "FALSE") + ")" + CHR(13)+ CHR(13) ;
    +"METAPHONE RETURNS: " + CHR(13)+ CHR(9) ;
        + "Primary #1: " + lcPrimary1 + CHR(9) + "Primary #2: " + lcPrimary2 + CHR(9) + CHR(13)+ CHR(9) ;
        + "Secondary: " + lcSecondary1 + CHR(9) + "Secondary: " + lcSecondary2 + CHR(9) + CHR(13)+ CHR(9) ;
        + "Match Type: " + GetMatchType(lcPrimary1, lcSecondary1, lcPrimary2, lcSecondary2) ;
    , 0, "EXAMPLE 1 OF 2")

lnSoundex1 = SOUNDEX("MICHAEL")
lnSoundex2 = SOUNDEX("MICIAL")
=DOUBLEMETAPHONE("MICHAEL", @lcPrimary1, @lcSecondary1)
=DOUBLEMETAPHONE("MICIAL", @lcPrimary2, @lcSecondary2)

MESSAGEBOX(["MICHAEL" compared to "MICIAL"] + CHR(13)+ CHR(13) ;
    +"SOUNDEX RETURNS: " + CHR(13)+ CHR(9) ;
        + TRANSFORM(lnSoundex1) + " = " + TRANSFORM(lnSoundex2) + " (" + IIF(lnSoundex1 = lnSoundex2,"TRUE", "FALSE") + ")" + CHR(13)+ CHR(13) ;
    +"METAPHONE RETURNS: " + CHR(13)+ CHR(9) ;
        + "Primary #1: " + lcPrimary1 + CHR(9) + "Primary #2: " + lcPrimary2 + CHR(9) + CHR(13)+ CHR(9) ;
        + "Secondary: " + lcSecondary1 + CHR(9) + "Secondary: " + lcSecondary2 + CHR(9) + CHR(13)+ CHR(9) ;
        + "Match Type: " + GetMatchType(lcPrimary1, lcSecondary1, lcPrimary2, lcSecondary2) ;
    , 0, "EXAMPLE 2 OF 2")

DO WHILE .T.
    IF MESSAGEBOX("Would you like to try a comparison of your own?",36,"GIVE IT A TRY") = 6
        lcFirstWord = ALLTRIM(INPUTBOX("Enter a name or word:", "FIRST WORD TO COMPARE"))
        IF EMPTY(lcFirstWord)
            MESSAGEBOX("You must enter both words.", 16, "UNABLE TO RUN COMPARISON")
            LOOP
        ENDIF
        lcSecondWord =  ALLTRIM(INPUTBOX("Enter another name or word:", "SECOND WORD TO COMPARE"))
        IF EMPTY(lcSecondWord)
            MESSAGEBOX("You must enter both words.", 16, "UNABLE TO RUN COMPARISON")
            LOOP
        ENDIF
        lnSoundex1 = SOUNDEX(lcFirstWord)
        lnSoundex2 = SOUNDEX(lcSecondWord)
        =DOUBLEMETAPHONE(lcFirstWord, @lcPrimary1, @lcSecondary1)
        =DOUBLEMETAPHONE(lcSecondWord, @lcPrimary2, @lcSecondary2)
        MESSAGEBOX(["]+lcFirstWord+[" compared to "]+lcSecondWord+["] + CHR(13)+ CHR(13) ;
            +"SOUNDEX RETURNS: " + CHR(13)+ CHR(9) ;
                 + TRANSFORM(lnSoundex1) + " = " + TRANSFORM(lnSoundex2) + " (" + IIF(lnSoundex1 = lnSoundex2,"TRUE", "FALSE") + ")" + CHR(13)+ CHR(13) ;
            +"METAPHONE RETURNS: " + CHR(13)+ CHR(9) ;
                + "Primary #1: " + lcPrimary1 + CHR(9) + "Primary #2: " + lcPrimary2 + CHR(9) + CHR(13)+ CHR(9) ;
                + "Secondary: " + lcSecondary1 + CHR(9) + "Secondary: " + lcSecondary2 + CHR(9) + CHR(13)+ CHR(9) ;
                + "Match Type: " + GetMatchType(lcPrimary1, lcSecondary1, lcPrimary2, lcSecondary2) ;
            , 0, "COMPARISON RESULTS")
    ELSE
        EXIT
    ENDIF
ENDDO

FUNCTION GetMatchType(tcPrime1, tcSecond1, tcPrime2, tcSecond2)
    DO CASE
    CASE tcPrime1 = tcPrime2 AND !EMPTY(tcPrime1) AND !EMPTY(tcPrime2)
        RETURN "STRONG MATCH"
    CASE tcPrime1 = tcSecond2 AND !EMPTY(tcPrime1) AND !EMPTY(tcSecond2)
        RETURN "NORMAL MATCH"
    CASE tcSecond1 = tcPrime2 AND !EMPTY(tcSecond1) AND !EMPTY(tcPrime2)
        RETURN "NORMAL MATCH"
    CASE tcSecond1 = tcSecond2 AND !EMPTY(tcSecond1) AND !EMPTY(tcSecond2)
        RETURN "WEAK MATCH"
    OTHERWISE
        RETURN "NO MATCH"
    ENDCASE
ENDFUNC

********END OF EXAMPLE*********


*******************************
FUNCTION DoubleMetaphone
*******************************
PARAMETERS tcWord, tcMetaph, tcMetaph2

#DEFINE SIGNIFICANTCHARS 4 &&Can be changed to allow more or less characters returned

PRIVATE plAlternate, pnLength, pcPrimary, pcSecondary
LOCAL lcLetter, lnCurrent, lnLast

STORE "" TO lcLetter, pcPrimary, pcSecondary
plAlternate = .F.
lnCurrent = 1

pnLength = LEN(tcWord)
IF pnLength < 1
    RETURN ""
ENDIF

lnLast = pnLength

plAlternate = .F.

tcWord = UPPER(ALLTRIM(tcWord))

*!* pad the original string so that we can index beyond the edge of the world
tcWord = tcWord + "     "

*!* skip these when at start of word
IF INLIST(SUBSTR(tcWord,1,2), "GN", "KN", "PN", "WR", "PS")
    lnCurrent = lnCurrent + 1
ENDIF

*!* Initial 'X' is pronounced 'Z' e.g. 'Xavier'
IF SUBSTR(tcWord,1,1) = 'X'
    MetaphAdd("S") && 'Z' maps to 'S'
    lnCurrent = lnCurrent + 1
ENDIF

DO WHILE .T. OR LEN(pcPrimary) < 4 OR LEN(pcSecondary) < 4
    IF lnCurrent > pnLength
        EXIT
    ENDIF
    lcLetter = SUBSTR(tcWord,lnCurrent,1)
    DO CASE
    CASE INLIST(lcLetter, 'A', 'E', 'I', 'O', 'U', 'Y')
        IF lnCurrent = 1
            *!* all init vowels now map to 'A'
            MetaphAdd("A")
        ENDIF
        lnCurrent = lnCurrent + 1
        LOOP

    CASE lcLetter = 'B'

        *!* "-mb", e.g", "dumb", already skipped over...
        MetaphAdd("P")

        IF SUBSTR(tcWord,lnCurrent + 1,1) = 'B'
            lnCurrent = lnCurrent + 2
        ELSE
            lnCurrent = lnCurrent + 1
        ENDIF
        LOOP

    CASE lcLetter = '�'
        MetaphAdd("S")
        lnCurrent = lnCurrent + 1
        LOOP

    CASE lcLetter = 'C'
        *!* various germanic
        IF (lnCurrent > 1) ;
                AND !IsVowel(lnCurrent - 2) ;
                AND SUBSTR(tcWord, lnCurrent - 1, 3) = "ACH" ;
                AND ((SUBSTR(tcWord,lnCurrent + 2,1) != 'I') AND ((SUBSTR(tcWord,lnCurrent + 2,1) != 'E') ;
                OR INLIST(SUBSTR(tcWord,lnCurrent - 2, 6), "BACHER", "MACHER") ))

            MetaphAdd("K")
            lnCurrent = lnCurrent + 2
            LOOP
        ENDIF

        *!* special case 'caesar'
        IF lnCurrent = 1 AND SUBSTR(tcWord,lnCurrent, 6) = "CAESAR"

            MetaphAdd("S")
            lnCurrent = lnCurrent + 2
            LOOP
        ENDIF

        *!* italian 'chianti'
        IF SUBSTR(tcWord, lnCurrent, 4) = "CHIA"

            MetaphAdd("K")
            lnCurrent = lnCurrent + 2
            LOOP
        ENDIF

        IF SUBSTR(tcWord, lnCurrent, 2) = "CH"

            *!* find 'michael'
            IF (lnCurrent > 1) AND SUBSTR(tcWord, lnCurrent, 4) = "CHAE"

                MetaphAddCond("K", "X")
                lnCurrent = lnCurrent + 2
                LOOP
            ENDIF

            *!* greek roots e.g. 'chemistry', 'chorus'
            IF (lnCurrent = 1) ;
                    AND (INLIST(SUBSTR(tcWord, lnCurrent + 1, 5), "HARAC", "HARIS") ;
                    OR INLIST(SUBSTR(tcWord, lnCurrent + 1, 3), "HOR", "HYM", "HIA", "HEM")) ;
                    AND !SUBSTR(tcWord, 1, 5) = "CHORE"

                MetaphAdd("K")
                lnCurrent = lnCurrent + 2
                LOOP
            ENDIF

            *!* germanic, greek, or otherwise 'ch' for 'kh' sound
            *!* e.g., 'wachtler', 'wechsler', but not 'tichner'
            *!* e.g., 'wachtler', 'wechsler', but not 'tichner'
            IF (INLIST(SUBSTR(tcWord, 1, 4), "VAN ", "VON ") OR SUBSTR(tcWord, 1, 3) = "SCH") ;
                    OR INLIST(SUBSTR(tcWord, lnCurrent - 2, 6), "ORCHES", "ARCHIT", "ORCHID") ;
                    OR INLIST(SUBSTR(tcWord, lnCurrent + 2, 1), "T", "S") ;
                    OR ((INLIST(SUBSTR(tcWord, lnCurrent - 1, 1), "A", "O", "U", "E") OR (lnCurrent = 1)) ;
                    AND INLIST(SUBSTR(tcWord, lnCurrent + 2, 1), "L", "R", "N", "M", "B", "H", "F", "V", "W", " "))

                MetaphAdd("K")
            ELSE
                IF lnCurrent > 1

                    IF SUBSTR(tcWord, 1, 2) = "MC"
                        *!* e.g., "McHugh"
                        MetaphAdd("K")
                    ELSE
                        MetaphAddCond("X", "K")
                    ENDIF
                ELSE
                    MetaphAdd("X")
                ENDIF
            ENDIF
            lnCurrent = lnCurrent + 2
            LOOP
        ENDIF
        *!* e.g, 'czerny'
        IF SUBSTR(tcWord, lnCurrent, 2) = "CZ" AND SUBSTR(tcWord, lnCurrent - 2, 4) != "WICZ"
            MetaphAddCond("S", "X")
            lnCurrent = lnCurrent + 2
            LOOP
        ENDIF

        *!* e.g., 'focaccia'
        IF SUBSTR(tcWord, lnCurrent + 1, 3) = "CIA"
            MetaphAdd("X")
            lnCurrent = lnCurrent + 3
            LOOP
        ENDIF

        *!* double 'C', but not if e.g. 'McClellan'
        IF SUBSTR(tcWord, lnCurrent, 2) = "CC" AND !(lnCurrent = 1 AND SUBSTR(tcWord,1,1) = 'M')
            *!* 'bellocchio' but not 'bacchus'
            IF INLIST(SUBSTR(tcWord, lnCurrent + 2, 1), "I", "E", "H") AND SUBSTR(tcWord, lnCurrent + 2, 2) != "HU"

                *!* 'accident', 'accede' 'succeed'
                IF((lnCurrent = 1) AND (SUBSTR(tcWord,lnCurrent - 1,1) = 'A')) ;
                        OR INLIST(SUBSTR(tcWord, lnCurrent - 1, 5), "UCCEE", "UCCES")
                    MetaphAdd("KS")
                    *!* 'bacci', 'bertucci', other italian
                ELSE
                    MetaphAdd("X")
                ENDIF
                lnCurrent = lnCurrent + 3
                LOOP
            ELSE&& Pierce's rule
                MetaphAdd("K")
                lnCurrent = lnCurrent + 2
                LOOP
            ENDIF
        ENDIF

        IF INLIST(SUBSTR(tcWord, lnCurrent, 2), "CK", "CG", "CQ")

            MetaphAdd("K")
            lnCurrent = lnCurrent + 2
            LOOP
        ENDIF

        IF INLIST(SUBSTR(tcWord, lnCurrent, 2), "CI", "CE", "CY")

            *!* italian vs. english
            IF INLIST(SUBSTR(tcWord, lnCurrent, 3), "CIO", "CIE", "CIA")
                MetaphAddCond("S", "X")
            ELSE
                MetaphAdd("S")
            ENDIF
            lnCurrent = lnCurrent + 2
            LOOP
        ENDIF

        *!* else
        MetaphAdd("K")

        *!* name sent in 'mac caffrey', 'mac gregor
        IF INLIST(SUBSTR(tcWord, lnCurrent + 1, 2), " C", " Q", " G")
            lnCurrent = lnCurrent + 3
        ELSE
            IF INLIST(SUBSTR(tcWord, lnCurrent + 1, 1), "C", "K", "Q") ;
                    AND !INLIST(SUBSTR(tcWord, lnCurrent + 1, 2), "CE", "CI")
                lnCurrent = lnCurrent + 2
            ELSE
                lnCurrent = lnCurrent + 1
            ENDIF
            LOOP
        ENDIF

    CASE lcLetter = 'D'
        IF SUBSTR(tcWord, lnCurrent, 2) = "DG"
            IF INLIST(SUBSTR(tcWord, lnCurrent + 2, 1), "I", "E", "Y")
                *!* e.g. 'edge'
                MetaphAdd("J")
                lnCurrent = lnCurrent + 3
            ELSE
                *!* e.g. 'edgar'
                MetaphAdd("TK")
                lnCurrent = lnCurrent + 2
            ENDIF
            LOOP
        ENDIF
        IF INLIST(SUBSTR(tcWord, lnCurrent, 2), "DT", "DD")
            MetaphAdd("T")
            lnCurrent = lnCurrent + 2
            LOOP
        ENDIF
        *!* else
        MetaphAdd("T")
        lnCurrent = lnCurrent + 1
        LOOP

    CASE lcLetter = 'F'
        IF SUBSTR(tcWord,lnCurrent + 1,1) = 'F'
            lnCurrent = lnCurrent + 2
        ELSE
            lnCurrent = lnCurrent + 1
        ENDIF
        MetaphAdd("F")
        LOOP

    CASE lcLetter = 'G'
        IF SUBSTR(tcWord,lnCurrent + 1,1) = 'H'
            IF (lnCurrent > 1) AND !IsVowel(lnCurrent - 1)
                MetaphAdd("K")
                lnCurrent = lnCurrent + 2
                LOOP
            ENDIF

            IF lnCurrent < 3

                *!* 'ghislane', ghiradelli
                IF lnCurrent = 1
                    IF SUBSTR(tcWord,lnCurrent + 2,1) = 'I'
                        MetaphAdd("J")
                    ELSE
                        MetaphAdd("K")
                    ENDIF
                    lnCurrent = lnCurrent + 2
                    LOOP
                ENDIF
            ENDIF
            *!* Parker's rule (with some further refinements) - e.g., 'hugh'
            *!* e.g., 'bough'
            *!* e.g., 'broughton'
            IF((lnCurrent > 1) AND INLIST(SUBSTR(tcWord, lnCurrent - 2, 1), "B", "H", "D")) ;
                    OR ((lnCurrent > 2) AND INLIST(SUBSTR(tcWord, lnCurrent - 3, 1), "B", "H", "D")) ;
                    OR ((lnCurrent > 3) AND INLIST(SUBSTR(tcWord, lnCurrent - 4, 1), "B", "H"))

                lnCurrent = lnCurrent + 2
                LOOP
            ELSE
                *!* e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
                IF (lnCurrent > 2) ;
                        AND (SUBSTR(tcWord,lnCurrent - 1,1) = 'U') ;
                        AND INLIST(SUBSTR(tcWord, lnCurrent - 3, 1), "C", "G", "L", "R", "T")

                    MetaphAdd("F")
                ELSE
                    IF (lnCurrent > 1) AND SUBSTR(tcWord,lnCurrent - 1,1) != 'I'
                        MetaphAdd("K")
                    ENDIF
                ENDIF
                lnCurrent = lnCurrent + 2
                LOOP
            ENDIF
        ENDIF

        IF SUBSTR(tcWord,lnCurrent + 1,1) = 'N'

            IF (lnCurrent = 1) AND IsVowel(1) AND !SlavoGermanic()

                MetaphAddCond("KN", "N")
            ELSE
                *!* not e.g. 'cagney'
                IF SUBSTR(tcWord, lnCurrent + 2, 2) != "EY" ;
                        AND (SUBSTR(tcWord,lnCurrent + 1,1) != 'Y') AND !SlavoGermanic()

                    MetaphAddCond("N", "KN")
                ELSE
                    MetaphAdd("KN")
                ENDIF
            ENDIF
            lnCurrent = lnCurrent + 2
            LOOP
        ENDIF
        *!* 'tagliaro'
        IF SUBSTR(tcWord, lnCurrent + 1, 2) = "LI" AND !SlavoGermanic()
            MetaphAddCond("KL", "L")
            lnCurrent = lnCurrent + 2
            LOOP
        ENDIF

        *!* -ges-,-gep-,-gel-, -gie- at beginning
        IF (lnCurrent = 1) ;
                AND ((SUBSTR(tcWord,lnCurrent + 1,1) = 'Y') ;
                OR INLIST(SUBSTR(tcWord, lnCurrent + 1, 2), "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER"))

            MetaphAddCond("K", "J")
            lnCurrent = lnCurrent + 2
            LOOP
        ENDIF

        *!*  -ger-,  -gy-
        IF (SUBSTR(tcWord, lnCurrent + 1, 2) = "ER" OR SUBSTR(tcWord,lnCurrent + 1,1) = 'Y') ;
                AND !INLIST(SUBSTR(tcWord, 1, 6), "DANGER", "RANGER", "MANGER") ;
                AND !INLIST(SUBSTR(tcWord, lnCurrent - 1, 1), "E", "I") ;
                AND !INLIST(SUBSTR(tcWord, lnCurrent - 1, 3), "RGY", "OGY")

            MetaphAddCond("K", "J")
            lnCurrent = lnCurrent + 2
            LOOP
        ENDIF

        *!*  italian e.g, 'biaggi'
        IF INLIST(SUBSTR(tcWord, lnCurrent + 1, 1), "E", "I", "Y") OR INLIST(SUBSTR(tcWord, lnCurrent - 1, 4), "AGGI", "OGGI")
            *!* obvious germanic
            IF (INLIST(SUBSTR(tcWord, 1, 4), "VAN ", "VON ") OR SUBSTR(tcWord, 1, 3) = "SCH") ;
                    OR SUBSTR(tcWord, lnCurrent + 1, 2) = "ET"
                MetaphAdd("K")
            ELSE
                *!* always soft if french ending
                IF SUBSTR(tcWord, lnCurrent + 1, 4) = "IER "
                    MetaphAdd("J")
                ELSE
                    MetaphAddCond("J", "K")
                ENDIF
            ENDIF
            lnCurrent = lnCurrent + 2
            LOOP
        ENDIF

        IF SUBSTR(tcWord,lnCurrent + 1,1) = 'G'
            lnCurrent = lnCurrent + 2
        ELSE
            lnCurrent = lnCurrent + 1
        ENDIF
        MetaphAdd("K")
        LOOP

    CASE lcLetter = 'H'
        *!* only keep if first & before vowel or btw. 2 vowels
        IF((lnCurrent = 1) OR IsVowel(lnCurrent - 1)) ;
                AND IsVowel(lnCurrent + 1)

            MetaphAdd("H")
            lnCurrent = lnCurrent + 2
        ELSE && also takes care of 'HH'
            lnCurrent = lnCurrent + 1
        ENDIF
        LOOP

    CASE lcLetter = 'J'
        *!* obvious spanish, 'jose', 'san jacinto'
        IF SUBSTR(tcWord, lnCurrent, 4) = "JOSE" OR SUBSTR(tcWord, 1, 4) = "SAN "
            IF ((lnCurrent = 1) AND (SUBSTR(tcWord, lnCurrent + 4,1) = ' ')) OR SUBSTR(tcWord, 1, 4) = "SAN "
                MetaphAdd("H")
            ELSE
                MetaphAddCond("J", "H")
            ENDIF
            lnCurrent = lnCurrent + 1
            LOOP
        ENDIF

        IF (lnCurrent = 1) AND SUBSTR(tcWord, lnCurrent, 4) != "JOSE"
            MetaphAddCond("J", "A") && Yankelovich/Jankelowicz
        ELSE
            *!* spanish pron. of e.g. 'bajador'
            IF IsVowel(lnCurrent - 1) ;
                    AND !SlavoGermanic() ;
                    AND ((SUBSTR(tcWord,lnCurrent + 1,1) = 'A') OR (SUBSTR(tcWord,lnCurrent + 1,1) = 'O'))
                MetaphAddCond("J", "H")
            ELSE
                IF lnCurrent = lnLast
                    MetaphAddCond("J", " ")
                ELSE
                    IF !INLIST(SUBSTR(tcWord, lnCurrent + 1, 1), "L", "T", "K", "S", "N", "M", "B", "Z") ;
                            AND !INLIST(SUBSTR(tcWord, lnCurrent - 1, 1), "S", "K", "L")
                        MetaphAdd("J")
                    ENDIF
                ENDIF
            ENDIF

        ENDIF
        IF SUBSTR(tcWord,lnCurrent + 1,1) = 'J' && it could happen!
            lnCurrent = lnCurrent + 2
        ELSE
            lnCurrent = lnCurrent + 1
        ENDIF
        LOOP

    CASE lcLetter = 'K'
        IF SUBSTR(tcWord,lnCurrent + 1,1) = 'K'
            lnCurrent = lnCurrent + 2
        ELSE
            lnCurrent = lnCurrent + 1
        ENDIF
        MetaphAdd("K")
        LOOP

    CASE lcLetter = 'L'
        IF SUBSTR(tcWord,lnCurrent + 1,1) = 'L'

            *!* spanish e.g. 'cabrillo', 'gallegos'
            IF ((lnCurrent = (pnLength - 3)) ;
                    AND INLIST(SUBSTR(tcWord, lnCurrent - 1, 4), "ILLO", "ILLA", "ALLE")) ;
                    OR ((INLIST(SUBSTR(tcWord, lnLast - 1, 2), "AS", "OS") OR INLIST(SUBSTR(tcWord, lnLast, 1), "A", "O")) ;
                    AND SUBSTR(tcWord, lnCurrent - 1, 4) = "ALLE")

                MetaphAddCond("L", " ")
                lnCurrent = lnCurrent + 2
                LOOP
            ENDIF
            lnCurrent = lnCurrent + 2
        ELSE
            lnCurrent = lnCurrent + 1
        ENDIF
        MetaphAdd("L")
        LOOP

    CASE lcLetter = 'M'
        *!* 'dumb','thumb'
        IF (SUBSTR(tcWord, lnCurrent - 1, 3) = "UMB" ;
                AND (((lnCurrent + 1) = lnLast) OR SUBSTR(tcWord, lnCurrent + 2, 2) = "ER")) ;
                OR  (SUBSTR(tcWord,lnCurrent + 1,1) = 'M')
            lnCurrent = lnCurrent + 2
        ELSE
            lnCurrent = lnCurrent + 1
        ENDIF
        MetaphAdd("M")
        LOOP

    CASE lcLetter = 'N'
        IF SUBSTR(tcWord,lnCurrent + 1,1) = 'N'
            lnCurrent = lnCurrent + 2
        ELSE
            lnCurrent = lnCurrent + 1
        ENDIF
        MetaphAdd("N")
        LOOP

    CASE lcLetter = '�'
        lnCurrent = lnCurrent + 1
        MetaphAdd("N")
        LOOP

    CASE lcLetter = 'P'
        IF SUBSTR(tcWord,lnCurrent + 1,1) = 'H'

            MetaphAdd("F")
            lnCurrent = lnCurrent + 2
            LOOP
        ENDIF

        *!* also account for "campbell", "raspberry"
        IF INLIST(SUBSTR(tcWord, lnCurrent + 1, 1), "P", "B")
            lnCurrent = lnCurrent + 2
        ELSE
            lnCurrent = lnCurrent + 1
            MetaphAdd("P")
        ENDIF
        LOOP

    CASE lcLetter = 'Q'
        IF SUBSTR(tcWord,lnCurrent + 1,1) = 'Q'
            lnCurrent = lnCurrent + 2
        ELSE
            lnCurrent = lnCurrent + 1
        ENDIF
        MetaphAdd("K")
        LOOP

    CASE lcLetter = 'R'
        *!* french e.g. 'rogier', but exclude 'hochmeier'
        IF (lnCurrent = lnLast) ;
                AND !SlavoGermanic() ;
                AND SUBSTR(tcWord, lnCurrent - 2, 2) = "IE" ;
                AND !INLIST(SUBSTR(tcWord, lnCurrent - 4, 2), "ME", "MA")
            MetaphAddCond("", "R")
        ELSE
            MetaphAdd("R")

        ENDIF
        IF SUBSTR(tcWord,lnCurrent + 1,1) = 'R'
            lnCurrent = lnCurrent + 2
        ELSE
            lnCurrent = lnCurrent + 1
        ENDIF
        LOOP

    CASE lcLetter = 'S'
        *!* special cases 'island', 'isle', 'carlisle', 'carlysle'
        IF INLIST(SUBSTR(tcWord, lnCurrent - 1, 3), "ISL", "YSL")

            lnCurrent = lnCurrent + 1
            LOOP
        ENDIF

        *!* special case 'sugar-'
        IF (lnCurrent = 1) AND SUBSTR(tcWord, lnCurrent, 5) = "SUGAR"

            MetaphAddCond("X", "S")
            lnCurrent = lnCurrent + 1
            LOOP
        ENDIF

        IF SUBSTR(tcWord, lnCurrent, 2) = "SH"
            *!* germanic
            IF INLIST(SUBSTR(tcWord, lnCurrent + 1, 4), "HEIM", "HOEK", "HOLM", "HOLZ")
                MetaphAdd("S")
            ELSE
                MetaphAdd("X")
            ENDIF
            lnCurrent = lnCurrent + 2
            LOOP
        ENDIF
        *!* italian & armenian
        IF INLIST(SUBSTR(tcWord, lnCurrent, 3), "SIO", "SIA") OR SUBSTR(tcWord, lnCurrent, 4) = "SIAN"

            IF !SlavoGermanic()
                MetaphAddCond("S", "X")
            ELSE
                MetaphAdd("S")
            ENDIF
            lnCurrent = lnCurrent + 3
            LOOP
        ENDIF
        *!* german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
        *!* also, -sz- in slavic language altho in hungarian it is pronounced 's'
        IF ((lnCurrent = 1) ;
                AND INLIST(SUBSTR(tcWord, lnCurrent + 1, 1), "M", "N", "L", "W")) ;
                OR SUBSTR(tcWord, lnCurrent + 1, 1) = "Z"

            MetaphAddCond("S", "X")
            IF SUBSTR(tcWord, lnCurrent + 1, 1) = "Z"
                lnCurrent = lnCurrent + 2
            ELSE
                lnCurrent = lnCurrent + 1
            ENDIF
            LOOP
        ENDIF

        IF SUBSTR(tcWord, lnCurrent, 2) = "SC"
            *!* Schlesinger's rule
            IF SUBSTR(tcWord,lnCurrent + 2,1) = 'H'
                *!* dutch origin, e.g. 'school', 'schooner'
                IF INLIST(SUBSTR(tcWord, lnCurrent + 3, 2), "OO", "ER", "EN", "UY", "ED", "EM")
                    *!* 'schermerhorn', 'schenker'
                    IF INLIST(SUBSTR(tcWord, lnCurrent + 3, 2), "ER", "EN")
                        MetaphAddCond("X", "SK")
                    ELSE
                        MetaphAdd("SK")
                    ENDIF
                    lnCurrent = lnCurrent + 3
                    LOOP
                ELSE
                    IF (lnCurrent = 1) AND !IsVowel(3) AND (SUBSTR(tcWord,3,1) != 'W')
                        MetaphAddCond("X", "S")
                    ELSE
                        MetaphAdd("X")
                    ENDIF
                    lnCurrent = lnCurrent + 3
                    LOOP
                ENDIF
            ENDIF
            IF INLIST(SUBSTR(tcWord, lnCurrent + 2, 1), "I", "E", "Y")
                MetaphAdd("S")
                lnCurrent = lnCurrent + 3
                LOOP
            ENDIF
            *!* else
            MetaphAdd("SK")
            lnCurrent = lnCurrent + 3
            LOOP
        ENDIF
        *!* french e.g. 'resnais', 'artois'
        IF (lnCurrent = lnLast) AND INLIST(SUBSTR(tcWord, lnCurrent - 2, 2), "AI", "OI")
            MetaphAddCond("", "S")
        ELSE
            MetaphAdd("S")
        ENDIF
        IF INLIST(SUBSTR(tcWord, lnCurrent + 1, 1), "S", "Z")
            lnCurrent = lnCurrent + 2
        ELSE
            lnCurrent = lnCurrent + 1
        ENDIF
        LOOP
    CASE lcLetter = 'T'
        IF SUBSTR(tcWord, lnCurrent, 4) = "TION"
            MetaphAdd("X")
            lnCurrent = lnCurrent + 3
            LOOP
        ENDIF
        IF INLIST(SUBSTR(tcWord, lnCurrent, 3), "TIA", "TCH")
            MetaphAdd("X")
            lnCurrent = lnCurrent + 3
            LOOP
        ENDIF
        IF SUBSTR(tcWord, lnCurrent, 2) = "TH" ;
                OR SUBSTR(tcWord, lnCurrent, 3) = "TTH"
            *!* special case 'thomas', 'thames' or germanic
            IF INLIST(SUBSTR(tcWord, lnCurrent + 2, 2), "OM", "AM") ;
                    OR INLIST(SUBSTR(tcWord, 1, 4), "VAN ", "VON ") ;
                    OR SUBSTR(tcWord, 1, 3) = "SCH"
                MetaphAdd("T")
            ELSE
                MetaphAddCond("0", "T")
            ENDIF
            lnCurrent = lnCurrent + 2
            LOOP
        ENDIF
        IF INLIST(SUBSTR(tcWord, lnCurrent + 1, 1), "T", "D")
            lnCurrent = lnCurrent + 2
        ELSE
            lnCurrent = lnCurrent + 1
        ENDIF
        MetaphAdd("T")
        LOOP

    CASE lcLetter = 'V'
        IF SUBSTR(tcWord,lnCurrent + 1,1) = 'V'
            lnCurrent = lnCurrent + 2
        ELSE
            lnCurrent = lnCurrent + 1
        ENDIF
        MetaphAdd("F")
        LOOP

    CASE lcLetter = 'W'
        *!* can also be in middle of word
        IF SUBSTR(tcWord, lnCurrent, 2) = "WR"
            MetaphAdd("R")
            lnCurrent = lnCurrent + 2
            LOOP
        ENDIF

        IF (lnCurrent = 1) ;
                AND (IsVowel(lnCurrent + 1) OR SUBSTR(tcWord, lnCurrent, 2) = "WH")
            *!* Wasserman should match Vasserman
            IF IsVowel(lnCurrent + 1)
                MetaphAddCond("A", "F")
            ELSE
                *!* need Uomo to match Womo
                MetaphAdd("A")
            ENDIF
        ENDIF
        *!* Arnow should match Arnoff
        IF ((lnCurrent = lnLast) AND IsVowel(lnCurrent - 1)) ;
                OR INLIST(SUBSTR(tcWord, lnCurrent - 1, 5), "EWSKI", "EWSKY", "OWSKI", "OWSKY") ;
                OR SUBSTR(tcWord, 1, 3) = "SCH"
            MetaphAddCond("", "F")
            lnCurrent = lnCurrent + 1
            LOOP
        ENDIF
        *!* polish e.g. 'filipowicz'
        IF INLIST(SUBSTR(tcWord, lnCurrent, 4), "WICZ", "WITZ")
            MetaphAddCond("TS", "FX")
            lnCurrent = lnCurrent + 4
            LOOP
        ENDIF

        *!* else skip it
        lnCurrent = lnCurrent + 1
        LOOP

    CASE lcLetter = 'X'
        *!* french e.g. breaux
        IF !(lnCurrent = lnLast ;
                AND (INLIST(SUBSTR(tcWord, lnCurrent - 3, 3), "IAU", "EAU") ;
                OR INLIST(SUBSTR(tcWord, lnCurrent - 2, 2), "AU", "OU")))
            MetaphAdd("KS")
        ENDIF
        IF INLIST(SUBSTR(tcWord, lnCurrent + 1, 1), "C", "X")
            lnCurrent = lnCurrent + 2
        ELSE
            lnCurrent = lnCurrent + 1
        ENDIF
        LOOP

    CASE lcLetter = 'Z'
        *!* chinese pinyin e.g. 'zhao'
        IF SUBSTR(tcWord,lnCurrent + 1,1) = 'H'
            MetaphAdd("J")
            lnCurrent = lnCurrent + 2
            LOOP
        ELSE
            IF INLIST(SUBSTR(tcWord, lnCurrent + 1, 2), "ZO", "ZI", "ZA") ;
                    OR (SlavoGermanic() AND ((lnCurrent > 1) AND SUBSTR(tcWord,lnCurrent - 1,1) != 'T'))

                MetaphAddCond("S", "TS")

            ELSE
                MetaphAdd("S")
            ENDIF
        ENDIF
        IF SUBSTR(tcWord,lnCurrent + 1,1) = 'Z'
            lnCurrent = lnCurrent + 2
        ELSE
            lnCurrent = lnCurrent + 1
        ENDIF
        LOOP
    ENDCASE
    lnCurrent = lnCurrent + 1
ENDDO

tcMetaph  = LEFT(pcPrimary, SIGNIFICANTCHARS)
IF plAlternate
    tcMetaph2 = LEFT(pcSecondary, SIGNIFICANTCHARS)
ELSE
    tcMetaph2 = ""
ENDIF
ENDFUNC


*******************************
FUNCTION SlavoGermanic()
*******************************
IF ATC('W', tcWord) > 0 OR ATC('K', tcWord) > 0 ;
    OR ATC("CZ", tcWord) > 0 OR ATC("WITZ", tcWord) > 0
    RETURN .T.
ELSE
    RETURN .F.
ENDIF
ENDFUNC

*******************************
PROCEDURE MetaphAdd(lcMain)
*******************************
IF LEN(lcMain) > 0
    pcPrimary = pcPrimary + lcMain
    pcSecondary = pcSecondary + lcMain
ENDIF
ENDPROC

*******************************
PROCEDURE MetaphAddCond(lcMain, lcAlt)
*******************************
IF LEN(lcMain) > 0
    pcPrimary = pcPrimary + lcMain
ENDIF
IF LEN(lcAlt) > 0
    plAlternate = .T.
    IF SUBSTR(lcAlt, 1, 1) != ' '
        pcSecondary = pcSecondary + lcAlt
    ENDIF
ELSE
    IF LEN(lcMain) > 0 AND SUBSTR(lcMain, 1, 1) != ' '
        pcSecondary = pcSecondary + lcMain
    ENDIF
ENDIF
ENDPROC

*******************************
FUNCTION IsVowel(lnAt)
*******************************
LOCAL lcIt
IF !BETWEEN(lnAt, 1, pnLength)
    RETURN .F.
ELSE
    lcIt = SUBSTR(tcWord, lnAt, 1)
    RETURN (INLIST(lcIt,'A','E','I','O','U','Y'))
ENDIF
ENDFUNC

-- Craig SBoyd
Category Code Samples Category UDF