Package org.dcm4che3.soundex

Source Code of org.dcm4che3.soundex.SoundexTest

/* ***** BEGIN LICENSE BLOCK *****
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* The original code is from the Apache commons-codec-1.5-src package
* and has been modified for the dcm4che project
*
* ***** END LICENSE BLOCK ***** */

package org.dcm4che3.soundex;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import junit.framework.Assert;

import org.dcm4che3.soundex.Soundex;
import org.junit.Test;

public class SoundexTest {

    public String getSoundexEncoderString(String arg) {
        Soundex inst = new Soundex();
        return inst.toFuzzy(arg);
    }

    protected void checkEncodingVariations(String expected, String data[]) throws Exception {
        for (int i = 0; i < data.length; i++) {
            checkEncoding(expected, data[i]);
        }
    }
   
    public void checkEncoding(String expected, String source) throws Exception {
        assertEquals("Source: " + source, expected, getSoundexEncoderString(source));
    }
   
    @Test
    public void testB650() throws Exception {
        checkEncodingVariations("B650", (new String[]{
            "BARHAM",
            "BARONE",
            "BARRON",
            "BERNA",
            "BIRNEY",
            "BIRNIE",
            "BOOROM",
            "BOREN",
            "BORN",
            "BOURN",
            "BOURNE",
            "BOWRON",
            "BRAIN",
            "BRAME",
            "BRANN",
            "BRAUN",
            "BREEN",
            "BRIEN",
            "BRIM",
            "BRIMM",
            "BRINN",
            "BRION",
            "BROOM",
            "BROOME",
            "BROWN",
            "BROWNE",
            "BRUEN",
            "BRUHN",
            "BRUIN",
            "BRUMM",
            "BRUN",
            "BRUNO",
            "BRYAN",
            "BURIAN",
            "BURN",
            "BURNEY",
            "BYRAM",
            "BYRNE",
            "BYRON",
            "BYRUM"}));
    }

    public void testBadCharacters() {
        Assert.assertEquals("H452", getSoundexEncoderString("HOL>MES"));

    }

    @Test
    public void testEncodeBasic() {
        Assert.assertEquals("T235", getSoundexEncoderString("testing"));
        Assert.assertEquals("T000", getSoundexEncoderString("The"));
        Assert.assertEquals("Q200", getSoundexEncoderString("quick"));
        Assert.assertEquals("B650", getSoundexEncoderString("brown"));
        Assert.assertEquals("F200", getSoundexEncoderString("fox"));
        Assert.assertEquals("J513", getSoundexEncoderString("jumped"));
        Assert.assertEquals("O160", getSoundexEncoderString("over"));
        Assert.assertEquals("T000", getSoundexEncoderString("the"));
        Assert.assertEquals("L200", getSoundexEncoderString("lazy"));
        Assert.assertEquals("D200", getSoundexEncoderString("dogs"));
    }

    /**
     * Examples from http://www.bradandkathy.com/genealogy/overviewofsoundex.html
     */
    @Test
    public void testEncodeBatch2() {
        Assert.assertEquals("A462", getSoundexEncoderString("Allricht"));
        Assert.assertEquals("E166", getSoundexEncoderString("Eberhard"));
        Assert.assertEquals("E521", getSoundexEncoderString("Engebrethson"));
        Assert.assertEquals("H512", getSoundexEncoderString("Heimbach"));
        Assert.assertEquals("H524", getSoundexEncoderString("Hanselmann"));
        Assert.assertEquals("H431", getSoundexEncoderString("Hildebrand"));
        Assert.assertEquals("K152", getSoundexEncoderString("Kavanagh"));
        Assert.assertEquals("L530", getSoundexEncoderString("Lind"));
        Assert.assertEquals("L222", getSoundexEncoderString("Lukaschowsky"));
        Assert.assertEquals("M235", getSoundexEncoderString("McDonnell"));
        Assert.assertEquals("M200", getSoundexEncoderString("McGee"));
        Assert.assertEquals("O155", getSoundexEncoderString("Opnian"));
        Assert.assertEquals("O155", getSoundexEncoderString("Oppenheimer"));
        Assert.assertEquals("R355", getSoundexEncoderString("Riedemanas"));
        Assert.assertEquals("Z300", getSoundexEncoderString("Zita"));
        Assert.assertEquals("Z325", getSoundexEncoderString("Zitzmeinn"));
    }

    /**
     * Examples from http://www.archives.gov/research_room/genealogy/census/soundex.html
     */
    @Test
    public void testEncodeBatch3() {
        Assert.assertEquals("W252", getSoundexEncoderString("Washington"));
        Assert.assertEquals("L000", getSoundexEncoderString("Lee"));
        Assert.assertEquals("G362", getSoundexEncoderString("Gutierrez"));
        Assert.assertEquals("P236", getSoundexEncoderString("Pfister"));
        Assert.assertEquals("J250", getSoundexEncoderString("Jackson"));
        Assert.assertEquals("T522", getSoundexEncoderString("Tymczak"));
        // For VanDeusen: D-250 (D, 2 for the S, 5 for the N, 0 added) is also
        // possible.
        Assert.assertEquals("V532", getSoundexEncoderString("VanDeusen"));
    }

    /**
     * Examples from: http://www.myatt.demon.co.uk/sxalg.htm
     */
    @Test
    public void testEncodeBatch4() {
        Assert.assertEquals("H452", getSoundexEncoderString("HOLMES"));
        Assert.assertEquals("A355", getSoundexEncoderString("ADOMOMI"));
        Assert.assertEquals("V536", getSoundexEncoderString("VONDERLEHR"));
        Assert.assertEquals("B400", getSoundexEncoderString("BALL"));
        Assert.assertEquals("S000", getSoundexEncoderString("SHAW"));
        Assert.assertEquals("J250", getSoundexEncoderString("JACKSON"));
        Assert.assertEquals("S545", getSoundexEncoderString("SCANLON"));
        Assert.assertEquals("S532", getSoundexEncoderString("SAINTJOHN"));

    }

    @Test
    public void testEncodeIgnoreApostrophes() throws Exception {
        checkEncodingVariations("O165", (new String[]{
            "OBrien",
            "'OBrien",
            "O'Brien",
            "OB'rien",
            "OBr'ien",
            "OBri'en",
            "OBrie'n",
            "OBrien'"}));
    }

    @Test
    public void testEncodeIgnoreHyphens() throws Exception {
        checkEncodingVariations("K525", (new String[]{
            "KINGSMITH",
            "-KINGSMITH",
            "K-INGSMITH",
            "KI-NGSMITH",
            "KIN-GSMITH",
            "KINGS-MITH",
            "KINGSM-ITH",
            "KINGSMI-TH",
            "KINGSMIT-H",
            "KINGSMITH-"}));
        assertFalse("Source: KING-SMITH", getSoundexEncoderString("KING-SMITH").equals("K525"));
    }

    @Test
    public void testEncodeIgnoreTrimmable() {
        Assert.assertEquals("W252", getSoundexEncoderString(" \t\n\r Washington \t\n\r "));
    }

    /**
     * Consonants from the same code group separated by W or H are treated as one.
     */
    @Test
    public void testHWRuleEx1() {
        // From
        // http://www.archives.gov/research_room/genealogy/census/soundex.html:
        // Ashcraft is coded A-261 (A, 2 for the S, C ignored, 6 for the R, 1
        // for the F). It is not coded A-226.
        Assert.assertEquals("A261", getSoundexEncoderString("Ashcraft"));
    }

    /**
     * Consonants from the same code group separated by W or H are treated as one.
     *
     * Test data from http://www.myatt.demon.co.uk/sxalg.htm
     */
    @Test
    public void testHWRuleEx2() {
        Assert.assertEquals("B312", getSoundexEncoderString("BOOTHDAVIS"));
        Assert.assertNotSame("B312", getSoundexEncoderString("BOOTH-DAVIS"));
    }

    /**
     * Consonants from the same code group separated by W or H are treated as one.
     *
     * @throws Exception
     */
    @Test
    public void testHWRuleEx3() throws Exception {
        Assert.assertEquals("S460", getSoundexEncoderString("Sgler"));
        Assert.assertEquals("S460", getSoundexEncoderString("Swhgler"));
        // Also S460:
        checkEncodingVariations("S460", (new String[]{
            "SAILOR",
            "SALYER",
            "SAYLOR",
            "SCHALLER",
            "SCHELLER",
            "SCHILLER",
            "SCHOOLER",
            "SCHULER",
            "SCHUYLER",
            "SEILER",
            "SEYLER",
            "SHOLAR",
            "SHULER",
            "SILAR",
            "SILER",
            "SILLER"}));
    }

    @Test
    public void testMaxLength() throws Exception {
//        Soundex soundex = new Soundex();
//        soundex.setMaxLength(soundex.getMaxLength());
        Assert.assertEquals("S460", getSoundexEncoderString("Sgler"));
    }

    @Test
    public void testMaxLengthLessThan3Fix() throws Exception {
//        Soundex soundex = new Soundex();
//        soundex.setMaxLength(2);
        Assert.assertEquals("S460", getSoundexEncoderString("SCHELLER"));
    }

    /**
     * Examples for MS SQLServer from
     * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_setu-sus_3o6w.asp
     */
    @Test
    public void testMsSqlServer1() {
        Assert.assertEquals("S530", getSoundexEncoderString("Smith"));
        Assert.assertEquals("S530", getSoundexEncoderString("Smythe"));
    }

    /**
     * Examples for MS SQLServer from
     * http://support.microsoft.com/default.aspx?scid=http://support.microsoft.com:80/support
     * /kb/articles/Q100/3/65.asp&NoWebContent=1
     *
     * @throws Exception
     */
    @Test
    public void testMsSqlServer2() throws Exception {
        checkEncodingVariations("E625", (new String[]{"Erickson", "Erickson", "Erikson", "Ericson", "Ericksen", "Ericsen"}));
    }

    /**
     * Examples for MS SQLServer from http://databases.about.com/library/weekly/aa042901a.htm
     */
    @Test
    public void testMsSqlServer3() {
        Assert.assertEquals("A500", getSoundexEncoderString("Ann"));
        Assert.assertEquals("A536", getSoundexEncoderString("Andrew"));
        Assert.assertEquals("J530", getSoundexEncoderString("Janet"));
        Assert.assertEquals("M626", getSoundexEncoderString("Margaret"));
        Assert.assertEquals("S315", getSoundexEncoderString("Steven"));
        Assert.assertEquals("M240", getSoundexEncoderString("Michael"));
        Assert.assertEquals("R163", getSoundexEncoderString("Robert"));
        Assert.assertEquals("L600", getSoundexEncoderString("Laura"));
        Assert.assertEquals("A500", getSoundexEncoderString("Anne"));
    }

    /**
     * Fancy characters are not mapped by the default US mapping.
     *
     * http://issues.apache.org/bugzilla/show_bug.cgi?id=29080
     */
    @Test
    public void testUsMappingEWithAcute() {
        Assert.assertEquals("E000", getSoundexEncoderString("e"));
        if (Character.isLetter('�')) {
            try {
                Assert.assertEquals("�000", getSoundexEncoderString("�"));
                Assert.fail("Expected IllegalArgumentException not thrown");
            } catch (IllegalArgumentException e) {
                // expected
            }
        } else {
            Assert.assertEquals("", getSoundexEncoderString("�"));
        }
    }

    /**
     * Fancy characters are not mapped by the default US mapping.
     *
     * http://issues.apache.org/bugzilla/show_bug.cgi?id=29080
     */
    @Test
    public void testUsMappingOWithDiaeresis() {
        Assert.assertEquals("O000", getSoundexEncoderString("o"));
        if (Character.isLetter('�')) {
            try {
                Assert.assertEquals("�000", getSoundexEncoderString("�"));
                Assert.fail("Expected IllegalArgumentException not thrown");
            } catch (IllegalArgumentException e) {
                // expected
            }
        } else {
            Assert.assertEquals("", getSoundexEncoderString("�"));
        }
    }
}
TOP

Related Classes of org.dcm4che3.soundex.SoundexTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.