Package org.apache.commons.codec.language

Examples of org.apache.commons.codec.language.RefinedSoundex


        return true;
      }
    }

    Soundex soundex = new Soundex();
    RefinedSoundex refinedSoundex = new RefinedSoundex();
    Metaphone metaphone = new Metaphone();

    double threshold;
    if (matchMode == MatchMode.STRICT) {
      threshold = STRICT_SIMILARITY_THRESHOLD;
    } else {
      threshold = LOOSE_SIMILARITY_THRESHOLD;
    }
    int soundexThreshold = (int) Math.round(threshold * 4);

    for (String similarityGroupValue : similarityGroup.getValues()) {
      boolean metaphoneEquals = metaphone.isMetaphoneEqual(value, similarityGroupValue);
      if (metaphoneEquals) {
        return true;
      }

      try {
        int soundexDiff = soundex.difference(value, similarityGroupValue);

        if (soundexDiff >= soundexThreshold) {
          return true;
        }
      } catch (Exception e) {
        logger.error("Could not determine soundex difference", e);
      }

      int refinedSoundexThreshold = (int) Math.round(threshold
          * Math.min(value.length(), similarityGroupValue.length()));

      try {
        int refinedSoundexDiff = refinedSoundex.difference(value, similarityGroupValue);

        if (refinedSoundexDiff >= refinedSoundexThreshold) {
          return true;
        }
      } catch (Exception e) {
View Full Code Here


 
  public void testEncodes() throws Exception {
    runner( new DoubleMetaphone(), true );
    runner( new Metaphone(), true );
    runner( new Soundex(), true );
    runner( new RefinedSoundex(), true );

    runner( new DoubleMetaphone(), false );
    runner( new Metaphone(), false );
    runner( new Soundex(), false );
    runner( new RefinedSoundex(), false );
  }
View Full Code Here

 
  public void testEncodes() throws Exception {
    runner( new DoubleMetaphone(), true );
    runner( new Metaphone(), true );
    runner( new Soundex(), true );
    runner( new RefinedSoundex(), true );

    runner( new DoubleMetaphone(), false );
    runner( new Metaphone(), false );
    runner( new Soundex(), false );
    runner( new RefinedSoundex(), false );
  }
View Full Code Here

             
            if( eqArtist > 80 && eqTitle > 80) {
              // soundex analysis
              int difference = -1;
              int length = 0;
              RefinedSoundex soundex = new RefinedSoundex();
              try {
                difference = soundex.difference(title1, title2);
                length = Math.max(soundex.encode(title1).length(), soundex.encode(title2).length());
              } catch (Exception e) {}
           
              double diff = (double)difference / (double)length;
              if(diff >= 0.75) {
                Duplicate d;
View Full Code Here

 
  public void testEncodes() throws Exception {
    runner( new DoubleMetaphone(), true );
    runner( new Metaphone(), true );
    runner( new Soundex(), true );
    runner( new RefinedSoundex(), true );

    runner( new DoubleMetaphone(), false );
    runner( new Metaphone(), false );
    runner( new Soundex(), false );
    runner( new RefinedSoundex(), false );
  }
View Full Code Here

    assertAlgorithm(new Soundex(), true, "aaa bbb ccc easgasg",
        new String[] { "A000", "aaa", "B000", "bbb", "C000", "ccc", "E220", "easgasg" });
    assertAlgorithm(new Soundex(), false, "aaa bbb ccc easgasg",
        new String[] { "A000", "B000", "C000", "E220" });
   
    assertAlgorithm(new RefinedSoundex(), true, "aaa bbb ccc easgasg",
        new String[] { "A0", "aaa", "B1", "bbb", "C3", "ccc", "E034034", "easgasg" });
    assertAlgorithm(new RefinedSoundex(), false, "aaa bbb ccc easgasg",
        new String[] { "A0", "B1", "C3", "E034034" });
   
    assertAlgorithm(new Caverphone(), true, "Darda Karleen Datha Carlene",
        new String[] { "TTA1111111", "Darda", "KLN1111111", "Karleen",
          "TTA1111111", "Datha", "KLN1111111", "Carlene" });
View Full Code Here

  }
 
  /** blast some random strings through the analyzer */
  public void testRandomStrings() throws IOException {
    Encoder encoders[] = new Encoder[] {
      new Metaphone(), new DoubleMetaphone(), new Soundex(), new RefinedSoundex(), new Caverphone()
    };
   
    for (final Encoder e : encoders) {
      Analyzer a = new ReusableAnalyzerBase() {
        @Override
View Full Code Here

    }
  }
 
  public void testEmptyTerm() throws IOException {
    Encoder encoders[] = new Encoder[] {
        new Metaphone(), new DoubleMetaphone(), new Soundex(), new RefinedSoundex(), new Caverphone()
    };
    for (final Encoder e : encoders) {
      Analyzer a = new ReusableAnalyzerBase() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
View Full Code Here

TOP

Related Classes of org.apache.commons.codec.language.RefinedSoundex

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.