Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.NormalizeCharMap
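
NormalizeCharMap holds character-sequence replacement rules for MappingCharFilter, which rewrites a character stream before any Tokenizer sees it. The snippets below all use the Lucene 3.x API, where rules are added with add(source, target) or compiled from rule strings by a MappingCharFilterFactory. A minimal sketch of the basic usage (the class name is mine; everything else follows the snippets):

    import java.io.StringReader;

    import org.apache.lucene.analysis.CharReader;
    import org.apache.lucene.analysis.CharStream;
    import org.apache.lucene.analysis.MappingCharFilter;
    import org.apache.lucene.analysis.NormalizeCharMap;

    public class NormalizeCharMapDemo {
      public static void main(String[] args) throws Exception {
        NormalizeCharMap normMap = new NormalizeCharMap();
        normMap.add("\\", "/");   // replace every backslash with a forward slash
        CharStream cs = new MappingCharFilter(
            normMap, CharReader.get(new StringReader("c:\\a\\b")));
        char[] buf = new char[16];
        int n = cs.read(buf, 0, buf.length);
        System.out.println(new String(buf, 0, n));   // prints c:/a/b
      }
    }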


        }
      }
      catch( IOException e ){
        throw new RuntimeException( e );
      }
      // compile the mapping rules read above (wlist) into the shared char map
      normMap = new NormalizeCharMap();
      parseRules( wlist, normMap );
    }
  }


    // create MappingCharFilter
    MappingCharFilterFactory cfFactory = new MappingCharFilterFactory();
    List<String> mappingRules = new ArrayList<String>();
    // one rule per string, in "source" => "target" form
    mappingRules.add( "\"&uuml;\" => \"ü\"" );
    NormalizeCharMap normMap = new NormalizeCharMap();
    cfFactory.parseRules( mappingRules, normMap );
    CharStream charStream = new MappingCharFilter( normMap, CharReader.get( new StringReader( INPUT ) ) );

    // create PatternTokenizer
    Map<String,String> args = new HashMap<String, String>();
    // (rest of the PatternTokenizer setup truncated in the source snippet)
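
The rule strings parsed above use the mapping-file syntax "source" => "target", so the rule maps the literal entity &uuml; to ü before tokenization. A hedged sketch of the effect, adding the same mapping directly with NormalizeCharMap.add (parseRules may not be visible outside the factory's package, so the direct form is used here; assume the code runs in a method that throws IOException):

    NormalizeCharMap map = new NormalizeCharMap();
    map.add("&uuml;", "ü");   // same effect as the rule string above
    java.io.Reader in = new MappingCharFilter(
        map, CharReader.get(new java.io.StringReader("gr&uuml;n")));
    StringBuilder sb = new StringBuilder();
    for (int c = in.read(); c != -1; c = in.read()) {
      sb.append((char) c);
    }
    System.out.println(sb);   // prints grün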

        List<String> rules = Analysis.getWordList(env, settings, "mappings");
        if (rules == null) {
            throw new ElasticSearchIllegalArgumentException("mapping requires either `mappings` or `mappings_path` to be configured");
        }

        // same pattern as the Solr factory above: compile the rules into the map
        normMap = new NormalizeCharMap();
        parseRules(rules, normMap);
    }

  public void testNormalizeWinDelimToLinuxDelim() throws Exception {
    NormalizeCharMap normMap = new NormalizeCharMap();
    normMap.add("\\", "/");   // rewrite Windows delimiters to Linux ones
    String path = "c:\\a\\b\\c";
    CharStream cs = new MappingCharFilter(normMap, new StringReader(path));
    PathHierarchyTokenizer t = new PathHierarchyTokenizer( cs );
    assertTokenStreamContents(t,
        new String[]{"c:", "c:/a", "c:/a/b", "c:/a/b/c"},
        // (remaining expected-offset arguments truncated in the source snippet)

  // *CompoundWordTokenFilter blindly adds the term length to the offset, but this can take
  // things out of bounds with respect to the original text if a previous filter increases
  // the length of the word (in this case ü -> ue), so in this case we behave like
  // WordDelimiterFilter and preserve any modified offsets.
  public void testInvalidOffsets() throws Exception {
    final String[] dict = { "fall" };
    final NormalizeCharMap normMap = new NormalizeCharMap();
    normMap.add("ü", "ue");

    Analyzer analyzer = new ReusableAnalyzerBase() {

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        // (body truncated in the source snippet)
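
A hedged sketch of the offset mismatch described in the comment above (class name is mine; Lucene 3.x API as elsewhere on this page): mapping ü -> ue makes the filtered text one character longer than the original, so a filter that computes end offsets by adding term lengths can point past the end of the original input unless offsets are corrected.

    import java.io.StringReader;

    import org.apache.lucene.analysis.CharReader;
    import org.apache.lucene.analysis.CharStream;
    import org.apache.lucene.analysis.MappingCharFilter;
    import org.apache.lucene.analysis.NormalizeCharMap;

    public class OffsetShiftDemo {
      public static void main(String[] args) throws Exception {
        NormalizeCharMap normMap = new NormalizeCharMap();
        normMap.add("ü", "ue");                    // one char becomes two
        String original = "bankü";                 // length 5
        CharStream cs = new MappingCharFilter(
            normMap, CharReader.get(new StringReader(original)));
        char[] buf = new char[16];
        int n = cs.read(buf, 0, buf.length);
        System.out.println(new String(buf, 0, n)); // bankue (length 6)
        // correctOffset maps a position in the filtered text back into the
        // 5-character original, keeping offsets within bounds:
        System.out.println(cs.correctOffset(n));
      }
    }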
