Package: org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.MappingCharFilter


      parseRules( wlist, normMap );
    }
  }

  public CharStream create(CharStream input) {
    return new MappingCharFilter(normMap,input);
  }
View Full Code Here


    MappingCharFilterFactory cfFactory = new MappingCharFilterFactory();
    List<String> mappingRules = new ArrayList<String>();
    mappingRules.add( "\"&uuml;\" => \"ü\"" );
    NormalizeCharMap normMap = new NormalizeCharMap();
    cfFactory.parseRules( mappingRules, normMap );
    CharStream charStream = new MappingCharFilter( normMap, CharReader.get( new StringReader( INPUT ) ) );

    // create PatternTokenizer
    Map<String,String> args = new HashMap<String, String>();
    args.put( PatternTokenizerFactory.PATTERN, "[,;/\\s]+" );
    PatternTokenizerFactory tokFactory = new PatternTokenizerFactory();
    tokFactory.init( args );
    TokenStream stream = tokFactory.create( charStream );
    assertTokenStreamContents(stream,
        new String[] { "Günther", "Günther", "is", "here" },
        new int[] { 0, 13, 26, 29 },
        new int[] { 12, 25, 28, 33 });
   
    charStream = new MappingCharFilter( normMap, CharReader.get( new StringReader( INPUT ) ) );
    args.put( PatternTokenizerFactory.PATTERN, "Günther" );
    args.put( PatternTokenizerFactory.GROUP, "0" );
    tokFactory = new PatternTokenizerFactory();
    tokFactory.init( args );
    stream = tokFactory.create( charStream );
View Full Code Here

    MappingCharFilterFactory cfFactory = new MappingCharFilterFactory();
    List<String> mappingRules = new ArrayList<String>();
    mappingRules.add( "\"&uuml;\" => \"ü\"" );
    NormalizeCharMap normMap = new NormalizeCharMap();
    cfFactory.parseRules( mappingRules, normMap );
    CharStream charStream = new MappingCharFilter( normMap, CharReader.get( new StringReader( INPUT ) ) );

    // create PatternTokenizer
    Map<String,String> args = new HashMap<String, String>();
    args.put( PatternTokenizerFactory.PATTERN, "[,;/\\s]+" );
    PatternTokenizerFactory tokFactory = new PatternTokenizerFactory();
    tokFactory.init( args );
    TokenStream stream = tokFactory.create( charStream );

    List<Token> result = getTokens( stream );
    List<Token> expect = tokens( "Günther,1,0,12 Günther,1,13,25 is,1,26,28 here,1,29,33" );
    assertTokEqualOff( expect, result );
   
    charStream.reset();
    args.put( PatternTokenizerFactory.PATTERN, "Günther" );
    args.put( PatternTokenizerFactory.GROUP, "0" );
    tokFactory = new PatternTokenizerFactory();
    tokFactory.init( args );
    stream = tokFactory.create( charStream );
View Full Code Here

        normMap = new NormalizeCharMap();
        parseRules(rules, normMap);
    }
   
    @Override public CharStream create(CharStream tokenStream) {
        return new MappingCharFilter(normMap, tokenStream);
    }
View Full Code Here

      parseRules( wlist, normMap );
    }
  }

  public CharStream create(CharStream input) {
    return new MappingCharFilter(normMap,input);
  }
View Full Code Here

  public void testNormalizeWinDelimToLinuxDelim() throws Exception {
    NormalizeCharMap normMap = new NormalizeCharMap();
    normMap.add("\\", "/");
    String path = "c:\\a\\b\\c";
    CharStream cs = new MappingCharFilter(normMap, new StringReader(path));
    PathHierarchyTokenizer t = new PathHierarchyTokenizer( cs );
    assertTokenStreamContents(t,
        new String[]{"c:", "c:/a", "c:/a/b", "c:/a/b/c"},
        new int[]{0, 0, 0, 0},
        new int[]{2, 4, 6, 8},
View Full Code Here

      parseRules( wlist, normMap );
    }
  }

  public CharStream create(CharStream input) {
    return new MappingCharFilter( normMap, input );
  }
View Full Code Here

  public void testNormalizeWinDelimToLinuxDelim() throws Exception {
    NormalizeCharMap normMap = new NormalizeCharMap();
    normMap.add("\\", "/");
    String path = "c:\\a\\b\\c";
    CharStream cs = new MappingCharFilter(normMap, new StringReader(path));
    PathHierarchyTokenizer t = new PathHierarchyTokenizer( cs );
    assertTokenStreamContents(t,
        new String[]{"c:", "c:/a", "c:/a/b", "c:/a/b/c"},
        new int[]{0, 0, 0, 0},
        new int[]{2, 4, 6, 8},
View Full Code Here

  public void testNormalizeWinDelimToLinuxDelim() throws Exception {
    NormalizeCharMap normMap = new NormalizeCharMap();
    normMap.add("\\", "/");
    String path = "c:\\a\\b\\c";
    CharStream cs = new MappingCharFilter(normMap, new StringReader(path));
    PathHierarchyTokenizer t = new PathHierarchyTokenizer( cs );
    assertTokenStreamContents(t,
        new String[]{"c:", "c:/a", "c:/a/b", "c:/a/b/c"},
        new int[]{0, 0, 0, 0},
        new int[]{2, 4, 6, 8},
View Full Code Here

        return new TokenStreamComponents(tokenizer, filter);
      }

      //@Override
      protected Reader initReader(Reader reader) {
        return new MappingCharFilter(normMap, CharReader.get(reader));
      }
    };

    assertAnalyzesTo(analyzer, "banküberfall",
        new String[] { "bankueberfall", "fall" },
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.MappingCharFilter

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.