Package: org.apache.lucene.analysis.charfilter

Usage examples of org.apache.lucene.analysis.charfilter.MappingCharFilter


        return new TokenStreamComponents(tokenizer, new CJKBigramFilter(tokenizer));
      }

      @Override
      protected Reader initReader(String fieldName, Reader reader) {
        return new MappingCharFilter(norm, reader);
      }
    };
   
    assertAnalyzesTo(analyzer, "ab",
        new String[] { "一二", "二二", "二三" },
View Full Code Here


      }

      @Override
      protected Reader initReader(String fieldName, Reader reader) {
        reader = new MockCharFilter(reader, 0);
        reader = new MappingCharFilter(map, reader);
        return reader;
      }
    };
    checkAnalysisConsistency(random(), a, false, "wmgddzunizdomqyj");
  }
View Full Code Here

        return new TokenStreamComponents(tokenizer, filter);
      }

      @Override
      protected Reader initReader(String fieldName, Reader reader) {
        return new MappingCharFilter(normMap, reader);
      }
    };

    assertAnalyzesTo(analyzer, "banküberfall",
        new String[] { "bankueberfall", "fall" },
View Full Code Here

      }

      @Override
      protected Reader initReader(String fieldName, Reader reader) {
        reader = new MockCharFilter(reader, 0);
        reader = new MappingCharFilter(map, reader);
        return reader;
      }
    };
    checkAnalysisConsistency(random(), a, false, "wmgddzunizdomqyj");
  }
View Full Code Here

  public void testNormalizeWinDelimToLinuxDelim() throws Exception {
    NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    builder.add("\\", "/");
    NormalizeCharMap normMap = builder.build();
    String path = "c:\\a\\b\\c";
    Reader cs = new MappingCharFilter(normMap, new StringReader(path));
    PathHierarchyTokenizer t = new PathHierarchyTokenizer( cs );
    assertTokenStreamContents(t,
        new String[]{"c:", "c:/a", "c:/a/b", "c:/a/b/c"},
        new int[]{0, 0, 0, 0},
        new int[]{2, 4, 6, 8},
View Full Code Here

    List<String> mappingRules = new ArrayList<>();
    mappingRules.add( "\"&uuml;\" => \"ü\"" );
    NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    builder.add("&uuml;", "ü");
    NormalizeCharMap normMap = builder.build();
    CharFilter charStream = new MappingCharFilter( normMap, new StringReader( INPUT ) );

    // create PatternTokenizer
    TokenStream stream = new PatternTokenizer(charStream, Pattern.compile("[,;/\\s]+"), -1);
    assertTokenStreamContents(stream,
        new String[] { "Günther", "Günther", "is", "here" },
        new int[] { 0, 13, 26, 29 },
        new int[] { 12, 25, 28, 33 },
        INPUT.length());
   
    charStream = new MappingCharFilter( normMap, new StringReader( INPUT ) );
    stream = new PatternTokenizer(charStream, Pattern.compile("Günther"), 0);
    assertTokenStreamContents(stream,
        new String[] { "Günther", "Günther" },
        new int[] { 0, 13 },
        new int[] { 12, 25 },
View Full Code Here

        return new TokenStreamComponents(tokenizer, filter);
      }

      @Override
      protected Reader initReader(String fieldName, Reader reader) {
        return new MappingCharFilter(normMap, reader);
      }
    };

    assertAnalyzesTo(analyzer, "banküberfall",
        new String[] { "bankueberfall", "fall" },
View Full Code Here

        return new TokenStreamComponents(tokenizer, new CJKBigramFilter(tokenizer));
      }

      @Override
      protected Reader initReader(String fieldName, Reader reader) {
        return new MappingCharFilter(norm, reader);
      }
    };
   
    assertAnalyzesTo(analyzer, "ab",
        new String[] { "一二", "二二", "二三" },
View Full Code Here

      builder.add("\u00AB", " | ");
      builder.add("\u00BB", " | ");

      NormalizeCharMap normMap = builder.build();

      Reader reader2 = (normMap == null ? reader : new MappingCharFilter(normMap,reader));
         
    final Tokenizer source = new WhitespaceTokenizer(matchVersion, reader2);
      TokenStream tokenStream = new LowerCaseFilter(matchVersion, source);
      tokenStream = new ShingleFilter(tokenStream, 2, 3);
      tokenStream = new NTermStopFilter(matchVersion, tokenStream, ntermStopFilterRules);   
View Full Code Here

        normMap = normMapBuilder.build();
    }

    @Override
    public Reader create(Reader tokenStream) {
        return new MappingCharFilter(normMap, tokenStream);
    }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.charfilter.MappingCharFilter

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.