Package org.apache.lucene.analysis.miscellaneous

Examples of org.apache.lucene.analysis.miscellaneous.LengthFilter


    TokenStream sink = new URIDecodingFilter(source, "UTF-8");
    sink = this.applyURINormalisation(sink);
    sink = new MailtoFilter(sink);
    sink = new LowerCaseFilter(matchVersion, sink );
    sink = new StopFilter(matchVersion, sink, stopSet);
    sink = new LengthFilter(true, sink, 2, 256);
    return new TokenStreamComponents(source, sink);
  }
View Full Code Here


    enablePositionIncrements = getBoolean("enablePositionIncrements",false);
  }
 
  @Override
  public LengthFilter create(TokenStream input) {
    return new LengthFilter(enablePositionIncrements, input,min,max);
  }
View Full Code Here

   * @param input {@link TokenStream} holding the input to be tokenized
   * @param minGram the smallest n-gram to generate
   * @param maxGram the largest n-gram to generate
   */
  public NGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram) {
    super(new LengthFilter(version, input, minGram, Integer.MAX_VALUE));
    this.version = version;
    this.charUtils = version.onOrAfter(Version.LUCENE_44)
        ? CharacterUtils.getInstance(version)
        : CharacterUtils.getJava4Instance();
    if (minGram < 1) {
View Full Code Here

        this.directChildrenPathIndexingAnalyzer = new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
                Tokenizer source = new KeywordTokenizer(reader);
                TokenStream filter = new ReverseStringFilter(Version.LUCENE_47, source);
                filter = new LengthFilter(Version.LUCENE_47, filter, 2, Integer.MAX_VALUE);
                filter = new PatternReplaceFilter(filter, Pattern.compile("([^\\/]+)(\\/)"), "$2", false);
                filter = new PatternReplaceFilter(filter, Pattern.compile("(\\/)(.+)"), "$2", false);
                filter = new ReverseStringFilter(Version.LUCENE_47, filter);
                return new TokenStreamComponents(source, filter);
            }
View Full Code Here

        this.directChildrenPathIndexingAnalyzer = new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
                Tokenizer source = new KeywordTokenizer(reader);
                TokenStream filter = new ReverseStringFilter(Version.LUCENE_47, source);
                filter = new LengthFilter(Version.LUCENE_47, filter, 2, Integer.MAX_VALUE);
                filter = new PatternReplaceFilter(filter, Pattern.compile("([^\\/]+)(\\/)"), "$2", false);
                filter = new PatternReplaceFilter(filter, Pattern.compile("(\\/)(.+)"), "$2", false);
                filter = new ReverseStringFilter(Version.LUCENE_47, filter);
                return new TokenStreamComponents(source, filter);
            }
View Full Code Here

    public DataBag exec(Tuple input) throws IOException {
        if (input == null || input.size() < 1 || input.isNull(0))
            return null;
       
        TokenStream stream = analyzer.tokenStream(NOFIELD, input.get(0).toString());
        LengthFilter filtered = new LengthFilter(Version.LUCENE_44, stream, minWordSize, Integer.MAX_VALUE); // Let words be long

        DataBag result;
        if (minGramSize == 1 && maxGramSize == 1) {
            result = fillBag(filtered);
        } else {
View Full Code Here

    }

    @Override
    public TokenStream create(TokenStream tokenStream) {
        if (version.onOrAfter(Version.LUCENE_4_4)) {
            return new LengthFilter(tokenStream, min, max);
        } else {
            @SuppressWarnings("deprecated")
            final TokenStream filter = new Lucene43LengthFilter(enablePositionIncrements, tokenStream, min, max);
            return filter;
        }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.miscellaneous.LengthFilter

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.