Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.TokenStream.incrementToken()


    // Tokenize localText with the analyzer and collect every non-empty term into `terms`.
    Reader reader = new StringReader(localText);
    TokenStream stream = analyzer.reusableTokenStream( fieldName, reader);
    // Register the term attribute once up front; it is read after each incrementToken() call below.
    TermAttribute attribute = (TermAttribute) stream.addAttribute( TermAttribute.class );
    // Reset the stream before the first incrementToken() call.
    stream.reset();

    // Loop until incrementToken() reports that no further token is available.
    while ( stream.incrementToken() ) {
      // Zero-length terms are skipped and never reach `terms`.
      if ( attribute.termLength() > 0 ) {
        String term = attribute.term();
        terms.add( term );
      }
    }
View Full Code Here


    // [1] Parse query into separate words so that when we expand we can avoid dups
    TokenStream ts = a.reusableTokenStream( field, new StringReader( query));
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    // Reset the stream before consuming any tokens.
    ts.reset();
    while (ts.incrementToken()) {
      // Copy the current term text out of the attribute.
      String word = termAtt.toString();
      // `top` only receives a word when already.add() returns true
      // (presumably `already` is a Set, so this is the first occurrence — confirm against caller).
      if ( already.add( word))
        top.add( word);
    }
    // Tell the stream that the last token has been consumed.
    ts.end();
View Full Code Here

    // [1] Parse query into separate words so that when we expand we can avoid dups
    TokenStream ts = a.reusableTokenStream( field, new StringReader( query));
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
   
    while (ts.incrementToken()) {
      String word = termAtt.toString();
      if ( already.add( word))
        top.add( word);
    }
    final BooleanQuery tmp = new BooleanQuery();
View Full Code Here

    // Clear the static default settings codec; the filter below is given an explicit codec instead.
    ShingleMatrixFilter.defaultSettingsCodec = null;

    TokenStream ts;

    // Wrap an empty token stream: the filter must report no tokens at all.
    // NOTE(review): 1 and 2 look like the minimum/maximum shingle size and ' ' the spacer character — confirm against the constructor.
    ts = new ShingleMatrixFilter(new EmptyTokenStream(), 1, 2, new Character(' '), false, new ShingleMatrixFilter.OneDimensionalNonWeightedTokenSettingsCodec());
    assertFalse(ts.incrementToken());

    // Declared here; not assigned within this excerpt.
    TokenListStream tls;
    LinkedList tokens;

    // test a plain old token stream with synonyms translated to rows.
View Full Code Here

    assertNext(ts, "into", 28, 32);
    assertNext(ts, "into shingles", 28, 39);
    assertNext(ts, "shingles", 33, 39);


    assertFalse(ts.incrementToken());

  }

  /**
   * Extracts a matrix from a token stream.
View Full Code Here

    assertNext(ts, "göran_eriksson_in_the", 1, 2.0f, 0, 0);
    assertNext(ts, "eriksson_in", 1, 1.4142135f, 0, 0);
    assertNext(ts, "eriksson_in_the", 1, 1.7320508f, 0, 0);
    assertNext(ts, "eriksson_in_the_croud", 1, 2.0f, 0, 0);

    assertFalse(ts.incrementToken());

  }

  private Token tokenFactory(String text, int startOffset, int endOffset) {
    return tokenFactory(text, 1, 1f, startOffset, endOffset);
View Full Code Here

    {
      TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      Term term = null;
      ts.reset();
      while (ts.incrementToken()) {
        if (term == null)
        {
          term = new Term(fieldName, termAtt.toString());
        } else
        {
View Full Code Here

    {
      TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      Term term = null;
      ts.reset();
        while (ts.incrementToken()) {
        if (term == null)
        {
          term = new Term(fieldName, termAtt.toString());
        } else
        {
View Full Code Here

                try
                {
                  TokenStream ts = analyzer.reusableTokenStream(fields[i],new StringReader(stopWords));
                  CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
                  ts.reset();
                  while(ts.incrementToken()) {
                      stopWordsSet.add(termAtt.toString());
                  }
                  ts.end();
                  ts.close();
                }
View Full Code Here

       
        int corpusNumDocs=reader.numDocs();
        Term internSavingTemplateTerm =new Term(f.fieldName); //optimization to avoid constructing new Term() objects
        HashSet<String> processedTerms=new HashSet<String>();
        ts.reset();
        while (ts.incrementToken())
        {
                String term = termAtt.toString();
          if(!processedTerms.contains(term))
          {
            processedTerms.add(term);
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.