Examples of incrementToken()


Examples of org.apache.lucene.analysis.TokenStream.incrementToken()

        RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_CURRENT);
        TokenStream stream = ra.tokenStream("", reader);

        TermAttribute termText = stream.getAttribute(TermAttribute.class);
        try {
            assertTrue(stream.incrementToken());
            assertEquals("text", termText.term());
            assertTrue(stream.incrementToken());
            assertEquals("RussianAnalyzer's tokenizer skips numbers from input text", "1000", termText.term());
            assertFalse(stream.incrementToken());
        }
View Full Code Here

Examples of org.apache.lucene.analysis.TokenStream.incrementToken()

        TermAttribute termText = stream.getAttribute(TermAttribute.class);
        try {
            assertTrue(stream.incrementToken());
            assertEquals("text", termText.term());
            assertTrue(stream.incrementToken());
            assertEquals("RussianAnalyzer's tokenizer skips numbers from input text", "1000", termText.term());
            assertFalse(stream.incrementToken());
        }
        catch (IOException e)
        {
View Full Code Here

Examples of org.apache.lucene.analysis.TokenStream.incrementToken()

        try {
            assertTrue(stream.incrementToken());
            assertEquals("text", termText.term());
            assertTrue(stream.incrementToken());
            assertEquals("RussianAnalyzer's tokenizer skips numbers from input text", "1000", termText.term());
            assertFalse(stream.incrementToken());
        }
        catch (IOException e)
        {
            fail("unexpected IOException");
        }
View Full Code Here

Examples of org.apache.lucene.analysis.TokenStream.incrementToken()

          }

          // reset the TokenStream to the first token         
          tokenStream.reset();

          while (tokenStream.incrementToken()) {
            // TODO: this is a simple workaround to still work with tokens, not very effective, but as far as I know, this writer should get removed soon:
            final Token token = new Token();
            for (Iterator<AttributeImpl> atts = tokenStream.getAttributeImplsIterator(); atts.hasNext();) {
              final AttributeImpl att = atts.next();
              try {
View Full Code Here

Examples of org.apache.lucene.analysis.TokenStream.incrementToken()

    // [1] Parse query into separate words so that when we expand we can avoid dups
    TokenStream ts = a.tokenStream( field, new StringReader( query));
    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
   
    while (ts.incrementToken()) {
      String word = termAtt.term();
      if ( already.add( word))
        top.add( word);
    }
    final BooleanQuery tmp = new BooleanQuery();
View Full Code Here

Examples of org.apache.lucene.analysis.TokenStream.incrementToken()

    // [1] Parse query into separate words so that when we expand we can avoid dups
    TokenStream ts = a.tokenStream( field, new StringReader( query));
    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
   
    while (ts.incrementToken()) {
      String word = termAtt.term();
      if ( already.add( word))
        top.add( word);
    }
    final BooleanQuery tmp = new BooleanQuery();
View Full Code Here

Examples of org.apache.lucene.analysis.TokenStream.incrementToken()

          TokenStream stream = analyzer.tokenStream(fieldName, reader);
          TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
          PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
         
          try {
            while (stream.incrementToken()) {
              position += (posIncrAtt.getPositionIncrement() - 1);
              position++;
              String name = termAtt.term();
              Integer Count = tokenMap.get(name);
              if (Count == null) { // not in there yet
View Full Code Here

Examples of org.apache.lucene.analysis.TokenStream.incrementToken()

    // compute frequencies of distinct terms
    HashMap<String,MutableInteger> map = new HashMap<String,MutableInteger>();
    TokenStream stream = analyzer.tokenStream("", new StringReader(text));
    TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
    try {
      while (stream.incrementToken()) {
        MutableInteger freq = map.get(termAtt.term());
        if (freq == null) {
          freq = new MutableInteger(1);
          map.put(termAtt.term(), freq);
        } else {
View Full Code Here

Examples of org.apache.lucene.analysis.TokenStream.incrementToken()

      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);

      int corpusNumDocs = reader.numDocs();
      HashSet<String> processedTerms = new HashSet<String>();
      ts.reset();
      while (ts.incrementToken()) {
        String term = termAtt.toString();
        if (!processedTerms.contains(term)) {
          processedTerms.add(term);
          ScoreTermQueue variantsQ = new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term
          float minScore = 0;
View Full Code Here

Examples of org.apache.lucene.analysis.TokenStream.incrementToken()

    ts1.reset();
    ts2.reset();
    TermToBytesRefAttribute termAtt1 = ts1.addAttribute(TermToBytesRefAttribute.class);
    TermToBytesRefAttribute termAtt2 = ts2.addAttribute(TermToBytesRefAttribute.class);
    assertTrue(ts1.incrementToken());
    assertTrue(ts2.incrementToken());
    BytesRef bytes1 = termAtt1.getBytesRef();
    BytesRef bytes2 = termAtt2.getBytesRef();
    termAtt1.fillBytesRef();
    termAtt2.fillBytesRef();
    assertEquals(bytes1, bytes2);
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact coftware#gmail.com.