Examples of TermAttribute


Examples of org.apache.lucene.analysis.tokenattributes.TermAttribute

    boolean hasnext = tk.incrementToken();

    while(hasnext){

      TermAttribute ta = tk.getAttribute(TermAttribute.class);

      System.out.println(ta.term());

      hasnext = tk.incrementToken();

    }
  }
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermAttribute

  static List<String> getAllTermsFromText(String fieldName, String localText, Analyzer analyzer) throws IOException {
    List<String> terms = new ArrayList<String>();

    Reader reader = new StringReader(localText);
    TokenStream stream = analyzer.reusableTokenStream( fieldName, reader);
    TermAttribute attribute = (TermAttribute) stream.addAttribute( TermAttribute.class );
    stream.reset();

    while ( stream.incrementToken() ) {
      if ( attribute.termLength() > 0 ) {
        String term = attribute.term();
        terms.add( term );
      }
    }
    stream.end();
    stream.close();
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermAttribute

    return token;
  }

  public void checkCJKToken(final String str, final TestToken[] out_tokens) throws IOException {
    CJKTokenizer tokenizer = new CJKTokenizer(new StringReader(str));
    TermAttribute termAtt = (TermAttribute) tokenizer.getAttribute(TermAttribute.class);
    OffsetAttribute offsetAtt = (OffsetAttribute) tokenizer.getAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = (TypeAttribute) tokenizer.getAttribute(TypeAttribute.class);
    for (int i = 0; i < out_tokens.length; i++) {
      assertTrue(tokenizer.incrementToken());
      assertEquals(termAtt.term(), out_tokens[i].termText);
      assertEquals(offsetAtt.startOffset(), out_tokens[i].start);
      assertEquals(offsetAtt.endOffset(), out_tokens[i].end);
      assertEquals(typeAtt.type(), out_tokens[i].type);
    }
    assertFalse(tokenizer.incrementToken());
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermAttribute

    assertFalse(tokenizer.incrementToken());
  }
 
  public void checkCJKTokenReusable(final Analyzer a, final String str, final TestToken[] out_tokens) throws IOException {
    TokenStream ts = a.reusableTokenStream("dummy", new StringReader(str));
    TermAttribute termAtt = (TermAttribute) ts.getAttribute(TermAttribute.class);
    OffsetAttribute offsetAtt = (OffsetAttribute) ts.getAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = (TypeAttribute) ts.getAttribute(TypeAttribute.class);
    for (int i = 0; i < out_tokens.length; i++) {
      assertTrue(ts.incrementToken());
      assertEquals(termAtt.term(), out_tokens[i].termText);
      assertEquals(offsetAtt.startOffset(), out_tokens[i].start);
      assertEquals(offsetAtt.endOffset(), out_tokens[i].end);
      assertEquals(typeAtt.type(), out_tokens[i].type);
    }
    assertFalse(ts.incrementToken());
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermAttribute

  }
 
  public void testTokenStream() throws Exception {
    Analyzer analyzer = new CJKAnalyzer();
    TokenStream ts = analyzer.tokenStream("dummy", new StringReader("\u4e00\u4e01\u4e02"));
    TermAttribute termAtt = (TermAttribute) ts.getAttribute(TermAttribute.class);
    assertTrue(ts.incrementToken());
    assertEquals("\u4e00\u4e01", termAtt.term());
    assertTrue(ts.incrementToken());
    assertEquals("\u4e01\u4e02", termAtt.term());
    assertFalse(ts.incrementToken());
  }
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermAttribute

        new String[]{"he", "abhorred", "accents"});
  }
 
  public void testFilterTokens() throws Exception {
    SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English");
    TermAttribute termAtt = (TermAttribute) filter.getAttribute(TermAttribute.class);
    OffsetAttribute offsetAtt = (OffsetAttribute) filter.getAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = (TypeAttribute) filter.getAttribute(TypeAttribute.class);
    PayloadAttribute payloadAtt = (PayloadAttribute) filter.getAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) filter.getAttribute(PositionIncrementAttribute.class);
    FlagsAttribute flagsAtt = (FlagsAttribute) filter.getAttribute(FlagsAttribute.class);
   
    filter.incrementToken();

    assertEquals("accent", termAtt.term());
    assertEquals(2, offsetAtt.startOffset());
    assertEquals(7, offsetAtt.endOffset());
    assertEquals("wrd", typeAtt.type());
    assertEquals(3, posIncAtt.getPositionIncrement());
    assertEquals(77, flagsAtt.getFlags());
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermAttribute

      HashMap terms = new HashMap();
      int numTokens = 0;
      int numOverlapTokens = 0;
      int pos = -1;
     
      TermAttribute termAtt = (TermAttribute) stream.addAttribute(TermAttribute.class);
      PositionIncrementAttribute posIncrAttribute = (PositionIncrementAttribute) stream.addAttribute(PositionIncrementAttribute.class);
      OffsetAttribute offsetAtt = (OffsetAttribute) stream.addAttribute(OffsetAttribute.class);
     
      stream.reset();
      while (stream.incrementToken()) {
        String term = termAtt.term();
        if (term.length() == 0) continue; // nothing to do
//        if (DEBUG) System.err.println("token='" + term + "'");
        numTokens++;
        final int posIncr = posIncrAttribute.getPositionIncrement();
        if (posIncr == 0)
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermAttribute

      source.reset();
    } catch (IOException e) {
      source = analyzer.tokenStream(field, new StringReader(queryText));
    }
    CachingTokenFilter buffer = new CachingTokenFilter(source);
    TermAttribute termAtt = null;
    PositionIncrementAttribute posIncrAtt = null;
    int numTokens = 0;

    boolean success = false;
    try {
      buffer.reset();
      success = true;
    } catch (IOException e) {
      // success==false if we hit an exception
    }
    if (success) {
      if (buffer.hasAttribute(TermAttribute.class)) {
        termAtt = (TermAttribute) buffer.getAttribute(TermAttribute.class);
      }
      if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
        posIncrAtt = (PositionIncrementAttribute) buffer.getAttribute(PositionIncrementAttribute.class);
      }
    }

    int positionCount = 0;
    boolean severalTokensAtSamePosition = false;

    boolean hasMoreTokens = false;
    if (termAtt != null) {
      try {
        hasMoreTokens = buffer.incrementToken();
        while (hasMoreTokens) {
          numTokens++;
          int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
          if (positionIncrement != 0) {
            positionCount += positionIncrement;
          } else {
            severalTokensAtSamePosition = true;
          }
          hasMoreTokens = buffer.incrementToken();
        }
      } catch (IOException e) {
        // ignore
      }
    }
    try {
      // rewind the buffer stream
      buffer.reset();

      // close original stream - all tokens buffered
      source.close();
    }
    catch (IOException e) {
      // ignore
    }

    if (numTokens == 0)
      return null;
    else if (numTokens == 1) {
      String term = null;
      try {
        boolean hasNext = buffer.incrementToken();
        assert hasNext == true;
        term = termAtt.term();
      } catch (IOException e) {
        // safe to ignore, because we know the number of tokens
      }
      return newTermQuery(new Term(field, term));
    } else {
      if (severalTokensAtSamePosition) {
        if (positionCount == 1) {
          // no phrase query:
          BooleanQuery q = newBooleanQuery(true);
          for (int i = 0; i < numTokens; i++) {
            String term = null;
            try {
              boolean hasNext = buffer.incrementToken();
              assert hasNext == true;
              term = termAtt.term();
            } catch (IOException e) {
              // safe to ignore, because we know the number of tokens
            }

            Query currentQuery = newTermQuery(
                new Term(field, term));
            q.add(currentQuery, BooleanClause.Occur.SHOULD);
          }
          return q;
        }
        else {
          // phrase query:
          MultiPhraseQuery mpq = newMultiPhraseQuery();
          mpq.setSlop(phraseSlop);
          List multiTerms = new ArrayList();
          int position = -1;
          for (int i = 0; i < numTokens; i++) {
            String term = null;
            int positionIncrement = 1;
            try {
              boolean hasNext = buffer.incrementToken();
              assert hasNext == true;
              term = termAtt.term();
              if (posIncrAtt != null) {
                positionIncrement = posIncrAtt.getPositionIncrement();
              }
            } catch (IOException e) {
              // safe to ignore, because we know the number of tokens
            }

            if (positionIncrement > 0 && multiTerms.size() > 0) {
              if (enablePositionIncrements) {
                mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
              } else {
                mpq.add((Term[])multiTerms.toArray(new Term[0]));
              }
              multiTerms.clear();
            }
            position += positionIncrement;
            multiTerms.add(new Term(field, term));
          }
          if (enablePositionIncrements) {
            mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
          } else {
            mpq.add((Term[])multiTerms.toArray(new Term[0]));
          }
          return mpq;
        }
      }
      else {
        PhraseQuery pq = newPhraseQuery();
        pq.setSlop(phraseSlop);
        int position = -1;


        for (int i = 0; i < numTokens; i++) {
          String term = null;
          int positionIncrement = 1;

          try {
            boolean hasNext = buffer.incrementToken();
            assert hasNext == true;
            term = termAtt.term();
            if (posIncrAtt != null) {
              positionIncrement = posIncrAtt.getPositionIncrement();
            }
          } catch (IOException e) {
            // safe to ignore, because we know the number of tokens
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermAttribute

    int count = 0;
    int numItalics = 0;
    int numBoldItalics = 0;
    int numCategory = 0;
    int numCitation = 0;
    TermAttribute termAtt = (TermAttribute) tf.addAttribute(TermAttribute.class);
    TypeAttribute typeAtt = (TypeAttribute) tf.addAttribute(TypeAttribute.class);
   
    while (tf.incrementToken()) {
      String tokText = termAtt.term();
      //System.out.println("Text: " + tokText + " Type: " + token.type());
      String expectedType = (String) tcm.get(tokText);
      assertTrue("expectedType is null and it shouldn't be for: " + tf.toString(), expectedType != null);
      assertTrue(typeAtt.type() + " is not equal to " + expectedType + " for " + tf.toString(), typeAtt.type().equals(expectedType) == true);
      count++;
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermAttribute

    checkLinkPhrases(tf);
   
  }

  private void checkLinkPhrases(WikipediaTokenizer tf) throws IOException {
    TermAttribute termAtt = (TermAttribute) tf.addAttribute(TermAttribute.class);
    PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) tf.addAttribute(PositionIncrementAttribute.class);
   
    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "click", termAtt.term().equals("click") == true);
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "link", termAtt.term().equals("link") == true);
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "here",
        termAtt.term().equals("here") == true);
    //The link, and here should be at the same position for phrases to work
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "again",
        termAtt.term().equals("again") == true);
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);

    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "click",
        termAtt.term().equals("click") == true);
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);

    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org",
        termAtt.term().equals("http://lucene.apache.org") == true);
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);

    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "here",
        termAtt.term().equals("here") == true);
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);

    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "again",
        termAtt.term().equals("again") == true);
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);

    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "a",
        termAtt.term().equals("a") == true);
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);

    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "b",
        termAtt.term().equals("b") == true);
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);

    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "c",
        termAtt.term().equals("c") == true);
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);

    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "d",
        termAtt.term().equals("d") == true);
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);

    assertFalse(tf.incrementToken())
  }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.