Examples of TermAttribute


Examples of org.apache.lucene.analysis.tokenattributes.TermAttribute

        RussianLetterTokenizer sample =
            new RussianLetterTokenizer(
                sampleUnicode);

        TermAttribute text = in.getAttribute(TermAttribute.class);
        TermAttribute sampleText = sample.getAttribute(TermAttribute.class);

        for (;;)
        {
          if (in.incrementToken() == false)
            break;

            boolean nextSampleToken = sample.incrementToken();
            assertEquals(
                "Unicode",
                text.term(),
                nextSampleToken == false
                ? null
                : sampleText.term());
        }

        inWords.close();
        sampleUnicode.close();
    }
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermAttribute

    {
        Reader reader = new StringReader("text 1000");
        RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_CURRENT);
        TokenStream stream = ra.tokenStream("", reader);

        TermAttribute termText = stream.getAttribute(TermAttribute.class);
        try {
            assertTrue(stream.incrementToken());
            assertEquals("text", termText.term());
            assertTrue(stream.incrementToken());
            assertEquals("RussianAnalyzer's tokenizer skips numbers from input text", "1000", termText.term());
            assertFalse(stream.incrementToken());
        }
        catch (IOException e)
        {
            fail("unexpected IOException");
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermAttribute

    final String field = ( f == null) ? "contents" : f;
    if ( a == null) a = new StandardAnalyzer(Version.LUCENE_CURRENT);

    // [1] Parse query into separate words so that when we expand we can avoid dups
    TokenStream ts = a.tokenStream( field, new StringReader( query));
    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
   
    while (ts.incrementToken()) {
      String word = termAtt.term();
      if ( already.add( word))
        top.add( word);
    }
    final BooleanQuery tmp = new BooleanQuery();
   
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermAttribute

    final Set<String> already = new HashSet<String>(); // avoid dups   
    List<String> top = new LinkedList<String>(); // needs to be separately listed..

    // [1] Parse query into separate words so that when we expand we can avoid dups
    TokenStream ts = a.tokenStream( field, new StringReader( query));
    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
   
    while (ts.incrementToken()) {
      String word = termAtt.term();
      if ( already.add( word))
        top.add( word);
    }
    final BooleanQuery tmp = new BooleanQuery();
   
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermAttribute

    int count = 0;
    int numItalics = 0;
    int numBoldItalics = 0;
    int numCategory = 0;
    int numCitation = 0;
    TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
    TypeAttribute typeAtt = tf.addAttribute(TypeAttribute.class);
   
    while (tf.incrementToken()) {
      String tokText = termAtt.term();
      //System.out.println("Text: " + tokText + " Type: " + token.type());
      String expectedType = (String) tcm.get(tokText);
      assertTrue("expectedType is null and it shouldn't be for: " + tf.toString(), expectedType != null);
      assertTrue(typeAtt.type() + " is not equal to " + expectedType + " for " + tf.toString(), typeAtt.type().equals(expectedType) == true);
      count++;
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermAttribute

    checkLinkPhrases(tf);
   
  }

  private void checkLinkPhrases(WikipediaTokenizer tf) throws IOException {
    TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
    PositionIncrementAttribute posIncrAtt = tf.addAttribute(PositionIncrementAttribute.class);
   
    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "click", termAtt.term().equals("click") == true);
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "link", termAtt.term().equals("link") == true);
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "here",
        termAtt.term().equals("here") == true);
    //The link, and here should be at the same position for phrases to work
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "again",
        termAtt.term().equals("again") == true);
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);

    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "click",
        termAtt.term().equals("click") == true);
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);

    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org",
        termAtt.term().equals("http://lucene.apache.org") == true);
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);

    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "here",
        termAtt.term().equals("here") == true);
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);

    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "again",
        termAtt.term().equals("again") == true);
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);

    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "a",
        termAtt.term().equals("a") == true);
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);

    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "b",
        termAtt.term().equals("b") == true);
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);

    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "c",
        termAtt.term().equals("c") == true);
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);

    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "d",
        termAtt.term().equals("d") == true);
    assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);

    assertFalse(tf.incrementToken())
  }
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermAttribute

              ("field must have either String or Reader value");

          int position = 0;
          // Tokenize field and add to postingTable
          TokenStream stream = analyzer.tokenStream(fieldName, reader);
          TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
          PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
         
          try {
            while (stream.incrementToken()) {
              position += (posIncrAtt.getPositionIncrement() - 1);
              position++;
              String name = termAtt.term();
              Integer Count = tokenMap.get(name);
              if (Count == null) { // not in there yet
                tokenMap.put(name, Integer.valueOf(1)); //first one
              } else {
                int count = Count.intValue();
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermAttribute

    if (limit <= 0) limit = Integer.MAX_VALUE;
   
    // compute frequencies of distinct terms
    HashMap<String,MutableInteger> map = new HashMap<String,MutableInteger>();
    TokenStream stream = analyzer.tokenStream("", new StringReader(text));
    TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
    try {
      while (stream.incrementToken()) {
        MutableInteger freq = map.get(termAtt.term());
        if (freq == null) {
          freq = new MutableInteger(1);
          map.put(termAtt.term(), freq);
        } else {
          freq.setValue(freq.intValue() + 1);
        }
      }
    } catch (IOException e) {
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermAttribute

      //make sure we produce the same tokens
      TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(buffer.toString()))));
      TokenStream sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(100));
      teeStream.consumeAllTokens();
      TokenStream stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(buffer.toString()))), 100);
      TermAttribute tfTok = stream.addAttribute(TermAttribute.class);
      TermAttribute sinkTok = sink.addAttribute(TermAttribute.class);
      for (int i=0; stream.incrementToken(); i++) {
        assertTrue(sink.incrementToken());
        assertTrue(tfTok + " is not equal to " + sinkTok + " at token: " + i, tfTok.equals(sinkTok) == true);
      }
     
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermAttribute

  }
 
  private void checkTokens(TokenStream stream) throws IOException {
    int count = 0;
   
    TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
    assertNotNull(termAtt);
    while (stream.incrementToken()) {
      assertTrue(count < tokens.length);
      assertEquals(tokens[count], termAtt.term());
      count++;
    }
   
    assertEquals(tokens.length, count);
  }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.