Package org.apache.lucene.analysis.tokenattributes

Examples of org.apache.lucene.analysis.tokenattributes.TermAttribute



  public void testFloatEncoding() throws Exception {
    String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7";
    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)), '|', new FloatEncoder());
    TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
    PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
    assertTermEquals("The", filter, termAtt, payAtt, null);
    assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeFloat(1.0f));
    assertTermEquals("red", filter, termAtt, payAtt, PayloadHelper.encodeFloat(2.0f));
    assertTermEquals("fox", filter, termAtt, payAtt, PayloadHelper.encodeFloat(3.5f));
View Full Code Here


  }

  public void testIntEncoding() throws Exception {
    String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";
    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)), '|', new IntegerEncoder());
    TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
    PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
    assertTermEquals("The", filter, termAtt, payAtt, null);
    assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeInt(1));
    assertTermEquals("red", filter, termAtt, payAtt, PayloadHelper.encodeInt(2));
    assertTermEquals("fox", filter, termAtt, payAtt, PayloadHelper.encodeInt(3));
View Full Code Here

    assertTermEquals("dogs", filter, termAtt, payAtt, PayloadHelper.encodeInt(83));
    assertFalse(filter.incrementToken());
  }

  void assertTermEquals(String expected, TokenStream stream, byte[] expectPay) throws Exception {
    TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
    PayloadAttribute payloadAtt = stream.getAttribute(PayloadAttribute.class);
    assertTrue(stream.incrementToken());
    assertEquals(expected, termAtt.term());
    Payload payload = payloadAtt.getPayload();
    if (payload != null) {
      assertTrue(payload.length() + " does not equal: " + expectPay.length, payload.length() == expectPay.length);
      for (int i = 0; i < expectPay.length; i++) {
        assertTrue(expectPay[i] + " does not equal: " + payload.byteAt(i), expectPay[i] == payload.byteAt(i));
View Full Code Here

    assertEquals("enfin", tas.get(7));
  }

  private List filtre(TokenFilter filter) throws IOException {
    List tas = new ArrayList();
    TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
    while (filter.incrementToken()) {
      tas.add(termAtt.term());
    }
    return tas;
  }
View Full Code Here

  public void test() throws IOException {
    String test = "The quick red fox jumped over the lazy brown dogs";

    NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test))), 3, "D");
    boolean seenDogs = false;
    TermAttribute termAtt = nptf.getAttribute(TermAttribute.class);
    TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
    PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
    while (nptf.incrementToken()) {
      if (termAtt.term().equals("dogs")) {
        seenDogs = true;
        assertTrue(typeAtt.type() + " is not equal to " + "D", typeAtt.type().equals("D") == true);
        assertTrue("payloadAtt.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null);
        byte [] bytes = payloadAtt.getPayload().getData();//safe here to just use the bytes, otherwise we should use offset, length
        assertTrue(bytes.length + " does not equal: " + payloadAtt.getPayload().length(), bytes.length == payloadAtt.getPayload().length());
View Full Code Here

  public void test() throws IOException {
    String test = "The quick red fox jumped over the lazy brown dogs";

    TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test))));
    int count = 0;
    TermAttribute termAtt = nptf.getAttribute(TermAttribute.class);
    TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
    PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
   
    while (nptf.incrementToken()) {
      assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals(String.valueOf(Character.toUpperCase(termAtt.termBuffer()[0]))));
      assertTrue("nextToken.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null);
      String type = new String(payloadAtt.getPayload().getData(), "UTF-8");
      assertTrue("type is null and it shouldn't be", type != null);
      assertTrue(type + " is not equal to " + typeAtt.type(), type.equals(typeAtt.type()) == true);
      count++;
View Full Code Here

        wsTokenizer, hyphenator, dict,
        CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
        CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
        CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
   
    TermAttribute termAtt = tf.getAttribute(TermAttribute.class);
    assertTrue(tf.incrementToken());
    assertEquals("Rindfleischüberwachungsgesetz", termAtt.term());
    assertTrue(tf.incrementToken());
    assertEquals("Rind", termAtt.term());
    wsTokenizer.reset(new StringReader("Rindfleischüberwachungsgesetz"));
    tf.reset();
    assertTrue(tf.incrementToken());
    assertEquals("Rindfleischüberwachungsgesetz", termAtt.term());
  }
View Full Code Here

    throws IOException
  {
       TokenStream ts = analyzer.tokenStream(fieldName, r);
      int tokenCount=0;
      // for every token
      TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
     
      while (ts.incrementToken()) {
        String word = termAtt.term();
        tokenCount++;
        if(tokenCount>maxNumTokensParsed)
        {
          break;
        }
View Full Code Here

    TokenStream ts = analyzer.tokenStream("content",
                                          new StringReader("this sentence"));
    int j = -1;
   
    PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
   
    while (ts.incrementToken()) {
      j += posIncrAtt.getPositionIncrement();
      String termText = termAtt.term();
      q.add(new Term("content", termText), j);
    }

    ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
    int[] ranks = new int[] { 0 };
View Full Code Here

    BooleanQuery q = new BooleanQuery();

    TokenStream ts = analyzer.tokenStream("content",
                                          new StringReader("test sentence"));
   
    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
   
    while (ts.incrementToken()) {
      String termText =  termAtt.term();
      q.add(new TermQuery(new Term("content", termText)),
            BooleanClause.Occur.SHOULD);
    }

    ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.tokenattributes.TermAttribute

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.