Examples of org.apache.lucene.analysis.tokenattributes.TypeAttribute

Package org.apache.lucene.analysis.tokenattributes

Examples of org.apache.lucene.analysis.tokenattributes.TypeAttribute

org.apache.lucene.analysis.tokenattributes.TypeAttribute
A Token's lexical type. The default value is "word".

    SinkTokenStream sink = ttf.newSinkTokenStream(sinkFilter);
    
    boolean seenDogs = false;


    CharTermAttribute termAtt = ttf.addAttribute(CharTermAttribute.class);
    TypeAttribute typeAtt = ttf.addAttribute(TypeAttribute.class);
    ttf.reset();
    while (ttf.incrementToken()) {
      if (termAtt.toString().equals("dogs")) {
        seenDogs = true;
        assertTrue(typeAtt.type() + " is not equal to " + "D", typeAtt.type().equals("D") == true);
      } else {
        assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals("word"));
      }
    }
    assertTrue(seenDogs + " does not equal: " + true, seenDogs == true);
    
    int sinkCount = 0;

View Full Code Here

  
  public void testFilterTokens() throws Exception {
    SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English");
    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = filter.getAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = filter.getAttribute(TypeAttribute.class);
    PayloadAttribute payloadAtt = filter.getAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAtt = filter.getAttribute(PositionIncrementAttribute.class);
    FlagsAttribute flagsAtt = filter.getAttribute(FlagsAttribute.class);
    
    filter.incrementToken();


    assertEquals("accent", termAtt.toString());
    assertEquals(2, offsetAtt.startOffset());
    assertEquals(7, offsetAtt.endOffset());
    assertEquals("wrd", typeAtt.type());
    assertEquals(3, posIncAtt.getPositionIncrement());
    assertEquals(77, flagsAtt.getFlags());
    assertEquals(new Payload(new byte[]{0,1,2,3}), payloadAtt.getPayload());
  }

View Full Code Here

    String test = "The quick red fox jumped over the lazy brown dogs";


    TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))));
    int count = 0;
    CharTermAttribute termAtt = nptf.getAttribute(CharTermAttribute.class);
    TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
    PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
    
    while (nptf.incrementToken()) {
      assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals(String.valueOf(Character.toUpperCase(termAtt.buffer()[0]))));
      assertTrue("nextToken.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null);
      String type = new String(payloadAtt.getPayload().getData(), "UTF-8");
      assertTrue(type + " is not equal to " + typeAtt.type(), type.equals(typeAtt.type()) == true);
      count++;
    }


    assertTrue(count + " does not equal: " + 10, count == 10);
  }

View Full Code Here


  public void testLongStream() throws Exception {
    final NumericTokenStream stream=new NumericTokenStream().setLongValue(lvalue);
    // use getAttribute to test if attributes really exist, if not an IAE will be throwed
    final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
    final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
    for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
      assertTrue("New token is available", stream.incrementToken());
      assertEquals("Term is correctly encoded", NumericUtils.longToPrefixCoded(lvalue, shift), termAtt.toString());
      assertEquals("Type correct", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
    }
    assertFalse("No more tokens available", stream.incrementToken());
  }

View Full Code Here


  public void testIntStream() throws Exception {
    final NumericTokenStream stream=new NumericTokenStream().setIntValue(ivalue);
    // use getAttribute to test if attributes really exist, if not an IAE will be throwed
    final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
    final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
    for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
      assertTrue("New token is available", stream.incrementToken());
      assertEquals("Term is correctly encoded", NumericUtils.intToPrefixCoded(ivalue, shift), termAtt.toString());
      assertEquals("Type correct", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
    }
    assertFalse("No more tokens available", stream.incrementToken());
  }

View Full Code Here

  
  public void testFilterTokens() throws Exception {
    SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English");
    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = filter.getAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = filter.getAttribute(TypeAttribute.class);
    PayloadAttribute payloadAtt = filter.getAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAtt = filter.getAttribute(PositionIncrementAttribute.class);
    FlagsAttribute flagsAtt = filter.getAttribute(FlagsAttribute.class);
    
    filter.incrementToken();


    assertEquals("accent", termAtt.toString());
    assertEquals(2, offsetAtt.startOffset());
    assertEquals(7, offsetAtt.endOffset());
    assertEquals("wrd", typeAtt.type());
    assertEquals(3, posIncAtt.getPositionIncrement());
    assertEquals(77, flagsAtt.getFlags());
    assertEquals(new Payload(new byte[]{0,1,2,3}), payloadAtt.getPayload());
  }

View Full Code Here

      // Get the token offset (position) attribute
        OffsetAttribute  offset = ts.addAttribute(OffsetAttribute.class); 
        // Get the token text attribute
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        // Get the token type attribute
        TypeAttribute type = ts.addAttribute(TypeAttribute.class);
        
        
        // Reset the TokenStream (original note: this resets the StringReader)
      ts.reset(); 
      // Iterate over the tokenization results, printing "start - end : term | type" per token
      while (ts.incrementToken()) {
        System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString() + " | " + type.type());
      }
      // Finish the TokenStream. NOTE(review): the original note describes this as closing the
      // TokenStream/StringReader, but only end() is called here - confirm close() happens later.
      ts.end();   // Perform end-of-stream operations, e.g. set the final offset.


    } catch (IOException e) {

View Full Code Here

0 1 2 3 4 5 6 7 8

TOP

Related Classes of org.apache.lucene.analysis.tokenattributes.TypeAttribute

com.chenlb.mmseg4j.analysis.TokenUtils

com.github.le11.nls.lucene.UIMATypeAwareAnalyzerTest

com.tistory.devyongsik.crescent.admin.service.MorphServiceImpl

org.apache.jackrabbit.core.query.lucene.JackrabbitQueryParser

org.apache.lucene.analysis.cjk.TestCJKTokenizer

org.apache.lucene.analysis.core.TestTypeTokenFilter

org.apache.lucene.analysis.payloads.NumericPayloadTokenFilterTest

org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilterTest

org.apache.lucene.analysis.sinks.TokenTypeSinkTokenizerTest

org.apache.lucene.analysis.snowball.TestSnowball

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.