Examples of org.apache.lucene.analysis.tokenattributes.TypeAttribute

Package org.apache.lucene.analysis.tokenattributes

Examples of org.apache.lucene.analysis.tokenattributes.TypeAttribute

org.apache.lucene.analysis.tokenattributes.TypeAttribute
A Token's lexical type. The Default value is "word".

    String test = "The quick red fox jumped over the lazy brown dogs";


    TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false)));
    int count = 0;
    CharTermAttribute termAtt = nptf.getAttribute(CharTermAttribute.class);
    TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
    PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
    nptf.reset();
    while (nptf.incrementToken()) {
      assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals(String.valueOf(Character.toUpperCase(termAtt.buffer()[0]))));
      assertTrue("nextToken.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null);
      String type = payloadAtt.getPayload().utf8ToString();
      assertTrue(type + " is not equal to " + typeAtt.type(), type.equals(typeAtt.type()));
      count++;
    }


    assertTrue(count + " does not equal: " + 10, count == 10);
  }

View Full Code Here


  public void testLongStream() throws Exception {
    final NumericTokenStream stream=new NumericTokenStream().setLongValue(lvalue);
    // use getAttribute to test if attributes really exist, if not an IAE will be throwed
    final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
    final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
    final BytesRef bytes = bytesAtt.getBytesRef();
    stream.reset();
    assertEquals(64, numericAtt.getValueSize());
    for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
      assertTrue("New token is available", stream.incrementToken());
      assertEquals("Shift value wrong", shift, numericAtt.getShift());
      bytesAtt.fillBytesRef();
      assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), NumericUtils.prefixCodedToLong(bytes));
      assertEquals("Term raw value is incorrectly encoded", lvalue & ~((1L << shift) - 1L), numericAtt.getRawValue());
      assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
    }
    assertFalse("More tokens available", stream.incrementToken());
    stream.end();
    stream.close();
  }

View Full Code Here


  public void testIntStream() throws Exception {
    final NumericTokenStream stream=new NumericTokenStream().setIntValue(ivalue);
    // use getAttribute to test if attributes really exist, if not an IAE will be throwed
    final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
    final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
    final BytesRef bytes = bytesAtt.getBytesRef();
    stream.reset();
    assertEquals(32, numericAtt.getValueSize());
    for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
      assertTrue("New token is available", stream.incrementToken());
      assertEquals("Shift value wrong", shift, numericAtt.getShift());
      bytesAtt.fillBytesRef();
      assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), NumericUtils.prefixCodedToInt(bytes));
      assertEquals("Term raw value is incorrectly encoded", ((long) ivalue) & ~((1L << shift) - 1L), numericAtt.getRawValue());
      assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
    }
    assertFalse("More tokens available", stream.incrementToken());
    stream.end();
    stream.close();
  }

View Full Code Here

    testPositons(typeTokenFilter);


  }


  private void testPositons(TypeTokenFilter stpf) throws IOException {
    TypeAttribute typeAtt = stpf.getAttribute(TypeAttribute.class);
    CharTermAttribute termAttribute = stpf.getAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class);
    stpf.reset();
    boolean enablePositionIncrements = stpf.getEnablePositionIncrements();
    while (stpf.incrementToken()) {
      log("Token: " + termAttribute.toString() + ": " + typeAtt.type() + " - " + posIncrAtt.getPositionIncrement());
      assertEquals("if position increment is enabled the positionIncrementAttribute value should be 3, otherwise 1",
          posIncrAtt.getPositionIncrement(), enablePositionIncrements ? 3 : 1);
    }
    stpf.end();
    stpf.close();

View Full Code Here

    TeeSinkTokenFilter.SinkTokenStream sink = ttf.newSinkTokenStream(sinkFilter);
    
    boolean seenDogs = false;


    CharTermAttribute termAtt = ttf.addAttribute(CharTermAttribute.class);
    TypeAttribute typeAtt = ttf.addAttribute(TypeAttribute.class);
    ttf.reset();
    while (ttf.incrementToken()) {
      if (termAtt.toString().equals("dogs")) {
        seenDogs = true;
        assertTrue(typeAtt.type() + " is not equal to " + "D", typeAtt.type().equals("D") == true);
      } else {
        assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals("word"));
      }
    }
    assertTrue(seenDogs + " does not equal: " + true, seenDogs == true);
    
    int sinkCount = 0;

View Full Code Here

  
  public void testFilterTokens() throws Exception {
    SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English");
    TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
    OffsetAttribute offsetAtt = filter.getAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = filter.getAttribute(TypeAttribute.class);
    PayloadAttribute payloadAtt = filter.getAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAtt = filter.getAttribute(PositionIncrementAttribute.class);
    FlagsAttribute flagsAtt = filter.getAttribute(FlagsAttribute.class);
    
    filter.incrementToken();


    assertEquals("accent", termAtt.term());
    assertEquals(2, offsetAtt.startOffset());
    assertEquals(7, offsetAtt.endOffset());
    assertEquals("wrd", typeAtt.type());
    assertEquals(3, posIncAtt.getPositionIncrement());
    assertEquals(77, flagsAtt.getFlags());
    assertEquals(new Payload(new byte[]{0,1,2,3}), payloadAtt.getPayload());
  }

View Full Code Here


  public void checkCJKToken(final String str, final TestToken[] out_tokens) throws IOException {
    CJKTokenizer tokenizer = new CJKTokenizer(new StringReader(str));
    TermAttribute termAtt = tokenizer.getAttribute(TermAttribute.class);
    OffsetAttribute offsetAtt = tokenizer.getAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = tokenizer.getAttribute(TypeAttribute.class);
    for (int i = 0; i < out_tokens.length; i++) {
      assertTrue(tokenizer.incrementToken());
      assertEquals(termAtt.term(), out_tokens[i].termText);
      assertEquals(offsetAtt.startOffset(), out_tokens[i].start);
      assertEquals(offsetAtt.endOffset(), out_tokens[i].end);
      assertEquals(typeAtt.type(), out_tokens[i].type);
    }
    assertFalse(tokenizer.incrementToken());
  }

View Full Code Here

  
  public void checkCJKTokenReusable(final Analyzer a, final String str, final TestToken[] out_tokens) throws IOException {
    TokenStream ts = a.reusableTokenStream("dummy", new StringReader(str));
    TermAttribute termAtt = ts.getAttribute(TermAttribute.class);
    OffsetAttribute offsetAtt = ts.getAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = ts.getAttribute(TypeAttribute.class);
    for (int i = 0; i < out_tokens.length; i++) {
      assertTrue(ts.incrementToken());
      assertEquals(termAtt.term(), out_tokens[i].termText);
      assertEquals(offsetAtt.startOffset(), out_tokens[i].start);
      assertEquals(offsetAtt.endOffset(), out_tokens[i].end);
      assertEquals(typeAtt.type(), out_tokens[i].type);
    }
    assertFalse(ts.incrementToken());
  }

View Full Code Here

    int numItalics = 0;
    int numBoldItalics = 0;
    int numCategory = 0;
    int numCitation = 0;
    TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
    TypeAttribute typeAtt = tf.addAttribute(TypeAttribute.class);
    
    while (tf.incrementToken()) {
      String tokText = termAtt.term();
      //System.out.println("Text: " + tokText + " Type: " + token.type());
      String expectedType = (String) tcm.get(tokText);
      assertTrue("expectedType is null and it shouldn't be for: " + tf.toString(), expectedType != null);
      assertTrue(typeAtt.type() + " is not equal to " + expectedType + " for " + tf.toString(), typeAtt.type().equals(expectedType) == true);
      count++;
      if (typeAtt.type().equals(WikipediaTokenizer.ITALICS)  == true){
        numItalics++;
      } else if (typeAtt.type().equals(WikipediaTokenizer.BOLD_ITALICS)  == true){
        numBoldItalics++;
      } else if (typeAtt.type().equals(WikipediaTokenizer.CATEGORY)  == true){
        numCategory++;
      }
      else if (typeAtt.type().equals(WikipediaTokenizer.CITATION)  == true){
        numCitation++;
      }
    }
    assertTrue("We have not seen enough tokens: " + count + " is not >= " + tcm.size(), count >= tcm.size());
    assertTrue(numItalics + " does not equal: " + 4 + " for numItalics", numItalics == 4);

View Full Code Here


  public void testLinks() throws Exception {
    String test = "[http://lucene.apache.org/java/docs/index.html#news here] [http://lucene.apache.org/java/docs/index.html?b=c here] [https://lucene.apache.org/java/docs/index.html?b=c here]";
    WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test));
    TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
    TypeAttribute typeAtt = tf.addAttribute(TypeAttribute.class);
    
    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org/java/docs/index.html#news",
        termAtt.term().equals("http://lucene.apache.org/java/docs/index.html#news") == true);
    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true);
    tf.incrementToken();//skip here
    
    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org/java/docs/index.html?b=c",
        termAtt.term().equals("http://lucene.apache.org/java/docs/index.html?b=c") == true);
    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true);
    tf.incrementToken();//skip here
    
    assertTrue(tf.incrementToken());
    assertTrue(termAtt.term() + " is not equal to " + "https://lucene.apache.org/java/docs/index.html?b=c",
        termAtt.term().equals("https://lucene.apache.org/java/docs/index.html?b=c") == true);
    assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true);
    
    assertTrue(tf.incrementToken());
    assertFalse(tf.incrementToken());
  }

View Full Code Here

0 1 2 3 4 5 6 7 8

TOP

Related Classes of org.apache.lucene.analysis.tokenattributes.TypeAttribute

com.chenlb.mmseg4j.analysis.TokenUtils

com.github.le11.nls.lucene.UIMATypeAwareAnalyzerTest

com.tistory.devyongsik.crescent.admin.service.MorphServiceImpl

org.apache.jackrabbit.core.query.lucene.JackrabbitQueryParser

org.apache.lucene.analysis.cjk.TestCJKTokenizer

org.apache.lucene.analysis.core.TestTypeTokenFilter

org.apache.lucene.analysis.payloads.NumericPayloadTokenFilterTest

org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilterTest

org.apache.lucene.analysis.sinks.TokenTypeSinkTokenizerTest

org.apache.lucene.analysis.snowball.TestSnowball

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.