Examples of org.apache.lucene.analysis.tokenattributes.TypeAttribute

Package org.apache.lucene.analysis.tokenattributes

Examples of org.apache.lucene.analysis.tokenattributes.TypeAttribute

org.apache.lucene.analysis.tokenattributes.TypeAttribute
A Token's lexical type. The Default value is "word".


  private static void displayTokensWithFullDetails(TokenStream stream) throws IOException {
    final CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    final PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
    final OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
    final TypeAttribute type = stream.addAttribute(TypeAttribute.class);
    int position = 0;
    while (stream.incrementToken()) {
      final int increment = posIncr.getPositionIncrement();
      if (increment > 0) {
        position = position + increment;
        System.out.println();
        System.out.print(position + ": ");
      }
      System.out.print("[" + term.toString() + ":" + offset.startOffset() + "->"
          + offset.endOffset() + ":" + type.type() + "] ");
    }
    System.out.println();
  }

View Full Code Here

        Analyzer a = getAnalyzer();
        TokenStream ts = a.tokenStream(field, new StringReader(termStr));
        int count = 0;
        boolean isCJ = false;
        try {
            TypeAttribute t = ts.addAttribute(TypeAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                count++;
                isCJ = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.CJ].equals(t.type());
            }
            ts.end();
        } catch (IOException e) {
            throw new ParseException(e.getMessage());
        } finally {

View Full Code Here

      record.put( "flags", narrowedAttr.getFlags() );
      return record;
    }
    else if ( attr instanceof TypeAttribute ) {
      GenericRecord record = new GenericData.Record( protocol.getType( "TypeAttribute" ) );
      TypeAttribute narrowedAttr = (TypeAttribute) attr;
      record.put( "type", narrowedAttr.type() );
      return record;
    }
    else if ( attr instanceof OffsetAttribute ) {
      GenericRecord record = new GenericData.Record( protocol.getType( "OffsetAttribute" ) );
      OffsetAttribute narrowedAttr = (OffsetAttribute) attr;
      record.put( "startOffset", narrowedAttr.startOffset() );
      record.put( "endOffset", narrowedAttr.endOffset() );
      return record;
    }
    else if ( attr instanceof Serializable ) {
      return ByteBuffer.wrap( toByteArray( (Serializable) attr ) );
    }

View Full Code Here

        Analyzer a = getAnalyzer();
        TokenStream ts = a.tokenStream(field, new StringReader(termStr));
        int count = 0;
        boolean isCJ = false;
        try {
            TypeAttribute t = ts.addAttribute(TypeAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                count++;
                isCJ = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.CJ].equals(t.type());
            }
            ts.end();
        } catch (IOException e) {
            throw new ParseException(e.getMessage());
        } finally {

View Full Code Here

  
  public void testFilterTokens() throws Exception {
    SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English");
    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = filter.getAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = filter.getAttribute(TypeAttribute.class);
    PayloadAttribute payloadAtt = filter.getAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAtt = filter.getAttribute(PositionIncrementAttribute.class);
    FlagsAttribute flagsAtt = filter.getAttribute(FlagsAttribute.class);
    
    filter.incrementToken();


    assertEquals("accent", termAtt.toString());
    assertEquals(2, offsetAtt.startOffset());
    assertEquals(7, offsetAtt.endOffset());
    assertEquals("wrd", typeAtt.type());
    assertEquals(3, posIncAtt.getPositionIncrement());
    assertEquals(77, flagsAtt.getFlags());
    assertEquals(new BytesRef(new byte[]{0,1,2,3}), payloadAtt.getPayload());
  }

View Full Code Here

      record.put( "flags", narrowedAttr.getFlags() );
      return record;
    }
    else if ( attr instanceof TypeAttribute ) {
      GenericRecord record = new GenericData.Record( protocol.getType( "TypeAttribute" ) );
      TypeAttribute narrowedAttr = (TypeAttribute) attr;
      record.put( "type", narrowedAttr.type() );
      return record;
    }
    else if ( attr instanceof OffsetAttribute ) {
      GenericRecord record = new GenericData.Record( protocol.getType( "OffsetAttribute" ) );
      OffsetAttribute narrowedAttr = (OffsetAttribute) attr;
      record.put( "startOffset", narrowedAttr.startOffset() );
      record.put( "endOffset", narrowedAttr.endOffset() );
      return record;
    }
    else if ( attr instanceof Serializable ) {
      return ByteBuffer.wrap( toByteArray( (Serializable) attr ) );
    }

View Full Code Here

    testPositons(typeTokenFilter);


  }


  private void testPositons(TypeTokenFilter stpf) throws IOException {
    TypeAttribute typeAtt = stpf.getAttribute(TypeAttribute.class);
    CharTermAttribute termAttribute = stpf.getAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class);
    stpf.reset();
    boolean enablePositionIncrements = stpf.getEnablePositionIncrements();
    while (stpf.incrementToken()) {
      log("Token: " + termAttribute.toString() + ": " + typeAtt.type() + " - " + posIncrAtt.getPositionIncrement());
      assertEquals("if position increment is enabled the positionIncrementAttribute value should be 3, otherwise 1",
          posIncrAtt.getPositionIncrement(), enablePositionIncrements ? 3 : 1);
    }
    stpf.end();
    stpf.close();

View Full Code Here

      return null;
    }
    
    CharTermAttribute termAtt = (CharTermAttribute)input.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = (OffsetAttribute)input.getAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = (TypeAttribute)input.getAttribute(TypeAttribute.class);
    
    if(reusableToken == null) {
      reusableToken = new Token();
    }
    
    reusableToken.clear();
    if(termAtt != null) {
      //lucene 3.0
      //reusableToken.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
      //lucene 3.1
      reusableToken.copyBuffer(termAtt.buffer(), 0, termAtt.length());
    }
    if(offsetAtt != null) {
      reusableToken.setStartOffset(offsetAtt.startOffset());
      reusableToken.setEndOffset(offsetAtt.endOffset());
    }
    
    if(typeAtt != null) {
      reusableToken.setType(typeAtt.type());
    }
    
    return reusableToken;
  }

View Full Code Here

    TokenStream stream = analyzer.tokenStream("dummy", reader);
    stream.reset();
    
    CharTermAttribute charTermAtt = stream.getAttribute(CharTermAttribute.class);
    OffsetAttribute offSetAtt = stream.getAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);


    List<MorphToken> resultTokenList = new ArrayList<MorphToken>();
    
    while(stream.incrementToken()) {
      Token t = new Token(charTermAtt.toString(), offSetAtt.startOffset(), offSetAtt.endOffset(), typeAtt.type());
      
      logger.debug("termAtt : {}, startOffset : {}, endOffset : {}, typeAtt : {}", 
          new Object[] {charTermAtt.toString(),offSetAtt.startOffset(), offSetAtt.endOffset(), typeAtt.type()});
      
      MorphToken mt = new MorphToken();
      mt.setTerm(t.toString());
      mt.setType(t.type());
      mt.setStartOffset(t.startOffset());

View Full Code Here

    int numItalics = 0;
    int numBoldItalics = 0;
    int numCategory = 0;
    int numCitation = 0;
    TermAttribute termAtt = (TermAttribute) tf.addAttribute(TermAttribute.class);
    TypeAttribute typeAtt = (TypeAttribute) tf.addAttribute(TypeAttribute.class);
    
    while (tf.incrementToken()) {
      String tokText = termAtt.term();
      //System.out.println("Text: " + tokText + " Type: " + token.type());
      String expectedType = (String) tcm.get(tokText);
      assertTrue("expectedType is null and it shouldn't be for: " + tf.toString(), expectedType != null);
      assertTrue(typeAtt.type() + " is not equal to " + expectedType + " for " + tf.toString(), typeAtt.type().equals(expectedType) == true);
      count++;
      if (typeAtt.type().equals(WikipediaTokenizer.ITALICS)  == true){
        numItalics++;
      } else if (typeAtt.type().equals(WikipediaTokenizer.BOLD_ITALICS)  == true){
        numBoldItalics++;
      } else if (typeAtt.type().equals(WikipediaTokenizer.CATEGORY)  == true){
        numCategory++;
      }
      else if (typeAtt.type().equals(WikipediaTokenizer.CITATION)  == true){
        numCitation++;
      }
    }
    assertTrue("We have not seen enough tokens: " + count + " is not >= " + tcm.size(), count >= tcm.size());
    assertTrue(numItalics + " does not equal: " + 4 + " for numItalics", numItalics == 4);

View Full Code Here

0 1 2 3 4 5 6 7 8

TOP

Related Classes of org.apache.lucene.analysis.tokenattributes.TypeAttribute

com.chenlb.mmseg4j.analysis.TokenUtils

com.github.le11.nls.lucene.UIMATypeAwareAnalyzerTest

com.tistory.devyongsik.crescent.admin.service.MorphServiceImpl

org.apache.jackrabbit.core.query.lucene.JackrabbitQueryParser

org.apache.lucene.analysis.cjk.TestCJKTokenizer

org.apache.lucene.analysis.core.TestTypeTokenFilter

org.apache.lucene.analysis.payloads.NumericPayloadTokenFilterTest

org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilterTest

org.apache.lucene.analysis.sinks.TokenTypeSinkTokenizerTest

org.apache.lucene.analysis.snowball.TestSnowball

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.