Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.Token
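org.apache.lucene.analysis.Token is Lucene's classic unit of analysis output: it carries term text plus start/end offsets, a position increment, a type, flags, and an optional payload. As a primer for the snippets below, a minimal hedged sketch of the 3.x-era Token API (the class name TokenBasics is just for illustration):

import org.apache.lucene.analysis.Token;

public class TokenBasics {
  public static void main(String[] args) {
    // term text "example", start offset 0, end offset 7, type "word"
    Token t = new Token("example", 0, 7, "word");
    t.setPositionIncrement(1); // 1 = advance a position; 0 = stack on the previous token
    System.out.println(t.term() + " [" + t.startOffset() + "-" + t.endOffset() + "] " + t.type());
  }
}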


import java.io.IOException;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.miscellaneous.SingleTokenTokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.LuceneTestCase;

public class TestSingleTokenTokenFilter extends LuceneTestCase {

  public void test() throws IOException {
    Token token = new Token();
    SingleTokenTokenStream ts = new SingleTokenTokenStream(token);
    AttributeImpl tokenAtt = (AttributeImpl) ts.addAttribute(TermAttribute.class);
    assertTrue(tokenAtt instanceof Token);
    ts.reset();

    assertTrue(ts.incrementToken());
    assertEquals(token, tokenAtt);
    assertFalse(ts.incrementToken());
   
    token = new Token("hallo", 10, 20, "someType");
    ts.setToken(token);
    ts.reset();

    assertTrue(ts.incrementToken());
    assertEquals(token, tokenAtt);
    assertFalse(ts.incrementToken());
  }
}


            matchEndOffset = Math.max(matchEndOffset, termEndOffset);
          }
          tot += score;
        }
      }
      // record the current term as a Token, in parallel with its score
      Token token = new Token(termStartOffset, termEndOffset);
      token.setTermBuffer(termAtt.term());
      tokens[numTokens] = token;
      scores[numTokens] = score;
      numTokens++;
    }
  }

          // replay the reconstructed Token array through the attribute-based API
          public boolean incrementToken() throws IOException {
            if (currentToken >= tokens.length) {
              return false;
            }
            clearAttributes();
            Token token = tokens[currentToken++];
            termAtt.setTermBuffer(token.term());
            offsetAtt.setOffset(token.startOffset(), token.endOffset());
            return true;
          }
        }     
        //code to reconstruct the original sequence of Tokens
        String[] terms=tpv.getTerms();         
        int[] freq=tpv.getTermFrequencies();
        int totalTokens=0;

        for (int t = 0; t < freq.length; t++)
        {
            totalTokens+=freq[t];
        }
        Token[] tokensInOriginalOrder = new Token[totalTokens];
        ArrayList<Token> unsortedTokens = null;
        for (int t = 0; t < freq.length; t++)
        {
            TermVectorOffsetInfo[] offsets=tpv.getOffsets(t);
            if(offsets==null)
            {
                return null;
            }
           
            int[] pos=null;
            if(tokenPositionsGuaranteedContiguous)
            {
                //try to get the token position info to speed up assembly of tokens into sorted sequence
                pos=tpv.getTermPositions(t);
            }
            if(pos==null)
            { 
                //tokens NOT stored with positions or not guaranteed contiguous - must add to list and sort later
                if(unsortedTokens==null)
                {
                    unsortedTokens=new ArrayList<Token>();
                }
                for (int tp = 0; tp < offsets.length; tp++)
                {
                  Token token = new Token(offsets[tp].getStartOffset(), offsets[tp].getEndOffset());
                  token.setTermBuffer(terms[t]);
                  unsortedTokens.add(token);
                }
            }
            else
            {
                //We have positions stored and a guarantee that the token position information is contiguous
               
                // This may be fast BUT won't work if Tokenizers are used which create >1 token in the same position or
                // create jumps in position numbers - this code would fail under those circumstances
               
                //tokens stored with positions - can use this to index straight into sorted array
                for (int tp = 0; tp < pos.length; tp++)
                {
                  Token token = new Token(terms[t], offsets[tp].getStartOffset(), offsets[tp].getEndOffset());
                  tokensInOriginalOrder[pos[tp]] = token;
                }               
            }
        }
        //If the field has been stored without position data we must perform a sort       
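The excerpt breaks off at that final comment. A minimal sketch of the fallback sort it describes, reconstructed as an assumption from the comment rather than taken verbatim from the original (assumes java.util.Arrays and java.util.Comparator are imported):

        if (unsortedTokens != null)
        {
            tokensInOriginalOrder = unsortedTokens.toArray(new Token[unsortedTokens.size()]);
            Arrays.sort(tokensInOriginalOrder, new Comparator<Token>() {
                public int compare(Token t1, Token t2) {
                    // sort by start offset, breaking ties on end offset
                    if (t1.startOffset() == t2.startOffset())
                        return t1.endOffset() - t2.endOffset();
                    return t1.startOffset() - t2.startOffset();
                }
            });
        }
        return new StoredTokenStream(tokensInOriginalOrder); // replay through the local stream above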

        end = Integer.parseInt(params[3]);
      } else {
        end = start + params[0].length();
      }

      Token t = new Token(params[0],start,end,"TEST");
      t.setPositionIncrement(posInc);
     
      result.add(t);
      // any further terms in toks are stacked at the same position
      // (position increment 0), as synonyms of the first token would be
      for (int j=1; j<toks.length; j++) {
        t = new Token(toks[j],0,0,"TEST");
        t.setPositionIncrement(0);
        result.add(t);
      }
    }
    return result;
  }

    public boolean incrementToken() throws IOException {
      if (index >= tokens.length)
        return false;
      else {
        // copy every attribute of the stored Token onto this stream's attributes
        clearAttributes();
        Token token = tokens[index++];
        termAtt.setEmpty().append(token);
        offsetAtt.setOffset(token.startOffset(), token.endOffset());
        posIncAtt.setPositionIncrement(token.getPositionIncrement());
        flagsAtt.setFlags(token.getFlags());
        typeAtt.setType(token.type());
        payloadAtt.setPayload(token.getPayload());
        return true;
      }
    }
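The incrementToken() excerpts above read fields such as termAtt, offsetAtt, and posIncAtt that only their enclosing class declares. A minimal sketch of what such an enclosing stream looks like (the class name and exact field set are assumptions, in the spirit of Lucene's test CannedTokenStream):

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

final class ReplayTokenStream extends TokenStream {
  private final Token[] tokens;
  private int index = 0;
  // attribute instances are registered once and reused for every token
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
  private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);

  ReplayTokenStream(Token[] tokens) {
    this.tokens = tokens;
  }

  @Override
  public boolean incrementToken() {
    if (index >= tokens.length) {
      return false;
    }
    clearAttributes();
    Token token = tokens[index++];
    termAtt.setEmpty().append(token); // Token implements CharSequence in Lucene 3.x
    offsetAtt.setOffset(token.startOffset(), token.endOffset());
    posIncAtt.setPositionIncrement(token.getPositionIncrement());
    return true;
  }
}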

              superColumnList.clear();
              if(importer_.columnFamily.superColumn.tokenize)
              {
                  Analyzer analyzer = new StandardAnalyzer();
                  TokenStream ts = analyzer.tokenStream("superColumn", new StringReader(superColumnName));
                  // collect each token's text (pre-2.9 API: next()/termText())
                  Token token = ts.next();
                  while (token != null)
                  {
                      superColumnList.add(token.termText());
                      token = ts.next();
                  }
              }
              else
              {
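TokenStream.next() and Token.termText(), used in the loop above, belong to the old API: both were deprecated in favor of incrementToken() plus attributes and later removed. A hedged sketch of the equivalent loop, reusing the excerpt's variable names (requires org.apache.lucene.analysis.tokenattributes.CharTermAttribute):

// attribute-based replacement for the deprecated next()/termText() loop
TokenStream ts = analyzer.tokenStream("superColumn", new StringReader(superColumnName));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
ts.reset();
while (ts.incrementToken()) {
    superColumnList.add(termAtt.toString());
}
ts.end();
ts.close();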

    public TokenStream tokenStream(String name, Reader reader) {

        if (log.isDebugEnabled()) {
            TokenStream ts = super.tokenStream(name,
                    htmlReaderFromReader(reader));
            Token t;
            if (log.isDebugEnabled()) {
                try {
                    while ((t = ts.next()) != null) {
                        log.debug("token: " + t);
                    }

    ArrayList<Token> matchList = new ArrayList<Token>();

    // Add segments before each match found
    while(matcher.find()) {
      String match = input.subSequence(index, matcher.start()).toString();
      matchList.add( new Token( match, index, matcher.start()) );
      index = matcher.end();
      if( match.length() > 0 ) {
        lastNonEmptySize = matchList.size();
      }
    }

    // If no match was found, the entire input becomes a single token
    if (index == 0) {
      matchList.add( new Token( input, 0, input.length()) );
    }
    else {
      String match = input.subSequence(index, input.length()).toString();
      matchList.add( new Token( match, index, input.length()) );
      if( match.length() > 0 ) {
        lastNonEmptySize = matchList.size();
      }
    }
   

  @Deprecated
  public static List<Token> group( Matcher matcher, String input, int group )
  {
    ArrayList<Token> matchList = new ArrayList<Token>();
    while(matcher.find()) {
      Token t = new Token(
        matcher.group(group),
        matcher.start(group),
        matcher.end(group) );
      matchList.add( t );
    }
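Both split() and group() appear to be static helpers on an older pattern-tokenizing utility; the enclosing class is not shown. A hedged usage sketch, assuming signatures List<Token> split(Matcher, String) and List<Token> group(Matcher, String, int):

// hypothetical call sites for the helpers above
Pattern comma = Pattern.compile(",");
String input = "red,green,,blue";
List<Token> segments = split(comma.matcher(input), input);                     // one Token per segment
List<Token> words = group(Pattern.compile("(\\w+)").matcher(input), input, 1); // one Token per captured group
for (Token t : segments) {
    System.out.println(t.term() + " @" + t.startOffset() + "-" + t.endOffset());
}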
