Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.Token.termLength()


        LowercaseWhitespaceAnalyzer similarAnalyzer = new LowercaseWhitespaceAnalyzer();
        TokenStream tokenStream = similarAnalyzer.tokenStream(String.valueOf(IEntity.ALL_FIELDS), new StringReader(value));
        Token token = null;
        while ((token = tokenStream.next()) != null) {
          String termText = new String(token.termBuffer(), 0, token.termLength());
          Term term = new Term(fieldname, termText);
          similarityQuery.add(new BooleanClause(new FuzzyQuery(term), Occur.MUST));
        }

        return similarityQuery;
View Full Code Here


    if(nextToken != null &&
        (Word.TYPE_LETTER_OR_DIGIT.equalsIgnoreCase(nextToken.type())
          || Word.TYPE_DIGIT_OR_LETTER.equalsIgnoreCase(nextToken.type()))
        ) {
      final char[] buffer = nextToken.termBuffer();
      final int length = nextToken.termLength();
      byte lastType = (byte) Character.getType(buffer[0])// type of the previous character, used to check whether the current character is of the same kind
      int termBufferOffset = 0;
      int termBufferLength = 0;
      for(int i=0;i<length;i++) {
        byte type = (byte) Character.getType(buffer[i]);
View Full Code Here

  public boolean incrementToken() throws IOException {
    clearAttributes();
    Token token = nextToken(reusableToken);
    if(token != null) {
      termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
      offsetAtt.setOffset(token.startOffset(), token.endOffset());
      typeAtt.setType(token.type());
      return true;
    } else {
      end();
View Full Code Here

        while((line = reader.readLine()) != null) {
          bw.append("--------------------------").append("\r\n");;
          bw.append(line).append("\r\n");
          TokenStream ts = analyzer.tokenStream("text", new StringReader(line));
          for(Token t= new Token(); (t=TokenUtils.nextToken(ts, t)) !=null;) {
            bw.append(new String(t.termBuffer(), 0, t.termLength())).append(" | ");
          }
          bw.append("\r\n");
        }
       
        long t = System.currentTimeMillis() - start;
View Full Code Here

      token = produceNextToken(reusableToken);
    } while (token == request_next_token);
    if (token == null) return false;

    clearAttributes();
    termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
    posIncrAtt.setPositionIncrement(token.getPositionIncrement());
    flagsAtt.setFlags(token.getFlags());
    offsetAtt.setOffset(token.startOffset(), token.endOffset());
    typeAtt.setType(token.type());
    payloadAtt.setPayload(token.getPayload());
View Full Code Here

        List<Token> shingle = new ArrayList<Token>(currentShingleLength);

        for (int i = 0; i < currentShingleLength; i++) {
          Token shingleToken = currentPermuationTokens.get(i + currentPermutationTokensStartOffset);
          termLength += shingleToken.termLength();
          shingle.add(shingleToken);
        }
        if (spacerCharacter != null) {
          termLength += currentShingleLength - 1;
        }
View Full Code Here

        StringBuilder sb = new StringBuilder(termLength + 10); // paranormal ability to foresee the future.
        for (Token shingleToken : shingle) {
          if (spacerCharacter != null && sb.length() > 0) {
            sb.append(spacerCharacter);
          }
          sb.append(shingleToken.termBuffer(), 0, shingleToken.termLength());
        }
        reusableToken.setTermBuffer(sb.toString());
        updateToken(reusableToken, shingle, currentPermutationTokensStartOffset, currentPermutationRows, currentPermuationTokens);

        return reusableToken;
View Full Code Here

        // check the dictionary
        if (dictionary.contains(lowerCaseTermBuffer, start, partLength)) {
          if (this.onlyLongestMatch) {
            if (longestMatchToken != null) {
              if (longestMatchToken.termLength() < partLength) {
                longestMatchToken = createToken(start, partLength, token);
              }
            } else {
              longestMatchToken = createToken(start, partLength, token);
            }
View Full Code Here

          // shorter
          // to avoid problems with genitive 's characters and other binding
          // characters
          if (this.onlyLongestMatch) {
            if (longestMatchToken != null) {
              if (longestMatchToken.termLength() < partLength - 1) {
                longestMatchToken = createToken(start, partLength - 1, token);
              }
            } else {
              longestMatchToken = createToken(start, partLength - 1, token);
            }
View Full Code Here

                break;
            }
            if(dictionary.contains(lowerCaseTermBuffer, i, j)) {
                if (this.onlyLongestMatch) {
                   if (longestMatchToken!=null) {
                     if (longestMatchToken.termLength()<j) {
                       longestMatchToken=createToken(i,j,token);
                     }
                   } else {
                     longestMatchToken=createToken(i,j,token);
                   }
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.