Package: org.apache.lucene.analysis

Usage examples of org.apache.lucene.analysis.Token


  /** Produces a List<Token> from a List<String> */
  public static List<Token> makeTokens(List<String> strings) {
    List<Token> ret = new ArrayList<Token>(strings.size());
    for (String str : strings) {
      //Token newTok = new Token(str,0,0,"SYNONYM");
      Token newTok = new Token(str, 0,0,"SYNONYM");
      ret.add(newTok);
    }
    return ret;
  }
View Full Code Here


    }

    int pos=0;
    Iterator<Token> iter1=lst1.iterator();
    Iterator<Token> iter2=lst2.iterator();
    Token tok1 = iter1.hasNext() ? iter1.next() : null;
    Token tok2 = iter2.hasNext() ? iter2.next() : null;
    int pos1 = tok1!=null ? tok1.getPositionIncrement() : 0;
    int pos2 = tok2!=null ? tok2.getPositionIncrement() : 0;
    while(tok1!=null || tok2!=null) {
      while (tok1 != null && (pos1 <= pos2 || tok2==null)) {
        Token tok = new Token(tok1.startOffset(), tok1.endOffset(), tok1.type());
        tok.copyBuffer(tok1.buffer(), 0, tok1.length());
        tok.setPositionIncrement(pos1-pos);
        result.add(tok);
        pos=pos1;
        tok1 = iter1.hasNext() ? iter1.next() : null;
        pos1 += tok1!=null ? tok1.getPositionIncrement() : 0;
      }
      while (tok2 != null && (pos2 <= pos1 || tok1==null)) {
        Token tok = new Token(tok2.startOffset(), tok2.endOffset(), tok2.type());
        tok.copyBuffer(tok2.buffer(), 0, tok2.length());
        tok.setPositionIncrement(pos2-pos);
        result.add(tok);
        pos=pos2;
        tok2 = iter2.hasNext() ? iter2.next() : null;
        pos2 += tok2!=null ? tok2.getPositionIncrement() : 0;
      }
View Full Code Here

      int origPos = firstPosIncAtt.getPositionIncrement(); // position of origTok in the original stream
      int repPos=0; // curr position in replacement token stream
      int pos=0; // current position in merged token stream

      for (int i=0; i<result.synonyms.length; i++) {
        Token repTok = result.synonyms[i];
        AttributeSource newTok = firstTok.cloneAttributes();
        CharTermAttribute newTermAtt = newTok.addAttribute(CharTermAttribute.class);
        OffsetAttribute newOffsetAtt = newTok.addAttribute(OffsetAttribute.class);
        PositionIncrementAttribute newPosIncAtt = newTok.addAttribute(PositionIncrementAttribute.class);

        OffsetAttribute lastOffsetAtt = lastTok.addAttribute(OffsetAttribute.class);

        newOffsetAtt.setOffset(newOffsetAtt.startOffset(), lastOffsetAtt.endOffset());
        newTermAtt.copyBuffer(repTok.buffer(), 0, repTok.length());
        repPos += repTok.getPositionIncrement();
        if (i==0) repPos=origPos;  // make position of first token equal to original

        // if necessary, insert original tokens and adjust position increment
        while (origTok != null && origPos <= repPos) {
          PositionIncrementAttribute origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
View Full Code Here

  @Override
  public final boolean incrementToken() throws IOException {
    while (true) {
      if (!outQueue.isEmpty()) return writeToken(outQueue.removeFirst());
      Token t = read();
      if (null == t) return false;
      Token out = process(t);
      if (null != out) return writeToken(out);
      // loop back to top in case process() put something on the output queue
    }
  }
View Full Code Here

   * Read a token from the buffered input stream. 
   * @return null at EOS
   */
  protected Token read() throws IOException {
    if (inQueue.isEmpty()) {
      Token t = readToken();
      return t;
    }
    return inQueue.removeFirst();
  }
View Full Code Here

   *         <code>read()</code> from the stream.
   */
  protected Token peek(int n) throws IOException {
    int fillCount = n-inQueue.size();
    for (int i=0; i < fillCount; i++) {
      Token t = readToken();
      if (null==t) return null;
      inQueue.addLast(t);
    }
    return inQueue.get(n-1);
  }
View Full Code Here

  /** old api emulation for back compat */
  private Token readToken() throws IOException {
    if (!input.incrementToken()) {
      return null;
    } else {
      Token token = new Token();
      token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
      token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
      token.setType(typeAtt.type());
      token.setFlags(flagsAtt.getFlags());
      token.setPositionIncrement(posIncAtt.getPositionIncrement());
      token.setPayload(payloadAtt.getPayload());
      return token;
    }
  }
View Full Code Here

          TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
          PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
          PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
          stream.reset();
          while (stream.incrementToken()) {
            Token token = new Token();
            token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
            token.setStartOffset(matcher.start());
            token.setEndOffset(matcher.end());
            token.setFlags(flagsAtt.getFlags());
            token.setType(typeAtt.type());
            token.setPayload(payloadAtt.getPayload());
            token.setPositionIncrement(posIncAtt.getPositionIncrement());
            result.add(token);
          }
          stream.end();
          stream.close();
        } catch (IOException e) {
View Full Code Here

   *
   * @param suggestions
   */
  public PossibilityIterator(Map<Token, LinkedHashMap<String, Integer>> suggestions, int maximumRequiredSuggestions, int maxEvaluations) {
    for (Map.Entry<Token, LinkedHashMap<String, Integer>> entry : suggestions.entrySet()) {
      Token token = entry.getKey();
      List<SpellCheckCorrection> possibleCorrections = new ArrayList<SpellCheckCorrection>();
      for (Map.Entry<String, Integer> entry1 : entry.getValue().entrySet()) {
        SpellCheckCorrection correction = new SpellCheckCorrection();
        correction.setOriginal(token);
        correction.setCorrection(entry1.getKey());
View Full Code Here

  private String getCollation(String origQuery,
                              List<SpellCheckCorrection> corrections) {
    StringBuilder collation = new StringBuilder(origQuery);
    int offset = 0;
    for (SpellCheckCorrection correction : corrections) {
      Token tok = correction.getOriginal();
      // we are replacing the query in order, but injected terms might cause
      // illegal offsets due to previous replacements.
      if (tok.getPositionIncrement() == 0)
        continue;
      collation.replace(tok.startOffset() + offset, tok.endOffset() + offset,
          correction.getCorrection());
      offset += correction.getCorrection().length()
          - (tok.endOffset() - tok.startOffset());
    }
    return collation.toString();
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.Token

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.