Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.Token


        }
      }

      // create token
      SpellCheckResponse.Suggestion suggestion = origVsSuggestion.get(original);
      Token token = new Token(original, suggestion.getStartOffset(), suggestion.getEndOffset());

      // get top 'count' suggestions out of 'sugQueue.size()' candidates
      SuggestWord[] suggestions = new SuggestWord[Math.min(count, sugQueue.size())];
      // skip the first sugQueue.size() - count elements
      for (int k=0; k < sugQueue.size() - count; k++) sugQueue.pop();
View Full Code Here


    FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
    PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
   
    while (ts.incrementToken()){
      Token token = new Token();
      token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
      token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
      token.setType(typeAtt.type());
      token.setFlags(flagsAtt.getFlags());
      token.setPayload(payloadAtt.getPayload());
      token.setPositionIncrement(posIncAtt.getPositionIncrement());
      result.add(token);
    }
    ts.end();
    ts.close();
    return result;
View Full Code Here

    if(numSuggestions > 0) {
      isCorrectlySpelled = true;
    }
   
    for (Map.Entry<Token, LinkedHashMap<String, Integer>> entry : suggestions.entrySet()) {
      Token inputToken = entry.getKey();
      Map<String, Integer> theSuggestions = entry.getValue();
      if (theSuggestions != null && (theSuggestions.size()>0 || shardRequest)) {
        SimpleOrderedMap suggestionList = new SimpleOrderedMap();
        suggestionList.add("numFound", theSuggestions.size());
        suggestionList.add("startOffset", inputToken.startOffset());
        suggestionList.add("endOffset", inputToken.endOffset());

        // Logical structure of normal (non-extended) results:
        // "suggestion":["alt1","alt2"]
        //
        // Logical structure of the extended results:
        // "suggestion":[
        //     {"word":"alt1","freq":7},
        //     {"word":"alt2","freq":4}
        // ]
        if (extendedResults && hasFreqInfo) {
          suggestionList.add("origFreq", spellingResult.getTokenFrequency(inputToken));

          ArrayList<SimpleOrderedMap> sugs = new ArrayList<SimpleOrderedMap>();
          suggestionList.add("suggestion", sugs);
          for (Map.Entry<String, Integer> suggEntry : theSuggestions.entrySet()) {
            SimpleOrderedMap sugEntry = new SimpleOrderedMap();
            sugEntry.add("word",suggEntry.getKey());
            sugEntry.add("freq",suggEntry.getValue());
            sugs.add(sugEntry);
          }
        } else {
          suggestionList.add("suggestion", theSuggestions.keySet());
        }

        if (hasFreqInfo) {
          isCorrectlySpelled = isCorrectlySpelled && spellingResult.getTokenFrequency(inputToken) > 0;
        }
        result.add(new String(inputToken.buffer(), 0, inputToken.length()), suggestionList);
      }
    }
    if (hasFreqInfo) {
      result.add("correctlySpelled", isCorrectlySpelled);
    } else if(extendedResults && suggestions.size() == 0) { // if the word is misspelled, its added to suggestions with freqinfo
View Full Code Here

      {
        termAtt = (TermAttribute) addAttribute(TermAttribute.class);
        posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
        offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
        lst = new ArrayList();
        Token t;
        t = createToken("hi", 0, 2);
        t.setPositionIncrement(1);
        lst.add(t);
        t = createToken("hispeed", 0, 8);
        t.setPositionIncrement(1);
        lst.add(t);
        t = createToken("speed", 3, 8);
        t.setPositionIncrement(0);
        lst.add(t);
        t = createToken("10", 8, 10);
        t.setPositionIncrement(1);
        lst.add(t);
        t = createToken("foo", 11, 14);
        t.setPositionIncrement(1);
        lst.add(t);
        iter = lst.iterator();
      }

      public boolean incrementToken() throws IOException {
        if(iter.hasNext()) {
          Token token = (Token) iter.next();
          termAtt.setTermBuffer(token.term());
          posIncrAtt.setPositionIncrement(token.getPositionIncrement());
          offsetAtt.setOffset(token.startOffset(), token.endOffset());
          return true;
        }
        return false;
      }
    
View Full Code Here

      {
        termAtt = (TermAttribute) addAttribute(TermAttribute.class);
        posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
        offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
        lst = new ArrayList();
        Token t;
        t = createToken("hispeed", 0, 8);
        t.setPositionIncrement(1);
        lst.add(t);
        t = createToken("hi", 0, 2);
        t.setPositionIncrement(0);
        lst.add(t);
        t = createToken("speed", 3, 8);
        t.setPositionIncrement(1);
        lst.add(t);
        t = createToken("10", 8, 10);
        t.setPositionIncrement(1);
        lst.add(t);
        t = createToken("foo", 11, 14);
        t.setPositionIncrement(1);
        lst.add(t);
        iter = lst.iterator();
      }

      public boolean incrementToken() throws IOException {
        if(iter.hasNext()) {
          Token token = (Token) iter.next();
          termAtt.setTermBuffer(token.term());
          posIncrAtt.setPositionIncrement(token.getPositionIncrement());
          offsetAtt.setOffset(token.startOffset(), token.endOffset());
          return true;
        }
        return false;
      }
    };
View Full Code Here

    super.tearDown();
  }

  private static Token createToken(String term, int start, int offset)
  {
    Token token = new Token(start, offset);
    token.setTermBuffer(term);
    return token;
  }
View Full Code Here

        iterator = tokens.iterator();
      }
      if (!iterator.hasNext()) {
        return false;
      }
      Token prototype = (Token) iterator.next();
      termAtt.setTermBuffer(prototype.termBuffer(), 0, prototype.termLength());
      posIncrAtt.setPositionIncrement(prototype.getPositionIncrement());
      flagsAtt.setFlags(prototype.getFlags());
      offsetAtt.setOffset(prototype.startOffset(), prototype.endOffset());
      typeAtt.setType(prototype.type());
      payloadAtt.setPayload(prototype.getPayload());

      return true;
    }
View Full Code Here

                 tokensToCompare.length, i);
  }

  private static Token createToken(String term, int start, int offset)
  {
    Token token = new Token(start, offset);
    token.setTermBuffer(term);
    return token;
  }
View Full Code Here

    }

    public final boolean incrementToken() throws IOException {
      clearAttributes();
      if (index < testToken.length) {
        Token t = testToken[index++];
        termAtt.setTermBuffer(t.termBuffer(), 0, t.termLength());
        offsetAtt.setOffset(t.startOffset(), t.endOffset());
        posIncrAtt.setPositionIncrement(t.getPositionIncrement());
        typeAtt.setType(TypeAttributeImpl.DEFAULT_TYPE);
        return true;
      } else {
        return false;
      }
View Full Code Here

          }

          // reset the TokenStream to the first token         
          tokenStream.reset();

          final Token reusableToken = new Token();
          for (Token nextToken = tokenStream.next(reusableToken); nextToken != null; nextToken = tokenStream.next(reusableToken)) {
            tokens.add((Token) nextToken.clone()); // the vector will be built on commit.
            fieldSetting.fieldLength++;
            if (fieldSetting.fieldLength > maxFieldLength) {
              break;
            }
          }
        } else {
          // untokenized
          String fieldVal = field.stringValue();
          Token token = new Token(0, fieldVal.length(), "untokenized");
          token.setTermBuffer(fieldVal);
          tokens.add(token);
          fieldSetting.fieldLength++;
        }
      }

      if (!field.isStored()) {
        it.remove();
      }
    }


    Map<FieldSetting, Map<String /*text*/, TermDocumentInformationFactory>> termDocumentInformationFactoryByTermTextAndFieldSetting = new HashMap<FieldSetting, Map<String /*text*/, TermDocumentInformationFactory>>();
    termDocumentInformationFactoryByDocument.put(document, termDocumentInformationFactoryByTermTextAndFieldSetting);

    // build term vector, term positions and term offsets
    for (Map.Entry<Field, LinkedList<Token>> eField_Tokens : tokensByField.entrySet()) {
      FieldSetting fieldSetting = fieldSettingsByFieldName.get(eField_Tokens.getKey().name());

      Map<String, TermDocumentInformationFactory> termDocumentInformationFactoryByTermText = termDocumentInformationFactoryByTermTextAndFieldSetting.get(fieldSettingsByFieldName.get(eField_Tokens.getKey().name()));
      if (termDocumentInformationFactoryByTermText == null) {
        termDocumentInformationFactoryByTermText = new HashMap<String /*text*/, TermDocumentInformationFactory>();
        termDocumentInformationFactoryByTermTextAndFieldSetting.put(fieldSettingsByFieldName.get(eField_Tokens.getKey().name()), termDocumentInformationFactoryByTermText);
      }

      int lastOffset = 0;

      // for each new field, move positions a bunch.
      if (fieldSetting.position > 0) {
        // todo what if no analyzer set, multiple fields with same name and index without tokenization?
        fieldSetting.position += analyzer.getPositionIncrementGap(fieldSetting.fieldName);
      }

      for (Token token : eField_Tokens.getValue()) {

        TermDocumentInformationFactory termDocumentInformationFactory = termDocumentInformationFactoryByTermText.get(token.term());
        if (termDocumentInformationFactory == null) {
          termDocumentInformationFactory = new TermDocumentInformationFactory();
          termDocumentInformationFactoryByTermText.put(token.term(), termDocumentInformationFactory);
        }
        //termDocumentInformationFactory.termFrequency++;

        fieldSetting.position += (token.getPositionIncrement() - 1);
        termDocumentInformationFactory.termPositions.add(fieldSetting.position++);

        if (token.getPayload() != null && token.getPayload().length() > 0) {
          termDocumentInformationFactory.payloads.add(token.getPayload().toByteArray());
          fieldSetting.storePayloads = true;
        } else {
          termDocumentInformationFactory.payloads.add(null);
        }

        if (eField_Tokens.getKey().isStoreOffsetWithTermVector()) {

          termDocumentInformationFactory.termOffsets.add(new TermVectorOffsetInfo(fieldSetting.offset + token.startOffset(), fieldSetting.offset + token.endOffset()));
          lastOffset = fieldSetting.offset + token.endOffset();
        }


      }

View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.Token

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact: coftware#gmail.com.