Package org.apache.lucene.analysis.tokenattributes

Examples of org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute


    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
    FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
    PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
   
    while (ts.incrementToken()){
      Token token = new Token();
      token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
      token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
      token.setType(typeAtt.type());
      token.setFlags(flagsAtt.getFlags());
      token.setPayload(payloadAtt.getPayload());
      token.setPositionIncrement(posIncAtt.getPositionIncrement());
      result.add(token);
    }
    return result;
  }
View Full Code Here
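
The snippet above copies each attribute into a Token but, being an excerpt, leaves out the reset()/end()/close() calls that the TokenStream contract requires around the incrementToken() loop. Below is a minimal, self-contained sketch of that full consume cycle, accumulating position increments into absolute positions. The WhitespaceAnalyzer, the "body" field name and the sample text are placeholders, and the sketch assumes a Lucene version with Analyzer.tokenStream(String, String) and a no-argument WhitespaceAnalyzer (5.x or later); the older excerpts on this page use reusableTokenStream and Version-based constructors instead.

    import java.io.IOException;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

    public class PrintPositionIncrements {
      public static void main(String[] args) throws IOException {
        Analyzer analyzer = new WhitespaceAnalyzer();
        try (TokenStream ts = analyzer.tokenStream("body", "a quick brown fox")) {
          CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
          PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
          int position = -1;
          ts.reset();                                        // required before the first incrementToken()
          while (ts.incrementToken()) {
            position += posIncAtt.getPositionIncrement();    // accumulate increments into an absolute position
            System.out.println(termAtt + " @ " + position);
          }
          ts.end();                                          // records end-of-stream state (e.g. final offset)
        }
        analyzer.close();
      }
    }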


          // TODO: support custom attributes
          CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
          FlagsAttribute flagsAtt = stream.addAttribute(FlagsAttribute.class);
          TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
          PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
          PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
          stream.reset();
          while (stream.incrementToken()) {
            Token token = new Token();
            token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
            token.setStartOffset(matcher.start());
            token.setEndOffset(matcher.end());
            token.setFlags(flagsAtt.getFlags());
            token.setType(typeAtt.type());
            token.setPayload(payloadAtt.getPayload());
            token.setPositionIncrement(posIncAtt.getPositionIncrement());
            result.add(token);
          }
        } catch (IOException e) {
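          // the analyzer's exception is silently swallowed here; only the tokens gathered so far are kept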
        }
      }
View Full Code Here

      //
      AttributeSource lastTok = matched.isEmpty() ? firstTok : matched.getLast();
      boolean includeOrig = result.includeOrig();

      AttributeSource origTok = includeOrig ? firstTok : null;
      PositionIncrementAttribute firstPosIncAtt = firstTok.addAttribute(PositionIncrementAttribute.class);
      int origPos = firstPosIncAtt.getPositionIncrement(); // position of origTok in the original stream
      int repPos=0; // curr position in replacement token stream
      int pos=0; // current position in merged token stream

      for (int i=0; i<result.synonyms.length; i++) {
        Token repTok = result.synonyms[i];
        AttributeSource newTok = firstTok.cloneAttributes();
        CharTermAttribute newTermAtt = newTok.addAttribute(CharTermAttribute.class);
        OffsetAttribute newOffsetAtt = newTok.addAttribute(OffsetAttribute.class);
        PositionIncrementAttribute newPosIncAtt = newTok.addAttribute(PositionIncrementAttribute.class);

        OffsetAttribute lastOffsetAtt = lastTok.addAttribute(OffsetAttribute.class);

        newOffsetAtt.setOffset(newOffsetAtt.startOffset(), lastOffsetAtt.endOffset());
        newTermAtt.copyBuffer(repTok.buffer(), 0, repTok.length());
        repPos += repTok.getPositionIncrement();
        if (i==0) repPos=origPos;  // make position of first token equal to original

        // if necessary, insert original tokens and adjust position increment
        while (origTok != null && origPos <= repPos) {
          PositionIncrementAttribute origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
          origPosInc.setPositionIncrement(origPos-pos);
          generated.add(origTok);
          pos += origPosInc.getPositionIncrement();
          origTok = matched.isEmpty() ? null : matched.removeFirst();
          if (origTok != null) {
            origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
            origPos += origPosInc.getPositionIncrement();
          }
        }

        newPosIncAtt.setPositionIncrement(repPos - pos);
        generated.add(newTok);
        pos += newPosIncAtt.getPositionIncrement();
      }

      // finish up any leftover original tokens
      while (origTok!=null) {
        PositionIncrementAttribute origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
        origPosInc.setPositionIncrement(origPos-pos);
        generated.add(origTok);
        pos += origPosInc.getPositionIncrement();
        origTok = matched.isEmpty() ? null : matched.removeFirst();
        if (origTok != null) {
          origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
          origPos += origPosInc.getPositionIncrement();
        }
      }

      // what if we replaced a longer sequence with a shorter one?
      // a/0 b/5 =>  foo/0
View Full Code Here
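
The synonym code above merges two token streams by rewriting position increments so that injected tokens line up with the originals. The simplest form of that idea is a filter that emits an extra token with an increment of 0, stacking it on the previous token's position. The sketch below is illustrative only: the class name and the injected "marker" term are invented, but TokenFilter, captureState()/restoreState() and setPositionIncrement(0) are the standard Lucene pattern for injecting tokens at an existing position.

    import java.io.IOException;
    import org.apache.lucene.analysis.TokenFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
    import org.apache.lucene.util.AttributeSource;

    /** Emits a synthetic "marker" token at the same position as every input token. */
    final class MarkerInjectingFilter extends TokenFilter {
      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
      private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
      private AttributeSource.State pending;     // attributes of the token we still owe a twin for

      MarkerInjectingFilter(TokenStream input) {
        super(input);
      }

      @Override
      public boolean incrementToken() throws IOException {
        if (pending != null) {
          restoreState(pending);                 // start from the original token's attributes
          pending = null;
          termAtt.setEmpty().append("marker");
          posIncAtt.setPositionIncrement(0);     // 0 = stacked on the previous token's position
          return true;
        }
        if (!input.incrementToken()) {
          return false;
        }
        pending = captureState();                // remember it; the twin is emitted on the next call
        return true;
      }

      @Override
      public void reset() throws IOException {
        super.reset();
        pending = null;
      }
    }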

    TokenStream ts = a.tokenStream("dummy", new StringReader(input));
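    // TermAttribute is the pre-3.1 attribute API; later Lucene versions replaced it with CharTermAttribute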
    TermAttribute termAtt = (TermAttribute) ts
        .getAttribute(TermAttribute.class);
    OffsetAttribute offsetAtt = (OffsetAttribute) ts
        .getAttribute(OffsetAttribute.class);
    PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) ts
        .getAttribute(PositionIncrementAttribute.class);
    for (int i = 0; i < output.length; i++) {
      assertTrue(ts.incrementToken());
      assertEquals(output[i], termAtt.term());
      assertEquals(startOffsets[i], offsetAtt.startOffset());
      assertEquals(endOffsets[i], offsetAtt.endOffset());
      assertEquals(posIncs[i], posIncAtt.getPositionIncrement());
    }
    assertFalse(ts.incrementToken());
    ts.close();
  }
View Full Code Here

        } catch (IOException e) {
            source = analyzer.tokenStream(field, new FastStringReader(text));
        }
        CachingTokenFilter buffer = new CachingTokenFilter(source);
        CharTermAttribute termAtt = null;
        PositionIncrementAttribute posIncrAtt = null;
        int numTokens = 0;

        boolean success = false;
        try {
            buffer.reset();
            success = true;
        } catch (IOException e) {
            // success==false if we hit an exception
        }
        if (success) {
            if (buffer.hasAttribute(CharTermAttribute.class)) {
                termAtt = buffer.getAttribute(CharTermAttribute.class);
            }
            if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
                posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
            }
        }

        int positionCount = 0;
        boolean severalTokensAtSamePosition = false;

        boolean hasMoreTokens = false;
        if (termAtt != null) {
            try {
                hasMoreTokens = buffer.incrementToken();
                while (hasMoreTokens) {
                    numTokens++;
                    int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
                    if (positionIncrement != 0) {
                        positionCount += positionIncrement;
                    } else {
                        severalTokensAtSamePosition = true;
                    }
                    hasMoreTokens = buffer.incrementToken();
                }
            } catch (IOException e) {
                // ignore
            }
        }
        try {
            // rewind the buffer stream
            buffer.reset();

            // close original stream - all tokens buffered
            source.close();
        } catch (IOException e) {
            // ignore
        }

        Term termFactory = new Term(field);
        if (numTokens == 0) {
            return MatchNoDocsQuery.INSTANCE;
        } else if (type == Type.BOOLEAN) {
            if (numTokens == 1) {
                String term = null;
                try {
                    boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.toString();
                } catch (IOException e) {
                    // safe to ignore, because we know the number of tokens
                }
                Query q = newTermQuery(mapper, termFactory.createTerm(term));
                return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext);
            }
            BooleanQuery q = new BooleanQuery(positionCount == 1);
            for (int i = 0; i < numTokens; i++) {
                String term = null;
                try {
                    boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.toString();
                } catch (IOException e) {
                    // safe to ignore, because we know the number of tokens
                }

                Query currentQuery = newTermQuery(mapper, termFactory.createTerm(term));
                q.add(currentQuery, occur);
            }
            return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext);
        } else if (type == Type.PHRASE) {
            if (severalTokensAtSamePosition) {
                MultiPhraseQuery mpq = new MultiPhraseQuery();
                mpq.setSlop(phraseSlop);
                List<Term> multiTerms = new ArrayList<Term>();
                int position = -1;
                for (int i = 0; i < numTokens; i++) {
                    String term = null;
                    int positionIncrement = 1;
                    try {
                        boolean hasNext = buffer.incrementToken();
                        assert hasNext == true;
                        term = termAtt.toString();
                        if (posIncrAtt != null) {
                            positionIncrement = posIncrAtt.getPositionIncrement();
                        }
                    } catch (IOException e) {
                        // safe to ignore, because we know the number of tokens
                    }

                    if (positionIncrement > 0 && multiTerms.size() > 0) {
                        if (enablePositionIncrements) {
                            mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
                        } else {
                            mpq.add(multiTerms.toArray(new Term[multiTerms.size()]));
                        }
                        multiTerms.clear();
                    }
                    position += positionIncrement;
                    multiTerms.add(termFactory.createTerm(term));
                }
                if (enablePositionIncrements) {
                    mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
                } else {
                    mpq.add(multiTerms.toArray(new Term[multiTerms.size()]));
                }
                return wrapSmartNameQuery(mpq, smartNameFieldMappers, parseContext);
            } else {
                PhraseQuery pq = new PhraseQuery();
                pq.setSlop(phraseSlop);
                int position = -1;


                for (int i = 0; i < numTokens; i++) {
                    String term = null;
                    int positionIncrement = 1;

                    try {
                        boolean hasNext = buffer.incrementToken();
                        assert hasNext == true;
                        term = termAtt.toString();
                        if (posIncrAtt != null) {
                            positionIncrement = posIncrAtt.getPositionIncrement();
                        }
                    } catch (IOException e) {
                        // safe to ignore, because we know the number of tokens
                    }

                    if (enablePositionIncrements) {
                        position += positionIncrement;
                        pq.add(termFactory.createTerm(term), position);
                    } else {
                        pq.add(termFactory.createTerm(term));
                    }
                }
                return wrapSmartNameQuery(pq, smartNameFieldMappers, parseContext);
            }
        } else if (type == Type.PHRASE_PREFIX) {
            MultiPhrasePrefixQuery mpq = new MultiPhrasePrefixQuery();
            mpq.setSlop(phraseSlop);
            mpq.setMaxExpansions(maxExpansions);
            List<Term> multiTerms = new ArrayList<Term>();
            int position = -1;
            for (int i = 0; i < numTokens; i++) {
                String term = null;
                int positionIncrement = 1;
                try {
                    boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.toString();
                    if (posIncrAtt != null) {
                        positionIncrement = posIncrAtt.getPositionIncrement();
                    }
                } catch (IOException e) {
                    // safe to ignore, because we know the number of tokens
                }
View Full Code Here
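
Behind all the branching above sits one rule: accumulate each token's position increment into an absolute position, group tokens whose increment is 0 at the same position (the MultiPhraseQuery branch), and otherwise add each term at its accumulated position. Below is a stripped-down sketch of the plain PhraseQuery branch; it uses the same pre-5.0 mutable PhraseQuery API as the excerpt (newer versions use PhraseQuery.Builder), and the class and method names are ours.

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.PhraseQuery;

    final class PhraseQueryFromAnalyzer {
      static PhraseQuery toPhraseQuery(Analyzer analyzer, String field, String text, int slop) throws IOException {
        PhraseQuery pq = new PhraseQuery();
        pq.setSlop(slop);
        TokenStream ts = analyzer.tokenStream(field, new StringReader(text));
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
        int position = -1;
        ts.reset();
        while (ts.incrementToken()) {
          position += posIncAtt.getPositionIncrement();   // gaps (e.g. removed stopwords) survive as holes in the phrase
          pq.add(new Term(field, termAtt.toString()), position);
        }
        ts.end();
        ts.close();
        return pq;
      }
    }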


            } else {
                terms = new HashMap<String, ArrayIntList>();
            }

            CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
            PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
            OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                String term = termAtt.toString();
                if (term.length() == 0) continue; // nothing to do
//        if (DEBUG) System.err.println("token='" + term + "'");
                numTokens++;
                final int posIncr = posIncrAttribute.getPositionIncrement();
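                // an increment of 0 marks an overlapping token (e.g. a synonym or stem stacked at the same position)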
                if (posIncr == 0)
                    numOverlapTokens++;
                pos += posIncr;

                ArrayIntList positions = terms.get(term);
View Full Code Here

        TokenStream stream = null;
        try {
            stream = analyzer.reusableTokenStream(field, new FastStringReader(request.text()));
            stream.reset();
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
            OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
            TypeAttribute type = stream.addAttribute(TypeAttribute.class);

            int position = 0;
            while (stream.incrementToken()) {
                int increment = posIncr.getPositionIncrement();
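                // a zero increment means the token is stacked on the previous one, so the reported position stays unchanged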
                if (increment > 0) {
                    position = position + increment;
                }
                tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), position, offset.startOffset(), offset.endOffset(), type.type()));
            }
View Full Code Here

    /** Sugar: analyzes the text with the analyzer and
     *  separates by {@link SynonymMap#WORD_SEPARATOR}.
     *  reuse and its chars must not be null. */
    public static CharsRef analyze(Analyzer analyzer, String text, CharsRef reuse) throws IOException {
      TokenStream ts = analyzer.reusableTokenStream("", new StringReader(text));
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
      ts.reset();
      reuse.length = 0;
      while (ts.incrementToken()) {
        int length = termAtt.length();
        if (length == 0) {
          throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");
        }
        if (posIncAtt.getPositionIncrement() != 1) {
          throw new IllegalArgumentException("term: " + text + " analyzed to a token with posinc != 1");
        }
        reuse.grow(reuse.length + length + 1); /* current + word + separator */
        int end = reuse.offset + reuse.length;
        if (reuse.length > 0) {
View Full Code Here

            boolean hasMoreTokens = stream.incrementToken();

            fieldState.attributeSource = stream;

            OffsetAttribute offsetAttribute = fieldState.attributeSource.addAttribute(OffsetAttribute.class);
            PositionIncrementAttribute posIncrAttribute = fieldState.attributeSource.addAttribute(PositionIncrementAttribute.class);
           
            consumer.start(field);
           
            for(;;) {

              // If we hit an exception in stream.next below
              // (which is fairly common, eg if analyzer
              // chokes on a given document), then it's
              // non-aborting and (above) this one document
              // will be marked as deleted, but still
              // consume a docID
             
              if (!hasMoreTokens) break;
             
              final int posIncr = posIncrAttribute.getPositionIncrement();
              fieldState.position += posIncr;
              if (fieldState.position > 0) {
                fieldState.position--;
              }
View Full Code Here
