Package org.apache.lucene.analysis.tokenattributes

Examples of org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute
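
All of the snippets below are fragments pulled from larger classes. As a quick orientation, here is a minimal, self-contained sketch (not taken from any of the snippets) of the usual consumption pattern: add PositionIncrementAttribute to a TokenStream, then read getPositionIncrement() on every incrementToken() call to accumulate absolute positions. The WhitespaceAnalyzer, field name, and sample text are placeholders, and the no-argument analyzer constructor assumes a recent Lucene release; older 4.x versions take a Version argument.

    import java.io.IOException;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

    public class PositionIncrementDemo {
      public static void main(String[] args) throws IOException {
        Analyzer analyzer = new WhitespaceAnalyzer();   // placeholder analyzer
        try (TokenStream ts = analyzer.tokenStream("body", "a quick brown fox")) {
          CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
          PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
          ts.reset();
          int position = 0;
          while (ts.incrementToken()) {
            // increment 0 = same position as the previous token (e.g. a synonym);
            // increment > 1 = holes left by removed tokens (e.g. stop words)
            position += posIncAtt.getPositionIncrement();
            System.out.println(position + ": " + termAtt.toString());
          }
          ts.end();
        }
      }
    }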


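    // Suggester-style lookup: analyze the key, keeping only the last 1-gram,
    // 2-gram, ...; the position length serves as the gram count, and the final
    // position increment is read after end() to detect a trailing hole.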
    TokenStream ts = queryAnalyzer.tokenStream("", key.toString());
    try {
      TermToBytesRefAttribute termBytesAtt = ts.addAttribute(TermToBytesRefAttribute.class);
      OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
      PositionLengthAttribute posLenAtt = ts.addAttribute(PositionLengthAttribute.class);
      PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
      ts.reset();
     
      BytesRef[] lastTokens = new BytesRef[grams];
      //System.out.println("lookup: key='" + key + "'");
     
      // Run full analysis, but save only the
      // last 1gram, last 2gram, etc.:
      BytesRef tokenBytes = termBytesAtt.getBytesRef();
      int maxEndOffset = -1;
      boolean sawRealToken = false;
      while(ts.incrementToken()) {
        termBytesAtt.fillBytesRef();
        sawRealToken |= tokenBytes.length > 0;
        // TODO: this is somewhat iffy; today, ShingleFilter
        // sets posLen to the gram count; maybe we should make
        // a separate dedicated att for this?
        int gramCount = posLenAtt.getPositionLength();
       
        assert gramCount <= grams;
       
        // Safety: make sure the recalculated count "agrees":
        if (countGrams(tokenBytes) != gramCount) {
          throw new IllegalArgumentException("tokens must not contain separator byte; got token=" + tokenBytes + " but gramCount=" + gramCount + " does not match recalculated count=" + countGrams(tokenBytes));
        }
        maxEndOffset = Math.max(maxEndOffset, offsetAtt.endOffset());
        lastTokens[gramCount-1] = BytesRef.deepCopyOf(tokenBytes);
      }
      ts.end();
     
      if (!sawRealToken) {
        throw new IllegalArgumentException("no tokens produced by analyzer, or the only tokens were empty strings");
      }
     
      // Carefully fill last tokens with _ tokens;
      // ShingleFilter apparently won't emit "only hole"
      // tokens:
      int endPosInc = posIncAtt.getPositionIncrement();
     
      // Note this will also be true if input is the empty
      // string (in which case we saw no tokens and
      // maxEndOffset is still -1), which in fact works out OK
      // because we fill the unigram with an empty BytesRef
      // ... (snippet truncated)


    /** Sugar: analyzes the text with the analyzer and
     *  separates by {@link SynonymMap#WORD_SEPARATOR}.
     *  reuse and its chars must not be null. */
    public CharsRef analyze(String text, CharsRef reuse) throws IOException {
      try (TokenStream ts = analyzer.tokenStream("", text)) {
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
        ts.reset();
        reuse.length = 0;
        while (ts.incrementToken()) {
          int length = termAtt.length();
          if (length == 0) {
            throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");
          }
          if (posIncAtt.getPositionIncrement() != 1) {
            throw new IllegalArgumentException("term: " + text + " analyzed to a token with posinc != 1");
          }
          reuse.grow(reuse.length + length + 1); /* current + word + separator */
          int end = reuse.offset + reuse.length;
          if (reuse.length > 0) {
            // ... (snippet truncated)

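    // Compare two token streams ("left" and "right") token by token, asserting
    // identical term text, position increments, and offsets.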
    right.reset();
    CharTermAttribute leftTerm = left.addAttribute(CharTermAttribute.class);
    CharTermAttribute rightTerm = right.addAttribute(CharTermAttribute.class);
    OffsetAttribute leftOffset = left.addAttribute(OffsetAttribute.class);
    OffsetAttribute rightOffset = right.addAttribute(OffsetAttribute.class);
    PositionIncrementAttribute leftPos = left.addAttribute(PositionIncrementAttribute.class);
    PositionIncrementAttribute rightPos = right.addAttribute(PositionIncrementAttribute.class);
   
    while (left.incrementToken()) {
      assertTrue("wrong number of tokens for input: " + s, right.incrementToken());
      assertEquals("wrong term text for input: " + s, leftTerm.toString(), rightTerm.toString());
      assertEquals("wrong position for input: " + s, leftPos.getPositionIncrement(), rightPos.getPositionIncrement());
      assertEquals("wrong start offset for input: " + s, leftOffset.startOffset(), rightOffset.startOffset());
      assertEquals("wrong end offset for input: " + s, leftOffset.endOffset(), rightOffset.endOffset());
    }
    assertFalse("wrong number of tokens for input: " + s, right.incrementToken());
    left.end();
    // ... (snippet truncated)

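      // Benchmark: sum position increments when analyzing the text twice
      // (simulating two fields, tfPos) versus once through a TeeSinkTokenFilter
      // plus its sink (sinkPos), then assert the totals match.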
      for (int j = 0; j < modCounts.length; j++) {
        int tfPos = 0;
        long start = System.currentTimeMillis();
        for (int i = 0; i < 20; i++) {
          stream = new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.toString())));
          PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
          while (stream.incrementToken()) {
            tfPos += posIncrAtt.getPositionIncrement();
          }
          stream = new ModuloTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.toString()))), modCounts[j]);
          posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
          while (stream.incrementToken()) {
            tfPos += posIncrAtt.getPositionIncrement();
          }
        }
        long finish = System.currentTimeMillis();
        System.out.println("ModCount: " + modCounts[j] + " Two fields took " + (finish - start) + " ms");
        int sinkPos = 0;
        //simulate one field with one sink
        start = System.currentTimeMillis();
        for (int i = 0; i < 20; i++) {
          teeStream = new TeeSinkTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.toString()))));
          sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(modCounts[j]));
          PositionIncrementAttribute posIncrAtt = teeStream.getAttribute(PositionIncrementAttribute.class);
          while (teeStream.incrementToken()) {
            sinkPos += posIncrAtt.getPositionIncrement();
          }
          //System.out.println("Modulo--------");
          posIncrAtt = sink.getAttribute(PositionIncrementAttribute.class);
          while (sink.incrementToken()) {
            sinkPos += posIncrAtt.getPositionIncrement();
          }
        }
        finish = System.currentTimeMillis();
        System.out.println("ModCount: " + modCounts[j] + " Tee fields took " + (finish - start) + " ms");
        assertTrue(sinkPos + " does not equal: " + tfPos, sinkPos == tfPos);
        // ... (snippet truncated)

  }

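  // Check TypeTokenFilter position handling: removed tokens leave an increment
  // of 3 when position increments are enabled, and 1 otherwise.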
  private void testPositons(TypeTokenFilter stpf) throws IOException {
    TypeAttribute typeAtt = stpf.getAttribute(TypeAttribute.class);
    CharTermAttribute termAttribute = stpf.getAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class);
    stpf.reset();
    boolean enablePositionIncrements = stpf.getEnablePositionIncrements();
    while (stpf.incrementToken()) {
      log("Token: " + termAttribute.toString() + ": " + typeAtt.type() + " - " + posIncrAtt.getPositionIncrement());
      assertEquals("if position increment is enabled the positionIncrementAttribute value should be 3, otherwise 1",
          posIncrAtt.getPositionIncrement(), enablePositionIncrements ? 3 : 1);
    }
    stpf.end();
    stpf.close();
  }
  // ... (snippet truncated)

 
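  // Check StopFilter position handling: with increments enabled, every token
  // after the first skips two stopped positions (increment 3); with increments
  // disabled, all increments are 1.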
  private void doTestStopPositons(StopFilter stpf, boolean enableIcrements) throws IOException {
    log("---> test with enable-increments-"+(enableIcrements?"enabled":"disabled"));
    stpf.setEnablePositionIncrements(enableIcrements);
    CharTermAttribute termAtt = stpf.getAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class);
    stpf.reset();
    for (int i=0; i<20; i+=3) {
      assertTrue(stpf.incrementToken());
      log("Token "+i+": "+stpf);
      String w = English.intToEnglish(i).trim();
      assertEquals("expecting token "+i+" to be "+w,w,termAtt.toString());
      assertEquals("all but first token must have position increment of 3",enableIcrements?(i==0?1:3):1,posIncrAtt.getPositionIncrement());
    }
    assertFalse(stpf.incrementToken());
    stpf.end();
    stpf.close();
  }
  // ... (snippet truncated)

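    // Verify that SnowballFilter stems the term text while leaving the offset,
    // type, payload, flags, and position-increment attributes untouched.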
    SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English");
    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = filter.getAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = filter.getAttribute(TypeAttribute.class);
    PayloadAttribute payloadAtt = filter.getAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAtt = filter.getAttribute(PositionIncrementAttribute.class);
    FlagsAttribute flagsAtt = filter.getAttribute(FlagsAttribute.class);
   
    filter.incrementToken();

    assertEquals("accent", termAtt.toString());
    assertEquals(2, offsetAtt.startOffset());
    assertEquals(7, offsetAtt.endOffset());
    assertEquals("wrd", typeAtt.type());
    assertEquals(3, posIncAtt.getPositionIncrement());
    assertEquals(77, flagsAtt.getFlags());
    assertEquals(new BytesRef(new byte[]{0,1,2,3}), payloadAtt.getPayload());
  }
  // ... (snippet truncated)

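    // Version guard: on Lucene 4.4+ use the real position attributes; older
    // versions fall back to a no-op PositionIncrementAttribute that always
    // reports an increment of 0.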
    this.maxGram = maxGram;
    if (version.onOrAfter(Version.LUCENE_4_4)) {
      posIncAtt = addAttribute(PositionIncrementAttribute.class);
      posLenAtt = addAttribute(PositionLengthAttribute.class);
    } else {
      posIncAtt = new PositionIncrementAttribute() {
        @Override
        public void setPositionIncrement(int positionIncrement) {}
        @Override
        public int getPositionIncrement() {
          return 0;
          // ... (snippet truncated)

        new int[] { 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0 }, 16);
  }

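  // Print every token's full details, accumulating position increments into an
  // absolute position; tokens with increment 0 stay on the previous output line.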
  private static void displayTokensWithFullDetails(TokenStream stream) throws IOException {
    final CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    final PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
    final OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
    final TypeAttribute type = stream.addAttribute(TypeAttribute.class);
    int position = 0;
    while (stream.incrementToken()) {
      final int increment = posIncr.getPositionIncrement();
      if (increment > 0) {
        position = position + increment;
        System.out.println();
        System.out.print(position + ": ");
      }
      // ... (snippet truncated)

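    // Map each token attribute to an Avro GenericRecord (one schema per attribute
    // type); PositionIncrementAttribute becomes a record with a single
    // "positionIncrement" field, and unknown Serializable attributes are
    // byte-serialized.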
      record.put( "isKeyword", narrowedAttr.isKeyword() );
      return record;
    }
    else if ( attr instanceof PositionIncrementAttribute ) {
      GenericRecord record = new GenericData.Record( protocol.getType( "PositionIncrementAttribute" ) );
      PositionIncrementAttribute narrowedAttr = (PositionIncrementAttribute) attr;
      record.put( "positionIncrement", narrowedAttr.getPositionIncrement() );
      return record;
    }
    else if ( attr instanceof FlagsAttribute ) {
      GenericRecord record = new GenericData.Record( protocol.getType( "FlagsAttribute" ) );
      FlagsAttribute narrowedAttr = (FlagsAttribute) attr;
      record.put( "flags", narrowedAttr.getFlags() );
      return record;
    }
    else if ( attr instanceof TypeAttribute ) {
      GenericRecord record = new GenericData.Record( protocol.getType( "TypeAttribute" ) );
      TypeAttribute narrowedAttr = (TypeAttribute) attr;
      record.put( "type", narrowedAttr.type() );
      return record;
    }
    else if ( attr instanceof OffsetAttribute ) {
      GenericRecord record = new GenericData.Record( protocol.getType( "OffsetAttribute" ) );
      OffsetAttribute narrowedAttr = (OffsetAttribute) attr;
      record.put( "startOffset", narrowedAttr.startOffset() );
      record.put( "endOffset", narrowedAttr.endOffset() );
      return record;
    }
    else if ( attr instanceof Serializable ) {
      return ByteBuffer.wrap( toByteArray( (Serializable) attr ) );
    }
    // ... (snippet truncated)
