Examples of org.apache.lucene.analysis.CannedTokenStream

org.apache.lucene.analysis.CannedTokenStream
TokenStream from a canned list of Tokens.

        // stuff abs position into type:
        token.setType(""+pos);
        offset += offIncr + tokenOffset;
        //System.out.println("  " + token + " posIncr=" + token.getPositionIncrement() + " pos=" + pos + " off=" + token.startOffset() + "/" + token.endOffset() + " (freq=" + postingsByDoc.get(docCount).size() + ")");
      }
      doc.add(new Field("content", new CannedTokenStream(tokens.toArray(new Token[tokens.size()])), ft));
      w.addDocument(doc);
    }
    final DirectoryReader r = w.getReader();
    w.close();

View Full Code Here

    Token t1 = new Token("foo", 0, Integer.MAX_VALUE-500);
    if (random().nextBoolean()) {
      t1.setPayload(new BytesRef("test"));
    }
    Token t2 = new Token("foo", Integer.MAX_VALUE-500, Integer.MAX_VALUE);
    TokenStream tokenStream = new CannedTokenStream(
        new Token[] { t1, t2 }
    );
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    // store some term vectors for the checkindex cross-check

View Full Code Here

      ft.setStoreTermVectors(true);
      ft.setStoreTermVectorPositions(true);
      ft.setStoreTermVectorOffsets(true);
     
      Document doc = new Document();
      doc.add(new Field("body", new CannedTokenStream(tokens), ft));
      riw.addDocument(doc);
      success = true;
    } finally {
      if (success) {
        IOUtils.close(riw, dir);

View Full Code Here

    doc.add(field);
    writer.addDocument(doc);
    
    Token withPayload = new Token("withPayload", 0, 11);
    withPayload.setPayload(new BytesRef("test"));
    ts = new CannedTokenStream(withPayload);
    assertTrue(ts.hasAttribute(PayloadAttribute.class));
    field.setTokenStream(ts);
    writer.addDocument(doc);
    
    ts = new MockTokenizer(new StringReader("another"), MockTokenizer.WHITESPACE, true);

View Full Code Here

  protected void shingleFilterTest(int maxSize, Token[] tokensToShingle, Token[] tokensToCompare,
                                   int[] positionIncrements, String[] types,
                                   boolean outputUnigrams)
    throws IOException {


    ShingleFilter filter = new ShingleFilter(new CannedTokenStream(tokensToShingle), maxSize);
    filter.setOutputUnigrams(outputUnigrams);
    shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types);
  }

View Full Code Here

  protected void shingleFilterTest(int minSize, int maxSize, Token[] tokensToShingle, 
                                   Token[] tokensToCompare, int[] positionIncrements,
                                   String[] types, boolean outputUnigrams)
    throws IOException {
    ShingleFilter filter 
      = new ShingleFilter(new CannedTokenStream(tokensToShingle), minSize, maxSize);
    filter.setOutputUnigrams(outputUnigrams);
    shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types);
  }

View Full Code Here

                                   Token[] tokensToCompare, int[] positionIncrements,
                                   String[] types, boolean outputUnigrams, 
                                   boolean outputUnigramsIfNoShingles)
    throws IOException {
    ShingleFilter filter 
      = new ShingleFilter(new CannedTokenStream(tokensToShingle), minSize, maxSize);
    filter.setOutputUnigrams(outputUnigrams);
    filter.setOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
    shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types);
  }

View Full Code Here

  protected void shingleFilterTest(String tokenSeparator, int minSize, int maxSize, Token[] tokensToShingle, 
                                   Token[] tokensToCompare, int[] positionIncrements,
                                   String[] types, boolean outputUnigrams)
    throws IOException {
    ShingleFilter filter 
      = new ShingleFilter(new CannedTokenStream(tokensToShingle), minSize, maxSize);
    filter.setTokenSeparator(tokenSeparator);
    filter.setOutputUnigrams(outputUnigrams);
    shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types);
  }

View Full Code Here


  public void testTrailingHole1() throws IOException {
    // Analyzing "wizard of", where of is removed as a
    // stopword leaving a trailing hole:
    Token[] inputTokens = new Token[] {createToken("wizard", 0, 6)};
    ShingleFilter filter = new ShingleFilter(new CannedTokenStream(1, 9, inputTokens), 2, 2);


    assertTokenStreamContents(filter,
                              new String[] {"wizard", "wizard _"},
                              new int[] {0, 0},
                              new int[] {6, 9},

View Full Code Here

  public void testTrailingHole2() throws IOException {
    // Analyzing "purple wizard of", where of is removed as a
    // stopword leaving a trailing hole:
    Token[] inputTokens = new Token[] {createToken("purple", 0, 6),
                                       createToken("wizard", 7, 13)};
    ShingleFilter filter = new ShingleFilter(new CannedTokenStream(1, 16, inputTokens), 2, 2);


    assertTokenStreamContents(filter,
                              new String[] {"purple", "purple wizard", "wizard", "wizard _"},
                              new int[] {0, 0, 7, 7},
                              new int[] {6, 13, 13, 16},

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.lucene.analysis.CannedTokenStream

org.apache.lucene.analysis.miscellaneous.TestRemoveDuplicatesTokenFilterFactory

org.apache.lucene.analysis.shingle.ShingleFilterTest

org.apache.lucene.document.TestField

org.apache.lucene.index.memory.MemoryIndexTest

org.apache.lucene.index.TestCheckIndex

org.apache.lucene.index.TestFieldReuse

org.apache.lucene.index.TestFieldReuse$MyField

org.apache.lucene.index.TestIndexWriterExceptions

org.apache.lucene.index.TestPayloadsOnVectors

org.apache.lucene.index.TestPostingsOffsets

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.