Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.CannedTokenStream


        // stuff abs position into type:
        token.setType(""+pos);
        offset += offIncr + tokenOffset;
        //System.out.println("  " + token + " posIncr=" + token.getPositionIncrement() + " pos=" + pos + " off=" + token.startOffset() + "/" + token.endOffset() + " (freq=" + postingsByDoc.get(docCount).size() + ")");
      }
      doc.add(new Field("content", new CannedTokenStream(tokens.toArray(new Token[tokens.size()])), ft));
      w.addDocument(doc);
    }
    final DirectoryReader r = w.getReader();
    w.close();
View Full Code Here


    Token t1 = new Token("foo", 0, Integer.MAX_VALUE-500);
    if (random().nextBoolean()) {
      t1.setPayload(new BytesRef("test"));
    }
    Token t2 = new Token("foo", Integer.MAX_VALUE-500, Integer.MAX_VALUE);
    TokenStream tokenStream = new CannedTokenStream(
        new Token[] { t1, t2 }
    );
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    // store some term vectors for the checkindex cross-check
View Full Code Here

      ft.setStoreTermVectors(true);
      ft.setStoreTermVectorPositions(true);
      ft.setStoreTermVectorOffsets(true);
    
      Document doc = new Document();
      doc.add(new Field("body", new CannedTokenStream(tokens), ft));
      riw.addDocument(doc);
      success = true;
    } finally {
      if (success) {
        IOUtils.close(riw, dir);
View Full Code Here

    doc.add(field);
    writer.addDocument(doc);
   
    Token withPayload = new Token("withPayload", 0, 11);
    withPayload.setPayload(new BytesRef("test"));
    ts = new CannedTokenStream(withPayload);
    assertTrue(ts.hasAttribute(PayloadAttribute.class));
    field.setTokenStream(ts);
    writer.addDocument(doc);
   
    ts = new MockTokenizer(new StringReader("another"), MockTokenizer.WHITESPACE, true);
View Full Code Here

  protected void shingleFilterTest(int maxSize, Token[] tokensToShingle, Token[] tokensToCompare,
                                   int[] positionIncrements, String[] types,
                                   boolean outputUnigrams)
    throws IOException {

    ShingleFilter filter = new ShingleFilter(new CannedTokenStream(tokensToShingle), maxSize);
    filter.setOutputUnigrams(outputUnigrams);
    shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types);
  }
View Full Code Here

  protected void shingleFilterTest(int minSize, int maxSize, Token[] tokensToShingle,
                                   Token[] tokensToCompare, int[] positionIncrements,
                                   String[] types, boolean outputUnigrams)
    throws IOException {
    ShingleFilter filter
      = new ShingleFilter(new CannedTokenStream(tokensToShingle), minSize, maxSize);
    filter.setOutputUnigrams(outputUnigrams);
    shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types);
  }
View Full Code Here

                                   Token[] tokensToCompare, int[] positionIncrements,
                                   String[] types, boolean outputUnigrams,
                                   boolean outputUnigramsIfNoShingles)
    throws IOException {
    ShingleFilter filter
      = new ShingleFilter(new CannedTokenStream(tokensToShingle), minSize, maxSize);
    filter.setOutputUnigrams(outputUnigrams);
    filter.setOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
    shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types);
  }
View Full Code Here

  protected void shingleFilterTest(String tokenSeparator, int minSize, int maxSize, Token[] tokensToShingle,
                                   Token[] tokensToCompare, int[] positionIncrements,
                                   String[] types, boolean outputUnigrams)
    throws IOException {
    ShingleFilter filter
      = new ShingleFilter(new CannedTokenStream(tokensToShingle), minSize, maxSize);
    filter.setTokenSeparator(tokenSeparator);
    filter.setOutputUnigrams(outputUnigrams);
    shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types);
  }
View Full Code Here

  public void testTrailingHole1() throws IOException {
    // Analyzing "wizard of", where of is removed as a
    // stopword leaving a trailing hole:
    Token[] inputTokens = new Token[] {createToken("wizard", 0, 6)};
    ShingleFilter filter = new ShingleFilter(new CannedTokenStream(1, 9, inputTokens), 2, 2);

    assertTokenStreamContents(filter,
                              new String[] {"wizard", "wizard _"},
                              new int[] {0, 0},
                              new int[] {6, 9},
View Full Code Here

  public void testTrailingHole2() throws IOException {
    // Analyzing "purple wizard of", where of is removed as a
    // stopword leaving a trailing hole:
    Token[] inputTokens = new Token[] {createToken("purple", 0, 6),
                                       createToken("wizard", 7, 13)};
    ShingleFilter filter = new ShingleFilter(new CannedTokenStream(1, 16, inputTokens), 2, 2);

    assertTokenStreamContents(filter,
                              new String[] {"purple", "purple wizard", "wizard", "wizard _"},
                              new int[] {0, 0, 7, 7},
                              new int[] {6, 13, 13, 16},
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.CannedTokenStream

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.