Package org.apache.lucene.analysis.core

Examples of org.apache.lucene.analysis.core.WhitespaceAnalyzer
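
WhitespaceAnalyzer tokenizes purely on whitespace: it does not lowercase, strip punctuation, or remove stop words. As a minimal, self-contained sketch of that behaviour (targeting the Lucene 4.x API used by the excerpts below; the field name and sample text are made up):

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class WhitespaceAnalyzerDemo {
  public static void main(String[] args) throws IOException {
    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_43);
    TokenStream ts = analyzer.tokenStream("f", new StringReader("Hello, Lucene   WORLD"));
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();                               // required before the first incrementToken()
    while (ts.incrementToken()) {
      System.out.println(term.toString());    // prints "Hello,", "Lucene", "WORLD"
    }
    ts.end();
    ts.close();
  }
}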


                    new int[]{ 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0});
    }

    @Test
    public void testCanUseFromNamedAnalyzer() throws IOException {
        ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
        NamedAnalyzer namedAnalyzer = new NamedAnalyzer("name", AnalyzerScope.INDEX, cb);
        for (int i = 0 ; i < 3 ; i++)
            assertTokenStreamContents(namedAnalyzer.tokenStream("field", new StringReader("just a little test " + i)),
                    new String[]{"just", "a", "little", "test", Integer.toString(i)},
                    new int[]{ 0, 5, 7, 14, 19},
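assertTokenStreamContents comes from Lucene's test framework (BaseTokenStreamTestCase); the int array shown is the expected start offset of each token (the full test typically also checks end offsets and position increments). The same information can be read directly off the stream with OffsetAttribute, as in this illustrative sketch (not part of the test above):

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.Version;

public class OffsetCheck {
  public static void main(String[] args) throws IOException {
    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_43);
    TokenStream ts = analyzer.tokenStream("field", new StringReader("just a little test 0"));
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsets = ts.addAttribute(OffsetAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      // e.g. "just [0,4)", "a [5,6)", "little [7,13)", "test [14,18)", "0 [19,20)"
      System.out.println(term + " [" + offsets.startOffset() + "," + offsets.endOffset() + ")");
    }
    ts.end();
    ts.close();
  }
}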


    }

    @Test
    public void testReuseSequentialMultithreading() throws IOException, InterruptedException {
        // Create the analyzer
        final ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
        final NamedAnalyzer namedAnalyzer = new NamedAnalyzer("name", AnalyzerScope.INDEX, cb);
        // Use N threads, each running M times
        Thread[] threads = new Thread[4];
        final int runs = 4;
        // The lock ensures only one thread is running at a given time

    }

    @Test
    public void testReuseConcurrentMultithreading() throws IOException, InterruptedException {
        // Create the analyzer
        final ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
        final NamedAnalyzer namedAnalyzer = new NamedAnalyzer("name", AnalyzerScope.INDEX, cb);
        // Use N threads, each running M times
        Thread[] threads = new Thread[4];
        final int runs = 4000; // leave time for threads to run concurrently
        // This integer ensures each thread runs with a different input
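Both multithreading tests rely on the fact that a single Lucene Analyzer instance can be shared across threads: each call to tokenStream() hands the calling thread its own reusable TokenStream components. A hedged sketch of that idea with a plain WhitespaceAnalyzer, leaving out the ComboAnalyzer/NamedAnalyzer wrappers (thread and iteration counts are arbitrary):

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.util.Version;

public class SharedAnalyzerDemo {
  public static void main(String[] args) throws InterruptedException {
    // One analyzer instance, shared by all threads.
    final WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_43);
    Thread[] threads = new Thread[4];
    final int runs = 1000;
    for (int t = 0; t < threads.length; t++) {
      final int id = t;
      threads[t] = new Thread(new Runnable() {
        @Override
        public void run() {
          try {
            for (int i = 0; i < runs; i++) {
              // Each thread gets (and reuses) its own TokenStream from the shared analyzer.
              TokenStream ts = analyzer.tokenStream("field", new StringReader("thread " + id + " run " + i));
              ts.reset();
              while (ts.incrementToken()) { /* consume tokens */ }
              ts.end();
              ts.close();
            }
          } catch (IOException e) {
            throw new RuntimeException(e);
          }
        }
      });
      threads[t].start();
    }
    for (Thread thread : threads) {
      thread.join();
    }
  }
}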

  static RAMDirectory createTestIndex(Field.TermVector termVector,
                                      RAMDirectory directory,
                                      boolean createNew,
                                      int startingId) throws IOException {
    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43)));

    try {
      for (int i = 0; i < DOCS.length; i++) {
        Document doc = new Document();
        Field id = new StringField("id", "doc_" + (i + startingId), Field.Store.YES);
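createTestIndex writes documents into a RAMDirectory through an IndexWriter configured with WhitespaceAnalyzer. A self-contained sketch of the same pattern, with invented field names and without the term-vector handling of the original:

import java.io.IOException;

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class RamIndexDemo {
  static RAMDirectory buildIndex(String[] docs) throws IOException {
    RAMDirectory directory = new RAMDirectory();
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
    IndexWriter writer = new IndexWriter(directory, config);
    try {
      for (int i = 0; i < docs.length; i++) {
        Document doc = new Document();
        doc.add(new StringField("id", "doc_" + i, Field.Store.YES));   // not analyzed
        doc.add(new TextField("body", docs[i], Field.Store.NO));       // analyzed with WhitespaceAnalyzer
        writer.addDocument(doc);
      }
    } finally {
      writer.close();
    }
    return directory;
  }
}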

 
  /** normal case, unfiltered analyzer */
  @Test
  public void testAnalyzer() throws IOException {
    Reader reader = new StringReader(input);
    Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_43);
    TokenStream ts = analyzer.tokenStream(null, reader);
    ts.reset();
    validateTokens(allTokens, ts);
    ts.end();
    ts.close();
  }
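validateTokens is a private helper of this test class. A hypothetical equivalent, using only the public TokenStream API and JUnit (the name and exact assertions are illustrative, not the original Mahout helper); the caller remains responsible for reset(), end() and close(), as in the test above:

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

final class TokenAssertions {
  // Hypothetical stand-in for the test's validateTokens(expected, ts) helper.
  static void assertTokens(String[] expected, TokenStream ts) throws IOException {
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    for (String want : expected) {
      assertTrue("stream ended early, expected: " + want, ts.incrementToken());
      assertEquals(want, term.toString());
    }
    assertFalse("stream has unexpected extra tokens", ts.incrementToken());
  }
}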

  }

  @Test
  public void testLuceneEncoding() throws Exception {
    LuceneTextValueEncoder enc = new LuceneTextValueEncoder("text");
    enc.setAnalyzer(new WhitespaceAnalyzer(Version.LUCENE_43));
    Vector v1 = new DenseVector(200);
    enc.addToVector("test1 and more", v1);
    enc.flush(1, v1);

    //should be the same as text test above, since we are splitting on whitespace

 
  /** filtered analyzer */
  @Test
  public void testNonKeepdAnalyzer() throws IOException {
    Reader reader = new StringReader(input);
    Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_43);
    TokenStream ts = analyzer.tokenStream(null, reader);
    ts.reset();
    TokenStream f = new BloomTokenFilter(getFilter(filterTokens), false /* toss matching tokens */, ts);
    validateTokens(expectedNonKeepTokens, f);
    ts.end();
    ts.close();

  /** keep analyzer */
  @Test
  public void testKeepAnalyzer() throws IOException {
    Reader reader = new StringReader(input);
    Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_43);
    TokenStream ts = analyzer.tokenStream(null, reader);
    ts.reset();
    TokenStream f = new BloomTokenFilter(getFilter(filterTokens), true /* keep matching tokens */, ts);
    validateTokens(expectedKeepTokens, f);
    ts.end();
    ts.close();
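getFilter, used by the keep and non-keep tests alike, presumably builds a Hadoop bloom filter over the given tokens for BloomTokenFilter to consult. A hypothetical sketch of such a helper (the bit-vector size, hash count and hash function are made-up choices, not the values from the Mahout test):

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.util.bloom.BloomFilter;
import org.apache.hadoop.util.bloom.Filter;
import org.apache.hadoop.util.bloom.Key;
import org.apache.hadoop.util.hash.Hash;

final class BloomFilters {
  // Hypothetical stand-in for the test's getFilter(tokens) helper.
  static Filter getFilter(String... tokens) {
    // vector size, number of hash functions and hash type are illustrative choices
    Filter filter = new BloomFilter(100, 50, Hash.JENKINS_HASH);
    for (String token : tokens) {
      filter.add(new Key(token.getBytes(StandardCharsets.UTF_8)));
    }
    return filter;
  }
}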

 
  /** shingles, keep those matching whitelist */
  @Test
  public void testShingleFilteredAnalyzer() throws IOException {
    Reader reader = new StringReader(input);
    Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_43);
    TokenStream ts = analyzer.tokenStream(null, reader);
    ts.reset();
    ShingleFilter sf = new ShingleFilter(ts, 3);
    TokenStream f = new BloomTokenFilter(getFilter(shingleKeepTokens),  true, sf);
    validateTokens(expectedShingleTokens, f);
    ts.end();
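ShingleFilter chains onto the whitespace token stream and emits word n-grams (here up to 3 words), which the BloomTokenFilter then matches against the whitelist. A standalone sketch of just the shingling step (the sample text is made up):

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class ShingleDemo {
  public static void main(String[] args) throws IOException {
    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_43);
    TokenStream ts = analyzer.tokenStream(null, new StringReader("the quick brown fox"));
    // Emits unigrams plus 2- and 3-word shingles: "the", "the quick", "the quick brown", "quick", ...
    ShingleFilter shingles = new ShingleFilter(ts, 3);
    CharTermAttribute term = shingles.addAttribute(CharTermAttribute.class);
    shingles.reset();
    while (shingles.incrementToken()) {
      System.out.println(term.toString());
    }
    shingles.end();
    shingles.close();
  }
}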

  public RangeFacetsExample() {}
 
  /** Build the example index. */
  public void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER,
        new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));

    // Add documents with a fake timestamp, 1000 sec before
    // "now", 2000 sec before "now", ...:
    for(int i=0;i<100;i++) {
      Document doc = new Document();
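The loop gives every document a fake timestamp so that range facets can later be computed over it. A hedged sketch of one plausible loop body, assuming a NumericDocValuesField (for facet counting) and a LongField (for drill-down queries), both named "timestamp"; the exact field choices vary between Lucene versions:

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.IndexWriter;

public class TimestampIndexing {
  // Sketch of the indexing loop: each document carries a timestamp i*1000 seconds before "now".
  static void addTimestampDocs(IndexWriter indexWriter) throws IOException {
    long nowSec = System.currentTimeMillis() / 1000L;
    for (int i = 0; i < 100; i++) {
      Document doc = new Document();
      long then = nowSec - i * 1000;
      doc.add(new NumericDocValuesField("timestamp", then));     // doc values, used for range facet counting
      doc.add(new LongField("timestamp", then, Field.Store.NO)); // numeric field, used for drill-down queries
      indexWriter.addDocument(doc);
    }
  }
}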
