Package org.apache.lucene.analysis.core

Examples of org.apache.lucene.analysis.core.WhitespaceAnalyzer


  /** keep analyzer */
  @Test
  public void testKeepAnalyzer() throws IOException {
    Reader reader = new StringReader(input);
    Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_46);
    TokenStream ts = analyzer.tokenStream(null, reader);
    ts.reset();
    TokenStream f = new BloomTokenFilter(getFilter(filterTokens), true /* keep matching tokens */, ts);
    validateTokens(expectedKeepTokens, f);
    ts.end();
    ts.close();
View Full Code Here


 
  /** shingles, keep those matching whitelist */
  @Test
  public void testShingleFilteredAnalyzer() throws IOException {
    Reader reader = new StringReader(input);
    Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_46);
    TokenStream ts = analyzer.tokenStream(null, reader);
    ts.reset();
    ShingleFilter sf = new ShingleFilter(ts, 3);
    TokenStream f = new BloomTokenFilter(getFilter(shingleKeepTokens),  true, sf);
    validateTokens(expectedShingleTokens, f);
    ts.end();
View Full Code Here

  }

  @Test
  public void testLuceneEncoding() throws Exception {
    LuceneTextValueEncoder enc = new LuceneTextValueEncoder("text");
    enc.setAnalyzer(new WhitespaceAnalyzer(Version.LUCENE_46));
    Vector v1 = new DenseVector(200);
    enc.addToVector("test1 and more", v1);
    enc.flush(1, v1);

    //should be the same as text test above, since we are splitting on whitespace
View Full Code Here

        spellCheckerCache = CacheBuilder.newBuilder().build(
                new CacheLoader<String, SpellChecker>() {
                    @Override
                    public SpellChecker load(String field) throws Exception {
                        SpellChecker spellChecker = new SpellChecker(ramDirectoryCache.get(field));
                        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_44, new WhitespaceAnalyzer(Version.LUCENE_44));
                        spellChecker.indexDictionary(dictCache.getUnchecked(field), indexWriterConfig, false);
                        return spellChecker;
                    }
                }
        );
View Full Code Here

            ModuloDocumentSourceDescriptor.attributeBuilder(params)
                .query("dummy")
                .results(10)
                .documents(SampleDocumentData.DOCUMENTS_DATA_MINING)
                .modulo(2)
                .analyzer(new WhitespaceAnalyzer(Version.LUCENE_CURRENT));

            ByFirstTitleLetterClusteringAlgorithmDescriptor.attributeBuilder(params)
                .caseSensitive(false);

            /*
 
View Full Code Here

        );

        String[] names = new String[]{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
                "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"};

        WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_43);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, analyzer);
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        try (IndexWriter indexWriter = new IndexWriter(hdfsDirectory, config)) {
            for (int i = 0; i < 1000000; i++) {
                Document document = new Document();
View Full Code Here

            binaryFieldType.freeze();

            String[] names = new String[]{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
                    "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"};

            WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_43);
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, analyzer);
            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            try (IndexWriter indexWriter = new IndexWriter(localDirectory, config)) {
                for (int i = 0; i < 1000000; i++) {
                    Document document = new Document();
View Full Code Here

  @Test
  public void testIndexWithAnalyzer() throws ZoieException, IOException {
    File idxDir = getIdxDir();
    ZoieSystem<IndexReader, String> idxSystem = createZoie(idxDir, true, 20,
      new WhitespaceAnalyzer(Version.LUCENE_43), null, ZoieConfig.DEFAULT_VERSION_COMPARATOR, false);
    idxSystem.start();

    MemoryStreamDataProvider<String> memoryProvider = new MemoryStreamDataProvider<String>(
        ZoieConfig.DEFAULT_VERSION_COMPARATOR);
    memoryProvider.setMaxEventsPerMinute(Long.MAX_VALUE);
View Full Code Here

        mltQuery.setMinWordLen(0);
        mltQuery.setMinDocFreq(0);

        // one document has all values
        MemoryIndex index = new MemoryIndex();
        index.addField("name.first", "apache lucene", new WhitespaceAnalyzer());
        index.addField("name.last", "1 2 3 4", new WhitespaceAnalyzer());

        // two clauses, one for items and one for like_text if set
        BooleanQuery luceneQuery = (BooleanQuery) mltQuery.rewrite(index.createSearcher().getIndexReader());
        BooleanClause[] clauses = luceneQuery.getClauses();
View Full Code Here

    }

    private static Fields generateFields(String[] fieldNames, String text) throws IOException {
        MemoryIndex index = new MemoryIndex();
        for (String fieldName : fieldNames) {
            index.addField(fieldName, text, new WhitespaceAnalyzer());
        }
        return MultiFields.getFields(index.createSearcher().getIndexReader());
    }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.core.WhitespaceAnalyzer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.