Package org.apache.lucene.index

Examples of org.apache.lucene.index.RandomIndexWriter
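RandomIndexWriter is the randomized IndexWriter wrapper from Lucene's test framework. It drives a real IndexWriter underneath but makes randomized choices along the way (for example, whether getReader() hands back an NRT reader or a freshly opened one, as the comment in the first excerpt notes), so the same test exercises many index configurations. The excerpts below all share one basic pattern; here is a minimal sketch of it, assuming the LuceneTestCase helpers (random, newDirectory(), newField(), newSearcher()) that the excerpts themselves rely on:

    // Minimal usage sketch (Lucene 3.x test-framework style, as in the excerpts)
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, dir);

    Document doc = new Document();
    doc.add(newField("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
    writer.addDocument(doc);

    // getReader() flushes pending documents and returns a reader that sees them
    IndexReader reader = writer.getReader();
    writer.close();

    IndexSearcher searcher = newSearcher(reader);
    TopDocs hits = searcher.search(new TermQuery(new Term("id", "1")), 1);
    assertEquals(1, hits.totalHits);

    searcher.close();
    reader.close();
    dir.close();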



  public void testEnforceDeletions() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
        random,
        dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
            setMergeScheduler(new SerialMergeScheduler()).
            // asserts below require no unexpected merges:
            setMergePolicy(newLogMergePolicy(10))
    );

    // NOTE: cannot use writer.getReader because RIW (on
    // flipping a coin) may give us a newly opened reader,
    // but we use .reopen on this reader below and expect to
    // (must) get an NRT reader:
    IndexReader reader = IndexReader.open(writer.w, true);
    // same reason we don't wrap?
    IndexSearcher searcher = newSearcher(reader, false);

    // add a doc, refresh the reader, and check that it's there
    Document doc = new Document();
    doc.add(newField("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
    writer.addDocument(doc);

    reader = refreshReader(reader);
    searcher.close();
    searcher = newSearcher(reader, false);

    TopDocs docs = searcher.search(new MatchAllDocsQuery(), 1);
    assertEquals("Should find a hit...", 1, docs.totalHits);

    final Filter startFilter = new QueryWrapperFilter(new TermQuery(new Term("id", "1")));

    // ignore deletions
    CachingWrapperFilter filter = new CachingWrapperFilter(startFilter, CachingWrapperFilter.DeletesMode.IGNORE);
       
    docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
    assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits);
    ConstantScoreQuery constantScore = new ConstantScoreQuery(filter);
    docs = searcher.search(constantScore, 1);
    assertEquals("[just filter] Should find a hit...", 1, docs.totalHits);

    // now delete the doc, refresh the reader, and see that it's not there
    _TestUtil.keepFullyDeletedSegments(writer.w);
    writer.deleteDocuments(new Term("id", "1"));

    reader = refreshReader(reader);
    searcher.close();
    searcher = newSearcher(reader, false);

    docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
    assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits);

    docs = searcher.search(constantScore, 1);
    assertEquals("[just filter] Should find a hit...", 1, docs.totalHits);


    // force cache to regenerate:
    filter = new CachingWrapperFilter(startFilter, CachingWrapperFilter.DeletesMode.RECACHE);

    writer.addDocument(doc);

    reader = refreshReader(reader);
    searcher.close();
    searcher = newSearcher(reader, false);
       
    docs = searcher.search(new MatchAllDocsQuery(), filter, 1);

    assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits);

    constantScore = new ConstantScoreQuery(filter);
    docs = searcher.search(constantScore, 1);
    assertEquals("[just filter] Should find a hit...", 1, docs.totalHits);

    // NOTE: important to hold ref here so GC doesn't clear
    // the cache entry!  Else the assert below may sometimes
    // fail:
    IndexReader oldReader = reader;

    // make sure we get a cache hit when we reopen reader
    // that had no change to deletions
    reader = refreshReader(reader);
    assertTrue(reader != oldReader);
    searcher.close();
    searcher = newSearcher(reader, false);
    int missCount = filter.missCount;
    docs = searcher.search(constantScore, 1);
    assertEquals("[just filter] Should find a hit...", 1, docs.totalHits);
    assertEquals(missCount, filter.missCount);

    // now delete the doc, refresh the reader, and see that it's not there
    writer.deleteDocuments(new Term("id", "1"));

    reader = refreshReader(reader);
    searcher.close();
    searcher = newSearcher(reader, false);

    missCount = filter.missCount;
    docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
    assertEquals(missCount+1, filter.missCount);
    assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits);
    docs = searcher.search(constantScore, 1);
    assertEquals("[just filter] Should *not* find a hit...", 0, docs.totalHits);


    // apply deletions dynamically
    filter = new CachingWrapperFilter(startFilter, CachingWrapperFilter.DeletesMode.DYNAMIC);

    writer.addDocument(doc);
    reader = refreshReader(reader);
    searcher.close();
    searcher = newSearcher(reader, false);
       
    docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
    assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits);
    constantScore = new ConstantScoreQuery(filter);
    docs = searcher.search(constantScore, 1);
    assertEquals("[just filter] Should find a hit...", 1, docs.totalHits);

    // now delete the doc, refresh the reader, and see that it's not there
    writer.deleteDocuments(new Term("id", "1"));

    reader = refreshReader(reader);
    searcher.close();
    searcher = newSearcher(reader, false);

    docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
    assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits);

    missCount = filter.missCount;
    docs = searcher.search(constantScore, 1);
    assertEquals("[just filter] Should *not* find a hit...", 0, docs.totalHits);

    // doesn't count as a miss
    assertEquals(missCount, filter.missCount);

    // NOTE: silliness to make sure JRE does not optimize
    // away our holding onto oldReader to prevent
    // CachingWrapperFilter's WeakHashMap from dropping the
    // entry:
    assertTrue(oldReader != null);

    searcher.close();
    reader.close();
    writer.close();
    dir.close();
  }
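The test above calls a refreshReader helper that is not part of the excerpt. A plausible implementation, sketched here around IndexReader.reopen() (which the NOTE comment in the test refers to; the helper in the full source may differ), reopens the reader and closes the old instance only if a new one came back:

  private static IndexReader refreshReader(IndexReader reader) throws IOException {
    IndexReader oldReader = reader;
    // reopen() returns the same instance when nothing changed,
    // otherwise a new reader reflecting the latest changes
    reader = reader.reopen();
    if (reader != oldReader) {
      oldReader.close();
    }
    return reader;
  }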


    String[] data = new String[] { "A 1 2 3 4 5 6", "Z       4 5 6", null,
        "B   2   4 5 6", "Y     3   5 6", null, "C     3     6",
        "X       4 5 6" };

    small = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, small,
        newIndexWriterConfig(TEST_VERSION_CURRENT,
            new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMergePolicy(newLogMergePolicy()));

    for (int i = 0; i < data.length; i++) {
      Document doc = new Document();
      doc.add(newField("id", String.valueOf(i), Field.Store.YES,
          Field.Index.NOT_ANALYZED));// Field.Keyword("id",String.valueOf(i)));
      doc
          .add(newField("all", "all", Field.Store.YES,
              Field.Index.NOT_ANALYZED));// Field.Keyword("all","all"));
      if (null != data[i]) {
        doc.add(newField("data", data[i], Field.Store.YES,
            Field.Index.ANALYZED));// Field.Text("data",data[i]));
      }
      writer.addDocument(doc);
    }

    reader = writer.getReader();
    writer.close();
  }

  @Test
  public void testFarsi() throws Exception {

    /* build an index */
    Directory farsiIndex = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, farsiIndex, new SimpleAnalyzer(TEST_VERSION_CURRENT));
    Document doc = new Document();
    doc.add(newField("content", "\u0633\u0627\u0628", Field.Store.YES,
        Field.Index.NOT_ANALYZED));
    doc
        .add(newField("body", "body", Field.Store.YES,
            Field.Index.NOT_ANALYZED));
    writer.addDocument(doc);

    IndexReader reader = writer.getReader();
    writer.close();

    IndexSearcher search = newSearcher(reader);

    // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
    // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi

  @Test
  public void testDanish() throws Exception {

    /* build an index */
    Directory danishIndex = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, danishIndex, new SimpleAnalyzer(TEST_VERSION_CURRENT));

    // Danish collation orders the words below in the given order
    // (example taken from TestSort.testInternationalSort() ).
    String[] words = { "H\u00D8T", "H\u00C5T", "MAND" };
    for (int docnum = 0 ; docnum < words.length ; ++docnum) {  
      Document doc = new Document();
      doc.add(newField("content", words[docnum],
                        Field.Store.YES, Field.Index.NOT_ANALYZED));
      doc.add(newField("body", "body",
                        Field.Store.YES, Field.Index.NOT_ANALYZED));
      writer.addDocument(doc);
    }
    IndexReader reader = writer.getReader();
    writer.close();

    IndexSearcher search = newSearcher(reader);

    Collator c = Collator.getInstance(new Locale("da", "dk"));
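The Danish excerpt is cut off right after obtaining the Collator. A typical continuation for collation tests of this era, shown only as a hedged sketch (the range bounds are illustrative, not taken from the full source), is a TermRangeQuery whose endpoints are compared with the collator rather than by binary term order:

    // Bounds are compared using the Danish collator, so which of the indexed
    // words ("H\u00D8T", "H\u00C5T", "MAND") fall in the range follows Danish
    // ordering rather than raw Unicode code points.
    Query query = new TermRangeQuery("content", "H\u00C5T", "MAND", true, true, c);
    int hits = search.search(query, null, 1000).totalHits;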


  final int NUM_SEARCH_THREADS = 5;
  final int RUN_TIME_MSEC = atLeast(1000);

  public void test() throws Exception {
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random, dir);

    final long startTime = System.currentTimeMillis();

    // TODO: replace w/ the @nightly test data; make this
    // into an optional @nightly stress test
    final Document doc = new Document();
    final Field body = newField("body", "", Field.Index.ANALYZED);
    doc.add(body);
    final StringBuilder sb = new StringBuilder();
    for(int docCount=0;docCount<NUM_DOCS;docCount++) {
      final int numTerms = random.nextInt(10);
      for(int termCount=0;termCount<numTerms;termCount++) {
        sb.append(random.nextBoolean() ? "aaa" : "bbb");
        sb.append(' ');
      }
      body.setValue(sb.toString());
      w.addDocument(doc);
      sb.delete(0, sb.length());
    }
    final IndexReader r = w.getReader();
    w.close();

    final long endTime = System.currentTimeMillis();
    if (VERBOSE) System.out.println("BUILD took " + (endTime-startTime));

    final IndexSearcher s = newSearcher(r);

    mmapDir.setMaxChunkSize(chunkSize);
    // we will map a lot, try to turn on the unmap hack
    if (MMapDirectory.UNMAP_SUPPORTED)
      mmapDir.setUseUnmap(true);
    MockDirectoryWrapper dir = new MockDirectoryWrapper(random, mmapDir);
    RandomIndexWriter writer = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
    Document doc = new Document();
    Field docid = newField("docid", "0", Field.Store.YES, Field.Index.NOT_ANALYZED);
    Field junk = newField("junk", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
    doc.add(docid);
    doc.add(junk);
   
    int numDocs = 100;
    for (int i = 0; i < numDocs; i++) {
      docid.setValue("" + i);
      junk.setValue(_TestUtil.randomUnicodeString(random));
      writer.addDocument(doc);
    }
    IndexReader reader = writer.getReader();
    writer.close();
   
    int numAsserts = atLeast(100);
    for (int i = 0; i < numAsserts; i++) {
      int docID = random.nextInt(numDocs);
      assertEquals("" + docID, reader.document(docID).get("docid"));

  @Override
  public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
   
    // Add a series of docs with filterable fields: url, text and date
    addDoc(writer, "http://lucene.apache.org", "lucene 1.4.3 available", "20040101");
    addDoc(writer, "http://lucene.apache.org", "New release pending", "20040102");
    addDoc(writer, "http://lucene.apache.org", "Lucene 1.9 out now", "20050101");   
    addDoc(writer, "http://www.bar.com", "Local man bites dog", "20040101");
    addDoc(writer, "http://www.bar.com", "Dog bites local man", "20040102");
    addDoc(writer, "http://www.bar.com", "Dog uses Lucene", "20050101");
    addDoc(writer, "http://lucene.apache.org", "Lucene 2.0 out", "20050101");
    addDoc(writer, "http://lucene.apache.org", "Oops. Lucene 2.1 out", "20050102");

    // Until we fix LUCENE-2348, the index must
    // have only 1 segment:
    writer.optimize();

    reader = writer.getReader();
    writer.close();     
    searcher = newSearcher(reader);
   
  }
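This setUp (and several of the ones that follow) delegates to an addDoc helper that is not included in the excerpt. For the url/text/date variant used here, a plausible shape is sketched below; the field names follow the comment above, but the exact field types in the full source may differ:

  // Hypothetical helper matching the addDoc(writer, url, text, date) calls above
  private void addDoc(RandomIndexWriter writer, String url, String text, String date)
      throws IOException {
    Document doc = new Document();
    doc.add(newField("url", url, Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(newField("text", text, Field.Store.YES, Field.Index.ANALYZED));
    doc.add(newField("date", date, Field.Store.YES, Field.Index.NOT_ANALYZED));
    writer.addDocument(doc);
  }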

  @Override
  public void setUp() throws Exception  {
    super.setUp();
    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
    //Add series of docs with misspelt names
    addDoc(writer, "jonathon smythe","1");
    addDoc(writer, "jonathan smith","2");
    addDoc(writer, "johnathon smyth","3");
    addDoc(writer, "johnny smith","4" );
    addDoc(writer, "jonny smith","5" );
    addDoc(writer, "johnathon smythe","6");
    reader = writer.getReader();
    writer.close();
    searcher = newSearcher(reader);
  }

  @Override
  public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, directory, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
   
    // Add a series of docs with filterable fields: access rights, prices, dates and "in-stock" flags
    addDoc(writer, "admin guest", "010", "20040101","Y");
    addDoc(writer, "guest", "020", "20040101","Y");
    addDoc(writer, "guest", "020", "20050101","Y");
    addDoc(writer, "admin", "020", "20050101","Maybe");
    addDoc(writer, "admin guest", "030", "20050101","N");
    reader = writer.getReader();
    writer.close();
  }

 
  @Override
  public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, directory);
   
    // Add series of docs with specific information for MoreLikeThis
    addDoc(writer, "lucene");
    addDoc(writer, "lucene release");

    reader = writer.getReader();
    writer.close();
    searcher = newSearcher(reader);
  }
