Package org.apache.lucene.util

Examples of org.apache.lucene.util.LineFileDocs$DocState
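LineFileDocs is a Lucene test-framework utility that streams Document instances parsed from a line file (one document per line); its nested DocState holds the reusable Document and Field instances that nextDoc() refills on each call. The snippets below show how Lucene's own tests drive it. As a minimal sketch of the common pattern (the writer setup here is illustrative, not taken from any one snippet below):

    LineFileDocs docs = new LineFileDocs(random());
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    for (int i = 0; i < 100; i++) {
      writer.addDocument(docs.nextDoc()); // nextDoc() reuses DocState's Document/Fields
    }
    docs.close();
    writer.close();
    dir.close();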


    final AtomicReference<IndexWriter> writerRef = new AtomicReference<>();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

    writerRef.set(new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)));
    final LineFileDocs docs = new LineFileDocs(random());
    final Thread[] threads = new Thread[threadCount];
    final int iters = atLeast(100);
    final AtomicBoolean failed = new AtomicBoolean();
    final Lock rollbackLock = new ReentrantLock();
    final Lock commitLock = new ReentrantLock();
    for(int threadID=0;threadID<threadCount;threadID++) {
      threads[threadID] = new Thread() {
          @Override
          public void run() {
            for(int iter=0;iter<iters && !failed.get();iter++) {
              //final int x = random().nextInt(5);
              final int x = random().nextInt(3);
              try {
                switch(x) {
                case 0:
                  rollbackLock.lock();
                  if (VERBOSE) {
                    System.out.println("\nTEST: " + Thread.currentThread().getName() + ": now rollback");
                  }
                  try {
                    writerRef.get().rollback();
                    if (VERBOSE) {
                      System.out.println("TEST: " + Thread.currentThread().getName() + ": rollback done; now open new writer");
                    }
                    writerRef.set(new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))));
                  } finally {
                    rollbackLock.unlock();
                  }
                  break;
                case 1:
                  commitLock.lock();
                  if (VERBOSE) {
                    System.out.println("\nTEST: " + Thread.currentThread().getName() + ": now commit");
                  }
                  try {
                    if (random().nextBoolean()) {
                      writerRef.get().prepareCommit();
                    }
                    writerRef.get().commit();
                  } catch (AlreadyClosedException ace) {
                    // ok
                  } catch (NullPointerException npe) {
                    // ok
                  } finally {
                    commitLock.unlock();
                  }
                  break;
                case 2:
                  if (VERBOSE) {
                    System.out.println("\nTEST: " + Thread.currentThread().getName() + ": now add");
                  }
                  try {
                    writerRef.get().addDocument(docs.nextDoc());
                  } catch (AlreadyClosedException ace) {
                    // ok
                  } catch (NullPointerException npe) {
                    // ok
                  } catch (AssertionError ae) {
                    // ok
                  }


   * populates a writer with random stuff. this must be fully reproducible with the seed!
   */
  public static void createRandomIndex(int numdocs, RandomIndexWriter writer, long seed) throws IOException {
    Random random = new Random(seed);
    // primary source for our data is from linefiledocs, it's realistic.
    LineFileDocs lineFileDocs = new LineFileDocs(random);

    // TODO: we should add other fields that use things like docs&freqs but omit positions,
    // because linefiledocs doesn't cover all the possibilities.
    for (int i = 0; i < numdocs; i++) {
      Document document = lineFileDocs.nextDoc();
      // grab the title and add some SortedSet instances for fun
      String title = document.get("titleTokenized");
      String split[] = title.split("\\s+");
      for (String trash : split) {
        document.add(new SortedSetDocValuesField("sortedset", new BytesRef(trash)));
      }
      // add a numeric dv field sometimes
      document.removeFields("sparsenumeric");
      if (random.nextInt(4) == 2) {
        document.add(new NumericDocValuesField("sparsenumeric", random.nextInt()));
      }
      writer.addDocument(document);
    }
   
    lineFileDocs.close();
  }
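Because the method seeds its own Random, the document stream it adds is fully reproducible for a given seed. A minimal usage sketch (the surrounding setup is assumed, not part of the listing):

    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    createRandomIndex(atLeast(100), writer, 42L); // any fixed seed; reruns feed the writer the same docs
    IndexReader reader = writer.getReader();
    // ... assertions against the reproducibly-built index ...
    reader.close();
    writer.close();
    dir.close();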

    assertNull(reader.termPositionsEnum(new Term("not-in-index", "foo")));
    assertNull(reader.terms("not-in-index"));
  }
 
  public void testDuellMemIndex() throws IOException {
    LineFileDocs lineFileDocs = new LineFileDocs(random());
    int numDocs = atLeast(10);
    MemoryIndex memory = new MemoryIndex(random().nextBoolean(), random().nextInt(50) * 1024 * 1024);
    for (int i = 0; i < numDocs; i++) {
      Directory dir = newDirectory();
      MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
      mockAnalyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
      IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), TEST_VERSION_CURRENT, mockAnalyzer));
      Document nextDoc = lineFileDocs.nextDoc();
      Document doc = new Document();
      for (IndexableField field : nextDoc.getFields()) {
        if (field.fieldType().indexed()) {
          doc.add(field);
          if (random().nextInt(3) == 0) {
            doc.add(field); // randomly add the same field twice
          }
        }
      }
     
      writer.addDocument(doc);
      writer.close();
      for (IndexableField field : doc.getFields()) {
        memory.addField(field.name(), ((Field) field).stringValue(), mockAnalyzer);
      }
      DirectoryReader competitor = DirectoryReader.open(dir);
      AtomicReader memIndexReader = (AtomicReader) memory.createSearcher().getIndexReader();
      duellReaders(competitor, memIndexReader);
      IOUtils.close(competitor, memIndexReader);
      memory.reset();
      dir.close();
    }
    lineFileDocs.close();
  }

    dir.setPreventDoubleWrite(false);
    double rate = random().nextDouble()*0.01;
    //System.out.println("rate=" + rate);
    dir.setRandomIOExceptionRateOnOpen(rate);
    int iters = atLeast(20);
    LineFileDocs docs = new LineFileDocs(random(), defaultCodecSupportsDocValues());
    IndexReader r = null;
    DirectoryReader r2 = null;
    boolean any = false;
    MockDirectoryWrapper dirCopy = null;
    int lastNumDocs = 0;
    for(int iter=0;iter<iters;iter++) {

      IndexWriter w = null;
      if (VERBOSE) {
        System.out.println("TEST: iter=" + iter);
      }
      try {
        MockAnalyzer analyzer = new MockAnalyzer(random());
        analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
        IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

        if (VERBOSE) {
          // Do this ourselves instead of relying on LTC so
          // we see incrementing messageID:
          iwc.setInfoStream(new PrintStreamInfoStream(System.out));
        }
        MergeScheduler ms = iwc.getMergeScheduler();
        if (ms instanceof ConcurrentMergeScheduler) {
          ((ConcurrentMergeScheduler) ms).setSuppressExceptions();
        }
        w = new IndexWriter(dir, iwc);
        if (r != null && random().nextInt(5) == 3) {
          if (random().nextBoolean()) {
            if (VERBOSE) {
              System.out.println("TEST: addIndexes IR[]");
            }
            w.addIndexes(new IndexReader[] {r});
          } else {
            if (VERBOSE) {
              System.out.println("TEST: addIndexes Directory[]");
            }
            w.addIndexes(new Directory[] {dirCopy});
          }
        } else {
          if (VERBOSE) {
            System.out.println("TEST: addDocument");
          }
          w.addDocument(docs.nextDoc());
        }
        dir.setRandomIOExceptionRateOnOpen(0.0);
        w.close();
        w = null;


  public void test() throws Exception {
    final Directory dir = newDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
    LineFileDocs docs = new LineFileDocs(random(), defaultCodecSupportsDocValues());
    int charsToIndex = atLeast(100000);
    int charsIndexed = 0;
    //System.out.println("bytesToIndex=" + charsToIndex);
    while(charsIndexed < charsToIndex) {
      Document doc = docs.nextDoc();
      charsIndexed += doc.get("body").length();
      w.addDocument(doc);
      //System.out.println("  bytes=" + charsIndexed + " add: " + doc);
    }
    IndexReader r = w.getReader();

  @Test
  public void testNRTThreads() throws Exception {

    final long t0 = System.currentTimeMillis();

    final LineFileDocs docs = new LineFileDocs(random);
    final File tempDir = _TestUtil.getTempDir("nrtopenfiles");
    final MockDirectoryWrapper dir = new MockDirectoryWrapper(random, FSDirectory.open(tempDir));
    final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer());
    conf.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
      @Override
      public void warm(IndexReader reader) throws IOException {
        if (VERBOSE) {
          System.out.println("TEST: now warm merged reader=" + reader);
        }
        final int maxDoc = reader.maxDoc();
        int sum = 0;
        final int inc = Math.max(1, maxDoc/50);
        for(int docID=0;docID<maxDoc;docID += inc) {
          if (!reader.isDeleted(docID)) {
            final Document doc = reader.document(docID);
            sum += doc.getFields().size();
          }
        }

        IndexSearcher searcher = newSearcher(reader);
        sum += searcher.search(new TermQuery(new Term("body", "united")), 10).totalHits;
        searcher.close();

        if (VERBOSE) {
          System.out.println("TEST: warm visited " + sum + " fields");
        }
      }
      });

    final IndexWriter writer = new IndexWriter(dir, conf);
    if (VERBOSE) {
      writer.setInfoStream(System.out);
    }
    MergeScheduler ms = writer.getConfig().getMergeScheduler();
    if (ms instanceof ConcurrentMergeScheduler) {
      // try to keep max file open count down
      ((ConcurrentMergeScheduler) ms).setMaxThreadCount(1);
      ((ConcurrentMergeScheduler) ms).setMaxMergeCount(1);
    }
    LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
    if (lmp.getMergeFactor() > 5) {
      lmp.setMergeFactor(5);
    }

    final int NUM_INDEX_THREADS = 2;
    final int NUM_SEARCH_THREADS = 3;
    final int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : 5;

    final AtomicBoolean failed = new AtomicBoolean();
    final AtomicInteger addCount = new AtomicInteger();
    final AtomicInteger delCount = new AtomicInteger();

    final List<String> delIDs = Collections.synchronizedList(new ArrayList<String>());

    final long stopTime = System.currentTimeMillis() + RUN_TIME_SEC*1000;
    Thread[] threads = new Thread[NUM_INDEX_THREADS];
    for(int thread=0;thread<NUM_INDEX_THREADS;thread++) {
      threads[thread] = new Thread() {
          @Override
          public void run() {
            final List<String> toDeleteIDs = new ArrayList<String>();
            while(System.currentTimeMillis() < stopTime && !failed.get()) {
              try {
                Document doc = docs.nextDoc();
                if (doc == null) {
                  break;
                }
                final String addedField;
                if (random.nextBoolean()) {
                  addedField = "extra" + random.nextInt(10);
                  doc.add(new Field(addedField, "a random field", Field.Store.NO, Field.Index.ANALYZED));
                } else {
                  addedField = null;
                }
                if (random.nextBoolean()) {
                  if (VERBOSE) {
                    //System.out.println(Thread.currentThread().getName() + ": add doc id:" + doc.get("id"));
                  }
                  writer.addDocument(doc);
                } else {
                  // we use update but it never replaces a
                  // prior doc
                  if (VERBOSE) {
                    //System.out.println(Thread.currentThread().getName() + ": update doc id:" + doc.get("id"));
                  }
                  writer.updateDocument(new Term("id", doc.get("id")), doc);
                }
                if (random.nextInt(5) == 3) {
                  if (VERBOSE) {
                    //System.out.println(Thread.currentThread().getName() + ": buffer del id:" + doc.get("id"));
                  }
                  toDeleteIDs.add(doc.get("id"));
                }
                if (random.nextInt(50) == 17) {
                  if (VERBOSE) {
                    System.out.println(Thread.currentThread().getName() + ": apply " + toDeleteIDs.size() + " deletes");
                  }
                  for(String id : toDeleteIDs) {
                    writer.deleteDocuments(new Term("id", id));
                  }
                  final int count = delCount.addAndGet(toDeleteIDs.size());
                  if (VERBOSE) {
                    System.out.println(Thread.currentThread().getName() + ": tot " + count + " deletes");
                  }
                  delIDs.addAll(toDeleteIDs);
                  toDeleteIDs.clear();
                }
                addCount.getAndIncrement();
                if (addedField != null) {
                  doc.removeField(addedField);
                }
              } catch (Exception exc) {
                System.out.println(Thread.currentThread().getName() + ": hit exc");
                exc.printStackTrace();
                failed.set(true);
                throw new RuntimeException(exc);
              }
            }
            if (VERBOSE) {
              System.out.println(Thread.currentThread().getName() + ": indexing done");
            }
          }
        };
      threads[thread].setDaemon(true);
      threads[thread].start();
    }

    if (VERBOSE) {
      System.out.println("TEST: DONE start indexing threads [" + (System.currentTimeMillis()-t0) + " ms]");
    }

    // let index build up a bit
    Thread.sleep(100);

    IndexReader r = IndexReader.open(writer, true);
    boolean any = false;

    // silly starting guess:
    final AtomicInteger totTermCount = new AtomicInteger(100);

    final ExecutorService es = Executors.newCachedThreadPool();

    while(System.currentTimeMillis() < stopTime && !failed.get()) {
      if (random.nextBoolean()) {
        if (VERBOSE) {
          System.out.println("TEST: now reopen r=" + r);
        }
        final IndexReader r2 = r.reopen();
        if (r != r2) {
          r.close();
          r = r2;
        }
      } else {
        if (VERBOSE) {
          System.out.println("TEST: now close reader=" + r);
        }
        r.close();
        writer.commit();
        final Set<String> openDeletedFiles = dir.getOpenDeletedFiles();
        if (openDeletedFiles.size() > 0) {
          System.out.println("OBD files: " + openDeletedFiles);
        }
        any |= openDeletedFiles.size() > 0;
        //assertEquals("open but deleted: " + openDeletedFiles, 0, openDeletedFiles.size());
        if (VERBOSE) {
          System.out.println("TEST: now open");
        }
        r = IndexReader.open(writer, true);
      }
      if (VERBOSE) {
        System.out.println("TEST: got new reader=" + r);
      }
      //System.out.println("numDocs=" + r.numDocs() + "
      //openDelFileCount=" + dir.openDeleteFileCount());

      smokeTestReader(r);

      if (r.numDocs() > 0) {

        final IndexSearcher s = new IndexSearcher(r, es);

        // run search threads
        final long searchStopTime = System.currentTimeMillis() + 500;
        final Thread[] searchThreads = new Thread[NUM_SEARCH_THREADS];
        final AtomicInteger totHits = new AtomicInteger();
        for(int thread=0;thread<NUM_SEARCH_THREADS;thread++) {
          searchThreads[thread] = new Thread() {
              @Override
              public void run() {
                try {
                  TermEnum termEnum = s.getIndexReader().terms(new Term("body", ""));
                  int seenTermCount = 0;
                  int shift;
                  int trigger;
                  if (totTermCount.get() == 0) {
                    shift = 0;
                    trigger = 1;
                  } else {
                    shift = random.nextInt(totTermCount.get()/10);
                    trigger = totTermCount.get()/10;
                  }
                  while(System.currentTimeMillis() < searchStopTime) {
                    Term term = termEnum.term();
                    if (term == null) {
                      if (seenTermCount == 0) {
                        break;
                      }
                      totTermCount.set(seenTermCount);
                      seenTermCount = 0;
                      trigger = totTermCount.get()/10;
                      //System.out.println("trigger " + trigger);
                      shift = random.nextInt(totTermCount.get()/10);
                      termEnum = s.getIndexReader().terms(new Term("body", ""));
                      continue;
                    }
                    seenTermCount++;
                    // search 10 terms
                    if (trigger == 0) {
                      trigger = 1;
                    }
                    if ((seenTermCount + shift) % trigger == 0) {
                      //if (VERBOSE) {
                      //System.out.println(Thread.currentThread().getName() + " now search body:" + term.utf8ToString());
                      //}
                      totHits.addAndGet(runQuery(s, new TermQuery(term)));
                    }
                  }
                  if (VERBOSE) {
                    System.out.println(Thread.currentThread().getName() + ": search done");
                  }
                } catch (Throwable t) {
                  failed.set(true);
                  t.printStackTrace(System.out);
                  throw new RuntimeException(t);
                }
              }
            };
          searchThreads[thread].setDaemon(true);
          searchThreads[thread].start();
        }

        for(int thread=0;thread<NUM_SEARCH_THREADS;thread++) {
          searchThreads[thread].join();
        }

        if (VERBOSE) {
          System.out.println("TEST: DONE search: totHits=" + totHits);
        }
      } else {
        Thread.sleep(100);
      }
    }

    es.shutdown();
    es.awaitTermination(1, TimeUnit.SECONDS);

    if (VERBOSE) {
      System.out.println("TEST: all searching done [" + (System.currentTimeMillis()-t0) + " ms]");
    }

    //System.out.println("numDocs=" + r.numDocs() + " openDelFileCount=" + dir.openDeleteFileCount());
    r.close();
    final Set<String> openDeletedFiles = dir.getOpenDeletedFiles();
    if (openDeletedFiles.size() > 0) {
      System.out.println("OBD files: " + openDeletedFiles);
    }
    any |= openDeletedFiles.size() > 0;

    assertFalse("saw non-zero open-but-deleted count", any);
    if (VERBOSE) {
      System.out.println("TEST: now join");
    }
    for(int thread=0;thread<NUM_INDEX_THREADS;thread++) {
      threads[thread].join();
    }
    if (VERBOSE) {
      System.out.println("TEST: done join [" + (System.currentTimeMillis()-t0) + " ms]; addCount=" + addCount + " delCount=" + delCount);
    }
   
    final IndexReader r2 = writer.getReader();
    final IndexSearcher s = newSearcher(r2);
    for(String id : delIDs) {
      final TopDocs hits = s.search(new TermQuery(new Term("id", id)), 1);
      if (hits.totalHits != 0) {
        fail("doc id=" + id + " is supposed to be deleted, but got docID=" + hits.scoreDocs[0].doc);
      }
    }
    assertEquals("index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount, addCount.get() - delCount.get(), r2.numDocs());
    r2.close();

    writer.commit();
    assertEquals("index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount, addCount.get() - delCount.get(), writer.numDocs());

    assertFalse(writer.anyNonBulkMerges);
    writer.close(false);
    _TestUtil.checkIndex(dir);
    s.close();
    dir.close();
    _TestUtil.rmDir(tempDir);
    docs.close();
    if (VERBOSE) {
      System.out.println("TEST: done [" + (System.currentTimeMillis()-t0) + " ms]");
    }
  }

  /**
   * populates a writer with random stuff. this must be fully reproducible with
   * the seed!
   */
  public static void createRandomIndex(int numdocs, RandomIndexWriter writer,
      long seed) throws IOException {
    Random random = new Random(seed);
    // primary source for our data is from linefiledocs, it's realistic.
    LineFileDocs lineFileDocs = new LineFileDocs(random, false); // no docvalues in 4x
   
    // TODO: we should add other fields that use things like docs&freqs but omit positions,
    // because linefiledocs doesn't cover all the possibilities.
    for (int i = 0; i < numdocs; i++) {
      writer.addDocument(lineFileDocs.nextDoc());
    }
   
    lineFileDocs.close();
  }

      assertEquals(new BytesRef("for all the fish"), results.get(2).payload);
    }
  }
 
  public void testRandomRealisticKeys() throws IOException {
    LineFileDocs lineFile = new LineFileDocs(random());
    Map<String, Long> mapping = new HashMap<>();
    List<Input> keys = new ArrayList<>();
   
    int howMany = atLeast(100); // this might bring up duplicates
    for (int i = 0; i < howMany; i++) {
      Document nextDoc = lineFile.nextDoc();
      String title = nextDoc.getField("title").stringValue();
      int randomWeight = random().nextInt(100);
      keys.add(new Input(title, randomWeight));
      if (!mapping.containsKey(title) || mapping.get(title) < randomWeight) {
          mapping.put(title, Long.valueOf(randomWeight));
      }
    }
    AnalyzingSuggester analyzingSuggester = new AnalyzingSuggester(new MockAnalyzer(random()), new MockAnalyzer(random()),
        AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, random().nextBoolean());
    boolean doPayloads = random().nextBoolean();
    if (doPayloads) {
      List<Input> keysAndPayloads = new ArrayList<>();
      for (Input termFreq : keys) {
        keysAndPayloads.add(new Input(termFreq.term, termFreq.v, new BytesRef(Long.toString(termFreq.v))));
      }
      analyzingSuggester.build(new InputArrayIterator(keysAndPayloads));
    } else {
      analyzingSuggester.build(new InputArrayIterator(keys));
    }
   
    for (Input termFreq : keys) {
      List<LookupResult> lookup = analyzingSuggester.lookup(termFreq.term.utf8ToString(), false, keys.size());
      for (LookupResult lookupResult : lookup) {
        assertEquals(mapping.get(lookupResult.key), Long.valueOf(lookupResult.value));
        if (doPayloads) {
          assertEquals(lookupResult.payload.utf8ToString(), Long.toString(lookupResult.value));
        } else {
          assertNull(lookupResult.payload);
        }
      }
    }
   
    lineFile.close();
  }

 
  // Build FST for all unique terms in the test line docs
  // file, up until a time limit
  public void testRealTerms() throws Exception {

    final LineFileDocs docs = new LineFileDocs(random(), defaultCodecSupportsDocValues());
    final int RUN_TIME_MSEC = atLeast(500);
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

    final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
    final File tempDir = createTempDir("fstlines");
    final Directory dir = newFSDirectory(tempDir);
    final IndexWriter writer = new IndexWriter(dir, conf);
    final long stopTime = System.currentTimeMillis() + RUN_TIME_MSEC;
    Document doc;
    int docCount = 0;
    while((doc = docs.nextDoc()) != null && System.currentTimeMillis() < stopTime) {
      writer.addDocument(doc);
      docCount++;
    }
    IndexReader r = DirectoryReader.open(writer, true);
    writer.close();
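The listing cuts off before the FST itself is built. Below is a hedged sketch of the step the comment above describes, assuming the Lucene 4.x FST API (Builder, PositiveIntOutputs, Util.toIntsRef); the exact scratch type shifted between 4.x releases (IntsRef vs. IntsRefBuilder), so treat the signatures as approximate:

    // Enumerate each unique term of the "body" field and add it to the FST,
    // mapping the term to its ordinal.
    Terms terms = MultiFields.getTerms(r, "body");
    TermsEnum termsEnum = terms.iterator(null);
    Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, PositiveIntOutputs.getSingleton());
    IntsRef scratch = new IntsRef();
    BytesRef term;
    long ord = 0;
    while ((term = termsEnum.next()) != null) {
      builder.add(Util.toIntsRef(term, scratch), ord++);
    }
    FST<Long> fst = builder.finish();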

   * populates a writer with random stuff. this must be fully reproducible with
   * the seed!
   */
  public static void createRandomIndex(int numdocs, RandomIndexWriter writer,
      Random random) throws IOException {
    LineFileDocs lineFileDocs = new LineFileDocs(random);

    for (int i = 0; i < numdocs; i++) {
      writer.addDocument(lineFileDocs.nextDoc());
    }
   
    lineFileDocs.close();
  }
