Package org.apache.lucene.util

Examples of org.apache.lucene.util.LineFileDocs
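
LineFileDocs is the test-support class these snippets revolve around: it streams ready-made Document instances from a line-delimited docs file (one document per line, with fields such as "title", "titleTokenized", "body" and "docid"). The recurring pattern is to construct it with the test's Random, pull documents with nextDoc(), and close it when done. A minimal sketch of that pattern (the test method name is made up; random(), newDirectory(), atLeast() and TEST_VERSION_CURRENT all come from LuceneTestCase):

import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LineFileDocs;

public void testIndexSomeLineDocs() throws IOException {
  LineFileDocs docs = new LineFileDocs(random());
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  int numDocs = atLeast(10);
  for (int i = 0; i < numDocs; i++) {
    writer.addDocument(docs.nextDoc()); // each call yields the next pre-built Document
  }
  writer.close();
  dir.close();
  docs.close(); // releases the underlying line file
}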


    assertNull(reader.termPositionsEnum(new Term("not-in-index", "foo")));
    assertNull(reader.terms("not-in-index"));
  }
 
  public void testDuellMemIndex() throws IOException {
    LineFileDocs lineFileDocs = new LineFileDocs(random());
    int numDocs = atLeast(10);
    MemoryIndex memory = new MemoryIndex(random().nextBoolean(), random().nextInt(50) * 1024 * 1024);
    for (int i = 0; i < numDocs; i++) {
      Directory dir = newDirectory();
      MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
      IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), TEST_VERSION_CURRENT, mockAnalyzer));
      Document nextDoc = lineFileDocs.nextDoc();
      Document doc = new Document();
      for (IndexableField field : nextDoc.getFields()) {
        if (field.fieldType().indexed()) {
          doc.add(field);
          if (random().nextInt(3) == 0) {
            doc.add(field); // randomly add the same field twice
          }
        }
      }
     
      writer.addDocument(doc);
      writer.close();
      for (IndexableField field : doc.getFields()) {
        memory.addField(field.name(), ((Field) field).stringValue(), mockAnalyzer);
      }
      DirectoryReader competitor = DirectoryReader.open(dir);
      AtomicReader memIndexReader = (AtomicReader) memory.createSearcher().getIndexReader();
      duellReaders(competitor, memIndexReader);
      IOUtils.close(competitor, memIndexReader);
      memory.reset();
      dir.close();
    }
    lineFileDocs.close();
  }
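
duellReaders is a helper defined elsewhere in this test class; conceptually it walks the on-disk reader and the MemoryIndex reader in lockstep and asserts they expose identical postings. A rough, hypothetical sketch of the core of that idea for a single field (assertSameTerms is a made-up name; imports from org.apache.lucene.index assumed):

static void assertSameTerms(AtomicReader expected, AtomicReader actual, String field) throws IOException {
  Terms expectedTerms = expected.terms(field);
  Terms actualTerms = actual.terms(field);
  if (expectedTerms == null) {
    assertNull(actualTerms);
    return;
  }
  TermsEnum expectedEnum = expectedTerms.iterator(null);
  TermsEnum actualEnum = actualTerms.iterator(null);
  BytesRef term;
  while ((term = expectedEnum.next()) != null) {
    assertEquals(term, actualEnum.next()); // same terms, in the same sorted order
  }
  assertNull(actualEnum.next()); // and no extra terms on the other side
}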


  private static LineFileDocs lineDocFile;

  @BeforeClass
  public static void beforeClass() throws Exception {
    lineDocFile = new LineFileDocs(random(), defaultCodecSupportsDocValues());
  }
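
A static handle like this needs a matching teardown so the file is released once the class finishes; a minimal sketch of the usual counterpart (the method name is assumed):

@AfterClass
public static void afterClass() throws Exception {
  lineDocFile.close();
  lineDocFile = null; // don't pin it across test classes
}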

      assertEquals(new BytesRef("for all the fish"), results.get(2).payload);
    }
  }
 
  public void testRandomRealisticKeys() throws IOException {
    LineFileDocs lineFile = new LineFileDocs(random());
    Map<String, Long> mapping = new HashMap<String, Long>();
    List<TermFreq> keys = new ArrayList<TermFreq>();
   
    int howMany = atLeast(100); // this might bring up duplicates
    for (int i = 0; i < howMany; i++) {
      Document nextDoc = lineFile.nextDoc();
      String title = nextDoc.getField("title").stringValue();
      int randomWeight = random().nextInt(100);
      keys.add(new TermFreq(title, randomWeight));
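      // titles can repeat; remember the max weight per title, since that's the value the suggester should surface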
      if (!mapping.containsKey(title) || mapping.get(title) < randomWeight) {
        mapping.put(title, Long.valueOf(randomWeight));
      }
    }
   
    AnalyzingSuggester analyzingSuggester = new AnalyzingSuggester(new MockAnalyzer(random()));
    analyzingSuggester.setPreservePositionIncrements(random().nextBoolean());
    boolean doPayloads = random().nextBoolean();
    if (doPayloads) {
      List<TermFreqPayload> keysAndPayloads = new ArrayList<TermFreqPayload>();
      for (TermFreq termFreq : keys) {
        keysAndPayloads.add(new TermFreqPayload(termFreq.term, termFreq.v, new BytesRef(Long.toString(termFreq.v))));
      }
      analyzingSuggester.build(new TermFreqPayloadArrayIterator(keysAndPayloads));
    } else {
      analyzingSuggester.build(new TermFreqArrayIterator(keys));
    }
   
    for (TermFreq termFreq : keys) {
      List<LookupResult> lookup = analyzingSuggester.lookup(termFreq.term.utf8ToString(), false, keys.size());
      for (LookupResult lookupResult : lookup) {
        assertEquals(mapping.get(lookupResult.key), Long.valueOf(lookupResult.value));
        if (doPayloads) {
          assertEquals(lookupResult.payload.utf8ToString(), Long.toString(lookupResult.value));
        } else {
          assertNull(lookupResult.payload);
        }
      }
    }
   
    lineFile.close();
  }
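
In the 4.x Lookup API, lookup(key, onlyMorePopular, num) returns at most num completions for the given surface form. The test passes onlyMorePopular=false and then checks two things per result: the value equals the maximum weight recorded for that title, and, when the suggester was built with payloads, the payload is simply that weight rendered as a string.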

  // concurrency
  private final class ChangeIndices extends Thread {
    @Override
    public void run() {
      try {
        final LineFileDocs docs = new LineFileDocs(random(), defaultCodecSupportsDocValues());
        int numDocs = 0;
        while (System.nanoTime() < endTimeNanos) {
          final int what = random().nextInt(3);
          final NodeState node = nodes[random().nextInt(nodes.length)];
          if (numDocs == 0 || what == 0) {
            node.writer.addDocument(docs.nextDoc());
            numDocs++;
          } else if (what == 1) {
            node.writer.updateDocument(new Term("docid", ""+random().nextInt(numDocs)),
                                        docs.nextDoc());
            numDocs++;
          } else {
            node.writer.deleteDocuments(new Term("docid", ""+random().nextInt(numDocs)));
          }
          // TODO: doc blocks too
          // ... (remainder of the run loop and the try/catch epilogue not shown)

    }
  }
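
endTimeNanos and nodes[] are fields of the enclosing test: each ChangeIndices thread randomly adds, updates, or deletes documents across the nodes until the deadline passes. A hypothetical driver (the thread count is arbitrary):

Thread[] changers = new Thread[2];
for (int i = 0; i < changers.length; i++) {
  changers[i] = new ChangeIndices();
  changers[i].start();
}
for (Thread changer : changers) {
  changer.join(); // wait until every thread has run past endTimeNanos
}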

  private static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean useCharFilter, boolean simple, boolean offsetsAreCorrect, RandomIndexWriter iw) throws IOException {

    final LineFileDocs docs = new LineFileDocs(random);
    Document doc = null;
    Field field = null, currentField = null;
    StringReader bogus = new StringReader("");
    if (iw != null) {
      doc = new Document();
      FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
      if (random.nextBoolean()) {
        ft.setStoreTermVectors(true);
        ft.setStoreTermVectorOffsets(random.nextBoolean());
        ft.setStoreTermVectorPositions(random.nextBoolean());
        if (ft.storeTermVectorPositions() && !PREFLEX_IMPERSONATION_IS_ACTIVE) {
          ft.setStoreTermVectorPayloads(random.nextBoolean());
        }
      }
      if (random.nextBoolean()) {
        ft.setOmitNorms(true);
      }
      String pf = _TestUtil.getPostingsFormat("dummy");
      boolean supportsOffsets = !doesntSupportOffsets.contains(pf);
      switch(random.nextInt(4)) {
        case 0: ft.setIndexOptions(IndexOptions.DOCS_ONLY); break;
        case 1: ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS); break;
        case 2: ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); break;
        default:
                if (supportsOffsets && offsetsAreCorrect) {
                  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
                } else {
                  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
                }
      }
      currentField = field = new Field("dummy", bogus, ft);
      doc.add(currentField);
    }
   
    try {
      for (int i = 0; i < iterations; i++) {
        String text;
       
        if (random.nextInt(10) == 7) {
          // real data from linedocs
          text = docs.nextDoc().get("body");
          if (text.length() > maxWordLength) {
           
            // Take a random slice from the text...:
            int startPos = random.nextInt(text.length() - maxWordLength);
            if (startPos > 0 && Character.isLowSurrogate(text.charAt(startPos))) {
              // ... (elided: move startPos off the low surrogate so the slice doesn't split a pair, then run the analysis checks on the slice)

   * Populates a writer with random documents; this must be fully reproducible with the seed!
   */
  public static void createRandomIndex(int numdocs, RandomIndexWriter writer, long seed) throws IOException {
    Random random = new Random(seed);
    // The primary source for our data is LineFileDocs; it's realistic.
    LineFileDocs lineFileDocs = new LineFileDocs(random);

    // TODO: we should add other fields that use things like docs&freqs but omit positions,
    // because linefiledocs doesn't cover all the possibilities.
    for (int i = 0; i < numdocs; i++) {
      Document document = lineFileDocs.nextDoc();
      // grab the title and add some SortedSet instances for fun
      String title = document.get("titleTokenized");
      String[] split = title.split("\\s+");
      for (String trash : split) {
        document.add(new SortedSetDocValuesField("sortedset", new BytesRef(trash)));
      }
      writer.addDocument(document);
    }
   
    lineFileDocs.close();
  }
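
Because all randomness flows from the seed argument, two writers filled with the same numdocs and seed receive identical documents, which is what makes this useful for duel-style codec tests. A hypothetical caller (leftWriter and rightWriter are two independent RandomIndexWriters over separate directories):

long seed = random().nextLong();
int numDocs = atLeast(100);
createRandomIndex(numDocs, leftWriter, seed);
createRandomIndex(numDocs, rightWriter, seed);
// both indexes now contain the same documents in the same order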

    IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
        new MockAnalyzer(random()));
    Similarity provider = new MySimProvider();
    config.setSimilarity(provider);
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
    final LineFileDocs docs = new LineFileDocs(random());
    int num = atLeast(100);
    for (int i = 0; i < num; i++) {
      Document doc = docs.nextDoc();
      float nextFloat = random().nextFloat();
      Field f = new TextField(floatTestField, "" + nextFloat, Field.Store.YES);
      f.setBoost(nextFloat);

      doc.add(f);
      writer.addDocument(doc);
      doc.removeField(floatTestField);
      if (rarely()) {
        writer.commit();
      }
    }
    writer.commit();
    writer.close();
    AtomicReader open = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir));
    NumericDocValues norms = open.getNormValues(floatTestField);
    assertNotNull(norms);
    for (int i = 0; i < open.maxDoc(); i++) {
      Document document = open.document(i);
      float expected = Float.parseFloat(document.get(floatTestField));
      assertEquals(expected, Float.intBitsToFloat((int)norms.get(i)), 0.0f);
    }
    open.close();
    dir.close();
    docs.close();
  }
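
MySimProvider and floatTestField are defined elsewhere in this test. Given the final assertion, the similarity presumably stores each field boost's raw float bits as the norm, which is why Float.intBitsToFloat recovers the boost exactly. A hypothetical fragment of such a computeNorm, assuming the 4.x long-valued norms API:

@Override
public long computeNorm(FieldInvertState state) {
  // keep the boost bit-for-bit instead of the usual lossy norm encoding
  return Float.floatToIntBits(state.getBoost());
}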


   */
  public static void createRandomIndex(int numdocs, RandomIndexWriter writer,
      long seed) throws IOException {
    Random random = new Random(seed);
    // The primary source for our data is LineFileDocs; it's realistic.
    LineFileDocs lineFileDocs = new LineFileDocs(random, false); // no docvalues in 4x
   
    // TODO: we should add other fields that use things like docs&freqs but omit positions,
    // because linefiledocs doesn't cover all the possibilities.
    for (int i = 0; i < numdocs; i++) {
      writer.addDocument(lineFileDocs.nextDoc());
    }
   
    lineFileDocs.close();
  }
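
The boolean second argument controls whether LineFileDocs decorates each document with DocValues fields; it is switched off here because 4.x back-compat tests cannot assume every codec supports DocValues (compare the defaultCodecSupportsDocValues() call in the @BeforeClass snippet above).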

 
  // Build FST for all unique terms in the test line docs
  // file, up until a time limit
  public void testRealTerms() throws Exception {

    final LineFileDocs docs = new LineFileDocs(random(), defaultCodecSupportsDocValues());
    final int RUN_TIME_MSEC = atLeast(500);
    final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
    final File tempDir = _TestUtil.getTempDir("fstlines");
    final Directory dir = newFSDirectory(tempDir);
    final IndexWriter writer = new IndexWriter(dir, conf);
    final long stopTime = System.currentTimeMillis() + RUN_TIME_MSEC;
    Document doc;
    int docCount = 0;
    while((doc = docs.nextDoc()) != null && System.currentTimeMillis() < stopTime) {
      writer.addDocument(doc);
      docCount++;
    }
    IndexReader r = DirectoryReader.open(writer, true);
    writer.close();
    // ... (the rest of the test builds the FST from the reader's terms; not shown)
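
The elided remainder walks every unique term in the reader into an FST until the time limit. A rough, hypothetical sketch of that step, mapping each term to its ordinal (assumes a mid-4.x FST API; in early 4.x releases PositiveIntOutputs.getSingleton() takes a boolean argument, and imports from org.apache.lucene.index and org.apache.lucene.util.fst are assumed):

PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
TermsEnum termsEnum = MultiFields.getTerms(r, "body").iterator(null);
IntsRef scratch = new IntsRef();
BytesRef term;
long ord = 0;
while ((term = termsEnum.next()) != null) {
  builder.add(Util.toIntsRef(term, scratch), ord++); // terms arrive in sorted order, as add() requires
}
FST<Long> fst = builder.finish();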
