Package org.apache.lucene.index

Examples of org.apache.lucene.index.RandomIndexWriter$MockIndexWriter
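
The examples below are taken from Lucene's own test suites and all follow the same pattern: create a RandomIndexWriter over a test Directory, add documents, obtain an IndexReader from the writer, then search or assert against it. As a quick orientation, here is a minimal sketch of that pattern; it assumes a test class extending LuceneTestCase (which provides newDirectory(), random(), newStringField() and newSearcher()), and its field names and assertions are illustrative rather than taken from any of the examples that follow.

  // Minimal sketch of the common RandomIndexWriter test pattern (illustrative only):
  public void testRandomIndexWriterSketch() throws Exception {
    Directory dir = newDirectory();                              // randomized test Directory
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);  // wraps IndexWriter with randomized settings
    Document doc = new Document();
    doc.add(newStringField("field", "value", Field.Store.NO));
    w.addDocument(doc);

    IndexReader reader = w.getReader();                          // near-real-time reader over the index
    w.close();

    IndexSearcher searcher = newSearcher(reader);
    TopDocs hits = searcher.search(new TermQuery(new Term("field", "value")), 10);
    assertEquals(1, hits.totalHits);                             // the single indexed document matches

    reader.close();
    dir.close();
  }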


  public void testMissingTermAndField() throws Exception {
    String fieldName = "field1";
    Directory rd = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), rd);
    Document doc = new Document();
    doc.add(newStringField(fieldName, "value1", Field.Store.NO));
    w.addDocument(doc);
    IndexReader reader = SlowCompositeReaderWrapper.wrap(w.getReader());
    assertTrue(reader.getContext() instanceof AtomicReaderContext);
    AtomicReaderContext context = (AtomicReaderContext) reader.getContext();
    w.close();

    DocIdSet idSet = termFilter(fieldName, "value1").getDocIdSet(context, context.reader().getLiveDocs());
    assertNotNull("must not be null", idSet);
    DocIdSetIterator iter = idSet.iterator();
    assertEquals(iter.nextDoc(), 0);


    rd.close();
  }
 
  public void testRandom() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    int num = atLeast(100);
    List<Term> terms = new ArrayList<Term>();
    for (int i = 0; i < num; i++) {
      String field = "field" + i;
      String string = _TestUtil.randomRealisticUnicodeString(random());
      terms.add(new Term(field, string));
      Document doc = new Document();
      doc.add(newStringField(field, string, Field.Store.NO));
      w.addDocument(doc);
    }
    IndexReader reader = w.getReader();
    w.close();
   
    IndexSearcher searcher = newSearcher(reader);
   
    int numQueries = atLeast(10);
    for (int i = 0; i < numQueries; i++) {

 
  @Override
  public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
   
    // Add series of docs with specific information for MoreLikeThis
    addDoc(writer, "lucene");
    addDoc(writer, "lucene release");

    reader = writer.getReader();
    writer.close();
    searcher = newSearcher(reader);
  }

    final String groupField = "hotel";
    FieldType customType = new FieldType();
    customType.setStored(true);

    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
        random(),
        dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT,
            new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
    boolean canUseDV = !"Lucene3x".equals(w.w.getConfig().getCodec().getName());
    boolean useDv = canUseDV && random().nextBoolean();

    // 0
    Document doc = new Document();
    addField(doc, groupField, "a", useDv);
    addField(doc, "airport", "ams", useDv);
    addField(doc, "duration", "5", useDv);
    w.addDocument(doc);

    // 1
    doc = new Document();
    addField(doc, groupField, "a", useDv);
    addField(doc, "airport", "dus", useDv);
    addField(doc, "duration", "10", useDv);
    w.addDocument(doc);

    // 2
    doc = new Document();
    addField(doc, groupField, "b", useDv);
    addField(doc, "airport", "ams", useDv);
    addField(doc, "duration", "10", useDv);
    w.addDocument(doc);
    w.commit(); // To ensure a second segment

    // 3
    doc = new Document();
    addField(doc, groupField, "b", useDv);
    addField(doc, "airport", "ams", useDv);
    addField(doc, "duration", "5", useDv);
    w.addDocument(doc);

    // 4
    doc = new Document();
    addField(doc, groupField, "b", useDv);
    addField(doc, "airport", "ams", useDv);
    addField(doc, "duration", "5", useDv);
    w.addDocument(doc);

    IndexSearcher indexSearcher = newSearcher(w.getReader());

    List<TermGroupFacetCollector.FacetEntry> entries = null;
    AbstractGroupFacetCollector groupedAirportFacetCollector = null;
    TermGroupFacetCollector.GroupedFacetResult airportResult = null;
   
    for (int limit : new int[] { 2, 10, 100, Integer.MAX_VALUE }) {
      // any of these limits is plenty for the data we have

      groupedAirportFacetCollector = createRandomCollector
        (useDv ? "hotel_dv" : "hotel",
         useDv ? "airport_dv" : "airport", null, false);
      indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
      int maxOffset = 5;
      airportResult = groupedAirportFacetCollector.mergeSegmentResults
        (Integer.MAX_VALUE == limit ? limit : maxOffset + limit, 0, false);
     
      assertEquals(3, airportResult.getTotalCount());
      assertEquals(0, airportResult.getTotalMissingCount());

      entries = airportResult.getFacetEntries(maxOffset, limit);
      assertEquals(0, entries.size());

      entries = airportResult.getFacetEntries(0, limit);
      assertEquals(2, entries.size());
      assertEquals("ams", entries.get(0).getValue().utf8ToString());
      assertEquals(2, entries.get(0).getCount());
      assertEquals("dus", entries.get(1).getValue().utf8ToString());
      assertEquals(1, entries.get(1).getCount());

      entries = airportResult.getFacetEntries(1, limit);
      assertEquals(1, entries.size());
      assertEquals("dus", entries.get(0).getValue().utf8ToString());
      assertEquals(1, entries.get(0).getCount());
    }

    AbstractGroupFacetCollector groupedDurationFacetCollector = createRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "duration_dv" : "duration", null, false);
    indexSearcher.search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
    TermGroupFacetCollector.GroupedFacetResult durationResult = groupedDurationFacetCollector.mergeSegmentResults(10, 0, false);
    assertEquals(4, durationResult.getTotalCount());
    assertEquals(0, durationResult.getTotalMissingCount());

    entries = durationResult.getFacetEntries(0, 10);
    assertEquals(2, entries.size());
    assertEquals("10", entries.get(0).getValue().utf8ToString());
    assertEquals(2, entries.get(0).getCount());
    assertEquals("5", entries.get(1).getValue().utf8ToString());
    assertEquals(2, entries.get(1).getCount());

    // 5
    doc = new Document();
    addField(doc, groupField, "b", useDv);
    // missing airport
    if (useDv) {
      addField(doc, "airport", "", useDv);
    }
    addField(doc, "duration", "5", useDv);
    w.addDocument(doc);

    // 6
    doc = new Document();
    addField(doc, groupField, "b", useDv);
    addField(doc, "airport", "bru", useDv);
    addField(doc, "duration", "10", useDv);
    w.addDocument(doc);

    // 7
    doc = new Document();
    addField(doc, groupField, "b", useDv);
    addField(doc, "airport", "bru", useDv);
    addField(doc, "duration", "15", useDv);
    w.addDocument(doc);

    // 8
    doc = new Document();
    addField(doc, groupField, "a", useDv);
    addField(doc, "airport", "bru", useDv);
    addField(doc, "duration", "10", useDv);
    w.addDocument(doc);

    indexSearcher.getIndexReader().close();
    indexSearcher = newSearcher(w.getReader());
    groupedAirportFacetCollector = createRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "airport_dv" : "airport", null, !useDv);
    indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
    airportResult = groupedAirportFacetCollector.mergeSegmentResults(3, 0, true);
    entries = airportResult.getFacetEntries(1, 2);
    assertEquals(2, entries.size());
    if (useDv) {
      assertEquals(6, airportResult.getTotalCount());
      assertEquals(0, airportResult.getTotalMissingCount());
      assertEquals("bru", entries.get(0).getValue().utf8ToString());
      assertEquals(2, entries.get(0).getCount());
      assertEquals("", entries.get(1).getValue().utf8ToString());
      assertEquals(1, entries.get(1).getCount());
    } else {
      assertEquals(5, airportResult.getTotalCount());
      assertEquals(1, airportResult.getTotalMissingCount());
      assertEquals("bru", entries.get(0).getValue().utf8ToString());
      assertEquals(2, entries.get(0).getCount());
      assertEquals("dus", entries.get(1).getValue().utf8ToString());
      assertEquals(1, entries.get(1).getCount());
    }

    groupedDurationFacetCollector = createRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "duration_dv" : "duration", null, false);
    indexSearcher.search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
    durationResult = groupedDurationFacetCollector.mergeSegmentResults(10, 2, true);
    assertEquals(5, durationResult.getTotalCount());
    assertEquals(0, durationResult.getTotalMissingCount());

    entries = durationResult.getFacetEntries(1, 1);
    assertEquals(1, entries.size());
    assertEquals("5", entries.get(0).getValue().utf8ToString());
    assertEquals(2, entries.get(0).getCount());

    // 9
    doc = new Document();
    addField(doc, groupField, "c", useDv);
    addField(doc, "airport", "bru", useDv);
    addField(doc, "duration", "15", useDv);
    w.addDocument(doc);

    // 10
    doc = new Document();
    addField(doc, groupField, "c", useDv);
    addField(doc, "airport", "dus", useDv);
    addField(doc, "duration", "10", useDv);
    w.addDocument(doc);

    indexSearcher.getIndexReader().close();
    indexSearcher = newSearcher(w.getReader());
    groupedAirportFacetCollector = createRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "airport_dv" : "airport", null, false);
    indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
    airportResult = groupedAirportFacetCollector.mergeSegmentResults(10, 0, false);
    entries = airportResult.getFacetEntries(0, 10);
    if (useDv) {
      assertEquals(8, airportResult.getTotalCount());
      assertEquals(0, airportResult.getTotalMissingCount());
      assertEquals(4, entries.size());
      assertEquals("", entries.get(0).getValue().utf8ToString());
      assertEquals(1, entries.get(0).getCount());
      assertEquals("ams", entries.get(1).getValue().utf8ToString());
      assertEquals(2, entries.get(1).getCount());
      assertEquals("bru", entries.get(2).getValue().utf8ToString());
      assertEquals(3, entries.get(2).getCount());
      assertEquals("dus", entries.get(3).getValue().utf8ToString());
      assertEquals(2, entries.get(3).getCount());
    } else {
      assertEquals(7, airportResult.getTotalCount());
      assertEquals(1, airportResult.getTotalMissingCount());
      assertEquals(3, entries.size());
      assertEquals("ams", entries.get(0).getValue().utf8ToString());
      assertEquals(2, entries.get(0).getCount());
      assertEquals("bru", entries.get(1).getValue().utf8ToString());
      assertEquals(3, entries.get(1).getCount());
      assertEquals("dus", entries.get(2).getValue().utf8ToString());
      assertEquals(2, entries.get(2).getCount());
    }

    groupedDurationFacetCollector = createRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "duration_dv" : "duration", "1", false);
    indexSearcher.search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
    durationResult = groupedDurationFacetCollector.mergeSegmentResults(10, 0, true);
    assertEquals(5, durationResult.getTotalCount());
    assertEquals(0, durationResult.getTotalMissingCount());

    entries = durationResult.getFacetEntries(0, 10);
    assertEquals(2, entries.size());
    assertEquals("10", entries.get(0).getValue().utf8ToString());
    assertEquals(3, entries.get(0).getCount());
    assertEquals("15", entries.get(1).getValue().utf8ToString());
    assertEquals(2, entries.get(1).getCount());

    w.close();
    indexSearcher.getIndexReader().close();
    dir.close();
  }

    final String groupField = "hotel";
    FieldType customType = new FieldType();
    customType.setStored(true);

    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
        random(),
        dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT,
            new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
    boolean useDv = false;

    // Cannot assert this since we use NoMergePolicy:
    w.setDoRandomForceMergeAssert(false);

    // 0
    Document doc = new Document();
    doc.add(new StringField("x", "x", Field.Store.NO));
    w.addDocument(doc);

    // 1
    doc = new Document();
    addField(doc, groupField, "a", useDv);
    doc.add(new StringField("airport", "ams", Field.Store.NO));
    w.addDocument(doc);

    w.commit();
    w.deleteDocuments(new TermQuery(new Term("airport", "ams")));

    // 2
    doc = new Document();
    addField(doc, groupField, "a", useDv);
    doc.add(new StringField("airport", "ams", Field.Store.NO));
    w.addDocument(doc);

    // 3
    doc = new Document();
    addField(doc, groupField, "a", useDv);
    doc.add(new StringField("airport", "dus", Field.Store.NO));

    w.addDocument(doc);

    // 4
    doc = new Document();
    addField(doc, groupField, "b", useDv);
    doc.add(new StringField("airport", "ams", Field.Store.NO));
    w.addDocument(doc);

    // 5
    doc = new Document();
    addField(doc, groupField, "b", useDv);
    doc.add(new StringField("airport", "ams", Field.Store.NO));
    w.addDocument(doc);

    // 6
    doc = new Document();
    addField(doc, groupField, "b", useDv);
    doc.add(new StringField("airport", "ams", Field.Store.NO));
    w.addDocument(doc);
    w.commit();

    // 7
    doc = new Document();
    doc.add(new StringField("x", "x", Field.Store.NO));
    w.addDocument(doc);
    w.commit();

    w.close();
    IndexSearcher indexSearcher = newSearcher(DirectoryReader.open(dir));
    AbstractGroupFacetCollector groupedAirportFacetCollector = createRandomCollector(groupField, "airport", null, true);
    indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
    TermGroupFacetCollector.GroupedFacetResult airportResult = groupedAirportFacetCollector.mergeSegmentResults(10, 0, false);
    assertEquals(3, airportResult.getTotalCount());

        System.out.println("  content=" + contentBrs[contentIDX]);
      }
    }

    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
        random,
        dir,
        newIndexWriterConfig(
            TEST_VERSION_CURRENT,
            new MockAnalyzer(random)
        )
    );
    boolean canUseDV = !"Lucene3x".equals(writer.w.getConfig().getCodec().getName());
    boolean useDv = canUseDV && !multipleFacetValuesPerDocument && random.nextBoolean();

    Document doc = new Document();
    Document docNoGroup = new Document();
    Document docNoFacet = new Document();
    Document docNoGroupNoFacet = new Document();
    Field group = newStringField("group", "", Field.Store.NO);
    Field groupDc = new SortedDocValuesField("group_dv", new BytesRef());
    if (useDv) {
      doc.add(groupDc);
      docNoFacet.add(groupDc);
    }
    doc.add(group);
    docNoFacet.add(group);
    Field[] facetFields;
    if (useDv) {
      assert !multipleFacetValuesPerDocument;
      facetFields = new Field[2];
      facetFields[0] = newStringField("facet", "", Field.Store.NO);
      doc.add(facetFields[0]);
      docNoGroup.add(facetFields[0]);
      facetFields[1] = new SortedDocValuesField("facet_dv", new BytesRef());
      doc.add(facetFields[1]);
      docNoGroup.add(facetFields[1]);
    } else {
      facetFields = multipleFacetValuesPerDocument ? new Field[2 + random.nextInt(6)] : new Field[1];
      for (int i = 0; i < facetFields.length; i++) {
        facetFields[i] = newStringField("facet", "", Field.Store.NO);
        doc.add(facetFields[i]);
        docNoGroup.add(facetFields[i]);
      }
    }
    Field content = newStringField("content", "", Field.Store.NO);
    doc.add(content);
    docNoGroup.add(content);
    docNoFacet.add(content);
    docNoGroupNoFacet.add(content);

    NavigableSet<String> uniqueFacetValues = new TreeSet<String>(new Comparator<String>() {

      @Override
      public int compare(String a, String b) {
        if (a == b) {
          return 0;
        } else if (a == null) {
          return -1;
        } else if (b == null) {
          return 1;
        } else {
          return a.compareTo(b);
        }
      }

    });
    Map<String, Map<String, Set<String>>> searchTermToFacetToGroups = new HashMap<String, Map<String, Set<String>>>();
    int facetWithMostGroups = 0;
    for (int i = 0; i < numDocs; i++) {
      final String groupValue;
      if (random.nextInt(24) == 17) {
        // So we test the "doc doesn't have the group'd
        // field" case:
        if (useDv) {
          groupValue = "";
        } else {
          groupValue = null;
        }
      } else {
        groupValue = groups.get(random.nextInt(groups.size()));
      }

      String contentStr = contentBrs[random.nextInt(contentBrs.length)];
      if (!searchTermToFacetToGroups.containsKey(contentStr)) {
        searchTermToFacetToGroups.put(contentStr, new HashMap<String, Set<String>>());
      }
      Map<String, Set<String>> facetToGroups = searchTermToFacetToGroups.get(contentStr);

      List<String> facetVals = new ArrayList<String>();
      if (useDv || random.nextInt(24) != 18) {
        if (useDv) {
          String facetValue = facetValues.get(random.nextInt(facetValues.size()));
          uniqueFacetValues.add(facetValue);
          if (!facetToGroups.containsKey(facetValue)) {
            facetToGroups.put(facetValue, new HashSet<String>());
          }
          Set<String> groupsInFacet = facetToGroups.get(facetValue);
          groupsInFacet.add(groupValue);
          if (groupsInFacet.size() > facetWithMostGroups) {
            facetWithMostGroups = groupsInFacet.size();
          }
          facetFields[0].setStringValue(facetValue);
          facetFields[1].setBytesValue(new BytesRef(facetValue));
          facetVals.add(facetValue);
        } else {
          for (Field facetField : facetFields) {
            String facetValue = facetValues.get(random.nextInt(facetValues.size()));
            uniqueFacetValues.add(facetValue);
            if (!facetToGroups.containsKey(facetValue)) {
              facetToGroups.put(facetValue, new HashSet<String>());
            }
            Set<String> groupsInFacet = facetToGroups.get(facetValue);
            groupsInFacet.add(groupValue);
            if (groupsInFacet.size() > facetWithMostGroups) {
              facetWithMostGroups = groupsInFacet.size();
            }
            facetField.setStringValue(facetValue);
            facetVals.add(facetValue);
          }
        }
      } else {
        uniqueFacetValues.add(null);
        if (!facetToGroups.containsKey(null)) {
          facetToGroups.put(null, new HashSet<String>());
        }
        Set<String> groupsInFacet = facetToGroups.get(null);
        groupsInFacet.add(groupValue);
        if (groupsInFacet.size() > facetWithMostGroups) {
          facetWithMostGroups = groupsInFacet.size();
        }
      }

      if (VERBOSE) {
        System.out.println("  doc content=" + contentStr + " group=" + (groupValue == null ? "null" : groupValue) + " facetVals=" + facetVals);
      }

      if (groupValue != null) {
        if (useDv) {
          groupDc.setBytesValue(new BytesRef(groupValue));
        }
        group.setStringValue(groupValue);
      } else if (useDv) {
        // DV cannot have missing values:
        groupDc.setBytesValue(new BytesRef());
      }
      content.setStringValue(contentStr);
      if (groupValue == null && facetVals.isEmpty()) {
        writer.addDocument(docNoGroupNoFacet);
      } else if (facetVals.isEmpty()) {
        writer.addDocument(docNoFacet);
      } else if (groupValue == null) {
        writer.addDocument(docNoGroup);
      } else {
        writer.addDocument(doc);
      }
    }

    DirectoryReader reader = writer.getReader();
    writer.close();

    return new IndexContext(searchTermToFacetToGroups, reader, numDocs, dir, facetWithMostGroups, numGroups, contentBrs, uniqueFacetValues, useDv);
  }

@SuppressCodecs("Lucene3x")
public class TestICUCollationDocValuesField extends LuceneTestCase {
 
  public void testBasic() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    Field field = newField("field", "", StringField.TYPE_STORED);
    ICUCollationDocValuesField collationField = new ICUCollationDocValuesField("collated", Collator.getInstance(ULocale.ENGLISH));
    doc.add(field);
    doc.add(collationField);

    field.setStringValue("ABC");
    collationField.setStringValue("ABC");
    iw.addDocument(doc);
   
    field.setStringValue("abc");
    collationField.setStringValue("abc");
    iw.addDocument(doc);
   
    IndexReader ir = iw.getReader();
    iw.close();
   
    IndexSearcher is = newSearcher(ir);
   
    SortField sortField = new SortField("collated", SortField.Type.STRING);
   

    Random random = random();
    directory = newDirectory();
    stopword = "" + randomChar();
    CharacterRunAutomaton stopset = new CharacterRunAutomaton(BasicAutomata.makeString(stopword));
    analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false, stopset);
    RandomIndexWriter iw = new RandomIndexWriter(random, directory, analyzer);
    Document doc = new Document();
    Field id = new StringField("id", "", Field.Store.NO);
    Field field = new TextField("field", "", Field.Store.NO);
    doc.add(id);
    doc.add(field);
   
    // index some docs
    int numDocs = atLeast(1000);
    for (int i = 0; i < numDocs; i++) {
      id.setStringValue(Integer.toString(i));
      field.setStringValue(randomFieldContents());
      iw.addDocument(doc);
    }
   
    // delete some docs
    int numDeletes = numDocs/20;
    for (int i = 0; i < numDeletes; i++) {
      Term toDelete = new Term("id", Integer.toString(random.nextInt(numDocs)));
      if (random.nextBoolean()) {
        iw.deleteDocuments(toDelete);
      } else {
        iw.deleteDocuments(new TermQuery(toDelete));
      }
    }
   
    reader = iw.getReader();
    s1 = newSearcher(reader);
    s2 = newSearcher(reader);
    iw.close();
  }

    dir.close();
  }
 
  public void testRanges() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    Field field = newField("field", "", StringField.TYPE_STORED);
    Collator collator = Collator.getInstance(); // uses -Dtests.locale
    if (random().nextBoolean()) {
      collator.setStrength(Collator.PRIMARY);
    }
    ICUCollationDocValuesField collationField = new ICUCollationDocValuesField("collated", collator);
    doc.add(field);
    doc.add(collationField);
   
    int numDocs = atLeast(500);
    for (int i = 0; i < numDocs; i++) {
      String value = _TestUtil.randomSimpleString(random());
      field.setStringValue(value);
      collationField.setStringValue(value);
      iw.addDocument(doc);
    }
   
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher is = newSearcher(ir);
   
    int numChecks = atLeast(100);
    for (int i = 0; i < numChecks; i++) {
      String start = _TestUtil.randomSimpleString(random());

  protected static RandomIndexWriter newRandomIndexWriter(final Directory dir,
                                                          final Analyzer analyzer,
                                                          final Codec codec,
                                                          final IndexWriterConfig config)
  throws IOException {
    final RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
    writer.setDoRandomForceMergeAssert(true);
    return writer;
  }

