// Keep the document buffer tiny so that indexing produces a few segments.
conf.setMaxBufferedDocs(2);
// Turn merging off so those segments are still separate once indexing is done.
conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
IndexWriter indexWriter = new IndexWriter(indexDir, conf);
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
FacetFields facetFields = new PayloadFacetFields(taxoWriter, fip);

// For every dimension: how many regular documents carry at least one category under it.
HashMap<String,Integer> expectedCounts = new HashMap<String,Integer>(DIMENSIONS.length);
int numDocs = atLeast(10);
for (int i = 0; i < numDocs; i++) {
  // Every regular document receives between one and three categories.
  int categoryCount = random.nextInt(3) + 1;
  ArrayList<CategoryPath> paths = new ArrayList<CategoryPath>(categoryCount);
  // A set is used because a dimension picked more than once for the same document
  // must still raise the expected count by exactly one.
  HashSet<String> seenDims = new HashSet<String>();
  // Counts down so the last path component runs from categoryCount - 1 to 0.
  for (int ord = categoryCount - 1; ord >= 0; ord--) {
    String dimension = DIMENSIONS[random.nextInt(DIMENSIONS.length)];
    seenDims.add(dimension);
    paths.add(new CategoryPath(dimension, Integer.toString(i), Integer.toString(ord)));
  }

  // Facet fields go in first, followed by the id and content fields.
  Document regular = new Document();
  facetFields.addFields(regular, paths);
  regular.add(new StringField("docid", Integer.toString(i), Store.YES));
  regular.add(new TextField("foo", "content" + i, Store.YES));
  indexWriter.addDocument(regular);

  // Bump the expected count once for each distinct dimension of this document.
  for (String dimension : seenDims) {
    Integer soFar = expectedCounts.get(dimension);
    int updated = (soFar == null) ? 1 : soFar.intValue() + 1;
    expectedCounts.put(dimension, Integer.valueOf(updated));
  }

  // About one time in five, also index an extra document tagged del=key with a
  // "dummy" category; these are the documents that are meant to be deleted.
  // They are deliberately not reflected in expectedCounts.
  if (random.nextDouble() < 0.2) {
    Document doomed = new Document();
    doomed.add(new StringField("del", "key", Store.NO));
    facetFields.addFields(doomed, Collections.singletonList(new CategoryPath("dummy")));
    indexWriter.addDocument(doomed);
  }
}
indexWriter.commit();