// Index the source documents: direct structure first, then the inverted one.
Collection col = new CollectionDocumentList(sourceDocs, "filename");
indexer.createDirectIndex(new Collection[]{col});
indexer.createInvertedIndex();

// Obtain the Index from the configured path; the "fields" prefix is used when
// field information is expected, the configured default prefix otherwise.
Index index;
if (fieldsExpected) {
	index = Index.createIndex(ApplicationSetup.TERRIER_INDEX_PATH, "fields");
} else {
	index = Index.createIndex(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX);
}
assertNotNull(index);

// The meta index must exist and return the expected "filename" values
// for document ids 0 and 1.
MetaIndex meta = index.getMetaIndex();
assertNotNull(meta);
assertEquals("doc1", index.getMetaIndex().getItem("filename", 0));
assertEquals("doc2", index.getMetaIndex().getItem("filename", 1));

// Shared handles, (re)assigned by the checks that follow.
IterablePosting ip = null;
BitPostingIndexInputStream bpiis = null;
/** INVERTED FILE */
Lexicon<String> lexicon = index.getLexicon();
/**
 * Test {@link IterablePosting} entries from a {@link InvertedIndex}.
 * For every entry of termStrings, the postings are walked and compared
 * against invIds (ids), invTfs (frequencies), doclens (document lengths)
 * and, when fields are expected, invFfs (per-field frequencies).
 */
InvertedIndex invertedIndex = index.getInvertedIndex();
assertNotNull(invertedIndex);
// for each term
for (int t = 0; t < termStrings.length; t++) {
	LexiconEntry le = lexicon.getLexiconEntry(termStrings[t]);
	assertNotNull(le);
	ip = invertedIndex.getPostings((BitIndexPointer) le);
	// for each document; d counts the postings seen so far
	int d = 0;
	while (ip.next() != IterablePosting.EOL) {
		assertEquals(invIds[t][d], ip.getId());
		assertEquals(invTfs[t][d], ip.getFrequency());
		assertEquals(doclens[invIds[t][d]], ip.getDocumentLength());
		if (fieldsExpected) {
			assertEquals(2, invFfs[t][d].length);
			for (int f = 0; f < 2; f++) {
				assertEquals(invFfs[t][d][f], ((FieldIterablePosting) ip).getFieldFrequencies()[f]);
			}
		}
		d++;
	}
	ip.close();
	// every expected posting must have been seen; without this check a
	// posting list shorter than expected would pass silently
	assertEquals(invIds[t].length, d);
}
// post-check: the posting list of the last term (already closed above)
// must keep reporting EOL
assertEquals(IterablePosting.EOL, ip.next());
/**
 * Test {@link IterablePosting} entries from a {@link InvertedIndexInputStream}.
 * The stream is expected to yield exactly one posting list per row of
 * invIds, in the same order as the rows.
 */
// NOTE(review): bpiis is reassigned further down without a visible close();
// confirm the stream opened here is released somewhere.
bpiis = (BitPostingIndexInputStream) index.getIndexStructureInputStream("inverted");
assertNotNull(bpiis);
// for each term
for (int t = 0; t < invIds.length; t++) {
	assertTrue(bpiis.hasNext());
	ip = bpiis.next();
	assertNotNull(ip);
	// for each document; d counts the postings seen so far
	int d = 0;
	while (ip.next() != IterablePosting.EOL) {
		assertEquals(invIds[t][d], ip.getId());
		assertEquals(invTfs[t][d], ip.getFrequency());
		assertEquals(doclens[invIds[t][d]], ip.getDocumentLength());
		if (fieldsExpected) {
			assertEquals(2, invFfs[t][d].length);
			for (int f = 0; f < 2; f++) {
				assertEquals(invFfs[t][d][f], ((FieldIterablePosting) ip).getFieldFrequencies()[f]);
			}
		}
		d++;
	}
	// every expected posting must have been seen; without this check a
	// posting list shorter than expected would pass silently
	assertEquals(invIds[t].length, d);
}
// post-check: no posting lists may remain beyond the expected ones
assertFalse(bpiis.hasNext());
/**
 * Test posting array entries from a {@link InvertedIndex}.
 * Row 0 of the returned array is compared with the expected ids, row 1
 * with the expected frequencies and, when fields are expected, rows 2
 * and 3 with the expected per-field frequencies.
 */
// for each term
for (int ti = 0; ti < termStrings.length; ti++) {
	LexiconEntry entry = lexicon.getLexiconEntry(termStrings[ti]);
	assertNotNull(entry);
	int[][] postings = invertedIndex.getDocuments(entry);
	// at least 2 rows (ids, frequencies); with fields at least 4, as 2
	// more rows hold the fields (optionally more for the blocks)
	assertTrue(postings.length >= (fieldsExpected ? 4 : 2));
	// the number of postings must match the expectation
	assertEquals(invIds[ti].length, postings[0].length);
	assertEquals(invTfs[ti].length, postings[1].length);
	// for each document
	for (int di = 0; di < postings[0].length; di++) {
		// id of the posting
		assertEquals(invIds[ti][di], postings[0][di]);
		// frequency of the term in that document
		assertEquals(invTfs[ti][di], postings[1][di]);
		if (!fieldsExpected) {
			continue;
		}
		// exactly two indexed fields are expected
		assertEquals(2, invFfs[ti][di].length);
		// frequency within each field
		for (int fi = 0; fi < 2; fi++) {
			assertEquals(invFfs[ti][di][fi], postings[2+fi][di]);
		}
	}
}
/** DIRECT FILE */
if (directExpected) {
DocumentIndex documentIndex = index.getDocumentIndex();
assertNotNull(documentIndex);
/**
 * Test {@link IterablePosting} entries from a {@link DirectIndex}.
 * For every document, each posting's id is resolved to a term through the
 * lexicon and checked against dirTfs (frequencies), doclens (document
 * lengths) and, when fields are expected, dirFfs (per-field frequencies).
 */
DirectIndex directIndex = index.getDirectIndex();
assertNotNull(directIndex);
// for each document
for (int d = 0; d < dirTfs.length; d++) {
	DocumentIndexEntry de = documentIndex.getDocumentEntry(d);
	assertNotNull(de);
	ip = directIndex.getPostings((BitIndexPointer) de);
	// with fields, the same posting object is also accessed as a FieldPosting
	FieldPosting fp = fieldsExpected ? (FieldPosting)ip : null;
	// for each term; countFoundTerms counts the postings seen so far
	int countFoundTerms = 0;
	while (ip.next() != IterablePosting.EOL) {
		int termid = ip.getId();
		assertTrue(termid >= 0);
		String term = lexicon.getLexiconEntry(termid).getKey();
		assertNotNull(term);
		countFoundTerms++;
		assertTrue(dirTfs[d].containsKey(term));
		assertEquals(dirTfs[d].get(term), ip.getFrequency());
		assertEquals(doclens[d], ip.getDocumentLength());
		if (fieldsExpected) {
			assertEquals(2, fp.getFieldFrequencies().length);
			for (int f = 0; f < 2; f++) {
				assertEquals(dirFfs[d].get(term)[f], fp.getFieldFrequencies()[f]);
			}
		}
	}
	// the document must contain exactly the expected number of terms
	assertEquals(dirTfs[d].size() ,countFoundTerms);
	ip.close();
}
// post-check: the posting list of the last document (already closed above)
// must keep reporting EOL
assertEquals(IterablePosting.EOL, ip.next());
/**
* Test {@link IterablePosting} entries from a {@link DirectIndexInputStream}
*/
bpiis = (BitPostingIndexInputStream) index.getIndexStructureInputStream("direct");
assertNotNull(bpiis);
// for each document
for (int d = 0; d < dirTfs.length; d++) {
assertTrue(bpiis.hasNext());
ip = bpiis.next();