package it.unimi.dsi.mg4j.index;
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.mg4j.index.BitStreamIndex;
import it.unimi.dsi.mg4j.index.DiskBasedIndex;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.index.IndexIterator;
import it.unimi.dsi.mg4j.index.MultiTermIndexIterator;
import it.unimi.dsi.mg4j.query.nodes.Query;
import it.unimi.dsi.mg4j.query.nodes.QueryBuilderVisitorException;
import it.unimi.dsi.mg4j.query.parser.QueryParserException;
import it.unimi.dsi.mg4j.query.parser.SimpleParser;
import it.unimi.dsi.mg4j.search.DocumentIterator;
import it.unimi.dsi.mg4j.search.DocumentIteratorBuilderVisitor;
import it.unimi.dsi.mg4j.search.OrDocumentIterator;
import it.unimi.dsi.mg4j.search.visitor.AbstractDocumentIteratorVisitor;
import it.unimi.dsi.mg4j.tool.IndexBuilder;
import it.unimi.dsi.util.Interval;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.net.URISyntaxException;
import junit.framework.TestCase;
import org.apache.commons.configuration.ConfigurationException;
import it.unimi.dsi.mg4j.document.StringArrayDocumentCollection;
import it.unimi.dsi.mg4j.search.IntArrayIndexIterator;
/**
 * Tests for {@link MultiTermIndexIterator}.
 *
 * <p>Some tests run against a small on-disk index that {@link #setUp()} builds under a
 * temporary basename; {@link #tearDown()} removes the files created under that basename,
 * so that repeated runs do not fill the temporary directory.
 */
public class MultiTermIndexIteratorTest extends TestCase {
	/** The index built by {@link #setUp()} over the documents "a", "b" and "c". */
	private BitStreamIndex index;
	/** A parser using the term processor of {@link #index}. */
	private SimpleParser simpleParser;
	/** The basename of the files created by {@link #setUp()}, or <code>null</code> if none have been created. */
	private String basename;

	/**
	 * Builds a three-document index under a fresh temporary basename, opens its text index
	 * and creates a parser based on the index term processor.
	 */
	public void setUp() throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
		// The temporary file is used only to obtain a unique basename.
		basename = File.createTempFile( getClass().getSimpleName(), "test" ).getCanonicalPath();
		new IndexBuilder( basename, new StringArrayDocumentCollection( "a", "b", "c" ) ).run();
		index = DiskBasedIndex.getInstance( basename + "-text", true, true );
		simpleParser = new SimpleParser( index.termProcessor );
	}

	/** Drops the references to the fixture and deletes the files created by {@link #setUp()}. */
	public void tearDown() {
		index = null;
		simpleParser = null;
		deleteFilesWithBasename( basename );
		basename = null;
	}

	/**
	 * Deletes every file that lives in the same directory as the given basename and whose
	 * name starts with the name of the basename. This covers the placeholder temporary file
	 * and any file written using the basename as a prefix (such as <code>basename + "-text"</code>).
	 *
	 * <p>Files that cannot be deleted immediately are scheduled for deletion at JVM exit.
	 *
	 * @param basename the basename whose files must be removed; if <code>null</code>, nothing happens.
	 */
	private static void deleteFilesWithBasename( final String basename ) {
		if ( basename == null ) return;
		final File placeholder = new File( basename );
		final File directory = placeholder.getParentFile();
		if ( directory == null ) return;
		// listFiles() returns null if the directory cannot be read.
		final File[] candidate = directory.listFiles();
		if ( candidate == null ) return;
		final String prefix = placeholder.getName();
		for ( File file : candidate ) {
			if ( file.getName().startsWith( prefix ) && ! file.delete() ) file.deleteOnExit();
		}
	}

	/**
	 * Checks that skipping to document 2 on the iterator built from the query
	 * <code>a + b + c</code> returns 2.
	 */
	public void testSkipBug() throws QueryParserException, QueryBuilderVisitorException, IOException {
		Query query = simpleParser.parse( "a + b + c" );
		DocumentIteratorBuilderVisitor documentIteratorBuilderVisitor = new DocumentIteratorBuilderVisitor( null, index, Integer.MAX_VALUE );
		DocumentIterator documentIterator = query.accept( documentIteratorBuilderVisitor );
		try {
			assertEquals( 2, documentIterator.skipTo( 2 ) );
		}
		finally {
			// Dispose the iterator even when the assertion fails.
			documentIterator.dispose();
		}
	}

	/**
	 * Merges three in-memory index iterators and checks, document by document, the
	 * intervals, the count and the positions returned by the resulting
	 * {@link MultiTermIndexIterator}.
	 */
	public void test() throws IOException {
		// Documents 0, 1, 2.
		IndexIterator i0 = new IntArrayIndexIterator( new int[] { 0, 1, 2 },
				new int[][] {
				{ 0, 3 },
				{ 0 },
				{ 0 },
				} );
		// Documents 0, 2.
		IndexIterator i1 = new IntArrayIndexIterator( new int[] { 0, 2 },
				new int[][] {
				{ 1 },
				{ 1 },
				} );
		// Documents 0, 1, 3.
		IndexIterator i2 = new IntArrayIndexIterator( new int[] { 0, 1, 3 },
				new int[][] {
				{ 2 },
				{ 2 },
				{ 0 },
				} );
		MultiTermIndexIterator multiTermIndexIterator = (MultiTermIndexIterator)MultiTermIndexIterator.getInstance( i0, i1, i2 );
		assertEquals( 3, multiTermIndexIterator.frequency() );

		assertTrue( multiTermIndexIterator.hasNext() );
		assertTrue( multiTermIndexIterator.hasNext() ); // To increase coverage

		// Document 0: the expected positions are 0, 1, 2, 3.
		assertEquals( 0, multiTermIndexIterator.nextDocument() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() ); // To increase coverage
		assertEquals( Interval.valueOf( 0 ), multiTermIndexIterator.intervalIterator().nextInterval() );
		assertEquals( Interval.valueOf( 1 ), multiTermIndexIterator.intervalIterator().nextInterval() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() );
		// Positions are queried in the middle of the interval enumeration.
		assertEquals( 4, multiTermIndexIterator.count() );
		int[] position = multiTermIndexIterator.positionArray();
		assertEquals( 0, position[ 0 ] );
		assertEquals( 1, position[ 1 ] );
		assertEquals( 2, position[ 2 ] );
		assertEquals( 3, position[ 3 ] );
		assertEquals( Interval.valueOf( 2 ), multiTermIndexIterator.intervalIterator().nextInterval() );
		position = new int[ 4 ];
		multiTermIndexIterator.positions( position );
		assertEquals( 0, position[ 0 ] );
		assertEquals( 1, position[ 1 ] );
		assertEquals( 2, position[ 2 ] );
		assertEquals( 3, position[ 3 ] );
		assertEquals( Interval.valueOf( 3 ), multiTermIndexIterator.intervalIterator().nextInterval() );
		IntIterator positions = multiTermIndexIterator.positions();
		assertEquals( 0, positions.nextInt() );
		assertEquals( 1, positions.nextInt() );
		assertEquals( 2, positions.nextInt() );
		assertEquals( 3, positions.nextInt() );
		assertFalse( positions.hasNext() );
		assertFalse( multiTermIndexIterator.intervalIterator().hasNext() );
		assertFalse( multiTermIndexIterator.intervalIterator().hasNext() ); // To increase coverage

		// Document 1: the expected positions are 0, 2.
		assertEquals( 1, multiTermIndexIterator.nextDocument() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() ); // To increase coverage
		assertEquals( Interval.valueOf( 0 ), multiTermIndexIterator.intervalIterator().nextInterval() );
		assertEquals( Interval.valueOf( 2 ), multiTermIndexIterator.intervalIterator().nextInterval() );
		assertEquals( 2, multiTermIndexIterator.count() );
		position = multiTermIndexIterator.positionArray();
		assertEquals( 0, position[ 0 ] );
		assertEquals( 2, position[ 1 ] );
		positions = multiTermIndexIterator.positions();
		assertEquals( 0, positions.nextInt() );
		assertEquals( 2, positions.nextInt() );
		assertFalse( positions.hasNext() );
		assertFalse( multiTermIndexIterator.intervalIterator().hasNext() );

		// Document 2: the expected positions are 0, 1.
		assertEquals( 2, multiTermIndexIterator.nextDocument() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() ); // To increase coverage
		assertEquals( Interval.valueOf( 0 ), multiTermIndexIterator.intervalIterator().nextInterval() );
		assertEquals( Interval.valueOf( 1 ), multiTermIndexIterator.intervalIterator().nextInterval() );
		assertEquals( 2, multiTermIndexIterator.count() );
		position = multiTermIndexIterator.positionArray();
		assertEquals( 0, position[ 0 ] );
		assertEquals( 1, position[ 1 ] );
		positions = multiTermIndexIterator.positions();
		assertEquals( 0, positions.nextInt() );
		assertEquals( 1, positions.nextInt() );
		assertFalse( positions.hasNext() );
		assertFalse( multiTermIndexIterator.intervalIterator().hasNext() );

		// Document 3: here we get the iterator of the underlying IndexIterator
		assertEquals( 3, multiTermIndexIterator.nextDocument() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() );
		assertEquals( Interval.valueOf( 0 ), multiTermIndexIterator.intervalIterator().nextInterval() );
		assertEquals( 1, multiTermIndexIterator.count() );
		position = multiTermIndexIterator.positionArray();
		assertEquals( 0, position[ 0 ] );
		positions = multiTermIndexIterator.positions();
		assertEquals( 0, positions.nextInt() );
		assertFalse( positions.hasNext() );
		assertFalse( multiTermIndexIterator.intervalIterator().hasNext() );

		// The end
		assertFalse( multiTermIndexIterator.hasNext() );
		assertFalse( multiTermIndexIterator.hasNext() ); // To increase coverage
	}

	/**
	 * Regression test contributed by Fabien Campagne: invokes <code>positionArray()</code>,
	 * through a visitor, on the index iterators of a disjunction containing multiterm
	 * iterators. The test has no explicit assertion: it passes if no exception is thrown.
	 */
	public void testMG4JMultiTermPositionIssue() throws IllegalAccessException, NoSuchMethodException, ConfigurationException, IOException, InvocationTargetException, InstantiationException, ClassNotFoundException, URISyntaxException {
		// This test builds its own index, under a basename distinct from the one used by setUp().
		final String basename = File.createTempFile( getClass().getSimpleName(), "test" ).getCanonicalPath();
		try {
			new IndexBuilder( basename, new StringArrayDocumentCollection(
					"A B C D E F F G G",
					"G A T H S K L J W L",
					"E S K D L J F K L S J D L S J D",
					"E B"
			) ).run();
			final Index textIndex = DiskBasedIndex.getInstance( basename + "-text", true, true );

			// Hand-built iterator for the query A | B+C+G | W | S+J
			final DocumentIterator iterator = OrDocumentIterator.getInstance(
					textIndex.documents("A"),
					MultiTermIndexIterator.getInstance(
							textIndex.documents("B"),
							textIndex.documents("C"),
							textIndex.documents("G")
					),
					textIndex.documents("W"),
					MultiTermIndexIterator.getInstance(
							textIndex.documents("S"),
							textIndex.documents("J")
					));
			try {
				// A one-element array, so that the anonymous visitor can read the current document.
				final int[] currDoc = new int[ 1 ];
				// A visitor invoking positionArray() on IndexIterators positioned on the current document.
				AbstractDocumentIteratorVisitor visitor = new AbstractDocumentIteratorVisitor() {
					public Boolean visit(IndexIterator indexIterator) throws IOException {
						if (indexIterator.count() > 0 && indexIterator.document() == currDoc[ 0 ] ) indexIterator.positionArray();
						return Boolean.TRUE;
					}
				};
				// Skip to each document in turn, visiting the iterator only when the skip lands exactly on it.
				for (int document = 0; document < textIndex.numberOfDocuments; document++) {
					currDoc[ 0 ] = iterator.skipTo(document);
					if (document == currDoc[ 0 ]) {
						iterator.accept(visitor); // see the visitor defined above.
					}
				}
				// Visit whatever documents are left after the skips.
				while( iterator.hasNext() ) {
					currDoc[ 0 ] = iterator.nextDocument();
					iterator.accept( visitor );
				}
			}
			finally {
				// Dispose the iterator before the underlying files are deleted.
				iterator.dispose();
			}
		}
		finally {
			deleteFilesWithBasename( basename );
		}
	}
}