package it.unimi.dsi.mg4j.test;
/*
* MG4J: Managing Gigabytes for Java
*
* Copyright (C) 2005-2010 Sebastiano Vigna
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.mg4j.index.BitStreamIndex;
import it.unimi.dsi.mg4j.index.DiskBasedIndex;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.index.IndexIterator;
import it.unimi.dsi.mg4j.index.IndexReader;
import it.unimi.dsi.mg4j.index.remote.IndexServer;
import it.unimi.dsi.util.Interval;
import it.unimi.dsi.mg4j.search.IntervalIterator;
import it.unimi.dsi.Util;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.net.InetAddress;
import java.net.URISyntaxException;
import junit.framework.Assert;
import org.apache.log4j.Logger;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.Switch;
import com.martiansoftware.jsap.UnflaggedOption;
/**
 * Compares the {@link IndexIterator}s of two indices that are expected to be equal.
 *
 * <p>Given two index basenames, this tool walks every term of the first index and checks
 * that the index iterators obtained from both indices return the same results: document
 * pointers, frequencies, counts, positions (through every positions-related method) and
 * intervals. A mismatch on a term is printed and counted, and the comparison then moves
 * on to the next term; the final message says whether any term failed.
 *
 * @author Alessandro Arrabito
 */
public class RemoteIndexIteratorTest {
    @SuppressWarnings("unused")
    private static final Logger LOGGER = Util.getLogger( RemoteIndexIteratorTest.class );

    /** If true, the command line is ignored, a local index server is started and the test runs against it. */
    private static final boolean _DEBUG_SERVER = false;
    /** If true (and {@link #_DEBUG_SERVER} is false), the command line is ignored in favour of hard-coded cluster arguments. */
    private static final boolean _DEBUG_CLUSTER = false;

    /** The reference index; its terms drive the comparison. */
    private static BitStreamIndex firstIndex;
    /** The index compared against {@link #firstIndex}. */
    private static Index secondIndex;
    private static String firstBaseName;
    private static String secondBaseName;
    /** If true, terms are looked up in the second index by their text rather than by their number. */
    private static boolean textTerm = false;
    /** Number of terms for which the last run of {@link #testIndexIterator()} found a mismatch. */
    private static int failedTerms;

    /**
     * Compares, term by term, the iterators returned by the two indices.
     *
     * <p>An {@link AssertionError} raised while comparing a term is printed and counted in
     * {@link #failedTerms}; the remaining terms are still compared. Both index readers are
     * closed before returning.
     *
     * @throws IOException if reading from (or closing) either index fails.
     */
    public static void testIndexIterator() throws IOException {
        failedTerms = 0;
        final IndexReader firstIndexReader = firstIndex.getReader();
        try {
            final IndexReader secondIndexReader = secondIndex.getReader( 1000 );
            try {
                for ( int i = 0; i < firstIndex.numberOfTerms; i++ ) {
                    try {
                        System.out.println( "term: " + i );
                        compareDocumentsAndPositions( firstIndexReader, secondIndexReader, i );
                        comparePositionArrays( firstIndexReader, secondIndexReader, i );
                        comparePositionIterators( firstIndexReader, secondIndexReader, i );
                        compareIntervalIterators( firstIndexReader, secondIndexReader, i );
                    }
                    catch ( AssertionError ae ) {
                        // Remember the failure so that the caller does not report success.
                        failedTerms++;
                        System.out.println( "Error on Term:" + i );
                        ae.printStackTrace();
                    }
                }
            }
            finally {
                secondIndexReader.close();
            }
        }
        finally {
            firstIndexReader.close();
        }
    }

    /** Opens the iterator of the given term on the second index, by term text or by term number depending on {@link #textTerm}. */
    private static IndexIterator secondDocuments( final IndexReader secondIndexReader, final int term ) throws IOException {
        return textTerm
            ? secondIndexReader.documents( firstIndex.termMap.list().get( term ) )
            : secondIndexReader.documents( term );
    }

    /** Compares hasNext(), frequency, document pointers, counts and the output of positions(int[]). */
    private static void compareDocumentsAndPositions( final IndexReader firstIndexReader, final IndexReader secondIndexReader, final int term ) throws IOException {
        final IndexIterator firstIterator = firstIndexReader.documents( term );
        final IndexIterator secondIterator = secondDocuments( secondIndexReader, term );

        Assert.assertEquals( firstIterator.hasNext(), secondIterator.hasNext() );
        Assert.assertEquals( firstIterator.frequency(), secondIterator.frequency() );

        while ( firstIterator.hasNext() ) {
            final int firstDocument = firstIterator.nextDocument();
            final int secondDocument = secondIterator.nextDocument();
            Assert.assertEquals( firstDocument, secondDocument );

            final int count = firstIterator.count();
            Assert.assertEquals( count, secondIterator.count() );

            // The buffers are sized from the count rather than a fixed 1000 entries.
            // NOTE(review): per the MG4J Javadoc, positions(int[]) stores nothing and returns a
            // negated count when the array is too small, which would skip the loop below; confirm.
            final int[] firstPos = new int[ count ];
            final int[] secondPos = new int[ count ];
            final int fRet = firstIterator.positions( firstPos );
            final int sRet = secondIterator.positions( secondPos );
            System.out.println( fRet + " " + sRet );
            Assert.assertEquals( fRet, sRet );
            for ( int j = 0; j < fRet; j++ ) Assert.assertEquals( firstPos[ j ], secondPos[ j ] );
        }
    }

    /** Compares the arrays returned by positionArray(), document by document. */
    private static void comparePositionArrays( final IndexReader firstIndexReader, final IndexReader secondIndexReader, final int term ) throws IOException {
        // Fresh iterators are required: the previous pass has already consumed its own.
        final IndexIterator firstIterator = firstIndexReader.documents( term );
        final IndexIterator secondIterator = secondDocuments( secondIndexReader, term );

        while ( firstIterator.hasNext() ) {
            // Both iterators must be advanced together so that they stay on the same document.
            Assert.assertEquals( firstIterator.nextDocument(), secondIterator.nextDocument() );

            final int count = firstIterator.count();
            Assert.assertEquals( count, secondIterator.count() );

            // NOTE(review): per the MG4J Javadoc only the first count() entries of the returned
            // array are meaningful, so the array lengths are not compared; confirm.
            final int[] firstPos = firstIterator.positionArray();
            final int[] secondPos = secondIterator.positionArray();
            Assert.assertTrue( firstPos.length >= count );
            Assert.assertTrue( secondPos.length >= count );
            for ( int j = 0; j < count; j++ ) Assert.assertEquals( firstPos[ j ], secondPos[ j ] );
        }
    }

    /** Compares the IntIterators returned by positions(), including skip() on an exhausted iterator. */
    private static void comparePositionIterators( final IndexReader firstIndexReader, final IndexReader secondIndexReader, final int term ) throws IOException {
        final IndexIterator firstIterator = firstIndexReader.documents( term );
        final IndexIterator secondIterator = secondDocuments( secondIndexReader, term );

        while ( firstIterator.hasNext() ) {
            firstIterator.next();
            secondIterator.next();
            final IntIterator firstIntIt = firstIterator.positions();
            final IntIterator secondIntIt = secondIterator.positions();

            while ( firstIntIt.hasNext() ) Assert.assertEquals( firstIntIt.nextInt(), secondIntIt.nextInt() );

            // Skipping past the end must be handled the same way by both implementations.
            Assert.assertEquals( firstIntIt.skip( 2 ), secondIntIt.skip( 2 ) );
            if ( firstIntIt.hasNext() ) Assert.assertEquals( firstIntIt.nextInt(), secondIntIt.nextInt() );

            Assert.assertEquals( firstIntIt.skip( 9999999 ), secondIntIt.skip( 9999999 ) );
            if ( firstIntIt.hasNext() ) Assert.assertEquals( firstIntIt.nextInt(), secondIntIt.nextInt() );
        }
    }

    /** Compares the intervals returned by intervalIterator(), document by document. */
    private static void compareIntervalIterators( final IndexReader firstIndexReader, final IndexReader secondIndexReader, final int term ) throws IOException {
        final IndexIterator firstIterator = firstIndexReader.documents( term );
        final IndexIterator secondIterator = secondDocuments( secondIndexReader, term );

        while ( firstIterator.hasNext() ) {
            firstIterator.next();
            secondIterator.next();
            final IntervalIterator firstIntervalIt = firstIterator.intervalIterator( firstIndex );
            final IntervalIterator secondIntervalIt = secondIterator.intervalIterator( secondIndex );

            while ( firstIntervalIt.hasNext() ) {
                final Interval firstIntv = firstIntervalIt.nextInterval();
                final Interval secondIntv = secondIntervalIt.nextInterval();
                System.out.println( "left:" + firstIntv.left + " " + "right:" + firstIntv.right );
                Assert.assertEquals( firstIntv.left, secondIntv.left );
                Assert.assertEquals( firstIntv.right, secondIntv.right );
            }
        }
    }

    /**
     * Parses the command line, loads the two indices and runs {@link #testIndexIterator()}.
     *
     * <p>Expected arguments: <code>[-t] basename_1 basename_2</code>. With <code>-t</code>,
     * terms are looked up in the second index by text instead of by number. When one of the
     * debug flags is set, the hard-coded debug arguments replace the command line.
     *
     * @param arg the command-line arguments.
     */
    public static void main( final String arg[] ) throws FileNotFoundException, IOException, ClassNotFoundException,
            IllegalArgumentException, SecurityException, IllegalAccessException, JSAPException, URISyntaxException,
            org.apache.commons.configuration.ConfigurationException, InterruptedException, InstantiationException, InvocationTargetException, NoSuchMethodException {
        // Developer-machine arguments, used only when one of the debug flags is enabled.
        final String[] debugServerArg = "/home/alex/develop/MG4J/alex/sample/DOCS-text mg4j://localhost:9090".split( " " );
        final String[] debugClusterArg = "-t /home/alex/develop/MG4J/alex/sample/DOCS-text /home/alex/develop/MG4J/alex/sample/DOCS-split".split( " " );

        final SimpleJSAP jsap = new SimpleJSAP( "java IndexIteratorTest", "Compare IndexIterator of equals indexes." +
            "\nGiven two index basename, IndexIteratorTest compare that every IndexIterator method give the same results.", new Parameter[] {
            new UnflaggedOption( "basename_1", JSAP.STRING_PARSER, JSAP.REQUIRED, "The basename of the first index." ),
            new Switch( "text_term", 't', "use text term during document method invocation on second index" ),
            new UnflaggedOption( "basename_2", JSAP.STRING_PARSER, JSAP.REQUIRED, "The basename of the second index." ) } );

        final JSAPResult jsapResult = jsap.parse( _DEBUG_SERVER ? debugServerArg : ( _DEBUG_CLUSTER ? debugClusterArg : arg ) );
        if ( !jsapResult.contains( "basename_1" ) || !jsapResult.contains( "basename_2" ) ) return;

        firstBaseName = jsapResult.getString( "basename_1" );
        secondBaseName = jsapResult.getString( "basename_2" );
        firstIndex = DiskBasedIndex.getInstance( firstBaseName, true, true );
        textTerm = jsapResult.getBoolean( "text_term" );

        if ( _DEBUG_SERVER ) {
            // Serve the first index from a background thread, then give the server time to come up.
            new Thread() {
                @Override
                public void run() {
                    try {
                        IndexServer.start( Index.getInstance( firstBaseName ), InetAddress.getLocalHost(), 9090, false );
                    }
                    catch ( Exception e ) {
                        e.printStackTrace();
                    }
                }
            }.start();
            Thread.sleep( 3000 );
        }

        secondIndex = Index.getInstance( secondBaseName );

        final long startTime = System.currentTimeMillis();
        testIndexIterator();
        final long endTime = System.currentTimeMillis();

        // Report success only if no term produced a mismatch.
        if ( failedTerms == 0 ) System.out.println( "Test terminate succesfully in:" + ( endTime - startTime ) / 1000 + " sec." );
        else System.out.println( "Test FAILED on " + failedTerms + " term(s) in:" + ( endTime - startTime ) / 1000 + " sec." );
    }
}
/* Test Results and conclusion
*
* 1(BUG) -FastBufferedInputStream BUG:during the test occour this exception "java.lang.ArrayIndexOutOfBoundsException" but this doesn't happen
* with FileInputStream.
* 2(?) -ClientInputStream implement Repositionable interface, the method position use skip method with negative value too, the strange thing
* is that this implementation is good but for the base class InputStream skip method is able to skip only in forward direction.
* 3(OK) - RemoteBitStreamIndex execute the test succesfully but the test make intensive use of method getLong on offset object a buffered
* version of RemoteLongList gan give better results.
* 4(Slow) - The test is very slow with a RemoteIndex.
* 5(!) - The test evidentiate a limitation on RemoteIndex, the RemoteDocumentItarator cannot use the buffer availability,
* to get many value from the server on a single request, this cause problem with other method that evaluate position.
*
*/