package cc.mallet.grmm.test;
import junit.framework.TestCase;
import junit.framework.Test;
import junit.framework.TestSuite;
import java.io.IOException;
import java.io.StringReader;
import java.util.regex.Pattern;
import cc.mallet.extract.StringTokenization;
import cc.mallet.grmm.learning.GenericAcrfData2TokenSequence;
import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.iterator.LineGroupIterator;
import cc.mallet.types.*;
import cc.mallet.types.tests.TestSerializable;
/**
* Created: Sep 15, 2005
*
* @author <A HREF="mailto:casutton@cs.umass.edu">casutton@cs.umass.edu</A>
* @version $Id: TestGenericAcrfData2TokenSequence.java,v 1.1 2007/10/22 21:37:41 mccallum Exp $
*/
public class TestGenericAcrfData2TokenSequence extends TestCase {

  /** The {@code ^$} pattern that every test hands to {@link LineGroupIterator} as its second argument. */
  private static final Pattern EMPTY_LINE = Pattern.compile ("^$");

  // Four lines; each has two label tokens, a "----" token, then zero or more feature tokens.
  String sampleData = "LBLA LBLC ---- f1 f5 f7\n" +
      "LBLB LBLC ---- f5 f6\n" +
      "LBLB LBLD ----\n" +
      "LBLA LBLD ---- f2 f1\n";

  // Same layout and feature tokens as sampleData, with different label tokens on each line.
  String sampleData2 = "LBLB LBLD ---- f1 f5 f7\n" +
      "LBLA LBLC ---- f5 f6\n" +
      "LBLA LBLC ----\n" +
      "LBLB LBLD ---- f2 f1\n";

  // Like sampleData but without the "----" token; fed to pipes constructed with the argument 2.
  String sampleFixedData = "LBLA LBLC f1 f5 f7\n" +
      "LBLB LBLC f5 f6\n" +
      "LBLB LBLD\n" +
      "LBLA LBLD f2 f1\n";

  // Like sampleData2 but without the "----" token. Not referenced by any test in this class.
  String sampleFixedData2 = "LBLB LBLD f1 f5 f7\n" +
      "LBLA LBLC f5 f6\n" +
      "LBLA LBLC\n" +
      "LBLB LBLD f2 f1\n";

  // Feature tokens come first on each line and the two label tokens come last.
  String labelsAtEndData = "f1 f5 f7 LBLB LBLD\n" +
      "f5 f6 LBLA LBLC\n" +
      "LBLA LBLC\n" +
      "f2 f1 LBLB LBLD\n";

  /**
   * Creates a test case with the given name.
   *
   * @param name passed straight to the {@link TestCase} constructor
   */
  public TestGenericAcrfData2TokenSequence (String name)
  {
    super (name);
  }

  /**
   * Builds a new {@link InstanceList} on {@code pipe} and adds {@code text} to it through a
   * {@link LineGroupIterator} configured with {@link #EMPTY_LINE} and {@code true}.
   *
   * @param pipe the pipe the new list is constructed with
   * @param text the raw sample text to read
   * @return the populated instance list
   */
  private static InstanceList pipeThrough (Pipe pipe, String text)
  {
    InstanceList result = new InstanceList (pipe);
    result.addThruPipe (new LineGroupIterator (new StringReader (text), EMPTY_LINE, true));
    return result;
  }

  /**
   * Runs {@code sampleData} through a pipe, clones the pipe via serialization, then runs
   * {@code sampleData2} through both the original and the clone. Checks that the two pipes
   * report distinct target alphabet objects and that both produce one instance whose four
   * target entries have equal string forms.
   */
  public void testFromSerialization () throws IOException, ClassNotFoundException
  {
    Pipe original = new GenericAcrfData2TokenSequence ();
    // Push the first sample through the pipe before it is copied.
    pipeThrough (original, sampleData);

    Pipe copy = (Pipe) TestSerializable.cloneViaSerialization (original);

    InstanceList fromOriginal = pipeThrough (original, sampleData2);
    InstanceList fromCopy = pipeThrough (copy, sampleData2);

    // the readResolve alphabet thing doesn't kick in on first deserialization
    assertTrue (original.getTargetAlphabet () != copy.getTargetAlphabet ());

    assertEquals (1, fromOriginal.size ());
    assertEquals (1, fromCopy.size ());

    Instance originalInstance = fromOriginal.get (0);
    Instance copyInstance = fromCopy.get (0);
    LabelsSequence originalLabels = (LabelsSequence) originalInstance.getTarget ();
    LabelsSequence copyLabels = (LabelsSequence) copyInstance.getTarget ();

    final int expectedLength = 4;
    assertEquals (expectedLength, originalLabels.size ());
    assertEquals (expectedLength, copyLabels.size ());

    // Compare position by position using the printed form of each entry.
    int pos = 0;
    while (pos < expectedLength) {
      assertEquals (originalLabels.get (pos).toString (), copyLabels.get (pos).toString ());
      pos++;
    }
  }

  /**
   * Reads {@code sampleFixedData} with a pipe constructed with the argument 2 and checks that
   * it yields one instance whose target has four entries.
   */
  public void testFixedNumLabels () throws IOException, ClassNotFoundException
  {
    Pipe fixedPipe = new GenericAcrfData2TokenSequence (2);
    InstanceList instances = pipeThrough (fixedPipe, sampleFixedData);
    assertEquals (1, instances.size ());

    Instance only = instances.get (0);
    LabelsSequence labels = (LabelsSequence) only.getTarget ();
    assertEquals (4, labels.size ());
  }

  /**
   * Reads {@code labelsAtEndData} with {@code setLabelsAtEnd (true)} and checks the resulting
   * instance: four target entries, three features on the first token, first labels printed as
   * {@code "LBLB LBLD"}, two levels on the pipe, and the first label's alphabet equal to the
   * pipe's alphabet at index 0.
   */
  public void testLabelsAtEnd () throws IOException, ClassNotFoundException
  {
    GenericAcrfData2TokenSequence trailingLabelPipe = new GenericAcrfData2TokenSequence (2);
    trailingLabelPipe.setLabelsAtEnd (true);

    InstanceList instances = pipeThrough (trailingLabelPipe, labelsAtEndData);
    assertEquals (1, instances.size ());

    Instance only = instances.get (0);
    StringTokenization tokens = (StringTokenization) only.getData ();
    LabelsSequence labels = (LabelsSequence) only.getTarget ();

    assertEquals (4, labels.size ());
    assertEquals (3, tokens.get(0).getFeatures ().size ());
    assertEquals ("LBLB LBLD", labels.getLabels (0).toString ());

    LabelAlphabet levelZeroAlphabet = trailingLabelPipe.getLabelAlphabet (0);
    assertEquals (2, trailingLabelPipe.numLevels ());
    assertEquals (levelZeroAlphabet, labels.getLabels (0).get (0).getLabelAlphabet ());
  }

  /**
   * Reads {@code sampleFixedData} with {@code setFeaturesIncludeToken (false)} and
   * {@code setIncludeTokenText (false)}, then checks that the first token carries three
   * features and the second carries two.
   */
  public void testNoTokenText ()
  {
    GenericAcrfData2TokenSequence bareFeaturePipe = new GenericAcrfData2TokenSequence (2);
    bareFeaturePipe.setFeaturesIncludeToken(false);
    bareFeaturePipe.setIncludeTokenText(false);

    InstanceList instances = pipeThrough (bareFeaturePipe, sampleFixedData);
    assertEquals (1, instances.size ());

    Instance only = instances.get (0);
    LabelsSequence labels = (LabelsSequence) only.getTarget ();
    assertEquals (4, labels.size ());

    TokenSequence tokens = (TokenSequence) only.getData ();
    assertEquals (3, tokens.get(0).getFeatures().size ());
    assertEquals (2, tokens.get(1).getFeatures().size ());
  }

  /**
   * @return a suite built from this class
   */
  public static Test suite ()
  {
    return new TestSuite (TestGenericAcrfData2TokenSequence.class);
  }

  /**
   * Command-line entry point. With no arguments the full {@link #suite()} is run; otherwise
   * each argument is used as the name of one test case to run.
   *
   * @param args names of the individual tests to run, or empty to run them all
   */
  public static void main (String[] args) throws Throwable
  {
    TestSuite selected;
    if (args.length == 0) {
      selected = (TestSuite) TestGenericAcrfData2TokenSequence.suite ();
    } else {
      selected = new TestSuite ();
      for (String testName : args) {
        selected.addTest (new TestGenericAcrfData2TokenSequence (testName));
      }
    }
    junit.textui.TestRunner.run (selected);
  }
}