/* Copyright (C) 2006 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
package cc.mallet.pipe.tsf.tests;
import junit.framework.*;
import java.util.regex.Pattern;
import java.io.IOException;
import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.PrintInputAndTarget;
import cc.mallet.pipe.SerialPipes;
import cc.mallet.pipe.SimpleTaggerSentence2TokenSequence;
import cc.mallet.pipe.iterator.ArrayIterator;
import cc.mallet.pipe.tsf.OffsetFeatureConjunction;
import cc.mallet.pipe.tsf.RegexMatches;
import cc.mallet.pipe.tsf.TokenText;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.types.Token;
import cc.mallet.types.TokenSequence;
import cc.mallet.types.tests.TestSerializable;
/**
* $Id: TestOffsetFeatureConjunctions.java,v 1.1 2007/10/22 21:37:57 mccallum Exp $
*/
public class TestOffsetFeatureConjunctions extends TestCase {
public TestOffsetFeatureConjunctions (String name)
{
super (name);
}
private static String[] doc1 = { "Meet\nme\nat\n4\nPM\ntomorrow" };
public static void testMultiTag ()
{
Pipe mtPipe = new SerialPipes (new Pipe[] {
new SimpleTaggerSentence2TokenSequence (),
new TokenText (),
new RegexMatches ("digits", Pattern.compile ("[0-9]+")),
new RegexMatches ("ampm", Pattern.compile ("[aApP][mM]")),
new OffsetFeatureConjunction ("time",
new String[] { "digits", "ampm" },
new int[] { 0, 1 },
true),
new PrintInputAndTarget (),
});
Pipe noMtPipe = new SerialPipes (new Pipe[] {
new SimpleTaggerSentence2TokenSequence (),
new TokenText (),
new RegexMatches ("digits", Pattern.compile ("[0-9]+")),
new RegexMatches ("ampm", Pattern.compile ("[aApP][mM]")),
new OffsetFeatureConjunction ("time",
new String[] { "digits", "ampm" },
new int[] { 0, 1 },
false),
new PrintInputAndTarget (),
});
InstanceList mtLst = new InstanceList (mtPipe);
InstanceList noMtLst = new InstanceList (noMtPipe);
mtLst.addThruPipe (new ArrayIterator (doc1));
noMtLst.addThruPipe (new ArrayIterator (doc1));
Instance mtInst = mtLst.get (0);
Instance noMtInst = noMtLst.get (0);
TokenSequence mtTs = (TokenSequence) mtInst.getData ();
TokenSequence noMtTs = (TokenSequence) noMtInst.getData ();
assertEquals (6, mtTs.size ());
assertEquals (6, noMtTs.size ());
assertEquals (1.0, mtTs.get (3).getFeatureValue ("time"), 1e-15);
assertEquals (1.0, noMtTs.get (3).getFeatureValue ("time"), 1e-15);
assertEquals (1.0, mtTs.get (4).getFeatureValue ("time"), 1e-15);
assertEquals (0.0, noMtTs.get (4).getFeatureValue ("time"), 1e-15);
}
public static void testMultiTagSerialization () throws IOException, ClassNotFoundException
{
Pipe origPipe = new SerialPipes (new Pipe[] {
new SimpleTaggerSentence2TokenSequence (),
new TokenText (),
new RegexMatches ("digits", Pattern.compile ("[0-9]+")),
new RegexMatches ("ampm", Pattern.compile ("[aApP][mM]")),
new OffsetFeatureConjunction ("time",
new String[] { "digits", "ampm" },
new int[] { 0, 1 },
true),
new PrintInputAndTarget (),
});
Pipe mtPipe = (Pipe) TestSerializable.cloneViaSerialization (origPipe);
InstanceList mtLst = new InstanceList (mtPipe);
mtLst.addThruPipe (new ArrayIterator (doc1));
Instance mtInst = mtLst.get (0);
TokenSequence mtTs = (TokenSequence) mtInst.getData ();
assertEquals (6, mtTs.size ());
assertEquals (1.0, mtTs.get (3).getFeatureValue ("time"), 1e-15);
assertEquals (1.0, mtTs.get (4).getFeatureValue ("time"), 1e-15);
}
/**
* @return a <code>TestSuite</code>
*/
public static TestSuite suite ()
{
return new TestSuite (TestOffsetFeatureConjunctions.class);
}
public static void main (String[] args)
{
TestSuite theSuite;
if (args.length > 0) {
theSuite = new TestSuite ();
for (int i = 0; i < args.length; i++) {
theSuite.addTest (new TestOffsetFeatureConjunctions (args[i]));
}
} else {
theSuite = (TestSuite) suite ();
}
junit.textui.TestRunner.run (theSuite);
}
}