Package cc.mallet.grmm.test

Source Code of cc.mallet.grmm.test.TestGenericAcrfData2TokenSequence

package cc.mallet.grmm.test;

import junit.framework.TestCase;
import junit.framework.Test;
import junit.framework.TestSuite;

import java.io.IOException;
import java.io.StringReader;
import java.util.regex.Pattern;

import cc.mallet.extract.StringTokenization;
import cc.mallet.grmm.learning.GenericAcrfData2TokenSequence;
import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.iterator.LineGroupIterator;
import cc.mallet.types.*;
import cc.mallet.types.tests.TestSerializable;


/**
* Created: Sep 15, 2005
*
* @author <A HREF="mailto:casutton@cs.umass.edu">casutton@cs.umass.edu</A>
* @version $Id: TestGenericAcrfData2TokenSequence.java,v 1.1 2007/10/22 21:37:41 mccallum Exp $
*/
public class TestGenericAcrfData2TokenSequence extends TestCase {

  String sampleData = "LBLA LBLC ---- f1 f5 f7\n" +
          "LBLB LBLC ---- f5 f6\n" +
          "LBLB LBLD ----\n" +
          "LBLA LBLD ---- f2 f1\n";

  String sampleData2 = "LBLB LBLD ---- f1 f5 f7\n" +
          "LBLA LBLC ---- f5 f6\n" +
          "LBLA LBLC ----\n" +
          "LBLB LBLD ---- f2 f1\n";

  String sampleFixedData = "LBLA LBLC f1 f5 f7\n" +
          "LBLB LBLC f5 f6\n" +
          "LBLB LBLD\n" +
          "LBLA LBLD f2 f1\n";

  String sampleFixedData2 = "LBLB LBLD f1 f5 f7\n" +
          "LBLA LBLC f5 f6\n" +
          "LBLA LBLC\n" +
          "LBLB LBLD f2 f1\n";

  String labelsAtEndData = "f1 f5 f7 LBLB LBLD\n" +
          "f5 f6 LBLA LBLC\n" +
          "LBLA LBLC\n" +
          "f2 f1 LBLB LBLD\n";


  public TestGenericAcrfData2TokenSequence (String name)
  {
    super (name);
  }

  public void testFromSerialization () throws IOException, ClassNotFoundException
  {
    Pipe p = new GenericAcrfData2TokenSequence ();
    InstanceList training = new InstanceList (p);
    training.addThruPipe (new LineGroupIterator (new StringReader (sampleData), Pattern.compile ("^$"), true));

    Pipe p2 = (Pipe) TestSerializable.cloneViaSerialization (p);

    InstanceList l1 = new InstanceList (p);
    l1.addThruPipe (new LineGroupIterator (new StringReader (sampleData2), Pattern.compile ("^$"), true));
    InstanceList l2 = new InstanceList (p2);
    l2.addThruPipe (new LineGroupIterator (new StringReader (sampleData2), Pattern.compile ("^$"), true));

    // the readResolve alphabet thing doesn't kick in on first deserialization
    assertTrue (p.getTargetAlphabet () != p2.getTargetAlphabet ());

    assertEquals (1, l1.size ());
    assertEquals (1, l2.size ());

    Instance inst1 = l1.get (0);
    Instance inst2 = l2.get (0);

    LabelsSequence ls1 = (LabelsSequence) inst1.getTarget ();
    LabelsSequence ls2 = (LabelsSequence) inst2.getTarget ();

    assertEquals (4, ls1.size ());
    assertEquals (4, ls2.size ());

    for (int i = 0; i < 4; i++) {
      assertEquals (ls1.get (i).toString (), ls2.get (i).toString ());
    }
  }

  public void testFixedNumLabels () throws IOException, ClassNotFoundException
  {
    Pipe p = new GenericAcrfData2TokenSequence (2);
    InstanceList training = new InstanceList (p);
    training.addThruPipe (new LineGroupIterator (new StringReader (sampleFixedData), Pattern.compile ("^$"), true));

    assertEquals (1, training.size ());

    Instance inst1 = training.get (0);
    LabelsSequence ls1 = (LabelsSequence) inst1.getTarget ();

    assertEquals (4, ls1.size ());
  }

  public void testLabelsAtEnd () throws IOException, ClassNotFoundException
  {
    GenericAcrfData2TokenSequence p = new GenericAcrfData2TokenSequence (2);
    p.setLabelsAtEnd (true);

    InstanceList training = new InstanceList (p);
    training.addThruPipe (new LineGroupIterator (new StringReader (labelsAtEndData), Pattern.compile ("^$"), true));

    assertEquals (1, training.size ());

    Instance inst1 = training.get (0);
    StringTokenization toks = (StringTokenization) inst1.getData ();
    LabelsSequence ls1 = (LabelsSequence) inst1.getTarget ();

    assertEquals (4, ls1.size ());
    assertEquals (3, toks.get(0).getFeatures ().size ());
    assertEquals ("LBLB LBLD", ls1.getLabels (0).toString ());

    LabelAlphabet globalDict = p.getLabelAlphabet (0);
    assertEquals (2, p.numLevels ());
    assertEquals (globalDict, ls1.getLabels (0).get (0).getLabelAlphabet ());
  }

  public void testNoTokenText ()
  {
    GenericAcrfData2TokenSequence p = new GenericAcrfData2TokenSequence (2);
    p.setFeaturesIncludeToken(false);
    p.setIncludeTokenText(false);

    InstanceList training = new InstanceList (p);
    training.addThruPipe (new LineGroupIterator (new StringReader (sampleFixedData), Pattern.compile ("^$"), true));

    assertEquals (1, training.size ());

    Instance inst1 = training.get (0);

    LabelsSequence ls1 = (LabelsSequence) inst1.getTarget ();
    assertEquals (4, ls1.size ());

    TokenSequence ts1 = (TokenSequence) inst1.getData ();
    assertEquals (3, ts1.get(0).getFeatures().size ());
    assertEquals (2, ts1.get(1).getFeatures().size ());
  }

  public static Test suite ()
  {
    return new TestSuite (TestGenericAcrfData2TokenSequence.class);
  }

  public static void main (String[] args) throws Throwable
  {
    TestSuite theSuite;
    if (args.length > 0) {
      theSuite = new TestSuite ();
      for (int i = 0; i < args.length; i++) {
        theSuite.addTest (new TestGenericAcrfData2TokenSequence (args[i]));
      }
    } else {
      theSuite = (TestSuite) TestGenericAcrfData2TokenSequence.suite ();
    }

    junit.textui.TestRunner.run (theSuite);
  }

}
TOP

Related Classes of cc.mallet.grmm.test.TestGenericAcrfData2TokenSequence

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.