Package cc.mallet.types.tests

Source Code of cc.mallet.types.tests.TestPagedInstanceList

/* Copyright (C) 2003 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */
package cc.mallet.types.tests;

import junit.framework.*;

import java.io.File;
import java.util.Iterator;

import cc.mallet.classify.*;
import cc.mallet.pipe.*;
import cc.mallet.pipe.iterator.PipeInputIterator;
import cc.mallet.pipe.iterator.RandomTokenSequenceIterator;
import cc.mallet.types.Alphabet;
import cc.mallet.types.Dirichlet;
import cc.mallet.types.InstanceList;
import cc.mallet.types.Instance;
import cc.mallet.types.PagedInstanceList;
import cc.mallet.util.Randoms;

/**
* Created: Apr 19, 2005
*
* @author <A HREF="mailto:casutton@cs.umass.edu>casutton@cs.umass.edu</A>
* @version $Id: TestPagedInstanceList.java,v 1.1 2007/10/22 21:37:55 mccallum Exp $
*/
public class TestPagedInstanceList extends TestCase {

  public TestPagedInstanceList (String name)
  {
    super (name);
  }

  public static Test suite ()
  {
    return new TestSuite (TestPagedInstanceList.class);
  }


  private static Alphabet dictOfSize (int size)
  {
    Alphabet ret = new Alphabet ();
    for (int i = 0; i < size; i++)
      ret.lookupIndex ("feature"+i);
    return ret;
  }

  public void testRandomTrained ()
  {
    Pipe p = new SerialPipes (new Pipe[]  {
      new TokenSequence2FeatureSequence (),
      new FeatureSequence2FeatureVector (),
      new Target2Label()});

    double testAcc1 = testRandomTrainedOn (new InstanceList (p));
    double testAcc2 = testRandomTrainedOn (new PagedInstanceList (p, 700, 200, new File(".")));
    assertEquals (testAcc1, testAcc2, 0.01);
  }

  private double testRandomTrainedOn (InstanceList training)
  {
    ClassifierTrainer trainer = new MaxEntTrainer ();

    Alphabet fd = dictOfSize (3);
    String[] classNames = new String[] {"class0", "class1", "class2"};

    Randoms r = new Randoms (1);
    Iterator<Instance> iter = new RandomTokenSequenceIterator (r,  new Dirichlet(fd, 2.0),
          30, 0, 10, 200, classNames);
    training.addThruPipe (iter);

    InstanceList testing = new InstanceList (training.getPipe ());
    testing.addThruPipe (new RandomTokenSequenceIterator (r,  new Dirichlet(fd, 2.0),
          30, 0, 10, 200, classNames));

    System.out.println ("Training set size = "+training.size());
    System.out.println ("Testing set size = "+testing.size());

    Classifier classifier = trainer.train (training);

    System.out.println ("Accuracy on training set:");
    System.out.println (classifier.getClass().getName()
                          + ": " + new Trial (classifier, training).getAccuracy());

    System.out.println ("Accuracy on testing set:");
    double testAcc = new Trial (classifier, testing).getAccuracy();
    System.out.println (classifier.getClass().getName()
                          + ": " + testAcc);

    return testAcc;
  }

  public static void main (String[] args) throws Throwable
  {
    TestSuite theSuite;
    if (args.length > 0) {
      theSuite = new TestSuite ();
      for (int i = 0; i < args.length; i++) {
        theSuite.addTest (new TestPagedInstanceList (args[i]));
      }
    } else {
      theSuite = (TestSuite) suite ();
    }

    junit.textui.TestRunner.run (theSuite);
  }

}
TOP

Related Classes of cc.mallet.types.tests.TestPagedInstanceList

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.