Package cc.mallet.grmm.examples

Source Code of cc.mallet.grmm.examples.SimpleCrfExample

/* Copyright (C) 2006 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */
package cc.mallet.grmm.examples;


import java.io.File;
import java.io.FileReader;
import java.io.FileNotFoundException;
import java.util.regex.Pattern;

import cc.mallet.grmm.learning.ACRF;
import cc.mallet.grmm.learning.ACRFTrainer;
import cc.mallet.grmm.learning.DefaultAcrfTrainer;
import cc.mallet.grmm.learning.GenericAcrfData2TokenSequence;
import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.SerialPipes;
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence;
import cc.mallet.pipe.iterator.LineGroupIterator;
import cc.mallet.types.InstanceList;
import cc.mallet.util.FileUtils;

/**
* $Id: SimpleCrfExample.java,v 1.1 2007/10/22 21:38:02 mccallum Exp $
*/
public class SimpleCrfExample {

  public static void main (String[] args) throws FileNotFoundException
  {
    File trainFile = new File (args[0]);
    File testFile = new File (args[1]);
    File crfFile = new File (args[2]);

    Pipe pipe = new SerialPipes (new Pipe[] {
        new GenericAcrfData2TokenSequence (2),
        new TokenSequence2FeatureVectorSequence (true, true),
    });

    InstanceList training = new InstanceList (pipe);
    training.addThruPipe (new LineGroupIterator (new FileReader (trainFile),
                                         Pattern.compile ("\\s*"),
                                         true));

    InstanceList testing = new InstanceList (pipe);
    training.addThruPipe (new LineGroupIterator (new FileReader (testFile),
                                         Pattern.compile ("\\s*"),
                                         true));

    ACRF.Template[] tmpls = new ACRF.Template[] {
            new ACRF.BigramTemplate (0),
            new ACRF.BigramTemplate (1),
            new ACRF.PairwiseFactorTemplate (0,1),
            new CrossTemplate1 (0,1)
    };

    ACRF acrf = new ACRF (pipe, tmpls);

    ACRFTrainer trainer = new DefaultAcrfTrainer ();
    trainer.train (acrf, training, null, testing, 99999);

    FileUtils.writeGzippedObject (crfFile, acrf);
  }
}
TOP

Related Classes of cc.mallet.grmm.examples.SimpleCrfExample

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.