Package experiments

Source Code of experiments.PartOfSpeech

package experiments;

import io.PartOfSpeechReader;

import java.io.IOException;
import java.util.ArrayList;

import sequence.CRF;
import sequence.HammingLoss;
import sequence.LinearTagger;
import sequence.Mira;
import sequence.OneYwithXFeatureFunction;
import sequence.Perceptron;
import sequence.SequenceInstance;
import sequence.TwoYwithXFeatureFunction;
import types.Alphabet;
import types.StaticUtils;

public class PartOfSpeech {

  public static void main(String[] args) throws IOException {
    // read in the data.
    ArrayList<SequenceInstance> allData = (new PartOfSpeechReader(
        new Alphabet(), new Alphabet())).readFile(args[0]);
    StaticUtils.shuffle(allData, 0);
    // randomly split data into training and testing part
    ArrayList<SequenceInstance>[] tmp = StaticUtils.splitS(allData, 150);
    ArrayList<SequenceInstance> train = tmp[0];
    ArrayList<SequenceInstance> test = tmp[1];
    Alphabet xA = allData.get(0).xAlphabet;
    Alphabet yA = allData.get(0).yAlphabet;
    System.out.println("num Features = " + allData.get(0).xAlphabet.size());
    LinearTagger h;
     h = PartOfSpeech.trainCRF(train, xA, yA);
     System.out.println("CRF    Train Accuracy = "
     + StaticUtils.computeAccuracyS(h, train));
     System.out.println("CRF    Test  Accuracy = "
     + StaticUtils.computeAccuracyS(h, test));
    h = PartOfSpeech.trainPerceptron(false, 20, train, xA, yA);
    System.out.println("Percep Train Accuracy = "
        + StaticUtils.computeAccuracyS(h, train));
    System.out.println("Percep Test  Accuracy = "
        + StaticUtils.computeAccuracyS(h, test));
    h = PartOfSpeech.trainPerceptron(true, 20, train, xA, yA);
    System.out.println("AvgPer Train Accuracy = "
        + StaticUtils.computeAccuracyS(h, train));
    System.out.println("AvgPer Test  Accuracy = "
        + StaticUtils.computeAccuracyS(h, test));
    h = PartOfSpeech.trainMira(false, 20, train, xA, yA);
    System.out.println("Mira Train Accuracy = "
        + StaticUtils.computeAccuracyS(h, train));
    System.out.println("Mira Test  Accuracy = "
        + StaticUtils.computeAccuracyS(h, test));
    h = PartOfSpeech.trainMira(true, 20, train, xA, yA);
    System.out.println("Avg Mira Train Accuracy = "
        + StaticUtils.computeAccuracyS(h, train));
    System.out.println("Avg Mira Test  Accuracy = "
        + StaticUtils.computeAccuracyS(h, test));
  }

  public static LinearTagger trainPerceptron(boolean doAveraging,
      int numIters, ArrayList<SequenceInstance> train, Alphabet xA,
      Alphabet yA) {
    Perceptron p = new Perceptron(doAveraging, numIters, xA, yA,
        new OneYwithXFeatureFunction(xA, yA));
    LinearTagger h = p.batchTrain(train);
    return h;
  }

  public static LinearTagger trainMira(boolean doAveraging, int numIters,
      ArrayList<SequenceInstance> train, Alphabet xA, Alphabet yA) {
    Mira p = new Mira(doAveraging, numIters, xA, yA,
        new TwoYwithXFeatureFunction(xA, yA), new HammingLoss());
    LinearTagger h = p.batchTrain(train);
    return h;
  }

  public static LinearTagger trainCRF(ArrayList<SequenceInstance> train,
      Alphabet xA, Alphabet yA) {
    CRF crf = new CRF(10, xA, yA, new OneYwithXFeatureFunction(xA, yA));
    LinearTagger h = crf.batchTrain(train);
    return h;
  }

}
TOP

Related Classes of experiments.PartOfSpeech

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.