// Paths of the training corpus, the test corpus and the feature template file.
String train = "./example-data/sequence/train.txt";
String testfile = "./example-data/sequence/test.txt";
String templateFile = "./example-data/sequence/template";

// Load the feature templates from the template file.
TempletGroup templets = new TempletGroup();
templets.load(templateFile);

AlphabetFactory factory = AlphabetFactory.buildFactory();
// Label alphabet: maps labels to 0, 1, 2, ...
LabelAlphabet labels = factory.DefaultLabelAlphabet();
// Feature alphabet.
IFeatureAlphabet features = factory.DefaultFeatureAlphabet();

// Samples are run through pipes to extract features: the training pipeline
// first applies Target2Label and then the template-based feature pipe.
Pipe featurePipe = new Sequence2FeatureSequence(templets, features, labels);
Pipe[] stages = { new Target2Label(labels), featurePipe };
Pipe pipe = new SeriesPipes(stages);

// Training set.
System.out.print("读入训练数据 ...");
InstanceSet trainSet = new InstanceSet(pipe, factory);
// NOTE(review): the boolean flag presumably marks the file as labeled — confirm against SequenceReader.
SequenceReader trainReader = new SequenceReader(train, true, "utf8");
trainSet.loadThruStagePipes(trainReader);

// Report the sizes gathered while loading the training data.
System.out.println("训练样本个数 " + trainSet.size());
System.out.println("标签个数: " + labels.size());
System.out.println("特征个数" + features.size());

// Freeze both alphabets now that the training data has been loaded.
features.setStopIncrement(true);
labels.setStopIncrement(true);

// Viterbi decoding, with Hamming loss shared by the update rule and the trainer.
HammingLoss loss = new HammingLoss();
LinearViterbi viterbi = new LinearViterbi(templets, labels.size());
Inferencer inference = viterbi;
Update update = new LinearViterbiPAUpdate(viterbi, loss);
// NOTE(review): the meaning of the trailing 50 and 0.1f is not visible here — check the OnlineTrainer constructor.
OnlineTrainer trainer =
        new OnlineTrainer(inference, update, loss, features.size(), 50, 0.1f);
Linear cl = trainer.train(trainSet);

// The test data carries no annotations, so only the feature pipe is applied to it.
Pipe tpipe = featurePipe;
// Test set.
InstanceSet testSet = new InstanceSet(tpipe);
testSet.loadThruPipes(new SequenceReader(testfile, false, "utf8"));
System.out.println("测试样本个数: " + testSet.size());