// Train a linear Viterbi sequence model on the training file, then load the
// test file so each test instance can be classified with the trained model.
// print (not println): the next message continues on the same console line.
System.out.print("读入训练数据 ...");
InstanceSet trainSet = new InstanceSet(pipe, factory);
// Training set.
// NOTE(review): the boolean passed to SequenceReader is true here and false for
// the test file below — presumably "input carries gold labels"; confirm.
trainSet.loadThruStagePipes(new SequenceReader(train, true, "utf8"));
System.out.println("训练样本个数 " + trainSet.size());
System.out.println("标签个数: " + labels.size()); // size of the label set after loading
System.out.println("特征个数" + features.size());
// Freeze the feature set (and the label set) before training.
// NOTE(review): presumably setStopIncrement(true) stops new entries from being
// added, so the sizes read below stay fixed — confirm.
features.setStopIncrement(true);
labels.setStopIncrement(true);
// Viterbi decoding: the inferencer is built from the templates and the label count.
HammingLoss loss = new HammingLoss();
Inferencer inference = new LinearViterbi(templets, labels.size());
// The update rule shares the same LinearViterbi instance and loss as the trainer.
Update update = new LinearViterbiPAUpdate((LinearViterbi) inference, loss);
// NOTE(review): 50 and 0.1f are passed through unexplained — presumably the
// iteration count and a learning/aggressiveness parameter; confirm against
// OnlineTrainer's constructor.
OnlineTrainer trainer = new OnlineTrainer(inference, update, loss,
features.size(), 50,0.1f);
Linear cl = trainer.train(trainSet);
// The test data is unlabeled, so only the feature pipe is applied to it.
Pipe tpipe = featurePipe;
// Test set.
InstanceSet testSet = new InstanceSet(tpipe);
testSet.loadThruPipes(new SequenceReader(testfile, false, "utf8"));
System.out.println("测试样本个数: " + testSet.size()); // number of loaded test instances
// One (initially null) row per test instance.
String[][] labelsSet = new String[testSet.size()][];
for (int i = 0; i < testSet.size(); i++) {
Instance carrier = testSet.get(i);
int[] pred = (int[]) cl.classify(carrier).getLabel(0);