package cc.mallet.pipe;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureSequence;
import cc.mallet.types.Instance;
/**
 * Converts the data field of an {@link Instance} from a list of strings into a
 * {@link FeatureSequence} built over this pipe's data alphabet.
 *
 * <p>Any {@link List} implementation is accepted as input. If the data is not a
 * list, a message is written to standard error and the instance is returned
 * with its data untouched.
 */
public class StringList2FeatureSequence extends Pipe {

    /**
     * Running total, in nanoseconds, of the time spent in calls to
     * {@link #pipe(Instance)} that completed a conversion. Calls that are
     * rejected because the data is not a list do not contribute.
     */
    public long totalNanos = 0;

    /**
     * Creates a pipe that uses the supplied alphabet as its data alphabet.
     * {@code null} is passed as the second superclass constructor argument
     * (presumably the target alphabet -- confirm against {@code Pipe}).
     *
     * @param dataDict the alphabet handed to the superclass constructor
     */
    public StringList2FeatureSequence (Alphabet dataDict) {
        super (dataDict, null);
    }

    /**
     * Creates a pipe backed by a newly constructed {@link Alphabet}.
     * {@code null} is passed as the second superclass constructor argument.
     */
    public StringList2FeatureSequence () {
        super(new Alphabet(), null);
    }

    /**
     * Replaces the carrier's data, expected to be a list of strings, with a
     * {@link FeatureSequence} containing one entry per list element, added in
     * list order.
     *
     * <p>If the data is {@code null} or not a {@link List}, a diagnostic is
     * printed to {@code System.err} and the carrier is returned unchanged.
     *
     * @param carrier the instance whose data is converted in place
     * @return the same {@code carrier} object
     */
    public Instance pipe (Instance carrier) {
        long start = System.nanoTime();
        Object data = carrier.getData();

        // Check the type explicitly instead of catching ClassCastException:
        // this accepts every List implementation and cannot mask an
        // unrelated ClassCastException thrown further down.
        if (!(data instanceof List)) {
            // instanceof is false for null, so guard the getClass() call.
            System.err.println("Expecting List<String>, found "
                    + (data == null ? null : data.getClass()));
            return carrier;
        }

        List<?> tokens = (List<?>) data;
        FeatureSequence featureSequence =
            new FeatureSequence ((Alphabet) getDataAlphabet(), tokens.size());
        // Elements are handed over as plain objects, without a per-element
        // cast, exactly as the sequence receives them from the list.
        for (Object token : tokens) {
            featureSequence.add (token);
        }
        carrier.setData(featureSequence);

        // Only completed conversions are added to the running total.
        totalNanos += System.nanoTime() - start;
        return carrier;
    }

    // Explicit serialization version identifier for this class.
    static final long serialVersionUID = 1;
}