new TokenSequenceRemoveStopwords(),
new TokenSequence2FeatureSequence(),
new FeatureSequence2FeatureVector() });
InstanceList instList = new InstanceList(instPipe);
instList.addThruPipe(new
FileIterator(directories, FileIterator.STARTING_DIRECTORIES));
System.out.println("Training 1");
NaiveBayesTrainer trainer = new NaiveBayesTrainer();
NaiveBayes classifier = (NaiveBayes) trainer.trainIncremental(instList);
Classification initialClassification = classifier.classify("Hello Everybody");
Classification initial2Classification = classifier.classify("Goodbye now");
System.out.println("Initial Classification = ");
initialClassification.print();
initial2Classification.print();
System.out.println("data alphabet " + classifier.getAlphabet());
System.out.println("label alphabet " + classifier.getLabelAlphabet());
// incrementally train...
String[] t2directories = {
"src/cc/mallet/classify/tests/NaiveBayesData/learn/b"
};
System.out.println("data alphabet size " + instList.getDataAlphabet().size());
System.out.println("target alphabet size " + instList.getTargetAlphabet().size());
InstanceList instList2 = new InstanceList(instPipe);
instList2.addThruPipe(new
FileIterator(t2directories, FileIterator.STARTING_DIRECTORIES));
System.out.println("Training 2");
System.out.println("data alphabet size " + instList2.getDataAlphabet().size());