TreeFactory factory = new TreeFactory(taxReader);
File infileReader = new File(System.class.getResource("/test/classifier/testNBClassifierSet.fasta").getFile());
LineageSequenceParser parser = new LineageSequenceParser(infileReader);
while (parser.hasNext()) {
factory.addSequence((LineageSequence) parser.next());
}
// After the entire training set has been parsed, calculate the prior probability for all the words.
factory.calculateWordPrior();
HashMap<String, HierarchyTree> nodeMap = new HashMap<String, HierarchyTree>();
HierarchyTree root = factory.getRoot();
root.getNodeMap("GENUS", nodeMap);
ArrayList<HierarchyTree> nodeList = new ArrayList(nodeMap.values());
// test the first sequence
File queryReader = new File(System.class.getResource("/test/classifier/testNBClassifierSet.fasta").getFile());
parser = new LineageSequenceParser(queryReader);
LineageSequence pSeq = parser.next();
GoodWordIterator iterator = new GoodWordIterator(pSeq.getSeqString());
NBClassifier classifier = new NBClassifier(factory, iterator.getWordArr(), nodeList, useSeed, min_bootstrap_words);
assertEquals(pSeq.getSeqName(), "XG1_child1");
ValidationClassificationResult result = classifier.assignClass();
assertEquals("G1", ((HierarchyTree) result.getBestClass()).getName());
assertTrue(0.1 > result.getPosteriorProb());
pSeq = parser.next();
// test the 3rd sequence (the 2nd was skipped by the parser.next() call above)
pSeq = parser.next();
assertEquals(pSeq.getSeqName(), "XG2_child1");
iterator = new GoodWordIterator(pSeq.getSeqString());
classifier = new NBClassifier(factory, iterator.getWordArr(), nodeList, useSeed, min_bootstrap_words);
result = classifier.assignClass();
assertEquals("G2", ((HierarchyTree) result.getBestClass()).getName());
assertTrue(0.2 > result.getPosteriorProb());
pSeq = parser.next();
// test the 5th sequence (the 4th was skipped by the parser.next() call above)
pSeq = parser.next();
assertEquals(pSeq.getSeqName(), "XPh2G6_child1");
iterator = new GoodWordIterator(pSeq.getSeqString());
classifier = new NBClassifier(factory, iterator.getWordArr(), nodeList, useSeed, min_bootstrap_words);
result = classifier.assignClass();
assertEquals("G1", ((HierarchyTree) result.getBestClass()).getName());
assertTrue(0.2 > result.getPosteriorProb());
// Test the 8th sequence (in G7); it is the same as the 9th sequence (in G8).
// The classifier should randomly choose a genus (either G7 or G8) because the scores will be tied.
parser.next();
parser.next();
pSeq = parser.next();
assertEquals(pSeq.getSeqName(), "XPh2G7_child1");
iterator = new GoodWordIterator(pSeq.getSeqString());
int G7_count = 0;
int G8_count = 0;
for ( int run = 0; run < DecisionMaker.NUM_OF_RUNS; run++){