factory.buildTree();
// get the lineage of the trainSeqFile
LineageSequenceParser trainParser = new LineageSequenceParser(new File(trainSeqFile));
ArrayList<LineageSequence> trainSeqList = new ArrayList<LineageSequence>();
while (trainParser.hasNext()) {
LineageSequence seq = (LineageSequence) trainParser.next();
trainSeqList.add(seq);
}
trainParser.close();
LineageSequenceParser parser = new LineageSequenceParser(new File(testSeqFile));
while (parser.hasNext()) {
LineageSequence seq = (LineageSequence) parser.next();
HashMap<String,HierarchyTree> queryAncestorNodes = getAncestorNodes(factory.getRoot(), seq.getSeqName(), seq.getAncestors());
for (LineageSequence trainSeq: trainSeqList){
if ( trainSeq.getSeqName().equals(seq.getSeqName())) continue;
HashMap<String,HierarchyTree> matchAncestorNodes = getAncestorNodes(factory.getRoot(), trainSeq.getSeqName(), trainSeq.getAncestors());
boolean withinTaxon = false;
String lowestCommonRank = null;
for (int i = ranks.size() -1; i >=0; i--){
HierarchyTree queryTaxon = queryAncestorNodes.get( ranks.get(i));
HierarchyTree matchTaxon = matchAncestorNodes.get( ranks.get(i));
if ( queryTaxon != null && matchTaxon != null){
if ( queryTaxon.getName().equals(matchTaxon.getName())){
if ( !withinTaxon){ // if the query and match are not in the same child taxon, add sab to the current taxon
lowestCommonRank = ranks.get(i);
//(sabCoutMap.get(ranks.get(i)))[sab]++;
}
withinTaxon = true;
}else {
withinTaxon = false;
}
}
}
if ( lowestCommonRank == null){ // not the rank we care
continue;
}
// we need to use overlap_trim mode and calculate distance as metric to count insertions, deletions and mismatches.
PairwiseAlignment result = PairwiseAligner.align(seq.getSeqString().replaceAll("U", "T"), trainSeq.getSeqString().replaceAll("U", "T"), scoringMatrix, mode);
short sab = (short) (100 - 100*dist.getDistance(result.getAlignedSeqj().getBytes(), result.getAlignedSeqi().getBytes(), 0));
sabCoutMap.get(lowestCommonRank)[sab]++;
}
}