public void calSabSimilarity(String taxonFile, String trainSeqFile, String testSeqFile) throws IOException{
TreeFactory factory = new TreeFactory(new FileReader(taxonFile));
factory.buildTree();
// get the lineage of the trainSeqFile
LineageSequenceParser trainParser = new LineageSequenceParser(new File(trainSeqFile));
HashMap<String, List<String>> lineageMap = new HashMap<String, List<String>>();
while (trainParser.hasNext()) {
LineageSequence seq = (LineageSequence) trainParser.next();
lineageMap.put(seq.getSeqName(), seq.getAncestors());
}
trainParser.close();
NuclSeqMatch sabCal = new NuclSeqMatch(trainSeqFile);
LineageSequenceParser parser = new LineageSequenceParser(new File(testSeqFile));
int count = 0;
while (parser.hasNext()) {
LineageSequence seq = (LineageSequence) parser.next();
HashMap<String,HierarchyTree> queryAncestorNodes = getAncestorNodes(factory.getRoot(), seq.getSeqName(), seq.getAncestors());
TreeSet<KmerMatchCore.BestMatch> matchResults = sabCal.findAllMatches(seq);
short withinLowestRankSab = -1;
short diffLowestRankSab = -1;
String bestDiffLowestRankMatch = null;
for (KmerMatchCore.BestMatch match: matchResults){
if ( match.getBestMatch().getSeqName().equals(seq.getSeqName())) continue;
short sab = (short)(Math.round(100*match.getSab()));
HashMap<String,HierarchyTree> matchAncestorNodes = getAncestorNodes(factory.getRoot(), match.getBestMatch().getSeqName(), lineageMap.get(match.getBestMatch().getSeqName()));
boolean withinTaxon = false;
for (int i = ranks.size() -1; i >=0; i--){
HierarchyTree queryTaxon = queryAncestorNodes.get( ranks.get(i));
HierarchyTree matchTaxon = matchAncestorNodes.get( ranks.get(i));
if ( queryTaxon != null && matchTaxon != null){
if ( queryTaxon.getName().equals(matchTaxon.getName())){
if ( !withinTaxon){ // if the query and match are not in the same child taxon, add sab to the current taxon
(sabCoutMap.get(ranks.get(i)))[sab]++;
}
withinTaxon = true;
}else {
withinTaxon = false;
}
}
}
// find within or different lowest level rank sab score, be either species or genus or any rank
HierarchyTree speciesQueryTaxon = queryAncestorNodes.get( ranks.get(ranks.size()-1));
HierarchyTree speciesMatchTaxon = matchAncestorNodes.get( ranks.get(ranks.size()-1));
if ( speciesQueryTaxon != null && speciesMatchTaxon != null && speciesQueryTaxon.getName().equals(speciesMatchTaxon.getName())){
withinLowestRankSab = sab >= withinLowestRankSab ? sab: withinLowestRankSab;
}else {
if ( sab >= diffLowestRankSab ){
bestDiffLowestRankMatch = match.getBestMatch().getSeqName();
diffLowestRankSab = sab;
}
}
}
if ( withinLowestRankSab > 0){
withinLowestRankSabSet.add(withinLowestRankSab);
}
if ( diffLowestRankSab > 0 ){
diffLowestRankSabSet.add(diffLowestRankSab);
}
//System.out.println(seq.getSeqName() + "\t" + withinLowestRankSab + "\t" + diffLowestRankSab );
count++;
if ( count % 100 == 0){
System.out.println(count);
}
}
parser.close();
}