Package edu.msu.cme.rdp.classifier.train.validation.distance

Source Code of edu.msu.cme.rdp.classifier.train.validation.distance.PairwiseSeqDistance

/*
* Copyright (C) 2013 wangqion
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*/
package edu.msu.cme.rdp.classifier.train.validation.distance;

import edu.msu.cme.rdp.alignment.AlignmentMode;
import edu.msu.cme.rdp.alignment.pairwise.PairwiseAligner;
import edu.msu.cme.rdp.alignment.pairwise.PairwiseAlignment;
import edu.msu.cme.rdp.alignment.pairwise.ScoringMatrix;
import edu.msu.cme.rdp.alignment.pairwise.rna.DistanceModel;
import edu.msu.cme.rdp.alignment.pairwise.rna.IdentityDistanceModel;
import edu.msu.cme.rdp.alignment.pairwise.rna.OverlapCheckFailedException;
import edu.msu.cme.rdp.classifier.train.LineageSequence;
import edu.msu.cme.rdp.classifier.train.LineageSequenceParser;
import edu.msu.cme.rdp.classifier.train.validation.HierarchyTree;
import edu.msu.cme.rdp.classifier.train.validation.Taxonomy;
import edu.msu.cme.rdp.classifier.train.validation.TreeFactory;
import edu.msu.cme.rdp.readseq.stat.StdevCal;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;

/**
*
* @author wangqion
*/
public class PairwiseSeqDistance {
    private static final char gapChar = '-';   
    private ScoringMatrix scoringMatrix = ScoringMatrix.getDefaultNuclMatrix();
    private AlignmentMode mode = AlignmentMode.overlap;
    private boolean show_alignment = false;
    private static final Options options = new Options();
    private static DistanceModel dist = new IdentityDistanceModel();
    private HashMap<Taxonomy, ArrayList<Double>> distanceMap = new HashMap<Taxonomy, ArrayList<Double>>();
    private ArrayList<LineageSequence> seqList = new ArrayList<LineageSequence>();
    TreeFactory factory = null;
       
    static {         
        options.addOption("a", "alignment-mode", true, "Alignment mode: overlap, glocal, local or global. default = overlap");
        options.addOption("w", "show_alignment", false, "if true, output the detailed alignment to stdout. default = false");
    }
   
   
    public PairwiseSeqDistance(String trainseqFile, String taxFile, AlignmentMode mode, boolean show_alignment ) throws IOException, OverlapCheckFailedException{
        this.mode = mode;
        this.show_alignment = show_alignment;
        factory = new TreeFactory(new FileReader(taxFile));
        LineageSequenceParser parser = new LineageSequenceParser(new File(trainseqFile));
       
        while (parser.hasNext()) {
            LineageSequence tmp = (LineageSequence) parser.next();
            seqList.add(tmp);
            factory.addSequence((LineageSequence) parser.next());
        }
        parser.close();
       
        calDist();
    }
  
    private void calDist() throws OverlapCheckFailedException{
        HashMap<String, HierarchyTree> nodeMap = new HashMap<String, HierarchyTree>();
        HierarchyTree root = factory.getRoot();
        root.getNodeMap(factory.getLowestRank(), nodeMap);
       
        for ( int i= 0; i < seqList.size(); i++){
            LineageSequence seqx = seqList.get(i);
            HierarchyTree treex = nodeMap.get((String) seqx.getAncestors().get(seqx.getAncestors().size() - 1));
            for ( int j = i+1; j < seqList.size(); j++){
                LineageSequence seqy = seqList.get(j);
                HierarchyTree treey = nodeMap.get((String) seqy.getAncestors().get(seqy.getAncestors().size() - 1));
               
                Taxonomy lowestCommonAnc = findLowestCommonAncestor(treex, treey);              
                PairwiseAlignment result = PairwiseAligner.align(seqx.getSeqString().replaceAll("U", "T"), seqy.getSeqString().replaceAll("U", "T"), scoringMatrix, mode);
                double distance = dist.getDistance(result.getAlignedSeqj().getBytes(), result.getAlignedSeqi().getBytes(), 0);

                if ( show_alignment){
                    System.out.println(">\t" + seqx.getSeqName() + "\t" + seqy.getSeqName() + "\t" + String.format("%.3f", distance) + "\t" + lowestCommonAnc.getHierLevel());
                    System.out.println(result.getAlignedSeqi() + "\n");
                    System.out.println(result.getAlignedSeqj() + "\n");
                }
       
                ArrayList<Double> distList = distanceMap.get(lowestCommonAnc);
                if ( distList == null){
                    distList = new ArrayList<Double>();
                    distList.add(distance);
                    distanceMap.put(lowestCommonAnc, distList);
                }else {
                    distList.add(distance);
                }
            }
        }       
    }

    public void printSummary( PrintStream outStream ){
        HashMap<String, ArrayList<Double>> rankDistanceMap = new HashMap<String, ArrayList<Double>>();
        outStream.println("\nrank\ttaxonname\ttotalcount\tmean_distance\tstdev");

        for ( Taxonomy taxon: distanceMap.keySet()){
            StdevCal.Std result = StdevCal.calStd(distanceMap.get(taxon));
            outStream.println(taxon.getHierLevel() + "\t" + taxon.getName() + "\t" + result.getTotalCount() + "\t" + String.format("%.3f",result.getMean()) + "\t" + String.format("%.3f",result.getStdev()));
            ArrayList<Double> distList = rankDistanceMap.get(taxon.getHierLevel());
            if ( distList == null){
                distList = new ArrayList<Double>();
                distList.addAll(distanceMap.get(taxon));
                rankDistanceMap.put(taxon.getHierLevel(), distList);
            }else {
                distList.addAll(distanceMap.get(taxon));
            }
        }       
       
        outStream.println("\nrank\ttotalcount\tmean_distance\tstdev");
        for (String rank: rankDistanceMap.keySet()){
            StdevCal.Std result = StdevCal.calStd(rankDistanceMap.get(rank));
            outStream.println(rank + "\t" + result.getTotalCount() + "\t" + String.format("%.3f", result.getMean()) + "\t" + String.format("%.3f", result.getStdev()));

        }
        outStream.close();
    }
       
    private static Taxonomy findLowestCommonAncestor(HierarchyTree treex, HierarchyTree treey){
        ArrayList<HierarchyTree> ancestorx = new ArrayList<HierarchyTree>();
        ArrayList<HierarchyTree> ancestory = new ArrayList<HierarchyTree>();
        HierarchyTree parent = treex.getParent();
        ancestorx.add(treex);
        while(parent != null) {
            ancestorx.add(parent);
            parent = parent.getParent();
        }
        ancestory.add(treey);
        parent = treey.getParent();
        while(parent != null) {
            ancestory.add(parent);
            parent = parent.getParent();
        }
       
        Taxonomy lowestCommonAnc = ancestorx.get(ancestorx.size() -1).getTaxonomy();
        for ( int i = 2; i <= ancestorx.size() ; i++){
            if (  (ancestory.size() -i ) >= 0 ){
                if ( ancestorx.get( ancestorx.size() -i).getTaxonomy().equals(ancestory.get( ancestory.size() -i).getTaxonomy()) ){
                    lowestCommonAnc = ancestorx.get( ancestorx.size() -i).getTaxonomy();
                }
            }else {
               break;
            }
        }
        return lowestCommonAnc;
    }
   
   
     /**
     * This program does the pairwise alignment between each pair of sequences,
     * reports a summary of the average distances and the stdev at each rank.
     * @param args
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {
      
        String trainseqFile = null;
        String taxFile = null;
        PrintStream outStream = null;
        AlignmentMode mode = AlignmentMode.overlap;
        boolean show_alignment = false;
      
         try {
            CommandLine line = new PosixParser().parse(options, args);

            if (line.hasOption("show_alignment")) {
                show_alignment = true;
            }
            if (line.hasOption("alignment-mode")) {
                String m = line.getOptionValue("alignment-mode").toLowerCase();
                mode = AlignmentMode.valueOf(m);
             
            }
           
            if ( args.length != 3){
                throw new Exception("wrong arguments");
            }
            args = line.getArgs();           
            trainseqFile = args[0];
            taxFile = args[1];
            outStream = new PrintStream(new File(args[2]));
         }catch (Exception e) {
            System.err.println("Command Error: " + e.getMessage());
            new HelpFormatter().printHelp(80, " [options] trainseqFile taxonFile outFile", "", options, "");
            return;
        }
                       
        PairwiseSeqDistance theObj = new PairwiseSeqDistance(trainseqFile,taxFile, mode, show_alignment);
       
        theObj.printSummary(outStream);
   }
}
TOP

Related Classes of edu.msu.cme.rdp.classifier.train.validation.distance.PairwiseSeqDistance

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.