Package edu.msu.cme.rdp.classifier.io

Source Code of edu.msu.cme.rdp.classifier.io.ClassificationResultFormatter

/*
* Copyright (C) 2012 Michigan State University <rdpstaff at msu.edu>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
package edu.msu.cme.rdp.classifier.io;

import edu.msu.cme.rdp.classifier.ClassificationResult;
import edu.msu.cme.rdp.classifier.RankAssignment;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;

/**
*
* @author wangqion
*/
public class ClassificationResultFormatter {
    // list of major rankd

    public static String[] RANKS = { "domain", "phylum", "class", "order", "family", "genus"};
    public static String[] RANKS_WITHSPECIES = { "domain", "phylum", "class", "order", "family", "genus", "species"};
    public static final List<ClassificationResultFormatter.FORMAT> fileFormats
            = new ArrayList(Arrays.asList(FORMAT.allRank,FORMAT.dbformat,FORMAT.fixRank,FORMAT.filterbyconf,FORMAT.biom));

    public enum FORMAT {

        allRank, fixRank, dbformat, filterbyconf, biom;
    }

    public static String getOutput(ClassificationResult result, FORMAT format){
        return getOutput(result, format, 0f, RANKS);
    }
   
    public static String getOutput(ClassificationResult result, FORMAT format, float conf, String[] ranks) {
        switch (format) {
            case allRank:
                return getAllRankOutput(result);
            case fixRank:
                return getFixRankOutput(ranks, result);
            case dbformat:
                return getDBOutput(result, conf);
            case filterbyconf:
                return getFilterByConfOutput(ranks, result, conf);
            case biom:
                return getBiomOutput(ranks, result, conf, ';');
            default:
                getAllRankOutput(result);
        }
        return null;
    }

    public static String getAllRankOutput(ClassificationResult result) {
        StringBuilder assignmentStr = new StringBuilder(result.getSequence().getSeqName() + "\t");
        if (result.isReverse()) {
            assignmentStr.append("-");
        }
        for (RankAssignment assignment : (List<RankAssignment>) result.getAssignments()) {
            assignmentStr.append("\t").append(assignment.getName()).append("\t").append(assignment.getRank()).append("\t").append(assignment.getConfidence());
        }
        assignmentStr.append("\n");
        return assignmentStr.toString();
    }

    public static String getAllRankOutput(ClassificationResult result, double conf) {
        StringBuilder assignmentStr = new StringBuilder(result.getSequence().getSeqName() + "\t");
        if (result.isReverse()) {
            assignmentStr.append("-");
        }
        for (RankAssignment assignment : (List<RankAssignment>) result.getAssignments()) {

            if (assignment.getConfidence() >= conf) {
                assignmentStr.append("\t").append(assignment.getName()).append("\t").append(assignment.getRank()).append("\t").append(assignment.getConfidence());
            }

        }
        assignmentStr.append("\n");
        return assignmentStr.toString();
    }

    public static String getFixRankOutput(ClassificationResult result) {
        return getFixRankOutput(RANKS, result);
    }

    public static String getFixRankOutput(String[] ranks, ClassificationResult result) {
        StringBuilder assignmentStr = new StringBuilder();

        HashMap<String, RankAssignment> rankMap = new HashMap<String, RankAssignment>();
        for (RankAssignment assignment : (List<RankAssignment>) result.getAssignments()) {
            rankMap.put(assignment.getRank().toLowerCase(), assignment);
        }
       
        // if the score is missing for the rank, report the conf and name from the lower rank
        RankAssignment prevAssign = null;
        for (int i = ranks.length -1; i>=0; i--) {
            RankAssignment assign = rankMap.get(ranks[i]);
            if (assign != null) {
                assignmentStr.insert(0, "\t" + assign.getName() +"\t" + assign.getRank() + "\t" + assign.getConfidence());
                prevAssign = assign;
            } else {
                assignmentStr.insert(0, "\t" + prevAssign.getName() +"\t" + ranks[i] + "\t" + prevAssign.getConfidence());
            }
           
        }
        if (result.isReverse()) {
            assignmentStr.insert(0,"-");
        } else {
            assignmentStr.insert(0, "");
        }
        assignmentStr.insert(0, result.getSequence().getSeqName() + "\t");
        assignmentStr.append("\n");
       
        return assignmentStr.toString();

    }
   
    public static String getFilterByConfOutput(ClassificationResult result, float conf) {
        return getFilterByConfOutput(RANKS, result, conf);
    }
   
    public static String getFilterByConfOutput(String[] ranks, ClassificationResult result, float conf) {
        StringBuilder assignmentStr = new StringBuilder();

        HashMap<String, RankAssignment> rankMap = new HashMap<String, RankAssignment>();
        for (RankAssignment assignment : (List<RankAssignment>) result.getAssignments()) {
            rankMap.put(assignment.getRank().toLowerCase(), assignment);
        }
        // if the score is missing for the rank, report the conf and name from the lower rank if above the conf
        // if the lower rank is below the conf, output unclassified node name and the conf from the one above the conf
        RankAssignment prevAssign = result.getAssignments().get(0);
        assignmentStr.append(result.getSequence().getSeqName());
        for (int i = 0; i <= ranks.length -1; i++) {
            RankAssignment assign = rankMap.get(ranks[i]);
            if (assign != null) {
                if ( assign.getConfidence() <= conf){
                    assignmentStr.append("\t" + "unclassified_" + prevAssign.getName() );                  
                }else {
                    assignmentStr.append("\t" + assign.getName() );
                    prevAssign = assign;
                }
               
            } else {
                if ( prevAssign != null && prevAssign.getConfidence() >= conf){
                    assignmentStr.append("\t" + "unclassified_" + prevAssign.getName()  );
                }
            }
           
        }
        assignmentStr.append("\n");
        return assignmentStr.toString();

    }

    /**
    * Output the classification result suitable to load into biom format.
    * Concatenate the rank and the taxon name, remove quotes in the taxon name
    */
    public static String getBiomOutput(String[] ranks, ClassificationResult result, float conf, char delimiter) {
        StringBuilder assignmentStr = new StringBuilder();

        HashMap<String, RankAssignment> rankMap = new HashMap<String, RankAssignment>();
        for (RankAssignment assignment : (List<RankAssignment>) result.getAssignments()) {
            rankMap.put(assignment.getRank().toLowerCase(), assignment);
        }
        // if the score is missing for the rank, report the conf and name from the lower rank if above the conf
        // if the lower rank is below the conf, output unclassified node name and the conf from the one above the conf
        // remove the quotes in the name
        RankAssignment prevAssign = result.getAssignments().get(0);
        assignmentStr.append(result.getSequence().getSeqName() + "\t");
        for (int i = 0; i <= ranks.length -1; i++) {
            RankAssignment assign = rankMap.get(ranks[i]);
            String rank = RANKS[i].substring(0,1).toLowerCase();
            if (assign != null) {
                if ( assign.getConfidence() <= conf){
                    assignmentStr.append(rank + "__" + "unclassified_" + prevAssign.getName().replaceAll("\"", "") );                  
                }else {
                    assignmentStr.append( rank + "__"+ assign.getName().replaceAll("\"", "") );
                    prevAssign = assign;
                }
               
            } else {
                if ( prevAssign != null && prevAssign.getConfidence() >= conf){
                    assignmentStr.append( rank + "__" + "unclassified_" + prevAssign.getName().replaceAll("\"", "") );
                }
            }
           
            if ( i < ranks.length -1){
                assignmentStr.append(delimiter);
            }
        }
        return assignmentStr.toString();

    }

    public static String getDBOutput(ClassificationResult result, float conf) {
        StringBuilder assignmentStr = new StringBuilder();
  boolean set = false;
  List assignments = result.getAssignments();
        for (int i = assignments.size() - 1; i >= 0; i--) {
          int markAssigned = 0;
    RankAssignment assign = (RankAssignment) assignments.get(i);
    if (!set && assign.getConfidence() >= conf) {
      markAssigned = 1;
      set = true;
    }
     assignmentStr.append(result.getSequence().getSeqName()).append("\t").append(result.getTrainsetNo()).append("\t").append(assign.getTaxid()).append("\t").append(assign.getConfidence()).append("\t").append(markAssigned).append("\n");
  }

        return assignmentStr.toString();
    }
}
TOP

Related Classes of edu.msu.cme.rdp.classifier.io.ClassificationResultFormatter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.