Source Code of edu.msu.cme.rdp.classifier.rrnaclassifier.ClassificationParser

/*
 * Copyright (C) 2012 Michigan State University <rdpstaff at msu.edu>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */


package edu.msu.cme.rdp.classifier.rrnaclassifier;


import edu.msu.cme.rdp.classifier.HierarchyTree;
import edu.msu.cme.rdp.classifier.ClassificationResult;
import edu.msu.cme.rdp.classifier.RankAssignment;
import edu.msu.cme.rdp.classifier.utils.ClassifierFactory;
import edu.msu.cme.rdp.classifier.utils.ClassifierSequence;
import edu.msu.cme.rdp.classifier.utils.HierarchyVersion;
import java.io.*;
import java.util.ArrayList;
import java.util.List;


/**
 *
 * @author fishjord
 */
public class ClassificationParser {


    private BufferedReader reader;
    private static HierarchyTree root;   // the root is the root of trained Classifier, should match the root of the classification results
    private static HierarchyVersion versionNo;


    public ClassificationParser(String fileName, ClassifierFactory classifierFactory) throws FileNotFoundException {
        this(new FileInputStream(fileName), classifierFactory);
    }


    public ClassificationParser(File f, ClassifierFactory classifierFactory) throws FileNotFoundException {
        this(new FileInputStream(f), classifierFactory);


    }


    public ClassificationParser(InputStream is, ClassifierFactory classifierFactory) {
        root = classifierFactory.getRoot();
        versionNo = classifierFactory.getHierarchyTrainsetNo();
        this.reader = new BufferedReader(new InputStreamReader(is));
    }


    public ClassificationParser(Reader reader) {
        if(reader instanceof BufferedReader)
            this.reader = (BufferedReader) reader;
        else
            this.reader = new BufferedReader(reader);
    }


    public void close() throws IOException {
        reader.close();
    }


    
    public HierarchyTree getRoot(){
        return root;
    }
    
    public ClassificationResult next() throws IOException {
        String line = reader.readLine();
        if(line == null)
            return null;


        String delimiter = "\t";  // delimiter can be either tab or ;
        if ( line.contains(";")) {
            delimiter= ";";
        }
        
        String [] tokens = line.split(delimiter);
        List<RankAssignment> assignments = new ArrayList();
        ClassifierSequence seq = new ClassifierSequence(tokens[0], "", "");
        
        boolean seqreversed = (tokens[1].equalsIgnoreCase("-"));


        if ( !root.getName().equalsIgnoreCase(tokens[2].trim()) || !root.getRank().equalsIgnoreCase(tokens[3].trim())) {
            throw new IllegalArgumentException("Root of \"" + line + "\" does not match the Root node in the original Classifier training data: " + root.getName() + "\t" + root.getRank());
        }
        
        HierarchyTree currRoot = root;


        assignments.add(new RankAssignment(currRoot, Float.valueOf(tokens[4].trim())));


        for(int index = 5;index < tokens.length;index += 3) {
            String nextName = tokens[index].trim();
            boolean found = false;
            for(HierarchyTree tree : (List<HierarchyTree>)currRoot.getSubclasses()) {
                if(tree.getName().equals(nextName)) {
                    currRoot = tree;
                    assignments.add(new RankAssignment(currRoot, Float.valueOf(tokens[index + 2].trim())));
                    found = true;
                    break;
                }
            }
            if ( !found){
                throw new IllegalArgumentException("taxon Node " + nextName + " in line \"" + line + "\" is not found in the original Classifier training data.");
            }
        }
        return new ClassificationResult(seq, seqreversed, assignments, versionNo);
    }


}
Source Code of edu.msu.cme.rdp.classifier.rrnaclassifier.ClassificationParser

Related Classes of edu.msu.cme.rdp.classifier.rrnaclassifier.ClassificationParser