Package edu.msu.cme.rdp.classifier.train.validation

Source Code of edu.msu.cme.rdp.classifier.train.validation.HierarchyTree

/*
* HierarchyTree.java
*
* Created on June 24, 2002, 2:36 PM
*/
/**
*
* @author  wangqion
* @version
*/
package edu.msu.cme.rdp.classifier.train.validation;

import edu.msu.cme.rdp.classifier.train.LineageSequence;
import edu.msu.cme.rdp.readseq.utils.orientation.GoodWordIterator;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;


public class HierarchyTree {

    private String name;
    private int numOfLeaves;
    private HierarchyTree parent;
    private Map<String, HierarchyTree> subclasses = new HashMap();
    private Map leaves = new HashMap();
    private short[] wordOccurrence;  //size is 64k for word size 8
    private boolean wordOccurDone = false;
    private int totalSeqs = 0; // total number of train sequences
    private int numTotalTestedseq = 0;   // no singleton included
    private int missCount = 0; // the number of missclassified at and below this node (indicates all the genera under this node)
    private Taxonomy taxon;    // the unique id for each node

    /** Creates new HierarchyTree given the name and its parent */
    public HierarchyTree(String n, HierarchyTree p, Taxonomy tax) {
        name = n;
        taxon = tax;
        numOfLeaves = -1;
        addParent(p);
    }

    /** Adds the parent HierarchyTree, also add this node to the parent tree as a child */
    private void addParent(HierarchyTree p) {
        parent = p;
        if (parent != null) {
            parent.addSubclass(this);
        }
    }

    /** Adds a subclass */
    private void addSubclass(HierarchyTree c) {
        subclasses.put(c.getName(), c);
    }

    /** Gets the name of the treenode */
    public String getName() {
        return name;
    }

    /** Gets the parent treenode */
    public HierarchyTree getParent() {
        return parent;
    }

    /** Gets the array of the subclasses if any */
    public Collection<HierarchyTree> getSubclasses() {
        return subclasses.values();
    }

    /** Gets the array of Sequenceleaves if any */
    public Collection getLeaves() {
        return leaves.values();
    }

    /** Gets the subclass with the given name */
    public HierarchyTree getSubclassbyName(String n) {
        return subclasses.get(n);
    }

    /** Gets the size of the children */
    public int getSizeofChildren() {
        int size;
        if ((size = subclasses.size()) > 0) {
            return size;
        }
        return getSizeofLeaves();
    }

    /** Gets the size of the subclasses */
    public int getSizeofSubclasses() {
        return subclasses.size();
    }

    /** Gets the size of sequence leaves directly belong to this tree, not
     * including the hidden leaves */
    public int getSizeofLeaves() {
        return numOfLeaves;
    }

    /** This method initiate the word occurrence from the sequences for the
     * lowest level of the hierarchy tree
     */
    public void initWordOccurrence(LineageSequence pSeq, float[] wordPriorArr, boolean initWordOccurrence) throws IOException {
        if (numOfLeaves < 0) {
            numOfLeaves = 1;
        } else {
            numOfLeaves++;
        }
       
        if ( !initWordOccurrence) return;

        GoodWordIterator iterator = new GoodWordIterator(pSeq.getSeqString());
        if (wordOccurrence == null) {
            wordOccurrence = new short[iterator.getMask() + 1];
        }

        // create a temporary list and initialize the value to be -1;
        int[] wordList = new int[iterator.getNumofWords()];

        for (int i = 0; i < wordList.length; i++) {
            wordList[i] = -1;
        }

        int numUniqueWords = 0// indicate the number of unique words
        // duplicated words in one sequence are only counted once
        while (iterator.hasNext()) {
            int index = iterator.next()// index is the actual integer representation of the word
           
            if (!isWordExist(wordList, index)) {
                wordList[numUniqueWords] = index;
                wordOccurrence[index]++;
                numUniqueWords++;
                // now add the word to the wordPriorArr
                // duplicated words in one sequence are only counted once
                wordPriorArr[index]++;
            }
        }
       
        // change the flag to be true
        wordOccurDone = true;
    }

    /** This method initiate the word occurrence from the sequences for the
     * lowest level of the hierarchy tree
     */
    public void unhideSeq(GoodWordIterator iterator) throws IOException {
        if (numOfLeaves < 0) {
            numOfLeaves = 1;
        } else {
            numOfLeaves++;
        }

        iterator.resetCurIndex();

        // create a temporary list and initialize the value to be -1;
        int[] wordList = new int[iterator.getNumofWords()];

        for (int i = 0; i < wordList.length; i++) {
            wordList[i] = -1;
        }

        int numUniqueWords = 0// indicate the number of unique words
        // duplicated words in one sequence are only counted once
        while (iterator.hasNext()) {
            int index = iterator.next()// index is the actual integer representation of the word

            if (!isWordExist(wordList, index)) {
                wordList[numUniqueWords] = index;
                wordOccurrence[index]++;
                numUniqueWords++;
            }
        }

        //also need to unhide parent,
        changeParentSeqCount(1);
    }

    /** This method hides a sequence by removing the words of that sequence
     * from the total wordOccurrence
     */
    public void hideSeq(GoodWordIterator iterator) throws IOException {
        if (wordOccurrence == null) {
            throw new IllegalStateException("unable to hide the sequence, the word occurrence is null ");
        }

        iterator.resetCurIndex();

        // create a temporary list and initialize the value to be -1;
        int[] wordList = new int[iterator.getNumofWords()];

        for (int i = 0; i < wordList.length; i++) {
            wordList[i] = -1;
        }

        int num = 0;
        // duplicated words in one sequence are only counted once

        while (iterator.hasNext()) {
            int index = iterator.next();
            if (!isWordExist(wordList, index)) {
                wordList[num] = index;
                wordOccurrence[index]--;
                num++;
            }
        }

        // also need to hide parent, since parent doesn't count word occurrence
        // we only reduce the number of leaves.
        //  reduce the number of leaves  
        numOfLeaves--;
        changeParentSeqCount(-1);
    }

    public void changeParentSeqCount(int i) {

        if (parent != null) {
            parent.numOfLeaves += i;
            parent.changeParentSeqCount(i);
        }

    }

    /** check if this word already been added to the wordOccurrence
     */
    private boolean isWordExist(int[] wordList, int wordIndex) {
        for (int i = 0; i < wordList.length; i++) {
            if (wordList[i] == wordIndex) {
                return true;
            }
            if (wordList[i] == -1) {
                return false;
            }
        }
        return false;
    }

    public boolean isWordOccurDone() {
        return wordOccurDone;
    }

    public int getWordOccurrenceSize() {
        return wordOccurrence.length;
    }

    public int getNumberofUniqueWords() {
        int count = 0;
        for (int i = 0; i < wordOccurrence.length; i++) {
            if (wordOccurrence[i] > 0) {
                count++;
            }
        }
        return count;
    }

    /** creates the word occurrence array from its children if the word occurrence
     * for this node does not exist
     */
    public void createWordOccurrenceFromSubclasses() {
        if (isWordOccurDone()) {
            return;
        }

        if (subclasses.size() > 0) {
            int len = 0;
            for(HierarchyTree child : subclasses.values()) {
                if(!child.isWordOccurDone()) {
                    child.createWordOccurrenceFromSubclasses();;
                }
               
                len = child.getWordOccurrenceSize();
            }
           
            wordOccurrence = new short[len];
            for (int i = 0; i < len; i++) {
                for(HierarchyTree child : subclasses.values()) {
                    wordOccurrence[i] += child.getWordOccurrence(i);
                }
            }
        }
    }

    /** Gets the word occurrence for the given word index
     */
    public int getWordOccurrence(int wordIndex) {
        return wordOccurrence[wordIndex];
    }

    /** Counts the number of sequence leaves below this tree */
    public int getNumOfLeaves() {
        if (!(numOfLeaves < 0)) {
            return numOfLeaves;
        }
        if (getSizeofSubclasses() <= 0) {
            numOfLeaves = getSizeofLeaves();
            return numOfLeaves;
        }
        numOfLeaves = 0;
        for(HierarchyTree child : subclasses.values()) {
            numOfLeaves += child.getNumOfLeaves();
        }
        return numOfLeaves;
    }

    public void increTotalSeqs() {
        totalSeqs++;
        if (parent != null) {
            parent.increTotalSeqs();
        }
    }

    public int getTotalSeqs() {
        return totalSeqs;
    }

    public int getMissCount() {
        return this.missCount;
    }

    public int getNumTotalTestedseq() {
        return this.numTotalTestedseq;
    }

    public void incNumTotalTestedseq() {
        numTotalTestedseq++;
        if (parent != null) {
            parent.incNumTotalTestedseq();
        }
    }

    public void incMissCount() {
        missCount++;
        if (parent != null) {
            parent.incMissCount();
        }
    }

    public Taxonomy getTaxonomy() {
        return taxon;
    }

    public boolean isSingleton() {
        return (totalSeqs > 1) ? false : true;

    }

    /** get all the lowest level nodes in given hierarchy level starting from the given root
     */
   
    public void getNodeList(String level, List nodeList) {

        if (this.taxon.getHierLevel().equalsIgnoreCase(level)) {
            nodeList.add(this);
            return;
        }
        //start from the root of the tree, get the subclasses.
        Collection al = new ArrayList();

        if ((al = this.getSubclasses()).isEmpty()) {
            return;
        }
        Iterator i = al.iterator();
        while (i.hasNext()) {
            ((HierarchyTree) i.next()).getNodeList(level, nodeList);
        }
    }
   
    /** get all the lowest level nodes in given hierarchy level starting from the given root
     */
    public void getNodeMap(String level, HashMap<String, HierarchyTree> nodeMap) {

        if (this.taxon.getHierLevel().equalsIgnoreCase(level)) {
            nodeMap.put(this.name, this);
            return;
        }
        //start from the root of the tree, get the subclasses.
        Collection al = new ArrayList();

        if ((al = this.getSubclasses()).isEmpty()) {
            return;
        }
        Iterator i = al.iterator();
        while (i.hasNext()) {
            ((HierarchyTree) i.next()).getNodeMap(level, nodeMap);
        }
    }
}
TOP

Related Classes of edu.msu.cme.rdp.classifier.train.validation.HierarchyTree

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.