Package weka.classifiers.trees.ft

Source Code of weka.classifiers.trees.ft.FTNode

/*
*    This program is free software; you can redistribute it and/or modify
*    it under the terms of the GNU General Public License as published by
*    the Free Software Foundation; either version 2 of the License, or
*    (at your option) any later version.
*
*    This program is distributed in the hope that it will be useful,
*    but WITHOUT ANY WARRANTY; without even the implied warranty of
*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*    GNU General Public License for more details.
*
*    You should have received a copy of the GNU General Public License
*    along with this program; if not, write to the Free Software
*    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/*
*    FTNode.java
*    Copyright (C) 2007 University of Porto, Porto, Portugal
*
*/

package weka.classifiers.trees.ft;

import weka.classifiers.functions.SimpleLinearRegression;
import weka.classifiers.trees.j48.C45ModelSelection;
import weka.classifiers.trees.j48.C45Split;
import weka.classifiers.trees.j48.NoSplit;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionUtils;
import weka.core.Utils;

/**
* Class for Functional tree structure.
*
* @author João Gama
* @author Carlos Ferreira
*
* @version $Revision: 1.4 $
*/
public class FTNode
  extends FTtree {  
 
  /** for serialization. */
  private static final long serialVersionUID = 2317688685139295063L;

  /**
   * Constructor for a Functional tree node.
   *
   * @param errorOnProbabilities use error on probabilities for the stopping criterion of LogitBoost?
   * @param numBoostingIterations fixed number of LogitBoost iterations (a value <= 0 means the number is determined automatically)
   * @param minNumInstances minimum number of instances at which a node is considered for splitting
   * @param weightTrimBeta the weight trimming threshold for LogitBoost
   * @param useAIC use the AIC criterion to determine the number of LogitBoost iterations?
   */
  public FTNode(boolean errorOnProbabilities, int numBoostingIterations,
                int minNumInstances, double weightTrimBeta, boolean useAIC) {
    m_errorOnProbabilities = errorOnProbabilities;
    m_fixedNumIterations = numBoostingIterations;     
    m_minNumInstances = minNumInstances;
    m_maxIterations = 200;
    setWeightTrimBeta(weightTrimBeta);
    setUseAIC(useAIC);
  }        
   
  /**
   * Method for building a Functional tree (only called for the root node).
   * Grows an initial Functional Tree.
   *
   * @param data the data to train with
   * @throws Exception if something goes wrong
   */
  public void buildClassifier(Instances data) throws Exception{
 
    // insert the extended "constructor" attributes (one numeric attribute per
    // class value); these later hold the logistic class-probability estimates
    data = insertNewAttr(data);
        
    //build tree using all the data
    buildTree(data, null, data.numInstances(), 0);
 
  }

  /**
   * Method for building the tree structure.
   * Builds a logistic model, splits the node and recursively builds tree for child nodes.
   * @param data the training data passed on to this node
   * @param higherRegressions An array of regression functions produced by LogitBoost at higher
   * levels in the tree. They represent a logistic regression model that is refined locally
   * at this node.
   * @param totalInstanceWeight the total number of training examples
   * @param higherNumParameters effective number of parameters in the logistic regression model built
   * in parent nodes
   * @throws Exception if something goes wrong
   */
  public void buildTree(Instances data, SimpleLinearRegression[][] higherRegressions,
                        double totalInstanceWeight, double higherNumParameters) throws Exception{

    //save some stuff
    m_totalInstanceWeight = totalInstanceWeight;
    m_train = new Instances(data);
    m_train = removeExtAttributes(m_train);
       
    m_isLeaf = true;
    m_sons = null;
 
    m_numInstances = m_train.numInstances();
    m_numClasses = m_train.numClasses();       
 
    //init
    m_numericData = getNumericData(m_train);     
    m_numericDataHeader = new Instances(m_numericData, 0);
 
    m_regressions = initRegressions();
    m_numRegressions = 0;
 
    if (higherRegressions != null) m_higherRegressions = higherRegressions;
    else m_higherRegressions = new SimpleLinearRegression[m_numClasses][0];

    m_numHigherRegressions = m_higherRegressions[0].length;
       
    m_numParameters = higherNumParameters;
       
    // build the logistic model: use a fixed number of LogitBoost iterations
    // if one was given, otherwise determine it via the AIC or cross-validation
    if (m_numInstances >= m_numFoldsBoosting) {
      if (m_fixedNumIterations > 0){
        performBoosting(m_fixedNumIterations);
      } else if (getUseAIC()) {
        performBoostingInfCriterion();
      } else {
        performBoostingCV();
      }
    }
       
    // each LogitBoost iteration kept at this node adds one simple regression
    // function, i.e. one effective parameter
    m_numParameters += m_numRegressions;
 
    //only keep the simple regression functions that correspond to the selected number of LogitBoost iterations
    m_regressions = selectRegressions(m_regressions);
        
    boolean grow;

    // compute the logistic class probabilities and store them in the
    // extended (constructor) attributes of each instance
    double[][] FsConst = getFs(m_numericData);
    double[] probsConst;

    for (int j = 0; j < data.numInstances(); j++) {
      probsConst = probs(FsConst[j]);
      // auxiliary count used to compute the constructor model's error
      if (data.instance(j).classValue() != getConstError(probsConst))
        m_constError = m_constError + 1;
      for (int i = 0; i < data.classAttribute().numValues(); i++)
        data.instance(j).setValue(i, probsConst[i]);
    }
 
       
    // the model selection object must be rebuilt here because the data now
    // contains the dynamically inserted probability attributes
    m_modelSelection = new C45ModelSelection(m_minNumInstances, data);

    // select the best C4.5-style split over both the original attributes and
    // the inserted probability attributes
    m_localModel = m_modelSelection.selectModel(data);
      
    //split node if more than minNumInstances...
    if (m_numInstances > m_minNumInstances) {
      grow = (m_localModel.numSubsets() > 1);
    } else {
      grow = false;
    }
       
    // LogitBoost uses the distribution stored in each instance
    m_hasConstr = false;
    m_train = data;
    if (grow) { 
      //create and build children of node
      m_isLeaf = false;
      Instances[] localInstances = m_localModel.split(data);
      // if the split attribute is one of the inserted probability attributes,
      // this node uses the constructor model
      if (((C45Split)m_localModel).attIndex() >= 0 && ((C45Split)m_localModel).attIndex() < data.classAttribute().numValues())
        m_hasConstr = true;
              
      m_sons = new FTNode[m_localModel.numSubsets()];
      for (int i = 0; i < m_sons.length; i++) {
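        // each child refines this node's logistic model: it is passed the
        // merged regression functions of this node and of all its ancestors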
        m_sons[i] = new FTNode(m_errorOnProbabilities,m_fixedNumIterations,
                               m_minNumInstances,getWeightTrimBeta(), getUseAIC());
        m_sons[i].buildTree(localInstances[i],
                            mergeArrays(m_regressions, m_higherRegressions), m_totalInstanceWeight, m_numParameters);   
        localInstances[i] = null;
      }     
    }
    else {
      m_leafclass = m_localModel.distribution().maxClass();
    }
  }

  /**
   * Method for pruning a tree using the C4.5 pruning procedure.
   *
   * @return the estimated error of the (possibly pruned) subtree
   * @exception Exception if something goes wrong
   */
  public double prune() throws Exception {

    double errorsLeaf;
    double errorsTree;
    double errorsConstModel;
    double treeError = 0;
    int i;
    double probBranch;

    // compute the error if this tree were a leaf without the constructor model
    errorsLeaf = getEstimatedErrorsForDistribution(m_localModel.distribution());
    if (m_isLeaf) {
      return errorsLeaf;
    } else {
      // compute the error of the constructor model
      errorsConstModel = getEtimateConstModel(m_localModel.distribution());
      errorsTree = 0;
      // weight each subtree's error by the fraction of instances in its branch
      for (i = 0; i < m_sons.length; i++) {
        probBranch = m_localModel.distribution().perBag(i) /
          m_localModel.distribution().total();
        errorsTree += probBranch * m_sons[i].prune();
      }
      // Decide if leaf is best choice.

      if (Utils.smOrEq(errorsLeaf, errorsTree) && Utils.smOrEq(errorsLeaf, errorsConstModel)) {
        // free the son trees
        m_sons = null;
        m_isLeaf = true;
        m_hasConstr = false;
        m_leafclass = m_localModel.distribution().maxClass();
        // get a NoSplit model for the node
        m_localModel = new NoSplit(m_localModel.distribution());
        treeError = errorsLeaf;
      } else {
        // decide if the constructor model is the best choice
        if (Utils.smOrEq(errorsConstModel, errorsTree)) {
          // free the son trees
          m_sons = null;
          m_isLeaf = true;
          m_hasConstr = true;
          // get a NoSplit model for the node
          m_localModel = new NoSplit(m_localModel.distribution());
          treeError = errorsConstModel;
        } else
          treeError = errorsTree;
      }
    }
    return treeError;
  }

  /**
   * Returns the class probabilities for an instance as given by the Functional Tree.
   *
   * @param instance the instance
   * @return the array of class probabilities
   * @throws Exception if the distribution cannot be computed successfully
   */
  public double[] distributionForInstance(Instance instance) throws Exception {
    double[] probs;

    if (m_isLeaf && m_hasConstr) {
      // leaf: use the constructor model
      probs = modelDistributionForInstance(instance);
    } else {
      if (m_isLeaf && !m_hasConstr) {
        // leaf: use the majority class
        probs = new double[instance.numClasses()];
        probs[m_leafclass] = 1.0;
      } else {
              
        probs = modelDistributionForInstance(instance);

        // build an auxiliary instance for the split test, with room for the
        // class-probability attributes in front of the original attributes
        Instance instanceSplit = new Instance(instance.numAttributes() + instance.numClasses());
        instanceSplit.setDataset(instance.dataset());

        // insert the probability attributes and their values
        for (int i = 0; i < instance.numClasses(); i++) {
          instanceSplit.dataset().insertAttributeAt(new Attribute("N" + (instance.numClasses() - i)), 0);
          instanceSplit.setValue(i, probs[i]);
        }
        for (int i = 0; i < instance.numAttributes(); i++)
          instanceSplit.setValue(i + instance.numClasses(), instance.value(i));

        // choose the branch the auxiliary instance falls into
        int branch = m_localModel.whichSubset(instanceSplit);

        // delete the added attributes again
        for (int i = 0; i < instance.numClasses(); i++)
          instanceSplit.dataset().deleteAttributeAt(0);

        probs = m_sons[branch].distributionForInstance(instance);
      }
    }
    return probs;
 
  }
 
  /**
   * Returns the revision string.
   *
   * @return    the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 1.4 $");
  }
}
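
For context, here is a minimal usage sketch (not part of the original file). FTNode is an internal node class; functional trees are normally built through the top-level weka.classifiers.trees.FT classifier, which drives FTtree/FTNode internally. The dataset path below is a placeholder, and the sketch only assumes the standard Weka classifier API:

import weka.classifiers.trees.FT;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class FTExample {
  public static void main(String[] args) throws Exception {
    // load a dataset and declare the last attribute as the class
    Instances data = DataSource.read("iris.arff"); // placeholder path
    data.setClassIndex(data.numAttributes() - 1);

    // build a functional tree on the training data
    FT ft = new FT();
    ft.buildClassifier(data);

    // class-probability estimates for the first instance, produced by
    // FTNode.distributionForInstance() as the instance is routed down the tree
    Instance first = data.instance(0);
    double[] probs = ft.distributionForInstance(first);
    for (int i = 0; i < probs.length; i++)
      System.out.println(data.classAttribute().value(i) + ": " + probs[i]);
  }
}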