/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* J48.java
* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
*
*/
package weka.classifiers.trees;
import java.util.Enumeration;
import java.util.Vector;
import weka.classifiers.Classifier;
import weka.classifiers.Sourcable;
import weka.classifiers.trees.j48.C45PruneableClassifierTree;
import weka.classifiers.trees.j48.ClassifierTree;
import weka.classifiers.trees.j48.HDIGModelSelection;
import weka.classifiers.trees.j48.ModelSelection;
import weka.classifiers.trees.j48.PruneableClassifierTree;
import weka.core.AdditionalMeasureProducer;
import weka.core.Capabilities;
import weka.core.Drawable;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Matchable;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.Summarizable;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
/**
<!-- globalinfo-start -->
* Class for generating a pruned or unpruned HD-IG product distance decision tree.
<!-- options-start -->
* Valid options are: <p/>
*
* <pre> -U
* Use unpruned tree.</pre>
*
* <pre> -C <pruning confidence>
* Set confidence threshold for pruning.
* (default 0.25)</pre>
*
* <pre> -M <minimum number of instances>
* Set minimum number of instances per leaf.
* (default 2)</pre>
*
* <pre> -R
* Use reduced error pruning.</pre>
*
* <pre> -N <number of folds>
* Set number of folds for reduced error
* pruning. One fold is used as pruning set.
* (default 3)</pre>
*
* <pre> -S
* Don't perform subtree raising.</pre>
*
* <pre> -L
* Do not clean up after the tree has been built.</pre>
*
* <pre> -A
* Laplace smoothing for predicted probabilities.</pre>
*
* <pre> -Q <seed>
* Seed for random data shuffling (default 1).</pre>
*
<!-- options-end -->
*
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
* @version $Revision: 1.9 $
*/
public class HDIGTree extends Classifier implements OptionHandler, Drawable, Matchable, Sourcable, WeightedInstancesHandler, Summarizable, AdditionalMeasureProducer, TechnicalInformationHandler {
/** The decision tree */
private ClassifierTree m_root;
/** Unpruned tree? */
private boolean m_unpruned = false;
/** Confidence level */
private float m_CF = 0.25f;
/** Minimum number of instances */
private int m_minNumObj = 2;
/** Determines whether probabilities are smoothed using
Laplace correction when predictions are generated */
private boolean m_useLaplace = false;
/** Use reduced error pruning? */
private boolean m_reducedErrorPruning = false;
/** Number of folds for reduced error pruning. */
private int m_numFolds = 3;
/** Subtree raising to be performed? */
private boolean m_subtreeRaising = true;
/** Cleanup after the tree has been built. */
private boolean m_noCleanup = false;
/** Random number seed for reduced-error pruning. */
private int m_Seed = 1;
/**
* Returns a string describing classifier
* @return a description suitable for
* displaying in the explorer/experimenter gui
*/
public String globalInfo() {
return "Class for generating a pruned or unpruned HD-IG product decision tree. For more " +
"information, see\n\n" +
getTechnicalInformation().toString();
}
/**
* Returns an instance of a TechnicalInformation object, containing
* detailed information about the technical background of this class,
* e.g., paper reference or book this class is based on.
*
* @return the technical information about this class
*/
public TechnicalInformation getTechnicalInformation() {
TechnicalInformation result;
result = new TechnicalInformation( Type.BOOK );
result.setValue( Field.AUTHOR, "Ryan N. Lichtenwalter" );
result.setValue( Field.YEAR, "2009" );
return result;
}
/**
* Returns default capabilities of the classifier.
*
* @return the capabilities of this classifier
*/
public Capabilities getCapabilities() {
Capabilities result;
try {
if( !m_reducedErrorPruning ) {
result = new C45PruneableClassifierTree( null, !m_unpruned, m_CF, m_subtreeRaising, !m_noCleanup ).getCapabilities();
} else {
result = new PruneableClassifierTree( null, !m_unpruned, m_numFolds, !m_noCleanup, m_Seed ).getCapabilities();
}
} catch( Exception e ) {
result = new Capabilities( this );
}
result.setOwner( this );
return result;
}
/**
* Generates the classifier.
*
* @param instances the data to train the classifier with
* @throws Exception if classifier can't be built successfully
*/
public void buildClassifier( Instances instances ) throws Exception {
ModelSelection modSelection = new HDIGModelSelection( m_minNumObj, instances );
if( !m_reducedErrorPruning ) {
m_root = new C45PruneableClassifierTree( modSelection, !m_unpruned, m_CF, m_subtreeRaising, !m_noCleanup );
} else {
m_root = new PruneableClassifierTree( modSelection, !m_unpruned, m_numFolds, !m_noCleanup, m_Seed );
}
m_root.buildClassifier( instances );
}
/**
* Classifies an instance.
*
* @param instance the instance to classify
* @return the classification for the instance
* @throws Exception if instance can't be classified successfully
*/
public double classifyInstance( Instance instance ) throws Exception {
return m_root.classifyInstance( instance );
}
/**
* Returns class probabilities for an instance.
*
* @param instance the instance to calculate the class probabilities for
* @return the class probabilities
* @throws Exception if distribution can't be computed successfully
*/
public final double[] distributionForInstance( Instance instance ) throws Exception {
return m_root.distributionForInstance( instance, m_useLaplace );
}
/**
* Returns the type of graph this classifier
* represents.
* @return Drawable.TREE
*/
public int graphType() {
return Drawable.TREE;
}
/**
* Returns graph describing the tree.
*
* @return the graph describing the tree
* @throws Exception if graph can't be computed
*/
public String graph() throws Exception {
return m_root.graph();
}
/**
* Returns tree in prefix order.
*
* @return the tree in prefix order
* @throws Exception if something goes wrong
*/
public String prefix() throws Exception {
return m_root.prefix();
}
/**
* Returns tree as an if-then statement.
*
* @param className the name of the Java class
* @return the tree as a Java if-then type statement
* @throws Exception if something goes wrong
*/
public String toSource( String className ) throws Exception {
StringBuffer[] source = m_root.toSource( className );
return "class " + className + " {\n\n" + " public static double classify(Object[] i)\n" + " throws Exception {\n\n" + " double p = Double.NaN;\n" + source[0] // Assignment code
+ " return p;\n" + " }\n" + source[1] // Support code
+ "}\n";
}
/**
* Returns an enumeration describing the available options.
*
* Valid options are: <p>
*
* -U <br>
* Use unpruned tree.<p>
*
* -C confidence <br>
* Set confidence threshold for pruning. (Default: 0.25) <p>
*
* -M number <br>
* Set minimum number of instances per leaf. (Default: 2) <p>
*
* -R <br>
* Use reduced error pruning. No subtree raising is performed. <p>
*
* -N number <br>
* Set number of folds for reduced error pruning. One fold is
* used as the pruning set. (Default: 3) <p>
*
* -B <br>
* Use binary splits for nominal attributes. <p>
*
* -S <br>
* Don't perform subtree raising. <p>
*
* -L <br>
* Do not clean up after the tree has been built.
*
* -A <br>
* If set, Laplace smoothing is used for predicted probabilites. <p>
*
* -Q <br>
* The seed for reduced-error pruning. <p>
*
* @return an enumeration of all the available options.
*/
public Enumeration listOptions() {
Vector newVector = new Vector( 9 );
newVector.addElement( new Option( "\tUse unpruned tree.",
"U", 0, "-U" ) );
newVector.addElement( new Option( "\tSet confidence threshold for pruning.\n" +
"\t(default 0.25)",
"C", 1, "-C <pruning confidence>" ) );
newVector.addElement( new Option( "\tSet minimum number of instances per leaf.\n" +
"\t(default 2)",
"M", 1, "-M <minimum number of instances>" ) );
newVector.addElement( new Option( "\tUse reduced error pruning.",
"R", 0, "-R" ) );
newVector.addElement( new Option( "\tSet number of folds for reduced error\n" +
"\tpruning. One fold is used as pruning set.\n" +
"\t(default 3)",
"N", 1, "-N <number of folds>" ) );
newVector.addElement( new Option( "\tDon't perform subtree raising.",
"S", 0, "-S" ) );
newVector.addElement( new Option( "\tDo not clean up after the tree has been built.",
"L", 0, "-L" ) );
newVector.addElement( new Option( "\tLaplace smoothing for predicted probabilities.",
"A", 0, "-A" ) );
newVector.addElement( new Option( "\tSeed for random data shuffling (default 1).",
"Q", 1, "-Q <seed>" ) );
return newVector.elements();
}
/**
* Parses a given list of options.
*
<!-- options-start -->
* Valid options are: <p/>
*
* <pre> -U
* Use unpruned tree.</pre>
*
* <pre> -C <pruning confidence>
* Set confidence threshold for pruning.
* (default 0.25)</pre>
*
* <pre> -M <minimum number of instances>
* Set minimum number of instances per leaf.
* (default 2)</pre>
*
* <pre> -R
* Use reduced error pruning.</pre>
*
* <pre> -N <number of folds>
* Set number of folds for reduced error
* pruning. One fold is used as pruning set.
* (default 3)</pre>
*
* <pre> -S
* Don't perform subtree raising.</pre>
*
* <pre> -L
* Do not clean up after the tree has been built.</pre>
*
* <pre> -A
* Laplace smoothing for predicted probabilities.</pre>
*
* <pre> -Q <seed>
* Seed for random data shuffling (default 1).</pre>
*
<!-- options-end -->
*
* @param options the list of options as an array of strings
* @throws Exception if an option is not supported
*/
public void setOptions( String[] options ) throws Exception {
// Other options
String minNumString = Utils.getOption( 'M', options );
if( minNumString.length() != 0 ) {
m_minNumObj = Integer.parseInt( minNumString );
} else {
m_minNumObj = 2;
}
m_useLaplace = Utils.getFlag( 'A', options );
// Pruning options
m_unpruned = Utils.getFlag( 'U', options );
m_subtreeRaising = !Utils.getFlag( 'S', options );
m_noCleanup = Utils.getFlag( 'L', options );
if( ( m_unpruned ) && ( !m_subtreeRaising ) ) {
throw new Exception( "Subtree raising doesn't need to be unset for unpruned tree!" );
}
m_reducedErrorPruning = Utils.getFlag( 'R', options );
if( ( m_unpruned ) && ( m_reducedErrorPruning ) ) {
throw new Exception( "Unpruned tree and reduced error pruning can't be selected " +
"simultaneously!" );
}
String confidenceString = Utils.getOption( 'C', options );
if( confidenceString.length() != 0 ) {
if( m_reducedErrorPruning ) {
throw new Exception( "Setting the confidence doesn't make sense " +
"for reduced error pruning." );
} else if( m_unpruned ) {
throw new Exception( "Doesn't make sense to change confidence for unpruned " + "tree!" );
} else {
m_CF = ( new Float( confidenceString ) ).floatValue();
if( ( m_CF <= 0 ) || ( m_CF >= 1 ) ) {
throw new Exception( "Confidence has to be greater than zero and smaller " +
"than one!" );
}
}
} else {
m_CF = 0.25f;
}
String numFoldsString = Utils.getOption( 'N', options );
if( numFoldsString.length() != 0 ) {
if( !m_reducedErrorPruning ) {
throw new Exception( "Setting the number of folds" +
" doesn't make sense if" +
" reduced error pruning is not selected." );
} else {
m_numFolds = Integer.parseInt( numFoldsString );
}
} else {
m_numFolds = 3;
}
String seedString = Utils.getOption( 'Q', options );
if( seedString.length() != 0 ) {
m_Seed = Integer.parseInt( seedString );
} else {
m_Seed = 1;
}
}
/**
* Gets the current settings of the Classifier.
*
* @return an array of strings suitable for passing to setOptions
*/
public String[] getOptions() {
String[] options = new String[14];
int current = 0;
if( m_noCleanup ) {
options[current++] = "-L";
}
if( m_unpruned ) {
options[current++] = "-U";
} else {
if( !m_subtreeRaising ) {
options[current++] = "-S";
}
if( m_reducedErrorPruning ) {
options[current++] = "-R";
options[current++] = "-N";
options[current++] = "" + m_numFolds;
options[current++] = "-Q";
options[current++] = "" + m_Seed;
} else {
options[current++] = "-C";
options[current++] = "" + m_CF;
}
}
options[current++] = "-M";
options[current++] = "" + m_minNumObj;
if( m_useLaplace ) {
options[current++] = "-A";
}
while( current < options.length ) {
options[current++] = "";
}
return options;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String seedTipText() {
return "The seed used for randomizing the data " +
"when reduced-error pruning is used.";
}
/**
* Get the value of Seed.
*
* @return Value of Seed.
*/
public int getSeed() {
return m_Seed;
}
/**
* Set the value of Seed.
*
* @param newSeed Value to assign to Seed.
*/
public void setSeed( int newSeed ) {
m_Seed = newSeed;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String useLaplaceTipText() {
return "Whether counts at leaves are smoothed based on Laplace.";
}
/**
* Get the value of useLaplace.
*
* @return Value of useLaplace.
*/
public boolean getUseLaplace() {
return m_useLaplace;
}
/**
* Set the value of useLaplace.
*
* @param newuseLaplace Value to assign to useLaplace.
*/
public void setUseLaplace( boolean newuseLaplace ) {
m_useLaplace = newuseLaplace;
}
/**
* Returns a description of the classifier.
*
* @return a description of the classifier
*/
public String toString() {
if( m_root == null ) {
return "No classifier built";
}
if( m_unpruned ) {
return "HD-IG product unpruned tree\n------------------\n" + m_root.toString();
} else {
return "HD-IG product pruned tree\n------------------\n" + m_root.toString();
}
}
/**
* Returns a superconcise version of the model
*
* @return a summary of the model
*/
public String toSummaryString() {
return "Number of leaves: " + m_root.numLeaves() + "\n" + "Size of the tree: " + m_root.numNodes() + "\n";
}
/**
* Returns the size of the tree
* @return the size of the tree
*/
public double measureTreeSize() {
return m_root.numNodes();
}
/**
* Returns the number of leaves
* @return the number of leaves
*/
public double measureNumLeaves() {
return m_root.numLeaves();
}
/**
* Returns the number of rules (same as number of leaves)
* @return the number of rules
*/
public double measureNumRules() {
return m_root.numLeaves();
}
/**
* Returns an enumeration of the additional measure names
* @return an enumeration of the measure names
*/
public Enumeration enumerateMeasures() {
Vector newVector = new Vector( 3 );
newVector.addElement( "measureTreeSize" );
newVector.addElement( "measureNumLeaves" );
newVector.addElement( "measureNumRules" );
return newVector.elements();
}
/**
* Returns the value of the named measure
* @param additionalMeasureName the name of the measure to query for its value
* @return the value of the named measure
* @throws IllegalArgumentException if the named measure is not supported
*/
public double getMeasure( String additionalMeasureName ) {
if( additionalMeasureName.compareToIgnoreCase( "measureNumRules" ) == 0 ) {
return measureNumRules();
} else if( additionalMeasureName.compareToIgnoreCase( "measureTreeSize" ) == 0 ) {
return measureTreeSize();
} else if( additionalMeasureName.compareToIgnoreCase( "measureNumLeaves" ) == 0 ) {
return measureNumLeaves();
} else {
throw new IllegalArgumentException( additionalMeasureName + " not supported (j48)" );
}
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String unprunedTipText() {
return "Whether pruning is performed.";
}
/**
* Get the value of unpruned.
*
* @return Value of unpruned.
*/
public boolean getUnpruned() {
return m_unpruned;
}
/**
* Set the value of unpruned. Turns reduced-error pruning
* off if set.
* @param v Value to assign to unpruned.
*/
public void setUnpruned( boolean v ) {
if( v ) {
m_reducedErrorPruning = false;
}
m_unpruned = v;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String confidenceFactorTipText() {
return "The confidence factor used for pruning (smaller values incur " + "more pruning).";
}
/**
* Get the value of CF.
*
* @return Value of CF.
*/
public float getConfidenceFactor() {
return m_CF;
}
/**
* Set the value of CF.
*
* @param v Value to assign to CF.
*/
public void setConfidenceFactor( float v ) {
m_CF = v;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String minNumObjTipText() {
return "The minimum number of instances per leaf.";
}
/**
* Get the value of minNumObj.
*
* @return Value of minNumObj.
*/
public int getMinNumObj() {
return m_minNumObj;
}
/**
* Set the value of minNumObj.
*
* @param v Value to assign to minNumObj.
*/
public void setMinNumObj( int v ) {
m_minNumObj = v;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String reducedErrorPruningTipText() {
return "Whether reduced-error pruning is used instead of C.4.5 pruning.";
}
/**
* Get the value of reducedErrorPruning.
*
* @return Value of reducedErrorPruning.
*/
public boolean getReducedErrorPruning() {
return m_reducedErrorPruning;
}
/**
* Set the value of reducedErrorPruning. Turns
* unpruned trees off if set.
*
* @param v Value to assign to reducedErrorPruning.
*/
public void setReducedErrorPruning( boolean v ) {
if( v ) {
m_unpruned = false;
}
m_reducedErrorPruning = v;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String numFoldsTipText() {
return "Determines the amount of data used for reduced-error pruning. " + " One fold is used for pruning, the rest for growing the tree.";
}
/**
* Get the value of numFolds.
*
* @return Value of numFolds.
*/
public int getNumFolds() {
return m_numFolds;
}
/**
* Set the value of numFolds.
*
* @param v Value to assign to numFolds.
*/
public void setNumFolds( int v ) {
m_numFolds = v;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String binarySplitsTipText() {
return "Whether to use binary splits on nominal attributes when " + "building the trees.";
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String subtreeRaisingTipText() {
return "Whether to consider the subtree raising operation when pruning.";
}
/**
* Get the value of subtreeRaising.
*
* @return Value of subtreeRaising.
*/
public boolean getSubtreeRaising() {
return m_subtreeRaising;
}
/**
* Set the value of subtreeRaising.
*
* @param v Value to assign to subtreeRaising.
*/
public void setSubtreeRaising( boolean v ) {
m_subtreeRaising = v;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String saveInstanceDataTipText() {
return "Whether to save the training data for visualization.";
}
/**
* Check whether instance data is to be saved.
*
* @return true if instance data is saved
*/
public boolean getSaveInstanceData() {
return m_noCleanup;
}
/**
* Set whether instance data is to be saved.
* @param v true if instance data is to be saved
*/
public void setSaveInstanceData( boolean v ) {
m_noCleanup = v;
}
/**
* Returns the revision string.
*
* @return the revision
*/
public String getRevision() {
return RevisionUtils.extract( "$Revision: 1.0 $" );
}
/**
* Main method for testing this class
*
* @param argv the commandline options
*/
public static void main( String[] argv ) {
runClassifier( new HDIGTree(), argv );
}
}