weka.classifiers.trees.m5
Class RuleNode

java.lang.Object
  extended by weka.classifiers.Classifier
      extended by weka.classifiers.trees.m5.RuleNode
All Implemented Interfaces:
java.lang.Cloneable, OptionHandler, java.io.Serializable

public class RuleNode
extends Classifier

Constructs a node for use in an m5 tree or rule

Version:
$Revision: 1.8 $
Author:
Mark Hall (mhall@cs.waikato.ac.nz)
See Also:
Serialized Form

Field Summary
private  int m_classIndex
          the class index
private  double m_devFraction
          a node will not be split if its class standard deviation is less than 5% of the class standard deviation of all the instances
private  double m_globalAbsDeviation
          the absolute deviation of the global class
private  double m_globalDeviation
          a node will not be split if the class deviation of its instances is less than m_devFraction of the deviation of the global class
private  int m_id
          Node id.
private  int[] m_indices
          Indices of the attributes to be used in generating a linear model at this node
private  Instances m_instances
          instances reaching this node
private  boolean m_isLeaf
          Node is a leaf
private  int m_leafModelNum
          the number assigned to the linear model if this node is a leaf
protected  RuleNode m_left
          child nodes
private  PreConstructedLinearModel m_nodeModel
          the linear model at this node
private  int m_numAttributes
          the number of attributes
protected  int m_numInstances
          the number of instances reaching this node
 int m_numParameters
          the number of parameters in the chosen model for this node---either the subtree model or the linear model.
private  RuleNode m_parent
          the parent of this node
private  double m_pruningMultiplier
           
private  boolean m_regressionTree
          Make a regression tree instead of a model tree
protected  RuleNode m_right
           
private  double m_rootMeanSquaredError
          the root mean squared error of the model at this node (either linear or subtree)
private  boolean m_saveInstances
          Save the instances at each node (for visualizing in the Explorer's treevisualizer).
private  int m_splitAtt
          attribute this node splits on
private  double m_splitNum
          a node will not be split if it contains fewer than m_splitNum instances
private  double m_splitValue
          the value of the split attribute
private static double SMOOTHING_CONSTANT
          Constant used in original m5 smoothing calculation
 
Fields inherited from class weka.classifiers.Classifier
m_Debug
 
Constructor Summary
RuleNode(double globalDev, double globalAbsDev, RuleNode parent)
          Creates a new RuleNode instance.
 
Method Summary
protected  int assignIDs(int lastID)
          Assigns a unique identifier to each node in the tree
private  boolean[] attsTestedAbove()
          Returns an array containing the indexes of attributes used in tests above this node
private  boolean[] attsTestedBelow()
          Returns an array containing the indexes of attributes used in tests below this node
 void buildClassifier(Instances data)
          Build this node (find an attribute and split point)
private  void buildLinearModel(int[] indices)
          Build a linear model for this node using those attributes specified in indices.
 double classifyInstance(Instance inst)
          Classify an instance using this node.
 void findBestLeaf(double[] maxCoverage, RuleNode[] bestLeaf)
          Find the leaf with greatest coverage
 double getMinNumInstances()
          Get the minimum number of instances to allow at a leaf node
 PreConstructedLinearModel getModel()
          Get the linear model at this node
 int getNumInstances()
          Return the number of instances that reach this node.
 boolean getRegressionTree()
          Get the value of regressionTree.
 void graph(java.lang.StringBuffer text)
          Assign a unique identifier to each node in the tree and then calls graphTree
protected  void graphTree(java.lang.StringBuffer text)
          Return a dotty style string describing the tree
 void installLinearModels()
          Traverses the tree and installs linear models at each node.
 void installSmoothedModels()
           
 boolean isLeaf()
          Return true if this node is a leaf
 RuleNode leftNode()
          Get the left child of this node
 java.lang.String nodeToString()
          Returns a description of this node (debugging purposes)
 int numberOfLinearModels()
          Get the number of linear models in the tree
 int numLeaves(int leafCounter)
          Sets the leaves' numbers
private  int numParameters()
          Get the number of parameters in the model at this node
 RuleNode parentNode()
          Get the parent of this node
 void printAllModels()
          Print all the linear models at the leaves (debugging purposes)
 java.lang.String printLeafModels()
          print all leaf models
 java.lang.String printNodeLinearModel()
          print the linear model at this node
 void prune()
          Recursively prune the tree
private  double pruningFactor(int num_instances, int num_params)
          Compute the pruning factor
 void returnLeaves(FastVector[] v)
          Return a list containing all the leaves in the tree
 RuleNode rightNode()
          Get the right child of this node
protected  double rootMeanSquaredError()
          Get the root mean squared error at this node
 void setMinNumInstances(double minNum)
          Set the minimum number of instances to allow at a leaf node
 void setRegressionTree(boolean newregressionTree)
          Set the value of regressionTree.
protected  void setSaveInstances(boolean save)
          Set whether to save instances for visualization purposes.
protected static double smoothingOriginal(double n, double pred, double supportPred)
          Applies the m5 smoothing procedure to a prediction
 void split()
          Finds an attribute and split point for this node
 int splitAtt()
          Get the index of the splitting attribute for this node
 double splitVal()
          Get the split point for this node
 java.lang.String toString()
          print the linear model at this node
 java.lang.String treeToString(int level)
          Recursively builds a textual description of the tree
 
Methods inherited from class weka.classifiers.Classifier
debugTipText, distributionForInstance, forName, getDebug, getOptions, listOptions, makeCopies, setDebug, setOptions
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
 

Field Detail

m_instances

private Instances m_instances
instances reaching this node


m_classIndex

private int m_classIndex
the class index


m_numInstances

protected int m_numInstances
the number of instances reaching this node


m_numAttributes

private int m_numAttributes
the number of attributes


m_isLeaf

private boolean m_isLeaf
Node is a leaf


m_splitAtt

private int m_splitAtt
attribute this node splits on


m_splitValue

private double m_splitValue
the value of the split attribute


m_nodeModel

private PreConstructedLinearModel m_nodeModel
the linear model at this node


m_numParameters

public int m_numParameters
the number of parameters in the chosen model for this node---either the subtree model or the linear model. The constant term is counted as a parameter---this is for pruning purposes


m_rootMeanSquaredError

private double m_rootMeanSquaredError
the root mean squared error of the model at this node (either linear or subtree)


m_left

protected RuleNode m_left
child nodes


m_right

protected RuleNode m_right

m_parent

private RuleNode m_parent
the parent of this node


m_splitNum

private double m_splitNum
a node will not be split if it contains fewer than m_splitNum instances


m_devFraction

private double m_devFraction
a node will not be split if its class standard deviation is less than 5% of the class standard deviation of all the instances


m_pruningMultiplier

private double m_pruningMultiplier

m_leafModelNum

private int m_leafModelNum
the number assigned to the linear model if this node is a leaf; 0 if this node is not a leaf


m_globalDeviation

private double m_globalDeviation
a node will not be split if the class deviation of its instances is less than m_devFraction of the deviation of the global class


m_globalAbsDeviation

private double m_globalAbsDeviation
the absolute deviation of the global class


m_indices

private int[] m_indices
Indices of the attributes to be used in generating a linear model at this node


SMOOTHING_CONSTANT

private static final double SMOOTHING_CONSTANT
Constant used in original m5 smoothing calculation

See Also:
Constant Field Values

m_id

private int m_id
Node id.


m_saveInstances

private boolean m_saveInstances
Save the instances at each node (for visualizing in the Explorer's treevisualizer).


m_regressionTree

private boolean m_regressionTree
Make a regression tree instead of a model tree

Constructor Detail

RuleNode

public RuleNode(double globalDev,
                double globalAbsDev,
                RuleNode parent)
Creates a new RuleNode instance.

Parameters:
globalDev - the global standard deviation of the class
globalAbsDev - the global absolute deviation of the class
parent - the parent of this node
Method Detail

buildClassifier

public void buildClassifier(Instances data)
                     throws java.lang.Exception
Build this node (find an attribute and split point)

Specified by:
buildClassifier in class Classifier
Parameters:
data - the instances on which to build this node
Throws:
java.lang.Exception - if an error occurs

classifyInstance

public double classifyInstance(Instance inst)
                        throws java.lang.Exception
Classify an instance using this node. Recursively calls classifyInstance on child nodes.

Overrides:
classifyInstance in class Classifier
Parameters:
inst - the instance to classify
Returns:
the prediction for this instance
Throws:
java.lang.Exception - if an error occurs

smoothingOriginal

protected static double smoothingOriginal(double n,
                                          double pred,
                                          double supportPred)
                                   throws java.lang.Exception
Applies the m5 smoothing procedure to a prediction

Parameters:
n - number of instances in selected child of this node
pred - the prediction so far
supportPred - the prediction of the linear model at this node
Returns:
the current prediction smoothed with the prediction of the linear model at this node
Throws:
java.lang.Exception - if an error occurs

split

public void split()
           throws java.lang.Exception
Finds an attribute and split point for this node

Throws:
java.lang.Exception - if an error occurs

buildLinearModel

private void buildLinearModel(int[] indices)
                       throws java.lang.Exception
Build a linear model for this node using those attributes specified in indices.

Parameters:
indices - an array of attribute indices to include in the linear model
Throws:
java.lang.Exception

attsTestedAbove

private boolean[] attsTestedAbove()
Returns an array containing the indexes of attributes used in tests above this node

Returns:
an array of attribute indexes

attsTestedBelow

private boolean[] attsTestedBelow()
Returns an array containing the indexes of attributes used in tests below this node

Returns:
an array of attribute indexes

numLeaves

public int numLeaves(int leafCounter)
Sets the leaves' numbers

Parameters:
leafCounter - the number of leaves counted
Returns:
the number of the total leaves under the node

toString

public java.lang.String toString()
print the linear model at this node


printNodeLinearModel

public java.lang.String printNodeLinearModel()
print the linear model at this node


printLeafModels

public java.lang.String printLeafModels()
print all leaf models


nodeToString

public java.lang.String nodeToString()
Returns a description of this node (debugging purposes)

Returns:
a string describing this node

treeToString

public java.lang.String treeToString(int level)
Recursively builds a textual description of the tree

Parameters:
level - the level of this node
Returns:
string describing the tree

installLinearModels

public void installLinearModels()
                         throws java.lang.Exception
Traverses the tree and installs linear models at each node. This method must be called if pruning is not to be performed.

Throws:
java.lang.Exception - if an error occurs

installSmoothedModels

public void installSmoothedModels()
                           throws java.lang.Exception
Throws:
java.lang.Exception

prune

public void prune()
           throws java.lang.Exception
Recursively prune the tree

Throws:
java.lang.Exception - if an error occurs

pruningFactor

private double pruningFactor(int num_instances,
                             int num_params)
Compute the pruning factor

Parameters:
num_instances - number of instances
num_params - number of parameters in the model
Returns:
the pruning factor

findBestLeaf

public void findBestLeaf(double[] maxCoverage,
                         RuleNode[] bestLeaf)
Find the leaf with greatest coverage

Parameters:
maxCoverage - the greatest coverage found so far
bestLeaf - the leaf with the greatest coverage

returnLeaves

public void returnLeaves(FastVector[] v)
Return a list containing all the leaves in the tree

Parameters:
v - a single element array containing a vector of leaves

parentNode

public RuleNode parentNode()
Get the parent of this node

Returns:
the parent of this node

leftNode

public RuleNode leftNode()
Get the left child of this node

Returns:
the left child of this node

rightNode

public RuleNode rightNode()
Get the right child of this node

Returns:
the right child of this node

splitAtt

public int splitAtt()
Get the index of the splitting attribute for this node

Returns:
the index of the splitting attribute

splitVal

public double splitVal()
Get the split point for this node

Returns:
the split point for this node

numberOfLinearModels

public int numberOfLinearModels()
Get the number of linear models in the tree

Returns:
the number of linear models

isLeaf

public boolean isLeaf()
Return true if this node is a leaf

Returns:
true if this node is a leaf

rootMeanSquaredError

protected double rootMeanSquaredError()
Get the root mean squared error at this node

Returns:
the root mean squared error

getModel

public PreConstructedLinearModel getModel()
Get the linear model at this node

Returns:
the linear model at this node

getNumInstances

public int getNumInstances()
Return the number of instances that reach this node.

Returns:
the number of instances at this node.

numParameters

private int numParameters()
Get the number of parameters in the model at this node

Returns:
the number of parameters in the model at this node

getRegressionTree

public boolean getRegressionTree()
Get the value of regressionTree.

Returns:
Value of regressionTree.

setMinNumInstances

public void setMinNumInstances(double minNum)
Set the minimum number of instances to allow at a leaf node

Parameters:
minNum - the minimum number of instances

getMinNumInstances

public double getMinNumInstances()
Get the minimum number of instances to allow at a leaf node

Returns:
a double value

setRegressionTree

public void setRegressionTree(boolean newregressionTree)
Set the value of regressionTree.

Parameters:
newregressionTree - Value to assign to regressionTree.

printAllModels

public void printAllModels()
Print all the linear models at the leaves (debugging purposes)


assignIDs

protected int assignIDs(int lastID)
Assigns a unique identifier to each node in the tree

Parameters:
lastID - last id number used
Returns:
ID after processing child nodes

graph

public void graph(java.lang.StringBuffer text)
Assign a unique identifier to each node in the tree and then calls graphTree

Parameters:
text - a StringBuffer value

graphTree

protected void graphTree(java.lang.StringBuffer text)
Return a dotty style string describing the tree

Parameters:
text - a StringBuffer value

setSaveInstances

protected void setSaveInstances(boolean save)
Set whether to save instances for visualization purposes. Default is to save memory.

Parameters:
save - a boolean value