weka.classifiers.trees
Class REPTree.Tree

java.lang.Object
  extended by weka.classifiers.trees.REPTree.Tree
All Implemented Interfaces:
java.io.Serializable
Enclosing class:
REPTree

protected class REPTree.Tree
extends java.lang.Object
implements java.io.Serializable

An inner class for building and storing the tree structure

See Also:
Serialized Form

Field Summary
protected  int m_Attribute
          The attribute to split on.
protected  double[] m_ClassProbs
          Class probabilities from the training data in the nominal case.
protected  double[] m_Distribution
          The (unnormalized) class distribution in the nominal case.
protected  double[] m_HoldOutDist
          Class distribution of hold-out set at node in the nominal case.
protected  double m_HoldOutError
          The hold-out error of the node.
protected  Instances m_Info
          The header information (for printing the tree).
protected  double[] m_Prop
          The proportions of training instances going down each branch.
protected  double m_SplitPoint
          The split point.
protected  REPTree.Tree[] m_Successors
          The subtrees of this tree.
 
Constructor Summary
protected REPTree.Tree()
           
 
Method Summary
protected  void backfitHoldOutInstance(Instance inst, double weight, REPTree.Tree parent)
          Inserts an instance from the hold-out set into the tree.
protected  void backfitHoldOutSet(Instances data)
          Inserts hold-out set into tree.
protected  void buildTree(int[][] sortedIndices, double[][] weights, Instances data, double totalWeight, double[] classProbs, Instances header, double minNum, double minVariance, int depth, int maxDepth)
          Recursively generates a tree.
protected  double distribution(double[][] props, double[][][] dists, int att, int[] sortedIndices, double[] weights, double[][] subsetWeights, Instances data)
          Computes class distribution for an attribute.
protected  double[] distributionForInstance(Instance instance)
          Computes class distribution of an instance using the tree.
protected  double gain(double[][] dist, double priorVal)
          Computes value of splitting criterion after split.
protected  void insertHoldOutInstance(Instance inst, double weight, REPTree.Tree parent)
          Inserts an instance from the hold-out set into the tree.
protected  void insertHoldOutSet(Instances data)
          Inserts hold-out set into tree.
protected  java.lang.String leafString(REPTree.Tree parent)
          Outputs description of a leaf node.
protected  double numericDistribution(double[][] props, double[][][] dists, int att, int[] sortedIndices, double[] weights, double[][] subsetWeights, Instances data, double[] vals)
          Computes class distribution for an attribute.
protected  int numNodes()
          Computes size of the tree.
protected  double priorVal(double[][] dist)
          Computes value of splitting criterion before split.
protected  double reducedErrorPrune()
          Prunes the tree using the hold-out data (bottom-up).
protected  double singleVariance(double s, double sS, double weight)
          Computes the variance for a single set.
 java.lang.String sourceExpression(int index)
          Returns a string containing java source code equivalent to the test made at this node.
protected  void splitData(int[][][] subsetIndices, double[][][] subsetWeights, int att, double splitPoint, int[][] sortedIndices, double[][] weights, Instances data)
          Splits instances into subsets.
protected  int toGraph(java.lang.StringBuffer text, int num, REPTree.Tree parent)
          Outputs one node for graph.
 java.lang.StringBuffer[] toSource(java.lang.String className, REPTree.Tree parent)
          Returns source code for the tree as if-then statements.
protected  java.lang.String toString(int level, REPTree.Tree parent)
          Recursively outputs the tree.
protected  double variance(double[] s, double[] sS, double[] sumOfWeights)
          Computes variance for subsets.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

m_Info

protected Instances m_Info
The header information (for printing the tree).


m_Successors

protected REPTree.Tree[] m_Successors
The subtrees of this tree.


m_Attribute

protected int m_Attribute
The attribute to split on.


m_SplitPoint

protected double m_SplitPoint
The split point.


m_Prop

protected double[] m_Prop
The proportions of training instances going down each branch.


m_ClassProbs

protected double[] m_ClassProbs
Class probabilities from the training data in the nominal case. Holds the mean in the numeric case.


m_Distribution

protected double[] m_Distribution
The (unnormalized) class distribution in the nominal case. Holds the sum of squared errors and the weight in the numeric case.


m_HoldOutDist

protected double[] m_HoldOutDist
Class distribution of hold-out set at node in the nominal case. Straight sum of weights in the numeric case (i.e., the array has only one element).


m_HoldOutError

protected double m_HoldOutError
The hold-out error of the node. The number of misclassified instances in the nominal case, the sum of squared errors in the numeric case.

Constructor Detail

REPTree.Tree

protected REPTree.Tree()
Method Detail

distributionForInstance

protected double[] distributionForInstance(Instance instance)
                                    throws java.lang.Exception
Computes class distribution of an instance using the tree.

Throws:
java.lang.Exception

sourceExpression

public final java.lang.String sourceExpression(int index)
Returns a string containing Java source code equivalent to the test made at this node. The instance being tested is called "i". This routine assumes that it is called in the order of branching, which allows the >= condition test (the last one) of a numeric split point to be set to just "true" (because reaching that point in the flow implies that the previous less-than test failed).

Parameters:
index - index of the value tested
Returns:
a value of type 'String'

toSource

public java.lang.StringBuffer[] toSource(java.lang.String className,
                                         REPTree.Tree parent)
                                  throws java.lang.Exception
Returns source code for the tree as if-then statements. The class is assigned to variable "p", and the tested instance is assumed to be named "i". The results are returned as two StringBuffers: a section of code for assignment of the class, and a section of code containing support code (e.g., other support methods). TODO: If the generated source code encounters a missing value for the evaluated attribute, it stops branching and uses the class distribution of the current node to decide the return value. This is unlike the behaviour of distributionForInstance().

Parameters:
className - the classname that this static classifier has
parent - parent node of the current node
Returns:
an array containing two stringbuffers, the first string containing assignment code, and the second containing source for support code.
Throws:
java.lang.Exception - if something goes wrong

toGraph

protected int toGraph(java.lang.StringBuffer text,
                      int num,
                      REPTree.Tree parent)
               throws java.lang.Exception
Outputs one node for graph.

Throws:
java.lang.Exception

leafString

protected java.lang.String leafString(REPTree.Tree parent)
                               throws java.lang.Exception
Outputs description of a leaf node.

Throws:
java.lang.Exception

toString

protected java.lang.String toString(int level,
                                    REPTree.Tree parent)
Recursively outputs the tree.


buildTree

protected void buildTree(int[][] sortedIndices,
                         double[][] weights,
                         Instances data,
                         double totalWeight,
                         double[] classProbs,
                         Instances header,
                         double minNum,
                         double minVariance,
                         int depth,
                         int maxDepth)
                  throws java.lang.Exception
Recursively generates a tree.

Throws:
java.lang.Exception

numNodes

protected int numNodes()
Computes size of the tree.


splitData

protected void splitData(int[][][] subsetIndices,
                         double[][][] subsetWeights,
                         int att,
                         double splitPoint,
                         int[][] sortedIndices,
                         double[][] weights,
                         Instances data)
                  throws java.lang.Exception
Splits instances into subsets.

Throws:
java.lang.Exception

distribution

protected double distribution(double[][] props,
                              double[][][] dists,
                              int att,
                              int[] sortedIndices,
                              double[] weights,
                              double[][] subsetWeights,
                              Instances data)
                       throws java.lang.Exception
Computes class distribution for an attribute.

Throws:
java.lang.Exception

numericDistribution

protected double numericDistribution(double[][] props,
                                     double[][][] dists,
                                     int att,
                                     int[] sortedIndices,
                                     double[] weights,
                                     double[][] subsetWeights,
                                     Instances data,
                                     double[] vals)
                              throws java.lang.Exception
Computes class distribution for an attribute.

Throws:
java.lang.Exception

variance

protected double variance(double[] s,
                          double[] sS,
                          double[] sumOfWeights)
Computes variance for subsets.


singleVariance

protected double singleVariance(double s,
                                double sS,
                                double weight)
Computes the variance for a single set.


priorVal

protected double priorVal(double[][] dist)
Computes value of splitting criterion before split.


gain

protected double gain(double[][] dist,
                      double priorVal)
Computes value of splitting criterion after split.


reducedErrorPrune

protected double reducedErrorPrune()
                            throws java.lang.Exception
Prunes the tree using the hold-out data (bottom-up).

Throws:
java.lang.Exception

insertHoldOutSet

protected void insertHoldOutSet(Instances data)
                         throws java.lang.Exception
Inserts hold-out set into tree.

Throws:
java.lang.Exception

insertHoldOutInstance

protected void insertHoldOutInstance(Instance inst,
                                     double weight,
                                     REPTree.Tree parent)
                              throws java.lang.Exception
Inserts an instance from the hold-out set into the tree.

Throws:
java.lang.Exception

backfitHoldOutSet

protected void backfitHoldOutSet(Instances data)
                          throws java.lang.Exception
Inserts hold-out set into tree.

Throws:
java.lang.Exception

backfitHoldOutInstance

protected void backfitHoldOutInstance(Instance inst,
                                      double weight,
                                      REPTree.Tree parent)
                               throws java.lang.Exception
Inserts an instance from the hold-out set into the tree.

Throws:
java.lang.Exception