Set the class attribute. If set, then class based evaluation of clustering
is performed.
- Version:
- $Revision: 1.23 $
- Author:
- Mark Hall (mhall@cs.waikato.ac.nz)
Field Summary |
private int[] |
m_classToCluster
will hold the mapping of classes to clusters (for class based
evaluation) |
private double[] |
m_clusterAssignments
holds the assignments of instances to clusters for a particular testing
dataset |
private Clusterer |
m_Clusterer
the clusterer |
private java.lang.StringBuffer |
m_clusteringResults
holds a string describing the results of clustering the training data |
private boolean |
m_doXval
do cross validation (DensityBasedClusterers only) |
private int |
m_numClusters
holds the number of clusters found by the clusterer |
private int |
m_numFolds
the number of folds to use for cross validation |
private int |
m_seed
seed to use for cross validation |
private Instances |
m_trainInstances
the instances to cluster |
Method Summary |
private static java.lang.String |
attributeValuesString(Instance instance,
Range attRange)
Builds a string listing the attribute values in a specified range of indices,
separated by commas and enclosed in brackets. |
java.lang.String |
clusterResultsToString()
return the results of clustering. |
static java.lang.String |
crossValidateModel(java.lang.String clustererString,
Instances data,
int numFolds,
java.lang.String[] options,
java.util.Random random)
Performs a cross-validation
for a distribution clusterer on a set of instances. |
static java.lang.String |
evaluateClusterer(Clusterer clusterer,
java.lang.String[] options)
Evaluates a clusterer with the options given in an array of
strings. |
void |
evaluateClusterer(Instances test)
Evaluate the clusterer on a set of instances. |
private void |
evaluateClustersWithRespectToClass(Instances inst)
Evaluates cluster assignments with respect to actual class labels.
|
int[] |
getClassesToClusters()
Return the array (ordered by cluster number) of minimum error class to
cluster mappings |
double[] |
getClusterAssignments()
Return an array of cluster assignments corresponding to the most
recent set of instances clustered. |
int |
getNumClusters()
Return the number of clusters found for the most recent call to
evaluateClusterer |
static void |
main(java.lang.String[] args)
Main method for testing this class. |
private static java.lang.String |
makeOptionString(Clusterer clusterer)
Make up the help string giving all the command line options |
private void |
mapClasses(int lev,
int[][] counts,
int[] clusterTotals,
double[] current,
double[] best,
int error)
Finds the minimum error mapping of classes to clusters. |
private static java.lang.String |
printClusterings(Clusterer clusterer,
Instances train,
java.lang.String testFileName,
Range attributesToOutput)
Print the cluster assignments for either the training
or the testing data. |
private static java.lang.String |
printClusterStats(Clusterer clusterer,
java.lang.String fileName)
Print the cluster statistics for either the training
or the testing data. |
void |
setClusterer(Clusterer clusterer)
set the clusterer |
void |
setDoXval(boolean x)
set whether or not to do cross validation |
void |
setFolds(int folds)
set the number of folds to use for cross validation |
void |
setSeed(int s)
set the seed to use for cross validation |
private java.lang.String |
toMatrixString(int[][] counts,
int[] clusterTotals,
Instances inst)
Returns a "confusion" style matrix of classes to clusters assignments |
Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
m_trainInstances
private Instances m_trainInstances
- the instances to cluster
m_Clusterer
private Clusterer m_Clusterer
- the clusterer
m_doXval
private boolean m_doXval
- do cross validation (DensityBasedClusterers only)
m_numFolds
private int m_numFolds
- the number of folds to use for cross validation
m_seed
private int m_seed
- seed to use for cross validation
m_clusteringResults
private java.lang.StringBuffer m_clusteringResults
- holds a string describing the results of clustering the training data
m_numClusters
private int m_numClusters
- holds the number of clusters found by the clusterer
m_clusterAssignments
private double[] m_clusterAssignments
- holds the assignments of instances to clusters for a particular testing
dataset
m_classToCluster
private int[] m_classToCluster
- will hold the mapping of classes to clusters (for class based
evaluation)
ClusterEvaluation
public ClusterEvaluation()
- Constructor. Sets defaults for each member variable. Default Clusterer
is EM.
setClusterer
public void setClusterer(Clusterer clusterer)
- set the clusterer
- Parameters:
clusterer
- the clusterer to use
setDoXval
public void setDoXval(boolean x)
- set whether or not to do cross validation
- Parameters:
x
- true if cross validation is to be done
setFolds
public void setFolds(int folds)
- set the number of folds to use for cross validation
- Parameters:
folds
- the number of folds
setSeed
public void setSeed(int s)
- set the seed to use for cross validation
- Parameters:
s
- the seed.
clusterResultsToString
public java.lang.String clusterResultsToString()
- return the results of clustering.
- Returns:
- a string detailing the results of clustering a data set
getNumClusters
public int getNumClusters()
- Return the number of clusters found for the most recent call to
evaluateClusterer
- Returns:
- the number of clusters found
getClusterAssignments
public double[] getClusterAssignments()
- Return an array of cluster assignments corresponding to the most
recent set of instances clustered.
- Returns:
- an array of cluster assignments
getClassesToClusters
public int[] getClassesToClusters()
- Return the array (ordered by cluster number) of minimum error class to
cluster mappings
- Returns:
- an array of class to cluster mappings
evaluateClusterer
public void evaluateClusterer(Instances test)
throws java.lang.Exception
- Evaluate the clusterer on a set of instances. Calculates clustering
statistics and stores cluster assignments for the instances in
m_clusterAssignments
- Parameters:
test
- the set of instances to cluster
- Throws:
java.lang.Exception
- if something goes wrong
evaluateClustersWithRespectToClass
private void evaluateClustersWithRespectToClass(Instances inst)
throws java.lang.Exception
- Evaluates cluster assignments with respect to actual class labels.
Assumes that m_Clusterer has been trained and tested on
inst (minus the class).
- Parameters:
inst
- the instances (including class) to evaluate with respect to
- Throws:
java.lang.Exception
- if something goes wrong
toMatrixString
private java.lang.String toMatrixString(int[][] counts,
int[] clusterTotals,
Instances inst)
throws java.lang.Exception
- Returns a "confusion" style matrix of classes to clusters assignments
- Parameters:
counts
- the counts of classes for each cluster
clusterTotals
- total number of examples in each cluster
inst
- the training instances (with class)
- Throws:
java.lang.Exception
- if matrix can't be generated
mapClasses
private void mapClasses(int lev,
int[][] counts,
int[] clusterTotals,
double[] current,
double[] best,
int error)
- Finds the minimum error mapping of classes to clusters. Recursively
considers all possible class to cluster assignments.
- Parameters:
lev
- the cluster being processed
counts
- the counts of classes in clusters
clusterTotals
- the total number of examples in each cluster
current
- the current path through the class to cluster assignment
tree
best
- the best assignment path seen
error
- accumulates the error for a particular path
evaluateClusterer
public static java.lang.String evaluateClusterer(Clusterer clusterer,
java.lang.String[] options)
throws java.lang.Exception
- Evaluates a clusterer with the options given in an array of
strings. It takes the string indicated by "-t" as training file, the
string indicated by "-T" as test file.
If the test file is missing, a stratified ten-fold
cross-validation is performed (distribution clusterers only).
Using "-x" you can change the number of
folds to be used, and using "-s" the random seed.
If the "-p" option is present it outputs the classification for
each test instance. If you provide the name of an object file using
"-l", a clusterer will be loaded from the given file. If you provide the
name of an object file using "-d", the clusterer built from the
training data will be saved to the given file.
- Parameters:
clusterer
- machine learning clusterer
options
- the array of strings containing the options
- Returns:
- a string describing the results
- Throws:
java.lang.Exception
- if model could not be evaluated successfully
crossValidateModel
public static java.lang.String crossValidateModel(java.lang.String clustererString,
Instances data,
int numFolds,
java.lang.String[] options,
java.util.Random random)
throws java.lang.Exception
- Performs a cross-validation
for a distribution clusterer on a set of instances.
- Parameters:
clustererString
- a string naming the class of the clusterer
data
- the data on which the cross-validation is to be
performed
numFolds
- the number of folds for the cross-validation
options
- the options to the clusterer
random
- a random number generator
- Returns:
- a string containing the cross validated log likelihood
- Throws:
java.lang.Exception
- if a clusterer could not be generated
printClusterStats
private static java.lang.String printClusterStats(Clusterer clusterer,
java.lang.String fileName)
throws java.lang.Exception
- Print the cluster statistics for either the training
or the testing data.
- Parameters:
clusterer
- the clusterer to use for generating statistics.
- Returns:
- a string containing cluster statistics.
- Throws:
java.lang.Exception
- if statistics can't be generated.
printClusterings
private static java.lang.String printClusterings(Clusterer clusterer,
Instances train,
java.lang.String testFileName,
Range attributesToOutput)
throws java.lang.Exception
- Print the cluster assignments for either the training
or the testing data.
- Parameters:
clusterer
- the clusterer to use for cluster assignments
- Returns:
- a string containing the instance indexes and cluster assignments.
- Throws:
java.lang.Exception
- if cluster assignments can't be printed
attributeValuesString
private static java.lang.String attributeValuesString(Instance instance,
Range attRange)
- Builds a string listing the attribute values in a specified range of indices,
separated by commas and enclosed in brackets.
- Parameters:
instance
- the instance to print the values from
- Returns:
- a string listing values of the attributes in the range
makeOptionString
private static java.lang.String makeOptionString(Clusterer clusterer)
- Make up the help string giving all the command line options
- Parameters:
clusterer
- the clusterer to include options for
- Returns:
- a string detailing the valid command line options
main
public static void main(java.lang.String[] args)
- Main method for testing this class.
- Parameters:
args
- the options