[ VIGRA Homepage | Function Index | Class Index | Namespaces | File List | Main Page ]

details Machine Learning VIGRA

Classes

class  CompleteOOBInfo
class  CorrelationVisitor
class  OnlineLearnVisitor
class  OOB_Error
class  OOB_PerTreeError
class  ProblemSpec< LabelType >
 problem specification class for the random forest. More...
class  RandomForest< LabelType, PreprocessorTag >
class  RandomForestOptions
 Options object for the random forest. More...
class  RandomForestProgressVisitor
class  Sampler< Random >
 Create random samples from a sequence of indices. More...
class  SamplerOptions
 Options object for the Sampler class. More...
class  StopVisiting
class  VariableImportanceVisitor
class  VisitorBase
class  VisitorNode< Visitor, Next >

Namespaces

namespace  vigra::rf::visitors

Enumerations

enum  Problem_t
 problem types

Functions

template<class RF , class PR , class SM , class ST >
void after_tree_ip_impl (RF &rf, PR &pr, SM &sm, ST &st, int index)
template<class A >
detail::VisitorNode< A > create_visitor (A &a)
template<class A , class B >
detail::VisitorNode< A,
detail::VisitorNode< B > > 
create_visitor (A &a, B &b)
template<class A , class B , class C >
detail::VisitorNode< A,
detail::VisitorNode< B,
detail::VisitorNode< C > > > 
create_visitor (A &a, B &b, C &c)
template<class A , class B , class C , class D >
detail::VisitorNode< A,
detail::VisitorNode< B,
detail::VisitorNode< C,
detail::VisitorNode< D > > > > 
create_visitor (A &a, B &b, C &c, D &d)
template<class A , class B , class C , class D , class E >
detail::VisitorNode< A,
detail::VisitorNode< B,
detail::VisitorNode< C,
detail::VisitorNode< D,
detail::VisitorNode< E > > > > > 
create_visitor (A &a, B &b, C &c, D &d, E &e)
template<class A , class B , class C , class D , class E , class F >
detail::VisitorNode< A,
detail::VisitorNode< B,
detail::VisitorNode< C,
detail::VisitorNode< D,
detail::VisitorNode< E,
detail::VisitorNode< F > > > > > > 
create_visitor (A &a, B &b, C &c, D &d, E &e, F &f)
template<class A , class B , class C , class D , class E , class F , class G >
detail::VisitorNode< A,
detail::VisitorNode< B,
detail::VisitorNode< C,
detail::VisitorNode< D,
detail::VisitorNode< E,
detail::VisitorNode< F,
detail::VisitorNode< G > > > > > > > 
create_visitor (A &a, B &b, C &c, D &d, E &e, F &f, G &g)
template<class A , class B , class C , class D , class E , class F , class G , class H >
detail::VisitorNode< A,
detail::VisitorNode< B,
detail::VisitorNode< C,
detail::VisitorNode< D,
detail::VisitorNode< E,
detail::VisitorNode< F,
detail::VisitorNode< G,
detail::VisitorNode< H > > > > > > > > 
create_visitor (A &a, B &b, C &c, D &d, E &e, F &f, G &g, H &h)
template<class A , class B , class C , class D , class E , class F , class G , class H , class I >
detail::VisitorNode< A,
detail::VisitorNode< B,
detail::VisitorNode< C,
detail::VisitorNode< D,
detail::VisitorNode< E,
detail::VisitorNode< F,
detail::VisitorNode< G,
detail::VisitorNode< H,
detail::VisitorNode< I > > > > > > > > > 
create_visitor (A &a, B &b, C &c, D &d, E &e, F &f, G &g, H &h, I &i)
template<class A , class B , class C , class D , class E , class F , class G , class H , class I , class J >
detail::VisitorNode< A,
detail::VisitorNode< B,
detail::VisitorNode< C,
detail::VisitorNode< D,
detail::VisitorNode< E,
detail::VisitorNode< F,
detail::VisitorNode< G,
detail::VisitorNode< H,
detail::VisitorNode< I,
detail::VisitorNode< J > > > > > > > > > > 
create_visitor (A &a, B &b, C &c, D &d, E &e, F &f, G &g, H &h, I &i, J &j)
template<class U , class C1 , class U2 , class C2 , class Split_t , class Stop_t , class Visitor_t , class Random_t >
void reLearnTree (MultiArrayView< 2, U, C1 > const &features, MultiArrayView< 2, U2, C2 > const &response, int treeId, Visitor_t visitor_, Split_t split_, Stop_t stop_, Random_t &random)
void reset_tree (int tree_id)
double return_val ()
double return_val ()
void sample ()
 VariableImportanceVisitor (int rep_cnt=10)
template<class Tree , class Split , class Region , class Feature_t , class Label_t >
void visit_after_split (Tree &tree, Split &split, Region &parent, Region &leftChild, Region &rightChild, Feature_t &features, Label_t &labels)
template<class Tree , class Split , class Region , class Feature_t , class Label_t >
void visit_after_split (Tree &tree, Split &split, Region &parent, Region &leftChild, Region &rightChild, Feature_t &features, Label_t &labels)
template<class Tree , class Split , class Region , class Feature_t , class Label_t >
void visit_after_split (Tree &tree, Split &split, Region &parent, Region &leftChild, Region &rightChild, Feature_t &features, Label_t &labels)
template<class Tree , class Split , class Region , class Feature_t , class Label_t >
void visit_after_split (Tree &tree, Split &split, Region &parent, Region &leftChild, Region &rightChild, Feature_t &features, Label_t &labels)
template<class RF , class PR , class SM , class ST >
void visit_after_tree (RF &rf, PR &pr, SM &sm, ST &st, int index)
template<class RF , class PR , class SM , class ST >
void visit_after_tree (RF &rf, PR &pr, SM &sm, ST &st, int index)
template<class RF , class PR , class SM , class ST >
void visit_after_tree (RF &rf, PR &pr, SM &sm, ST &st, int index)
template<class RF , class PR , class SM , class ST >
void visit_after_tree (RF &rf, PR &pr, SM &sm, ST &st, int index)
template<class RF , class PR , class SM , class ST >
void visit_after_tree (RF &rf, PR &pr, SM &sm, ST &st, int index)
template<class RF , class PR , class SM , class ST >
void visit_after_tree (RF &rf, PR &pr, SM &sm, ST &st, int index)
template<class RF , class PR , class SM , class ST >
void visit_after_tree (RF &rf, PR &pr, SM &sm, ST &st, int index)
template<class RF , class PR >
void visit_at_beginning (RF const &rf, PR const &pr)
template<class RF , class PR >
void visit_at_beginning (RF &rf, const PR &pr)
template<class RF , class PR >
void visit_at_beginning (RF const &rf, PR const &pr)
template<class RF , class PR >
void visit_at_end (RF const &rf, PR const &pr)
template<class RF , class PR >
void visit_at_end (RF &rf, PR &pr)
template<class RF , class PR >
void visit_at_end (RF &rf, PR &pr)
template<class RF , class PR >
void visit_at_end (RF &rf, PR &pr)
template<class RF , class PR >
void visit_at_end (RF &rf, PR &pr)
template<class RF , class PR >
void visit_at_end (RF const &rf, PR const &pr)
template<class RF , class PR >
void visit_at_end (RF const &rf, PR const &pr)
template<class TR , class IntT , class TopT , class Feat >
void visit_external_node (TR &tr, IntT index, TopT node_t, Feat &features)
template<class TR , class IntT , class TopT , class Feat >
void visit_internal_node (TR &tr, IntT index, TopT node_t, Feat &features)
template<class TR , class IntT , class TopT , class Feat >
void visit_internal_node (TR &tr, IntT index, TopT node_t, Feat &features)

Variables

MultiArray< 2, double > breiman_per_tree
MultiArray< 2, double > corr_noise
MultiArray< 2, double > distance
MultiArray< 2, double > gini_missc
MultiArray< 2, double > noise
ArrayVector< int > numChoices
double oob_breiman
double oob_breiman
double oob_mean
MultiArray< 2, double > oob_per_tree
double oob_per_tree2
double oob_std
double oobError
MultiArray< 4, double > oobroc_per_tree
MultiArray< 2, double > similarity
MultiArray< 2, double > variable_importance_

Learning

Following functions differ in the degree of customization allowed

template<class U , class C1 , class U2 , class C2 , class Split_t , class Stop_t , class Visitor_t , class Random_t >
void learn (MultiArrayView< 2, U, C1 > const &features, MultiArrayView< 2, U2, C2 > const &response, Visitor_t visitor, Split_t split, Stop_t stop, Random_t const &random)
 learn on data with custom config and random number generator
template<class U , class C1 , class U2 , class C2 , class Split_t , class Stop_t , class Visitor_t >
void learn (MultiArrayView< 2, U, C1 > const &features, MultiArrayView< 2, U2, C2 > const &response, Visitor_t visitor, Split_t split, Stop_t stop)
template<class U , class C1 , class U2 , class C2 , class Visitor_t >
void learn (MultiArrayView< 2, U, C1 > const &features, MultiArrayView< 2, U2, C2 > const &labels, Visitor_t visitor)
template<class U , class C1 , class U2 , class C2 , class Visitor_t , class Split_t >
void learn (MultiArrayView< 2, U, C1 > const &features, MultiArrayView< 2, U2, C2 > const &labels, Visitor_t visitor, Split_t split)
template<class U , class C1 , class U2 , class C2 >
void learn (MultiArrayView< 2, U, C1 > const &features, MultiArrayView< 2, U2, C2 > const &labels)
 learn on data with default configuration

prediction

template<class U , class C , class Stop >
LabelType predictLabel (MultiArrayView< 2, U, C >const &features, Stop &stop) const
 predict a label given a feature.
template<class U , class C >
LabelType predictLabel (MultiArrayView< 2, U, C > const &features, ArrayVectorView< double > prior) const
 predict a label with features and class priors
template<class T1 , class T2 , class C >
void predictProbabilities (OnlinePredictionSet< T1 > &predictionSet, MultiArrayView< 2, T2, C > &prob)
template<class U , class C1 , class T , class C2 >
void predictRaw (MultiArrayView< 2, U, C1 >const &features, MultiArrayView< 2, T, C2 > &prob) const
template<class U , class C >
LabelType predictLabel (MultiArrayView< 2, U, C >const &features)
template<class U , class C1 , class T , class C2 >
void predictLabels (MultiArrayView< 2, U, C1 >const &features, MultiArrayView< 2, T, C2 > &labels) const
 predict multiple labels with given features
template<class U , class C1 , class T , class C2 , class Stop >
void predictLabels (MultiArrayView< 2, U, C1 >const &features, MultiArrayView< 2, T, C2 > &labels, Stop &stop) const
template<class U , class C1 , class T , class C2 , class Stop >
void predictProbabilities (MultiArrayView< 2, U, C1 >const &features, MultiArrayView< 2, T, C2 > &prob, Stop &stop) const
 predict the class probabilities for multiple labels
template<class U , class C1 , class T , class C2 >
void predictProbabilities (MultiArrayView< 2, U, C1 >const &features, MultiArrayView< 2, T, C2 > &prob) const
 predict the class probabilities for multiple labels


Detailed Description

This module provides classification algorithms that map features to labels or label probabilities. Look at the RandomForest class first for an overview of most of the functionality provided as well as use cases.


Function Documentation

void visit_after_split ( Tree &  tree,
Split &  split,
Region &  parent,
Region &  leftChild,
Region &  rightChild,
Feature_t &  features,
Label_t &  labels 
)

do something after the Split has decided how to process the Region (Stack entry)

Parameters:
treereference to the tree that is currently being learned
splitreference to the split object
parentcurrent stack entry which was used to decide the split
leftChildleft stack entry that will be pushed
rightChildright stack entry that will be pushed.
featuresfeatures matrix
labelslabel matrix
See also:
RF_Traits::StackEntry_t

Reimplemented in CorrelationVisitor, VariableImportanceVisitor, and OnlineLearnVisitor.

void visit_after_tree ( RF &  rf,
PR &  pr,
SM &  sm,
ST &  st,
int  index 
)

do something after each tree has been learned

Parameters:
rfreference to the random forest object that called this visitor
prreference to the preprocessor that processed the input
smreference to the sampler object
streference to the first stack entry
indexindex of current tree

Reimplemented in RandomForestProgressVisitor, VariableImportanceVisitor, ClusterImportanceVisitor, CompleteOOBInfo, OOB_Error, OOB_PerTreeError, and OnlineLearnVisitor.

void visit_at_end ( RF const &  rf,
PR const &  pr 
)

do something after all trees have been learned

Parameters:
rfreference to the random forest object that called this visitor
prreference to the preprocessor that processed the input

Reimplemented in CorrelationVisitor, and RandomForestProgressVisitor.

void visit_at_beginning ( RF const &  rf,
PR const &  pr 
)

do something before learning starts

Parameters:
rfreference to the random forest object that called this visitor
prreference to the Processor class used.

Reimplemented in RandomForestProgressVisitor, and ClusterImportanceVisitor.

void visit_external_node ( TR &  tr,
IntT  index,
TopT  node_t,
Feat &  features 
)

do something while traversing the tree after it has been learned (external nodes)

Parameters:
trreference to the tree object that called this visitor
indexindex in the topology_ array we currently are at
node_ttype of node we have (will be e_.... - )
featuresfeature matrix
See also:
NodeTags;

you can create the node by using a switch on node_tag and using the corresponding Node objects. Or - if you do not care about the type use the NodeBase class.

void visit_internal_node ( TR &  tr,
IntT  index,
TopT  node_t,
Feat &  features 
)

do something when visiting an internal node after it has been learned

See also:
visit_external_node

Reimplemented in OnlineLearnVisitor.

double return_val ( )

return a double value. The value of the first visitor encountered that has a return value is returned with the RandomForest::learn() method - or -1.0 if no return value visitor existed. This functionality basically only exists so that the OOB - visitor can return the oob error rate like in the old version of the random forest.

Reimplemented in StopVisiting.

double return_val ( )

return a double value. The value of the first visitor encountered that has a return value is returned with the RandomForest::learn() method - or -1.0 if no return value visitor existed. This functionality basically only exists so that the OOB - visitor can return the oob error rate like in the old version of the random forest.

Reimplemented from VisitorBase.

detail::VisitorNode<A> vigra::rf::visitors::create_visitor ( A &  a)

factory method to be used with RandomForest::learn()

detail::VisitorNode<A, detail::VisitorNode<B> > vigra::rf::visitors::create_visitor ( A &  a,
B &  b 
)

factory method to be used with RandomForest::learn()

detail::VisitorNode<A, detail::VisitorNode<B, detail::VisitorNode<C> > > vigra::rf::visitors::create_visitor ( A &  a,
B &  b,
C &  c 
)

factory method to be used with RandomForest::learn()

detail::VisitorNode<A, detail::VisitorNode<B, detail::VisitorNode<C, detail::VisitorNode<D> > > > vigra::rf::visitors::create_visitor ( A &  a,
B &  b,
C &  c,
D &  d 
)

factory method to be used with RandomForest::learn()

detail::VisitorNode<A, detail::VisitorNode<B, detail::VisitorNode<C, detail::VisitorNode<D, detail::VisitorNode<E> > > > > vigra::rf::visitors::create_visitor ( A &  a,
B &  b,
C &  c,
D &  d,
E &  e 
)

factory method to be used with RandomForest::learn()

detail::VisitorNode<A, detail::VisitorNode<B, detail::VisitorNode<C, detail::VisitorNode<D, detail::VisitorNode<E, detail::VisitorNode<F> > > > > > vigra::rf::visitors::create_visitor ( A &  a,
B &  b,
C &  c,
D &  d,
E &  e,
F &  f 
)

factory method to be used with RandomForest::learn()

detail::VisitorNode<A, detail::VisitorNode<B, detail::VisitorNode<C, detail::VisitorNode<D, detail::VisitorNode<E, detail::VisitorNode<F, detail::VisitorNode<G> > > > > > > vigra::rf::visitors::create_visitor ( A &  a,
B &  b,
C &  c,
D &  d,
E &  e,
F &  f,
G &  g 
)

factory method to be used with RandomForest::learn()

detail::VisitorNode<A, detail::VisitorNode<B, detail::VisitorNode<C, detail::VisitorNode<D, detail::VisitorNode<E, detail::VisitorNode<F, detail::VisitorNode<G, detail::VisitorNode<H> > > > > > > > vigra::rf::visitors::create_visitor ( A &  a,
B &  b,
C &  c,
D &  d,
E &  e,
F &  f,
G &  g,
H &  h 
)

factory method to be used with RandomForest::learn()

detail::VisitorNode<A, detail::VisitorNode<B, detail::VisitorNode<C, detail::VisitorNode<D, detail::VisitorNode<E, detail::VisitorNode<F, detail::VisitorNode<G, detail::VisitorNode<H, detail::VisitorNode<I> > > > > > > > > vigra::rf::visitors::create_visitor ( A &  a,
B &  b,
C &  c,
D &  d,
E &  e,
F &  f,
G &  g,
H &  h,
I &  i 
)

factory method to be used with RandomForest::learn()

detail::VisitorNode<A, detail::VisitorNode<B, detail::VisitorNode<C, detail::VisitorNode<D, detail::VisitorNode<E, detail::VisitorNode<F, detail::VisitorNode<G, detail::VisitorNode<H, detail::VisitorNode<I, detail::VisitorNode<J> > > > > > > > > > vigra::rf::visitors::create_visitor ( A &  a,
B &  b,
C &  c,
D &  d,
E &  e,
F &  f,
G &  g,
H &  h,
I &  i,
J &  j 
)

factory method to be used with RandomForest::learn()

void visit_at_beginning ( RF &  rf,
const PR &  pr 
)

Initialize, set the number of trees

void reset_tree ( int  tree_id)

Reset a tree

void visit_after_tree ( RF &  rf,
PR &  pr,
SM &  sm,
ST &  st,
int  index 
)

simply increase the tree count

Reimplemented from VisitorBase.

void visit_after_split ( Tree &  tree,
Split &  split,
Region &  parent,
Region &  leftChild,
Region &  rightChild,
Feature_t &  features,
Label_t &  labels 
)

do something after the Split has decided how to process the Region (Stack entry)

Parameters:
treereference to the tree that is currently being learned
splitreference to the split object
parentcurrent stack entry which was used to decide the split
leftChildleft stack entry that will be pushed
rightChildright stack entry that will be pushed.
featuresfeatures matrix
labelslabel matrix
See also:
RF_Traits::StackEntry_t

Reimplemented from VisitorBase.

void visit_internal_node ( TR &  tr,
IntT  index,
TopT  node_t,
Feat &  features 
)

do something when visiting an internal node during getToLeaf

remember as last node id, for finding the parent of the last external node also: adjust class counts and borders

Reimplemented from VisitorBase.

void visit_after_tree ( RF &  rf,
PR &  pr,
SM &  sm,
ST &  st,
int  index 
)

does the basic calculation per tree

Reimplemented from VisitorBase.

void visit_at_end ( RF &  rf,
PR &  pr 
)

Does the normalisation

void visit_after_tree ( RF &  rf,
PR &  pr,
SM &  sm,
ST &  st,
int  index 
)

do something after each tree has been learned

Parameters:
rfreference to the random forest object that called this visitor
prreference to the preprocessor that processed the input
smreference to the sampler object
streference to the first stack entry
indexindex of current tree

Reimplemented from VisitorBase.

void visit_at_end ( RF &  rf,
PR &  pr 
)

Normalise variable importance after the number of trees is known.

void visit_after_tree ( RF &  rf,
PR &  pr,
SM &  sm,
ST &  st,
int  index 
)

do something after each tree has been learned

Parameters:
rfreference to the random forest object that called this visitor
prreference to the preprocessor that processed the input
smreference to the sampler object
streference to the first stack entry
indexindex of current tree

Reimplemented from VisitorBase.

void visit_at_end ( RF &  rf,
PR &  pr 
)

Normalise variable importance after the number of trees is known.

VariableImportanceVisitor ( int  rep_cnt = 10)

Constructor

Parameters:
rep_cnt(default: 10) how often the permutation should take place. Set to 1 to make the calculation faster (but possibly more unstable)
void visit_after_split ( Tree &  tree,
Split &  split,
Region &  parent,
Region &  leftChild,
Region &  rightChild,
Feature_t &  features,
Label_t &  labels 
)

calculates impurity decrease based variable importance after every split.

Reimplemented from VisitorBase.

void after_tree_ip_impl ( RF &  rf,
PR &  pr,
SM &  sm,
ST &  st,
int  index 
)

compute permutation based var imp. (Only an Array of size oob_sample_count x 1 is created,

  • as opposed to oob_sample_count x feature_count in the other method.)
See also:
FieldProxy
void visit_after_tree ( RF &  rf,
PR &  pr,
SM &  sm,
ST &  st,
int  index 
)

calculate permutation based impurity after every tree has been learned default behaviour is that this happens out of place. If you have very big data sets and want to avoid copying of data set the in_place_ flag to true.

Reimplemented from VisitorBase.

void visit_at_end ( RF &  rf,
PR &  pr 
)

Normalise variable importance after the number of trees is known.

void visit_after_tree ( RF &  rf,
PR &  pr,
SM &  sm,
ST &  st,
int  index 
)

do something after each tree has been learned

Parameters:
rfreference to the random forest object that called this visitor
prreference to the preprocessor that processed the input
smreference to the sampler object
streference to the first stack entry
indexindex of current tree

Reimplemented from VisitorBase.

void visit_at_end ( RF const &  rf,
PR const &  pr 
)

do something after all trees have been learned

Parameters:
rfreference to the random forest object that called this visitor
prreference to the preprocessor that processed the input

Reimplemented from VisitorBase.

void visit_at_beginning ( RF const &  rf,
PR const &  pr 
)

do something before learning starts

Parameters:
rfreference to the random forest object that called this visitor
prreference to the Processor class used.

Reimplemented from VisitorBase.

void visit_at_end ( RF const &  rf,
PR const &  pr 
)

do something after all trees have been learned

Parameters:
rfreference to the random forest object that called this visitor
prreference to the preprocessor that processed the input

Reimplemented from VisitorBase.

void visit_after_split ( Tree &  tree,
Split &  split,
Region &  parent,
Region &  leftChild,
Region &  rightChild,
Feature_t &  features,
Label_t &  labels 
)

do something after the Split has decided how to process the Region (Stack entry)

Parameters:
treereference to the tree that is currently being learned
splitreference to the split object
parentcurrent stack entry which was used to decide the split
leftChildleft stack entry that will be pushed
rightChildright stack entry that will be pushed.
featuresfeatures matrix
labelslabel matrix
See also:
RF_Traits::StackEntry_t

Reimplemented from VisitorBase.

void reLearnTree ( MultiArrayView< 2, U, C1 > const &  features,
MultiArrayView< 2, U2, C2 > const &  response,
int  treeId,
Visitor_t  visitor_,
Split_t  split_,
Stop_t  stop_,
Random_t &  random 
)
Todo:
replace this crappy class out. It uses function pointers. and is making code slower according to me. Comment from Nathan: This is copied from Rahul, so me=Rahul
void learn ( MultiArrayView< 2, U, C1 > const &  features,
MultiArrayView< 2, U2, C2 > const &  response,
Visitor_t  visitor,
Split_t  split,
Stop_t  stop,
Random_t const &  random 
)

learn on data with custom config and random number generator

Parameters:
featuresa N x M matrix containing N samples with M features
responsea N x D matrix containing the corresponding response. Current split functors assume D to be 1 and ignore any additional columns. This is not enforced to allow future support for uncertain labels, label independent strata etc. The Preprocessor specified during construction should be able to handle features and labels features and the labels. see also: SplitFunctor, Preprocessing
visitorvisitor which is to be applied after each split, tree and at the end. Use rf_default for using default value. (No Visitors) see also: rf::visitors
splitsplit functor to be used to calculate each split use rf_default() for using default value. (GiniSplit) see also: rf::split
stoppredicate to be used to calculate each split use rf_default() for using default value. (EarlyStoppStd)
randomRandomNumberGenerator to be used. Use rf_default() to use default value.(RandomMT19937)
LabelType predictLabel ( MultiArrayView< 2, U, C >const &  features,
Stop &  stop 
) const

predict a label given a feature.

Parameters:
features,:a 1 by featureCount matrix containing data point to be predicted (this only works in classification setting)
stop,:early stopping criterion
Returns:
double value representing class. You can use the predictLabels() function together with the rf.external_parameter().class_type_ attribute to get back the same type used during learning.
LabelType predictLabel ( MultiArrayView< 2, U, C > const &  features,
ArrayVectorView< double >  prior 
) const

predict a label with features and class priors

Parameters:
features,:same as above.
prior,:iterator to prior weighting of classes
Returns:
same as above.
void sample ( )

Create a new sample.

void learn ( MultiArrayView< 2, U, C1 > const &  features,
MultiArrayView< 2, U2, C2 > const &  labels 
)

learn on data with default configuration

Parameters:
featuresa N x M matrix containing N samples with M features
labelsa N x D matrix containing the corresponding N labels. Current split functors assume D to be 1 and ignore any additional columns. this is not enforced to allow future support for uncertain labels.

learning is done with:

See also:
rf::split, EarlyStoppStd
  • Randomly seeded random number generator
  • default gini split functor as described by Breiman
  • default The standard early stopping criterion
void predictLabels ( MultiArrayView< 2, U, C1 >const &  features,
MultiArrayView< 2, T, C2 > &  labels 
) const

predict multiple labels with given features

Parameters:
features,:a n by featureCount matrix containing data point to be predicted (this only works in classification setting)
labels,:a n by 1 matrix passed by reference to store output.
void predictProbabilities ( MultiArrayView< 2, U, C1 >const &  features,
MultiArrayView< 2, T, C2 > &  prob,
Stop &  stop 
) const

predict the class probabilities for multiple labels

Parameters:
featuressame as above
proba n x class_count_ matrix. passed by reference to save class probabilities
stopearly stopping criterion
See also:
EarlyStopping
void predictProbabilities ( MultiArrayView< 2, U, C1 >const &  features,
MultiArrayView< 2, T, C2 > &  prob 
) const

predict the class probabilities for multiple labels

Parameters:
featuressame as above
proba n x class_count_ matrix. passed by reference to save class probabilities

Variable Documentation

double oobError

Average error of one randomized decision tree

double oob_breiman

Ensemble oob error rate

MultiArray<2, double> oob_per_tree

OOB Error rate of each individual tree

double oob_mean

Mean of oob_per_tree

double oob_std

Standard deviation of oob_per_tree

double oob_breiman

Ensemble OOB error

See also:
OOB_Error
double oob_per_tree2

Per Tree OOB error calculated as in OOB_PerTreeError (Ulli's version)

MultiArray<2, double> breiman_per_tree

Column containing the development of the Ensemble error rate with increasing number of trees

MultiArray<4, double> oobroc_per_tree

4 dimensional array containing the development of confusion matrices with number of trees - can be used to estimate ROC curves etc.

oobroc_per_tree(ii,jj,kk,ll) corresponds to: true label = ii, predicted label = jj, confusion matrix after ll trees

explanation of third index:

Two class case: kk = 0 - (treeCount-1) Threshold is on Probability for class 0 is kk/(treeCount-1); More classes: kk = 0. Threshold on probability set by argMax of the probability array.

MultiArray<2, double> variable_importance_

This Array has the same entries as the R - random forest variable importance. Matrix is featureCount by (classCount +2) variable_importance_(ii,jj) is the variable importance measure of the ii-th variable according to: jj = 0 - (classCount-1) classwise permutation importance jj = columnCount(variable_importance_) -2 permutation importance jj = columnCount(variable_importance_) -1 gini decrease importance.

permutation importance: The difference between the fraction of OOB samples classified correctly before and after permuting (randomizing) the ii-th column is calculated. The ii-th column is permuted rep_cnt times.

class wise permutation importance: same as permutation importance. We only look at those OOB samples whose response corresponds to class jj.

gini decrease importance: row ii corresponds to the sum of all gini decreases induced by variable ii in each node of the random forest.

MultiArray<2, double> gini_missc

gini_missc(ii, jj) describes how well variable jj can describe a partition created on variable ii(when variable ii was chosen)

MultiArray<2, double> noise

additional noise features.

MultiArray<2, double> corr_noise

how well can a noise column describe a partition created on variable ii.

MultiArray<2, double> similarity

Similarity Matrix

(numberOfFeatures + 1) by (numberOfFeatures + 1) Matrix gini_missc

  • row normalized by the number of times the column was chosen
  • mean of corr_noise subtracted
  • and symmetrised.
MultiArray<2, double> distance

Distance Matrix 1-similarity

ArrayVector<int> numChoices

How often was variable ii chosen

© Ullrich Köthe (ullrich.koethe@iwr.uni-heidelberg.de)
Heidelberg Collaboratory for Image Processing, University of Heidelberg, Germany

html generated using doxygen and Python
vigra 1.9.0 (Tue Nov 6 2012)