The `lamp-forest` artifact contains an implementation of the Extremely Randomized Trees algorithm.
This implementation is standalone: it has no dependencies on other lamp modules, nor any native dependencies.
Extremely Randomized Trees (extratrees) is a decision-tree-based regression or classification method similar to random forests (see Geurts, Ernst and Wehenkel, "Extremely randomized trees", Machine Learning, 2006).
Missing feature values (`Double.NaN`) are detected, and at each split the samples whose value for the split feature is missing are assigned to either the left or the right group, whichever gives the better fit.
import lamp.extratrees._
import org.saddle._
// Feature matrix built from a single column: 7 samples x 1 feature.
// The first four samples have feature value 1.0, the last three have 2.0.
val features = Mat(Vec(1d, 1d, 1d, 1d, 2d,2d,2d))
// features: Mat[Double] = [7 x 1]
// 1.0000
// 1.0000
// 1.0000
// 1.0000
// 2.0000
// 2.0000
// 2.0000
//
// Regression target, one value per sample (row of `features`):
// 1.0 for the samples whose feature is 1.0, 0.0 for those whose feature is 2.0.
val target = Vec(1d, 1d, 1d, 1d, 0d, 0d, 0d)
// target: Vec[Double] = [7 x 1]
// 1.0000
// 1.0000
// 1.0000
// 1.0000
// 0.0000
// 0.0000
// 0.0000
//
// Fit a forest of regression trees on the toy data above.
// In the printed result below, every tree shown is a single split on feature 0
// with a random cutpoint between 1.0 and 2.0, separating the two target values.
val trees = buildForestRegression(
data = features, // feature matrix (samples in rows, features in columns)
target = target, // regression target, one value per sample
nMin = 1, // minimum node size before splitting
k = 1, // number of features to consider at each split
m = 100, // number of trees in the forest
parallelism = 1 // NOTE(review): presumably the number of trees built concurrently; confirm against the API docs
)
// trees: Seq[RegressionTree] = Vector(
// RegressionNonLeaf(
// left = RegressionLeaf(targetMean = 1.0),
// right = RegressionLeaf(targetMean = 0.0),
// splitFeature = 0,
// cutpoint = 1.2661298840655273,
// splitMissingIsLess = false
// ),
// RegressionNonLeaf(
// left = RegressionLeaf(targetMean = 1.0),
// right = RegressionLeaf(targetMean = 0.0),
// splitFeature = 0,
// cutpoint = 1.9361555864384075,
// splitMissingIsLess = false
// ),
// RegressionNonLeaf(
// left = RegressionLeaf(targetMean = 1.0),
// right = RegressionLeaf(targetMean = 0.0),
// splitFeature = 0,
// cutpoint = 1.3419881711383712,
// splitMissingIsLess = false
// ),
// RegressionNonLeaf(
// left = RegressionLeaf(targetMean = 1.0),
// right = RegressionLeaf(targetMean = 0.0),
// splitFeature = 0,
// cutpoint = 1.6565125680135382,
// splitMissingIsLess = false
// ),
// RegressionNonLeaf(
// left = RegressionLeaf(targetMean = 1.0),
// right = RegressionLeaf(targetMean = 0.0),
// splitFeature = 0,
// cutpoint = 1.4916809015091368,
// splitMissingIsLess = false
// ),
// RegressionNonLeaf(
// left = RegressionLeaf(targetMean = 1.0),
// right = RegressionLeaf(targetMean = 0.0),
// splitFeature = 0,
// cutpoint = 1.7755343393641003,
// splitMissingIsLess = false
// ),
// RegressionNonLeaf(
// left = RegressionLeaf(targetMean = 1.0),
// right = RegressionLeaf(targetMean = 0.0),
// splitFeature = 0,
// cutpoint = 1.690991952660824,
// splitMissingIsLess = false
// ...
// Predict with the fitted forest on the same feature matrix used for training.
// The printed predictions below match `target` exactly on this toy data.
val output = predictRegression(trees, features)
// output: Vec[Double] = [7 x 1]
// 1.0000
// 1.0000
// 1.0000
// 1.0000
// 0.0000
// 0.0000
// 0.0000
//