Add any of these lines to your build.sbt:
// Saddle release shared by every module below; bump it in this one place
val saddleVersion = "4.0.0-M11"
// Core library
libraryDependencies += "io.github.pityka" %% "saddle-core" % saddleVersion
// Inlined instances of the binary operations on Mat and Vec
libraryDependencies += "io.github.pityka" %% "saddle-ops-inlined" % saddleVersion
// Linear algebra, backed by BLAS
libraryDependencies += "io.github.pityka" %% "saddle-linalg" % saddleVersion
// Binary serialization of data frames and matrices
libraryDependencies += "io.github.pityka" %% "saddle-binary" % saddleVersion
// Encoder and Decoder instances for Circe
libraryDependencies += "io.github.pityka" %% "saddle-circe" % saddleVersion
// Codec instances for jsoniter-scala
libraryDependencies += "io.github.pityka" %% "saddle-jsoniter" % saddleVersion
// Joda-Time interface (not maintained)
libraryDependencies += "io.github.pityka" %% "saddle-time" % saddleVersion
// EJML interface (not maintained)
libraryDependencies += "io.github.pityka" %% "saddle-stats" % saddleVersion
Dependencies
The actively maintained artifacts have minimal dependencies:
- saddle-io has no dependency (not even saddle-core)
- saddle-core depends on cats-kernel
- saddle-linalg depends on a fork of netlib-java
- saddle-binary depends on ujson
- saddle-circe depends on circe
- saddle-jsoniter depends on jsoniter-scala
Example: SVD on the Iris dataset
import scala.io.Source
import org.saddle._
val irisURL = "https://gist.githubusercontent.com/pityka/d05bb892541d71c2a06a0efb6933b323/raw/639388c2cbc2120a14dcf466e85730eb8be498bb/iris.csv"
// irisURL: String = "https://gist.githubusercontent.com/pityka/d05bb892541d71c2a06a0efb6933b323/raw/639388c2cbc2120a14dcf466e85730eb8be498bb/iris.csv"
// Download the CSV and parse columns 0-3 as Double.
// Using.resource closes the HTTP stream even if parsing throws, and the fold
// reports the parser's own error instead of the bare NoSuchElementException
// that `.toOption.get` would raise. URI.create(...).toURL replaces the
// `new URL(String)` constructor, which is deprecated on recent JDKs.
val iris : Frame[Int,String,Double] =
  scala.util.Using.resource(java.net.URI.create(irisURL).toURL.openStream()) { in =>
    csv.CsvParser.parseInputStreamWithHeader[Double](
      inputStream = in,
      cols = List(0,1,2,3),
      recordSeparator = "\n"
    ).fold(
      error => sys.error(s"Failed to parse $irisURL: $error"),
      frame => frame
    )
  }
// iris: Frame[Int, String, Double] = [150 x 4]
// sepal_length sepal_width petal_length petal_width
// ------------ ----------- ------------ -----------
// 0 -> 5.1000 3.5000 1.4000 0.2000
// 1 -> 4.9000 3.0000 1.4000 0.2000
// 2 -> 4.7000 3.2000 1.3000 0.2000
// 3 -> 4.6000 3.1000 1.5000 0.2000
// 4 -> 5.0000 3.6000 1.4000 0.2000
// ...
// 145 -> 6.7000 3.0000 5.2000 2.3000
// 146 -> 6.3000 2.5000 5.0000 1.9000
// 147 -> 6.5000 3.0000 5.2000 2.0000
// 148 -> 6.2000 3.4000 5.4000 2.3000
// 149 -> 5.9000 3.0000 5.1000 1.8000
//
import org.saddle.linalg._
// Center every column by subtracting that column's mean from each of its values
val centered = iris.mapVec(column => column.demeaned)
// centered: Frame[Int, String, Double] = [150 x 4]
// sepal_length sepal_width petal_length petal_width
// ------------ ----------- ------------ -----------
// 0 -> -0.7433 0.4460 -2.3587 -0.9987
// 1 -> -0.9433 -0.0540 -2.3587 -0.9987
// 2 -> -1.1433 0.1460 -2.4587 -0.9987
// 3 -> -1.2433 0.0460 -2.2587 -0.9987
// 4 -> -0.8433 0.5460 -2.3587 -0.9987
// ...
// 145 -> 0.8567 -0.0540 1.4413 1.1013
// 146 -> 0.4567 -0.5540 1.2413 0.7013
// 147 -> 0.6567 -0.0540 1.4413 0.8013
// 148 -> 0.3567 0.3460 1.6413 1.1013
// 149 -> 0.0567 -0.0540 1.3413 0.6013
//
// SVD of the centered data matrix; svd(2) yields two singular values here, with
// u and vt shaped to match (see the printed dimensions below). The extractor
// pattern on the left binds the three factors u, s and vt in a single statement.
val SVDResult(u, s, vt) = centered.toMat.svd(2)
// u: Mat[Double] = [150 x 2]
// 0.1070 0.0544
// 0.1082 -0.0282
// 0.1152 -0.0229
// 0.1095 -0.0518
// ...
// -0.0608 -0.0624
// -0.0703 0.0131
// -0.0758 0.0193
// -0.0554 -0.0471
//
// s: Vec[Double] = [2 x 1]
// 25.0899
// 6.0079
//
// vt: Mat[Double] = [2 x 4]
// -0.3616 0.0823 -0.8566 -0.3588
// 0.6565 0.7297 -0.1758 -0.0747
//
// Per-sample component scores. Judging by the printed values, mDiagFromRight
// scales each column of u by the matching singular value, i.e. u * diag(s)
// (row 0: 0.1070 * 25.0899 ≈ 2.68). toFrame then wraps the matrix with integer
// row and column indices.
val pca = u.mDiagFromRight(s).toFrame
// pca: Frame[Int, Int, Double] = [150 x 2]
// 0 1
// ------- -------
// 0 -> 2.6842 0.3266
// 1 -> 2.7154 -0.1696
// 2 -> 2.8898 -0.1373
// 3 -> 2.7464 -0.3111
// 4 -> 2.7286 0.3339
// ...
// 145 -> -1.9440 0.1874
// 146 -> -1.5257 -0.3750
// 147 -> -1.7640 0.0785
// 148 -> -1.9016 0.1159
// 149 -> -1.3897 -0.2829
//
// Rename the integer PCA columns to "PCA0"/"PCA1" so the column index becomes
// String, then append them to the right of the original measurements
val joined = iris.rconcat(pca.mapColIndex(component => s"PCA${component}"))
// joined: Frame[Int, String, Double] = [150 x 6]
// sepal_length sepal_width petal_length petal_width PCA0 PCA1
// ------------ ----------- ------------ ----------- ------- -------
// 0 -> 5.1000 3.5000 1.4000 0.2000 2.6842 0.3266
// 1 -> 4.9000 3.0000 1.4000 0.2000 2.7154 -0.1696
// 2 -> 4.7000 3.2000 1.3000 0.2000 2.8898 -0.1373
// 3 -> 4.6000 3.1000 1.5000 0.2000 2.7464 -0.3111
// 4 -> 5.0000 3.6000 1.4000 0.2000 2.7286 0.3339
// ...
// 145 -> 6.7000 3.0000 5.2000 2.3000 -1.9440 0.1874
// 146 -> 6.3000 2.5000 5.0000 1.9000 -1.5257 -0.3750
// 147 -> 6.5000 3.0000 5.2000 2.0000 -1.7640 0.0785
// 148 -> 6.2000 3.4000 5.4000 2.3000 -1.9016 0.1159
// 149 -> 5.9000 3.0000 5.1000 1.8000 -1.3897 -0.2829
//
// Collapse each column to one number: the mean of its values
val reducedByColumn = joined.reduce(column => column.values.mean)
// reducedByColumn: Series[String, Double] = [6 x 1]
// sepal_length -> 5.8433
// sepal_width -> 3.0540
// petal_length -> 3.7587
// petal_width -> 1.1987
// PCA0 -> -0.0000
// PCA1 -> 0.0000
//
// Collapse each row to its largest value. As the printed type shows, max yields
// a Scalar[Double] rather than a plain Double, so the series holds Scalars.
val reducedByRow = joined.rreduce(row => row.values.max)
// reducedByRow: Series[Int, scalar.Scalar[Double]] = [150 x 1]
// 0 -> 5.1
// 1 -> 4.9
// 2 -> 4.7
// 3 -> 4.6
// 4 -> 5.0
// ...
// 145 -> 6.7
// 146 -> 6.3
// 147 -> 6.5
// 148 -> 6.2
// 149 -> 5.9
//