From d4c0f174b16e4aa2df01833e2d1fadec5bf0544f Mon Sep 17 00:00:00 2001
From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com>
Date: Sun, 6 Jul 2025 17:42:56 +0100
Subject: [PATCH] Add PCA implementation with fit and transform methods

---
 src/compute/models/pca.rs | 85 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)
 create mode 100644 src/compute/models/pca.rs

diff --git a/src/compute/models/pca.rs b/src/compute/models/pca.rs
new file mode 100644
index 0000000..1ede2d9
--- /dev/null
+++ b/src/compute/models/pca.rs
@@ -0,0 +1,85 @@
+use crate::matrix::{Matrix, SeriesOps};
+use rand;
+
+/// Principal component analysis fitted via power iteration.
+pub struct PCA {
+    pub components: Matrix<f64>, // (n_components, n_features)
+    pub mean: Matrix<f64>,       // (1, n_features)
+}
+
+impl PCA {
+    /// Fits the model on `x`, returning the top `n_components` principal
+    /// axes (the rows of `components`) and the per-feature mean of `x`.
+    pub fn fit(x: &Matrix<f64>, n_components: usize, iters: usize) -> Self {
+        let m = x.rows();
+        let n = x.cols();
+        assert!(n_components <= n);
+
+        // ----- centre data -----
+        let mean_vec = {
+            let mut v = Matrix::zeros(1, n);
+            for j in 0..n {
+                let mut s = 0.0;
+                for i in 0..m {
+                    s += x[(i, j)];
+                }
+                v[(0, j)] = s / m as f64;
+            }
+            v
+        };
+        let x_centered = x - &mean_vec;
+
+        // ----- covariance matrix C = Xᵀ·X / (m-1) -----
+        let cov = x_centered.transpose().dot(&x_centered) * (1.0 / (m as f64 - 1.0));
+
+        // ----- power iteration to find the top eigenvectors -----
+        let mut comp = Matrix::zeros(n_components, n);
+        let mut b = Matrix::zeros(1, n); // current vector
+        for c in 0..n_components {
+            // random initial vector
+            for j in 0..n {
+                b[(0, j)] = rand::random::<f64>() - 0.5;
+            }
+            // subtract projections onto previously found components
+            for prev in 0..c {
+                let proj = b.dot(&Matrix::from_vec(comp.row(prev).to_vec(), 1, n).transpose())[(0, 0)];
+                // keep the starting vector orthogonal to earlier axes
+                for j in 0..n {
+                    b[(0, j)] -= proj * comp[(prev, j)];
+                }
+            }
+            // iterate
+            for _ in 0..iters {
+                // b = C·bᵀ
+                let mut nb = cov.dot(&b.transpose()).transpose();
+                // subtract projections again to maintain orthogonality
+                for prev in 0..c {
+                    let proj = nb.dot(&Matrix::from_vec(comp.row(prev).to_vec(), 1, n).transpose())[(0, 0)];
+                    for j in 0..n {
+                        nb[(0, j)] -= proj * comp[(prev, j)];
+                    }
+                }
+                // normalise
+                let norm = nb.data().iter().map(|v| v * v).sum::<f64>().sqrt();
+                for j in 0..n {
+                    nb[(0, j)] /= norm;
+                }
+                b = nb;
+            }
+            // store component
+            for j in 0..n {
+                comp[(c, j)] = b[(0, j)];
+            }
+        }
+        Self {
+            components: comp,
+            mean: mean_vec,
+        }
+    }
+
+    /// Projects new data onto the learned axes.
+    pub fn transform(&self, x: &Matrix<f64>) -> Matrix<f64> {
+        let x_centered = x - &self.mean;
+        x_centered.dot(&self.components.transpose())
+    }
+}
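
Note on the approach: power iteration with Gram-Schmidt deflation was presumably chosen to keep the crate free of an external eigensolver. Each outer pass extracts the current dominant eigenvector of the covariance matrix, then projects it out of later searches so successive components stay orthogonal. The trade-off is that `iters` is a fixed budget rather than a convergence test, and power iteration converges slowly when the leading eigenvalues are close together, so callers may need a generous `iters` for near-degenerate spectra.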
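
As a quick sanity check, a test along the lines below could accompany the patch. This is a sketch, not part of the diff: it relies only on calls already used above (`Matrix::zeros`, `(row, col)` indexing, `rows()`/`cols()`), and the fixture data, tolerances, and test name are made up for illustration.

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn recovers_dominant_direction() {
        // Six zero-mean points lying close to the line y = x.
        let pts = [
            (-2.0, -2.1),
            (-1.0, -0.9),
            (-0.5, -0.4),
            (0.5, 0.4),
            (1.0, 0.9),
            (2.0, 2.1),
        ];
        let mut x: Matrix<f64> = Matrix::zeros(6, 2);
        for (i, (px, py)) in pts.iter().enumerate() {
            x[(i, 0)] = *px;
            x[(i, 1)] = *py;
        }

        let pca = PCA::fit(&x, 1, 200);

        // The stored axis is unit-length, and for this data it should be
        // roughly diagonal: |a| close to |b| (the sign is arbitrary).
        let (a, b) = (pca.components[(0, 0)], pca.components[(0, 1)]);
        assert!((a * a + b * b - 1.0).abs() < 1e-6);
        assert!((a.abs() - b.abs()).abs() < 0.2);

        // transform() maps each sample to one score per retained component.
        let scores = pca.transform(&x);
        assert_eq!((scores.rows(), scores.cols()), (6, 1));
    }
}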