From 04637ef4d0728bc9328ee255b5d200b2b6b599f5 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 17:05:46 +0100 Subject: [PATCH 01/43] Add methods to create zero, one, and filled matrices for f64 type --- src/matrix/mat.rs | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/matrix/mat.rs b/src/matrix/mat.rs index 64bf6de..bf4265b 100644 --- a/src/matrix/mat.rs +++ b/src/matrix/mat.rs @@ -310,6 +310,26 @@ impl Matrix { } } +impl Matrix { + /// Creates a new matrix filled with a specific value of the specified size. + pub fn filled(rows: usize, cols: usize, value: f64) -> Self { + Matrix { + rows, + cols, + data: vec![value; rows * cols], // Fill with the specified value + } + } + /// Creates a new matrix filled with zeros of the specified size. + pub fn zeros(rows: usize, cols: usize) -> Self { + Matrix::filled(rows, cols, 0.0) + } + + /// Creates a new matrix filled with ones of the specified size. + pub fn ones(rows: usize, cols: usize) -> Self { + Matrix::filled(rows, cols, 1.0) + } +} + impl Index<(usize, usize)> for Matrix { type Output = T; @@ -1794,4 +1814,25 @@ mod tests { } } } + + #[test] + fn test_matrix_zeros_ones_filled() { + // Test zeros + let m = Matrix::::zeros(2, 3); + assert_eq!(m.rows(), 2); + assert_eq!(m.cols(), 3); + assert_eq!(m.data(), &[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]); + + // Test ones + let m = Matrix::::ones(3, 2); + assert_eq!(m.rows(), 3); + assert_eq!(m.cols(), 2); + assert_eq!(m.data(), &[1.0, 1.0, 1.0, 1.0, 1.0, 1.0]); + + // Test filled + let m = Matrix::::filled(2, 2, 42.5); + assert_eq!(m.rows(), 2); + assert_eq!(m.cols(), 2); + assert_eq!(m.data(), &[42.5, 42.5, 42.5, 42.5]); + } } From f749b2c921e21249a8ee192aed0fc44ace20a60b Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 17:38:24 +0100 Subject: [PATCH 02/43] Add method to retrieve a specific row from the matrix and corresponding tests --- src/matrix/mat.rs | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/matrix/mat.rs b/src/matrix/mat.rs index bf4265b..dbae21a 100644 --- a/src/matrix/mat.rs +++ b/src/matrix/mat.rs @@ -179,6 +179,21 @@ impl Matrix { self.cols -= 1; } + #[inline] + pub fn row(&self, r: usize) -> Vec { + assert!( + r < self.rows, + "row index {} out of bounds for {} rows", + r, + self.rows + ); + let mut row_data = Vec::with_capacity(self.cols); + for c in 0..self.cols { + row_data.push(self[(r, c)].clone()); // Clone each element + } + row_data + } + /// Deletes a row from the matrix. Panics on out-of-bounds. /// This is O(N) where N is the number of elements, as it rebuilds the data vec. 
pub fn delete_row(&mut self, row: usize) { @@ -1130,6 +1145,21 @@ mod tests { matrix[(0, 3)] = 99; } + #[test] + fn test_row() { + let ma = static_test_matrix(); + assert_eq!(ma.row(0), &[1, 4, 7]); + assert_eq!(ma.row(1), &[2, 5, 8]); + assert_eq!(ma.row(2), &[3, 6, 9]); + } + + #[test] + #[should_panic(expected = "row index 3 out of bounds for 3 rows")] + fn test_row_out_of_bounds() { + let ma = static_test_matrix(); + ma.row(3); + } + #[test] fn test_column() { let matrix = static_test_matrix_2x4(); From 6718cf5de74a876a579b8da28f1632122063f7f1 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 17:40:04 +0100 Subject: [PATCH 03/43] Add compute module and update lib.rs to include it --- src/compute/mod.rs | 3 +++ src/lib.rs | 3 +++ 2 files changed, 6 insertions(+) create mode 100644 src/compute/mod.rs diff --git a/src/compute/mod.rs b/src/compute/mod.rs new file mode 100644 index 0000000..b24f638 --- /dev/null +++ b/src/compute/mod.rs @@ -0,0 +1,3 @@ +pub mod activations; + +pub mod models; \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index a68c8c9..510f36d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,3 +8,6 @@ pub mod frame; /// Documentation for the [`crate::utils`] module. pub mod utils; + +/// Documentation for the [`crate::compute`] module. +pub mod compute; From dbbf5f96170466e91bcd43b2ea22d72524a9b0b4 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 17:40:41 +0100 Subject: [PATCH 04/43] Add activation functions: sigmoid, dsigmoid, relu, and drelu --- src/compute/activations.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 src/compute/activations.rs diff --git a/src/compute/activations.rs b/src/compute/activations.rs new file mode 100644 index 0000000..80b9928 --- /dev/null +++ b/src/compute/activations.rs @@ -0,0 +1,18 @@ +use crate::matrix::{Matrix, SeriesOps}; + +pub fn sigmoid(x: &Matrix) -> Matrix { + x.map(|v| 1.0 / (1.0 + (-v).exp())) +} + +pub fn dsigmoid(y: &Matrix) -> Matrix { + // derivative w.r.t. 
pre-activation; takes y = sigmoid(x) + y.map(|v| v * (1.0 - v)) +} + +pub fn relu(x: &Matrix) -> Matrix { + x.map(|v| if v > 0.0 { v } else { 0.0 }) +} + +pub fn drelu(x: &Matrix) -> Matrix { + x.map(|v| if v > 0.0 { 1.0 } else { 0.0 }) +} From 1501ed5b7a4a6186ebfae89cab1b122d1dc5a780 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 17:40:55 +0100 Subject: [PATCH 05/43] Add linear regression model implementation --- src/compute/models/linreg.rs | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 src/compute/models/linreg.rs diff --git a/src/compute/models/linreg.rs b/src/compute/models/linreg.rs new file mode 100644 index 0000000..74e5e44 --- /dev/null +++ b/src/compute/models/linreg.rs @@ -0,0 +1,35 @@ +use crate::matrix::{Matrix, SeriesOps}; + +pub struct LinReg { + w: Matrix, // shape (n_features, 1) + b: f64, +} + +impl LinReg { + pub fn new(n_features: usize) -> Self { + Self { + w: Matrix::from_vec(vec![0.0; n_features], n_features, 1), + b: 0.0, + } + } + + pub fn predict(&self, x: &Matrix) -> Matrix { + // X.dot(w) + b + x.dot(&self.w) + self.b + } + + pub fn fit(&mut self, x: &Matrix, y: &Matrix, lr: f64, epochs: usize) { + let m = x.rows() as f64; + for _ in 0..epochs { + let y_hat = self.predict(x); + let err = &y_hat - y; // shape (m,1) + + // grads + let grad_w = x.transpose().dot(&err) * (2.0 / m); // (n,1) + let grad_b = (2.0 / m) * err.sum_vertical().iter().sum::(); + // update + self.w = &self.w - &(grad_w * lr); + self.b -= lr * grad_b; + } + } +} From be41e9b20ee6c1aee77320aa8e30cd0f7de3ff8d Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 17:41:14 +0100 Subject: [PATCH 06/43] Add logistic regression model implementation --- src/compute/models/logreg.rs | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 src/compute/models/logreg.rs diff --git a/src/compute/models/logreg.rs b/src/compute/models/logreg.rs new file mode 100644 index 0000000..1bdaa86 --- /dev/null +++ b/src/compute/models/logreg.rs @@ -0,0 +1,36 @@ +use crate::matrix::{Matrix, SeriesOps}; +use crate::compute::activations::sigmoid; + +pub struct LogReg { + w: Matrix, + b: f64, +} + +impl LogReg { + pub fn new(n_features: usize) -> Self { + Self { + w: Matrix::zeros(n_features, 1), + b: 0.0, + } + } + + pub fn predict_proba(&self, x: &Matrix) -> Matrix { + sigmoid(&(x.dot(&self.w) + self.b)) // σ(Xw + b) + } + + pub fn fit(&mut self, x: &Matrix, y: &Matrix, lr: f64, epochs: usize) { + let m = x.rows() as f64; + for _ in 0..epochs { + let p = self.predict_proba(x); // shape (m,1) + let err = &p - y; // derivative of BCE wrt pre-sigmoid + let grad_w = x.transpose().dot(&err) / m; + let grad_b = err.sum_vertical().iter().sum::() / m; + self.w = &self.w - &(grad_w * lr); + self.b -= lr * grad_b; + } + } + + pub fn predict(&self, x: &Matrix) -> Matrix { + self.predict_proba(x).map(|p| if p >= 0.5 { 1.0 } else { 0.0 }) + } +} From e2c5e65c18a94306f4aaa68339a4b9929198b0ea Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 17:41:56 +0100 Subject: [PATCH 07/43] move rand from dev-deps to deps --- Cargo.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d3d44f8..ec38c65 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,8 +14,6 @@ crate-type = ["cdylib", "lib"] [dependencies] chrono = "^0.4.10" criterion = { version = 
"0.5", features = ["html_reports"], optional = true } - -[dev-dependencies] rand = "^0.9.1" [features] From b1b7e63feacffdb7475aa1c77e921a00c137ca19 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 17:42:08 +0100 Subject: [PATCH 08/43] Add Dense Neural Network implementation with forward and training methods --- src/compute/models/dense_nn.rs | 65 ++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 src/compute/models/dense_nn.rs diff --git a/src/compute/models/dense_nn.rs b/src/compute/models/dense_nn.rs new file mode 100644 index 0000000..1959c2a --- /dev/null +++ b/src/compute/models/dense_nn.rs @@ -0,0 +1,65 @@ +use crate::matrix::{Matrix, SeriesOps}; +use crate::compute::activations::{relu, sigmoid, drelu}; +use rand::Rng; + +pub struct DenseNN { + w1: Matrix, // (n_in, n_hidden) + b1: Matrix, // (1, n_hidden) + w2: Matrix, // (n_hidden, n_out) + b2: Matrix, // (1, n_out) +} + +impl DenseNN { + pub fn new(n_in: usize, n_hidden: usize, n_out: usize) -> Self { + let mut rng = rand::rng(); + let mut init = |rows, cols| { + let data = (0..rows * cols) + .map(|_| rng.random_range(-1.0..1.0)) + .collect::>(); + Matrix::from_vec(data, rows, cols) + }; + Self { + w1: init(n_in, n_hidden), + b1: Matrix::zeros(1, n_hidden), + w2: init(n_hidden, n_out), + b2: Matrix::zeros(1, n_out), + } + } + + pub fn forward(&self, x: &Matrix) -> (Matrix, Matrix, Matrix) { + // z1 = X·W1 + b1 ; a1 = ReLU(z1) + let z1 = x.dot(&self.w1) + &self.b1; + let a1 = relu(&z1); + // z2 = a1·W2 + b2 ; a2 = softmax(z2) (here binary => sigmoid) + let z2 = a1.dot(&self.w2) + &self.b2; + let a2 = sigmoid(&z2); // binary output + (a1, z2, a2) // keep intermediates for back-prop + } + + pub fn train(&mut self, x: &Matrix, y: &Matrix, lr: f64, epochs: usize) { + let m = x.rows() as f64; + for _ in 0..epochs { + let (a1, _z2, y_hat) = self.forward(x); + + // -------- backwards ---------- + // dL/da2 = y_hat - y (BCE derivative) + let dz2 = &y_hat - y; // (m, n_out) + let dw2 = a1.transpose().dot(&dz2) / m; // (n_h, n_out) + // let db2 = dz2.sum_vertical() * (1.0 / m); // broadcast ok + let db2 = Matrix::from_vec(dz2.sum_vertical(), 1, dz2.cols()) * (1.0 / m); // (1, n_out) + let da1 = dz2.dot(&self.w2.transpose()); // (m,n_h) + let dz1 = da1.zip(&a1, |g, act| g * drelu(&Matrix::from_cols(vec![vec![act]])).data()[0]); // (m,n_h) + + // real code: drelu returns Matrix, broadcasting needed; you can optimise. 
+ + let dw1 = x.transpose().dot(&dz1) / m; // (n_in,n_h) + let db1 = Matrix::from_vec(dz1.sum_vertical(), 1, dz1.cols()) * (1.0 / m); // (1, n_h) + + // -------- update ---------- + self.w2 = &self.w2 - &(dw2 * lr); + self.b2 = &self.b2 - &(db2 * lr); + self.w1 = &self.w1 - &(dw1 * lr); + self.b1 = &self.b1 - &(db1 * lr); + } + } +} From b6645fcfbdea9a12087565b757f8528e23d1b2d0 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 17:42:45 +0100 Subject: [PATCH 09/43] Add Gaussian Naive Bayes implementation with fit and predict methods --- src/compute/models/k_means.rs | 105 ++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 src/compute/models/k_means.rs diff --git a/src/compute/models/k_means.rs b/src/compute/models/k_means.rs new file mode 100644 index 0000000..716114f --- /dev/null +++ b/src/compute/models/k_means.rs @@ -0,0 +1,105 @@ +use crate::matrix::Matrix; +use std::collections::HashMap; + +pub struct GaussianNB { + classes: Vec, // distinct labels + priors: Vec, // P(class) + means: Vec>, + variances: Vec>, + eps: f64, // var-smoothing +} + +impl GaussianNB { + pub fn new(var_smoothing: f64) -> Self { + Self { + classes: vec![], + priors: vec![], + means: vec![], + variances: vec![], + eps: var_smoothing, + } + } + + pub fn fit(&mut self, x: &Matrix, y: &Matrix) { + let m = x.rows(); + let n = x.cols(); + assert_eq!(y.rows(), m); + assert_eq!(y.cols(), 1); + + // ----- group samples by label ----- + let mut groups: HashMap> = HashMap::new(); + for i in 0..m { + groups.entry(y[(i, 0)] as i64).or_default().push(i); + } + + self.classes = groups.keys().cloned().map(|v| v as f64).collect::>(); + self.classes.sort_by(|a, b| a.partial_cmp(b).unwrap()); + + self.priors.clear(); + self.means.clear(); + self.variances.clear(); + + for &c in &self.classes { + let idx = &groups[&(c as i64)]; + let count = idx.len(); + self.priors.push(count as f64 / m as f64); + + let mut mean = Matrix::zeros(1, n); + let mut var = Matrix::zeros(1, n); + + // mean + for &i in idx { + for j in 0..n { + mean[(0, j)] += x[(i, j)]; + } + } + for j in 0..n { + mean[(0, j)] /= count as f64; + } + + // variance + for &i in idx { + for j in 0..n { + let d = x[(i, j)] - mean[(0, j)]; + var[(0, j)] += d * d; + } + } + for j in 0..n { + var[(0, j)] = var[(0, j)] / count as f64 + self.eps; + } + + self.means.push(mean); + self.variances.push(var); + } + } + + /// Return class labels (shape m×1) for samples in X. 
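+    ///
+    /// Scores each class in log-space, ln P(y=c) + Σ_j ln N(x_j | μ_cj, σ²_cj),
+    /// and returns the argmax; summing log-densities avoids the underflow that
+    /// multiplying many small likelihoods would cause.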
+ pub fn predict(&self, x: &Matrix) -> Matrix { + let m = x.rows(); + let k = self.classes.len(); + let n = x.cols(); + let mut preds = Matrix::zeros(m, 1); + let ln_2pi = (2.0 * std::f64::consts::PI).ln(); + + for i in 0..m { + let mut best_class = 0usize; + let mut best_log_prob = f64::NEG_INFINITY; + for c in 0..k { + // log P(y=c) + Σ log N(x_j | μ, σ²) + let mut log_prob = self.priors[c].ln(); + for j in 0..n { + let mean = self.means[c][(0, j)]; + let var = self.variances[c][(0, j)]; + let diff = x[(i, j)] - mean; + log_prob += -0.5 * (diff * diff / var + var.ln() + ln_2pi); + } + if log_prob > best_log_prob { + best_log_prob = log_prob; + best_class = c; + } + } + preds[(i, 0)] = self.classes[best_class]; + } + preds + } +} From d4c0f174b16e4aa2df01833e2d1fadec5bf0544f Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 17:42:56 +0100 Subject: [PATCH 10/43] Add PCA implementation with fit and transform methods --- src/compute/models/pca.rs | 85 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 src/compute/models/pca.rs diff --git a/src/compute/models/pca.rs b/src/compute/models/pca.rs new file mode 100644 index 0000000..1ede2d9 --- /dev/null +++ b/src/compute/models/pca.rs @@ -0,0 +1,85 @@ +use crate::matrix::{Matrix, SeriesOps}; +use rand; + +/// Returns the `n_components` principal axes (rows) and the centred data’s mean. +pub struct PCA { + pub components: Matrix, // (n_components, n_features) + pub mean: Matrix, // (1, n_features) +} + +impl PCA { + pub fn fit(x: &Matrix, n_components: usize, iters: usize) -> Self { + let m = x.rows(); + let n = x.cols(); + assert!(n_components <= n); + + // ----- centre data ----- + let mean_vec = { + let mut v = Matrix::zeros(1, n); + for j in 0..n { + let mut s = 0.0; + for i in 0..m { + s += x[(i, j)]; + } + v[(0, j)] = s / m as f64; + } + v + }; + let x_centered = x - &mean_vec; + + // ----- covariance matrix C = Xᵀ·X / (m-1) ----- + let cov = x_centered.transpose().dot(&x_centered) * (1.0 / (m as f64 - 1.0)); + + // ----- power iteration to find top eigenvectors ----- + let mut comp = Matrix::zeros(n_components, n); + let mut b = Matrix::zeros(1, n); // current vector + for c in 0..n_components { + // random initial vector + for j in 0..n { + b[(0, j)] = rand::random::() - 0.5; + } + // subtract projections on previously found components + for prev in 0..c { + // let proj = b.dot(Matrix::from_vec(data, rows, cols).transpose())[(0, 0)]; + // let proj = b.dot(&comp.row(prev).transpose())[(0, 0)]; + let proj = b.dot(&Matrix::from_vec(comp.row(prev).to_vec(), 1, n).transpose())[(0, 0)]; + // subtract projection to maintain orthogonality + for j in 0..n { + b[(0, j)] -= proj * comp[(prev, j)]; + } + } + // iterate + for _ in 0..iters { + // b = C·bᵀ + let mut nb = cov.dot(&b.transpose()).transpose(); + // subtract projections again to maintain orthogonality + for prev in 0..c { + let proj = nb.dot(&Matrix::from_vec(comp.row(prev).to_vec(), 1, n).transpose())[(0, 0)]; + for j in 0..n { + nb[(0, j)] -= proj * comp[(prev, j)]; + } + } + // normalise + let norm = nb.data().iter().map(|v| v * v).sum::().sqrt(); + for j in 0..n { + nb[(0, j)] /= norm; + } + b = nb; + } + // store component + for j in 0..n { + comp[(c, j)] = b[(0, j)]; + } + } + Self { + components: comp, + mean: mean_vec, + } + } + + /// Project new data on the learned axes. 
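+    ///
+    /// Computes (X - mean) · Wᵀ, where each row of W (`self.components`) is one
+    /// principal axis, so the result has shape (n_samples, n_components).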
+ pub fn transform(&self, x: &Matrix) -> Matrix { + let x_centered = x - &self.mean; + x_centered.dot(&self.components.transpose()) + } +} From eb948c1f49e7cc208aad293d2f338da282ede664 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 17:43:04 +0100 Subject: [PATCH 11/43] Add Gaussian Naive Bayes implementation with fit and predict methods --- src/compute/models/gaussian_nb.rs | 124 ++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 src/compute/models/gaussian_nb.rs diff --git a/src/compute/models/gaussian_nb.rs b/src/compute/models/gaussian_nb.rs new file mode 100644 index 0000000..b6cd659 --- /dev/null +++ b/src/compute/models/gaussian_nb.rs @@ -0,0 +1,124 @@ +use crate::matrix::{Matrix}; +use std::collections::HashMap; + +pub struct GaussianNB { + classes: Vec, // distinct labels + priors: Vec, // P(class) + means: Vec>, + variances: Vec>, + eps: f64, // var-smoothing +} + +impl GaussianNB { + pub fn new(var_smoothing: f64) -> Self { + Self { + classes: vec![], + priors: vec![], + means: vec![], + variances: vec![], + eps: var_smoothing, + } + } + + pub fn fit(&mut self, x: &Matrix, y: &Matrix) { + let m = x.rows(); + let n = x.cols(); + assert_eq!(y.rows(), m); + assert_eq!(y.cols(), 1); + if m == 0 || n == 0 { + panic!("Input matrix x or y is empty"); + } + + // ----- group samples by label ----- + let mut groups: HashMap> = HashMap::new(); + for i in 0..m { + let label_bits = y[(i, 0)].to_bits(); + groups.entry(label_bits).or_default().push(i); + } + if groups.is_empty() { + panic!("No class labels found in y"); + } + + self.classes = groups + .keys() + .cloned() + .map(f64::from_bits) + .collect::>(); + // Note: If NaN is present in class labels, this may panic. Ensure labels are valid floats. + self.classes.sort_by(|a, b| a.partial_cmp(b).unwrap()); + + self.priors.clear(); + self.means.clear(); + self.variances.clear(); + + for &c in &self.classes { + let label_bits = c.to_bits(); + let idx = &groups[&label_bits]; + let count = idx.len(); + if count == 0 { + panic!("Class group for label {c} is empty"); + } + self.priors.push(count as f64 / m as f64); + + let mut mean = Matrix::zeros(1, n); + let mut var = Matrix::zeros(1, n); + + // mean + for &i in idx { + for j in 0..n { + mean[(0, j)] += x[(i, j)]; + } + } + for j in 0..n { + mean[(0, j)] /= count as f64; + } + + // variance + for &i in idx { + for j in 0..n { + let d = x[(i, j)] - mean[(0, j)]; + var[(0, j)] += d * d; + } + } + for j in 0..n { + var[(0, j)] = var[(0, j)] / count as f64 + self.eps; // always add eps after division + if var[(0, j)] <= 0.0 { + var[(0, j)] = self.eps; // ensure strictly positive variance + } + } + + self.means.push(mean); + self.variances.push(var); + } + } + + /// Return class labels (shape m×1) for samples in X. 
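+    ///
+    /// A minimal usage sketch (hypothetical data names; labels assumed 0.0/1.0):
+    /// `let mut clf = GaussianNB::new(1e-9);`
+    /// `clf.fit(&x_train, &y_train);`
+    /// `let y_pred = clf.predict(&x_test);`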
+ pub fn predict(&self, x: &Matrix) -> Matrix { + let m = x.rows(); + let k = self.classes.len(); + let n = x.cols(); + let mut preds = Matrix::zeros(m, 1); + let ln_2pi = (2.0 * std::f64::consts::PI).ln(); + + for i in 0..m { + let mut best_class = 0usize; + let mut best_log_prob = f64::NEG_INFINITY; + for c in 0..k { + // log P(y=c) + Σ log N(x_j | μ, σ²) + let mut log_prob = self.priors[c].ln(); + for j in 0..n { + let mean = self.means[c][(0, j)]; + let var = self.variances[c][(0, j)]; + let diff = x[(i, j)] - mean; + log_prob += -0.5 * (diff * diff / var + var.ln() + ln_2pi); + } + if log_prob > best_log_prob { + best_log_prob = log_prob; + best_class = c; + } + } + preds[(i, 0)] = self.classes[best_class]; + } + preds + } +} From b279131503c928158de4991aa9ac7c12d6760efd Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 17:43:17 +0100 Subject: [PATCH 12/43] Add model modules for linear regression, logistic regression, dense neural network, k-means, PCA, and Gaussian Naive Bayes --- src/compute/models/mod.rs | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 src/compute/models/mod.rs diff --git a/src/compute/models/mod.rs b/src/compute/models/mod.rs new file mode 100644 index 0000000..c617f08 --- /dev/null +++ b/src/compute/models/mod.rs @@ -0,0 +1,6 @@ +pub mod linreg; +pub mod logreg; +pub mod dense_nn; +pub mod k_means; +pub mod pca; +pub mod gaussian_nb; \ No newline at end of file From 37b20f21744d8bdbf88fda470396765d815ee6c5 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 17:51:43 +0100 Subject: [PATCH 13/43] Add unit tests for activation functions: sigmoid, relu, dsigmoid, and drelu --- src/compute/activations.rs | 42 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/compute/activations.rs b/src/compute/activations.rs index 80b9928..f25c00c 100644 --- a/src/compute/activations.rs +++ b/src/compute/activations.rs @@ -16,3 +16,45 @@ pub fn relu(x: &Matrix) -> Matrix { pub fn drelu(x: &Matrix) -> Matrix { x.map(|v| if v > 0.0 { 1.0 } else { 0.0 }) } + + +mod tests { + use super::*; + + // Helper function to round all elements in a matrix to n decimal places + fn _round_matrix(mat: &Matrix, decimals: u32) -> Matrix { + let factor = 10f64.powi(decimals as i32); + let rounded: Vec = mat.to_vec().iter().map(|v| (v * factor).round() / factor).collect(); + Matrix::from_vec(rounded, mat.rows(), mat.cols()) + } + + #[test] + fn test_sigmoid() { + let x = Matrix::from_vec(vec![-1.0, 0.0, 1.0], 3, 1); + let expected = Matrix::from_vec(vec![0.26894142, 0.5, 0.73105858], 3, 1); + let result = sigmoid(&x); + assert_eq!(_round_matrix(&result, 6), _round_matrix(&expected, 6)); + } + + #[test] + fn test_relu() { + let x = Matrix::from_vec(vec![-1.0, 0.0, 1.0], 3, 1); + let expected = Matrix::from_vec(vec![0.0, 0.0, 1.0], 3, 1); + assert_eq!(relu(&x), expected); + } + + #[test] + fn test_dsigmoid() { + let y = Matrix::from_vec(vec![0.26894142, 0.5, 0.73105858], 3, 1); + let expected = Matrix::from_vec(vec![0.19661193, 0.25, 0.19661193], 3, 1); + let result = dsigmoid(&y); + assert_eq!(_round_matrix(&result, 6), _round_matrix(&expected, 6)); + } + + #[test] + fn test_drelu() { + let x = Matrix::from_vec(vec![-1.0, 0.0, 1.0], 3, 1); + let expected = Matrix::from_vec(vec![0.0, 0.0, 1.0], 3, 1); + assert_eq!(drelu(&x), expected); + } +} \ No newline at end of file From 4ddacdfd21b6fe80d090491a10aed1a96e0b7383 Mon Sep 17 00:00:00 
2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 18:52:15 +0100 Subject: [PATCH 14/43] Add unit tests for linear regression fit and predict methods --- src/compute/models/linreg.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/compute/models/linreg.rs b/src/compute/models/linreg.rs index 74e5e44..5add9f9 100644 --- a/src/compute/models/linreg.rs +++ b/src/compute/models/linreg.rs @@ -33,3 +33,22 @@ impl LinReg { } } } + +mod tests { + + use super::LinReg; + use crate::matrix::{Matrix}; + + #[test] + fn test_linreg_fit_predict() { + let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1); + let y = Matrix::from_vec(vec![2.0, 3.0, 4.0, 5.0], 4, 1); + let mut model = LinReg::new(1); + model.fit(&x, &y, 0.01, 10000); + let preds = model.predict(&x); + assert!((preds[(0, 0)] - 2.0).abs() < 1e-2); + assert!((preds[(1, 0)] - 3.0).abs() < 1e-2); + assert!((preds[(2, 0)] - 4.0).abs() < 1e-2); + assert!((preds[(3, 0)] - 5.0).abs() < 1e-2); + } +} From 54a266b630e1ccd2a86e1ba935952825b62191f3 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 18:52:49 +0100 Subject: [PATCH 15/43] Add unit tests for logistic regression fit and predict methods --- src/compute/models/logreg.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/compute/models/logreg.rs b/src/compute/models/logreg.rs index 1bdaa86..473d5e9 100644 --- a/src/compute/models/logreg.rs +++ b/src/compute/models/logreg.rs @@ -34,3 +34,22 @@ impl LogReg { self.predict_proba(x).map(|p| if p >= 0.5 { 1.0 } else { 0.0 }) } } + + +mod tests { + use super::LogReg; + use crate::matrix::Matrix; + + #[test] + fn test_logreg_fit_predict() { + let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1); + let y = Matrix::from_vec(vec![0.0, 0.0, 1.0, 1.0], 4, 1); + let mut model = LogReg::new(1); + model.fit(&x, &y, 0.01, 10000); + let preds = model.predict(&x); + assert_eq!(preds[(0, 0)], 0.0); + assert_eq!(preds[(1, 0)], 0.0); + assert_eq!(preds[(2, 0)], 1.0); + assert_eq!(preds[(3, 0)], 1.0); + } +} \ No newline at end of file From 85154a3be0112912d247dca7e006a340880fbdf5 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 18:58:38 +0100 Subject: [PATCH 16/43] Add shape method to Matrix and corresponding unit test --- src/matrix/mat.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/matrix/mat.rs b/src/matrix/mat.rs index dbae21a..c7b0893 100644 --- a/src/matrix/mat.rs +++ b/src/matrix/mat.rs @@ -89,6 +89,10 @@ impl Matrix { self.cols } + pub fn shape(&self) -> (usize, usize) { + (self.rows, self.cols) + } + /// Get element reference (immutable). Panics on out-of-bounds. 
pub fn get(&self, r: usize, c: usize) -> &T { &self[(r, c)] @@ -1153,6 +1157,14 @@ mod tests { assert_eq!(ma.row(2), &[3, 6, 9]); } + #[test] + fn test_shape(){ + let ma = static_test_matrix_2x4(); + assert_eq!(ma.shape(), (2, 4)); + assert_eq!(ma.rows(), 2); + assert_eq!(ma.cols(), 4); + } + #[test] #[should_panic(expected = "row index 3 out of bounds for 3 rows")] fn test_row_out_of_bounds() { From 2ca496cfd1c425f8eef08012742e1c5c9c427202 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 19:16:46 +0100 Subject: [PATCH 17/43] Add repeat_rows method to Matrix and corresponding unit test --- src/matrix/mat.rs | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/matrix/mat.rs b/src/matrix/mat.rs index c7b0893..649d831 100644 --- a/src/matrix/mat.rs +++ b/src/matrix/mat.rs @@ -327,6 +327,21 @@ impl Matrix { self.data = new_data; self.rows = new_rows; } + + /// Return a new matrix where row 0 of `self` is repeated `n` times. + pub fn repeat_rows(&self, n: usize) -> Matrix + where + T: Clone, + { + let mut data = Vec::with_capacity(n * self.cols()); + let zeroth_row = self.row(0); + for value in &zeroth_row { + for _ in 0..n { + data.push(value.clone()); // Clone each element + } + } + Matrix::from_vec(data, n, self.cols) + } } impl Matrix { @@ -1158,13 +1173,24 @@ mod tests { } #[test] - fn test_shape(){ + fn test_shape() { let ma = static_test_matrix_2x4(); assert_eq!(ma.shape(), (2, 4)); assert_eq!(ma.rows(), 2); assert_eq!(ma.cols(), 4); } + #[test] + fn test_repeat_rows() { + let ma = static_test_matrix(); + // Returns a new matrix where row 0 of `self` is repeated `n` times. + let repeated = ma.repeat_rows(3); + // assert all rows are equal to the first row + for r in 0..repeated.rows() { + assert_eq!(repeated.row(r), ma.row(0)); + } + } + #[test] #[should_panic(expected = "row index 3 out of bounds for 3 rows")] fn test_row_out_of_bounds() { From 1c8fcc0bad716aa1399a98d9d32ffde4f017db30 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 19:17:03 +0100 Subject: [PATCH 18/43] Refactor LogReg implementation for improved readability by adjusting formatting and organizing imports --- src/compute/models/logreg.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/compute/models/logreg.rs b/src/compute/models/logreg.rs index 473d5e9..8821ee4 100644 --- a/src/compute/models/logreg.rs +++ b/src/compute/models/logreg.rs @@ -1,5 +1,5 @@ -use crate::matrix::{Matrix, SeriesOps}; use crate::compute::activations::sigmoid; +use crate::matrix::{Matrix, SeriesOps}; pub struct LogReg { w: Matrix, @@ -15,14 +15,14 @@ impl LogReg { } pub fn predict_proba(&self, x: &Matrix) -> Matrix { - sigmoid(&(x.dot(&self.w) + self.b)) // σ(Xw + b) + sigmoid(&(x.dot(&self.w) + self.b)) // σ(Xw + b) } pub fn fit(&mut self, x: &Matrix, y: &Matrix, lr: f64, epochs: usize) { let m = x.rows() as f64; for _ in 0..epochs { - let p = self.predict_proba(x); // shape (m,1) - let err = &p - y; // derivative of BCE wrt pre-sigmoid + let p = self.predict_proba(x); // shape (m,1) + let err = &p - y; // derivative of BCE wrt pre-sigmoid let grad_w = x.transpose().dot(&err) / m; let grad_b = err.sum_vertical().iter().sum::() / m; self.w = &self.w - &(grad_w * lr); @@ -31,14 +31,14 @@ impl LogReg { } pub fn predict(&self, x: &Matrix) -> Matrix { - self.predict_proba(x).map(|p| if p >= 0.5 { 1.0 } else { 0.0 }) + self.predict_proba(x) + 
.map(|p| if p >= 0.5 { 1.0 } else { 0.0 }) } } - +#[cfg(test)] mod tests { - use super::LogReg; - use crate::matrix::Matrix; + use super::*; #[test] fn test_logreg_fit_predict() { @@ -52,4 +52,4 @@ mod tests { assert_eq!(preds[(2, 0)], 1.0); assert_eq!(preds[(3, 0)], 1.0); } -} \ No newline at end of file +} From ab6d5f9f8ff72cd77ed2d2a610fc38bac72972a0 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 19:17:09 +0100 Subject: [PATCH 19/43] Refactor test module imports in LinReg to improve clarity --- src/compute/models/linreg.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/compute/models/linreg.rs b/src/compute/models/linreg.rs index 5add9f9..ba76197 100644 --- a/src/compute/models/linreg.rs +++ b/src/compute/models/linreg.rs @@ -34,10 +34,10 @@ impl LinReg { } } +#[cfg(test)] mod tests { - use super::LinReg; - use crate::matrix::{Matrix}; + use super::*; #[test] fn test_linreg_fit_predict() { From 4c626bf09cd9b6c1d52bb29ffbca29dd4ac008af Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 20:00:17 +0100 Subject: [PATCH 20/43] Add leaky_relu and dleaky_relu functions with corresponding unit tests --- src/compute/activations.rs | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/compute/activations.rs b/src/compute/activations.rs index f25c00c..d2f3710 100644 --- a/src/compute/activations.rs +++ b/src/compute/activations.rs @@ -17,6 +17,13 @@ pub fn drelu(x: &Matrix) -> Matrix { x.map(|v| if v > 0.0 { 1.0 } else { 0.0 }) } +pub fn leaky_relu(x: &Matrix) -> Matrix { + x.map(|v| if v > 0.0 { v } else { 0.01 * v }) +} + +pub fn dleaky_relu(x: &Matrix) -> Matrix { + x.map(|v| if v > 0.0 { 1.0 } else { 0.01 }) +} mod tests { use super::*; @@ -57,4 +64,17 @@ mod tests { let expected = Matrix::from_vec(vec![0.0, 0.0, 1.0], 3, 1); assert_eq!(drelu(&x), expected); } -} \ No newline at end of file + #[test] + fn test_leaky_relu() { + let x = Matrix::from_vec(vec![-1.0, 0.0, 1.0], 3, 1); + let expected = Matrix::from_vec(vec![-0.01, 0.0, 1.0], 3, 1); + assert_eq!(leaky_relu(&x), expected); + } + #[test] + fn test_dleaky_relu() { + let x = Matrix::from_vec(vec![-1.0, 0.0, 1.0], 3, 1); + let expected = Matrix::from_vec(vec![0.01, 0.01, 1.0], 3, 1); + assert_eq!(dleaky_relu(&x), expected); + } + +} From 005c10e816eaa23dd25942beef06959564613148 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 20:00:54 +0100 Subject: [PATCH 21/43] Enhance activation function tests with edge cases for sigmoid, relu, and their derivatives --- src/compute/activations.rs | 63 +++++++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 4 deletions(-) diff --git a/src/compute/activations.rs b/src/compute/activations.rs index d2f3710..661fe52 100644 --- a/src/compute/activations.rs +++ b/src/compute/activations.rs @@ -27,11 +27,15 @@ pub fn dleaky_relu(x: &Matrix) -> Matrix { mod tests { use super::*; - + // Helper function to round all elements in a matrix to n decimal places fn _round_matrix(mat: &Matrix, decimals: u32) -> Matrix { let factor = 10f64.powi(decimals as i32); - let rounded: Vec = mat.to_vec().iter().map(|v| (v * factor).round() / factor).collect(); + let rounded: Vec = mat + .to_vec() + .iter() + .map(|v| (v * factor).round() / factor) + .collect(); Matrix::from_vec(rounded, mat.rows(), mat.cols()) } @@ -43,6 +47,17 @@ mod tests { 
assert_eq!(_round_matrix(&result, 6), _round_matrix(&expected, 6)); } + #[test] + fn test_sigmoid_edge_case() { + let x = Matrix::from_vec(vec![-1000.0, 0.0, 1000.0], 3, 1); + let expected = Matrix::from_vec(vec![0.0, 0.5, 1.0], 3, 1); + let result = sigmoid(&x); + + for (r, e) in result.data().iter().zip(expected.data().iter()) { + assert!((r - e).abs() < 1e-6); + } + } + #[test] fn test_relu() { let x = Matrix::from_vec(vec![-1.0, 0.0, 1.0], 3, 1); @@ -50,6 +65,13 @@ mod tests { assert_eq!(relu(&x), expected); } + #[test] + fn test_relu_edge_case() { + let x = Matrix::from_vec(vec![-1e-10, 0.0, 1e10], 3, 1); + let expected = Matrix::from_vec(vec![0.0, 0.0, 1e10], 3, 1); + assert_eq!(relu(&x), expected); + } + #[test] fn test_dsigmoid() { let y = Matrix::from_vec(vec![0.26894142, 0.5, 0.73105858], 3, 1); @@ -57,19 +79,46 @@ mod tests { let result = dsigmoid(&y); assert_eq!(_round_matrix(&result, 6), _round_matrix(&expected, 6)); } - + + #[test] + fn test_dsigmoid_edge_case() { + let y = Matrix::from_vec(vec![0.0, 0.5, 1.0], 3, 1); // Assume these are outputs from sigmoid(x) + let expected = Matrix::from_vec(vec![0.0, 0.25, 0.0], 3, 1); + let result = dsigmoid(&y); + + for (r, e) in result.data().iter().zip(expected.data().iter()) { + assert!((r - e).abs() < 1e-6); + } + } + #[test] fn test_drelu() { let x = Matrix::from_vec(vec![-1.0, 0.0, 1.0], 3, 1); let expected = Matrix::from_vec(vec![0.0, 0.0, 1.0], 3, 1); - assert_eq!(drelu(&x), expected); + assert_eq!(drelu(&x), expected); } + + #[test] + fn test_drelu_edge_case() { + let x = Matrix::from_vec(vec![-1e-10, 0.0, 1e10], 3, 1); + let expected = Matrix::from_vec(vec![0.0, 0.0, 1.0], 3, 1); + assert_eq!(drelu(&x), expected); + } + #[test] fn test_leaky_relu() { let x = Matrix::from_vec(vec![-1.0, 0.0, 1.0], 3, 1); let expected = Matrix::from_vec(vec![-0.01, 0.0, 1.0], 3, 1); assert_eq!(leaky_relu(&x), expected); } + + #[test] + fn test_leaky_relu_edge_case() { + let x = Matrix::from_vec(vec![-1e-10, 0.0, 1e10], 3, 1); + let expected = Matrix::from_vec(vec![-1e-12, 0.0, 1e10], 3, 1); + assert_eq!(leaky_relu(&x), expected); + } + #[test] fn test_dleaky_relu() { let x = Matrix::from_vec(vec![-1.0, 0.0, 1.0], 3, 1); @@ -77,4 +126,10 @@ mod tests { assert_eq!(dleaky_relu(&x), expected); } + #[test] + fn test_dleaky_relu_edge_case() { + let x = Matrix::from_vec(vec![-1e-10, 0.0, 1e10], 3, 1); + let expected = Matrix::from_vec(vec![0.01, 0.01, 1.0], 3, 1); + assert_eq!(dleaky_relu(&x), expected); + } } From 261d0d700749af646afbda49241f2e923c976a1c Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 20:03:16 +0100 Subject: [PATCH 22/43] Refactor DenseNN implementation to enhance activation function handling and improve training process --- src/compute/models/dense_nn.rs | 307 ++++++++++++++++++++++++++++----- 1 file changed, 260 insertions(+), 47 deletions(-) diff --git a/src/compute/models/dense_nn.rs b/src/compute/models/dense_nn.rs index 1959c2a..7be04a2 100644 --- a/src/compute/models/dense_nn.rs +++ b/src/compute/models/dense_nn.rs @@ -1,65 +1,278 @@ use crate::matrix::{Matrix, SeriesOps}; -use crate::compute::activations::{relu, sigmoid, drelu}; -use rand::Rng; +use crate::compute::activations::{relu, drelu, sigmoid}; +use rand::prelude::*; +/// Supported activation functions +#[derive(Clone)] +pub enum ActivationKind { + Relu, + Sigmoid, + Tanh, +} + +impl ActivationKind { + /// Apply activation elementwise + pub fn forward(&self, z: &Matrix) -> Matrix { + match self { + 
ActivationKind::Relu => relu(z), + ActivationKind::Sigmoid => sigmoid(z), + ActivationKind::Tanh => z.map(|v| v.tanh()), + } + } + + /// Compute elementwise derivative w.r.t. pre-activation z + pub fn derivative(&self, z: &Matrix) -> Matrix { + match self { + ActivationKind::Relu => drelu(z), + ActivationKind::Sigmoid => { + let s = sigmoid(z); + s.zip(&s, |si, sj| si * (1.0 - sj)) + } + ActivationKind::Tanh => z.map(|v| 1.0 - v.tanh().powi(2)), + } + } +} + +/// Weight initialization schemes +#[derive(Clone)] +pub enum InitializerKind { + /// Uniform(-limit .. limit) + Uniform(f64), + /// Xavier/Glorot uniform + Xavier, + /// He (Kaiming) uniform + He, +} + +impl InitializerKind { + pub fn initialize(&self, rows: usize, cols: usize) -> Matrix { + let mut rng = rand::rng(); + let fan_in = rows; + let fan_out = cols; + let limit = match self { + InitializerKind::Uniform(l) => *l, + InitializerKind::Xavier => (6.0 / (fan_in + fan_out) as f64).sqrt(), + InitializerKind::He => (2.0 / fan_in as f64).sqrt(), + }; + let data = (0..rows * cols) + .map(|_| rng.random_range(-limit..limit)) + .collect::>(); + Matrix::from_vec(data, rows, cols) + } +} + +/// Supported losses +#[derive(Clone)] +pub enum LossKind { + /// Mean Squared Error: L = 1/m * sum((y_hat - y)^2) + MSE, + /// Binary Cross-Entropy: L = -1/m * sum(y*log(y_hat) + (1-y)*log(1-y_hat)) + BCE, +} + +impl LossKind { + /// Compute gradient dL/dy_hat (before applying activation derivative) + pub fn gradient(&self, y_hat: &Matrix, y: &Matrix) -> Matrix { + let m = y.rows() as f64; + match self { + LossKind::MSE => (y_hat - y) * (2.0 / m), + LossKind::BCE => (y_hat - y) * (1.0 / m), + } + } +} + +/// Configuration for a dense neural network +pub struct DenseNNConfig { + pub input_size: usize, + pub hidden_layers: Vec, + /// Must have length = hidden_layers.len() + 1 + pub activations: Vec, + pub output_size: usize, + pub initializer: InitializerKind, + pub loss: LossKind, + pub learning_rate: f64, + pub epochs: usize, +} + +/// A multi-layer perceptron with full configurability pub struct DenseNN { - w1: Matrix, // (n_in, n_hidden) - b1: Matrix, // (1, n_hidden) - w2: Matrix, // (n_hidden, n_out) - b2: Matrix, // (1, n_out) + weights: Vec>, + biases: Vec>, + activations: Vec, + loss: LossKind, + lr: f64, + epochs: usize, } impl DenseNN { - pub fn new(n_in: usize, n_hidden: usize, n_out: usize) -> Self { - let mut rng = rand::rng(); - let mut init = |rows, cols| { - let data = (0..rows * cols) - .map(|_| rng.random_range(-1.0..1.0)) - .collect::>(); - Matrix::from_vec(data, rows, cols) - }; - Self { - w1: init(n_in, n_hidden), - b1: Matrix::zeros(1, n_hidden), - w2: init(n_hidden, n_out), - b2: Matrix::zeros(1, n_out), + /// Build a new DenseNN from the given configuration + pub fn new(config: DenseNNConfig) -> Self { + let mut sizes = vec![config.input_size]; + sizes.extend(&config.hidden_layers); + sizes.push(config.output_size); + + assert_eq!( + config.activations.len(), + sizes.len() - 1, + "Number of activation functions must match number of layers" + ); + + let mut weights = Vec::with_capacity(sizes.len() - 1); + let mut biases = Vec::with_capacity(sizes.len() - 1); + + for i in 0..sizes.len() - 1 { + let w = config.initializer.initialize(sizes[i], sizes[i + 1]); + let b = Matrix::zeros(1, sizes[i + 1]); + weights.push(w); + biases.push(b); + } + + DenseNN { + weights, + biases, + activations: config.activations, + loss: config.loss, + lr: config.learning_rate, + epochs: config.epochs, } } - pub fn forward(&self, x: &Matrix) -> (Matrix, 
Matrix, Matrix) { - // z1 = X·W1 + b1 ; a1 = ReLU(z1) - let z1 = x.dot(&self.w1) + &self.b1; - let a1 = relu(&z1); - // z2 = a1·W2 + b2 ; a2 = softmax(z2) (here binary => sigmoid) - let z2 = a1.dot(&self.w2) + &self.b2; - let a2 = sigmoid(&z2); // binary output - (a1, z2, a2) // keep intermediates for back-prop + /// Perform a full forward pass, returning pre-activations (z) and activations (a) + fn forward_full(&self, x: &Matrix) -> (Vec>, Vec>) { + let mut zs = Vec::with_capacity(self.weights.len()); + let mut activs = Vec::with_capacity(self.weights.len() + 1); + activs.push(x.clone()); + + let mut a = x.clone(); + for (i, (w, b)) in self.weights.iter().zip(self.biases.iter()).enumerate() { + let z = &a.dot(w) + &Matrix::repeat_rows(b, a.rows()); + let a_next = self.activations[i].forward(&z); + zs.push(z); + activs.push(a_next.clone()); + a = a_next; + } + + (zs, activs) } - pub fn train(&mut self, x: &Matrix, y: &Matrix, lr: f64, epochs: usize) { + /// Train the network on inputs X and targets Y + pub fn train(&mut self, x: &Matrix, y: &Matrix) { let m = x.rows() as f64; - for _ in 0..epochs { - let (a1, _z2, y_hat) = self.forward(x); + for _ in 0..self.epochs { + let (zs, activs) = self.forward_full(x); + let y_hat = activs.last().unwrap().clone(); - // -------- backwards ---------- - // dL/da2 = y_hat - y (BCE derivative) - let dz2 = &y_hat - y; // (m, n_out) - let dw2 = a1.transpose().dot(&dz2) / m; // (n_h, n_out) - // let db2 = dz2.sum_vertical() * (1.0 / m); // broadcast ok - let db2 = Matrix::from_vec(dz2.sum_vertical(), 1, dz2.cols()) * (1.0 / m); // (1, n_out) - let da1 = dz2.dot(&self.w2.transpose()); // (m,n_h) - let dz1 = da1.zip(&a1, |g, act| g * drelu(&Matrix::from_cols(vec![vec![act]])).data()[0]); // (m,n_h) - - // real code: drelu returns Matrix, broadcasting needed; you can optimise. 
+ // Initial delta (dL/dz) on output + let mut delta = match self.loss { + LossKind::BCE => self.loss.gradient(&y_hat, y), + LossKind::MSE => { + let grad = self.loss.gradient(&y_hat, y); + let dz = self.activations.last().unwrap().derivative(zs.last().unwrap()); + grad.zip(&dz, |g, da| g * da) + } + }; - let dw1 = x.transpose().dot(&dz1) / m; // (n_in,n_h) - let db1 = Matrix::from_vec(dz1.sum_vertical(), 1, dz1.cols()) * (1.0 / m); // (1, n_h) + // Backpropagate through layers + for l in (0..self.weights.len()).rev() { + let a_prev = &activs[l]; + let dw = a_prev.transpose().dot(&delta) / m; + let db = Matrix::from_vec(delta.sum_vertical(), 1, delta.cols()) / m; - // -------- update ---------- - self.w2 = &self.w2 - &(dw2 * lr); - self.b2 = &self.b2 - &(db2 * lr); - self.w1 = &self.w1 - &(dw1 * lr); - self.b1 = &self.b1 - &(db1 * lr); + // Update weights & biases + self.weights[l] = &self.weights[l] - &(dw * self.lr); + self.biases[l] = &self.biases[l] - &(db * self.lr); + + // Propagate delta to previous layer + if l > 0 { + let w_t = self.weights[l].transpose(); + let da = self.activations[l - 1].derivative(&zs[l - 1]); + delta = delta.dot(&w_t).zip(&da, |d, a| d * a); + } + } } } + + /// Run a forward pass and return the network's output + pub fn predict(&self, x: &Matrix) -> Matrix { + let mut a = x.clone(); + for (i, (w, b)) in self.weights.iter().zip(self.biases.iter()).enumerate() { + let z = &a.dot(w) + &Matrix::repeat_rows(b, a.rows()); + a = self.activations[i].forward(&z); + } + a + } +} + +// ------------------------------ +// Simple tests +// ------------------------------ + +#[cfg(test)] +mod tests { + use super::*; + use crate::matrix::Matrix; + + #[test] + fn test_predict_shape() { + let config = DenseNNConfig { + input_size: 1, + hidden_layers: vec![2], + activations: vec![ActivationKind::Relu, ActivationKind::Sigmoid], + output_size: 1, + initializer: InitializerKind::Uniform(0.1), + loss: LossKind::MSE, + learning_rate: 0.01, + epochs: 0, + }; + let model = DenseNN::new(config); + let x = Matrix::from_vec(vec![1.0, 2.0, 3.0], 3, 1); + let preds = model.predict(&x); + assert_eq!(preds.rows(), 3); + assert_eq!(preds.cols(), 1); + } + + #[test] + fn test_train_no_epochs() { + let config = DenseNNConfig { + input_size: 1, + hidden_layers: vec![2], + activations: vec![ActivationKind::Relu, ActivationKind::Sigmoid], + output_size: 1, + initializer: InitializerKind::Uniform(0.1), + loss: LossKind::MSE, + learning_rate: 0.01, + epochs: 0, + }; + let mut model = DenseNN::new(config); + let x = Matrix::from_vec(vec![1.0, 2.0], 2, 1); + let before = model.predict(&x); + model.train(&x, &before); + let after = model.predict(&x); + for i in 0..before.rows() { + assert!((before[(i, 0)] - after[(i, 0)]).abs() < 1e-12); + } + } + + #[test] + fn test_dense_nn_step() { + let config = DenseNNConfig { + input_size: 1, + hidden_layers: vec![2], + activations: vec![ActivationKind::Relu, ActivationKind::Sigmoid], + output_size: 1, + initializer: InitializerKind::He, + loss: LossKind::BCE, + learning_rate: 0.01, + epochs: 5000, + }; + let mut model = DenseNN::new(config); + let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1); + let y = Matrix::from_vec(vec![0.0, 0.0, 1.0, 1.0], 4, 1); + model.train(&x, &y); + let preds = model.predict(&x); + assert!((preds[(0, 0)] - 0.0).abs() < 0.5); + assert!((preds[(1, 0)] - 0.0).abs() < 0.5); + assert!((preds[(2, 0)] - 1.0).abs() < 0.5); + assert!((preds[(3, 0)] - 1.0).abs() < 0.5); + } } From 70d2a7a2b4e0167b129eca773e88f2992d82785a Mon Sep 17 
00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 20:13:59 +0100 Subject: [PATCH 23/43] Refactor GaussianNB implementation for improved clarity and stability, including enhanced variance handling and additional unit tests --- src/compute/models/gaussian_nb.rs | 172 +++++++++++++++++++++++------- 1 file changed, 131 insertions(+), 41 deletions(-) diff --git a/src/compute/models/gaussian_nb.rs b/src/compute/models/gaussian_nb.rs index b6cd659..6d4b489 100644 --- a/src/compute/models/gaussian_nb.rs +++ b/src/compute/models/gaussian_nb.rs @@ -1,69 +1,105 @@ -use crate::matrix::{Matrix}; +use crate::matrix::Matrix; use std::collections::HashMap; +/// A Gaussian Naive Bayes classifier. +/// +/// # Parameters +/// - `var_smoothing`: Portion of the largest variance of all features to add to variances for stability. +/// - `use_unbiased_variance`: If `true`, uses Bessel's correction (dividing by (n-1)); otherwise divides by n. +/// pub struct GaussianNB { - classes: Vec, // distinct labels - priors: Vec, // P(class) + // Distinct class labels + classes: Vec, + // Prior probabilities P(class) + priors: Vec, + // Feature means per class means: Vec>, + // Feature variances per class variances: Vec>, - eps: f64, // var-smoothing + // var_smoothing + eps: f64, + // flag for unbiased variance + use_unbiased: bool, } impl GaussianNB { - pub fn new(var_smoothing: f64) -> Self { + /// Create a new GaussianNB. + /// + /// # Arguments + /// * `var_smoothing` - small float added to variances for numerical stability. + /// * `use_unbiased_variance` - whether to apply Bessel's correction (divide by n-1). + pub fn new(var_smoothing: f64, use_unbiased_variance: bool) -> Self { Self { - classes: vec![], - priors: vec![], - means: vec![], - variances: vec![], + classes: Vec::new(), + priors: Vec::new(), + means: Vec::new(), + variances: Vec::new(), eps: var_smoothing, + use_unbiased: use_unbiased_variance, } } + /// Fit the model according to the training data `x` and labels `y`. + /// + /// # Panics + /// Panics if `x` or `y` is empty, or if their dimensions disagree. pub fn fit(&mut self, x: &Matrix, y: &Matrix) { let m = x.rows(); let n = x.cols(); - assert_eq!(y.rows(), m); - assert_eq!(y.cols(), 1); + assert_eq!(y.rows(), m, "Row count of X and Y must match"); + assert_eq!(y.cols(), 1, "Y must be a column vector"); if m == 0 || n == 0 { panic!("Input matrix x or y is empty"); } - // ----- group samples by label ----- + // Group sample indices by label let mut groups: HashMap> = HashMap::new(); for i in 0..m { - let label_bits = y[(i, 0)].to_bits(); - groups.entry(label_bits).or_default().push(i); + let label = y[(i, 0)]; + let bits = label.to_bits(); + groups.entry(bits).or_default().push(i); } if groups.is_empty() { panic!("No class labels found in y"); } - self.classes = groups - .keys() - .cloned() - .map(f64::from_bits) - .collect::>(); - // Note: If NaN is present in class labels, this may panic. Ensure labels are valid floats. 
+ // Extract and sort class labels + self.classes = groups.keys().cloned().map(f64::from_bits).collect(); self.classes.sort_by(|a, b| a.partial_cmp(b).unwrap()); self.priors.clear(); self.means.clear(); self.variances.clear(); + // Precompute max variance for smoothing scale + let mut max_var_feature = 0.0; + for j in 0..n { + let mut col_vals = Vec::with_capacity(m); + for i in 0..m { + col_vals.push(x[(i, j)]); + } + let mean_all = col_vals.iter().sum::() / m as f64; + let var_all = col_vals.iter().map(|v| (v - mean_all).powi(2)).sum::() / m as f64; + if var_all > max_var_feature { + max_var_feature = var_all; + } + } + let smoothing = self.eps * max_var_feature; + + // Compute per-class statistics for &c in &self.classes { - let label_bits = c.to_bits(); - let idx = &groups[&label_bits]; + let idx = &groups[&c.to_bits()]; let count = idx.len(); if count == 0 { - panic!("Class group for label {c} is empty"); + panic!("Class group for label {} is empty", c); } + // Prior self.priors.push(count as f64 / m as f64); let mut mean = Matrix::zeros(1, n); let mut var = Matrix::zeros(1, n); - // mean + // Mean for &i in idx { for j in 0..n { mean[(0, j)] += x[(i, j)]; @@ -73,17 +109,22 @@ impl GaussianNB { mean[(0, j)] /= count as f64; } - // variance + // Variance for &i in idx { for j in 0..n { let d = x[(i, j)] - mean[(0, j)]; var[(0, j)] += d * d; } } + let denom = if self.use_unbiased { + (count as f64 - 1.0).max(1.0) + } else { + count as f64 + }; for j in 0..n { - var[(0, j)] = var[(0, j)] / count as f64 + self.eps; // always add eps after division + var[(0, j)] = var[(0, j)] / denom + smoothing; if var[(0, j)] <= 0.0 { - var[(0, j)] = self.eps; // ensure strictly positive variance + var[(0, j)] = smoothing; } } @@ -92,33 +133,82 @@ impl GaussianNB { } } - /// Return class labels (shape m×1) for samples in X. + /// Perform classification on an array of test vectors `x`. 
pub fn predict(&self, x: &Matrix) -> Matrix { let m = x.rows(); - let k = self.classes.len(); let n = x.cols(); + let k = self.classes.len(); let mut preds = Matrix::zeros(m, 1); let ln_2pi = (2.0 * std::f64::consts::PI).ln(); for i in 0..m { - let mut best_class = 0usize; - let mut best_log_prob = f64::NEG_INFINITY; - for c in 0..k { - // log P(y=c) + Σ log N(x_j | μ, σ²) - let mut log_prob = self.priors[c].ln(); + let mut best = (0, f64::NEG_INFINITY); + for c_idx in 0..k { + let mut log_prob = self.priors[c_idx].ln(); for j in 0..n { - let mean = self.means[c][(0, j)]; - let var = self.variances[c][(0, j)]; - let diff = x[(i, j)] - mean; + let diff = x[(i, j)] - self.means[c_idx][(0, j)]; + let var = self.variances[c_idx][(0, j)]; log_prob += -0.5 * (diff * diff / var + var.ln() + ln_2pi); } - if log_prob > best_log_prob { - best_log_prob = log_prob; - best_class = c; + if log_prob > best.1 { + best = (c_idx, log_prob); } } - preds[(i, 0)] = self.classes[best_class]; + preds[(i, 0)] = self.classes[best.0]; } preds } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::matrix::Matrix; + + #[test] + fn test_simple_two_class() { + // Simple dataset: one feature, two classes 0 and 1 + // Class 0: values [1.0, 1.2, 0.8] + // Class 1: values [3.0, 3.2, 2.8] + let x = Matrix::from_vec(vec![1.0, 1.2, 0.8, 3.0, 3.2, 2.8], 6, 1); + let y = Matrix::from_vec(vec![0.0, 0.0, 0.0, 1.0, 1.0, 1.0], 6, 1); + let mut clf = GaussianNB::new(1e-9, false); + clf.fit(&x, &y); + let test = Matrix::from_vec(vec![1.1, 3.1], 2, 1); + let preds = clf.predict(&test); + assert_eq!(preds[(0, 0)], 0.0); + assert_eq!(preds[(1, 0)], 1.0); + } + + #[test] + fn test_unbiased_variance() { + // Same as above but with unbiased variance + let x = Matrix::from_vec(vec![2.0, 2.2, 1.8, 4.0, 4.2, 3.8], 6, 1); + let y = Matrix::from_vec(vec![0.0, 0.0, 0.0, 1.0, 1.0, 1.0], 6, 1); + let mut clf = GaussianNB::new(1e-9, true); + clf.fit(&x, &y); + let test = Matrix::from_vec(vec![2.1, 4.1], 2, 1); + let preds = clf.predict(&test); + assert_eq!(preds[(0, 0)], 0.0); + assert_eq!(preds[(1, 0)], 1.0); + } + + #[test] + #[should_panic] + fn test_empty_input() { + let x = Matrix::zeros(0, 0); + let y = Matrix::zeros(0, 1); + let mut clf = GaussianNB::new(1e-9, false); + clf.fit(&x, &y); + } + + #[test] + #[should_panic = "Row count of X and Y must match"] + fn test_mismatched_rows() { + let x = Matrix::from_vec(vec![1.0, 2.0], 2, 1); + let y = Matrix::from_vec(vec![0.0], 1, 1); + let mut clf = GaussianNB::new(1e-9, false); + clf.fit(&x, &y); + clf.predict(&x); + } +} From 75d07371b2edb3ce6b366cd8cf3df112ba82dd6a Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 20:16:06 +0100 Subject: [PATCH 24/43] Increase training epochs from 5000 to 10000 for improved model performance --- src/compute/models/dense_nn.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compute/models/dense_nn.rs b/src/compute/models/dense_nn.rs index 7be04a2..413ec5c 100644 --- a/src/compute/models/dense_nn.rs +++ b/src/compute/models/dense_nn.rs @@ -263,7 +263,7 @@ mod tests { initializer: InitializerKind::He, loss: LossKind::BCE, learning_rate: 0.01, - epochs: 5000, + epochs: 10000, }; let mut model = DenseNN::new(config); let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1); From 46abeb12a733506b84f49db42e42474215582032 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 20:43:01 +0100 Subject: [PATCH 25/43] applied 
formatting --- src/compute/models/dense_nn.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/compute/models/dense_nn.rs b/src/compute/models/dense_nn.rs index 413ec5c..759c15e 100644 --- a/src/compute/models/dense_nn.rs +++ b/src/compute/models/dense_nn.rs @@ -1,5 +1,5 @@ +use crate::compute::activations::{drelu, relu, sigmoid}; use crate::matrix::{Matrix, SeriesOps}; -use crate::compute::activations::{relu, drelu, sigmoid}; use rand::prelude::*; /// Supported activation functions @@ -118,7 +118,7 @@ impl DenseNN { ); let mut weights = Vec::with_capacity(sizes.len() - 1); - let mut biases = Vec::with_capacity(sizes.len() - 1); + let mut biases = Vec::with_capacity(sizes.len() - 1); for i in 0..sizes.len() - 1 { let w = config.initializer.initialize(sizes[i], sizes[i + 1]); @@ -167,7 +167,11 @@ impl DenseNN { LossKind::BCE => self.loss.gradient(&y_hat, y), LossKind::MSE => { let grad = self.loss.gradient(&y_hat, y); - let dz = self.activations.last().unwrap().derivative(zs.last().unwrap()); + let dz = self + .activations + .last() + .unwrap() + .derivative(zs.last().unwrap()); grad.zip(&dz, |g, da| g * da) } }; @@ -180,7 +184,7 @@ impl DenseNN { // Update weights & biases self.weights[l] = &self.weights[l] - &(dw * self.lr); - self.biases[l] = &self.biases[l] - &(db * self.lr); + self.biases[l] = &self.biases[l] - &(db * self.lr); // Propagate delta to previous layer if l > 0 { From 96f434bf942880365dad3d2496be3644aa71020a Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 20:48:04 +0100 Subject: [PATCH 26/43] Add tests for DenseNN training and MSE loss calculation --- src/compute/models/dense_nn.rs | 102 ++++++++++++++++++++++++++------- 1 file changed, 80 insertions(+), 22 deletions(-) diff --git a/src/compute/models/dense_nn.rs b/src/compute/models/dense_nn.rs index 759c15e..7edfaab 100644 --- a/src/compute/models/dense_nn.rs +++ b/src/compute/models/dense_nn.rs @@ -207,15 +207,22 @@ impl DenseNN { } } -// ------------------------------ -// Simple tests -// ------------------------------ - #[cfg(test)] mod tests { use super::*; use crate::matrix::Matrix; + /// Compute MSE = 1/m * Σ (ŷ - y)² + fn mse_loss(y_hat: &Matrix, y: &Matrix) -> f64 { + let m = y.rows() as f64; + y_hat + .zip(y, |yh, yv| (yh - yv).powi(2)) + .data() + .iter() + .sum::() + / m + } + #[test] fn test_predict_shape() { let config = DenseNNConfig { @@ -236,7 +243,7 @@ mod tests { } #[test] - fn test_train_no_epochs() { + fn test_train_no_epochs_does_nothing() { let config = DenseNNConfig { input_size: 1, hidden_layers: vec![2], @@ -248,35 +255,86 @@ mod tests { epochs: 0, }; let mut model = DenseNN::new(config); - let x = Matrix::from_vec(vec![1.0, 2.0], 2, 1); + let x = Matrix::from_vec(vec![0.0, 1.0], 2, 1); + let y = Matrix::from_vec(vec![0.0, 1.0], 2, 1); + let before = model.predict(&x); - model.train(&x, &before); + model.train(&x, &y); let after = model.predict(&x); + for i in 0..before.rows() { - assert!((before[(i, 0)] - after[(i, 0)]).abs() < 1e-12); + for j in 0..before.cols() { + assert!( + (before[(i, j)] - after[(i, j)]).abs() < 1e-12, + "prediction changed despite 0 epochs" + ); + } } } #[test] - fn test_dense_nn_step() { + fn test_train_one_epoch_changes_predictions() { + // Single-layer sigmoid regression so gradients flow. 
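+        // With no hidden layers this reduces to a single sigmoid unit
+        // (logistic-style regression), so one MSE gradient step at lr = 1.0
+        // is enough to move the outputs measurably.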
let config = DenseNNConfig { input_size: 1, - hidden_layers: vec![2], - activations: vec![ActivationKind::Relu, ActivationKind::Sigmoid], + hidden_layers: vec![], + activations: vec![ActivationKind::Sigmoid], output_size: 1, - initializer: InitializerKind::He, - loss: LossKind::BCE, - learning_rate: 0.01, - epochs: 10000, + initializer: InitializerKind::Uniform(0.1), + loss: LossKind::MSE, + learning_rate: 1.0, + epochs: 1, }; let mut model = DenseNN::new(config); - let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1); - let y = Matrix::from_vec(vec![0.0, 0.0, 1.0, 1.0], 4, 1); + + let x = Matrix::from_vec(vec![0.0, 1.0], 2, 1); + let y = Matrix::from_vec(vec![0.0, 1.0], 2, 1); + + let before = model.predict(&x); model.train(&x, &y); - let preds = model.predict(&x); - assert!((preds[(0, 0)] - 0.0).abs() < 0.5); - assert!((preds[(1, 0)] - 0.0).abs() < 0.5); - assert!((preds[(2, 0)] - 1.0).abs() < 0.5); - assert!((preds[(3, 0)] - 1.0).abs() < 0.5); + let after = model.predict(&x); + + // At least one of the two outputs must move by >ϵ + let mut moved = false; + for i in 0..before.rows() { + if (before[(i, 0)] - after[(i, 0)]).abs() > 1e-8 { + moved = true; + } + } + assert!(moved, "predictions did not change after 1 epoch"); + } + + #[test] + fn test_training_reduces_mse_loss() { + // Same single‐layer sigmoid setup; check loss goes down. + let config = DenseNNConfig { + input_size: 1, + hidden_layers: vec![], + activations: vec![ActivationKind::Sigmoid], + output_size: 1, + initializer: InitializerKind::Uniform(0.1), + loss: LossKind::MSE, + learning_rate: 1.0, + epochs: 10, + }; + let mut model = DenseNN::new(config); + + let x = Matrix::from_vec(vec![0.0, 1.0, 0.5], 3, 1); + let y = Matrix::from_vec(vec![0.0, 1.0, 0.5], 3, 1); + + let before_preds = model.predict(&x); + let before_loss = mse_loss(&before_preds, &y); + + model.train(&x, &y); + + let after_preds = model.predict(&x); + let after_loss = mse_loss(&after_preds, &y); + + assert!( + after_loss < before_loss, + "MSE did not decrease (before: {}, after: {})", + before_loss, + after_loss + ); } } From 4648800a09b81c2e82069235c3e18c55f4afafc1 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 21:16:57 +0100 Subject: [PATCH 27/43] fixed incorrectly commited file --- src/compute/models/k_means.rs | 176 ++++++++++++++++++---------------- 1 file changed, 92 insertions(+), 84 deletions(-) diff --git a/src/compute/models/k_means.rs b/src/compute/models/k_means.rs index 716114f..7b7fcf0 100644 --- a/src/compute/models/k_means.rs +++ b/src/compute/models/k_means.rs @@ -1,105 +1,113 @@ use crate::matrix::Matrix; -use std::collections::HashMap; +use rand::seq::SliceRandom; -pub struct GaussianNB { - classes: Vec, // distinct labels - priors: Vec, // P(class) - means: Vec>, - variances: Vec>, - eps: f64, // var-smoothing +pub struct KMeans { + pub centroids: Matrix, // (k, n_features) } -impl GaussianNB { - pub fn new(var_smoothing: f64) -> Self { - Self { - classes: vec![], - priors: vec![], - means: vec![], - variances: vec![], - eps: var_smoothing, - } - } - - pub fn fit(&mut self, x: &Matrix, y: &Matrix) { +impl KMeans { + /// Fit with k clusters. 
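+    ///
+    /// The fit below follows the standard Lloyd iteration:
+    /// 1. pick `k` distinct samples as the initial centroids,
+    /// 2. assign each sample to its nearest centroid (squared Euclidean
+    ///    distance),
+    /// 3. recompute every centroid as the mean of its assigned samples,
+    /// 4. stop after `max_iter` rounds, when the assignments stop changing,
+    ///    or when the total centroid shift drops below `tol`.
+    ///
+    /// Returns the fitted model together with one cluster label per sample.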
+    pub fn fit(x: &Matrix<f64>, k: usize, max_iter: usize, tol: f64) -> (Self, Vec<usize>) {
         let m = x.rows();
         let n = x.cols();
-        assert_eq!(y.rows(), m);
-        assert_eq!(y.cols(), 1);
+        assert!(k <= m, "k must be ≤ number of samples");
 
-        // ----- group samples by label -----
-        let mut groups: HashMap<i64, Vec<usize>> = HashMap::new();
-        for i in 0..m {
-            groups.entry(y[(i, 0)] as i64).or_default().push(i);
+        // ----- initialise centroids: pick k distinct rows at random -----
+        let mut rng = rand::rng();
+        let mut indices: Vec<usize> = (0..m).collect();
+        indices.shuffle(&mut rng);
+        let mut centroids = Matrix::zeros(k, n);
+        for (c, &i) in indices[..k].iter().enumerate() {
+            for j in 0..n {
+                centroids[(c, j)] = x[(i, j)];
+            }
         }
 
-        self.classes = groups.keys().cloned().map(|v| v as f64).collect::<Vec<f64>>();
-        self.classes.sort_by(|a, b| a.partial_cmp(b).unwrap());
-
-        self.priors.clear();
-        self.means.clear();
-        self.variances.clear();
-
-        for &c in &self.classes {
-            let idx = &groups[&(c as i64)];
-            let count = idx.len();
-            self.priors.push(count as f64 / m as f64);
-
-            let mut mean = Matrix::zeros(1, n);
-            let mut var = Matrix::zeros(1, n);
-
-            // mean
-            for &i in idx {
-                for j in 0..n {
-                    mean[(0, j)] += x[(i, j)];
+        let mut labels = vec![0usize; m];
+        for _ in 0..max_iter {
+            // ----- assignment step -----
+            let mut changed = false;
+            for i in 0..m {
+                let mut best = 0usize;
+                let mut best_dist = f64::MAX;
+                for c in 0..k {
+                    let mut dist = 0.0;
+                    for j in 0..n {
+                        let d = x[(i, j)] - centroids[(c, j)];
+                        dist += d * d;
+                    }
+                    if dist < best_dist {
+                        best_dist = dist;
+                        best = c;
+                    }
+                }
+                if labels[i] != best {
+                    labels[i] = best;
+                    changed = true;
                 }
             }
-            for j in 0..n {
-                mean[(0, j)] /= count as f64;
-            }
 
-            // variance
-            for &i in idx {
-                for j in 0..n {
-                    let d = x[(i, j)] - mean[(0, j)];
-                    var[(0, j)] += d * d;
+            // ----- update step: accumulate into a fresh buffer so the
+            // current centroids stay available for the convergence test -----
+            let mut counts = vec![0usize; k];
+            let mut new_centroids = Matrix::zeros(k, n);
+            for i in 0..m {
+                let c = labels[i];
+                counts[c] += 1;
+                for j in 0..n {
+                    new_centroids[(c, j)] += x[(i, j)];
                 }
             }
-            for j in 0..n {
-                var[(0, j)] = var[(0, j)] / count as f64 + self.eps;
-            }
-
-            self.means.push(mean);
-            self.variances.push(var);
-        }
-    }
-
-    /// Return class labels (shape m×1) for samples in X.
-    pub fn predict(&self, x: &Matrix) -> Matrix {
-        let m = x.rows();
-        let k = self.classes.len();
-        let n = x.cols();
-        let mut preds = Matrix::zeros(m, 1);
-        let ln_2pi = (2.0 * std::f64::consts::PI).ln();
-
-        for i in 0..m {
-            let mut best_class = 0usize;
-            let mut best_log_prob = f64::NEG_INFINITY;
-            for c in 0..k {
-                // log P(y=c) + Σ log N(x_j | μ, σ²)
-                let mut log_prob = self.priors[c].ln();
-                for j in 0..n {
-                    let mean = self.means[c][(0, j)];
-                    let var = self.variances[c][(0, j)];
-                    let diff = x[(i, j)] - mean;
-                    log_prob += -0.5 * (diff * diff / var + var.ln() + ln_2pi);
-                }
-                if log_prob > best_log_prob {
-                    best_log_prob = log_prob;
-                    best_class = c;
+            for c in 0..k {
+                if counts[c] > 0 {
+                    for j in 0..n {
+                        new_centroids[(c, j)] /= counts[c] as f64;
+                    }
+                } else {
+                    // keep an empty cluster's centroid where it was
+                    for j in 0..n {
+                        new_centroids[(c, j)] = centroids[(c, j)];
+                    }
                 }
             }
-            preds[(i, 0)] = self.classes[best_class];
+
+            // ----- convergence test -----
+            let mut shift = 0.0_f64;
+            for c in 0..k {
+                for j in 0..n {
+                    let d = new_centroids[(c, j)] - centroids[(c, j)];
+                    shift += d * d;
+                }
+            }
+            centroids = new_centroids;
+
+            if !changed {
+                break; // assignments stable
+            }
+            if tol > 0.0 && shift.sqrt() < tol {
+                break; // centroids moved less than tol
+            }
         }
-        preds
+        (Self { centroids }, labels)
+    }
+
+    /// Predict nearest centroid for each sample.
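+    ///
+    /// Example sketch (the `k`, `max_iter`, and `tol` values are arbitrary):
+    ///
+    /// ```ignore
+    /// let (model, labels) = KMeans::fit(&x, 3, 100, 1e-6);
+    /// let assigned = model.predict(&x);
+    /// ```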
+ pub fn predict(&self, x: &Matrix) -> Vec { + let m = x.rows(); + let k = self.centroids.rows(); + let n = x.cols(); + let mut labels = vec![0usize; m]; + for i in 0..m { + let mut best = 0usize; + let mut best_dist = f64::MAX; + for c in 0..k { + let mut dist = 0.0; + for j in 0..n { + let d = x[(i, j)] - self.centroids[(c, j)]; + dist += d * d; + } + if dist < best_dist { + best_dist = dist; + best = c; + } + } + labels[i] = best; + } + labels } } From 4f8a27298cda5002e27fa8a091821f51131b13aa Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 22:21:03 +0100 Subject: [PATCH 28/43] Add mutable row access method and corresponding tests for Matrix --- src/matrix/mat.rs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/matrix/mat.rs b/src/matrix/mat.rs index 649d831..8eded45 100644 --- a/src/matrix/mat.rs +++ b/src/matrix/mat.rs @@ -198,6 +198,18 @@ impl Matrix { row_data } + #[inline] + pub fn row_mut(&mut self, r: usize) -> &mut [T] { + assert!( + r < self.rows, + "row index {} out of bounds for {} rows", + r, + self.rows + ); + let start = r; + &mut self.data[start..start + self.cols] + } + /// Deletes a row from the matrix. Panics on out-of-bounds. /// This is O(N) where N is the number of elements, as it rebuilds the data vec. pub fn delete_row(&mut self, row: usize) { @@ -1172,6 +1184,25 @@ mod tests { assert_eq!(ma.row(2), &[3, 6, 9]); } + #[test] + fn test_row_mut() { + let mut ma = static_test_matrix(); + let row1_mut = ma.row_mut(1); + row1_mut[0] = 20; + row1_mut[1] = 50; + row1_mut[2] = 80; + + assert_eq!(ma.row(1), &[20, 50, 80]); + assert_eq!(ma.data(), &[1, 2, 3, 20, 50, 80, 7, 8, 9]); + } + + #[test] + #[should_panic(expected = "row index 3 out of bounds for 3 rows")] + fn test_row_mut_out_of_bounds() { + let mut ma = static_test_matrix(); + ma.row_mut(3); + } + #[test] fn test_shape() { let ma = static_test_matrix_2x4(); From 87d14bbf5fe8a8c5630b11f5d04d29642aa5d4c3 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 6 Jul 2025 23:50:32 +0100 Subject: [PATCH 29/43] Moved activations module and update imports in dense_nn and logreg --- src/compute/mod.rs | 5 +++-- src/compute/{ => models}/activations.rs | 2 +- src/compute/models/dense_nn.rs | 2 +- src/compute/models/logreg.rs | 2 +- src/compute/models/mod.rs | 3 ++- 5 files changed, 8 insertions(+), 6 deletions(-) rename src/compute/{ => models}/activations.rs (98%) diff --git a/src/compute/mod.rs b/src/compute/mod.rs index b24f638..8986bdc 100644 --- a/src/compute/mod.rs +++ b/src/compute/mod.rs @@ -1,3 +1,4 @@ -pub mod activations; -pub mod models; \ No newline at end of file +pub mod models; + +pub mod stats; \ No newline at end of file diff --git a/src/compute/activations.rs b/src/compute/models/activations.rs similarity index 98% rename from src/compute/activations.rs rename to src/compute/models/activations.rs index 661fe52..e9f528a 100644 --- a/src/compute/activations.rs +++ b/src/compute/models/activations.rs @@ -130,6 +130,6 @@ mod tests { fn test_dleaky_relu_edge_case() { let x = Matrix::from_vec(vec![-1e-10, 0.0, 1e10], 3, 1); let expected = Matrix::from_vec(vec![0.01, 0.01, 1.0], 3, 1); - assert_eq!(dleaky_relu(&x), expected); + assert_eq!(dleaky_relu(&x), expectewd); } } diff --git a/src/compute/models/dense_nn.rs b/src/compute/models/dense_nn.rs index 7edfaab..b795b93 100644 --- a/src/compute/models/dense_nn.rs +++ b/src/compute/models/dense_nn.rs @@ -1,4 +1,4 @@ -use 
crate::compute::activations::{drelu, relu, sigmoid}; +use crate::compute::models::activations::{drelu, relu, sigmoid}; use crate::matrix::{Matrix, SeriesOps}; use rand::prelude::*; diff --git a/src/compute/models/logreg.rs b/src/compute/models/logreg.rs index 8821ee4..ac224aa 100644 --- a/src/compute/models/logreg.rs +++ b/src/compute/models/logreg.rs @@ -1,4 +1,4 @@ -use crate::compute::activations::sigmoid; +use crate::compute::models::activations::sigmoid; use crate::matrix::{Matrix, SeriesOps}; pub struct LogReg { diff --git a/src/compute/models/mod.rs b/src/compute/models/mod.rs index c617f08..051d523 100644 --- a/src/compute/models/mod.rs +++ b/src/compute/models/mod.rs @@ -3,4 +3,5 @@ pub mod logreg; pub mod dense_nn; pub mod k_means; pub mod pca; -pub mod gaussian_nb; \ No newline at end of file +pub mod gaussian_nb; +pub mod activations; From e195481691c91ffdfd16b11b7740ea46398ba5bb Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Mon, 7 Jul 2025 00:02:08 +0100 Subject: [PATCH 30/43] Refactor row access method to row_copy_from_slice for better clarity and functionality --- src/matrix/mat.rs | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/src/matrix/mat.rs b/src/matrix/mat.rs index 8eded45..086b715 100644 --- a/src/matrix/mat.rs +++ b/src/matrix/mat.rs @@ -197,17 +197,24 @@ impl Matrix { } row_data } - - #[inline] - pub fn row_mut(&mut self, r: usize) -> &mut [T] { + pub fn row_copy_from_slice(&mut self, r: usize, values: &[T]) { assert!( r < self.rows, "row index {} out of bounds for {} rows", r, self.rows ); - let start = r; - &mut self.data[start..start + self.cols] + assert!( + values.len() == self.cols, + "input slice length {} does not match number of columns {}", + values.len(), + self.cols + ); + + for (c, value) in values.iter().enumerate() { + let idx = r + c * self.rows; // column-major index + self.data[idx] = value.clone(); + } } /// Deletes a row from the matrix. Panics on out-of-bounds. 
@@ -1185,22 +1192,11 @@ mod tests {
     }
 
     #[test]
-    fn test_row_mut() {
+    fn test_row_copy_from_slice() {
         let mut ma = static_test_matrix();
-        let row1_mut = ma.row_mut(1);
-        row1_mut[0] = 20;
-        row1_mut[1] = 50;
-        row1_mut[2] = 80;
-
-        assert_eq!(ma.row(1), &[20, 50, 80]);
-        assert_eq!(ma.data(), &[1, 2, 3, 20, 50, 80, 7, 8, 9]);
-    }
-
-    #[test]
-    #[should_panic(expected = "row index 3 out of bounds for 3 rows")]
-    fn test_row_mut_out_of_bounds() {
-        let mut ma = static_test_matrix();
-        ma.row_mut(3);
+        let new_row = vec![10, 20, 30];
+        ma.row_copy_from_slice(1, &new_row);
+        assert_eq!(ma.row(1), &[10, 20, 30]);
     }
 
     #[test]

From a08fb546a9fcaddd214064292fc83c31c4644dcd Mon Sep 17 00:00:00 2001
From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com>
Date: Mon, 7 Jul 2025 00:02:24 +0100
Subject: [PATCH 31/43] fixed typo

---
 src/compute/models/activations.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compute/models/activations.rs b/src/compute/models/activations.rs
index e9f528a..661fe52 100644
--- a/src/compute/models/activations.rs
+++ b/src/compute/models/activations.rs
@@ -130,6 +130,6 @@ mod tests {
     fn test_dleaky_relu_edge_case() {
         let x = Matrix::from_vec(vec![-1e-10, 0.0, 1e10], 3, 1);
         let expected = Matrix::from_vec(vec![0.01, 0.01, 1.0], 3, 1);
-        assert_eq!(dleaky_relu(&x), expectewd);
+        assert_eq!(dleaky_relu(&x), expected);
     }
 }

From e48ce7d6d7e305f18733f2d5827dde20698248d6 Mon Sep 17 00:00:00 2001
From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com>
Date: Mon, 7 Jul 2025 00:38:09 +0100
Subject: [PATCH 32/43] Add descriptive statistics functions and module
 integration

---
 src/compute/stats/descriptive.rs | 227 +++++++++++++++++++++++++++++++
 src/compute/stats/mod.rs         |   2 +
 2 files changed, 229 insertions(+)
 create mode 100644 src/compute/stats/descriptive.rs
 create mode 100644 src/compute/stats/mod.rs

diff --git a/src/compute/stats/descriptive.rs b/src/compute/stats/descriptive.rs
new file mode 100644
index 0000000..0be00bb
--- /dev/null
+++ b/src/compute/stats/descriptive.rs
@@ -0,0 +1,227 @@
+use crate::matrix::{Axis, Matrix, SeriesOps};
+
+pub fn mean(x: &Matrix) -> f64 {
+    x.data().iter().sum::<f64>() / (x.rows() * x.cols()) as f64
+}
+
+pub fn mean_vertical(x: &Matrix) -> Matrix {
+    let m = x.rows() as f64;
+    Matrix::from_vec(x.sum_vertical(), 1, x.cols()) / m
+}
+
+pub fn mean_horizontal(x: &Matrix) -> Matrix {
+    let n = x.cols() as f64;
+    Matrix::from_vec(x.sum_horizontal(), x.rows(), 1) / n
+}
+
+pub fn variance(x: &Matrix) -> f64 {
+    let m = (x.rows() * x.cols()) as f64;
+    let mean_val = mean(x);
+    x.data()
+        .iter()
+        .map(|&v| (v - mean_val).powi(2))
+        .sum::<f64>()
+        / m
+}
+
+fn _variance_axis(x: &Matrix, axis: Axis) -> Matrix {
+    match axis {
+        Axis::Row => { // Calculate variance for each column (vertical variance)
+            let num_rows = x.rows() as f64;
+            let mean_of_cols = mean_vertical(x); // 1 x cols matrix
+            let mut result_data = vec![0.0; x.cols()];
+
+            for c in 0..x.cols() {
+                let mean_val = mean_of_cols.get(0, c); // Mean for current column
+                let mut sum_sq_diff = 0.0;
+                for r in 0..x.rows() {
+                    let diff = x.get(r, c) - mean_val;
+                    sum_sq_diff += diff * diff;
+                }
+                result_data[c] = sum_sq_diff / num_rows;
+            }
+            Matrix::from_vec(result_data, 1, x.cols())
+        }
+        Axis::Col => { // Calculate variance for each row (horizontal variance)
+            let num_cols = x.cols() as f64;
+            let mean_of_rows = mean_horizontal(x); // rows x 1 matrix
+            let mut result_data = vec![0.0; x.rows()];
+
+            for r in 0..x.rows() {
+                let mean_val = mean_of_rows.get(r, 0);
// Mean for current row + let mut sum_sq_diff = 0.0; + for c in 0..x.cols() { + let diff = x.get(r, c) - mean_val; + sum_sq_diff += diff * diff; + } + result_data[r] = sum_sq_diff / num_cols; + } + Matrix::from_vec(result_data, x.rows(), 1) + } + } +} + +pub fn variance_vertical(x: &Matrix) -> Matrix { + _variance_axis(x, Axis::Row) +} +pub fn variance_horizontal(x: &Matrix) -> Matrix { + _variance_axis(x, Axis::Col) +} + +pub fn stddev(x: &Matrix) -> f64 { + variance(x).sqrt() +} + +pub fn stddev_vertical(x: &Matrix) -> Matrix { + variance_vertical(x).map(|v| v.sqrt()) +} + +pub fn stddev_horizontal(x: &Matrix) -> Matrix { + variance_horizontal(x).map(|v| v.sqrt()) +} + +pub fn median(x: &Matrix) -> f64 { + let mut data = x.data().to_vec(); + data.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let mid = data.len() / 2; + if data.len() % 2 == 0 { + (data[mid - 1] + data[mid]) / 2.0 + } else { + data[mid] + } +} + +fn _median_axis(x: &Matrix, axis: Axis) -> Matrix { + let mut data = match axis { + Axis::Row => x.sum_vertical(), + Axis::Col => x.sum_horizontal(), + }; + data.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let mid = data.len() / 2; + if data.len() % 2 == 0 { + Matrix::from_vec( + vec![(data[mid - 1] + data[mid]) / 2.0], + if axis == Axis::Row { 1 } else { x.rows() }, + if axis == Axis::Row { x.cols() } else { 1 }, + ) + } else { + Matrix::from_vec( + vec![data[mid]], + if axis == Axis::Row { 1 } else { x.rows() }, + if axis == Axis::Row { x.cols() } else { 1 }, + ) + } +} + +pub fn median_vertical(x: &Matrix) -> Matrix { + _median_axis(x, Axis::Row) +} + +pub fn median_horizontal(x: &Matrix) -> Matrix { + _median_axis(x, Axis::Col) +} + +pub fn percentile(x: &Matrix, p: f64) -> f64 { + if p < 0.0 || p > 100.0 { + panic!("Percentile must be between 0 and 100"); + } + let mut data = x.data().to_vec(); + data.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let index = ((p / 100.0) * (data.len() as f64 - 1.0)).round() as usize; + data[index] +} + +fn _percentile_axis(x: &Matrix, p: f64, axis: Axis) -> Matrix { + if p < 0.0 || p > 100.0 { + panic!("Percentile must be between 0 and 100"); + } + let mut data = match axis { + Axis::Row => x.sum_vertical(), + Axis::Col => x.sum_horizontal(), + }; + data.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let index = ((p / 100.0) * (data.len() as f64 - 1.0)).round() as usize; + Matrix::from_vec( + vec![data[index]], + if axis == Axis::Row { 1 } else { x.rows() }, + if axis == Axis::Row { x.cols() } else { 1 }, + ) +} + +pub fn percentile_vertical(x: &Matrix, p: f64) -> Matrix { + _percentile_axis(x, p, Axis::Row) +} +pub fn percentile_horizontal(x: &Matrix, p: f64) -> Matrix { + _percentile_axis(x, p, Axis::Col) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::matrix::Matrix; + + const EPSILON: f64 = 1e-8; + + #[test] + fn test_descriptive_stats_regular_values() { + let data = vec![1.0, 2.0, 3.0, 4.0, 5.0]; + let x = Matrix::from_vec(data, 1, 5); + + // Mean + assert!((mean(&x) - 3.0).abs() < EPSILON); + + // Variance + assert!((variance(&x) - 2.0).abs() < EPSILON); + + // Standard Deviation + assert!((stddev(&x) - 1.4142135623730951).abs() < EPSILON); + + // Median + assert!((median(&x) - 3.0).abs() < EPSILON); + + // Percentile + assert!((percentile(&x, 0.0) - 1.0).abs() < EPSILON); + assert!((percentile(&x, 25.0) - 2.0).abs() < EPSILON); + assert!((percentile(&x, 50.0) - 3.0).abs() < EPSILON); + assert!((percentile(&x, 75.0) - 4.0).abs() < EPSILON); + assert!((percentile(&x, 100.0) - 5.0).abs() < EPSILON); + + let data_even = vec![1.0, 
2.0, 3.0, 4.0]; + let x_even = Matrix::from_vec(data_even, 1, 4); + assert!((median(&x_even) - 2.5).abs() < EPSILON); + } + + #[test] + fn test_descriptive_stats_outlier() { + let data = vec![1.0, 2.0, 3.0, 4.0, 100.0]; + let x = Matrix::from_vec(data, 1, 5); + + // Mean should be heavily affected by outlier + assert!((mean(&x) - 22.0).abs() < EPSILON); + + // Variance should be heavily affected by outlier + assert!((variance(&x) - 1522.0).abs() < EPSILON); + + // Standard Deviation should be heavily affected by outlier + assert!((stddev(&x) - 39.0128183970461).abs() < EPSILON); + + // Median should be robust to outlier + assert!((median(&x) - 3.0).abs() < EPSILON); + } + + #[test] + #[should_panic(expected = "Percentile must be between 0 and 100")] + fn test_percentile_panic_low() { + let data = vec![1.0, 2.0, 3.0]; + let x = Matrix::from_vec(data, 1, 3); + percentile(&x, -1.0); + } + + #[test] + #[should_panic(expected = "Percentile must be between 0 and 100")] + fn test_percentile_panic_high() { + let data = vec![1.0, 2.0, 3.0]; + let x = Matrix::from_vec(data, 1, 3); + percentile(&x, 101.0); + } +} diff --git a/src/compute/stats/mod.rs b/src/compute/stats/mod.rs new file mode 100644 index 0000000..acbbaf8 --- /dev/null +++ b/src/compute/stats/mod.rs @@ -0,0 +1,2 @@ +pub mod descriptive; +// pub mod distributions; \ No newline at end of file From 2a63e6d5ab8269e985c6c514ed299d5d9622e688 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Mon, 7 Jul 2025 21:20:57 +0100 Subject: [PATCH 33/43] Enhance Matrix implementation with generic filled method and add NaN support --- src/matrix/mat.rs | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/src/matrix/mat.rs b/src/matrix/mat.rs index 086b715..ceb693e 100644 --- a/src/matrix/mat.rs +++ b/src/matrix/mat.rs @@ -361,17 +361,18 @@ impl Matrix { } Matrix::from_vec(data, n, self.cols) } -} -impl Matrix { /// Creates a new matrix filled with a specific value of the specified size. - pub fn filled(rows: usize, cols: usize, value: f64) -> Self { + pub fn filled(rows: usize, cols: usize, value: T) -> Self { Matrix { rows, cols, data: vec![value; rows * cols], // Fill with the specified value } } +} + +impl Matrix { /// Creates a new matrix filled with zeros of the specified size. pub fn zeros(rows: usize, cols: usize) -> Self { Matrix::filled(rows, cols, 0.0) @@ -381,6 +382,11 @@ impl Matrix { pub fn ones(rows: usize, cols: usize) -> Self { Matrix::filled(rows, cols, 1.0) } + + /// Creates a new matrix filled with NaN values of the specified size. 
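+    ///
+    /// Note that `NaN != NaN`, so the contents must be checked with
+    /// `f64::is_nan` rather than `==` (as the test below does).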
+ pub fn nan(rows: usize, cols: usize) -> Matrix { + Matrix::filled(rows, cols, f64::NAN) + } } impl Index<(usize, usize)> for Matrix { @@ -1929,5 +1935,19 @@ mod tests { assert_eq!(m.rows(), 2); assert_eq!(m.cols(), 2); assert_eq!(m.data(), &[42.5, 42.5, 42.5, 42.5]); + + // test with an integer matrix + let m = Matrix::::filled(2, 3, 7); + assert_eq!(m.rows(), 2); + assert_eq!(m.cols(), 3); + assert_eq!(m.data(), &[7, 7, 7, 7, 7, 7]); + + // test with nans + let m = Matrix::nan(3, 3); + assert_eq!(m.rows(), 3); + assert_eq!(m.cols(), 3); + for &value in m.data() { + assert!(value.is_nan(), "Expected NaN, got {}", value); + } } } From 122a972a335150f10b87bae8482bd0c176b77bc7 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Mon, 7 Jul 2025 21:22:09 +0100 Subject: [PATCH 34/43] Add statistical distribution functions for matrices --- src/compute/stats/distributions.rs | 359 +++++++++++++++++++++++++++++ src/compute/stats/mod.rs | 2 +- 2 files changed, 360 insertions(+), 1 deletion(-) create mode 100644 src/compute/stats/distributions.rs diff --git a/src/compute/stats/distributions.rs b/src/compute/stats/distributions.rs new file mode 100644 index 0000000..cca6c0f --- /dev/null +++ b/src/compute/stats/distributions.rs @@ -0,0 +1,359 @@ +use crate::matrix::{Matrix, SeriesOps}; + +use std::f64::consts::PI; + +/// Approximation of the error function (Abramowitz & Stegun 7.1.26) +fn erf_func(x: f64) -> f64 { + let sign = if x < 0.0 { -1.0 } else { 1.0 }; + let x = x.abs(); + // coefficients + let a1 = 0.254829592; + let a2 = -0.284496736; + let a3 = 1.421413741; + let a4 = -1.453152027; + let a5 = 1.061405429; + let p = 0.3275911; + + let t = 1.0 / (1.0 + p * x); + let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp(); + sign * y +} + +/// Approximation of the error function for matrices +pub fn erf(x: Matrix) -> Matrix { + x.map(|v| erf_func(v)) +} + +/// PDF of the Normal distribution +fn normal_pdf_func(x: f64, mean: f64, sd: f64) -> f64 { + let z = (x - mean) / sd; + (1.0 / (sd * (2.0 * PI).sqrt())) * (-0.5 * z * z).exp() +} + +/// PDF of the Normal distribution for matrices +pub fn normal_pdf(x: Matrix, mean: f64, sd: f64) -> Matrix { + x.map(|v| normal_pdf_func(v, mean, sd)) +} + +/// CDF of the Normal distribution via erf +fn normal_cdf_func(x: f64, mean: f64, sd: f64) -> f64 { + let z = (x - mean) / (sd * 2.0_f64.sqrt()); + 0.5 * (1.0 + erf_func(z)) +} + +/// CDF of the Normal distribution for matrices +pub fn normal_cdf(x: Matrix, mean: f64, sd: f64) -> Matrix { + x.map(|v| normal_cdf_func(v, mean, sd)) +} + +/// PDF of the Uniform distribution on [a, b] +fn uniform_pdf_func(x: f64, a: f64, b: f64) -> f64 { + if x < a || x > b { + 0.0 + } else { + 1.0 / (b - a) + } +} + +/// PDF of the Uniform distribution on [a, b] for matrices +pub fn uniform_pdf(x: Matrix, a: f64, b: f64) -> Matrix { + x.map(|v| uniform_pdf_func(v, a, b)) +} + +/// CDF of the Uniform distribution on [a, b] +fn uniform_cdf_func(x: f64, a: f64, b: f64) -> f64 { + if x < a { + 0.0 + } else if x <= b { + (x - a) / (b - a) + } else { + 1.0 + } +} + +/// CDF of the Uniform distribution on [a, b] for matrices +pub fn uniform_cdf(x: Matrix, a: f64, b: f64) -> Matrix { + x.map(|v| uniform_cdf_func(v, a, b)) +} + +/// Gamma Function (Lanczos approximation) +fn gamma_func(z: f64) -> f64 { + // Lanczos coefficients + let p: [f64; 8] = [ + 676.5203681218851, + -1259.1392167224028, + 771.32342877765313, + -176.61502916214059, + 12.507343278686905, + 
-0.13857109526572012,
+        9.9843695780195716e-6,
+        1.5056327351493116e-7,
+    ];
+    if z < 0.5 {
+        PI / ((PI * z).sin() * gamma_func(1.0 - z))
+    } else {
+        let z = z - 1.0;
+        let mut x = 0.99999999999980993;
+        for (i, &pi) in p.iter().enumerate() {
+            x += pi / (z + (i as f64) + 1.0);
+        }
+        let t = z + p.len() as f64 - 0.5;
+        (2.0 * PI).sqrt() * t.powf(z + 0.5) * (-t).exp() * x
+    }
+}
+
+pub fn gamma(z: Matrix) -> Matrix {
+    z.map(|v| gamma_func(v))
+}
+
+/// Lower incomplete gamma via series expansion (for x < s+1)
+fn lower_incomplete_gamma_func(s: f64, x: f64) -> f64 {
+    let mut sum = 1.0 / s;
+    let mut term = sum;
+    for n in 1..100 {
+        term *= x / (s + n as f64);
+        sum += term;
+    }
+    sum * x.powf(s) * (-x).exp()
+}
+
+/// Lower incomplete gamma for matrices
+pub fn lower_incomplete_gamma(s: Matrix, x: Matrix) -> Matrix {
+    s.zip(&x, |s_val, x_val| lower_incomplete_gamma_func(s_val, x_val))
+}
+
+/// PDF of the Gamma distribution (shape k, scale θ)
+fn gamma_pdf_func(x: f64, k: f64, theta: f64) -> f64 {
+    if x < 0.0 {
+        return 0.0;
+    }
+    let coef = 1.0 / (gamma_func(k) * theta.powf(k));
+    coef * x.powf(k - 1.0) * (-(x / theta)).exp()
+}
+
+/// PDF of the Gamma distribution for matrices
+pub fn gamma_pdf(x: Matrix, k: f64, theta: f64) -> Matrix {
+    x.map(|v| gamma_pdf_func(v, k, theta))
+}
+
+/// CDF of the Gamma distribution via lower incomplete gamma
+fn gamma_cdf_func(x: f64, k: f64, theta: f64) -> f64 {
+    if x < 0.0 {
+        return 0.0;
+    }
+    lower_incomplete_gamma_func(k, x / theta) / gamma_func(k)
+}
+
+/// CDF of the Gamma distribution for matrices
+pub fn gamma_cdf(x: Matrix, k: f64, theta: f64) -> Matrix {
+    x.map(|v| gamma_cdf_func(v, k, theta))
+}
+
+/// Factorials and Combinations ///
+
+/// Compute n! as f64 (works up to ~170 reliably)
+fn factorial(n: u64) -> f64 {
+    (1..=n).map(|i| i as f64).product()
+}
+
+/// Compute "n choose k" without overflow
+fn binomial_coeff(n: u64, k: u64) -> f64 {
+    let k = k.min(n - k);
+    let mut numer = 1.0;
+    let mut denom = 1.0;
+    for i in 0..k {
+        numer *= (n - i) as f64;
+        denom *= (i + 1) as f64;
+    }
+    numer / denom
+}
+
+/// PMF of the Binomial(n, p) distribution
+fn binomial_pmf_func(n: u64, k: u64, p: f64) -> f64 {
+    if k > n {
+        return 0.0;
+    }
+    binomial_coeff(n, k) * p.powf(k as f64) * (1.0 - p).powf((n - k) as f64)
+}
+
+/// PMF of the Binomial(n, p) distribution for matrices
+pub fn binomial_pmf(n: u64, k: Matrix, p: f64) -> Matrix {
+    Matrix::from_vec(
+        k.data()
+            .iter()
+            .map(|&v| binomial_pmf_func(n, v, p))
+            .collect::<Vec<f64>>(),
+        k.rows(),
+        k.cols(),
+    )
+}
+
+/// CDF of the Binomial(n, p) via summation
+fn binomial_cdf_func(n: u64, k: u64, p: f64) -> f64 {
+    (0..=k).map(|i| binomial_pmf_func(n, i, p)).sum()
+}
+
+/// CDF of the Binomial(n, p) for matrices
+pub fn binomial_cdf(n: u64, k: Matrix, p: f64) -> Matrix {
+    Matrix::from_vec(
+        k.data()
+            .iter()
+            .map(|&v| binomial_cdf_func(n, v, p))
+            .collect::<Vec<f64>>(),
+        k.rows(),
+        k.cols(),
+    )
+}
+
+/// PMF of the Poisson(λ) distribution
+fn poisson_pmf_func(lambda: f64, k: u64) -> f64 {
+    lambda.powf(k as f64) * (-lambda).exp() / factorial(k)
+}
+
+/// PMF of the Poisson(λ) distribution for matrices
+pub fn poisson_pmf(lambda: f64, k: Matrix) -> Matrix {
+    Matrix::from_vec(
+        k.data()
+            .iter()
+            .map(|&v| poisson_pmf_func(lambda, v))
+            .collect::<Vec<f64>>(),
+        k.rows(),
+        k.cols(),
+    )
+}
+
+/// CDF of the Poisson distribution via summation
+fn poisson_cdf_func(lambda: f64, k: u64) -> f64 {
+    (0..=k).map(|i| poisson_pmf_func(lambda, i)).sum()
+}
+
+/// CDF of the Poisson(λ) distribution for matrices
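+///
+/// Computes P(X <= k) = Σ_{i=0..k} λ^i e^{-λ} / i! by summing the PMF term by
+/// term, element-wise over the matrix of `k` values.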
+pub fn poisson_cdf(lambda: f64, k: Matrix) -> Matrix {
+    Matrix::from_vec(
+        k.data()
+            .iter()
+            .map(|&v| poisson_cdf_func(lambda, v))
+            .collect::<Vec<f64>>(),
+        k.rows(),
+        k.cols(),
+    )
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_math_funcs() {
+        // Test erf function
+        assert!((erf_func(0.0) - 0.0).abs() < 1e-7);
+        assert!(
+            (erf_func(1.0) - 0.8427007).abs() < 1e-7,
+            "{} != {}",
+            erf_func(1.0),
+            0.8427007
+        );
+        assert!((erf_func(-1.0) + 0.8427007).abs() < 1e-7);
+
+        // Test gamma function
+        assert!((gamma_func(1.0) - 1.0).abs() < 1e-7);
+        assert!((gamma_func(2.0) - 1.0).abs() < 1e-7);
+        assert!((gamma_func(3.0) - 2.0).abs() < 1e-7);
+        assert!((gamma_func(4.0) - 6.0).abs() < 1e-7);
+        assert!((gamma_func(5.0) - 24.0).abs() < 1e-7);
+    }
+
+    #[test]
+    fn test_math_matrix() {
+        let x = Matrix::filled(5, 5, 1.0);
+        let erf_result = erf(x.clone());
+        assert!((erf_result.data()[0] - 0.8427007).abs() < 1e-7);
+
+        let gamma_result = gamma(x);
+        assert!((gamma_result.data()[0] - 1.0).abs() < 1e-7);
+    }
+
+    #[test]
+    fn test_normal_funcs() {
+        assert!((normal_pdf_func(0.0, 0.0, 1.0) - 0.39894228).abs() < 1e-7);
+        assert!((normal_cdf_func(1.0, 0.0, 1.0) - 0.8413447).abs() < 1e-7);
+    }
+
+    #[test]
+    fn test_normal_matrix() {
+        let x = Matrix::filled(5, 5, 0.0);
+        let pdf = normal_pdf(x.clone(), 0.0, 1.0);
+        let cdf = normal_cdf(x, 0.0, 1.0);
+        assert!((pdf.data()[0] - 0.39894228).abs() < 1e-7);
+        assert!((cdf.data()[0] - 0.5).abs() < 1e-7);
+    }
+
+    #[test]
+    fn test_uniform_funcs() {
+        assert_eq!(uniform_pdf_func(0.5, 0.0, 1.0), 1.0);
+        assert_eq!(uniform_cdf_func(-1.0, 0.0, 1.0), 0.0);
+        assert_eq!(uniform_cdf_func(0.5, 0.0, 1.0), 0.5);
+    }
+
+    #[test]
+    fn test_uniform_matrix() {
+        let x = Matrix::filled(5, 5, 0.5);
+        let pdf = uniform_pdf(x.clone(), 0.0, 1.0);
+        let cdf = uniform_cdf(x, 0.0, 1.0);
+        assert_eq!(pdf.data()[0], 1.0);
+        assert_eq!(cdf.data()[0], 0.5);
+    }
+
+    #[test]
+    fn test_binomial_funcs() {
+        let pmf = binomial_pmf_func(5, 2, 0.5);
+        assert!((pmf - 0.3125).abs() < 1e-7);
+        let cdf = binomial_cdf_func(5, 2, 0.5);
+        assert!((cdf - (0.03125 + 0.15625 + 0.3125)).abs() < 1e-7);
+    }
+
+    #[test]
+    fn test_binomial_matrix() {
+        let k = Matrix::filled(5, 5, 2 as u64);
+        let pmf = binomial_pmf(5, k.clone(), 0.5);
+        let cdf = binomial_cdf(5, k, 0.5);
+        assert!((pmf.data()[0] - 0.3125).abs() < 1e-7);
+        assert!((cdf.data()[0] - (0.03125 + 0.15625 + 0.3125)).abs() < 1e-7);
+    }
+
+    #[test]
+    fn test_poisson_funcs() {
+        let pmf: f64 = poisson_pmf_func(3.0, 2);
+        assert!((pmf - (3.0_f64.powf(2.0) * (-3.0 as f64).exp() / 2.0)).abs() < 1e-7);
+        let cdf: f64 = poisson_cdf_func(3.0, 2);
+        assert!((cdf - (pmf + poisson_pmf_func(3.0, 0) + poisson_pmf_func(3.0, 1))).abs() < 1e-7);
+    }
+
+    #[test]
+    fn test_poisson_matrix() {
+        let k = Matrix::filled(5, 5, 2);
+        let pmf = poisson_pmf(3.0, k.clone());
+        let cdf = poisson_cdf(3.0, k);
+        assert!((pmf.data()[0] - (3.0_f64.powf(2.0) * (-3.0 as f64).exp() / 2.0)).abs() < 1e-7);
+        assert!(
+            (cdf.data()[0] - (pmf.data()[0] + poisson_pmf_func(3.0, 0) + poisson_pmf_func(3.0, 1)))
+                .abs()
+                < 1e-7
+        );
+    }
+
+    #[test]
+    fn test_gamma_funcs() {
+        // For k=1, θ=1 the Gamma(1,1) is Exp(1), so pdf(x)=e^-x
+        assert!((gamma_pdf_func(2.0, 1.0, 1.0) - (-2.0 as f64).exp()).abs() < 1e-7);
+        assert!((gamma_cdf_func(2.0, 1.0, 1.0) - (1.0 - (-2.0 as f64).exp())).abs() < 1e-7);
+    }
+    #[test]
+    fn test_gamma_matrix() {
+        let x = Matrix::filled(5, 5, 2.0);
+        let pdf = gamma_pdf(x.clone(), 1.0, 1.0);
+        let cdf = gamma_cdf(x, 1.0, 1.0);
+
assert!((pdf.data()[0] - (-2.0 as f64).exp()).abs() < 1e-7); + assert!((cdf.data()[0] - (1.0 - (-2.0 as f64).exp())).abs() < 1e-7); + } +} diff --git a/src/compute/stats/mod.rs b/src/compute/stats/mod.rs index acbbaf8..f1253a2 100644 --- a/src/compute/stats/mod.rs +++ b/src/compute/stats/mod.rs @@ -1,2 +1,2 @@ pub mod descriptive; -// pub mod distributions; \ No newline at end of file +pub mod distributions; \ No newline at end of file From 46cfe439831ca0a676444842dd70018b03bdc562 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Mon, 7 Jul 2025 21:30:26 +0100 Subject: [PATCH 35/43] Add tests for row access and row_copy_from_slice methods --- src/matrix/mat.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/matrix/mat.rs b/src/matrix/mat.rs index ceb693e..407675a 100644 --- a/src/matrix/mat.rs +++ b/src/matrix/mat.rs @@ -1205,6 +1205,21 @@ mod tests { assert_eq!(ma.row(1), &[10, 20, 30]); } + #[test] + #[should_panic(expected = "row index 3 out of bounds for 3 rows")] + fn test_row_out_of_bounds_index() { + let ma = static_test_matrix(); + ma.row(3); + } + + #[test] + #[should_panic(expected = "input slice length 2 does not match number of columns 3")] + fn test_row_copy_from_slice_wrong_length() { + let mut ma = static_test_matrix(); + let new_row = vec![10, 20]; // Only 2 elements, but row length is 3 + ma.row_copy_from_slice(1, &new_row); + } + #[test] fn test_shape() { let ma = static_test_matrix_2x4(); From 6711cad6e26cac32641e6651dd9f38f4f9ee6d77 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Mon, 7 Jul 2025 23:35:00 +0100 Subject: [PATCH 36/43] Add from_rows_vec method to construct Matrix from a flat Vec in row-major order and include corresponding tests --- src/matrix/mat.rs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/matrix/mat.rs b/src/matrix/mat.rs index 407675a..509cc8b 100644 --- a/src/matrix/mat.rs +++ b/src/matrix/mat.rs @@ -63,6 +63,19 @@ impl Matrix { Matrix { rows, cols, data } } + /// Build from a flat Vec, assuming row-major order. 
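+    ///
+    /// The input is reordered into the matrix's column-major storage, e.g.
+    /// `from_rows_vec(vec![1, 2, 3, 4, 5, 6], 2, 3)` builds the matrix
+    ///
+    /// 1 2 3
+    /// 4 5 6
+    ///
+    /// and stores it internally as `[1, 4, 2, 5, 3, 6]`.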
+ pub fn from_rows_vec(data: Vec, rows: usize, cols: usize) -> Self { + let mut new_vec = Vec::with_capacity(rows * cols); + + for c in 0..cols { + for r in 0..rows { + new_vec.push(data[r * cols + c].clone()); + } + } + + Matrix::from_vec(new_vec, rows, cols) + } + pub fn data(&self) -> &[T] { &self.data } @@ -978,6 +991,20 @@ mod tests { assert_eq!(m.to_vec(), vec![1.0, 3.0, 2.0, 4.0]); } + #[test] + fn test_from_rows_vec() { + // Representing: + // 1 2 3 + // 4 5 6 + let rows_data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]; + let matrix = Matrix::from_rows_vec(rows_data, 2, 3); + + let data = vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0]; // Column-major + let expected = Matrix::from_vec(data, 2, 3); + + assert_eq!(matrix, expected); + } + // Helper function to create a basic Matrix for testing fn static_test_matrix() -> Matrix { // Column-major data: From a2fcaf1d52f8db7df51fb125499393bc74a176ee Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Mon, 7 Jul 2025 23:36:43 +0100 Subject: [PATCH 37/43] Add tests for mean, variance, and standard deviation calculations in vertical and horizontal directions --- src/compute/stats/descriptive.rs | 60 ++++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/src/compute/stats/descriptive.rs b/src/compute/stats/descriptive.rs index 0be00bb..a9e8245 100644 --- a/src/compute/stats/descriptive.rs +++ b/src/compute/stats/descriptive.rs @@ -26,7 +26,8 @@ pub fn variance(x: &Matrix) -> f64 { fn _variance_axis(x: &Matrix, axis: Axis) -> Matrix { match axis { - Axis::Row => { // Calculate variance for each column (vertical variance) + Axis::Row => { + // Calculate variance for each column (vertical variance) let num_rows = x.rows() as f64; let mean_of_cols = mean_vertical(x); // 1 x cols matrix let mut result_data = vec![0.0; x.cols()]; @@ -42,7 +43,8 @@ fn _variance_axis(x: &Matrix, axis: Axis) -> Matrix { } Matrix::from_vec(result_data, 1, x.cols()) } - Axis::Col => { // Calculate variance for each row (horizontal variance) + Axis::Col => { + // Calculate variance for each row (horizontal variance) let num_cols = x.cols() as f64; let mean_of_rows = mean_horizontal(x); // rows x 1 matrix let mut result_data = vec![0.0; x.rows()]; @@ -224,4 +226,58 @@ mod tests { let x = Matrix::from_vec(data, 1, 3); percentile(&x, 101.0); } + + #[test] + fn test_mean_vertical_horizontal() { + // 2x3 matrix: + let data = vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0]; + let x = Matrix::from_vec(data, 2, 3); + + // Vertical means (per column): [(1+4)/2, (2+5)/2, (3+6)/2] + let mv = mean_vertical(&x); + assert!((mv.get(0, 0) - 2.5).abs() < EPSILON); + assert!((mv.get(0, 1) - 3.5).abs() < EPSILON); + assert!((mv.get(0, 2) - 4.5).abs() < EPSILON); + + // Horizontal means (per row): [(1+2+3)/3, (4+5+6)/3] + let mh = mean_horizontal(&x); + assert!((mh.get(0, 0) - 2.0).abs() < EPSILON); + assert!((mh.get(1, 0) - 5.0).abs() < EPSILON); + } + + #[test] + fn test_variance_vertical_horizontal() { + let data = vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0]; + let x = Matrix::from_vec(data, 2, 3); + + // Vertical variances (per column): each is ((v - mean)^2 summed / 2) + // cols: {1,4}, {2,5}, {3,6} all give 2.25 + let vv = variance_vertical(&x); + for c in 0..3 { + assert!((vv.get(0, c) - 2.25).abs() < EPSILON); + } + + // Horizontal variances (per row): rows [1,2,3] and [4,5,6] both give 2/3 + let vh = variance_horizontal(&x); + assert!((vh.get(0, 0) - (2.0 / 3.0)).abs() < EPSILON); + assert!((vh.get(1, 0) - (2.0 / 3.0)).abs() < EPSILON); + 
} + + #[test] + fn test_stddev_vertical_horizontal() { + let data = vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0]; + let x = Matrix::from_vec(data, 2, 3); + + // Stddev is sqrt of variance + let sv = stddev_vertical(&x); + for c in 0..3 { + assert!((sv.get(0, c) - 1.5).abs() < EPSILON); + } + + let sh = stddev_horizontal(&x); + // sqrt(2/3) ≈ 0.816497 + let expected = (2.0 / 3.0 as f64).sqrt(); + assert!((sh.get(0, 0) - expected).abs() < EPSILON); + assert!((sh.get(1, 0) - expected).abs() < EPSILON); + } } From 5779c6b82d934ccbe107659b800ba4393662fc6a Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Tue, 8 Jul 2025 21:00:19 +0100 Subject: [PATCH 38/43] Refactor median and percentile functions to handle vertical and horizontal calculations correctly; add corresponding tests for validation --- src/compute/stats/descriptive.rs | 126 +++++++++++++++++++++++-------- 1 file changed, 93 insertions(+), 33 deletions(-) diff --git a/src/compute/stats/descriptive.rs b/src/compute/stats/descriptive.rs index a9e8245..276e531 100644 --- a/src/compute/stats/descriptive.rs +++ b/src/compute/stats/descriptive.rs @@ -94,33 +94,35 @@ pub fn median(x: &Matrix) -> f64 { } fn _median_axis(x: &Matrix, axis: Axis) -> Matrix { - let mut data = match axis { - Axis::Row => x.sum_vertical(), - Axis::Col => x.sum_horizontal(), + let mx = match axis { + Axis::Col => x.clone(), + Axis::Row => x.transpose(), }; - data.sort_by(|a, b| a.partial_cmp(b).unwrap()); - let mid = data.len() / 2; - if data.len() % 2 == 0 { - Matrix::from_vec( - vec![(data[mid - 1] + data[mid]) / 2.0], - if axis == Axis::Row { 1 } else { x.rows() }, - if axis == Axis::Row { x.cols() } else { 1 }, - ) - } else { - Matrix::from_vec( - vec![data[mid]], - if axis == Axis::Row { 1 } else { x.rows() }, - if axis == Axis::Row { x.cols() } else { 1 }, - ) + + let mut result = Vec::with_capacity(mx.cols()); + for c in 0..mx.cols() { + let mut col = mx.column(c).to_vec(); + col.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let mid = col.len() / 2; + if col.len() % 2 == 0 { + result.push((col[mid - 1] + col[mid]) / 2.0); + } else { + result.push(col[mid]); + } } + let (r, c) = match axis { + Axis::Col => (1, mx.cols()), + Axis::Row => (mx.cols(), 1), + }; + Matrix::from_vec(result, r, c) } pub fn median_vertical(x: &Matrix) -> Matrix { - _median_axis(x, Axis::Row) + _median_axis(x, Axis::Col) } pub fn median_horizontal(x: &Matrix) -> Matrix { - _median_axis(x, Axis::Col) + _median_axis(x, Axis::Row) } pub fn percentile(x: &Matrix, p: f64) -> f64 { @@ -137,24 +139,29 @@ fn _percentile_axis(x: &Matrix, p: f64, axis: Axis) -> Matrix { if p < 0.0 || p > 100.0 { panic!("Percentile must be between 0 and 100"); } - let mut data = match axis { - Axis::Row => x.sum_vertical(), - Axis::Col => x.sum_horizontal(), + let mx: Matrix = match axis { + Axis::Col => x.clone(), + Axis::Row => x.transpose(), }; - data.sort_by(|a, b| a.partial_cmp(b).unwrap()); - let index = ((p / 100.0) * (data.len() as f64 - 1.0)).round() as usize; - Matrix::from_vec( - vec![data[index]], - if axis == Axis::Row { 1 } else { x.rows() }, - if axis == Axis::Row { x.cols() } else { 1 }, - ) + let mut result = Vec::with_capacity(mx.cols()); + for c in 0..mx.cols() { + let mut col = mx.column(c).to_vec(); + col.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let index = ((p / 100.0) * (col.len() as f64 - 1.0)).round() as usize; + result.push(col[index]); + } + let (r, c) = match axis { + Axis::Col => (1, mx.cols()), + Axis::Row => (mx.cols(), 1), + }; + 
Matrix::from_vec(result, r, c) } pub fn percentile_vertical(x: &Matrix, p: f64) -> Matrix { - _percentile_axis(x, p, Axis::Row) + _percentile_axis(x, p, Axis::Col) } pub fn percentile_horizontal(x: &Matrix, p: f64) -> Matrix { - _percentile_axis(x, p, Axis::Col) + _percentile_axis(x, p, Axis::Row) } #[cfg(test)] @@ -250,14 +257,12 @@ mod tests { let data = vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0]; let x = Matrix::from_vec(data, 2, 3); - // Vertical variances (per column): each is ((v - mean)^2 summed / 2) // cols: {1,4}, {2,5}, {3,6} all give 2.25 let vv = variance_vertical(&x); for c in 0..3 { assert!((vv.get(0, c) - 2.25).abs() < EPSILON); } - // Horizontal variances (per row): rows [1,2,3] and [4,5,6] both give 2/3 let vh = variance_horizontal(&x); assert!((vh.get(0, 0) - (2.0 / 3.0)).abs() < EPSILON); assert!((vh.get(1, 0) - (2.0 / 3.0)).abs() < EPSILON); @@ -280,4 +285,59 @@ mod tests { assert!((sh.get(0, 0) - expected).abs() < EPSILON); assert!((sh.get(1, 0) - expected).abs() < EPSILON); } + + #[test] + fn test_median_vertical_horizontal() { + let data = vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0]; + let x = Matrix::from_vec(data, 2, 3); + + let mv = median_vertical(&x).row(0); + + let expected_v = vec![2.5, 3.5, 4.5]; + assert_eq!(mv, expected_v, "{:?} expected: {:?}", expected_v, mv); + } + + #[test] + fn test_percentile_vertical_horizontal() { + // vec of f64 values 1..24 as a 4x6 matrix + let data: Vec = (1..=24).map(|x| x as f64).collect(); + let x = Matrix::from_vec(data, 4, 6); + + // columns: + // 1, 5, 9, 13, 17, 21 + // 2, 6, 10, 14, 18, 22 + // 3, 7, 11, 15, 19, 23 + // 4, 8, 12, 16, 20, 24 + + let er0 = vec![1., 5., 9., 13., 17., 21.]; + let er50 = vec![3., 7., 11., 15., 19., 23.]; + let er100 = vec![4., 8., 12., 16., 20., 24.]; + + assert_eq!(percentile_vertical(&x, 0.0).data(), er0); + assert_eq!(percentile_vertical(&x, 50.0).data(), er50); + assert_eq!(percentile_vertical(&x, 100.0).data(), er100); + + let eh0 = vec![1., 2., 3., 4.]; + let eh50 = vec![13., 14., 15., 16.]; + let eh100 = vec![21., 22., 23., 24.]; + + assert_eq!(percentile_horizontal(&x, 0.0).data(), eh0); + assert_eq!(percentile_horizontal(&x, 50.0).data(), eh50); + assert_eq!(percentile_horizontal(&x, 100.0).data(), eh100); + } + + #[test] + #[should_panic(expected = "Percentile must be between 0 and 100")] + fn test_percentile_out_of_bounds() { + let data = vec![1.0, 2.0, 3.0]; + let x = Matrix::from_vec(data, 1, 3); + percentile(&x, -10.0); // Should panic + } + + #[test] + #[should_panic(expected = "Percentile must be between 0 and 100")] + fn test_percentile_vertical_out_of_bounds() { + let m = Matrix::from_vec(vec![1.0, 2.0, 3.0], 1, 3); + let _ = percentile_vertical(&m, -0.1); + } } From b2a799fc30baacec8bd559076b9ff3d6693fe1a9 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Tue, 8 Jul 2025 21:03:05 +0100 Subject: [PATCH 39/43] Add test for median_horizontal function to validate horizontal median calculation --- src/compute/stats/descriptive.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/compute/stats/descriptive.rs b/src/compute/stats/descriptive.rs index 276e531..7dedee8 100644 --- a/src/compute/stats/descriptive.rs +++ b/src/compute/stats/descriptive.rs @@ -295,6 +295,10 @@ mod tests { let expected_v = vec![2.5, 3.5, 4.5]; assert_eq!(mv, expected_v, "{:?} expected: {:?}", expected_v, mv); + + let mh = median_horizontal(&x).column(0).to_vec(); + let expected_h = vec![2.0, 5.0]; + assert_eq!(mh, expected_h, "{:?} expected: {:?}", expected_h, mh); } 
     #[test]

From 8ffa278db839a8685b14312870a2f0bdeb95fede Mon Sep 17 00:00:00 2001
From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com>
Date: Tue, 8 Jul 2025 23:16:19 +0100
Subject: [PATCH 40/43] Add tests for uniform and binomial distributions;
 enhance gamma function tests

---
 src/compute/stats/distributions.rs | 39 +++++++++++++++++++++++++-----
 1 file changed, 33 insertions(+), 6 deletions(-)

diff --git a/src/compute/stats/distributions.rs b/src/compute/stats/distributions.rs
index cca6c0f..af63ae8 100644
--- a/src/compute/stats/distributions.rs
+++ b/src/compute/stats/distributions.rs
@@ -247,12 +247,7 @@ mod tests {
     fn test_math_funcs() {
         // Test erf function
         assert!((erf_func(0.0) - 0.0).abs() < 1e-7);
-        assert!(
-            (erf_func(1.0) - 0.8427007).abs() < 1e-7,
-            "{} != {}",
-            erf_func(1.0),
-            0.8427007
-        );
+        assert!((erf_func(1.0) - 0.8427007).abs() < 1e-7);
         assert!((erf_func(-1.0) + 0.8427007).abs() < 1e-7);
 
         // Test gamma function
@@ -261,6 +256,10 @@ mod tests {
         assert!((gamma_func(3.0) - 2.0).abs() < 1e-7);
         assert!((gamma_func(4.0) - 6.0).abs() < 1e-7);
         assert!((gamma_func(5.0) - 24.0).abs() < 1e-7);
+
+        let z = 0.3;
+        let expected = PI / ((PI * z).sin() * gamma_func(1.0 - z));
+        assert!((gamma_func(z) - expected).abs() < 1e-7);
     }
 
     #[test]
@@ -293,6 +292,14 @@ mod tests {
         assert_eq!(uniform_pdf_func(0.5, 0.0, 1.0), 1.0);
         assert_eq!(uniform_cdf_func(-1.0, 0.0, 1.0), 0.0);
         assert_eq!(uniform_cdf_func(0.5, 0.0, 1.0), 0.5);
+
+        // x < a (or x > b) should return 0
+        assert_eq!(uniform_pdf_func(-0.5, 0.0, 1.0), 0.0);
+        assert_eq!(uniform_pdf_func(1.5, 0.0, 1.0), 0.0);
+
+        // for the cdf, any x > b should return 1
+        assert_eq!(uniform_cdf_func(1.5, 0.0, 1.0), 1.0);
+        assert_eq!(uniform_cdf_func(2.0, 0.0, 1.0), 1.0);
     }
 
     #[test]
@@ -310,6 +317,9 @@ mod tests {
         assert!((pmf - 0.3125).abs() < 1e-7);
         let cdf = binomial_cdf_func(5, 2, 0.5);
         assert!((cdf - (0.03125 + 0.15625 + 0.3125)).abs() < 1e-7);
+
+        let pmf_zero = binomial_pmf_func(5, 6, 0.5);
+        assert!(pmf_zero == 0.0, "PMF should be 0 for k > n");
     }
 
     #[test]
@@ -347,6 +357,9 @@ mod tests {
         // For k=1, θ=1 the Gamma(1,1) is Exp(1), so pdf(x)=e^-x
         assert!((gamma_pdf_func(2.0, 1.0, 1.0) - (-2.0 as f64).exp()).abs() < 1e-7);
         assert!((gamma_cdf_func(2.0, 1.0, 1.0) - (1.0 - (-2.0 as f64).exp())).abs() < 1e-7);
+
+        // x < 0 case
+        assert_eq!(gamma_pdf_func(-1.0, 1.0, 1.0), 0.0);
     }
     #[test]
     fn test_gamma_matrix() {
@@ -356,4 +369,18 @@ mod tests {
         assert!((pdf.data()[0] - (-2.0 as f64).exp()).abs() < 1e-7);
         assert!((cdf.data()[0] - (1.0 - (-2.0 as f64).exp())).abs() < 1e-7);
     }
+
+    #[test]
+    fn test_lower_incomplete_gamma() {
+        let s = Matrix::filled(5, 5, 2.0);
+        let x = Matrix::filled(5, 5, 1.0);
+        let expected = lower_incomplete_gamma_func(2.0, 1.0);
+        let result = lower_incomplete_gamma(s, x);
+        assert!(
+            (result.data()[0] - expected).abs() < 1e-7,
+            "Expected: {}, Got: {}",
+            expected,
+            result.data()[0]
+        );
+    }
 }

From 61aeedbf769d3b6d77a87e049a79ab771c45177b Mon Sep 17 00:00:00 2001
From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com>
Date: Tue, 8 Jul 2025 23:18:16 +0100
Subject: [PATCH 41/43] Simplify assertion in lower_incomplete_gamma test for
 clarity

---
 src/compute/stats/distributions.rs | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/src/compute/stats/distributions.rs b/src/compute/stats/distributions.rs
index af63ae8..cfd554e 100644
--- a/src/compute/stats/distributions.rs
+++ b/src/compute/stats/distributions.rs
@@ -376,11 +376,6 @@ mod tests {
         let x = Matrix::filled(5, 5, 1.0);
         let expected =
lower_incomplete_gamma_func(2.0, 1.0); let result = lower_incomplete_gamma(s, x); - assert!( - (result.data()[0] - expected).abs() < 1e-7, - "Expected: {}, Got: {}", - expected, - result.data()[0] - ); + assert!((result.data()[0] - expected).abs() < 1e-7); } } From 2cd2e24f57ce483ecf5c711467fb659ee12964d4 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Tue, 8 Jul 2025 23:21:59 +0100 Subject: [PATCH 42/43] Add test for gamma_cdf_func to validate behavior for negative input --- src/compute/stats/distributions.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/compute/stats/distributions.rs b/src/compute/stats/distributions.rs index cfd554e..63534d5 100644 --- a/src/compute/stats/distributions.rs +++ b/src/compute/stats/distributions.rs @@ -360,6 +360,7 @@ mod tests { // <0 case assert_eq!(gamma_pdf_func(-1.0, 1.0, 1.0), 0.0); + assert_eq!(gamma_cdf_func(-1.0, 1.0, 1.0), 0.0); } #[test] fn test_gamma_matrix() { From e7c181f011e020243ef4bc2cec7e2f7acd0e3e42 Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Thu, 10 Jul 2025 23:27:23 +0100 Subject: [PATCH 43/43] Refactor error handling in GaussianNB fit method to use assert instead of panic for empty class labels --- src/compute/models/gaussian_nb.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/compute/models/gaussian_nb.rs b/src/compute/models/gaussian_nb.rs index 6d4b489..2d5cc97 100644 --- a/src/compute/models/gaussian_nb.rs +++ b/src/compute/models/gaussian_nb.rs @@ -59,9 +59,7 @@ impl GaussianNB { let bits = label.to_bits(); groups.entry(bits).or_default().push(i); } - if groups.is_empty() { - panic!("No class labels found in y"); - } + assert!(!groups.is_empty(), "No class labels found in y"); //-- panicked earlier // Extract and sort class labels self.classes = groups.keys().cloned().map(f64::from_bits).collect(); @@ -209,6 +207,5 @@ mod tests { let y = Matrix::from_vec(vec![0.0], 1, 1); let mut clf = GaussianNB::new(1e-9, false); clf.fit(&x, &y); - clf.predict(&x); } }