1 Commit

| Author | SHA1 | Message | Date |
| --- | --- | --- | --- |
| Palash Tyagi | 4adcfd0ccb | Merge bda9b84987 into 11330e464b | 2025-07-12 23:20:15 +00:00 |
7 changed files with 88 additions and 370 deletions

View File

@@ -1,6 +1,6 @@
[package]
name = "rustframe"
version = "0.0.1-a.20250716"
version = "0.0.1-a.0"
edition = "2021"
license = "GPL-3.0-or-later"
readme = "README.md"
@@ -19,6 +19,9 @@ rand = "^0.9.1"
[features]
bench = ["dep:criterion"]
# [dev-dependencies]
# criterion = { version = "0.5", features = ["html_reports"], optional = true }
[[bench]]
name = "benchmarks"
harness = false
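
For orientation: with `harness = false`, Criterion supplies its own `main`, so the bench target is an ordinary Rust file. A minimal sketch of a `benches/benchmarks.rs` compatible with this config — the workload here is illustrative, not the crate's actual benchmarks:

```rust
use criterion::{criterion_group, criterion_main, Criterion};

// Illustrative workload only; rustframe's real benchmarks differ.
fn bench_sum(c: &mut Criterion) {
    c.bench_function("sum_0_to_1000", |b| b.iter(|| (0..1_000u64).sum::<u64>()));
}

criterion_group!(benches, bench_sum);
criterion_main!(benches);
```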

View File

@@ -2,7 +2,7 @@
<!-- # <img align="center" alt="Rustframe" src=".github/rustframe_logo.png" height="50px" /> rustframe -->
<!-- though the centre tag doesn't work as it would normally, it achieves the desired effect -->
📚 [Docs](https://magnus167.github.io/rustframe/) | 🐙 [GitHub](https://github.com/Magnus167/rustframe) | 🌐 [Gitea mirror](https://gitea.nulltech.uk/Magnus167/rustframe) | 🦀 [Crates.io](https://crates.io/crates/rustframe) | 🔖 [docs.rs](https://docs.rs/rustframe/latest/rustframe/)
@@ -15,54 +15,26 @@
## Rustframe: _A lightweight dataframe & math toolkit for Rust_
Rustframe provides intuitive dataframe, matrix, and series operations for data analysis and manipulation.
Rustframe provides intuitive dataframe, matrix, and series operations for small-to-mid scale data analysis and manipulation.
Rustframe keeps things simple, safe, and readable. It is handy for quick numeric experiments and small analytical tasks as well as for educational purposes. It is designed to be easy to use and understand, with a clean API implemented in 100% safe Rust.
Rustframe is an educational project, and is not intended for production use. It is **not** meant to compete with powerhouse crates like `polars` or `ndarray`. It is a work in progress, and the API is subject to change. There are no guarantees of stability or performance, and it is not optimized for large datasets or high-performance computing.
Rustframe keeps things simple, safe, and readable. It is handy for quick numeric experiments and small analytical tasks, but it is **not** meant to compete with powerhouse crates like `polars` or `ndarray`.
### What it offers
- **Matrix operations** - Element-wise arithmetic, boolean logic, transpose, and more.
- **Math that reads like math** - element-wise `+`, `−`, `×`, `÷` on entire frames or scalars.
- **Frames** - Column major data structure for single-type data, with labeled columns and typed row indices.
- **Compute module** - Implements various statistical computations and machine learning models.
- **[Coming Soon]** _DataFrame_ - Multi-type data structure for heterogeneous data, with labeled columns and typed row indices.
- **[Coming Soon]** _Random number utils_ - Random number generation utilities for statistical sampling and simulations. (Currently using the [`rand`](https://crates.io/crates/rand) crate.)
#### Matrix and Frame functionality
- **Matrix operations** - Element-wise arithmetic, boolean logic, transpose, and more.
- **Frame operations** - Column manipulation, sorting, and more.
#### Compute Module
The `compute` module provides implementations for various statistical computations and machine learning models.
**Statistics, Data Analysis, and Machine Learning:**
- Correlation analysis
- Descriptive statistics
- Distributions
- Inferential statistics
- Dense Neural Networks
- Gaussian Naive Bayes
- K-Means Clustering
- Linear Regression
- Logistic Regression
- Principal Component Analysis
- **Math that reads like math** - element-wise `+`, `−`, `×`, `÷` on entire frames or scalars.
- **Broadcast & reduce** - sum, product, any/all across rows or columns without boilerplate.
- **Boolean masks made simple** - chain comparisons, combine with `&`/`|`, get a tidy `BoolMatrix` back (see the sketch after this list).
- **Date-centric row index** - business-day ranges and calendar slicing built in.
- **Pure safe Rust** - 100% safe, zero `unsafe`.
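
A minimal sketch of the mask workflow, using only calls that appear in the quick-start below (`from_rows_vec`, scalar arithmetic, `eq_elem`, `all`); the import path is assumed from the crate's internal layout, and extra trait imports may be needed in practice:

```rust
use rustframe::matrix::FloatMatrix; // public path assumed, not confirmed here

fn main() {
    let a = FloatMatrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
    // Element-wise comparison yields a BoolMatrix; `all` reduces it to one bool.
    let mask = (a.clone() + 1.0 - 1.0).eq_elem(a.clone());
    assert!(mask.all());
}
```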
### Heads up
- **Not memory-efficient (yet)** - footprint needs work.
- **The feature set is still limited** - expect missing pieces.
- **Feature set still small** - expect missing pieces.
### Somewhere down the line
### On the horizon
- Optional GPU acceleration (Vulkan or similar) for heavier workloads.
- Optional GPU help (Vulkan or similar) for heavier workloads.
- Straightforward Python bindings using `pyo3`.
---
@@ -79,7 +51,7 @@ use rustframe::{
let n_periods = 4;
// Four business days starting 2024-01-02
let dates: Vec<NaiveDate> =
BDatesList::from_n_periods("2024-01-02".to_string(), DateFreq::Daily, n_periods)
.unwrap()
@@ -114,13 +86,13 @@ let result: Matrix<f64> = result / 2.0; // divide by scalar
let check: bool = result.eq_elem(ma.clone()).all();
assert!(check);
// Alternatively:
// The above math can also be written as:
let check: bool = (&(&(&(&ma + 1.0) - 1.0) * 2.0) / 2.0)
.eq_elem(ma.clone())
.all();
assert!(check);
// or even as:
// The above math can also be written as:
let check: bool = ((((ma.clone() + 1.0) - 1.0) * 2.0) / 2.0)
.eq_elem(ma.clone())
.all();
@@ -191,11 +163,3 @@ E.g. to run the `game_of_life` example:
```bash
cargo run --example game_of_life
```
### Running benchmarks
To run the benchmarks, use:
```bash
cargo bench --features "bench"
```

View File

@@ -1,6 +1,6 @@
use crate::compute::stats::mean_vertical;
use crate::matrix::Matrix;
use rand::rng;
use crate::matrix::{FloatMatrix, SeriesOps};
use rand::rng; // Changed from rand::thread_rng
use rand::seq::SliceRandom;
pub struct KMeans {
@@ -16,50 +16,50 @@ impl KMeans {
// ----- initialise centroids -----
let mut centroids = Matrix::zeros(k, n);
if k > 0 && m > 0 {
// guard against empty data
if k == 1 {
let mean = mean_vertical(x);
centroids.row_copy_from_slice(0, &mean.data()); // ideally, data.row(0), but that's the same
} else {
// For k > 1, pick k distinct rows at random
let mut rng = rng();
let mut indices: Vec<usize> = (0..m).collect();
indices.shuffle(&mut rng);
for c in 0..k {
centroids.row_copy_from_slice(c, &x.row(indices[c]));
if k == 1 {
// For k=1, initialize the centroid to the mean of the data
for j in 0..n {
centroids[(0, j)] = x.column(j).iter().sum::<f64>() / m as f64;
}
} else {
// For k > 1, pick k distinct rows at random
let mut rng = rng(); // Changed from thread_rng()
let mut indices: Vec<usize> = (0..m).collect();
indices.shuffle(&mut rng);
for (c, &i) in indices[..k].iter().enumerate() {
for j in 0..n {
centroids[(c, j)] = x[(i, j)];
}
}
}
let mut labels = vec![0usize; m];
let mut distances = vec![0.0f64; m];
let mut old_centroids = centroids.clone(); // Store initial centroids for first iteration's convergence check
for _iter in 0..max_iter {
let mut changed = false;
// Renamed loop variable to _iter for clarity
// ----- assignment step -----
let mut changed = false;
for i in 0..m {
let sample_row = x.row(i);
let sample_matrix = FloatMatrix::from_rows_vec(sample_row, 1, n);
let mut best = 0usize;
let mut best_dist_sq = f64::MAX;
for c in 0..k {
let centroid_row = centroids.row(c);
let centroid_row = old_centroids.row(c); // Use old_centroids for distance calculation
let centroid_matrix = FloatMatrix::from_rows_vec(centroid_row, 1, n);
let dist_sq: f64 = sample_row
.iter()
.zip(centroid_row.iter())
.map(|(a, b)| (a - b).powi(2))
.sum();
let diff = &sample_matrix - &centroid_matrix;
let sq_diff = &diff * &diff;
let dist_sq = sq_diff.sum_horizontal()[0];
if dist_sq < best_dist_sq {
best_dist_sq = dist_sq;
best = c;
}
}
distances[i] = best_dist_sq;
if labels[i] != best {
labels[i] = best;
changed = true;
@@ -67,8 +67,8 @@ impl KMeans {
}
// ----- update step -----
let mut new_centroids = Matrix::zeros(k, n);
let mut counts = vec![0usize; k];
let mut new_centroids = Matrix::zeros(k, n); // New centroids for this iteration
for i in 0..m {
let c = labels[i];
counts[c] += 1;
@@ -76,29 +76,8 @@ impl KMeans {
new_centroids[(c, j)] += x[(i, j)];
}
}
for c in 0..k {
if counts[c] == 0 {
// This cluster is empty. Re-initialize its centroid to the point
// furthest from its assigned centroid to prevent the cluster from dying.
let mut furthest_point_idx = 0;
let mut max_dist_sq = 0.0;
for (i, &dist) in distances.iter().enumerate() {
if dist > max_dist_sq {
max_dist_sq = dist;
furthest_point_idx = i;
}
}
for j in 0..n {
new_centroids[(c, j)] = x[(furthest_point_idx, j)];
}
// Ensure this point isn't chosen again for another empty cluster in the same iteration.
if m > 0 {
distances[furthest_point_idx] = 0.0;
}
} else {
// Normalize the centroid by the number of points in it.
if counts[c] > 0 {
for j in 0..n {
new_centroids[(c, j)] /= counts[c] as f64;
}
@@ -107,47 +86,53 @@ impl KMeans {
// ----- convergence test -----
if !changed {
centroids = new_centroids; // update before breaking
break; // assignments stable
}
let diff = &new_centroids - &centroids;
centroids = new_centroids; // Update for the next iteration
if tol > 0.0 {
// optional centroid-shift tolerance
let diff = &new_centroids - &old_centroids; // Calculate difference between new and old centroids
let sq_diff = &diff * &diff;
let shift = sq_diff.data().iter().sum::<f64>().sqrt();
let shift = sq_diff.data().iter().sum::<f64>().sqrt(); // sqrt of the summed squared differences
if shift < tol {
break;
}
}
old_centroids = new_centroids; // Update old_centroids for next iteration
}
(Self { centroids }, labels)
(
Self {
centroids: old_centroids,
},
labels,
) // Return the final centroids
}
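
A minimal usage sketch for the `fit`/`predict` pair above; the import paths are assumptions from the crate layout, and the data is a toy 4x2 set with one sample per row:

```rust
use rustframe::compute::models::k_means::KMeans; // path assumed, not shown in this diff
use rustframe::matrix::FloatMatrix;

fn main() {
    // Two well-separated 2-D blobs, one sample per row.
    let x = FloatMatrix::from_rows_vec(
        vec![0.0, 0.0, 0.1, 0.1, 10.0, 10.0, 10.1, 10.1],
        4,
        2,
    );
    let (model, labels) = KMeans::fit(&x, 2, 100, 1e-6);
    assert_eq!(labels.len(), 4);
    // On converged, well-separated data, predict reproduces the fit labels.
    assert_eq!(model.predict(&x), labels);
}
```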
/// Predict nearest centroid for each sample.
pub fn predict(&self, x: &Matrix<f64>) -> Vec<usize> {
let m = x.rows();
let k = self.centroids.rows();
let n = x.cols();
if m == 0 {
// Handle empty input matrix
return Vec::new();
}
let mut labels = vec![0usize; m];
for i in 0..m {
let sample_row = x.row(i);
let sample_matrix = FloatMatrix::from_rows_vec(sample_row, 1, n);
let mut best = 0usize;
let mut best_dist_sq = f64::MAX;
for c in 0..k {
let centroid_row = self.centroids.row(c);
let centroid_matrix = FloatMatrix::from_rows_vec(centroid_row, 1, n);
let dist_sq: f64 = sample_row
.iter()
.zip(centroid_row.iter())
.map(|(a, b)| (a - b).powi(2))
.sum();
let diff = &sample_matrix - &centroid_matrix;
let sq_diff = &diff * &diff;
let dist_sq = sq_diff.sum_horizontal()[0];
if dist_sq < best_dist_sq {
best_dist_sq = dist_sq;
@@ -162,45 +147,6 @@ impl KMeans {
#[cfg(test)]
mod tests {
#[test]
fn test_k_means_empty_cluster_reinit_centroid() {
// Try multiple times to increase the chance of hitting the empty cluster case
for _ in 0..20 {
let data = vec![0.0, 0.0, 0.0, 0.0, 10.0, 10.0];
let x = FloatMatrix::from_rows_vec(data, 3, 2);
let k = 2;
let max_iter = 10;
let tol = 1e-6;
let (kmeans_model, labels) = KMeans::fit(&x, k, max_iter, tol);
// Check if any cluster is empty
let mut counts = vec![0; k];
for &label in &labels {
counts[label] += 1;
}
if counts.iter().any(|&c| c == 0) {
// Only check the property for clusters that are empty
let centroids = kmeans_model.centroids;
for c in 0..k {
if counts[c] == 0 {
let mut matches_data_point = false;
for i in 0..3 {
let dx = centroids[(c, 0)] - x[(i, 0)];
let dy = centroids[(c, 1)] - x[(i, 1)];
if dx.abs() < 1e-9 && dy.abs() < 1e-9 {
matches_data_point = true;
break;
}
}
assert!(matches_data_point, "Centroid {} (empty cluster) does not match any data point", c);
}
}
break;
}
}
// If we never saw an empty cluster, that's fine; the test passes as long as no panic occurred
}
use super::*;
use crate::matrix::FloatMatrix;
@@ -290,13 +236,10 @@ mod tests {
assert_eq!(kmeans_model.centroids.rows(), k);
assert_eq!(labels.len(), x.rows());
// Each sample should be its own cluster. Due to random init, labels
// might not be [0,1,2,3,4] but will be a permutation of it.
// Each sample should be its own cluster, so labels should be unique
let mut sorted_labels = labels.clone();
sorted_labels.sort_unstable();
sorted_labels.dedup();
// Labels should all be unique when k==m
assert_eq!(sorted_labels.len(), k);
assert_eq!(sorted_labels, vec![0, 1, 2, 3, 4]);
}
#[test]
@@ -316,7 +259,7 @@ mod tests {
let x = create_simple_integer_data(); // Use integer data
let k = 1;
let max_iter = 100;
let tol = 1e-6;
let tol = 1e-6; // Reset tolerance
let (kmeans_model, labels) = KMeans::fit(&x, k, max_iter, tol);
@@ -330,8 +273,9 @@ mod tests {
let expected_centroid_x = x.column(0).iter().sum::<f64>() / x.rows() as f64;
let expected_centroid_y = x.column(1).iter().sum::<f64>() / x.rows() as f64;
assert!((kmeans_model.centroids[(0, 0)] - expected_centroid_x).abs() < 1e-9);
assert!((kmeans_model.centroids[(0, 1)] - expected_centroid_y).abs() < 1e-9);
// Relax the assertion tolerance to match the algorithm's convergence tolerance
assert!((kmeans_model.centroids[(0, 0)] - expected_centroid_x).abs() < 1e-6);
assert!((kmeans_model.centroids[(0, 1)] - expected_centroid_y).abs() < 1e-6);
}
#[test]
@@ -341,8 +285,7 @@ mod tests {
let tol = 1e-6;
let (kmeans_model, _labels) = KMeans::fit(&x, k, max_iter, tol);
// The `Matrix` type does not support 0xN or Nx0 matrices, so
// testing with a 0x0 matrix is a valid edge case.
// Create a 0x0 matrix. This is allowed by Matrix constructor.
let empty_x = FloatMatrix::from_rows_vec(vec![], 0, 0);
let predicted_labels = kmeans_model.predict(&empty_x);
assert!(predicted_labels.is_empty());
@@ -360,5 +303,4 @@ mod tests {
assert_eq!(predicted_label.len(), 1);
assert!(predicted_label[0] < k);
}
}

View File

@@ -1,4 +1,4 @@
use crate::compute::stats::{mean, mean_horizontal, mean_vertical, stddev};
use crate::compute::stats::{mean, mean_horizontal, mean_vertical};
use crate::matrix::{Axis, Matrix, SeriesOps};
/// Population covariance between two equally-sized matrices (flattened)
@@ -113,21 +113,6 @@ pub fn covariance_matrix(x: &Matrix<f64>, axis: Axis) -> Matrix<f64> {
centered_data.transpose().matrix_mul(&centered_data) / (n_samples as f64 - 1.0)
}
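
In symbols: with the column-centered data `X_c` and `n` samples, the return value above is the unbiased covariance matrix

```latex
C = \frac{X_c^{\top} X_c}{n - 1}
```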
pub fn pearson(x: &Matrix<f64>, y: &Matrix<f64>) -> f64 {
assert_eq!(x.rows(), y.rows());
assert_eq!(x.cols(), y.cols());
let cov = covariance(x, y);
let std_x = stddev(x);
let std_y = stddev(y);
if std_x == 0.0 || std_y == 0.0 {
return 0.0; // Avoid division by zero
}
cov / (std_x * std_y)
}
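
For reference, the removed `pearson` computed the Pearson correlation coefficient (returning 0 when either standard deviation vanishes):

```latex
r = \frac{\operatorname{cov}(X, Y)}{\sigma_X \, \sigma_Y}, \qquad -1 \le r \le 1
```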
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -14,29 +14,17 @@ pub fn mean_horizontal(x: &Matrix<f64>) -> Matrix<f64> {
Matrix::from_vec(x.sum_horizontal(), x.rows(), 1) / n
}
fn population_or_sample_variance(x: &Matrix<f64>, population: bool) -> f64 {
pub fn variance(x: &Matrix<f64>) -> f64 {
let m = (x.rows() * x.cols()) as f64;
let mean_val = mean(x);
x.data()
.iter()
.map(|&v| (v - mean_val).powi(2))
.sum::<f64>()
/ if population { m } else { m - 1.0 }
/ m
}
pub fn population_variance(x: &Matrix<f64>) -> f64 {
population_or_sample_variance(x, true)
}
pub fn sample_variance(x: &Matrix<f64>) -> f64 {
population_or_sample_variance(x, false)
}
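
For reference, the population/sample pair collapsed into a single `variance` here differs only in the denominator; for `m` values `v_i` with mean `\bar{v}`:

```latex
\sigma^2_{\text{population}} = \frac{1}{m} \sum_{i=1}^{m} (v_i - \bar{v})^2,
\qquad
s^2_{\text{sample}} = \frac{1}{m - 1} \sum_{i=1}^{m} (v_i - \bar{v})^2
```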
fn _population_or_sample_variance_axis(
x: &Matrix<f64>,
axis: Axis,
population: bool,
) -> Matrix<f64> {
fn _variance_axis(x: &Matrix<f64>, axis: Axis) -> Matrix<f64> {
match axis {
Axis::Row => {
// Calculate variance for each column (vertical variance)
@@ -51,7 +39,7 @@ fn _population_or_sample_variance_axis(
let diff = x.get(r, c) - mean_val;
sum_sq_diff += diff * diff;
}
result_data[c] = sum_sq_diff / (if population { num_rows } else { num_rows - 1.0 });
result_data[c] = sum_sq_diff / num_rows;
}
Matrix::from_vec(result_data, 1, x.cols())
}
@@ -68,39 +56,30 @@ fn _population_or_sample_variance_axis(
let diff = x.get(r, c) - mean_val;
sum_sq_diff += diff * diff;
}
result_data[r] = sum_sq_diff / (if population { num_cols } else { num_cols - 1.0 });
result_data[r] = sum_sq_diff / num_cols;
}
Matrix::from_vec(result_data, x.rows(), 1)
}
}
}
pub fn population_variance_vertical(x: &Matrix<f64>) -> Matrix<f64> {
_population_or_sample_variance_axis(x, Axis::Row, true)
pub fn variance_vertical(x: &Matrix<f64>) -> Matrix<f64> {
_variance_axis(x, Axis::Row)
}
pub fn population_variance_horizontal(x: &Matrix<f64>) -> Matrix<f64> {
_population_or_sample_variance_axis(x, Axis::Col, true)
}
pub fn sample_variance_vertical(x: &Matrix<f64>) -> Matrix<f64> {
_population_or_sample_variance_axis(x, Axis::Row, false)
}
pub fn sample_variance_horizontal(x: &Matrix<f64>) -> Matrix<f64> {
_population_or_sample_variance_axis(x, Axis::Col, false)
pub fn variance_horizontal(x: &Matrix<f64>) -> Matrix<f64> {
_variance_axis(x, Axis::Col)
}
pub fn stddev(x: &Matrix<f64>) -> f64 {
population_variance(x).sqrt()
variance(x).sqrt()
}
pub fn stddev_vertical(x: &Matrix<f64>) -> Matrix<f64> {
population_variance_vertical(x).map(|v| v.sqrt())
variance_vertical(x).map(|v| v.sqrt())
}
pub fn stddev_horizontal(x: &Matrix<f64>) -> Matrix<f64> {
population_variance_horizontal(x).map(|v| v.sqrt())
variance_horizontal(x).map(|v| v.sqrt())
}
pub fn median(x: &Matrix<f64>) -> f64 {
@@ -201,7 +180,7 @@ mod tests {
assert!((mean(&x) - 3.0).abs() < EPSILON);
// Variance
assert!((population_variance(&x) - 2.0).abs() < EPSILON);
assert!((variance(&x) - 2.0).abs() < EPSILON);
// Standard Deviation
assert!((stddev(&x) - 1.4142135623730951).abs() < EPSILON);
@@ -230,7 +209,7 @@ mod tests {
assert!((mean(&x) - 22.0).abs() < EPSILON);
// Variance should be heavily affected by outlier
assert!((population_variance(&x) - 1522.0).abs() < EPSILON);
assert!((variance(&x) - 1522.0).abs() < EPSILON);
// Standard Deviation should be heavily affected by outlier
assert!((stddev(&x) - 39.0128183970461).abs() < EPSILON);
@@ -279,25 +258,14 @@ mod tests {
let x = Matrix::from_vec(data, 2, 3);
// cols: {1,4}, {2,5}, {3,6} all give 2.25
let vv = population_variance_vertical(&x);
let vv = variance_vertical(&x);
for c in 0..3 {
assert!((vv.get(0, c) - 2.25).abs() < EPSILON);
}
let vh = population_variance_horizontal(&x);
let vh = variance_horizontal(&x);
assert!((vh.get(0, 0) - (2.0 / 3.0)).abs() < EPSILON);
assert!((vh.get(1, 0) - (2.0 / 3.0)).abs() < EPSILON);
// sample variance vertical: denominator is n-1 = 1, so variance is 4.5
let svv = sample_variance_vertical(&x);
for c in 0..3 {
assert!((svv.get(0, c) - 4.5).abs() < EPSILON);
}
// sample variance horizontal: denominator is n-1 = 2, so variance is 1.0
let svh = sample_variance_horizontal(&x);
assert!((svh.get(0, 0) - 1.0).abs() < EPSILON);
assert!((svh.get(1, 0) - 1.0).abs() < EPSILON);
}
#[test]
@@ -316,17 +284,6 @@ mod tests {
let expected = (2.0 / 3.0 as f64).sqrt();
assert!((sh.get(0, 0) - expected).abs() < EPSILON);
assert!((sh.get(1, 0) - expected).abs() < EPSILON);
// sample stddev vertical: sqrt(4.5) ≈ 2.12132034
let ssv = sample_variance_vertical(&x).map(|v| v.sqrt());
for c in 0..3 {
assert!((ssv.get(0, c) - 2.1213203435596424).abs() < EPSILON);
}
// sample stddev horizontal: sqrt(1.0) = 1.0
let ssh = sample_variance_horizontal(&x).map(|v| v.sqrt());
assert!((ssh.get(0, 0) - 1.0).abs() < EPSILON);
assert!((ssh.get(1, 0) - 1.0).abs() < EPSILON);
}
#[test]

View File

@@ -1,131 +0,0 @@
use crate::matrix::{Matrix, SeriesOps};
use crate::compute::stats::{gamma_cdf, mean, sample_variance};
/// Two-sample t-test returning (t_statistic, p_value)
pub fn t_test(sample1: &Matrix<f64>, sample2: &Matrix<f64>) -> (f64, f64) {
let mean1 = mean(sample1);
let mean2 = mean(sample2);
let var1 = sample_variance(sample1);
let var2 = sample_variance(sample2);
let n1 = (sample1.rows() * sample1.cols()) as f64;
let n2 = (sample2.rows() * sample2.cols()) as f64;
let t_statistic = (mean1 - mean2) / ((var1 / n1 + var2 / n2).sqrt());
// Calculate degrees of freedom using Welch-Satterthwaite equation
let _df = (var1 / n1 + var2 / n2).powi(2)
/ ((var1 / n1).powi(2) / (n1 - 1.0) + (var2 / n2).powi(2) / (n2 - 1.0));
// Two-tailed p-value via the t-distribution CDF is not implemented here;
// 0.5 is a placeholder.
let p_value = 0.5;
(t_statistic, p_value)
}
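
In symbols, the statistic above is Welch's two-sample t, and `_df` is the Welch–Satterthwaite degrees of freedom (computed but unused while the p-value is a placeholder):

```latex
t = \frac{\bar{x}_1 - \bar{x}_2}{\sqrt{s_1^2 / n_1 + s_2^2 / n_2}},
\qquad
\nu \approx \frac{\left( s_1^2/n_1 + s_2^2/n_2 \right)^2}
{\frac{(s_1^2/n_1)^2}{n_1 - 1} + \frac{(s_2^2/n_2)^2}{n_2 - 1}}
```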
/// Chi-square test of independence
pub fn chi2_test(observed: &Matrix<f64>) -> (f64, f64) {
let (rows, cols) = observed.shape();
let row_sums: Vec<f64> = observed.sum_horizontal();
let col_sums: Vec<f64> = observed.sum_vertical();
let grand_total: f64 = observed.data().iter().sum();
let mut chi2_statistic: f64 = 0.0;
for i in 0..rows {
for j in 0..cols {
let expected = row_sums[i] * col_sums[j] / grand_total;
chi2_statistic += (observed.get(i, j) - expected).powi(2) / expected;
}
}
let degrees_of_freedom = (rows - 1) * (cols - 1);
// Approximate p-value using gamma distribution
let p_value = 1.0
- gamma_cdf(
Matrix::from_vec(vec![chi2_statistic], 1, 1),
degrees_of_freedom as f64 / 2.0,
1.0,
)
.get(0, 0);
(chi2_statistic, p_value)
}
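
The loop above builds expected counts from the row/column margins and accumulates the usual statistic:

```latex
E_{ij} = \frac{R_i \, C_j}{N},
\qquad
\chi^2 = \sum_{i,j} \frac{(O_{ij} - E_{ij})^2}{E_{ij}},
\qquad
\text{df} = (r - 1)(c - 1)
```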
/// One-way ANOVA
pub fn anova(groups: Vec<&Matrix<f64>>) -> (f64, f64) {
let k = groups.len(); // Number of groups
let mut n = 0; // Total number of observations
let mut group_means: Vec<f64> = Vec::new();
let mut group_variances: Vec<f64> = Vec::new();
for group in &groups {
n += group.rows() * group.cols();
group_means.push(mean(group));
group_variances.push(sample_variance(group));
}
let grand_mean: f64 = group_means.iter().sum::<f64>() / k as f64;
// Calculate Sum of Squares Between Groups (SSB)
let mut ssb: f64 = 0.0;
for i in 0..k {
ssb += (group_means[i] - grand_mean).powi(2) * (groups[i].rows() * groups[i].cols()) as f64;
}
// Calculate Sum of Squares Within Groups (SSW)
let mut ssw: f64 = 0.0;
for i in 0..k {
ssw += group_variances[i] * (groups[i].rows() * groups[i].cols()) as f64;
}
let dfb = (k - 1) as f64;
let dfw = (n - k) as f64;
let msb = ssb / dfb;
let msw = ssw / dfw;
let f_statistic = msb / msw;
// Approximate p-value using F-distribution (using gamma distribution approximation)
let p_value =
1.0 - gamma_cdf(Matrix::from_vec(vec![f_statistic], 1, 1), dfb / 2.0, 1.0).get(0, 0);
(f_statistic, p_value)
}
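
In symbols, the F statistic assembled above is the ratio of between-group to within-group mean squares:

```latex
F = \frac{\text{MSB}}{\text{MSW}} = \frac{\text{SSB} / (k - 1)}{\text{SSW} / (n - k)}
```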
#[cfg(test)]
mod tests {
use super::*;
use crate::matrix::Matrix;
const EPS: f64 = 1e-5;
#[test]
fn test_t_test() {
let sample1 = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0], 1, 5);
let sample2 = Matrix::from_vec(vec![6.0, 7.0, 8.0, 9.0, 10.0], 1, 5);
let (t_statistic, p_value) = t_test(&sample1, &sample2);
assert!((t_statistic + 5.0).abs() < EPS);
assert!(p_value > 0.0 && p_value < 1.0);
}
#[test]
fn test_chi2_test() {
let observed = Matrix::from_vec(vec![12.0, 5.0, 8.0, 10.0], 2, 2);
let (chi2_statistic, p_value) = chi2_test(&observed);
assert!(chi2_statistic > 0.0);
assert!(p_value > 0.0 && p_value < 1.0);
}
#[test]
fn test_anova() {
let group1 = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0], 1, 5);
let group2 = Matrix::from_vec(vec![2.0, 3.0, 4.0, 5.0, 6.0], 1, 5);
let group3 = Matrix::from_vec(vec![3.0, 4.0, 5.0, 6.0, 7.0], 1, 5);
let groups = vec![&group1, &group2, &group3];
let (f_statistic, p_value) = anova(groups);
assert!(f_statistic > 0.0);
assert!(p_value > 0.0 && p_value < 1.0);
}
}

View File

@@ -1,9 +1,7 @@
pub mod correlation;
pub mod descriptive;
pub mod distributions;
pub mod inferential;
pub use correlation::*;
pub use descriptive::*;
pub use distributions::*;
pub use inferential::*;