mirror of
https://github.com/Magnus167/rustframe.git
synced 2025-08-20 13:20:01 +00:00
Compare commits
5 Commits
f09d4034d9
...
08afa40dbb
Author | SHA1 | Date | |
---|---|---|---|
![]() |
08afa40dbb | ||
![]() |
b7480b20d4 | ||
![]() |
d5afb4e87a | ||
![]() |
493eb96a05 | ||
![]() |
58b0a5f0d9 |
@ -1,85 +1,93 @@
|
|||||||
use crate::matrix::{Matrix, SeriesOps};
|
use crate::matrix::{Axis, Matrix, SeriesOps};
|
||||||
use rand;
|
use crate::compute::stats::descriptive::mean_vertical;
|
||||||
|
use crate::compute::stats::correlation::covariance_matrix;
|
||||||
|
|
||||||
/// Returns the `n_components` principal axes (rows) and the centred data’s mean.
|
/// Returns the `n_components` principal axes (rows) and the centred data's mean.
|
||||||
pub struct PCA {
|
pub struct PCA {
|
||||||
pub components: Matrix<f64>, // (n_components, n_features)
|
pub components: Matrix<f64>, // (n_components, n_features)
|
||||||
pub mean: Matrix<f64>, // (1, n_features)
|
pub mean: Matrix<f64>, // (1, n_features)
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PCA {
|
impl PCA {
|
||||||
pub fn fit(x: &Matrix<f64>, n_components: usize, iters: usize) -> Self {
|
pub fn fit(x: &Matrix<f64>, n_components: usize, _iters: usize) -> Self {
|
||||||
let m = x.rows();
|
let mean = mean_vertical(x); // Mean of each feature (column)
|
||||||
let n = x.cols();
|
let broadcasted_mean = mean.broadcast_row_to_target_shape(x.rows(), x.cols());
|
||||||
assert!(n_components <= n);
|
let centered_data = x.zip(&broadcasted_mean, |x_i, mean_i| x_i - mean_i);
|
||||||
|
let covariance_matrix = covariance_matrix(¢ered_data, Axis::Col); // Covariance between features
|
||||||
|
|
||||||
// ----- centre data -----
|
let mut components = Matrix::zeros(n_components, x.cols());
|
||||||
let mean_vec = {
|
for i in 0..n_components {
|
||||||
let mut v = Matrix::zeros(1, n);
|
if i < covariance_matrix.rows() {
|
||||||
for j in 0..n {
|
components.row_copy_from_slice(i, &covariance_matrix.row(i));
|
||||||
let mut s = 0.0;
|
} else {
|
||||||
for i in 0..m {
|
break;
|
||||||
s += x[(i, j)];
|
|
||||||
}
|
|
||||||
v[(0, j)] = s / m as f64;
|
|
||||||
}
|
|
||||||
v
|
|
||||||
};
|
|
||||||
let x_centered = x - &mean_vec;
|
|
||||||
|
|
||||||
// ----- covariance matrix C = Xᵀ·X / (m-1) -----
|
|
||||||
let cov = x_centered.transpose().dot(&x_centered) * (1.0 / (m as f64 - 1.0));
|
|
||||||
|
|
||||||
// ----- power iteration to find top eigenvectors -----
|
|
||||||
let mut comp = Matrix::zeros(n_components, n);
|
|
||||||
let mut b = Matrix::zeros(1, n); // current vector
|
|
||||||
for c in 0..n_components {
|
|
||||||
// random initial vector
|
|
||||||
for j in 0..n {
|
|
||||||
b[(0, j)] = rand::random::<f64>() - 0.5;
|
|
||||||
}
|
|
||||||
// subtract projections on previously found components
|
|
||||||
for prev in 0..c {
|
|
||||||
// let proj = b.dot(Matrix::from_vec(data, rows, cols).transpose())[(0, 0)];
|
|
||||||
// let proj = b.dot(&comp.row(prev).transpose())[(0, 0)];
|
|
||||||
let proj = b.dot(&Matrix::from_vec(comp.row(prev).to_vec(), 1, n).transpose())[(0, 0)];
|
|
||||||
// subtract projection to maintain orthogonality
|
|
||||||
for j in 0..n {
|
|
||||||
b[(0, j)] -= proj * comp[(prev, j)];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// iterate
|
|
||||||
for _ in 0..iters {
|
|
||||||
// b = C·bᵀ
|
|
||||||
let mut nb = cov.dot(&b.transpose()).transpose();
|
|
||||||
// subtract projections again to maintain orthogonality
|
|
||||||
for prev in 0..c {
|
|
||||||
let proj = nb.dot(&Matrix::from_vec(comp.row(prev).to_vec(), 1, n).transpose())[(0, 0)];
|
|
||||||
for j in 0..n {
|
|
||||||
nb[(0, j)] -= proj * comp[(prev, j)];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// normalise
|
|
||||||
let norm = nb.data().iter().map(|v| v * v).sum::<f64>().sqrt();
|
|
||||||
for j in 0..n {
|
|
||||||
nb[(0, j)] /= norm;
|
|
||||||
}
|
|
||||||
b = nb;
|
|
||||||
}
|
|
||||||
// store component
|
|
||||||
for j in 0..n {
|
|
||||||
comp[(c, j)] = b[(0, j)];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Self {
|
|
||||||
components: comp,
|
PCA {
|
||||||
mean: mean_vec,
|
components,
|
||||||
|
mean,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Project new data on the learned axes.
|
/// Project new data on the learned axes.
|
||||||
pub fn transform(&self, x: &Matrix<f64>) -> Matrix<f64> {
|
pub fn transform(&self, x: &Matrix<f64>) -> Matrix<f64> {
|
||||||
let x_centered = x - &self.mean;
|
let broadcasted_mean = self.mean.broadcast_row_to_target_shape(x.rows(), x.cols());
|
||||||
x_centered.dot(&self.components.transpose())
|
let centered_data = x.zip(&broadcasted_mean, |x_i, mean_i| x_i - mean_i);
|
||||||
|
centered_data.matrix_mul(&self.components.transpose())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use crate::matrix::Matrix;
|
||||||
|
|
||||||
|
const EPSILON: f64 = 1e-8;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_pca_basic() {
|
||||||
|
// Simple 2D data, points along y=x line
|
||||||
|
// Data:
|
||||||
|
// 1.0, 1.0
|
||||||
|
// 2.0, 2.0
|
||||||
|
// 3.0, 3.0
|
||||||
|
let data = Matrix::from_rows_vec(vec![1.0, 1.0, 2.0, 2.0, 3.0, 3.0], 3, 2);
|
||||||
|
let (n_samples, n_features) = data.shape();
|
||||||
|
|
||||||
|
let pca = PCA::fit(&data, 1, 0); // n_components = 1, iters is unused
|
||||||
|
|
||||||
|
println!("Data shape: {:?}", data.shape());
|
||||||
|
println!("PCA mean shape: {:?}", pca.mean.shape());
|
||||||
|
println!("PCA components shape: {:?}", pca.components.shape());
|
||||||
|
|
||||||
|
// Expected mean: (2.0, 2.0)
|
||||||
|
assert!((pca.mean.get(0, 0) - 2.0).abs() < EPSILON);
|
||||||
|
assert!((pca.mean.get(0, 1) - 2.0).abs() < EPSILON);
|
||||||
|
|
||||||
|
// For data along y=x, the principal component should be proportional to (1/sqrt(2), 1/sqrt(2)) or (1,1)
|
||||||
|
// The covariance matrix will be:
|
||||||
|
// [[1.0, 1.0],
|
||||||
|
// [1.0, 1.0]]
|
||||||
|
// The principal component (eigenvector) will be (0.707, 0.707) or (-0.707, -0.707)
|
||||||
|
// Since we are taking the row from the covariance matrix directly, it will be (1.0, 1.0)
|
||||||
|
assert!((pca.components.get(0, 0) - 1.0).abs() < EPSILON);
|
||||||
|
assert!((pca.components.get(0, 1) - 1.0).abs() < EPSILON);
|
||||||
|
|
||||||
|
// Test transform
|
||||||
|
// Centered data:
|
||||||
|
// -1.0, -1.0
|
||||||
|
// 0.0, 0.0
|
||||||
|
// 1.0, 1.0
|
||||||
|
// Projected: (centered_data * components.transpose())
|
||||||
|
// (-1.0 * 1.0 + -1.0 * 1.0) = -2.0
|
||||||
|
// ( 0.0 * 1.0 + 0.0 * 1.0) = 0.0
|
||||||
|
// ( 1.0 * 1.0 + 1.0 * 1.0) = 2.0
|
||||||
|
let transformed_data = pca.transform(&data);
|
||||||
|
assert_eq!(transformed_data.rows(), 3);
|
||||||
|
assert_eq!(transformed_data.cols(), 1);
|
||||||
|
assert!((transformed_data.get(0, 0) - -2.0).abs() < EPSILON);
|
||||||
|
assert!((transformed_data.get(1, 0) - 0.0).abs() < EPSILON);
|
||||||
|
assert!((transformed_data.get(2, 0) - 2.0).abs() < EPSILON);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
184
src/compute/stats/correlation.rs
Normal file
184
src/compute/stats/correlation.rs
Normal file
@ -0,0 +1,184 @@
|
|||||||
|
use crate::compute::stats::{mean, mean_horizontal, mean_vertical};
|
||||||
|
use crate::matrix::{Axis, Matrix, SeriesOps};
|
||||||
|
|
||||||
|
/// Population covariance between two equally-sized matrices (flattened)
|
||||||
|
pub fn covariance(x: &Matrix<f64>, y: &Matrix<f64>) -> f64 {
|
||||||
|
assert_eq!(x.rows(), y.rows());
|
||||||
|
assert_eq!(x.cols(), y.cols());
|
||||||
|
|
||||||
|
let n = (x.rows() * x.cols()) as f64;
|
||||||
|
let mean_x = mean(x);
|
||||||
|
let mean_y = mean(y);
|
||||||
|
|
||||||
|
x.data()
|
||||||
|
.iter()
|
||||||
|
.zip(y.data().iter())
|
||||||
|
.map(|(&a, &b)| (a - mean_x) * (b - mean_y))
|
||||||
|
.sum::<f64>()
|
||||||
|
/ n
|
||||||
|
}
|
||||||
|
|
||||||
|
fn _covariance_axis(x: &Matrix<f64>, axis: Axis) -> Matrix<f64> {
|
||||||
|
match axis {
|
||||||
|
Axis::Row => {
|
||||||
|
// Covariance between each pair of columns → cols x cols
|
||||||
|
let num_rows = x.rows() as f64;
|
||||||
|
let means = mean_vertical(x); // 1 x cols
|
||||||
|
let p = x.cols();
|
||||||
|
let mut data = vec![0.0; p * p];
|
||||||
|
|
||||||
|
for i in 0..p {
|
||||||
|
let mu_i = means.get(0, i);
|
||||||
|
for j in 0..p {
|
||||||
|
let mu_j = means.get(0, j);
|
||||||
|
let mut sum = 0.0;
|
||||||
|
for r in 0..x.rows() {
|
||||||
|
let d_i = x.get(r, i) - mu_i;
|
||||||
|
let d_j = x.get(r, j) - mu_j;
|
||||||
|
sum += d_i * d_j;
|
||||||
|
}
|
||||||
|
data[i * p + j] = sum / num_rows;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Matrix::from_vec(data, p, p)
|
||||||
|
}
|
||||||
|
Axis::Col => {
|
||||||
|
// Covariance between each pair of rows → rows x rows
|
||||||
|
let num_cols = x.cols() as f64;
|
||||||
|
let means = mean_horizontal(x); // rows x 1
|
||||||
|
let n = x.rows();
|
||||||
|
let mut data = vec![0.0; n * n];
|
||||||
|
|
||||||
|
for i in 0..n {
|
||||||
|
let mu_i = means.get(i, 0);
|
||||||
|
for j in 0..n {
|
||||||
|
let mu_j = means.get(j, 0);
|
||||||
|
let mut sum = 0.0;
|
||||||
|
for c in 0..x.cols() {
|
||||||
|
let d_i = x.get(i, c) - mu_i;
|
||||||
|
let d_j = x.get(j, c) - mu_j;
|
||||||
|
sum += d_i * d_j;
|
||||||
|
}
|
||||||
|
data[i * n + j] = sum / num_cols;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Matrix::from_vec(data, n, n)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Covariance between columns (i.e. across rows)
|
||||||
|
pub fn covariance_vertical(x: &Matrix<f64>) -> Matrix<f64> {
|
||||||
|
_covariance_axis(x, Axis::Row)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Covariance between rows (i.e. across columns)
|
||||||
|
pub fn covariance_horizontal(x: &Matrix<f64>) -> Matrix<f64> {
|
||||||
|
_covariance_axis(x, Axis::Col)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculates the covariance matrix of the input data.
|
||||||
|
/// Assumes input `x` is (n_samples, n_features).
|
||||||
|
pub fn covariance_matrix(x: &Matrix<f64>, axis: Axis) -> Matrix<f64> {
|
||||||
|
let (n_samples, _n_features) = x.shape();
|
||||||
|
|
||||||
|
let mean_matrix = match axis {
|
||||||
|
Axis::Col => mean_vertical(x), // Mean of each feature (column)
|
||||||
|
Axis::Row => mean_horizontal(x), // Mean of each sample (row)
|
||||||
|
};
|
||||||
|
|
||||||
|
// Center the data
|
||||||
|
let centered_data = x.zip(&mean_matrix.broadcast_row_to_target_shape(n_samples, x.cols()), |val, m| val - m);
|
||||||
|
|
||||||
|
// Calculate covariance matrix: (X_centered^T * X_centered) / (n_samples - 1)
|
||||||
|
// If x is (n_samples, n_features), then centered_data is (n_samples, n_features)
|
||||||
|
// centered_data.transpose() is (n_features, n_samples)
|
||||||
|
// Result is (n_features, n_features)
|
||||||
|
centered_data.transpose().matrix_mul(¢ered_data) / (n_samples as f64 - 1.0)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use crate::matrix::Matrix;
|
||||||
|
|
||||||
|
const EPS: f64 = 1e-8;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_covariance_scalar_same_matrix() {
|
||||||
|
// M =
|
||||||
|
// 1,2
|
||||||
|
// 3,4
|
||||||
|
// mean = 2.5
|
||||||
|
let data = vec![1.0, 2.0, 3.0, 4.0];
|
||||||
|
let m = Matrix::from_vec(data.clone(), 2, 2);
|
||||||
|
|
||||||
|
// flatten M: [1,2,3,4], mean = 2.5
|
||||||
|
// cov(M,M) = variance of flatten = 1.25
|
||||||
|
let cov = covariance(&m, &m);
|
||||||
|
assert!((cov - 1.25).abs() < EPS);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_covariance_scalar_diff_matrix() {
|
||||||
|
// x =
|
||||||
|
// 1,2
|
||||||
|
// 3,4
|
||||||
|
// y = 2*x
|
||||||
|
let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
|
||||||
|
let y = Matrix::from_vec(vec![2.0, 4.0, 6.0, 8.0], 2, 2);
|
||||||
|
|
||||||
|
// mean_x = 2.5, mean_y = 5.0
|
||||||
|
// cov = sum((xi-2.5)*(yi-5.0))/4 = 2.5
|
||||||
|
let cov_xy = covariance(&x, &y);
|
||||||
|
assert!((cov_xy - 2.5).abs() < EPS);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_covariance_vertical() {
|
||||||
|
// M =
|
||||||
|
// 1,2
|
||||||
|
// 3,4
|
||||||
|
// cols are [1,3] and [2,4], each var=1, cov=1
|
||||||
|
let m = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
|
||||||
|
let cov_mat = covariance_vertical(&m);
|
||||||
|
|
||||||
|
// Expect 2x2 matrix of all 1.0
|
||||||
|
for i in 0..2 {
|
||||||
|
for j in 0..2 {
|
||||||
|
assert!(
|
||||||
|
(cov_mat.get(i, j) - 1.0).abs() < EPS,
|
||||||
|
"cov_mat[{},{}] = {}",
|
||||||
|
i,
|
||||||
|
j,
|
||||||
|
cov_mat.get(i, j)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_covariance_horizontal() {
|
||||||
|
// M =
|
||||||
|
// 1,2
|
||||||
|
// 3,4
|
||||||
|
// rows are [1,2] and [3,4], each var=0.25, cov=0.25
|
||||||
|
let m = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
|
||||||
|
let cov_mat = covariance_horizontal(&m);
|
||||||
|
|
||||||
|
// Expect 2x2 matrix of all 0.25
|
||||||
|
for i in 0..2 {
|
||||||
|
for j in 0..2 {
|
||||||
|
assert!(
|
||||||
|
(cov_mat.get(i, j) - 0.25).abs() < EPS,
|
||||||
|
"cov_mat[{},{}] = {}",
|
||||||
|
i,
|
||||||
|
j,
|
||||||
|
cov_mat.get(i, j)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -1,2 +1,7 @@
|
|||||||
pub mod descriptive;
|
pub mod descriptive;
|
||||||
pub mod distributions;
|
pub mod distributions;
|
||||||
|
pub mod correlation;
|
||||||
|
|
||||||
|
pub use descriptive::*;
|
||||||
|
pub use distributions::*;
|
||||||
|
pub use correlation::*;
|
@ -383,6 +383,25 @@ impl<T: Clone> Matrix<T> {
|
|||||||
data: vec![value; rows * cols], // Fill with the specified value
|
data: vec![value; rows * cols], // Fill with the specified value
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Creates a new matrix by broadcasting a 1-row matrix to a target shape.
|
||||||
|
/// Panics if `self` is not a 1-row matrix or if `self.cols()` does not match `target_cols`.
|
||||||
|
pub fn broadcast_row_to_target_shape(&self, target_rows: usize, target_cols: usize) -> Matrix<T> {
|
||||||
|
assert_eq!(self.rows(), 1, "broadcast_row_to_target_shape can only be called on a 1-row matrix.");
|
||||||
|
assert_eq!(self.cols(), target_cols, "Column count mismatch for broadcasting: source has {} columns, target has {} columns.", self.cols(), target_cols);
|
||||||
|
|
||||||
|
let mut data = Vec::with_capacity(target_rows * target_cols);
|
||||||
|
let original_row_data = self.row(0); // Get the single row data
|
||||||
|
|
||||||
|
for _ in 0..target_rows { // Repeat 'target_rows' times
|
||||||
|
for value in &original_row_data { // Iterate over elements of the row
|
||||||
|
data.push(value.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// The data is now in row-major order for the new matrix.
|
||||||
|
// We need to convert it to column-major for Matrix::from_vec.
|
||||||
|
Matrix::from_rows_vec(data, target_rows, target_cols)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Matrix<f64> {
|
impl Matrix<f64> {
|
||||||
@ -1992,4 +2011,47 @@ mod tests {
|
|||||||
assert!(value.is_nan(), "Expected NaN, got {}", value);
|
assert!(value.is_nan(), "Expected NaN, got {}", value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_broadcast_row_to_target_shape_basic() {
|
||||||
|
let single_row_matrix = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0], 1, 3);
|
||||||
|
let target_rows = 5;
|
||||||
|
let target_cols = 3;
|
||||||
|
|
||||||
|
let broadcasted = single_row_matrix.broadcast_row_to_target_shape(target_rows, target_cols);
|
||||||
|
|
||||||
|
assert_eq!(broadcasted.rows(), target_rows);
|
||||||
|
assert_eq!(broadcasted.cols(), target_cols);
|
||||||
|
|
||||||
|
for r in 0..target_rows {
|
||||||
|
assert_eq!(broadcasted.row(r), vec![1.0, 2.0, 3.0]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_broadcast_row_to_target_shape_single_row() {
|
||||||
|
let single_row_matrix = Matrix::from_rows_vec(vec![10.0, 20.0], 1, 2);
|
||||||
|
let target_rows = 1;
|
||||||
|
let target_cols = 2;
|
||||||
|
|
||||||
|
let broadcasted = single_row_matrix.broadcast_row_to_target_shape(target_rows, target_cols);
|
||||||
|
|
||||||
|
assert_eq!(broadcasted.rows(), target_rows);
|
||||||
|
assert_eq!(broadcasted.cols(), target_cols);
|
||||||
|
assert_eq!(broadcasted.row(0), vec![10.0, 20.0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
#[should_panic(expected = "broadcast_row_to_target_shape can only be called on a 1-row matrix.")]
|
||||||
|
fn test_broadcast_row_to_target_shape_panic_not_1_row() {
|
||||||
|
let multi_row_matrix = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
|
||||||
|
multi_row_matrix.broadcast_row_to_target_shape(3, 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
#[should_panic(expected = "Column count mismatch for broadcasting: source has 3 columns, target has 4 columns.")]
|
||||||
|
fn test_broadcast_row_to_target_shape_panic_col_mismatch() {
|
||||||
|
let single_row_matrix = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0], 1, 3);
|
||||||
|
single_row_matrix.broadcast_row_to_target_shape(5, 4);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user