mirror of
https://github.com/Magnus167/rustframe.git
synced 2025-08-20 19:20:01 +00:00
Compare commits
No commits in common. "a335d29347d22ebf97fef7dd82d26a68a76bf373" and "64722914bdc8ba5e803e30a5e51f6be9aa0cee9d" have entirely different histories.
a335d29347
...
64722914bd
@ -14,29 +14,17 @@ pub fn mean_horizontal(x: &Matrix<f64>) -> Matrix<f64> {
|
|||||||
Matrix::from_vec(x.sum_horizontal(), x.rows(), 1) / n
|
Matrix::from_vec(x.sum_horizontal(), x.rows(), 1) / n
|
||||||
}
|
}
|
||||||
|
|
||||||
fn population_or_sample_variance(x: &Matrix<f64>, population: bool) -> f64 {
|
pub fn variance(x: &Matrix<f64>) -> f64 {
|
||||||
let m = (x.rows() * x.cols()) as f64;
|
let m = (x.rows() * x.cols()) as f64;
|
||||||
let mean_val = mean(x);
|
let mean_val = mean(x);
|
||||||
x.data()
|
x.data()
|
||||||
.iter()
|
.iter()
|
||||||
.map(|&v| (v - mean_val).powi(2))
|
.map(|&v| (v - mean_val).powi(2))
|
||||||
.sum::<f64>()
|
.sum::<f64>()
|
||||||
/ if population { m } else { m - 1.0 }
|
/ m
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn population_variance(x: &Matrix<f64>) -> f64 {
|
fn _variance_axis(x: &Matrix<f64>, axis: Axis) -> Matrix<f64> {
|
||||||
population_or_sample_variance(x, true)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn sample_variance(x: &Matrix<f64>) -> f64 {
|
|
||||||
population_or_sample_variance(x, false)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn _population_or_sample_variance_axis(
|
|
||||||
x: &Matrix<f64>,
|
|
||||||
axis: Axis,
|
|
||||||
population: bool,
|
|
||||||
) -> Matrix<f64> {
|
|
||||||
match axis {
|
match axis {
|
||||||
Axis::Row => {
|
Axis::Row => {
|
||||||
// Calculate variance for each column (vertical variance)
|
// Calculate variance for each column (vertical variance)
|
||||||
@ -51,7 +39,7 @@ fn _population_or_sample_variance_axis(
|
|||||||
let diff = x.get(r, c) - mean_val;
|
let diff = x.get(r, c) - mean_val;
|
||||||
sum_sq_diff += diff * diff;
|
sum_sq_diff += diff * diff;
|
||||||
}
|
}
|
||||||
result_data[c] = sum_sq_diff / (if population { num_rows } else { num_rows - 1.0 });
|
result_data[c] = sum_sq_diff / num_rows;
|
||||||
}
|
}
|
||||||
Matrix::from_vec(result_data, 1, x.cols())
|
Matrix::from_vec(result_data, 1, x.cols())
|
||||||
}
|
}
|
||||||
@ -68,39 +56,30 @@ fn _population_or_sample_variance_axis(
|
|||||||
let diff = x.get(r, c) - mean_val;
|
let diff = x.get(r, c) - mean_val;
|
||||||
sum_sq_diff += diff * diff;
|
sum_sq_diff += diff * diff;
|
||||||
}
|
}
|
||||||
result_data[r] = sum_sq_diff / (if population { num_cols } else { num_cols - 1.0 });
|
result_data[r] = sum_sq_diff / num_cols;
|
||||||
}
|
}
|
||||||
Matrix::from_vec(result_data, x.rows(), 1)
|
Matrix::from_vec(result_data, x.rows(), 1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn population_variance_vertical(x: &Matrix<f64>) -> Matrix<f64> {
|
pub fn variance_vertical(x: &Matrix<f64>) -> Matrix<f64> {
|
||||||
_population_or_sample_variance_axis(x, Axis::Row, true)
|
_variance_axis(x, Axis::Row)
|
||||||
}
|
}
|
||||||
|
pub fn variance_horizontal(x: &Matrix<f64>) -> Matrix<f64> {
|
||||||
pub fn population_variance_horizontal(x: &Matrix<f64>) -> Matrix<f64> {
|
_variance_axis(x, Axis::Col)
|
||||||
_population_or_sample_variance_axis(x, Axis::Col, true)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn sample_variance_vertical(x: &Matrix<f64>) -> Matrix<f64> {
|
|
||||||
_population_or_sample_variance_axis(x, Axis::Row, false)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn sample_variance_horizontal(x: &Matrix<f64>) -> Matrix<f64> {
|
|
||||||
_population_or_sample_variance_axis(x, Axis::Col, false)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn stddev(x: &Matrix<f64>) -> f64 {
|
pub fn stddev(x: &Matrix<f64>) -> f64 {
|
||||||
population_variance(x).sqrt()
|
variance(x).sqrt()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn stddev_vertical(x: &Matrix<f64>) -> Matrix<f64> {
|
pub fn stddev_vertical(x: &Matrix<f64>) -> Matrix<f64> {
|
||||||
population_variance_vertical(x).map(|v| v.sqrt())
|
variance_vertical(x).map(|v| v.sqrt())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn stddev_horizontal(x: &Matrix<f64>) -> Matrix<f64> {
|
pub fn stddev_horizontal(x: &Matrix<f64>) -> Matrix<f64> {
|
||||||
population_variance_horizontal(x).map(|v| v.sqrt())
|
variance_horizontal(x).map(|v| v.sqrt())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn median(x: &Matrix<f64>) -> f64 {
|
pub fn median(x: &Matrix<f64>) -> f64 {
|
||||||
@ -201,7 +180,7 @@ mod tests {
|
|||||||
assert!((mean(&x) - 3.0).abs() < EPSILON);
|
assert!((mean(&x) - 3.0).abs() < EPSILON);
|
||||||
|
|
||||||
// Variance
|
// Variance
|
||||||
assert!((population_variance(&x) - 2.0).abs() < EPSILON);
|
assert!((variance(&x) - 2.0).abs() < EPSILON);
|
||||||
|
|
||||||
// Standard Deviation
|
// Standard Deviation
|
||||||
assert!((stddev(&x) - 1.4142135623730951).abs() < EPSILON);
|
assert!((stddev(&x) - 1.4142135623730951).abs() < EPSILON);
|
||||||
@ -230,7 +209,7 @@ mod tests {
|
|||||||
assert!((mean(&x) - 22.0).abs() < EPSILON);
|
assert!((mean(&x) - 22.0).abs() < EPSILON);
|
||||||
|
|
||||||
// Variance should be heavily affected by outlier
|
// Variance should be heavily affected by outlier
|
||||||
assert!((population_variance(&x) - 1522.0).abs() < EPSILON);
|
assert!((variance(&x) - 1522.0).abs() < EPSILON);
|
||||||
|
|
||||||
// Standard Deviation should be heavily affected by outlier
|
// Standard Deviation should be heavily affected by outlier
|
||||||
assert!((stddev(&x) - 39.0128183970461).abs() < EPSILON);
|
assert!((stddev(&x) - 39.0128183970461).abs() < EPSILON);
|
||||||
@ -279,25 +258,14 @@ mod tests {
|
|||||||
let x = Matrix::from_vec(data, 2, 3);
|
let x = Matrix::from_vec(data, 2, 3);
|
||||||
|
|
||||||
// cols: {1,4}, {2,5}, {3,6} all give 2.25
|
// cols: {1,4}, {2,5}, {3,6} all give 2.25
|
||||||
let vv = population_variance_vertical(&x);
|
let vv = variance_vertical(&x);
|
||||||
for c in 0..3 {
|
for c in 0..3 {
|
||||||
assert!((vv.get(0, c) - 2.25).abs() < EPSILON);
|
assert!((vv.get(0, c) - 2.25).abs() < EPSILON);
|
||||||
}
|
}
|
||||||
|
|
||||||
let vh = population_variance_horizontal(&x);
|
let vh = variance_horizontal(&x);
|
||||||
assert!((vh.get(0, 0) - (2.0 / 3.0)).abs() < EPSILON);
|
assert!((vh.get(0, 0) - (2.0 / 3.0)).abs() < EPSILON);
|
||||||
assert!((vh.get(1, 0) - (2.0 / 3.0)).abs() < EPSILON);
|
assert!((vh.get(1, 0) - (2.0 / 3.0)).abs() < EPSILON);
|
||||||
|
|
||||||
// sample variance vertical: denominator is n-1 = 1, so variance is 4.5
|
|
||||||
let svv = sample_variance_vertical(&x);
|
|
||||||
for c in 0..3 {
|
|
||||||
assert!((svv.get(0, c) - 4.5).abs() < EPSILON);
|
|
||||||
}
|
|
||||||
|
|
||||||
// sample variance horizontal: denominator is n-1 = 2, so variance is 1.0
|
|
||||||
let svh = sample_variance_horizontal(&x);
|
|
||||||
assert!((svh.get(0, 0) - 1.0).abs() < EPSILON);
|
|
||||||
assert!((svh.get(1, 0) - 1.0).abs() < EPSILON);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -316,17 +284,6 @@ mod tests {
|
|||||||
let expected = (2.0 / 3.0 as f64).sqrt();
|
let expected = (2.0 / 3.0 as f64).sqrt();
|
||||||
assert!((sh.get(0, 0) - expected).abs() < EPSILON);
|
assert!((sh.get(0, 0) - expected).abs() < EPSILON);
|
||||||
assert!((sh.get(1, 0) - expected).abs() < EPSILON);
|
assert!((sh.get(1, 0) - expected).abs() < EPSILON);
|
||||||
|
|
||||||
// sample stddev vertical: sqrt(4.5) ≈ 2.12132034
|
|
||||||
let ssv = sample_variance_vertical(&x).map(|v| v.sqrt());
|
|
||||||
for c in 0..3 {
|
|
||||||
assert!((ssv.get(0, c) - 2.1213203435596424).abs() < EPSILON);
|
|
||||||
}
|
|
||||||
|
|
||||||
// sample stddev horizontal: sqrt(1.0) = 1.0
|
|
||||||
let ssh = sample_variance_horizontal(&x).map(|v| v.sqrt());
|
|
||||||
assert!((ssh.get(0, 0) - 1.0).abs() < EPSILON);
|
|
||||||
assert!((ssh.get(1, 0) - 1.0).abs() < EPSILON);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -1,131 +0,0 @@
|
|||||||
use crate::matrix::{Matrix, SeriesOps};
|
|
||||||
|
|
||||||
use crate::compute::stats::{gamma_cdf, mean, sample_variance};
|
|
||||||
|
|
||||||
/// Two-sample t-test returning (t_statistic, p_value)
|
|
||||||
pub fn t_test(sample1: &Matrix<f64>, sample2: &Matrix<f64>) -> (f64, f64) {
|
|
||||||
let mean1 = mean(sample1);
|
|
||||||
let mean2 = mean(sample2);
|
|
||||||
let var1 = sample_variance(sample1);
|
|
||||||
let var2 = sample_variance(sample2);
|
|
||||||
let n1 = (sample1.rows() * sample1.cols()) as f64;
|
|
||||||
let n2 = (sample2.rows() * sample2.cols()) as f64;
|
|
||||||
|
|
||||||
let t_statistic = (mean1 - mean2) / ((var1 / n1 + var2 / n2).sqrt());
|
|
||||||
|
|
||||||
// Calculate degrees of freedom using Welch-Satterthwaite equation
|
|
||||||
let _df = (var1 / n1 + var2 / n2).powi(2)
|
|
||||||
/ ((var1 / n1).powi(2) / (n1 - 1.0) + (var2 / n2).powi(2) / (n2 - 1.0));
|
|
||||||
|
|
||||||
// Calculate p-value using t-distribution CDF (two-tailed)
|
|
||||||
let p_value = 0.5;
|
|
||||||
|
|
||||||
(t_statistic, p_value)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Chi-square test of independence
|
|
||||||
pub fn chi2_test(observed: &Matrix<f64>) -> (f64, f64) {
|
|
||||||
let (rows, cols) = observed.shape();
|
|
||||||
let row_sums: Vec<f64> = observed.sum_horizontal();
|
|
||||||
let col_sums: Vec<f64> = observed.sum_vertical();
|
|
||||||
let grand_total: f64 = observed.data().iter().sum();
|
|
||||||
|
|
||||||
let mut chi2_statistic: f64 = 0.0;
|
|
||||||
for i in 0..rows {
|
|
||||||
for j in 0..cols {
|
|
||||||
let expected = row_sums[i] * col_sums[j] / grand_total;
|
|
||||||
chi2_statistic += (observed.get(i, j) - expected).powi(2) / expected;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let degrees_of_freedom = (rows - 1) * (cols - 1);
|
|
||||||
|
|
||||||
// Approximate p-value using gamma distribution
|
|
||||||
let p_value = 1.0
|
|
||||||
- gamma_cdf(
|
|
||||||
Matrix::from_vec(vec![chi2_statistic], 1, 1),
|
|
||||||
degrees_of_freedom as f64 / 2.0,
|
|
||||||
1.0,
|
|
||||||
)
|
|
||||||
.get(0, 0);
|
|
||||||
|
|
||||||
(chi2_statistic, p_value)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// One-way ANOVA
|
|
||||||
pub fn anova(groups: Vec<&Matrix<f64>>) -> (f64, f64) {
|
|
||||||
let k = groups.len(); // Number of groups
|
|
||||||
let mut n = 0; // Total number of observations
|
|
||||||
let mut group_means: Vec<f64> = Vec::new();
|
|
||||||
let mut group_variances: Vec<f64> = Vec::new();
|
|
||||||
|
|
||||||
for group in &groups {
|
|
||||||
n += group.rows() * group.cols();
|
|
||||||
group_means.push(mean(group));
|
|
||||||
group_variances.push(sample_variance(group));
|
|
||||||
}
|
|
||||||
|
|
||||||
let grand_mean: f64 = group_means.iter().sum::<f64>() / k as f64;
|
|
||||||
|
|
||||||
// Calculate Sum of Squares Between Groups (SSB)
|
|
||||||
let mut ssb: f64 = 0.0;
|
|
||||||
for i in 0..k {
|
|
||||||
ssb += (group_means[i] - grand_mean).powi(2) * (groups[i].rows() * groups[i].cols()) as f64;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Calculate Sum of Squares Within Groups (SSW)
|
|
||||||
let mut ssw: f64 = 0.0;
|
|
||||||
for i in 0..k {
|
|
||||||
ssw += group_variances[i] * (groups[i].rows() * groups[i].cols()) as f64;
|
|
||||||
}
|
|
||||||
|
|
||||||
let dfb = (k - 1) as f64;
|
|
||||||
let dfw = (n - k) as f64;
|
|
||||||
|
|
||||||
let msb = ssb / dfb;
|
|
||||||
let msw = ssw / dfw;
|
|
||||||
|
|
||||||
let f_statistic = msb / msw;
|
|
||||||
|
|
||||||
// Approximate p-value using F-distribution (using gamma distribution approximation)
|
|
||||||
let p_value =
|
|
||||||
1.0 - gamma_cdf(Matrix::from_vec(vec![f_statistic], 1, 1), dfb / 2.0, 1.0).get(0, 0);
|
|
||||||
|
|
||||||
(f_statistic, p_value)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::matrix::Matrix;

    const EPS: f64 = 1e-5;

    /// Builds a single-row matrix holding `values`.
    fn row(values: Vec<f64>) -> Matrix<f64> {
        let len = values.len();
        Matrix::from_vec(values, 1, len)
    }

    #[test]
    fn test_t_test() {
        // Means 3 and 8, both sample variances 2.5, five observations each.
        let sample1 = row(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        let sample2 = row(vec![6.0, 7.0, 8.0, 9.0, 10.0]);

        let (t_statistic, p_value) = t_test(&sample1, &sample2);

        assert!((t_statistic + 5.0).abs() < EPS);
        assert!(p_value > 0.0 && p_value < 1.0);
    }

    #[test]
    fn test_chi2_test() {
        // 2x2 contingency table.
        let observed = Matrix::from_vec(vec![12.0, 5.0, 8.0, 10.0], 2, 2);

        let (chi2_statistic, p_value) = chi2_test(&observed);

        assert!(chi2_statistic > 0.0);
        assert!(p_value > 0.0 && p_value < 1.0);
    }

    #[test]
    fn test_anova() {
        // Three equally sized groups whose means are shifted by one each.
        let group1 = row(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        let group2 = row(vec![2.0, 3.0, 4.0, 5.0, 6.0]);
        let group3 = row(vec![3.0, 4.0, 5.0, 6.0, 7.0]);

        let (f_statistic, p_value) = anova(vec![&group1, &group2, &group3]);

        assert!(f_statistic > 0.0);
        assert!(p_value > 0.0 && p_value < 1.0);
    }
}
|
|
@ -1,9 +1,7 @@
|
|||||||
pub mod correlation;
|
pub mod correlation;
|
||||||
pub mod descriptive;
|
pub mod descriptive;
|
||||||
pub mod distributions;
|
pub mod distributions;
|
||||||
pub mod inferential;
|
|
||||||
|
|
||||||
pub use correlation::*;
|
pub use correlation::*;
|
||||||
pub use descriptive::*;
|
pub use descriptive::*;
|
||||||
pub use distributions::*;
|
pub use distributions::*;
|
||||||
pub use inferential::*;
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user