Mirror of https://github.com/Magnus167/rustframe.git
Synced 2025-08-20 16:40:01 +00:00

Merge 96f434bf942880365dad3d2496be3644aa71020a into 11330e464ba3a7f08aaf73bc918281472c503b1d
Commit 438e51d358
@@ -14,8 +14,6 @@ crate-type = ["cdylib", "lib"]

[dependencies]
chrono = "^0.4.10"
criterion = { version = "0.5", features = ["html_reports"], optional = true }

[dev-dependencies]
rand = "^0.9.1"

[features]
135  src/compute/activations.rs  Normal file
@@ -0,0 +1,135 @@
use crate::matrix::{Matrix, SeriesOps};

pub fn sigmoid(x: &Matrix<f64>) -> Matrix<f64> {
    x.map(|v| 1.0 / (1.0 + (-v).exp()))
}

pub fn dsigmoid(y: &Matrix<f64>) -> Matrix<f64> {
    // Derivative w.r.t. the pre-activation; expects y = sigmoid(x), not x itself.
    y.map(|v| v * (1.0 - v))
}

pub fn relu(x: &Matrix<f64>) -> Matrix<f64> {
    x.map(|v| if v > 0.0 { v } else { 0.0 })
}

pub fn drelu(x: &Matrix<f64>) -> Matrix<f64> {
    x.map(|v| if v > 0.0 { 1.0 } else { 0.0 })
}

pub fn leaky_relu(x: &Matrix<f64>) -> Matrix<f64> {
    x.map(|v| if v > 0.0 { v } else { 0.01 * v })
}

pub fn dleaky_relu(x: &Matrix<f64>) -> Matrix<f64> {
    x.map(|v| if v > 0.0 { 1.0 } else { 0.01 })
}

#[cfg(test)]
mod tests {
    use super::*;

    // Helper function to round all elements in a matrix to n decimal places
    fn _round_matrix(mat: &Matrix<f64>, decimals: u32) -> Matrix<f64> {
        let factor = 10f64.powi(decimals as i32);
        let rounded: Vec<f64> = mat
            .to_vec()
            .iter()
            .map(|v| (v * factor).round() / factor)
            .collect();
        Matrix::from_vec(rounded, mat.rows(), mat.cols())
    }

    #[test]
    fn test_sigmoid() {
        let x = Matrix::from_vec(vec![-1.0, 0.0, 1.0], 3, 1);
        let expected = Matrix::from_vec(vec![0.26894142, 0.5, 0.73105858], 3, 1);
        let result = sigmoid(&x);
        assert_eq!(_round_matrix(&result, 6), _round_matrix(&expected, 6));
    }

    #[test]
    fn test_sigmoid_edge_case() {
        let x = Matrix::from_vec(vec![-1000.0, 0.0, 1000.0], 3, 1);
        let expected = Matrix::from_vec(vec![0.0, 0.5, 1.0], 3, 1);
        let result = sigmoid(&x);

        for (r, e) in result.data().iter().zip(expected.data().iter()) {
            assert!((r - e).abs() < 1e-6);
        }
    }

    #[test]
    fn test_relu() {
        let x = Matrix::from_vec(vec![-1.0, 0.0, 1.0], 3, 1);
        let expected = Matrix::from_vec(vec![0.0, 0.0, 1.0], 3, 1);
        assert_eq!(relu(&x), expected);
    }

    #[test]
    fn test_relu_edge_case() {
        let x = Matrix::from_vec(vec![-1e-10, 0.0, 1e10], 3, 1);
        let expected = Matrix::from_vec(vec![0.0, 0.0, 1e10], 3, 1);
        assert_eq!(relu(&x), expected);
    }

    #[test]
    fn test_dsigmoid() {
        let y = Matrix::from_vec(vec![0.26894142, 0.5, 0.73105858], 3, 1);
        let expected = Matrix::from_vec(vec![0.19661193, 0.25, 0.19661193], 3, 1);
        let result = dsigmoid(&y);
        assert_eq!(_round_matrix(&result, 6), _round_matrix(&expected, 6));
    }

    #[test]
    fn test_dsigmoid_edge_case() {
        let y = Matrix::from_vec(vec![0.0, 0.5, 1.0], 3, 1); // Assume these are outputs from sigmoid(x)
        let expected = Matrix::from_vec(vec![0.0, 0.25, 0.0], 3, 1);
        let result = dsigmoid(&y);

        for (r, e) in result.data().iter().zip(expected.data().iter()) {
            assert!((r - e).abs() < 1e-6);
        }
    }

    #[test]
    fn test_drelu() {
        let x = Matrix::from_vec(vec![-1.0, 0.0, 1.0], 3, 1);
        let expected = Matrix::from_vec(vec![0.0, 0.0, 1.0], 3, 1);
        assert_eq!(drelu(&x), expected);
    }

    #[test]
    fn test_drelu_edge_case() {
        let x = Matrix::from_vec(vec![-1e-10, 0.0, 1e10], 3, 1);
        let expected = Matrix::from_vec(vec![0.0, 0.0, 1.0], 3, 1);
        assert_eq!(drelu(&x), expected);
    }

    #[test]
    fn test_leaky_relu() {
        let x = Matrix::from_vec(vec![-1.0, 0.0, 1.0], 3, 1);
        let expected = Matrix::from_vec(vec![-0.01, 0.0, 1.0], 3, 1);
        assert_eq!(leaky_relu(&x), expected);
    }

    #[test]
    fn test_leaky_relu_edge_case() {
        let x = Matrix::from_vec(vec![-1e-10, 0.0, 1e10], 3, 1);
        let expected = Matrix::from_vec(vec![-1e-12, 0.0, 1e10], 3, 1);
        assert_eq!(leaky_relu(&x), expected);
    }

    #[test]
    fn test_dleaky_relu() {
        let x = Matrix::from_vec(vec![-1.0, 0.0, 1.0], 3, 1);
        let expected = Matrix::from_vec(vec![0.01, 0.01, 1.0], 3, 1);
        assert_eq!(dleaky_relu(&x), expected);
    }

    #[test]
    fn test_dleaky_relu_edge_case() {
        let x = Matrix::from_vec(vec![-1e-10, 0.0, 1e10], 3, 1);
        let expected = Matrix::from_vec(vec![0.01, 0.01, 1.0], 3, 1);
        assert_eq!(dleaky_relu(&x), expected);
    }
}
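A quick sanity check of the `dsigmoid` convention above (it takes the sigmoid output, not the raw input). Illustrative snippet only, assuming the crate is imported under the name `rustframe`:

use rustframe::compute::activations::{dsigmoid, sigmoid};
use rustframe::matrix::Matrix;

fn main() {
    let x = Matrix::from_vec(vec![-1.0, 0.0, 1.0], 3, 1);
    let y = sigmoid(&x); // y = σ(x)
    let dy = dsigmoid(&y); // σ'(x) = y * (1 - y), so pass y rather than x
    // At x = 0: σ(0) = 0.5 and σ'(0) = 0.25.
    assert!((dy[(1, 0)] - 0.25).abs() < 1e-12);
}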
3  src/compute/mod.rs  Normal file
@@ -0,0 +1,3 @@
pub mod activations;

pub mod models;
340  src/compute/models/dense_nn.rs  Normal file
@@ -0,0 +1,340 @@
use crate::compute::activations::{drelu, relu, sigmoid};
use crate::matrix::{Matrix, SeriesOps};
use rand::prelude::*;

/// Supported activation functions
#[derive(Clone)]
pub enum ActivationKind {
    Relu,
    Sigmoid,
    Tanh,
}

impl ActivationKind {
    /// Apply activation elementwise
    pub fn forward(&self, z: &Matrix<f64>) -> Matrix<f64> {
        match self {
            ActivationKind::Relu => relu(z),
            ActivationKind::Sigmoid => sigmoid(z),
            ActivationKind::Tanh => z.map(|v| v.tanh()),
        }
    }

    /// Compute elementwise derivative w.r.t. pre-activation z
    pub fn derivative(&self, z: &Matrix<f64>) -> Matrix<f64> {
        match self {
            ActivationKind::Relu => drelu(z),
            ActivationKind::Sigmoid => {
                let s = sigmoid(z);
                s.zip(&s, |si, sj| si * (1.0 - sj))
            }
            ActivationKind::Tanh => z.map(|v| 1.0 - v.tanh().powi(2)),
        }
    }
}

/// Weight initialization schemes
#[derive(Clone)]
pub enum InitializerKind {
    /// Uniform(-limit .. limit)
    Uniform(f64),
    /// Xavier/Glorot uniform
    Xavier,
    /// He (Kaiming) uniform
    He,
}

impl InitializerKind {
    pub fn initialize(&self, rows: usize, cols: usize) -> Matrix<f64> {
        let mut rng = rand::rng();
        let fan_in = rows;
        let fan_out = cols;
        let limit = match self {
            InitializerKind::Uniform(l) => *l,
            InitializerKind::Xavier => (6.0 / (fan_in + fan_out) as f64).sqrt(),
            InitializerKind::He => (2.0 / fan_in as f64).sqrt(),
        };
        let data = (0..rows * cols)
            .map(|_| rng.random_range(-limit..limit))
            .collect::<Vec<_>>();
        Matrix::from_vec(data, rows, cols)
    }
}
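// Background on the limits above: the Xavier/Glorot bound sqrt(6 / (fan_in + fan_out))
// keeps the variance of activations roughly constant across layers for saturating
// activations such as tanh or sigmoid, while the He bound sqrt(2 / fan_in)
// compensates for ReLU zeroing out roughly half of its inputs.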
/// Supported losses
#[derive(Clone)]
pub enum LossKind {
    /// Mean Squared Error: L = 1/m * sum((y_hat - y)^2)
    MSE,
    /// Binary Cross-Entropy: L = -1/m * sum(y*log(y_hat) + (1-y)*log(1-y_hat))
    BCE,
}

impl LossKind {
    /// Gradient of the loss as used by `DenseNN::train`: for MSE this is dL/dy_hat
    /// (the output activation's derivative is applied separately); for BCE it is
    /// already the output delta dL/dz of a sigmoid output layer (see note below).
    pub fn gradient(&self, y_hat: &Matrix<f64>, y: &Matrix<f64>) -> Matrix<f64> {
        let m = y.rows() as f64;
        match self {
            LossKind::MSE => (y_hat - y) * (2.0 / m),
            LossKind::BCE => (y_hat - y) * (1.0 / m),
        }
    }
}
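// Why BCE can skip the activation derivative: with a sigmoid output p = σ(z) and
// L = -[y·ln p + (1 - y)·ln(1 - p)], the chain rule gives
//   dL/dp = -y/p + (1 - y)/(1 - p)   and   dp/dz = p·(1 - p),
// so dL/dz = p - y. Averaged over m samples this is exactly (y_hat - y) / m,
// which `train` uses directly as the output-layer delta for `LossKind::BCE`.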
/// Configuration for a dense neural network
pub struct DenseNNConfig {
    pub input_size: usize,
    pub hidden_layers: Vec<usize>,
    /// Must have length = hidden_layers.len() + 1
    pub activations: Vec<ActivationKind>,
    pub output_size: usize,
    pub initializer: InitializerKind,
    pub loss: LossKind,
    pub learning_rate: f64,
    pub epochs: usize,
}

/// A multi-layer perceptron with full configurability
pub struct DenseNN {
    weights: Vec<Matrix<f64>>,
    biases: Vec<Matrix<f64>>,
    activations: Vec<ActivationKind>,
    loss: LossKind,
    lr: f64,
    epochs: usize,
}

impl DenseNN {
    /// Build a new DenseNN from the given configuration
    pub fn new(config: DenseNNConfig) -> Self {
        let mut sizes = vec![config.input_size];
        sizes.extend(&config.hidden_layers);
        sizes.push(config.output_size);

        assert_eq!(
            config.activations.len(),
            sizes.len() - 1,
            "Number of activation functions must match number of layers"
        );

        let mut weights = Vec::with_capacity(sizes.len() - 1);
        let mut biases = Vec::with_capacity(sizes.len() - 1);

        for i in 0..sizes.len() - 1 {
            let w = config.initializer.initialize(sizes[i], sizes[i + 1]);
            let b = Matrix::zeros(1, sizes[i + 1]);
            weights.push(w);
            biases.push(b);
        }

        DenseNN {
            weights,
            biases,
            activations: config.activations,
            loss: config.loss,
            lr: config.learning_rate,
            epochs: config.epochs,
        }
    }

    /// Perform a full forward pass, returning pre-activations (z) and activations (a)
    fn forward_full(&self, x: &Matrix<f64>) -> (Vec<Matrix<f64>>, Vec<Matrix<f64>>) {
        let mut zs = Vec::with_capacity(self.weights.len());
        let mut activs = Vec::with_capacity(self.weights.len() + 1);
        activs.push(x.clone());

        let mut a = x.clone();
        for (i, (w, b)) in self.weights.iter().zip(self.biases.iter()).enumerate() {
            let z = &a.dot(w) + &Matrix::repeat_rows(b, a.rows());
            let a_next = self.activations[i].forward(&z);
            zs.push(z);
            activs.push(a_next.clone());
            a = a_next;
        }

        (zs, activs)
    }

    /// Train the network on inputs X and targets Y
    pub fn train(&mut self, x: &Matrix<f64>, y: &Matrix<f64>) {
        let m = x.rows() as f64;
        for _ in 0..self.epochs {
            let (zs, activs) = self.forward_full(x);
            let y_hat = activs.last().unwrap().clone();

            // Initial delta (dL/dz) on the output layer
            let mut delta = match self.loss {
                LossKind::BCE => self.loss.gradient(&y_hat, y),
                LossKind::MSE => {
                    let grad = self.loss.gradient(&y_hat, y);
                    let dz = self
                        .activations
                        .last()
                        .unwrap()
                        .derivative(zs.last().unwrap());
                    grad.zip(&dz, |g, da| g * da)
                }
            };

            // Backpropagate through the layers
            for l in (0..self.weights.len()).rev() {
                let a_prev = &activs[l];
                let dw = a_prev.transpose().dot(&delta) / m;
                let db = Matrix::from_vec(delta.sum_vertical(), 1, delta.cols()) / m;

                // Keep the pre-update weights: delta must be propagated through the
                // weights that produced this pass's activations.
                let w_t = self.weights[l].transpose();

                // Update weights & biases
                self.weights[l] = &self.weights[l] - &(dw * self.lr);
                self.biases[l] = &self.biases[l] - &(db * self.lr);

                // Propagate delta to the previous layer
                if l > 0 {
                    let da = self.activations[l - 1].derivative(&zs[l - 1]);
                    delta = delta.dot(&w_t).zip(&da, |d, a| d * a);
                }
            }
        }
    }

    /// Run a forward pass and return the network's output
    pub fn predict(&self, x: &Matrix<f64>) -> Matrix<f64> {
        let mut a = x.clone();
        for (i, (w, b)) in self.weights.iter().zip(self.biases.iter()).enumerate() {
            let z = &a.dot(w) + &Matrix::repeat_rows(b, a.rows());
            a = self.activations[i].forward(&z);
        }
        a
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::matrix::Matrix;

    /// Compute MSE = 1/m * Σ (ŷ - y)²
    fn mse_loss(y_hat: &Matrix<f64>, y: &Matrix<f64>) -> f64 {
        let m = y.rows() as f64;
        y_hat
            .zip(y, |yh, yv| (yh - yv).powi(2))
            .data()
            .iter()
            .sum::<f64>()
            / m
    }

    #[test]
    fn test_predict_shape() {
        let config = DenseNNConfig {
            input_size: 1,
            hidden_layers: vec![2],
            activations: vec![ActivationKind::Relu, ActivationKind::Sigmoid],
            output_size: 1,
            initializer: InitializerKind::Uniform(0.1),
            loss: LossKind::MSE,
            learning_rate: 0.01,
            epochs: 0,
        };
        let model = DenseNN::new(config);
        let x = Matrix::from_vec(vec![1.0, 2.0, 3.0], 3, 1);
        let preds = model.predict(&x);
        assert_eq!(preds.rows(), 3);
        assert_eq!(preds.cols(), 1);
    }

    #[test]
    fn test_train_no_epochs_does_nothing() {
        let config = DenseNNConfig {
            input_size: 1,
            hidden_layers: vec![2],
            activations: vec![ActivationKind::Relu, ActivationKind::Sigmoid],
            output_size: 1,
            initializer: InitializerKind::Uniform(0.1),
            loss: LossKind::MSE,
            learning_rate: 0.01,
            epochs: 0,
        };
        let mut model = DenseNN::new(config);
        let x = Matrix::from_vec(vec![0.0, 1.0], 2, 1);
        let y = Matrix::from_vec(vec![0.0, 1.0], 2, 1);

        let before = model.predict(&x);
        model.train(&x, &y);
        let after = model.predict(&x);

        for i in 0..before.rows() {
            for j in 0..before.cols() {
                assert!(
                    (before[(i, j)] - after[(i, j)]).abs() < 1e-12,
                    "prediction changed despite 0 epochs"
                );
            }
        }
    }

    #[test]
    fn test_train_one_epoch_changes_predictions() {
        // Single-layer sigmoid regression so gradients flow.
        let config = DenseNNConfig {
            input_size: 1,
            hidden_layers: vec![],
            activations: vec![ActivationKind::Sigmoid],
            output_size: 1,
            initializer: InitializerKind::Uniform(0.1),
            loss: LossKind::MSE,
            learning_rate: 1.0,
            epochs: 1,
        };
        let mut model = DenseNN::new(config);

        let x = Matrix::from_vec(vec![0.0, 1.0], 2, 1);
        let y = Matrix::from_vec(vec![0.0, 1.0], 2, 1);

        let before = model.predict(&x);
        model.train(&x, &y);
        let after = model.predict(&x);

        // At least one of the two outputs must move by more than epsilon.
        let mut moved = false;
        for i in 0..before.rows() {
            if (before[(i, 0)] - after[(i, 0)]).abs() > 1e-8 {
                moved = true;
            }
        }
        assert!(moved, "predictions did not change after 1 epoch");
    }

    #[test]
    fn test_training_reduces_mse_loss() {
        // Same single-layer sigmoid setup; check the loss goes down.
        let config = DenseNNConfig {
            input_size: 1,
            hidden_layers: vec![],
            activations: vec![ActivationKind::Sigmoid],
            output_size: 1,
            initializer: InitializerKind::Uniform(0.1),
            loss: LossKind::MSE,
            learning_rate: 1.0,
            epochs: 10,
        };
        let mut model = DenseNN::new(config);

        let x = Matrix::from_vec(vec![0.0, 1.0, 0.5], 3, 1);
        let y = Matrix::from_vec(vec![0.0, 1.0, 0.5], 3, 1);

        let before_preds = model.predict(&x);
        let before_loss = mse_loss(&before_preds, &y);

        model.train(&x, &y);

        let after_preds = model.predict(&x);
        let after_loss = mse_loss(&after_preds, &y);

        assert!(
            after_loss < before_loss,
            "MSE did not decrease (before: {}, after: {})",
            before_loss,
            after_loss
        );
    }
}
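For orientation, a minimal end-to-end sketch of the API above. Illustrative only: it assumes the crate is imported as `rustframe` and uses arbitrary toy data.

use rustframe::compute::models::dense_nn::{
    ActivationKind, DenseNN, DenseNNConfig, InitializerKind, LossKind,
};
use rustframe::matrix::Matrix;

fn main() {
    // One input feature, one hidden layer of 4 ReLU units, sigmoid output:
    // `activations` needs hidden_layers.len() + 1 = 2 entries.
    let config = DenseNNConfig {
        input_size: 1,
        hidden_layers: vec![4],
        activations: vec![ActivationKind::Relu, ActivationKind::Sigmoid],
        output_size: 1,
        initializer: InitializerKind::He,
        loss: LossKind::MSE,
        learning_rate: 0.1,
        epochs: 200,
    };
    let mut model = DenseNN::new(config);

    // Fit a step-like target on four points (full-batch gradient descent).
    let x = Matrix::from_vec(vec![0.0, 0.25, 0.75, 1.0], 4, 1);
    let y = Matrix::from_vec(vec![0.0, 0.0, 1.0, 1.0], 4, 1);
    model.train(&x, &y);

    let preds = model.predict(&x); // 4x1 matrix of sigmoid outputs in (0, 1)
    assert_eq!((preds.rows(), preds.cols()), (4, 1));
}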
214  src/compute/models/gaussian_nb.rs  Normal file
@@ -0,0 +1,214 @@
use crate::matrix::Matrix;
use std::collections::HashMap;

/// A Gaussian Naive Bayes classifier.
///
/// # Parameters
/// - `var_smoothing`: Portion of the largest variance of all features to add to variances for stability.
/// - `use_unbiased_variance`: If `true`, uses Bessel's correction (dividing by n-1); otherwise divides by n.
pub struct GaussianNB {
    // Distinct class labels
    classes: Vec<f64>,
    // Prior probabilities P(class)
    priors: Vec<f64>,
    // Feature means per class
    means: Vec<Matrix<f64>>,
    // Feature variances per class
    variances: Vec<Matrix<f64>>,
    // var_smoothing
    eps: f64,
    // flag for unbiased variance
    use_unbiased: bool,
}

impl GaussianNB {
    /// Create a new GaussianNB.
    ///
    /// # Arguments
    /// * `var_smoothing` - small float added to variances for numerical stability.
    /// * `use_unbiased_variance` - whether to apply Bessel's correction (divide by n-1).
    pub fn new(var_smoothing: f64, use_unbiased_variance: bool) -> Self {
        Self {
            classes: Vec::new(),
            priors: Vec::new(),
            means: Vec::new(),
            variances: Vec::new(),
            eps: var_smoothing,
            use_unbiased: use_unbiased_variance,
        }
    }

    /// Fit the model according to the training data `x` and labels `y`.
    ///
    /// # Panics
    /// Panics if `x` or `y` is empty, or if their dimensions disagree.
    pub fn fit(&mut self, x: &Matrix<f64>, y: &Matrix<f64>) {
        let m = x.rows();
        let n = x.cols();
        assert_eq!(y.rows(), m, "Row count of X and Y must match");
        assert_eq!(y.cols(), 1, "Y must be a column vector");
        if m == 0 || n == 0 {
            panic!("Input matrix x or y is empty");
        }

        // Group sample indices by label
        let mut groups: HashMap<u64, Vec<usize>> = HashMap::new();
        for i in 0..m {
            let label = y[(i, 0)];
            let bits = label.to_bits();
            groups.entry(bits).or_default().push(i);
        }
        if groups.is_empty() {
            panic!("No class labels found in y");
        }

        // Extract and sort class labels
        self.classes = groups.keys().cloned().map(f64::from_bits).collect();
        self.classes.sort_by(|a, b| a.partial_cmp(b).unwrap());

        self.priors.clear();
        self.means.clear();
        self.variances.clear();

        // Precompute max variance for smoothing scale
        let mut max_var_feature = 0.0;
        for j in 0..n {
            let mut col_vals = Vec::with_capacity(m);
            for i in 0..m {
                col_vals.push(x[(i, j)]);
            }
            let mean_all = col_vals.iter().sum::<f64>() / m as f64;
            let var_all = col_vals.iter().map(|v| (v - mean_all).powi(2)).sum::<f64>() / m as f64;
            if var_all > max_var_feature {
                max_var_feature = var_all;
            }
        }
        let smoothing = self.eps * max_var_feature;

        // Compute per-class statistics
        for &c in &self.classes {
            let idx = &groups[&c.to_bits()];
            let count = idx.len();
            if count == 0 {
                panic!("Class group for label {} is empty", c);
            }
            // Prior
            self.priors.push(count as f64 / m as f64);

            let mut mean = Matrix::zeros(1, n);
            let mut var = Matrix::zeros(1, n);

            // Mean
            for &i in idx {
                for j in 0..n {
                    mean[(0, j)] += x[(i, j)];
                }
            }
            for j in 0..n {
                mean[(0, j)] /= count as f64;
            }

            // Variance
            for &i in idx {
                for j in 0..n {
                    let d = x[(i, j)] - mean[(0, j)];
                    var[(0, j)] += d * d;
                }
            }
            let denom = if self.use_unbiased {
                (count as f64 - 1.0).max(1.0)
            } else {
                count as f64
            };
            for j in 0..n {
                var[(0, j)] = var[(0, j)] / denom + smoothing;
                if var[(0, j)] <= 0.0 {
                    var[(0, j)] = smoothing;
                }
            }

            self.means.push(mean);
            self.variances.push(var);
        }
    }

    /// Perform classification on an array of test vectors `x`.
    pub fn predict(&self, x: &Matrix<f64>) -> Matrix<f64> {
        let m = x.rows();
        let n = x.cols();
        let k = self.classes.len();
        let mut preds = Matrix::zeros(m, 1);
        let ln_2pi = (2.0 * std::f64::consts::PI).ln();

        for i in 0..m {
            let mut best = (0, f64::NEG_INFINITY);
            for c_idx in 0..k {
                let mut log_prob = self.priors[c_idx].ln();
                for j in 0..n {
                    let diff = x[(i, j)] - self.means[c_idx][(0, j)];
                    let var = self.variances[c_idx][(0, j)];
                    log_prob += -0.5 * (diff * diff / var + var.ln() + ln_2pi);
                }
                if log_prob > best.1 {
                    best = (c_idx, log_prob);
                }
            }
            preds[(i, 0)] = self.classes[best.0];
        }
        preds
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::matrix::Matrix;

    #[test]
    fn test_simple_two_class() {
        // Simple dataset: one feature, two classes 0 and 1
        // Class 0: values [1.0, 1.2, 0.8]
        // Class 1: values [3.0, 3.2, 2.8]
        let x = Matrix::from_vec(vec![1.0, 1.2, 0.8, 3.0, 3.2, 2.8], 6, 1);
        let y = Matrix::from_vec(vec![0.0, 0.0, 0.0, 1.0, 1.0, 1.0], 6, 1);
        let mut clf = GaussianNB::new(1e-9, false);
        clf.fit(&x, &y);
        let test = Matrix::from_vec(vec![1.1, 3.1], 2, 1);
        let preds = clf.predict(&test);
        assert_eq!(preds[(0, 0)], 0.0);
        assert_eq!(preds[(1, 0)], 1.0);
    }

    #[test]
    fn test_unbiased_variance() {
        // Same as above but with unbiased variance
        let x = Matrix::from_vec(vec![2.0, 2.2, 1.8, 4.0, 4.2, 3.8], 6, 1);
        let y = Matrix::from_vec(vec![0.0, 0.0, 0.0, 1.0, 1.0, 1.0], 6, 1);
        let mut clf = GaussianNB::new(1e-9, true);
        clf.fit(&x, &y);
        let test = Matrix::from_vec(vec![2.1, 4.1], 2, 1);
        let preds = clf.predict(&test);
        assert_eq!(preds[(0, 0)], 0.0);
        assert_eq!(preds[(1, 0)], 1.0);
    }

    #[test]
    #[should_panic]
    fn test_empty_input() {
        let x = Matrix::zeros(0, 0);
        let y = Matrix::zeros(0, 1);
        let mut clf = GaussianNB::new(1e-9, false);
        clf.fit(&x, &y);
    }

    #[test]
    #[should_panic = "Row count of X and Y must match"]
    fn test_mismatched_rows() {
        let x = Matrix::from_vec(vec![1.0, 2.0], 2, 1);
        let y = Matrix::from_vec(vec![0.0], 1, 1);
        let mut clf = GaussianNB::new(1e-9, false);
        clf.fit(&x, &y);
        clf.predict(&x);
    }
}
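A small illustrative sketch with two features (toy numbers, assuming the crate is imported as `rustframe`; `from_vec` fills column by column, as the matrix tests later in this diff show):

use rustframe::compute::models::gaussian_nb::GaussianNB;
use rustframe::matrix::Matrix;

fn main() {
    // Four samples, two features; column-major: feature 0 first, then feature 1.
    let x = Matrix::from_vec(
        vec![
            1.0, 1.1, 3.0, 3.1, // feature 0
            0.5, 0.6, 2.5, 2.6, // feature 1
        ],
        4,
        2,
    );
    let y = Matrix::from_vec(vec![0.0, 0.0, 1.0, 1.0], 4, 1);

    let mut clf = GaussianNB::new(1e-9, false);
    clf.fit(&x, &y);

    // Two query points, one near each class centre.
    let test = Matrix::from_vec(vec![1.05, 3.05, 0.55, 2.55], 2, 2);
    let preds = clf.predict(&test);
    assert_eq!(preds[(0, 0)], 0.0);
    assert_eq!(preds[(1, 0)], 1.0);
}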
105  src/compute/models/k_means.rs  Normal file
@@ -0,0 +1,105 @@
use crate::matrix::Matrix;
use std::collections::HashMap;

pub struct GaussianNB {
    classes: Vec<f64>, // distinct labels
    priors: Vec<f64>,  // P(class)
    means: Vec<Matrix<f64>>,
    variances: Vec<Matrix<f64>>,
    eps: f64, // var-smoothing
}

impl GaussianNB {
    pub fn new(var_smoothing: f64) -> Self {
        Self {
            classes: vec![],
            priors: vec![],
            means: vec![],
            variances: vec![],
            eps: var_smoothing,
        }
    }

    pub fn fit(&mut self, x: &Matrix<f64>, y: &Matrix<f64>) {
        let m = x.rows();
        let n = x.cols();
        assert_eq!(y.rows(), m);
        assert_eq!(y.cols(), 1);

        // ----- group samples by label -----
        let mut groups: HashMap<i64, Vec<usize>> = HashMap::new();
        for i in 0..m {
            groups.entry(y[(i, 0)] as i64).or_default().push(i);
        }

        self.classes = groups.keys().cloned().map(|v| v as f64).collect::<Vec<_>>();
        self.classes.sort_by(|a, b| a.partial_cmp(b).unwrap());

        self.priors.clear();
        self.means.clear();
        self.variances.clear();

        for &c in &self.classes {
            let idx = &groups[&(c as i64)];
            let count = idx.len();
            self.priors.push(count as f64 / m as f64);

            let mut mean = Matrix::zeros(1, n);
            let mut var = Matrix::zeros(1, n);

            // mean
            for &i in idx {
                for j in 0..n {
                    mean[(0, j)] += x[(i, j)];
                }
            }
            for j in 0..n {
                mean[(0, j)] /= count as f64;
            }

            // variance
            for &i in idx {
                for j in 0..n {
                    let d = x[(i, j)] - mean[(0, j)];
                    var[(0, j)] += d * d;
                }
            }
            for j in 0..n {
                var[(0, j)] = var[(0, j)] / count as f64 + self.eps;
            }

            self.means.push(mean);
            self.variances.push(var);
        }
    }

    /// Return class labels (shape m×1) for samples in X.
    pub fn predict(&self, x: &Matrix<f64>) -> Matrix<f64> {
        let m = x.rows();
        let k = self.classes.len();
        let n = x.cols();
        let mut preds = Matrix::zeros(m, 1);
        let ln_2pi = (2.0 * std::f64::consts::PI).ln();

        for i in 0..m {
            let mut best_class = 0usize;
            let mut best_log_prob = f64::NEG_INFINITY;
            for c in 0..k {
                // log P(y=c) + Σ log N(x_j | μ, σ²)
                let mut log_prob = self.priors[c].ln();
                for j in 0..n {
                    let mean = self.means[c][(0, j)];
                    let var = self.variances[c][(0, j)];
                    let diff = x[(i, j)] - mean;
                    log_prob += -0.5 * (diff * diff / var + var.ln() + ln_2pi);
                }
                if log_prob > best_log_prob {
                    best_log_prob = log_prob;
                    best_class = c;
                }
            }
            preds[(i, 0)] = self.classes[best_class];
        }
        preds
    }
}
54  src/compute/models/linreg.rs  Normal file
@@ -0,0 +1,54 @@
use crate::matrix::{Matrix, SeriesOps};

pub struct LinReg {
    w: Matrix<f64>, // shape (n_features, 1)
    b: f64,
}

impl LinReg {
    pub fn new(n_features: usize) -> Self {
        Self {
            w: Matrix::from_vec(vec![0.0; n_features], n_features, 1),
            b: 0.0,
        }
    }

    pub fn predict(&self, x: &Matrix<f64>) -> Matrix<f64> {
        // X.dot(w) + b
        x.dot(&self.w) + self.b
    }

    pub fn fit(&mut self, x: &Matrix<f64>, y: &Matrix<f64>, lr: f64, epochs: usize) {
        let m = x.rows() as f64;
        for _ in 0..epochs {
            let y_hat = self.predict(x);
            let err = &y_hat - y; // shape (m, 1)

            // gradients
            let grad_w = x.transpose().dot(&err) * (2.0 / m); // (n, 1)
            let grad_b = (2.0 / m) * err.sum_vertical().iter().sum::<f64>();
            // update
            self.w = &self.w - &(grad_w * lr);
            self.b -= lr * grad_b;
        }
    }
}

#[cfg(test)]
mod tests {

    use super::*;

    #[test]
    fn test_linreg_fit_predict() {
        let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1);
        let y = Matrix::from_vec(vec![2.0, 3.0, 4.0, 5.0], 4, 1);
        let mut model = LinReg::new(1);
        model.fit(&x, &y, 0.01, 10000);
        let preds = model.predict(&x);
        assert!((preds[(0, 0)] - 2.0).abs() < 1e-2);
        assert!((preds[(1, 0)] - 3.0).abs() < 1e-2);
        assert!((preds[(2, 0)] - 4.0).abs() < 1e-2);
        assert!((preds[(3, 0)] - 5.0).abs() < 1e-2);
    }
}
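For reference, the update in `fit` is plain full-batch gradient descent on the mean squared error; a hand-worked first step with illustrative numbers:

L = (1/m) · Σ (ŷ - y)²  with  ŷ = X·w + b
∂L/∂w = (2/m) · Xᵀ(ŷ - y)      ->  grad_w above
∂L/∂b = (2/m) · Σ (ŷ - y)      ->  grad_b above

Example (x = [1, 2], y = [2, 3], w = 0, b = 0, lr = 0.01):
ŷ = [0, 0], err = [-2, -3]
grad_w = (2/2) · (1·(-2) + 2·(-3)) = -8   =>  w becomes 0 - 0.01·(-8) = 0.08
grad_b = (2/2) · ((-2) + (-3))     = -5   =>  b becomes 0 - 0.01·(-5) = 0.05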
55  src/compute/models/logreg.rs  Normal file
@@ -0,0 +1,55 @@
use crate::compute::activations::sigmoid;
use crate::matrix::{Matrix, SeriesOps};

pub struct LogReg {
    w: Matrix<f64>,
    b: f64,
}

impl LogReg {
    pub fn new(n_features: usize) -> Self {
        Self {
            w: Matrix::zeros(n_features, 1),
            b: 0.0,
        }
    }

    pub fn predict_proba(&self, x: &Matrix<f64>) -> Matrix<f64> {
        sigmoid(&(x.dot(&self.w) + self.b)) // σ(Xw + b)
    }

    pub fn fit(&mut self, x: &Matrix<f64>, y: &Matrix<f64>, lr: f64, epochs: usize) {
        let m = x.rows() as f64;
        for _ in 0..epochs {
            let p = self.predict_proba(x); // shape (m, 1)
            let err = &p - y; // derivative of BCE w.r.t. the pre-sigmoid input
            let grad_w = x.transpose().dot(&err) / m;
            let grad_b = err.sum_vertical().iter().sum::<f64>() / m;
            self.w = &self.w - &(grad_w * lr);
            self.b -= lr * grad_b;
        }
    }

    pub fn predict(&self, x: &Matrix<f64>) -> Matrix<f64> {
        self.predict_proba(x)
            .map(|p| if p >= 0.5 { 1.0 } else { 0.0 })
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_logreg_fit_predict() {
        let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1);
        let y = Matrix::from_vec(vec![0.0, 0.0, 1.0, 1.0], 4, 1);
        let mut model = LogReg::new(1);
        model.fit(&x, &y, 0.01, 10000);
        let preds = model.predict(&x);
        assert_eq!(preds[(0, 0)], 0.0);
        assert_eq!(preds[(1, 0)], 0.0);
        assert_eq!(preds[(2, 0)], 1.0);
        assert_eq!(preds[(3, 0)], 1.0);
    }
}
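The `err = &p - y` line is the standard logistic-regression shortcut; the derivation for one sample:

z = xᵀw + b,  p = σ(z),  L = -[y·ln p + (1 - y)·ln(1 - p)]
dL/dp = -y/p + (1 - y)/(1 - p),   dp/dz = p·(1 - p)
dL/dz = dL/dp · dp/dz = p - y

Averaging over the m samples gives the `/ m` factors applied to `grad_w` and `grad_b`.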
6  src/compute/models/mod.rs  Normal file
@@ -0,0 +1,6 @@
pub mod linreg;
pub mod logreg;
pub mod dense_nn;
pub mod k_means;
pub mod pca;
pub mod gaussian_nb;
85  src/compute/models/pca.rs  Normal file
@@ -0,0 +1,85 @@
use crate::matrix::{Matrix, SeriesOps};
use rand;

/// Holds the `n_components` principal axes (rows) and the centred data's mean.
pub struct PCA {
    pub components: Matrix<f64>, // (n_components, n_features)
    pub mean: Matrix<f64>,       // (1, n_features)
}

impl PCA {
    pub fn fit(x: &Matrix<f64>, n_components: usize, iters: usize) -> Self {
        let m = x.rows();
        let n = x.cols();
        assert!(n_components <= n);

        // ----- centre data -----
        let mean_vec = {
            let mut v = Matrix::zeros(1, n);
            for j in 0..n {
                let mut s = 0.0;
                for i in 0..m {
                    s += x[(i, j)];
                }
                v[(0, j)] = s / m as f64;
            }
            v
        };
        let x_centered = x - &mean_vec;

        // ----- covariance matrix C = Xᵀ·X / (m-1) -----
        let cov = x_centered.transpose().dot(&x_centered) * (1.0 / (m as f64 - 1.0));

        // ----- power iteration to find top eigenvectors -----
        let mut comp = Matrix::zeros(n_components, n);
        let mut b = Matrix::zeros(1, n); // current vector
        for c in 0..n_components {
            // random initial vector
            for j in 0..n {
                b[(0, j)] = rand::random::<f64>() - 0.5;
            }
            // subtract projections on previously found components
            for prev in 0..c {
                let proj = b.dot(&Matrix::from_vec(comp.row(prev).to_vec(), 1, n).transpose())[(0, 0)];
                // subtract projection to maintain orthogonality
                for j in 0..n {
                    b[(0, j)] -= proj * comp[(prev, j)];
                }
            }
            // iterate
            for _ in 0..iters {
                // b = C·bᵀ
                let mut nb = cov.dot(&b.transpose()).transpose();
                // subtract projections again to maintain orthogonality
                for prev in 0..c {
                    let proj = nb.dot(&Matrix::from_vec(comp.row(prev).to_vec(), 1, n).transpose())[(0, 0)];
                    for j in 0..n {
                        nb[(0, j)] -= proj * comp[(prev, j)];
                    }
                }
                // normalise
                let norm = nb.data().iter().map(|v| v * v).sum::<f64>().sqrt();
                for j in 0..n {
                    nb[(0, j)] /= norm;
                }
                b = nb;
            }
            // store component
            for j in 0..n {
                comp[(c, j)] = b[(0, j)];
            }
        }
        Self {
            components: comp,
            mean: mean_vec,
        }
    }

    /// Project new data on the learned axes.
    pub fn transform(&self, x: &Matrix<f64>) -> Matrix<f64> {
        let x_centered = x - &self.mean;
        x_centered.dot(&self.components.transpose())
    }
}
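A brief usage sketch (illustrative data; assumes the crate is imported as `rustframe`):

use rustframe::compute::models::pca::PCA;
use rustframe::matrix::Matrix;

fn main() {
    // Six 2-D points stored column-major: feature 0 first, then feature 1.
    let x = Matrix::from_vec(
        vec![
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, // feature 0
            1.1, 1.9, 3.2, 3.9, 5.1, 6.1, // feature 1
        ],
        6,
        2,
    );

    // Keep one principal axis, refined with 50 power-iteration steps.
    let pca = PCA::fit(&x, 1, 50);
    let projected = pca.transform(&x); // shape (6, 1): coordinates along that axis
    assert_eq!((projected.rows(), projected.cols()), (6, 1));
}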
@@ -8,3 +8,6 @@ pub mod frame;

/// Documentation for the [`crate::utils`] module.
pub mod utils;

/// Documentation for the [`crate::compute`] module.
pub mod compute;
@@ -89,6 +89,10 @@ impl<T: Clone> Matrix<T> {
        self.cols
    }

    pub fn shape(&self) -> (usize, usize) {
        (self.rows, self.cols)
    }

    /// Get element reference (immutable). Panics on out-of-bounds.
    pub fn get(&self, r: usize, c: usize) -> &T {
        &self[(r, c)]
@@ -179,6 +183,21 @@ impl<T: Clone> Matrix<T> {
        self.cols -= 1;
    }

    #[inline]
    pub fn row(&self, r: usize) -> Vec<T> {
        assert!(
            r < self.rows,
            "row index {} out of bounds for {} rows",
            r,
            self.rows
        );
        let mut row_data = Vec::with_capacity(self.cols);
        for c in 0..self.cols {
            row_data.push(self[(r, c)].clone()); // Clone each element
        }
        row_data
    }

    /// Deletes a row from the matrix. Panics on out-of-bounds.
    /// This is O(N) where N is the number of elements, as it rebuilds the data vec.
    pub fn delete_row(&mut self, row: usize) {
@@ -308,6 +327,41 @@ impl<T: Clone> Matrix<T> {
        self.data = new_data;
        self.rows = new_rows;
    }

    /// Return a new matrix where row 0 of `self` is repeated `n` times.
    pub fn repeat_rows(&self, n: usize) -> Matrix<T>
    where
        T: Clone,
    {
        let mut data = Vec::with_capacity(n * self.cols());
        let zeroth_row = self.row(0);
        for value in &zeroth_row {
            for _ in 0..n {
                data.push(value.clone()); // Clone each element
            }
        }
        Matrix::from_vec(data, n, self.cols)
    }
}

impl Matrix<f64> {
    /// Creates a new matrix filled with a specific value of the specified size.
    pub fn filled(rows: usize, cols: usize, value: f64) -> Self {
        Matrix {
            rows,
            cols,
            data: vec![value; rows * cols], // Fill with the specified value
        }
    }
    /// Creates a new matrix filled with zeros of the specified size.
    pub fn zeros(rows: usize, cols: usize) -> Self {
        Matrix::filled(rows, cols, 0.0)
    }

    /// Creates a new matrix filled with ones of the specified size.
    pub fn ones(rows: usize, cols: usize) -> Self {
        Matrix::filled(rows, cols, 1.0)
    }
}

impl<T> Index<(usize, usize)> for Matrix<T> {
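A short illustrative sketch of the new helpers (assuming the crate is imported as `rustframe`):

use rustframe::matrix::Matrix;

fn main() {
    // Broadcast a 1-row bias across a batch, as DenseNN does internally.
    let bias = Matrix::from_vec(vec![0.1, 0.2, 0.3], 1, 3);
    let tiled = bias.repeat_rows(4); // 4 x 3, every row equals row 0
    assert_eq!(tiled.shape(), (4, 3));
    assert_eq!(tiled.row(2), vec![0.1, 0.2, 0.3]);

    // f64 convenience constructors.
    let z = Matrix::<f64>::zeros(2, 2);
    let f = Matrix::<f64>::filled(2, 2, 42.5);
    assert_eq!(z.data(), &[0.0, 0.0, 0.0, 0.0]);
    assert_eq!(f.data(), &[42.5, 42.5, 42.5, 42.5]);
}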
@@ -1110,6 +1164,40 @@ mod tests {
        matrix[(0, 3)] = 99;
    }

    #[test]
    fn test_row() {
        let ma = static_test_matrix();
        assert_eq!(ma.row(0), &[1, 4, 7]);
        assert_eq!(ma.row(1), &[2, 5, 8]);
        assert_eq!(ma.row(2), &[3, 6, 9]);
    }

    #[test]
    fn test_shape() {
        let ma = static_test_matrix_2x4();
        assert_eq!(ma.shape(), (2, 4));
        assert_eq!(ma.rows(), 2);
        assert_eq!(ma.cols(), 4);
    }

    #[test]
    fn test_repeat_rows() {
        let ma = static_test_matrix();
        // Returns a new matrix where row 0 of `self` is repeated `n` times.
        let repeated = ma.repeat_rows(3);
        // assert all rows are equal to the first row
        for r in 0..repeated.rows() {
            assert_eq!(repeated.row(r), ma.row(0));
        }
    }

    #[test]
    #[should_panic(expected = "row index 3 out of bounds for 3 rows")]
    fn test_row_out_of_bounds() {
        let ma = static_test_matrix();
        ma.row(3);
    }

    #[test]
    fn test_column() {
        let matrix = static_test_matrix_2x4();
@@ -1794,4 +1882,25 @@ mod tests {
            }
        }
    }

    #[test]
    fn test_matrix_zeros_ones_filled() {
        // Test zeros
        let m = Matrix::<f64>::zeros(2, 3);
        assert_eq!(m.rows(), 2);
        assert_eq!(m.cols(), 3);
        assert_eq!(m.data(), &[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);

        // Test ones
        let m = Matrix::<f64>::ones(3, 2);
        assert_eq!(m.rows(), 3);
        assert_eq!(m.cols(), 2);
        assert_eq!(m.data(), &[1.0, 1.0, 1.0, 1.0, 1.0, 1.0]);

        // Test filled
        let m = Matrix::<f64>::filled(2, 2, 42.5);
        assert_eq!(m.rows(), 2);
        assert_eq!(m.cols(), 2);
        assert_eq!(m.data(), &[42.5, 42.5, 42.5, 42.5]);
    }
}