Compare commits

..

No commits in common. "96f434bf942880365dad3d2496be3644aa71020a" and "75d07371b2edb3ce6b366cd8cf3df112ba82dd6a" have entirely different histories.

View File

@ -1,5 +1,5 @@
use crate::compute::activations::{drelu, relu, sigmoid};
use crate::matrix::{Matrix, SeriesOps}; use crate::matrix::{Matrix, SeriesOps};
use crate::compute::activations::{relu, drelu, sigmoid};
use rand::prelude::*; use rand::prelude::*;
/// Supported activation functions /// Supported activation functions
@ -118,7 +118,7 @@ impl DenseNN {
); );
let mut weights = Vec::with_capacity(sizes.len() - 1); let mut weights = Vec::with_capacity(sizes.len() - 1);
let mut biases = Vec::with_capacity(sizes.len() - 1); let mut biases = Vec::with_capacity(sizes.len() - 1);
for i in 0..sizes.len() - 1 { for i in 0..sizes.len() - 1 {
let w = config.initializer.initialize(sizes[i], sizes[i + 1]); let w = config.initializer.initialize(sizes[i], sizes[i + 1]);
@ -167,11 +167,7 @@ impl DenseNN {
LossKind::BCE => self.loss.gradient(&y_hat, y), LossKind::BCE => self.loss.gradient(&y_hat, y),
LossKind::MSE => { LossKind::MSE => {
let grad = self.loss.gradient(&y_hat, y); let grad = self.loss.gradient(&y_hat, y);
let dz = self let dz = self.activations.last().unwrap().derivative(zs.last().unwrap());
.activations
.last()
.unwrap()
.derivative(zs.last().unwrap());
grad.zip(&dz, |g, da| g * da) grad.zip(&dz, |g, da| g * da)
} }
}; };
@ -184,7 +180,7 @@ impl DenseNN {
// Update weights & biases // Update weights & biases
self.weights[l] = &self.weights[l] - &(dw * self.lr); self.weights[l] = &self.weights[l] - &(dw * self.lr);
self.biases[l] = &self.biases[l] - &(db * self.lr); self.biases[l] = &self.biases[l] - &(db * self.lr);
// Propagate delta to previous layer // Propagate delta to previous layer
if l > 0 { if l > 0 {
@ -207,22 +203,15 @@ impl DenseNN {
} }
} }
// ------------------------------
// Simple tests
// ------------------------------
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use crate::matrix::Matrix; use crate::matrix::Matrix;
/// Mean squared error between predictions and targets.
///
/// Sums the element-wise squared differences `(ŷ - y)²` over every entry and
/// divides the total by `m`, the number of rows in `y`.
fn mse_loss(y_hat: &Matrix<f64>, y: &Matrix<f64>) -> f64 {
    // Element-wise squared residuals, same shape as the inputs.
    let squared_errors = y_hat.zip(y, |predicted, target| (predicted - target).powi(2));
    let total: f64 = squared_errors.data().iter().sum();
    let sample_count = y.rows() as f64;
    total / sample_count
}
#[test] #[test]
fn test_predict_shape() { fn test_predict_shape() {
let config = DenseNNConfig { let config = DenseNNConfig {
@ -243,7 +232,7 @@ mod tests {
} }
#[test] #[test]
fn test_train_no_epochs_does_nothing() { fn test_train_no_epochs() {
let config = DenseNNConfig { let config = DenseNNConfig {
input_size: 1, input_size: 1,
hidden_layers: vec![2], hidden_layers: vec![2],
@ -255,86 +244,35 @@ mod tests {
epochs: 0, epochs: 0,
}; };
let mut model = DenseNN::new(config); let mut model = DenseNN::new(config);
let x = Matrix::from_vec(vec![0.0, 1.0], 2, 1); let x = Matrix::from_vec(vec![1.0, 2.0], 2, 1);
let y = Matrix::from_vec(vec![0.0, 1.0], 2, 1);
let before = model.predict(&x); let before = model.predict(&x);
model.train(&x, &y); model.train(&x, &before);
let after = model.predict(&x); let after = model.predict(&x);
for i in 0..before.rows() { for i in 0..before.rows() {
for j in 0..before.cols() { assert!((before[(i, 0)] - after[(i, 0)]).abs() < 1e-12);
assert!(
(before[(i, j)] - after[(i, j)]).abs() < 1e-12,
"prediction changed despite 0 epochs"
);
}
} }
} }
#[test] #[test]
fn test_train_one_epoch_changes_predictions() { fn test_dense_nn_step() {
// Single-layer sigmoid regression so gradients flow.
let config = DenseNNConfig { let config = DenseNNConfig {
input_size: 1, input_size: 1,
hidden_layers: vec![], hidden_layers: vec![2],
activations: vec![ActivationKind::Sigmoid], activations: vec![ActivationKind::Relu, ActivationKind::Sigmoid],
output_size: 1, output_size: 1,
initializer: InitializerKind::Uniform(0.1), initializer: InitializerKind::He,
loss: LossKind::MSE, loss: LossKind::BCE,
learning_rate: 1.0, learning_rate: 0.01,
epochs: 1, epochs: 10000,
}; };
let mut model = DenseNN::new(config); let mut model = DenseNN::new(config);
let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1);
let x = Matrix::from_vec(vec![0.0, 1.0], 2, 1); let y = Matrix::from_vec(vec![0.0, 0.0, 1.0, 1.0], 4, 1);
let y = Matrix::from_vec(vec![0.0, 1.0], 2, 1);
let before = model.predict(&x);
model.train(&x, &y); model.train(&x, &y);
let after = model.predict(&x); let preds = model.predict(&x);
assert!((preds[(0, 0)] - 0.0).abs() < 0.5);
// At least one of the two outputs must move by >ϵ assert!((preds[(1, 0)] - 0.0).abs() < 0.5);
let mut moved = false; assert!((preds[(2, 0)] - 1.0).abs() < 0.5);
for i in 0..before.rows() { assert!((preds[(3, 0)] - 1.0).abs() < 0.5);
if (before[(i, 0)] - after[(i, 0)]).abs() > 1e-8 {
moved = true;
}
}
assert!(moved, "predictions did not change after 1 epoch");
}
#[test]
fn test_training_reduces_mse_loss() {
// Same single-layer sigmoid setup; check loss goes down.
let config = DenseNNConfig {
input_size: 1,
hidden_layers: vec![],
activations: vec![ActivationKind::Sigmoid],
output_size: 1,
initializer: InitializerKind::Uniform(0.1),
loss: LossKind::MSE,
learning_rate: 1.0,
epochs: 10,
};
let mut model = DenseNN::new(config);
let x = Matrix::from_vec(vec![0.0, 1.0, 0.5], 3, 1);
let y = Matrix::from_vec(vec![0.0, 1.0, 0.5], 3, 1);
let before_preds = model.predict(&x);
let before_loss = mse_loss(&before_preds, &y);
model.train(&x, &y);
let after_preds = model.predict(&x);
let after_loss = mse_loss(&after_preds, &y);
assert!(
after_loss < before_loss,
"MSE did not decrease (before: {}, after: {})",
before_loss,
after_loss
);
} }
} }