diff --git a/.github/htmldocs/index.html b/.github/htmldocs/index.html index b39f850..3b0d9c3 100644 --- a/.github/htmldocs/index.html +++ b/.github/htmldocs/index.html @@ -58,6 +58,14 @@

A lightweight dataframe & math toolkit for Rust


+ + 🐙 GitHub +

+ + 📖 User Guide +

+ + 📚 Docs | 📊 Benchmarks @@ -65,8 +73,7 @@ 🦀 Crates.io | 🔖 docs.rs

- 🐙 GitHub | - 🌐 Gitea mirror +

diff --git a/.github/workflows/docs-and-testcov.yml b/.github/workflows/docs-and-testcov.yml index 3b40067..38e2914 100644 --- a/.github/workflows/docs-and-testcov.yml +++ b/.github/workflows/docs-and-testcov.yml @@ -153,7 +153,6 @@ jobs: echo "" > target/doc/rustframe/index.html - mkdir output cp tarpaulin-report.html target/doc/docs/ cp tarpaulin-report.json target/doc/docs/ cp tarpaulin-badge.json target/doc/docs/ @@ -166,16 +165,30 @@ jobs: # copy the benchmark report to the output directory cp -r benchmark-report target/doc/ + mkdir output + cp -r target/doc/* output/ + + - name: Build user guide + run: | + cargo binstall mdbook + bash ./docs/build.sh + + - name: Copy user guide to output directory + run: | + mkdir output/user-guide + cp -r docs/book/* output/user-guide/ + - name: Add index.html to output directory run: | - cp .github/htmldocs/index.html target/doc/index.html - cp .github/rustframe_logo.png target/doc/rustframe_logo.png + cp .github/htmldocs/index.html output/index.html + cp .github/rustframe_logo.png output/rustframe_logo.png - name: Upload Pages artifact # if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' uses: actions/upload-pages-artifact@v3 with: - path: target/doc/ + # path: target/doc/ + path: output/ - name: Deploy to GitHub Pages # if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' diff --git a/.github/workflows/run-unit-tests.yml b/.github/workflows/run-unit-tests.yml index 0f4ae0b..8bdf3f3 100644 --- a/.github/workflows/run-unit-tests.yml +++ b/.github/workflows/run-unit-tests.yml @@ -78,3 +78,8 @@ jobs: uses: codecov/test-results-action@v1 with: token: ${{ secrets.CODECOV_TOKEN }} + + - name: Test build user guide + run: | + cargo binstall mdbook + bash ./docs/build.sh diff --git a/.gitignore b/.gitignore index ac6b27b..5198bcf 100644 --- a/.gitignore +++ b/.gitignore @@ -16,4 +16,6 @@ data/ tarpaulin-report.* -.github/htmldocs/rustframe_logo.png \ No newline at end of file 
+.github/htmldocs/rustframe_logo.png + +docs/book/ \ No newline at end of file diff --git a/README.md b/README.md index fd6123c..a52864d 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,12 @@ # rustframe -📚 [Docs](https://magnus167.github.io/rustframe/) | 🐙 [GitHub](https://github.com/Magnus167/rustframe) | 🌐 [Gitea mirror](https://gitea.nulltech.uk/Magnus167/rustframe) | 🦀 [Crates.io](https://crates.io/crates/rustframe) | 🔖 [docs.rs](https://docs.rs/rustframe/latest/rustframe/) +🐙 [GitHub](https://github.com/Magnus167/rustframe) | 📚 [Docs](https://magnus167.github.io/rustframe/) | 📖 [User Guide](https://magnus167.github.io/rustframe/user-guide/) | 🦀 [Crates.io](https://crates.io/crates/rustframe) | 🔖 [docs.rs](https://docs.rs/rustframe/latest/rustframe/) [![codecov](https://codecov.io/gh/Magnus167/rustframe/graph/badge.svg?token=J7ULJEFTVI)](https://codecov.io/gh/Magnus167/rustframe) [![Coverage](https://img.shields.io/endpoint?url=https://magnus167.github.io/rustframe/docs/tarpaulin-badge.json)](https://magnus167.github.io/rustframe/docs/tarpaulin-report.html) +[![gitea-mirror](https://img.shields.io/badge/git_mirror-blue)](https://gitea.nulltech.uk/Magnus167/rustframe) --- @@ -198,3 +199,14 @@ To run the benchmarks, use: ```bash cargo bench --features "bench" ``` + +## Building the user-guide + +To build the user guide, use: + +```bash +cargo binstall mdbook +bash docs/build.sh +``` + +This will generate the user guide in the `docs/book` directory. diff --git a/docs/book.toml b/docs/book.toml new file mode 100644 index 0000000..a742c0e --- /dev/null +++ b/docs/book.toml @@ -0,0 +1,7 @@ +[book] +title = "Rustframe User Guide" +authors = ["Palash Tyagi (https://github.com/Magnus167)"] +description = "Guided journey through Rustframe capabilities." 
+ +[build] +build-dir = "book" diff --git a/docs/build.sh b/docs/build.sh new file mode 100755 index 0000000..38414e9 --- /dev/null +++ b/docs/build.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env sh +# Build and test the Rustframe user guide using mdBook. +set -e + +cd docs +bash gen.sh "$@" +cd .. \ No newline at end of file diff --git a/docs/gen.sh b/docs/gen.sh new file mode 100644 index 0000000..02e9949 --- /dev/null +++ b/docs/gen.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env sh + +set -e + +cargo clean + +cargo build --manifest-path ../Cargo.toml + +mdbook test -L ../target/debug/deps "$@" + +mdbook build "$@" + +cargo build +# cargo build --release diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md new file mode 100644 index 0000000..4479e5d --- /dev/null +++ b/docs/src/SUMMARY.md @@ -0,0 +1,7 @@ +# Summary + +- [Introduction](./introduction.md) +- [Data Manipulation](./data-manipulation.md) +- [Compute Features](./compute.md) +- [Machine Learning](./machine-learning.md) +- [Utilities](./utilities.md) diff --git a/docs/src/compute.md b/docs/src/compute.md new file mode 100644 index 0000000..f478534 --- /dev/null +++ b/docs/src/compute.md @@ -0,0 +1,222 @@ +# Compute Features + +The `compute` module hosts numerical routines for exploratory data analysis. +It covers descriptive statistics, correlations, probability distributions and +some basic inferential tests. 
+ +## Basic Statistics + +```rust +# extern crate rustframe; +use rustframe::compute::stats::{mean, mean_horizontal, mean_vertical, stddev, median, population_variance, percentile}; +use rustframe::matrix::Matrix; + +let m = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2); +assert_eq!(mean(&m), 2.5); +assert_eq!(stddev(&m), 1.118033988749895); +assert_eq!(median(&m), 2.5); +assert_eq!(population_variance(&m), 1.25); +assert_eq!(percentile(&m, 50.0), 3.0); +// row means come back as a rows x 1 matrix, column means as a 1 x cols matrix +let row_means = mean_horizontal(&m); +assert_eq!(row_means.data(), &[2.0, 3.0]); +let col_means = mean_vertical(&m); +assert_eq!(col_means.data(), & [1.5, 3.5]); +``` + +### Axis-specific Operations + +Operations can be applied along specific axes (rows or columns): + +```rust +# extern crate rustframe; +use rustframe::compute::stats::{mean_vertical, mean_horizontal, stddev_vertical, stddev_horizontal}; +use rustframe::matrix::Matrix; + +// 3x2 matrix +let m = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], 3, 2); + +// Mean along columns (vertical) - returns 1 x cols matrix +let col_means = mean_vertical(&m); +assert_eq!(col_means.shape(), (1, 2)); +assert_eq!(col_means.data(), &[3.0, 4.0]); // [(1+3+5)/3, (2+4+6)/3] + +// Mean along rows (horizontal) - returns rows x 1 matrix +let row_means = mean_horizontal(&m); +assert_eq!(row_means.shape(), (3, 1)); +assert_eq!(row_means.data(), &[1.5, 3.5, 5.5]); // [(1+2)/2, (3+4)/2, (5+6)/2] + +// Standard deviation along columns +let col_stddev = stddev_vertical(&m); +assert_eq!(col_stddev.shape(), (1, 2)); + +// Standard deviation along rows +let row_stddev = stddev_horizontal(&m); +assert_eq!(row_stddev.shape(), (3, 1)); +``` + +## Correlation + +```rust +# extern crate rustframe; +use rustframe::compute::stats::{pearson, covariance}; +use rustframe::matrix::Matrix; + +let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2); +let y = Matrix::from_vec(vec![2.0, 4.0, 6.0, 8.0], 2, 2); +let corr = pearson(&x, &y); 
+let cov = covariance(&x, &y); +assert!((corr - 1.0).abs() < 1e-8); +assert!((cov - 2.5).abs() < 1e-8); +``` + +## Covariance + +### `covariance` + +Computes the population covariance between two equally sized matrices by flattening +their values. + +```rust +# extern crate rustframe; +use rustframe::compute::stats::covariance; +use rustframe::matrix::Matrix; + +let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2); +let y = Matrix::from_vec(vec![2.0, 4.0, 6.0, 8.0], 2, 2); +let cov = covariance(&x, &y); +assert!((cov - 2.5).abs() < 1e-8); +``` + +### `covariance_vertical` + +Evaluates covariance between columns (i.e. across rows) and returns a matrix of +column pair covariances. + +```rust +# extern crate rustframe; +use rustframe::compute::stats::covariance_vertical; +use rustframe::matrix::Matrix; + +let m = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2); +let cov = covariance_vertical(&m); +assert_eq!(cov.shape(), (2, 2)); +assert!(cov.data().iter().all(|&v| (v - 1.0).abs() < 1e-8)); +``` + +### `covariance_horizontal` + +Computes covariance between rows (i.e. across columns) returning a matrix that +describes how each pair of rows varies together. + +```rust +# extern crate rustframe; +use rustframe::compute::stats::covariance_horizontal; +use rustframe::matrix::Matrix; + +let m = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2); +let cov = covariance_horizontal(&m); +assert_eq!(cov.shape(), (2, 2)); +assert!(cov.data().iter().all(|&v| (v - 0.25).abs() < 1e-8)); +``` + +### `covariance_matrix` + +Builds a covariance matrix either between columns (`Axis::Col`) or rows +(`Axis::Row`). Each entry represents how two series co-vary. 
+ +```rust +# extern crate rustframe; +use rustframe::compute::stats::covariance_matrix; +use rustframe::matrix::{Axis, Matrix}; + +let data = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2); + +// Covariance between columns +let cov_cols = covariance_matrix(&data, Axis::Col); +assert!((cov_cols.get(0, 0) - 2.0).abs() < 1e-8); + +// Covariance between rows +let cov_rows = covariance_matrix(&data, Axis::Row); +assert!((cov_rows.get(0, 1) + 0.5).abs() < 1e-8); +``` + +## Distributions + +Probability distribution helpers are available for common PDFs and CDFs. + +```rust +# extern crate rustframe; +use rustframe::compute::stats::distributions::normal_pdf; +use rustframe::matrix::Matrix; + +let x = Matrix::from_vec(vec![0.0, 1.0], 1, 2); +let pdf = normal_pdf(x, 0.0, 1.0); +assert_eq!(pdf.data().len(), 2); +``` + +### Additional Distributions + +Rustframe provides several other probability distributions: + +```rust +# extern crate rustframe; +use rustframe::compute::stats::distributions::{normal_cdf, binomial_pmf, binomial_cdf, poisson_pmf}; +use rustframe::matrix::Matrix; + +// Normal distribution CDF +let x = Matrix::from_vec(vec![0.0, 1.0], 1, 2); +let cdf = normal_cdf(x, 0.0, 1.0); +assert_eq!(cdf.data().len(), 2); + +// Binomial distribution PMF +// Probability of k successes in n trials with probability p +let k = Matrix::from_vec(vec![0_u64, 1, 2, 3], 1, 4); +let pmf = binomial_pmf(3, k.clone(), 0.5); +assert_eq!(pmf.data().len(), 4); + +// Binomial distribution CDF +let cdf = binomial_cdf(3, k, 0.5); +assert_eq!(cdf.data().len(), 4); + +// Poisson distribution PMF +// Probability of k events with rate parameter lambda +let k = Matrix::from_vec(vec![0_u64, 1, 2], 1, 3); +let pmf = poisson_pmf(2.0, k); +assert_eq!(pmf.data().len(), 3); +``` + +### Inferential Statistics + +Rustframe provides several inferential statistical tests: + +```rust +# extern crate rustframe; +use rustframe::matrix::Matrix; +use rustframe::compute::stats::inferential::{t_test, 
chi2_test, anova}; + +// Two-sample t-test +let sample1 = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0], 1, 5); +let sample2 = Matrix::from_vec(vec![6.0, 7.0, 8.0, 9.0, 10.0], 1, 5); +let (t_statistic, p_value) = t_test(&sample1, &sample2); +assert!((t_statistic + 5.0).abs() < 1e-5); +assert!(p_value > 0.0 && p_value < 1.0); + +// Chi-square test of independence +let observed = Matrix::from_vec(vec![12.0, 5.0, 8.0, 10.0], 2, 2); +let (chi2_statistic, p_value) = chi2_test(&observed); +assert!(chi2_statistic > 0.0); +assert!(p_value > 0.0 && p_value < 1.0); + +// One-way ANOVA +let group1 = Matrix::from_vec(vec![1.0, 2.0, 3.0], 1, 3); +let group2 = Matrix::from_vec(vec![2.0, 3.0, 4.0], 1, 3); +let group3 = Matrix::from_vec(vec![3.0, 4.0, 5.0], 1, 3); +let groups = vec![&group1, &group2, &group3]; +let (f_statistic, p_value) = anova(groups); +assert!(f_statistic > 0.0); +assert!(p_value > 0.0 && p_value < 1.0); +``` + +With the basics covered, explore predictive models in the +[machine learning](./machine-learning.md) chapter. diff --git a/docs/src/data-manipulation.md b/docs/src/data-manipulation.md new file mode 100644 index 0000000..31f91c9 --- /dev/null +++ b/docs/src/data-manipulation.md @@ -0,0 +1,157 @@ +# Data Manipulation + +Rustframe's `Frame` type couples tabular data with +column labels and a typed row index. Frames expose a familiar API for loading +data, selecting rows or columns and performing aggregations. + +## Creating a Frame + +```rust +# extern crate rustframe; +use rustframe::frame::{Frame, RowIndex}; +use rustframe::matrix::Matrix; + +let data = Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]); +let frame = Frame::new(data, vec!["A", "B"], None); +assert_eq!(frame["A"], vec![1.0, 2.0]); +``` + +## Indexing Rows + +Row labels can be integers, dates or a default range. Retrieving a row returns a +view that lets you inspect values by column name or position. 
+ +```rust +# extern crate rustframe; +# extern crate chrono; +use chrono::NaiveDate; +use rustframe::frame::{Frame, RowIndex}; +use rustframe::matrix::Matrix; + +let d = |y, m, d| NaiveDate::from_ymd_opt(y, m, d).unwrap(); +let data = Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]); +let index = RowIndex::Date(vec![d(2024, 1, 1), d(2024, 1, 2)]); +let mut frame = Frame::new(data, vec!["A", "B"], Some(index)); +assert_eq!(frame.get_row_date(d(2024, 1, 2))["B"], 4.0); + +// mutate by row key +frame.get_row_date_mut(d(2024, 1, 1)).set_by_index(0, 9.0); +assert_eq!(frame.get_row_date(d(2024, 1, 1))["A"], 9.0); +``` + +## Column operations + +Columns can be inserted, renamed, removed or reordered in place. + +```rust +# extern crate rustframe; +use rustframe::frame::{Frame, RowIndex}; +use rustframe::matrix::Matrix; + +let data = Matrix::from_cols(vec![vec![1, 2], vec![3, 4]]); +let mut frame = Frame::new(data, vec!["X", "Y"], Some(RowIndex::Range(0..2))); + +frame.add_column("Z", vec![5, 6]); +frame.rename("Y", "W"); +let removed = frame.delete_column("X"); +assert_eq!(removed, vec![1, 2]); +frame.sort_columns(); +assert_eq!(frame.columns(), &["W", "Z"]); +``` + +## Aggregations + +Any numeric aggregation available on `Matrix` is forwarded to `Frame`. 
+ +```rust +# extern crate rustframe; +use rustframe::frame::Frame; +use rustframe::matrix::{Matrix, SeriesOps}; + +let frame = Frame::new(Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]), vec!["A", "B"], None); +assert_eq!(frame.sum_vertical(), vec![3.0, 7.0]); +assert_eq!(frame.sum_horizontal(), vec![4.0, 6.0]); +``` + +## Matrix Operations + +```rust +# extern crate rustframe; +use rustframe::matrix::Matrix; + +let data1 = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2); +let data2 = Matrix::from_vec(vec![5.0, 6.0, 7.0, 8.0], 2, 2); + +let sum = data1.clone() + data2.clone(); +assert_eq!(sum.data(), vec![6.0, 8.0, 10.0, 12.0]); + +let product = data1.clone() * data2.clone(); +assert_eq!(product.data(), vec![5.0, 12.0, 21.0, 32.0]); + +let scalar_product = data1.clone() * 2.0; +assert_eq!(scalar_product.data(), vec![2.0, 4.0, 6.0, 8.0]); + +let equals = data1 == data1.clone(); +assert_eq!(equals, true); +``` + +### Advanced Matrix Operations + +Matrices support a variety of advanced operations: + +```rust +# extern crate rustframe; +use rustframe::matrix::{Matrix, SeriesOps}; + +// Matrix multiplication (dot product) +let a = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2); +let b = Matrix::from_vec(vec![5.0, 6.0, 7.0, 8.0], 2, 2); +let product = a.matrix_mul(&b); +assert_eq!(product.data(), vec![23.0, 34.0, 31.0, 46.0]); + +// Transpose +let m = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2); +let transposed = m.transpose(); +assert_eq!(transposed.data(), vec![1.0, 3.0, 2.0, 4.0]); + +// Map function over all elements +let m = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2); +let squared = m.map(|x| x * x); +assert_eq!(squared.data(), vec![1.0, 4.0, 9.0, 16.0]); + +// Zip two matrices with a function +let a = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2); +let b = Matrix::from_vec(vec![5.0, 6.0, 7.0, 8.0], 2, 2); +let zipped = a.zip(&b, |x, y| x + y); +assert_eq!(zipped.data(), vec![6.0, 8.0, 10.0, 12.0]); +``` + +### Matrix Reductions + +Matrices 
support various reduction operations: + +```rust +# extern crate rustframe; +use rustframe::matrix::{Matrix, SeriesOps}; + +let m = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], 3, 2); + +// Sum along columns (vertical) +let col_sums = m.sum_vertical(); +assert_eq!(col_sums, vec![9.0, 12.0]); // [1+3+5, 2+4+6] + +// Sum along rows (horizontal) +let row_sums = m.sum_horizontal(); +assert_eq!(row_sums, vec![3.0, 7.0, 11.0]); // [1+2, 3+4, 5+6] + +// Cumulative sum along columns +let col_cumsum = m.cumsum_vertical(); +assert_eq!(col_cumsum.data(), vec![1.0, 4.0, 9.0, 2.0, 6.0, 12.0]); + +// Cumulative sum along rows +let row_cumsum = m.cumsum_horizontal(); +assert_eq!(row_cumsum.data(), vec![1.0, 3.0, 5.0, 3.0, 7.0, 11.0]); +``` + +With the basics covered, continue to the [compute features](./compute.md) +chapter for statistics and analytics. diff --git a/docs/src/introduction.md b/docs/src/introduction.md new file mode 100644 index 0000000..1fa82e9 --- /dev/null +++ b/docs/src/introduction.md @@ -0,0 +1,40 @@ +# Introduction + +🐙 [GitHub](https://github.com/Magnus167/rustframe) | 📚 [Docs](https://magnus167.github.io/rustframe/) | 📖 [User Guide](https://magnus167.github.io/rustframe/user-guide/) | 🦀 [Crates.io](https://crates.io/crates/rustframe) | 🔖 [docs.rs](https://docs.rs/rustframe/latest/rustframe/) + +Welcome to the **Rustframe User Guide**. Rustframe is a lightweight dataframe +and math toolkit for Rust written in 100% safe Rust. It focuses on keeping the +API approachable while offering handy features for small analytical or +educational projects. 
+ +Rustframe bundles: + +- column‑labelled frames built on a fast column‑major matrix +- familiar element‑wise math and aggregation routines +- a growing `compute` module for statistics and machine learning +- utilities for dates and random numbers + +```rust +# extern crate rustframe; +use rustframe::{frame::Frame, matrix::{Matrix, SeriesOps}}; + +let data = Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]); +let frame = Frame::new(data, vec!["A", "B"], None); + +// Perform column wise aggregation +assert_eq!(frame.sum_vertical(), vec![3.0, 7.0]); +``` + +## Resources + +- [GitHub repository](https://github.com/Magnus167/rustframe) +- [Crates.io](https://crates.io/crates/rustframe) & [API docs](https://docs.rs/rustframe) +- [Code coverage](https://codecov.io/gh/Magnus167/rustframe) + +This guide walks through the main building blocks of the library. Each chapter +contains runnable snippets so you can follow along: + +1. [Data manipulation](./data-manipulation.md) for loading and transforming data +2. [Compute features](./compute.md) for statistics and analytics +3. [Machine learning](./machine-learning.md) for predictive models +4. [Utilities](./utilities.md) for supporting helpers and upcoming modules diff --git a/docs/src/machine-learning.md b/docs/src/machine-learning.md new file mode 100644 index 0000000..445e867 --- /dev/null +++ b/docs/src/machine-learning.md @@ -0,0 +1,282 @@ +# Machine Learning + +The `compute::models` module bundles several learning algorithms that operate on +`Matrix` structures. These examples highlight the basic training and prediction +APIs. For more end‑to‑end walkthroughs see the examples directory in the +repository. 
+ +Currently implemented models include: + +- Linear and logistic regression +- K‑means clustering +- Principal component analysis (PCA) +- Gaussian Naive Bayes +- Dense neural networks + +## Linear Regression + +```rust +# extern crate rustframe; +use rustframe::compute::models::linreg::LinReg; +use rustframe::matrix::Matrix; + +let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1); +let y = Matrix::from_vec(vec![2.0, 3.0, 4.0, 5.0], 4, 1); +let mut model = LinReg::new(1); +model.fit(&x, &y, 0.01, 100); +let preds = model.predict(&x); +assert_eq!(preds.rows(), 4); +``` + +## K-means Walkthrough + +```rust +# extern crate rustframe; +use rustframe::compute::models::k_means::KMeans; +use rustframe::matrix::Matrix; + +let data = Matrix::from_vec(vec![1.0, 1.0, 5.0, 5.0], 2, 2); +let (model, _labels) = KMeans::fit(&data, 2, 10, 1e-4); +let new_point = Matrix::from_vec(vec![0.0, 0.0], 1, 2); +let cluster = model.predict(&new_point)[0]; +``` + +## Logistic Regression + +```rust +# extern crate rustframe; +use rustframe::compute::models::logreg::LogReg; +use rustframe::matrix::Matrix; + +let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1); +let y = Matrix::from_vec(vec![0.0, 0.0, 1.0, 1.0], 4, 1); +let mut model = LogReg::new(1); +model.fit(&x, &y, 0.1, 200); +let preds = model.predict_proba(&x); +assert_eq!(preds.rows(), 4); +``` + +## Principal Component Analysis + +```rust +# extern crate rustframe; +use rustframe::compute::models::pca::PCA; +use rustframe::matrix::Matrix; + +let data = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2); +let pca = PCA::fit(&data, 1, 0); +let transformed = pca.transform(&data); +assert_eq!(transformed.cols(), 1); +``` + +## Gaussian Naive Bayes + +Gaussian Naive Bayes classifier for continuous features: + +```rust +# extern crate rustframe; +use rustframe::compute::models::gaussian_nb::GaussianNB; +use rustframe::matrix::Matrix; + +// Training data with 2 features +let x = Matrix::from_rows_vec(vec![ + 1.0, 2.0, + 2.0, 3.0, + 
3.0, 4.0, + 4.0, 5.0 +], 4, 2); + +// Class labels (0 or 1) +let y = Matrix::from_vec(vec![0.0, 0.0, 1.0, 1.0], 4, 1); + +// Train the model +let mut model = GaussianNB::new(1e-9, true); +model.fit(&x, &y); + +// Make predictions +let predictions = model.predict(&x); +assert_eq!(predictions.rows(), 4); +``` + +## Dense Neural Networks + +Simple fully connected neural network: + +```rust +# extern crate rustframe; +use rustframe::compute::models::dense_nn::{DenseNN, DenseNNConfig, ActivationKind, InitializerKind, LossKind}; +use rustframe::matrix::Matrix; + +// Training data with 2 features +let x = Matrix::from_rows_vec(vec![ + 0.0, 0.0, + 0.0, 1.0, + 1.0, 0.0, + 1.0, 1.0 +], 4, 2); + +// XOR target outputs +let y = Matrix::from_vec(vec![0.0, 1.0, 1.0, 0.0], 4, 1); + +// Create a neural network with 2 hidden layers +let config = DenseNNConfig { + input_size: 2, + hidden_layers: vec![4, 4], + output_size: 1, + activations: vec![ActivationKind::Sigmoid, ActivationKind::Sigmoid, ActivationKind::Sigmoid], + initializer: InitializerKind::Uniform(0.5), + loss: LossKind::MSE, + learning_rate: 0.1, + epochs: 1000, +}; +let mut model = DenseNN::new(config); + +// Train the model +model.train(&x, &y); + +// Make predictions +let predictions = model.predict(&x); +assert_eq!(predictions.rows(), 4); +``` + +## Real-world Examples + +### Housing Price Prediction + +```rust +# extern crate rustframe; +use rustframe::compute::models::linreg::LinReg; +use rustframe::matrix::Matrix; + +// Features: square feet and bedrooms +let features = Matrix::from_rows_vec(vec![ + 2100.0, 3.0, + 1600.0, 2.0, + 2400.0, 4.0, + 1400.0, 2.0, +], 4, 2); + +// Sale prices +let target = Matrix::from_vec(vec![400_000.0, 330_000.0, 369_000.0, 232_000.0], 4, 1); + +let mut model = LinReg::new(2); +model.fit(&features, &target, 1e-8, 10_000); + +// Predict price of a new home +let new_home = Matrix::from_vec(vec![2000.0, 3.0], 1, 2); +let predicted_price = model.predict(&new_home); +println!("Predicted 
price: ${}", predicted_price.data()[0]); +``` + +### Spam Detection + +```rust +# extern crate rustframe; +use rustframe::compute::models::logreg::LogReg; +use rustframe::matrix::Matrix; + +// 20 e-mails × 5 features = 100 numbers (row-major, spam first) +let x = Matrix::from_rows_vec( + vec![ + // ─────────── spam examples ─────────── + 2.0, 1.0, 1.0, 1.0, 1.0, // "You win a FREE offer - click for money-back bonus!" + 1.0, 0.0, 1.0, 1.0, 0.0, // "FREE offer! Click now!" + 0.0, 2.0, 0.0, 1.0, 1.0, // "Win win win - money inside, click…" + 1.0, 1.0, 0.0, 0.0, 1.0, // "Limited offer to win easy money…" + 1.0, 0.0, 1.0, 0.0, 1.0, // ... + 0.0, 1.0, 1.0, 1.0, 0.0, // ... + 2.0, 0.0, 0.0, 1.0, 1.0, // ... + 0.0, 1.0, 1.0, 0.0, 1.0, // ... + 1.0, 1.0, 1.0, 1.0, 0.0, // ... + 1.0, 0.0, 0.0, 1.0, 1.0, // ... + // ─────────── ham examples ─────────── + 0.0, 0.0, 0.0, 0.0, 0.0, // "See you at the meeting tomorrow." + 0.0, 0.0, 0.0, 1.0, 0.0, // "Here's the Zoom click-link." + 0.0, 0.0, 0.0, 0.0, 1.0, // "Expense report: money attached." + 0.0, 0.0, 0.0, 1.0, 1.0, // ... + 0.0, 1.0, 0.0, 0.0, 0.0, // "Did we win the bid?" + 0.0, 0.0, 0.0, 0.0, 0.0, // ... + 0.0, 0.0, 0.0, 1.0, 0.0, // ... + 1.0, 0.0, 0.0, 0.0, 0.0, // "Special offer for staff lunch." + 0.0, 0.0, 0.0, 0.0, 0.0, // ... + 0.0, 0.0, 0.0, 1.0, 0.0, + ], + 20, + 5, +); + +// Labels: 1 = spam, 0 = ham +let y = Matrix::from_vec( + vec![ + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, // 10 spam + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, // 10 ham + ], + 20, + 1, +); + +// Train +let mut model = LogReg::new(5); +model.fit(&x, &y, 0.01, 5000); + +// Predict +// e.g. 
"free money offer" +let email_data = vec![1.0, 0.0, 1.0, 0.0, 1.0]; +let email = Matrix::from_vec(email_data, 1, 5); +let prob_spam = model.predict_proba(&email); +println!("Probability of spam: {:.4}", prob_spam.data()[0]); +``` + +### Iris Flower Classification + +```rust +# extern crate rustframe; +use rustframe::compute::models::gaussian_nb::GaussianNB; +use rustframe::matrix::Matrix; + +// Features: sepal length and petal length +let x = Matrix::from_rows_vec(vec![ + 5.1, 1.4, // setosa + 4.9, 1.4, // setosa + 6.2, 4.5, // versicolor + 5.9, 5.1, // virginica +], 4, 2); + +let y = Matrix::from_vec(vec![0.0, 0.0, 1.0, 2.0], 4, 1); +let names = vec!["setosa", "versicolor", "virginica"]; + +let mut model = GaussianNB::new(1e-9, true); +model.fit(&x, &y); + +let sample = Matrix::from_vec(vec![5.0, 1.5], 1, 2); +let predicted_class = model.predict(&sample); +let class_name = names[predicted_class.data()[0] as usize]; +println!("Predicted class: {} ({:?})", class_name, predicted_class.data()[0]); +``` + +### Customer Segmentation + +```rust +# extern crate rustframe; +use rustframe::compute::models::k_means::KMeans; +use rustframe::matrix::Matrix; + +// Each row: [age, annual_income] +let customers = Matrix::from_rows_vec( + vec![ + 25.0, 40_000.0, 34.0, 52_000.0, 58.0, 95_000.0, 45.0, 70_000.0, + ], + 4, + 2, +); + +let (model, labels) = KMeans::fit(&customers, 2, 20, 1e-4); + +let new_customer = Matrix::from_vec(vec![30.0, 50_000.0], 1, 2); +let cluster = model.predict(&new_customer)[0]; +println!("New customer belongs to cluster: {}", cluster); +println!("Cluster labels: {:?}", labels); +``` + +For helper functions and upcoming modules, visit the +[utilities](./utilities.md) section. diff --git a/docs/src/utilities.md b/docs/src/utilities.md new file mode 100644 index 0000000..6f0e3ed --- /dev/null +++ b/docs/src/utilities.md @@ -0,0 +1,63 @@ +# Utilities + +Utilities provide handy helpers around the core library. 
Existing tools +include: + +- Date utilities for generating calendar sequences and business‑day sets +- Random number generators for simulations and testing + +## Date Helpers + +```rust +# extern crate rustframe; +use rustframe::utils::dateutils::{BDatesList, BDateFreq, DatesList, DateFreq}; + +// Calendar sequence +let list = DatesList::new("2024-01-01".into(), "2024-01-03".into(), DateFreq::Daily); +assert_eq!(list.count().unwrap(), 3); + +// Business days starting from 2024‑01‑02 +let bdates = BDatesList::from_n_periods("2024-01-02".into(), BDateFreq::Daily, 3).unwrap(); +assert_eq!(bdates.list().unwrap().len(), 3); +``` + +## Random Numbers + +The `random` module offers deterministic and cryptographically secure RNGs. + +```rust +# extern crate rustframe; +use rustframe::random::{Prng, Rng}; + +let mut rng = Prng::new(42); +let v1 = rng.next_u64(); +let v2 = rng.next_u64(); +assert_ne!(v1, v2); +``` + +## Stats Functions + +```rust +# extern crate rustframe; +use rustframe::matrix::Matrix; +use rustframe::compute::stats::descriptive::{mean, median, stddev}; + +let data = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0], 1, 5); + +let mean_value = mean(&data); +assert_eq!(mean_value, 3.0); + +let median_value = median(&data); +assert_eq!(median_value, 3.0); + +let std_value = stddev(&data); +assert_eq!(std_value, 2.0_f64.sqrt()); +``` + +Upcoming utilities will cover: + +- Data import/export helpers +- Visualization adapters +- Streaming data interfaces + +Contributions to these sections are welcome!