diff --git a/.github/htmldocs/index.html b/.github/htmldocs/index.html index b39f850..85484c6 100644 --- a/.github/htmldocs/index.html +++ b/.github/htmldocs/index.html @@ -58,6 +58,10 @@

A lightweight dataframe & math toolkit for Rust


+ + 📖 User Guide +

+ 📚 Docs | 📊 Benchmarks diff --git a/.github/workflows/docs-and-testcov.yml b/.github/workflows/docs-and-testcov.yml index 3b40067..38e2914 100644 --- a/.github/workflows/docs-and-testcov.yml +++ b/.github/workflows/docs-and-testcov.yml @@ -153,7 +153,6 @@ jobs: echo "" > target/doc/rustframe/index.html - mkdir output cp tarpaulin-report.html target/doc/docs/ cp tarpaulin-report.json target/doc/docs/ cp tarpaulin-badge.json target/doc/docs/ @@ -166,16 +165,30 @@ jobs: # copy the benchmark report to the output directory cp -r benchmark-report target/doc/ + mkdir output + cp -r target/doc/* output/ + + - name: Build user guide + run: | + cargo binstall mdbook + bash ./docs/build.sh + + - name: Copy user guide to output directory + run: | + mkdir output/user-guide + cp -r docs/book/* output/user-guide/ + - name: Add index.html to output directory run: | - cp .github/htmldocs/index.html target/doc/index.html - cp .github/rustframe_logo.png target/doc/rustframe_logo.png + cp .github/htmldocs/index.html output/index.html + cp .github/rustframe_logo.png output/rustframe_logo.png - name: Upload Pages artifact # if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' uses: actions/upload-pages-artifact@v3 with: - path: target/doc/ + # path: target/doc/ + path: output/ - name: Deploy to GitHub Pages # if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' diff --git a/.github/workflows/run-unit-tests.yml b/.github/workflows/run-unit-tests.yml index 0f4ae0b..8bdf3f3 100644 --- a/.github/workflows/run-unit-tests.yml +++ b/.github/workflows/run-unit-tests.yml @@ -78,3 +78,8 @@ jobs: uses: codecov/test-results-action@v1 with: token: ${{ secrets.CODECOV_TOKEN }} + + - name: Test build user guide + run: | + cargo binstall mdbook + bash ./docs/build.sh diff --git a/.gitignore b/.gitignore index ac6b27b..5198bcf 100644 --- a/.gitignore +++ b/.gitignore @@ -16,4 +16,6 @@ data/ tarpaulin-report.* -.github/htmldocs/rustframe_logo.png \ No 
newline at end of file +.github/htmldocs/rustframe_logo.png + +docs/book/ \ No newline at end of file diff --git a/README.md b/README.md index fd6123c..c3b87c1 100644 --- a/README.md +++ b/README.md @@ -198,3 +198,14 @@ To run the benchmarks, use: ```bash cargo bench --features "bench" ``` + +## Building the user-guide + +To build the user guide, use: + +```bash +cargo binstall mdbook +bash docs/build.sh +``` + +This will generate the user guide in the `docs/book` directory. diff --git a/docs/book.toml b/docs/book.toml new file mode 100644 index 0000000..a742c0e --- /dev/null +++ b/docs/book.toml @@ -0,0 +1,7 @@ +[book] +title = "Rustframe User Guide" +authors = ["Palash Tyagi (https://github.com/Magnus167)"] +description = "Guided journey through Rustframe capabilities." + +[build] +build-dir = "book" diff --git a/docs/build.sh b/docs/build.sh new file mode 100755 index 0000000..38414e9 --- /dev/null +++ b/docs/build.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env sh +# Build and test the Rustframe user guide using mdBook. +set -e + +cd docs +bash gen.sh "$@" +cd .. 
\ No newline at end of file diff --git a/docs/gen.sh b/docs/gen.sh new file mode 100644 index 0000000..02e9949 --- /dev/null +++ b/docs/gen.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env sh + +set -e + +cargo clean + +cargo build --manifest-path ../Cargo.toml + +mdbook test -L ../target/debug/deps "$@" + +mdbook build "$@" + +cargo build +# cargo build --release diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md new file mode 100644 index 0000000..4479e5d --- /dev/null +++ b/docs/src/SUMMARY.md @@ -0,0 +1,7 @@ +# Summary + +- [Introduction](./introduction.md) +- [Data Manipulation](./data-manipulation.md) +- [Compute Features](./compute.md) +- [Machine Learning](./machine-learning.md) +- [Utilities](./utilities.md) diff --git a/docs/src/compute.md b/docs/src/compute.md new file mode 100644 index 0000000..8727935 --- /dev/null +++ b/docs/src/compute.md @@ -0,0 +1,55 @@ +# Compute Features + +The `compute` module hosts numerical routines for exploratory data analysis. +It covers descriptive statistics, correlations, probability distributions and +some basic inferential tests. + +## Basic Statistics + +```rust +# extern crate rustframe; +use rustframe::compute::stats::{mean, mean_vertical, stddev, median}; +use rustframe::matrix::Matrix; + +let m = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2); +assert_eq!(mean(&m), 2.5); +assert_eq!(stddev(&m), 1.118033988749895); +assert_eq!(median(&m), 2.5); +// column averages returned as 1 x n matrix +let col_means = mean_vertical(&m); +assert_eq!(col_means.data(), &[1.5, 3.5]); +``` + +## Correlation + +Correlation functions help measure linear relationships between datasets. 
+ +```rust +# extern crate rustframe; +use rustframe::compute::stats::{pearson, covariance}; +use rustframe::matrix::Matrix; + +let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2); +let y = Matrix::from_vec(vec![2.0, 4.0, 6.0, 8.0], 2, 2); +let corr = pearson(&x, &y); +let cov = covariance(&x, &y); +assert!((corr - 1.0).abs() < 1e-8); +assert!((cov - 2.5).abs() < 1e-8); +``` + +## Distributions + +Probability distribution helpers are available for common PDFs and CDFs. + +```rust +# extern crate rustframe; +use rustframe::compute::stats::distributions::normal_pdf; +use rustframe::matrix::Matrix; + +let x = Matrix::from_vec(vec![0.0, 1.0], 1, 2); +let pdf = normal_pdf(x, 0.0, 1.0); +assert_eq!(pdf.data().len(), 2); +``` + +With the basics covered, explore predictive models in the +[machine learning](./machine-learning.md) chapter. diff --git a/docs/src/data-manipulation.md b/docs/src/data-manipulation.md new file mode 100644 index 0000000..bcb1b77 --- /dev/null +++ b/docs/src/data-manipulation.md @@ -0,0 +1,77 @@ +# Data Manipulation + +Rustframe's `Frame` type couples tabular data with +column labels and a typed row index. Frames expose a familiar API for loading +data, selecting rows or columns and performing aggregations. + +## Creating a Frame + +```rust +# extern crate rustframe; +use rustframe::frame::{Frame, RowIndex}; +use rustframe::matrix::Matrix; + +let data = Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]); +let frame = Frame::new(data, vec!["A", "B"], None); +assert_eq!(frame["A"], vec![1.0, 2.0]); +``` + +## Indexing Rows + +Row labels can be integers, dates or a default range. Retrieving a row returns a +view that lets you inspect values by column name or position. 
+ +```rust +# extern crate rustframe; +# extern crate chrono; +use chrono::NaiveDate; +use rustframe::frame::{Frame, RowIndex}; +use rustframe::matrix::Matrix; + +let d = |y, m, d| NaiveDate::from_ymd_opt(y, m, d).unwrap(); +let data = Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]); +let index = RowIndex::Date(vec![d(2024, 1, 1), d(2024, 1, 2)]); +let mut frame = Frame::new(data, vec!["A", "B"], Some(index)); +assert_eq!(frame.get_row_date(d(2024, 1, 2))["B"], 4.0); + +// mutate by row key +frame.get_row_date_mut(d(2024, 1, 1)).set_by_index(0, 9.0); +assert_eq!(frame.get_row_date(d(2024, 1, 1))["A"], 9.0); +``` + +## Column operations + +Columns can be inserted, renamed, removed or reordered in place. + +```rust +# extern crate rustframe; +use rustframe::frame::{Frame, RowIndex}; +use rustframe::matrix::Matrix; + +let data = Matrix::from_cols(vec![vec![1, 2], vec![3, 4]]); +let mut frame = Frame::new(data, vec!["X", "Y"], Some(RowIndex::Range(0..2))); + +frame.add_column("Z", vec![5, 6]); +frame.rename("Y", "W"); +let removed = frame.delete_column("X"); +assert_eq!(removed, vec![1, 2]); +frame.sort_columns(); +assert_eq!(frame.columns(), &["W", "Z"]); +``` + +## Aggregations + +Any numeric aggregation available on `Matrix` is forwarded to `Frame`. + +```rust +# extern crate rustframe; +use rustframe::frame::Frame; +use rustframe::matrix::{Matrix, SeriesOps}; + +let frame = Frame::new(Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]), vec!["A", "B"], None); +assert_eq!(frame.sum_vertical(), vec![3.0, 7.0]); +assert_eq!(frame.sum_horizontal(), vec![4.0, 6.0]); +``` + +With the basics covered, continue to the [compute features](./compute.md) +chapter for statistics and analytics. diff --git a/docs/src/introduction.md b/docs/src/introduction.md new file mode 100644 index 0000000..eb23e40 --- /dev/null +++ b/docs/src/introduction.md @@ -0,0 +1,38 @@ +# Introduction + +Welcome to the **Rustframe User Guide**. 
Rustframe is a lightweight dataframe +and math toolkit for Rust written in 100% safe Rust. It focuses on keeping the +API approachable while offering handy features for small analytical or +educational projects. + +Rustframe bundles: + +- column‑labelled frames built on a fast column‑major matrix +- familiar element‑wise math and aggregation routines +- a growing `compute` module for statistics and machine learning +- utilities for dates and random numbers + +```rust +# extern crate rustframe; +use rustframe::{frame::Frame, matrix::{Matrix, SeriesOps}}; + +let data = Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]); +let frame = Frame::new(data, vec!["A", "B"], None); + +// Perform column wise aggregation +assert_eq!(frame.sum_vertical(), vec![3.0, 7.0]); +``` + +## Resources + +- [GitHub repository](https://github.com/Magnus167/rustframe) +- [Crates.io](https://crates.io/crates/rustframe) & [API docs](https://docs.rs/rustframe) +- [Code coverage](https://codecov.io/gh/Magnus167/rustframe) + +This guide walks through the main building blocks of the library. Each chapter +contains runnable snippets so you can follow along: + +1. [Data manipulation](./data-manipulation.md) for loading and transforming data +2. [Compute features](./compute.md) for statistics and analytics +3. [Machine learning](./machine-learning.md) for predictive models +4. [Utilities](./utilities.md) for supporting helpers and upcoming modules diff --git a/docs/src/machine-learning.md b/docs/src/machine-learning.md new file mode 100644 index 0000000..ccbec53 --- /dev/null +++ b/docs/src/machine-learning.md @@ -0,0 +1,76 @@ +# Machine Learning + +The `compute::models` module bundles several learning algorithms that operate on +`Matrix` structures. These examples highlight the basic training and prediction +APIs. For more end‑to‑end walkthroughs see the examples directory in the +repository. 
+ +Currently implemented models include: + +- Linear and logistic regression +- K‑means clustering +- Principal component analysis (PCA) +- Gaussian Naive Bayes +- Dense neural networks + +## Linear Regression + +```rust +# extern crate rustframe; +use rustframe::compute::models::linreg::LinReg; +use rustframe::matrix::Matrix; + +let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1); +let y = Matrix::from_vec(vec![2.0, 3.0, 4.0, 5.0], 4, 1); +let mut model = LinReg::new(1); +model.fit(&x, &y, 0.01, 100); +let preds = model.predict(&x); +assert_eq!(preds.rows(), 4); +``` + +## K-means Walkthrough + +```rust +# extern crate rustframe; +use rustframe::compute::models::k_means::KMeans; +use rustframe::matrix::Matrix; + +let data = Matrix::from_vec(vec![1.0, 1.0, 5.0, 5.0], 2, 2); +let (model, _labels) = KMeans::fit(&data, 2, 10, 1e-4); +let new_point = Matrix::from_vec(vec![0.0, 0.0], 1, 2); +let cluster = model.predict(&new_point)[0]; +``` + +For helper functions and upcoming modules, visit the +[utilities](./utilities.md) section. + +## Logistic Regression + +```rust +# extern crate rustframe; +use rustframe::compute::models::logreg::LogReg; +use rustframe::matrix::Matrix; + +let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1); +let y = Matrix::from_vec(vec![0.0, 0.0, 1.0, 1.0], 4, 1); +let mut model = LogReg::new(1); +model.fit(&x, &y, 0.1, 200); +let preds = model.predict_proba(&x); +assert_eq!(preds.rows(), 4); +``` + +## Principal Component Analysis + +```rust +# extern crate rustframe; +use rustframe::compute::models::pca::PCA; +use rustframe::matrix::Matrix; + +let data = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2); +let pca = PCA::fit(&data, 1, 0); +let transformed = pca.transform(&data); +assert_eq!(transformed.cols(), 1); +``` + +For helper functions and upcoming modules, visit the +[utilities](./utilities.md) section. 
diff --git a/docs/src/utilities.md b/docs/src/utilities.md new file mode 100644 index 0000000..28ddaab --- /dev/null +++ b/docs/src/utilities.md @@ -0,0 +1,44 @@ +# Utilities + +Utilities provide handy helpers around the core library. Existing tools +include: + +- Date utilities for generating calendar sequences and business‑day sets +- Random number generators for simulations and testing + +## Date Helpers + +```rust +# extern crate rustframe; +use rustframe::utils::dateutils::{BDatesList, BDateFreq, DatesList, DateFreq}; + +// Calendar sequence +let list = DatesList::new("2024-01-01".into(), "2024-01-03".into(), DateFreq::Daily); +assert_eq!(list.count().unwrap(), 3); + +// Business days starting from 2024‑01‑02 +let bdates = BDatesList::from_n_periods("2024-01-02".into(), BDateFreq::Daily, 3).unwrap(); +assert_eq!(bdates.list().unwrap().len(), 3); +``` + +## Random Numbers + +The `random` module offers deterministic and cryptographically secure RNGs. + +```rust +# extern crate rustframe; +use rustframe::random::{Prng, Rng}; + +let mut rng = Prng::new(42); +let v1 = rng.next_u64(); +let v2 = rng.next_u64(); +assert_ne!(v1, v2); +``` + +Upcoming utilities will cover: + +- Data import/export helpers +- Visualization adapters +- Streaming data interfaces + +Contributions to these sections are welcome!