mirror of
https://github.com/Magnus167/rustframe.git
synced 2025-11-19 15:46:10 +00:00
Compare commits
74 Commits
d851c500af
...
dataframe
| Author | SHA1 | Date | |
|---|---|---|---|
| 811c153eaf | |||
| c53693fa7b | |||
| 109d39b248 | |||
|
|
18ad6c689a | ||
| 1fead78b69 | |||
|
|
6fb32e743c | ||
| 2cb4e46217 | |||
|
|
a53ba63f30 | ||
|
|
dae60ea1bd | ||
|
|
755dee58e7 | ||
|
|
9e6e22fc37 | ||
|
|
b687fd4e6b | ||
|
|
68a01ab528 | ||
|
|
23a01dab07 | ||
|
|
f4ebd78234 | ||
|
|
1475156855 | ||
|
|
080680d095 | ||
|
|
2845f357b7 | ||
|
|
3d11226d57 | ||
|
|
039fb1a98e | ||
|
|
31a5ba2460 | ||
|
|
1a9f397702 | ||
|
|
ecd06eb352 | ||
|
|
ae327b6060 | ||
|
|
83ac9d4821 | ||
|
|
ae27ed9373 | ||
|
|
c7552f2264 | ||
|
|
3654c7053c | ||
|
|
1dcd9727b4 | ||
|
|
b62152b4f0 | ||
|
|
a6a901d6ab | ||
|
|
676af850ef | ||
|
|
ca2ca2a738 | ||
|
|
4876a74e01 | ||
|
|
b78dd75e77 | ||
|
|
9db8853d75 | ||
|
|
9738154dac | ||
| 7d0978e5fb | |||
|
|
ed01c4b8f2 | ||
|
|
e6964795e3 | ||
|
|
d1dd7ea6d2 | ||
|
|
676f78bb1e | ||
|
|
f7325a9558 | ||
|
|
18b9eef063 | ||
|
|
f99f78d508 | ||
| 2926a8a6e8 | |||
|
|
bce1bdd21a | ||
| 39a95e63d9 | |||
| 1de8ba4f2d | |||
| 74bec4b69e | |||
| 58b38311b5 | |||
| 4ed23069fc | |||
|
|
7d7794627b | ||
| d9bdf8ee96 | |||
| a61ff8a4e1 | |||
|
|
26ee580710 | ||
|
|
96934cd89f | ||
|
|
27ab1ac129 | ||
|
|
eb4fefe363 | ||
|
|
60cc97e702 | ||
|
|
7e2a5ec18d | ||
|
|
4038d25b07 | ||
|
|
aa15248b58 | ||
|
|
fa392ec631 | ||
|
|
8b6f16236a | ||
|
|
58acea8467 | ||
|
|
2607d9c3b0 | ||
|
|
57ed06f79b | ||
|
|
01a132264f | ||
|
|
ff4535c56b | ||
| 9b480e8130 | |||
|
|
fe666a4ddb | ||
|
|
b80d5ab381 | ||
|
|
49f7558225 |
11
.github/htmldocs/index.html
vendored
11
.github/htmldocs/index.html
vendored
@@ -58,6 +58,14 @@
|
||||
<h2>A lightweight dataframe & math toolkit for Rust</h2>
|
||||
<hr style="border: 1px solid #d4d4d4; margin: 20px 0;">
|
||||
<p>
|
||||
|
||||
🐙 <a href="https://github.com/Magnus167/rustframe">GitHub</a>
|
||||
<br><br>
|
||||
|
||||
📖 <a href="https://magnus167.github.io/rustframe/user-guide">User Guide</a>
|
||||
<br><br>
|
||||
|
||||
|
||||
📚 <a href="https://magnus167.github.io/rustframe/docs">Docs</a> |
|
||||
📊 <a href="https://magnus167.github.io/rustframe/benchmark-report/">Benchmarks</a>
|
||||
|
||||
@@ -65,8 +73,7 @@
|
||||
🦀 <a href="https://crates.io/crates/rustframe">Crates.io</a> |
|
||||
🔖 <a href="https://docs.rs/rustframe/latest/rustframe/">docs.rs</a>
|
||||
<br><br>
|
||||
🐙 <a href="https://github.com/Magnus167/rustframe">GitHub</a> |
|
||||
🌐 <a href="https://gitea.nulltech.uk/Magnus167/rustframe">Gitea mirror</a>
|
||||
<!-- 🌐 <a href="https://gitea.nulltech.uk/Magnus167/rustframe">Gitea mirror</a> -->
|
||||
</p>
|
||||
</main>
|
||||
</body>
|
||||
|
||||
21
.github/workflows/docs-and-testcov.yml
vendored
21
.github/workflows/docs-and-testcov.yml
vendored
@@ -153,7 +153,6 @@ jobs:
|
||||
|
||||
echo "<meta http-equiv=\"refresh\" content=\"0; url=../docs/index.html\">" > target/doc/rustframe/index.html
|
||||
|
||||
mkdir output
|
||||
cp tarpaulin-report.html target/doc/docs/
|
||||
cp tarpaulin-report.json target/doc/docs/
|
||||
cp tarpaulin-badge.json target/doc/docs/
|
||||
@@ -166,16 +165,30 @@ jobs:
|
||||
# copy the benchmark report to the output directory
|
||||
cp -r benchmark-report target/doc/
|
||||
|
||||
mkdir output
|
||||
cp -r target/doc/* output/
|
||||
|
||||
- name: Build user guide
|
||||
run: |
|
||||
cargo binstall mdbook
|
||||
bash ./docs/build.sh
|
||||
|
||||
- name: Copy user guide to output directory
|
||||
run: |
|
||||
mkdir output/user-guide
|
||||
cp -r docs/book/* output/user-guide/
|
||||
|
||||
- name: Add index.html to output directory
|
||||
run: |
|
||||
cp .github/htmldocs/index.html target/doc/index.html
|
||||
cp .github/rustframe_logo.png target/doc/rustframe_logo.png
|
||||
cp .github/htmldocs/index.html output/index.html
|
||||
cp .github/rustframe_logo.png output/rustframe_logo.png
|
||||
|
||||
- name: Upload Pages artifact
|
||||
# if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
|
||||
uses: actions/upload-pages-artifact@v3
|
||||
with:
|
||||
path: target/doc/
|
||||
# path: target/doc/
|
||||
path: output/
|
||||
|
||||
- name: Deploy to GitHub Pages
|
||||
# if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
|
||||
|
||||
5
.github/workflows/run-unit-tests.yml
vendored
5
.github/workflows/run-unit-tests.yml
vendored
@@ -78,3 +78,8 @@ jobs:
|
||||
uses: codecov/test-results-action@v1
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
|
||||
- name: Test build user guide
|
||||
run: |
|
||||
cargo binstall mdbook
|
||||
bash ./docs/build.sh
|
||||
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -16,4 +16,6 @@ data/
|
||||
|
||||
tarpaulin-report.*
|
||||
|
||||
.github/htmldocs/rustframe_logo.png
|
||||
.github/htmldocs/rustframe_logo.png
|
||||
|
||||
docs/book/
|
||||
@@ -1,11 +1,12 @@
|
||||
[package]
|
||||
name = "rustframe"
|
||||
authors = ["Palash Tyagi (https://github.com/Magnus167)"]
|
||||
version = "0.0.1-a.20250716"
|
||||
version = "0.0.1-a.20250805"
|
||||
edition = "2021"
|
||||
license = "GPL-3.0-or-later"
|
||||
readme = "README.md"
|
||||
description = "A simple dataframe library"
|
||||
description = "A simple dataframe and math toolkit"
|
||||
documentation = "https://magnus167.github.io/rustframe/"
|
||||
|
||||
[lib]
|
||||
name = "rustframe"
|
||||
|
||||
149
README.md
149
README.md
@@ -1,11 +1,12 @@
|
||||
# rustframe
|
||||
|
||||
📚 [Docs](https://magnus167.github.io/rustframe/) | 🐙 [GitHub](https://github.com/Magnus167/rustframe) | 🌐 [Gitea mirror](https://gitea.nulltech.uk/Magnus167/rustframe) | 🦀 [Crates.io](https://crates.io/crates/rustframe) | 🔖 [docs.rs](https://docs.rs/rustframe/latest/rustframe/)
|
||||
🐙 [GitHub](https://github.com/Magnus167/rustframe) | 📚 [Docs](https://magnus167.github.io/rustframe/) | 📖 [User Guide](https://magnus167.github.io/rustframe/user-guide/) | 🦀 [Crates.io](https://crates.io/crates/rustframe) | 🔖 [docs.rs](https://docs.rs/rustframe/latest/rustframe/)
|
||||
|
||||
<!-- [](https://github.com/Magnus167/rustframe) -->
|
||||
|
||||
[](https://codecov.io/gh/Magnus167/rustframe)
|
||||
[](https://magnus167.github.io/rustframe/docs/tarpaulin-report.html)
|
||||
[](https://gitea.nulltech.uk/Magnus167/rustframe)
|
||||
|
||||
---
|
||||
|
||||
@@ -23,10 +24,8 @@ Rustframe is an educational project, and is not intended for production use. It
|
||||
- **Math that reads like math** - element-wise `+`, `−`, `×`, `÷` on entire frames or scalars.
|
||||
- **Frames** - Column major data structure for single-type data, with labeled columns and typed row indices.
|
||||
- **Compute module** - Implements various statistical computations and machine learning models.
|
||||
|
||||
- **[Coming Soon]** _DataFrame_ - Multi-type data structure for heterogeneous data, with labeled columns and typed row indices.
|
||||
|
||||
- **Random number utils** - Built-in pseudo and cryptographically secure generators for simulations.
|
||||
- **[Coming Soon]** _DataFrame_ - Multi-type data structure for heterogeneous data, with labeled columns and typed row indices.
|
||||
|
||||
#### Matrix and Frame functionality
|
||||
|
||||
@@ -154,7 +153,134 @@ let zipped_matrix = a.zip(&b, |x, y| x + y);
|
||||
assert_eq!(zipped_matrix.data(), &[6.0, 8.0, 10.0, 12.0]);
|
||||
```
|
||||
|
||||
### More examples
|
||||
---
|
||||
|
||||
## DataFrame Usage Example
|
||||
|
||||
```rust
|
||||
use chrono::NaiveDate;
|
||||
use rustframe::dataframe::DataFrame;
|
||||
use rustframe::utils::{BDateFreq, BDatesList};
|
||||
use std::any::TypeId;
|
||||
use std::collections::HashMap;
|
||||
|
||||
// Helper for NaiveDate
|
||||
fn d(y: i32, m: u32, d: u32) -> NaiveDate {
|
||||
NaiveDate::from_ymd_opt(y, m, d).unwrap()
|
||||
}
|
||||
|
||||
// Create a new DataFrame
|
||||
let mut df = DataFrame::new();
|
||||
|
||||
// Add columns of different types
|
||||
df.add_column("col_int1", vec![1, 2, 3, 4, 5]);
|
||||
df.add_column("col_float1", vec![1.1, 2.2, 3.3, 4.4, 5.5]);
|
||||
df.add_column(
|
||||
"col_string",
|
||||
vec![
|
||||
"apple".to_string(),
|
||||
"banana".to_string(),
|
||||
"cherry".to_string(),
|
||||
"date".to_string(),
|
||||
"elderberry".to_string(),
|
||||
],
|
||||
);
|
||||
df.add_column("col_bool", vec![true, false, true, false, true]);
|
||||
// df.add_column("col_date", vec![d(2023,1,1), d(2023,1,2), d(2023,1,3), d(2023,1,4), d(2023,1,5)]);
|
||||
df.add_column(
|
||||
"col_date",
|
||||
BDatesList::from_n_periods("2023-01-01".to_string(), BDateFreq::Daily, 5)
|
||||
.unwrap()
|
||||
.list()
|
||||
.unwrap(),
|
||||
);
|
||||
|
||||
println!("DataFrame after initial column additions:\n{}", df);
|
||||
|
||||
// Demonstrate frame re-use when adding columns of existing types
|
||||
let initial_frames_count = df.num_internal_frames();
|
||||
println!(
|
||||
"\nInitial number of internal frames: {}",
|
||||
initial_frames_count
|
||||
);
|
||||
|
||||
df.add_column("col_int2", vec![6, 7, 8, 9, 10]);
|
||||
df.add_column("col_float2", vec![6.6, 7.7, 8.8, 9.9, 10.0]);
|
||||
|
||||
let frames_after_reuse = df.num_internal_frames();
|
||||
println!(
|
||||
"Number of internal frames after adding more columns of existing types: {}",
|
||||
frames_after_reuse
|
||||
);
|
||||
assert_eq!(initial_frames_count, frames_after_reuse); // Should be equal, demonstrating re-use
|
||||
|
||||
println!(
|
||||
"\nDataFrame after adding more columns of existing types:\n{}",
|
||||
df
|
||||
);
|
||||
|
||||
// Get number of rows and columns
|
||||
println!("Rows: {}", df.rows()); // Output: Rows: 5
|
||||
println!("Columns: {}", df.cols()); // Output: Columns: 7
|
||||
|
||||
// Get column names
|
||||
println!("Column names: {:?}", df.get_column_names());
|
||||
// Output (order may differ): Column names: ["col_int1", "col_float1", "col_string", "col_bool", "col_date", "col_int2", "col_float2"]
|
||||
|
||||
// Get a specific column by name and type
|
||||
let int_col = df.get_column::<i32>("col_int1").unwrap();
|
||||
// Output: Integer column (col_int1): [1, 2, 3, 4, 5]
|
||||
println!("Integer column (col_int1): {:?}", int_col);
|
||||
|
||||
let int_col2 = df.get_column::<i32>("col_int2").unwrap();
|
||||
// Output: Integer column (col_int2): [6, 7, 8, 9, 10]
|
||||
println!("Integer column (col_int2): {:?}", int_col2);
|
||||
|
||||
let float_col = df.get_column::<f64>("col_float1").unwrap();
|
||||
// Output: Float column (col_float1): [1.1, 2.2, 3.3, 4.4, 5.5]
|
||||
println!("Float column (col_float1): {:?}", float_col);
|
||||
|
||||
// Attempt to get a column with incorrect type (returns None)
|
||||
let wrong_type_col = df.get_column::<bool>("col_int1");
|
||||
// Output: Wrong type column: None
|
||||
println!("Wrong type column: {:?}", wrong_type_col);
|
||||
|
||||
// Get a row by index
|
||||
let row_0 = df.get_row(0).unwrap();
|
||||
println!("Row 0: {:?}", row_0);
|
||||
// Output: Row 0: {"col_int1": "1", "col_float1": "1.1", "col_string": "apple", "col_bool": "true", "col_date": "2023-01-01", "col_int2": "6", "col_float2": "6.6"}
|
||||
|
||||
let row_2 = df.get_row(2).unwrap();
|
||||
println!("Row 2: {:?}", row_2);
|
||||
// Output: Row 2: {"col_int1": "3", "col_float1": "3.3", "col_string": "cherry", "col_bool": "true", "col_date": "2023-01-03", "col_int2": "8", "col_float2": "8.8"}
|
||||
|
||||
// Attempt to get an out-of-bounds row (returns None)
|
||||
let row_out_of_bounds = df.get_row(10);
|
||||
// Output: Row out of bounds: None
|
||||
println!("Row out of bounds: {:?}", row_out_of_bounds);
|
||||
|
||||
// Drop a column
|
||||
df.drop_column("col_bool");
|
||||
println!("\nDataFrame after dropping 'col_bool':\n{}", df);
|
||||
|
||||
println!("Columns after drop: {}", df.cols());
|
||||
println!("Column names after drop: {:?}", df.get_column_names());
|
||||
|
||||
// Drop another column, ensuring the underlying Frame is removed if empty
|
||||
df.drop_column("col_float1");
|
||||
println!("\nDataFrame after dropping 'col_float1':\n{}", df);
|
||||
|
||||
println!("Columns after second drop: {}", df.cols());
|
||||
println!(
|
||||
"Column names after second drop: {:?}",
|
||||
df.get_column_names()
|
||||
);
|
||||
|
||||
// Attempt to drop a non-existent column (will panic)
|
||||
// df.drop_column("non_existent_col"); // Uncomment to see panic
|
||||
```
|
||||
|
||||
## More examples
|
||||
|
||||
See the [examples](./examples/) directory for some demonstrations of Rustframe's syntax and functionality.
|
||||
|
||||
@@ -193,10 +319,21 @@ cargo run --example
|
||||
|
||||
Each demo runs a couple of mini-scenarios showcasing the APIs.
|
||||
|
||||
### Running benchmarks
|
||||
## Running benchmarks
|
||||
|
||||
To run the benchmarks, use:
|
||||
|
||||
```bash
|
||||
cargo bench --features "bench"
|
||||
```
|
||||
|
||||
## Building the user-guide
|
||||
|
||||
To build the user guide, use:
|
||||
|
||||
```bash
|
||||
cargo binstall mdbook
|
||||
bash docs/build.sh
|
||||
```
|
||||
|
||||
This will generate the user guide in the `docs/book` directory.
|
||||
|
||||
7
docs/book.toml
Normal file
7
docs/book.toml
Normal file
@@ -0,0 +1,7 @@
|
||||
[book]
|
||||
title = "Rustframe User Guide"
|
||||
authors = ["Palash Tyagi (https://github.com/Magnus167)"]
|
||||
description = "Guided journey through Rustframe capabilities."
|
||||
|
||||
[build]
|
||||
build-dir = "book"
|
||||
7
docs/build.sh
Executable file
7
docs/build.sh
Executable file
@@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env sh
|
||||
# Build and test the Rustframe user guide using mdBook.
|
||||
set -e
|
||||
|
||||
cd docs
|
||||
bash gen.sh "$@"
|
||||
cd ..
|
||||
14
docs/gen.sh
Normal file
14
docs/gen.sh
Normal file
@@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env sh
|
||||
|
||||
set -e
|
||||
|
||||
cargo clean
|
||||
|
||||
cargo build --manifest-path ../Cargo.toml
|
||||
|
||||
mdbook test -L ../target/debug/deps "$@"
|
||||
|
||||
mdbook build "$@"
|
||||
|
||||
cargo build
|
||||
# cargo build --release
|
||||
7
docs/src/SUMMARY.md
Normal file
7
docs/src/SUMMARY.md
Normal file
@@ -0,0 +1,7 @@
|
||||
# Summary
|
||||
|
||||
- [Introduction](./introduction.md)
|
||||
- [Data Manipulation](./data-manipulation.md)
|
||||
- [Compute Features](./compute.md)
|
||||
- [Machine Learning](./machine-learning.md)
|
||||
- [Utilities](./utilities.md)
|
||||
222
docs/src/compute.md
Normal file
222
docs/src/compute.md
Normal file
@@ -0,0 +1,222 @@
|
||||
# Compute Features
|
||||
|
||||
The `compute` module hosts numerical routines for exploratory data analysis.
|
||||
It covers descriptive statistics, correlations, probability distributions and
|
||||
some basic inferential tests.
|
||||
|
||||
## Basic Statistics
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::stats::{mean, mean_horizontal, mean_vertical, stddev, median, population_variance, percentile};
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
let m = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
|
||||
assert_eq!(mean(&m), 2.5);
|
||||
assert_eq!(stddev(&m), 1.118033988749895);
|
||||
assert_eq!(median(&m), 2.5);
|
||||
assert_eq!(population_variance(&m), 1.25);
|
||||
assert_eq!(percentile(&m, 50.0), 3.0);
|
||||
// row averages (mean_horizontal) come back as a rows x 1 matrix; column averages (mean_vertical) as a 1 x cols matrix
|
||||
let row_means = mean_horizontal(&m);
|
||||
assert_eq!(row_means.data(), &[2.0, 3.0]);
|
||||
let col_means = mean_vertical(&m);
|
||||
assert_eq!(col_means.data(), & [1.5, 3.5]);
|
||||
```
|
||||
|
||||
### Axis-specific Operations
|
||||
|
||||
Operations can be applied along specific axes (rows or columns):
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::stats::{mean_vertical, mean_horizontal, stddev_vertical, stddev_horizontal};
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
// 3x2 matrix
|
||||
let m = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], 3, 2);
|
||||
|
||||
// Mean along columns (vertical) - returns 1 x cols matrix
|
||||
let col_means = mean_vertical(&m);
|
||||
assert_eq!(col_means.shape(), (1, 2));
|
||||
assert_eq!(col_means.data(), &[3.0, 4.0]); // [(1+3+5)/3, (2+4+6)/3]
|
||||
|
||||
// Mean along rows (horizontal) - returns rows x 1 matrix
|
||||
let row_means = mean_horizontal(&m);
|
||||
assert_eq!(row_means.shape(), (3, 1));
|
||||
assert_eq!(row_means.data(), &[1.5, 3.5, 5.5]); // [(1+2)/2, (3+4)/2, (5+6)/2]
|
||||
|
||||
// Standard deviation along columns
|
||||
let col_stddev = stddev_vertical(&m);
|
||||
assert_eq!(col_stddev.shape(), (1, 2));
|
||||
|
||||
// Standard deviation along rows
|
||||
let row_stddev = stddev_horizontal(&m);
|
||||
assert_eq!(row_stddev.shape(), (3, 1));
|
||||
```
|
||||
|
||||
## Correlation
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::stats::{pearson, covariance};
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
|
||||
let y = Matrix::from_vec(vec![2.0, 4.0, 6.0, 8.0], 2, 2);
|
||||
let corr = pearson(&x, &y);
|
||||
let cov = covariance(&x, &y);
|
||||
assert!((corr - 1.0).abs() < 1e-8);
|
||||
assert!((cov - 2.5).abs() < 1e-8);
|
||||
```
|
||||
|
||||
## Covariance
|
||||
|
||||
### `covariance`
|
||||
|
||||
Computes the population covariance between two equally sized matrices by flattening
|
||||
their values.
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::stats::covariance;
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
|
||||
let y = Matrix::from_vec(vec![2.0, 4.0, 6.0, 8.0], 2, 2);
|
||||
let cov = covariance(&x, &y);
|
||||
assert!((cov - 2.5).abs() < 1e-8);
|
||||
```
|
||||
|
||||
### `covariance_vertical`
|
||||
|
||||
Evaluates covariance between columns (i.e. across rows) and returns a matrix of
|
||||
column pair covariances.
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::stats::covariance_vertical;
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
let m = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
|
||||
let cov = covariance_vertical(&m);
|
||||
assert_eq!(cov.shape(), (2, 2));
|
||||
assert!(cov.data().iter().all(|&v| (v - 1.0).abs() < 1e-8));
|
||||
```
|
||||
|
||||
### `covariance_horizontal`
|
||||
|
||||
Computes covariance between rows (i.e. across columns) returning a matrix that
|
||||
describes how each pair of rows varies together.
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::stats::covariance_horizontal;
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
let m = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
|
||||
let cov = covariance_horizontal(&m);
|
||||
assert_eq!(cov.shape(), (2, 2));
|
||||
assert!(cov.data().iter().all(|&v| (v - 0.25).abs() < 1e-8));
|
||||
```
|
||||
|
||||
### `covariance_matrix`
|
||||
|
||||
Builds a covariance matrix either between columns (`Axis::Col`) or rows
|
||||
(`Axis::Row`). Each entry represents how two series co-vary.
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::stats::covariance_matrix;
|
||||
use rustframe::matrix::{Axis, Matrix};
|
||||
|
||||
let data = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
|
||||
|
||||
// Covariance between columns
|
||||
let cov_cols = covariance_matrix(&data, Axis::Col);
|
||||
assert!((cov_cols.get(0, 0) - 2.0).abs() < 1e-8);
|
||||
|
||||
// Covariance between rows
|
||||
let cov_rows = covariance_matrix(&data, Axis::Row);
|
||||
assert!((cov_rows.get(0, 1) + 0.5).abs() < 1e-8);
|
||||
```
|
||||
|
||||
## Distributions
|
||||
|
||||
Probability distribution helpers are available for common PDFs and CDFs.
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::stats::distributions::normal_pdf;
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
let x = Matrix::from_vec(vec![0.0, 1.0], 1, 2);
|
||||
let pdf = normal_pdf(x, 0.0, 1.0);
|
||||
assert_eq!(pdf.data().len(), 2);
|
||||
```
|
||||
|
||||
### Additional Distributions
|
||||
|
||||
Rustframe provides several other probability distributions:
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::stats::distributions::{normal_cdf, binomial_pmf, binomial_cdf, poisson_pmf};
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
// Normal distribution CDF
|
||||
let x = Matrix::from_vec(vec![0.0, 1.0], 1, 2);
|
||||
let cdf = normal_cdf(x, 0.0, 1.0);
|
||||
assert_eq!(cdf.data().len(), 2);
|
||||
|
||||
// Binomial distribution PMF
|
||||
// Probability of k successes in n trials with probability p
|
||||
let k = Matrix::from_vec(vec![0_u64, 1, 2, 3], 1, 4);
|
||||
let pmf = binomial_pmf(3, k.clone(), 0.5);
|
||||
assert_eq!(pmf.data().len(), 4);
|
||||
|
||||
// Binomial distribution CDF
|
||||
let cdf = binomial_cdf(3, k, 0.5);
|
||||
assert_eq!(cdf.data().len(), 4);
|
||||
|
||||
// Poisson distribution PMF
|
||||
// Probability of k events with rate parameter lambda
|
||||
let k = Matrix::from_vec(vec![0_u64, 1, 2], 1, 3);
|
||||
let pmf = poisson_pmf(2.0, k);
|
||||
assert_eq!(pmf.data().len(), 3);
|
||||
```
|
||||
|
||||
### Inferential Statistics
|
||||
|
||||
Rustframe provides several inferential statistical tests:
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::matrix::Matrix;
|
||||
use rustframe::compute::stats::inferential::{t_test, chi2_test, anova};
|
||||
|
||||
// Two-sample t-test
|
||||
let sample1 = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0], 1, 5);
|
||||
let sample2 = Matrix::from_vec(vec![6.0, 7.0, 8.0, 9.0, 10.0], 1, 5);
|
||||
let (t_statistic, p_value) = t_test(&sample1, &sample2);
|
||||
assert!((t_statistic + 5.0).abs() < 1e-5);
|
||||
assert!(p_value > 0.0 && p_value < 1.0);
|
||||
|
||||
// Chi-square test of independence
|
||||
let observed = Matrix::from_vec(vec![12.0, 5.0, 8.0, 10.0], 2, 2);
|
||||
let (chi2_statistic, p_value) = chi2_test(&observed);
|
||||
assert!(chi2_statistic > 0.0);
|
||||
assert!(p_value > 0.0 && p_value < 1.0);
|
||||
|
||||
// One-way ANOVA
|
||||
let group1 = Matrix::from_vec(vec![1.0, 2.0, 3.0], 1, 3);
|
||||
let group2 = Matrix::from_vec(vec![2.0, 3.0, 4.0], 1, 3);
|
||||
let group3 = Matrix::from_vec(vec![3.0, 4.0, 5.0], 1, 3);
|
||||
let groups = vec![&group1, &group2, &group3];
|
||||
let (f_statistic, p_value) = anova(groups);
|
||||
assert!(f_statistic > 0.0);
|
||||
assert!(p_value > 0.0 && p_value < 1.0);
|
||||
```
|
||||
|
||||
With the basics covered, explore predictive models in the
|
||||
[machine learning](./machine-learning.md) chapter.
|
||||
157
docs/src/data-manipulation.md
Normal file
157
docs/src/data-manipulation.md
Normal file
@@ -0,0 +1,157 @@
|
||||
# Data Manipulation
|
||||
|
||||
Rustframe's `Frame` type couples tabular data with
|
||||
column labels and a typed row index. Frames expose a familiar API for loading
|
||||
data, selecting rows or columns and performing aggregations.
|
||||
|
||||
## Creating a Frame
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::frame::{Frame, RowIndex};
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
let data = Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]);
|
||||
let frame = Frame::new(data, vec!["A", "B"], None);
|
||||
assert_eq!(frame["A"], vec![1.0, 2.0]);
|
||||
```
|
||||
|
||||
## Indexing Rows
|
||||
|
||||
Row labels can be integers, dates or a default range. Retrieving a row returns a
|
||||
view that lets you inspect values by column name or position.
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
# extern crate chrono;
|
||||
use chrono::NaiveDate;
|
||||
use rustframe::frame::{Frame, RowIndex};
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
let d = |y, m, d| NaiveDate::from_ymd_opt(y, m, d).unwrap();
|
||||
let data = Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]);
|
||||
let index = RowIndex::Date(vec![d(2024, 1, 1), d(2024, 1, 2)]);
|
||||
let mut frame = Frame::new(data, vec!["A", "B"], Some(index));
|
||||
assert_eq!(frame.get_row_date(d(2024, 1, 2))["B"], 4.0);
|
||||
|
||||
// mutate by row key
|
||||
frame.get_row_date_mut(d(2024, 1, 1)).set_by_index(0, 9.0);
|
||||
assert_eq!(frame.get_row_date(d(2024, 1, 1))["A"], 9.0);
|
||||
```
|
||||
|
||||
## Column operations
|
||||
|
||||
Columns can be inserted, renamed, removed or reordered in place.
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::frame::{Frame, RowIndex};
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
let data = Matrix::from_cols(vec![vec![1, 2], vec![3, 4]]);
|
||||
let mut frame = Frame::new(data, vec!["X", "Y"], Some(RowIndex::Range(0..2)));
|
||||
|
||||
frame.add_column("Z", vec![5, 6]);
|
||||
frame.rename("Y", "W");
|
||||
let removed = frame.delete_column("X");
|
||||
assert_eq!(removed, vec![1, 2]);
|
||||
frame.sort_columns();
|
||||
assert_eq!(frame.columns(), &["W", "Z"]);
|
||||
```
|
||||
|
||||
## Aggregations
|
||||
|
||||
Any numeric aggregation available on `Matrix` is forwarded to `Frame`.
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::frame::Frame;
|
||||
use rustframe::matrix::{Matrix, SeriesOps};
|
||||
|
||||
let frame = Frame::new(Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]), vec!["A", "B"], None);
|
||||
assert_eq!(frame.sum_vertical(), vec![3.0, 7.0]);
|
||||
assert_eq!(frame.sum_horizontal(), vec![4.0, 6.0]);
|
||||
```
|
||||
|
||||
## Matrix Operations
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
let data1 = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
|
||||
let data2 = Matrix::from_vec(vec![5.0, 6.0, 7.0, 8.0], 2, 2);
|
||||
|
||||
let sum = data1.clone() + data2.clone();
|
||||
assert_eq!(sum.data(), vec![6.0, 8.0, 10.0, 12.0]);
|
||||
|
||||
let product = data1.clone() * data2.clone();
|
||||
assert_eq!(product.data(), vec![5.0, 12.0, 21.0, 32.0]);
|
||||
|
||||
let scalar_product = data1.clone() * 2.0;
|
||||
assert_eq!(scalar_product.data(), vec![2.0, 4.0, 6.0, 8.0]);
|
||||
|
||||
let equals = data1 == data1.clone();
|
||||
assert_eq!(equals, true);
|
||||
```
|
||||
|
||||
### Advanced Matrix Operations
|
||||
|
||||
Matrices support a variety of advanced operations:
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::matrix::{Matrix, SeriesOps};
|
||||
|
||||
// Matrix multiplication (dot product)
|
||||
let a = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
|
||||
let b = Matrix::from_vec(vec![5.0, 6.0, 7.0, 8.0], 2, 2);
|
||||
let product = a.matrix_mul(&b);
|
||||
assert_eq!(product.data(), vec![23.0, 34.0, 31.0, 46.0]);
|
||||
|
||||
// Transpose
|
||||
let m = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
|
||||
let transposed = m.transpose();
|
||||
assert_eq!(transposed.data(), vec![1.0, 3.0, 2.0, 4.0]);
|
||||
|
||||
// Map function over all elements
|
||||
let m = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
|
||||
let squared = m.map(|x| x * x);
|
||||
assert_eq!(squared.data(), vec![1.0, 4.0, 9.0, 16.0]);
|
||||
|
||||
// Zip two matrices with a function
|
||||
let a = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
|
||||
let b = Matrix::from_vec(vec![5.0, 6.0, 7.0, 8.0], 2, 2);
|
||||
let zipped = a.zip(&b, |x, y| x + y);
|
||||
assert_eq!(zipped.data(), vec![6.0, 8.0, 10.0, 12.0]);
|
||||
```
|
||||
|
||||
### Matrix Reductions
|
||||
|
||||
Matrices support various reduction operations:
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::matrix::{Matrix, SeriesOps};
|
||||
|
||||
let m = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], 3, 2);
|
||||
|
||||
// Sum along columns (vertical)
|
||||
let col_sums = m.sum_vertical();
|
||||
assert_eq!(col_sums, vec![9.0, 12.0]); // [1+3+5, 2+4+6]
|
||||
|
||||
// Sum along rows (horizontal)
|
||||
let row_sums = m.sum_horizontal();
|
||||
assert_eq!(row_sums, vec![3.0, 7.0, 11.0]); // [1+2, 3+4, 5+6]
|
||||
|
||||
// Cumulative sum along columns
|
||||
let col_cumsum = m.cumsum_vertical();
|
||||
assert_eq!(col_cumsum.data(), vec![1.0, 4.0, 9.0, 2.0, 6.0, 12.0]);
|
||||
|
||||
// Cumulative sum along rows
|
||||
let row_cumsum = m.cumsum_horizontal();
|
||||
assert_eq!(row_cumsum.data(), vec![1.0, 3.0, 5.0, 3.0, 7.0, 11.0]);
|
||||
```
|
||||
|
||||
With the basics covered, continue to the [compute features](./compute.md)
|
||||
chapter for statistics and analytics.
|
||||
40
docs/src/introduction.md
Normal file
40
docs/src/introduction.md
Normal file
@@ -0,0 +1,40 @@
|
||||
# Introduction
|
||||
|
||||
🐙 [GitHub](https://github.com/Magnus167/rustframe) | 📚 [Docs](https://magnus167.github.io/rustframe/) | 📖 [User Guide](https://magnus167.github.io/rustframe/user-guide/) | 🦀 [Crates.io](https://crates.io/crates/rustframe) | 🔖 [docs.rs](https://docs.rs/rustframe/latest/rustframe/)
|
||||
|
||||
Welcome to the **Rustframe User Guide**. Rustframe is a lightweight dataframe
|
||||
and math toolkit for Rust written in 100% safe Rust. It focuses on keeping the
|
||||
API approachable while offering handy features for small analytical or
|
||||
educational projects.
|
||||
|
||||
Rustframe bundles:
|
||||
|
||||
- column‑labelled frames built on a fast column‑major matrix
|
||||
- familiar element‑wise math and aggregation routines
|
||||
- a growing `compute` module for statistics and machine learning
|
||||
- utilities for dates and random numbers
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::{frame::Frame, matrix::{Matrix, SeriesOps}};
|
||||
|
||||
let data = Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]);
|
||||
let frame = Frame::new(data, vec!["A", "B"], None);
|
||||
|
||||
// Perform column wise aggregation
|
||||
assert_eq!(frame.sum_vertical(), vec![3.0, 7.0]);
|
||||
```
|
||||
|
||||
## Resources
|
||||
|
||||
- [GitHub repository](https://github.com/Magnus167/rustframe)
|
||||
- [Crates.io](https://crates.io/crates/rustframe) & [API docs](https://docs.rs/rustframe)
|
||||
- [Code coverage](https://codecov.io/gh/Magnus167/rustframe)
|
||||
|
||||
This guide walks through the main building blocks of the library. Each chapter
|
||||
contains runnable snippets so you can follow along:
|
||||
|
||||
1. [Data manipulation](./data-manipulation.md) for loading and transforming data
|
||||
2. [Compute features](./compute.md) for statistics and analytics
|
||||
3. [Machine learning](./machine-learning.md) for predictive models
|
||||
4. [Utilities](./utilities.md) for supporting helpers and upcoming modules
|
||||
282
docs/src/machine-learning.md
Normal file
282
docs/src/machine-learning.md
Normal file
@@ -0,0 +1,282 @@
|
||||
# Machine Learning
|
||||
|
||||
The `compute::models` module bundles several learning algorithms that operate on
|
||||
`Matrix` structures. These examples highlight the basic training and prediction
|
||||
APIs. For more end‑to‑end walkthroughs see the examples directory in the
|
||||
repository.
|
||||
|
||||
Currently implemented models include:
|
||||
|
||||
- Linear and logistic regression
|
||||
- K‑means clustering
|
||||
- Principal component analysis (PCA)
|
||||
- Gaussian Naive Bayes
|
||||
- Dense neural networks
|
||||
|
||||
## Linear Regression
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::models::linreg::LinReg;
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1);
|
||||
let y = Matrix::from_vec(vec![2.0, 3.0, 4.0, 5.0], 4, 1);
|
||||
let mut model = LinReg::new(1);
|
||||
model.fit(&x, &y, 0.01, 100);
|
||||
let preds = model.predict(&x);
|
||||
assert_eq!(preds.rows(), 4);
|
||||
```
|
||||
|
||||
## K-means Walkthrough
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::models::k_means::KMeans;
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
let data = Matrix::from_vec(vec![1.0, 1.0, 5.0, 5.0], 2, 2);
|
||||
let (model, _labels) = KMeans::fit(&data, 2, 10, 1e-4);
|
||||
let new_point = Matrix::from_vec(vec![0.0, 0.0], 1, 2);
|
||||
let cluster = model.predict(&new_point)[0];
|
||||
```
|
||||
|
||||
## Logistic Regression
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::models::logreg::LogReg;
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1);
|
||||
let y = Matrix::from_vec(vec![0.0, 0.0, 1.0, 1.0], 4, 1);
|
||||
let mut model = LogReg::new(1);
|
||||
model.fit(&x, &y, 0.1, 200);
|
||||
let preds = model.predict_proba(&x);
|
||||
assert_eq!(preds.rows(), 4);
|
||||
```
|
||||
|
||||
## Principal Component Analysis
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::models::pca::PCA;
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
let data = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
|
||||
let pca = PCA::fit(&data, 1, 0);
|
||||
let transformed = pca.transform(&data);
|
||||
assert_eq!(transformed.cols(), 1);
|
||||
```
|
||||
|
||||
## Gaussian Naive Bayes
|
||||
|
||||
Gaussian Naive Bayes classifier for continuous features:
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::models::gaussian_nb::GaussianNB;
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
// Training data with 2 features
|
||||
let x = Matrix::from_rows_vec(vec![
|
||||
1.0, 2.0,
|
||||
2.0, 3.0,
|
||||
3.0, 4.0,
|
||||
4.0, 5.0
|
||||
], 4, 2);
|
||||
|
||||
// Class labels (0 or 1)
|
||||
let y = Matrix::from_vec(vec![0.0, 0.0, 1.0, 1.0], 4, 1);
|
||||
|
||||
// Train the model
|
||||
let mut model = GaussianNB::new(1e-9, true);
|
||||
model.fit(&x, &y);
|
||||
|
||||
// Make predictions
|
||||
let predictions = model.predict(&x);
|
||||
assert_eq!(predictions.rows(), 4);
|
||||
```
|
||||
|
||||
## Dense Neural Networks
|
||||
|
||||
Simple fully connected neural network:
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::models::dense_nn::{DenseNN, DenseNNConfig, ActivationKind, InitializerKind, LossKind};
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
// Training data with 2 features
|
||||
let x = Matrix::from_rows_vec(vec![
|
||||
0.0, 0.0,
|
||||
0.0, 1.0,
|
||||
1.0, 0.0,
|
||||
1.0, 1.0
|
||||
], 4, 2);
|
||||
|
||||
// XOR target outputs
|
||||
let y = Matrix::from_vec(vec![0.0, 1.0, 1.0, 0.0], 4, 1);
|
||||
|
||||
// Create a neural network with 2 hidden layers
|
||||
let config = DenseNNConfig {
|
||||
input_size: 2,
|
||||
hidden_layers: vec![4, 4],
|
||||
output_size: 1,
|
||||
activations: vec![ActivationKind::Sigmoid, ActivationKind::Sigmoid, ActivationKind::Sigmoid],
|
||||
initializer: InitializerKind::Uniform(0.5),
|
||||
loss: LossKind::MSE,
|
||||
learning_rate: 0.1,
|
||||
epochs: 1000,
|
||||
};
|
||||
let mut model = DenseNN::new(config);
|
||||
|
||||
// Train the model
|
||||
model.train(&x, &y);
|
||||
|
||||
// Make predictions
|
||||
let predictions = model.predict(&x);
|
||||
assert_eq!(predictions.rows(), 4);
|
||||
```
|
||||
|
||||
## Real-world Examples
|
||||
|
||||
### Housing Price Prediction
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::models::linreg::LinReg;
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
// Features: square feet and bedrooms
|
||||
let features = Matrix::from_rows_vec(vec![
|
||||
2100.0, 3.0,
|
||||
1600.0, 2.0,
|
||||
2400.0, 4.0,
|
||||
1400.0, 2.0,
|
||||
], 4, 2);
|
||||
|
||||
// Sale prices
|
||||
let target = Matrix::from_vec(vec![400_000.0, 330_000.0, 369_000.0, 232_000.0], 4, 1);
|
||||
|
||||
let mut model = LinReg::new(2);
|
||||
model.fit(&features, &target, 1e-8, 10_000);
|
||||
|
||||
// Predict price of a new home
|
||||
let new_home = Matrix::from_vec(vec![2000.0, 3.0], 1, 2);
|
||||
let predicted_price = model.predict(&new_home);
|
||||
println!("Predicted price: ${}", predicted_price.data()[0]);
|
||||
```
|
||||
|
||||
### Spam Detection
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::models::logreg::LogReg;
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
// 20 e-mails × 5 features = 100 numbers (row-major, spam first)
|
||||
let x = Matrix::from_rows_vec(
|
||||
vec![
|
||||
// ─────────── spam examples ───────────
|
||||
2.0, 1.0, 1.0, 1.0, 1.0, // "You win a FREE offer - click for money-back bonus!"
|
||||
1.0, 0.0, 1.0, 1.0, 0.0, // "FREE offer! Click now!"
|
||||
0.0, 2.0, 0.0, 1.0, 1.0, // "Win win win - money inside, click…"
|
||||
1.0, 1.0, 0.0, 0.0, 1.0, // "Limited offer to win easy money…"
|
||||
1.0, 0.0, 1.0, 0.0, 1.0, // ...
|
||||
0.0, 1.0, 1.0, 1.0, 0.0, // ...
|
||||
2.0, 0.0, 0.0, 1.0, 1.0, // ...
|
||||
0.0, 1.0, 1.0, 0.0, 1.0, // ...
|
||||
1.0, 1.0, 1.0, 1.0, 0.0, // ...
|
||||
1.0, 0.0, 0.0, 1.0, 1.0, // ...
|
||||
// ─────────── ham examples ───────────
|
||||
0.0, 0.0, 0.0, 0.0, 0.0, // "See you at the meeting tomorrow."
|
||||
0.0, 0.0, 0.0, 1.0, 0.0, // "Here's the Zoom click-link."
|
||||
0.0, 0.0, 0.0, 0.0, 1.0, // "Expense report: money attached."
|
||||
0.0, 0.0, 0.0, 1.0, 1.0, // ...
|
||||
0.0, 1.0, 0.0, 0.0, 0.0, // "Did we win the bid?"
|
||||
0.0, 0.0, 0.0, 0.0, 0.0, // ...
|
||||
0.0, 0.0, 0.0, 1.0, 0.0, // ...
|
||||
1.0, 0.0, 0.0, 0.0, 0.0, // "Special offer for staff lunch."
|
||||
0.0, 0.0, 0.0, 0.0, 0.0, // ...
|
||||
0.0, 0.0, 0.0, 1.0, 0.0,
|
||||
],
|
||||
20,
|
||||
5,
|
||||
);
|
||||
|
||||
// Labels: 1 = spam, 0 = ham
|
||||
let y = Matrix::from_vec(
|
||||
vec![
|
||||
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, // 10 spam
|
||||
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, // 10 ham
|
||||
],
|
||||
20,
|
||||
1,
|
||||
);
|
||||
|
||||
// Train
|
||||
let mut model = LogReg::new(5);
|
||||
model.fit(&x, &y, 0.01, 5000);
|
||||
|
||||
// Predict
|
||||
// e.g. "free money offer"
|
||||
let email_data = vec![1.0, 0.0, 1.0, 0.0, 1.0];
|
||||
let email = Matrix::from_vec(email_data, 1, 5);
|
||||
let prob_spam = model.predict_proba(&email);
|
||||
println!("Probability of spam: {:.4}", prob_spam.data()[0]);
|
||||
```
|
||||
|
||||
### Iris Flower Classification
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::models::gaussian_nb::GaussianNB;
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
// Features: sepal length and petal length
|
||||
let x = Matrix::from_rows_vec(vec![
|
||||
5.1, 1.4, // setosa
|
||||
4.9, 1.4, // setosa
|
||||
6.2, 4.5, // versicolor
|
||||
5.9, 5.1, // virginica
|
||||
], 4, 2);
|
||||
|
||||
let y = Matrix::from_vec(vec![0.0, 0.0, 1.0, 2.0], 4, 1);
|
||||
let names = vec!["setosa", "versicolor", "virginica"];
|
||||
|
||||
let mut model = GaussianNB::new(1e-9, true);
|
||||
model.fit(&x, &y);
|
||||
|
||||
let sample = Matrix::from_vec(vec![5.0, 1.5], 1, 2);
|
||||
let predicted_class = model.predict(&sample);
|
||||
let class_name = names[predicted_class.data()[0] as usize];
|
||||
println!("Predicted class: {} ({:?})", class_name, predicted_class.data()[0]);
|
||||
```
|
||||
|
||||
### Customer Segmentation
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::models::k_means::KMeans;
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
// Each row: [age, annual_income]
|
||||
let customers = Matrix::from_rows_vec(
|
||||
vec![
|
||||
25.0, 40_000.0, 34.0, 52_000.0, 58.0, 95_000.0, 45.0, 70_000.0,
|
||||
],
|
||||
4,
|
||||
2,
|
||||
);
|
||||
|
||||
let (model, labels) = KMeans::fit(&customers, 2, 20, 1e-4);
|
||||
|
||||
let new_customer = Matrix::from_vec(vec![30.0, 50_000.0], 1, 2);
|
||||
let cluster = model.predict(&new_customer)[0];
|
||||
println!("New customer belongs to cluster: {}", cluster);
|
||||
println!("Cluster labels: {:?}", labels);
|
||||
```
|
||||
|
||||
For helper functions and upcoming modules, visit the
|
||||
[utilities](./utilities.md) section.
|
||||
63
docs/src/utilities.md
Normal file
63
docs/src/utilities.md
Normal file
@@ -0,0 +1,63 @@
|
||||
# Utilities
|
||||
|
||||
Utilities provide handy helpers around the core library. Existing tools
|
||||
include:
|
||||
|
||||
- Date utilities for generating calendar sequences and business‑day sets
|
||||
- Random number generators for simulations and testing
- Descriptive statistics helpers such as `mean`, `median` and `stddev`
|
||||
|
||||
## Date Helpers
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::utils::dateutils::{BDatesList, BDateFreq, DatesList, DateFreq};
|
||||
|
||||
// Calendar sequence
|
||||
let list = DatesList::new("2024-01-01".into(), "2024-01-03".into(), DateFreq::Daily);
|
||||
assert_eq!(list.count().unwrap(), 3);
|
||||
|
||||
// Business days starting from 2024‑01‑02
|
||||
let bdates = BDatesList::from_n_periods("2024-01-02".into(), BDateFreq::Daily, 3).unwrap();
|
||||
assert_eq!(bdates.list().unwrap().len(), 3);
|
||||
```
|
||||
|
||||
## Random Numbers
|
||||
|
||||
The `random` module offers both deterministic, seedable pseudo‑random generators (such as `Prng`, shown below) and cryptographically secure RNGs.
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::random::{Prng, Rng};
|
||||
|
||||
let mut rng = Prng::new(42);
|
||||
let v1 = rng.next_u64();
|
||||
let v2 = rng.next_u64();
|
||||
assert_ne!(v1, v2);
|
||||
```
|
||||
|
||||
## Stats Functions
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::matrix::Matrix;
|
||||
use rustframe::compute::stats::descriptive::{mean, median, stddev};
|
||||
|
||||
let data = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0], 1, 5);
|
||||
|
||||
let mean_value = mean(&data);
|
||||
assert_eq!(mean_value, 3.0);
|
||||
|
||||
let median_value = median(&data);
|
||||
assert_eq!(median_value, 3.0);
|
||||
|
||||
let std_value = stddev(&data);
|
||||
assert_eq!(std_value, 2.0_f64.sqrt());
|
||||
```
|
||||
|
||||
Upcoming utilities will cover:
|
||||
|
||||
- Data import/export helpers
|
||||
- Visualization adapters
|
||||
- Streaming data interfaces
|
||||
|
||||
Contributions to these sections are welcome!
|
||||
@@ -1,3 +1,16 @@
|
||||
//! Algorithms and statistical utilities built on top of the core matrices.
|
||||
//!
|
||||
//! This module groups together machine‑learning models and statistical helper
|
||||
//! functions. For quick access to basic statistics see [`stats`](crate::compute::stats), while
|
||||
//! [`models`](crate::compute::models) contains small learning algorithms.
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::compute::stats;
|
||||
//! use rustframe::matrix::Matrix;
|
||||
//!
|
||||
//! let m = Matrix::from_vec(vec![1.0, 2.0, 3.0], 3, 1);
|
||||
//! assert_eq!(stats::mean(&m), 2.0);
|
||||
//! ```
|
||||
pub mod models;
|
||||
|
||||
pub mod stats;
|
||||
|
||||
@@ -1,3 +1,15 @@
|
||||
//! Common activation functions used in neural networks.
|
||||
//!
|
||||
//! Functions operate element-wise on [`Matrix`] values.
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::compute::models::activations::sigmoid;
|
||||
//! use rustframe::matrix::Matrix;
|
||||
//!
|
||||
//! let x = Matrix::from_vec(vec![0.0], 1, 1);
|
||||
//! let y = sigmoid(&x);
|
||||
//! assert!((y.get(0,0) - 0.5).abs() < 1e-6);
|
||||
//! ```
|
||||
use crate::matrix::{Matrix, SeriesOps};
|
||||
|
||||
pub fn sigmoid(x: &Matrix<f64>) -> Matrix<f64> {
|
||||
|
||||
@@ -1,3 +1,30 @@
|
||||
//! A minimal dense neural network implementation for educational purposes.
|
||||
//!
|
||||
//! Layers operate on [`Matrix`] values and support ReLU and Sigmoid
|
||||
//! activations. This is not meant to be a performant deep‑learning framework
|
||||
//! but rather a small example of how the surrounding matrix utilities can be
|
||||
//! composed.
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::compute::models::dense_nn::{ActivationKind, DenseNN, DenseNNConfig, InitializerKind, LossKind};
|
||||
//! use rustframe::matrix::Matrix;
|
||||
//!
|
||||
//! // Tiny network with one input and one output neuron.
|
||||
//! let config = DenseNNConfig {
|
||||
//! input_size: 1,
|
||||
//! hidden_layers: vec![],
|
||||
//! output_size: 1,
|
||||
//! activations: vec![ActivationKind::Relu],
|
||||
//! initializer: InitializerKind::Uniform(0.5),
|
||||
//! loss: LossKind::MSE,
|
||||
//! learning_rate: 0.1,
|
||||
//! epochs: 1,
|
||||
//! };
|
||||
//! let mut nn = DenseNN::new(config);
|
||||
//! let x = Matrix::from_vec(vec![1.0, 2.0], 2, 1);
|
||||
//! let y = Matrix::from_vec(vec![2.0, 3.0], 2, 1);
|
||||
//! nn.train(&x, &y);
|
||||
//! ```
|
||||
use crate::compute::models::activations::{drelu, relu, sigmoid};
|
||||
use crate::matrix::{Matrix, SeriesOps};
|
||||
use crate::random::prelude::*;
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
//! Gaussian Naive Bayes classifier for dense matrices.
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::compute::models::gaussian_nb::GaussianNB;
|
||||
//! use rustframe::matrix::Matrix;
|
||||
//!
|
||||
//! let x = Matrix::from_vec(vec![1.0, 2.0, 1.0, 2.0], 2, 2); // two samples
|
||||
//! let y = Matrix::from_vec(vec![0.0, 1.0], 2, 1);
|
||||
//! let mut model = GaussianNB::new(1e-9, false);
|
||||
//! model.fit(&x, &y);
|
||||
//! let preds = model.predict(&x);
|
||||
//! assert_eq!(preds.rows(), 2);
|
||||
//! ```
|
||||
use crate::matrix::Matrix;
|
||||
use std::collections::HashMap;
|
||||
|
||||
|
||||
@@ -1,3 +1,14 @@
|
||||
//! Simple k-means clustering working on [`Matrix`] data.
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::compute::models::k_means::KMeans;
|
||||
//! use rustframe::matrix::Matrix;
|
||||
//!
|
||||
//! let data = Matrix::from_vec(vec![1.0, 1.0, 5.0, 5.0], 2, 2);
|
||||
//! let (model, labels) = KMeans::fit(&data, 2, 10, 1e-4);
|
||||
//! assert_eq!(model.centroids.rows(), 2);
|
||||
//! assert_eq!(labels.len(), 2);
|
||||
//! ```
|
||||
use crate::compute::stats::mean_vertical;
|
||||
use crate::matrix::Matrix;
|
||||
use crate::random::prelude::*;
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
//! Ordinary least squares linear regression.
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::compute::models::linreg::LinReg;
|
||||
//! use rustframe::matrix::Matrix;
|
||||
//!
|
||||
//! let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1);
|
||||
//! let y = Matrix::from_vec(vec![2.0, 3.0, 4.0, 5.0], 4, 1);
|
||||
//! let mut model = LinReg::new(1);
|
||||
//! model.fit(&x, &y, 0.01, 100);
|
||||
//! let preds = model.predict(&x);
|
||||
//! assert_eq!(preds.rows(), 4);
|
||||
//! ```
|
||||
use crate::matrix::{Matrix, SeriesOps};
|
||||
|
||||
pub struct LinReg {
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
//! Binary logistic regression classifier.
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::compute::models::logreg::LogReg;
|
||||
//! use rustframe::matrix::Matrix;
|
||||
//!
|
||||
//! let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1);
|
||||
//! let y = Matrix::from_vec(vec![0.0, 0.0, 1.0, 1.0], 4, 1);
|
||||
//! let mut model = LogReg::new(1);
|
||||
//! model.fit(&x, &y, 0.1, 100);
|
||||
//! let preds = model.predict(&x);
|
||||
//! assert_eq!(preds[(0,0)], 0.0);
|
||||
//! ```
|
||||
use crate::compute::models::activations::sigmoid;
|
||||
use crate::matrix::{Matrix, SeriesOps};
|
||||
|
||||
|
||||
@@ -1,3 +1,19 @@
|
||||
//! Lightweight machine‑learning models built on matrices.
|
||||
//!
|
||||
//! Models are intentionally minimal and operate on the [`Matrix`](crate::matrix::Matrix) type for
|
||||
//! inputs and parameters.
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::compute::models::linreg::LinReg;
|
||||
//! use rustframe::matrix::Matrix;
|
||||
//!
|
||||
//! let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1);
|
||||
//! let y = Matrix::from_vec(vec![2.0, 3.0, 4.0, 5.0], 4, 1);
|
||||
//! let mut model = LinReg::new(1);
|
||||
//! model.fit(&x, &y, 0.01, 1000);
|
||||
//! let preds = model.predict(&x);
|
||||
//! assert_eq!(preds.rows(), 4);
|
||||
//! ```
|
||||
pub mod activations;
|
||||
pub mod dense_nn;
|
||||
pub mod gaussian_nb;
|
||||
|
||||
@@ -1,3 +1,14 @@
|
||||
//! Principal Component Analysis using covariance matrices.
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::compute::models::pca::PCA;
|
||||
//! use rustframe::matrix::Matrix;
|
||||
//!
|
||||
//! let data = Matrix::from_rows_vec(vec![1.0, 1.0, 2.0, 2.0], 2, 2);
|
||||
//! let pca = PCA::fit(&data, 1, 0);
|
||||
//! let projected = pca.transform(&data);
|
||||
//! assert_eq!(projected.cols(), 1);
|
||||
//! ```
|
||||
use crate::compute::stats::correlation::covariance_matrix;
|
||||
use crate::compute::stats::descriptive::mean_vertical;
|
||||
use crate::matrix::{Axis, Matrix, SeriesOps};
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
//! Covariance and correlation helpers.
|
||||
//!
|
||||
//! This module provides routines for measuring the relationship between
|
||||
//! columns or rows of matrices.
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::compute::stats::correlation;
|
||||
//! use rustframe::matrix::Matrix;
|
||||
//!
|
||||
//! let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
|
||||
//! let cov = correlation::covariance(&x, &x);
|
||||
//! assert!((cov - 1.25).abs() < 1e-8);
|
||||
//! ```
|
||||
use crate::compute::stats::{mean, mean_horizontal, mean_vertical, stddev};
|
||||
use crate::matrix::{Axis, Matrix, SeriesOps};
|
||||
|
||||
|
||||
@@ -1,3 +1,15 @@
|
||||
//! Descriptive statistics for matrices.
|
||||
//!
|
||||
//! Provides means, variances, medians and other aggregations computed either
|
||||
//! across the whole matrix or along a specific axis.
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::compute::stats::descriptive;
|
||||
//! use rustframe::matrix::Matrix;
|
||||
//!
|
||||
//! let m = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
|
||||
//! assert_eq!(descriptive::mean(&m), 2.5);
|
||||
//! ```
|
||||
use crate::matrix::{Axis, Matrix, SeriesOps};
|
||||
|
||||
pub fn mean(x: &Matrix<f64>) -> f64 {
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
//! Probability distribution functions applied element-wise to matrices.
|
||||
//!
|
||||
//! Includes approximations for the normal, uniform and gamma distributions as
|
||||
//! well as the error function.
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::compute::stats::distributions;
|
||||
//! use rustframe::matrix::Matrix;
|
||||
//!
|
||||
//! let x = Matrix::from_vec(vec![0.0], 1, 1);
|
||||
//! let pdf = distributions::normal_pdf(x.clone(), 0.0, 1.0);
|
||||
//! assert!((pdf.get(0,0) - 0.3989).abs() < 1e-3);
|
||||
//! ```
|
||||
use crate::matrix::{Matrix, SeriesOps};
|
||||
|
||||
use std::f64::consts::PI;
|
||||
|
||||
@@ -1,3 +1,14 @@
|
||||
//! Basic inferential statistics such as t‑tests and chi‑square tests.
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::compute::stats::inferential;
|
||||
//! use rustframe::matrix::Matrix;
|
||||
//!
|
||||
//! let a = Matrix::from_vec(vec![1.0, 2.0], 2, 1);
|
||||
//! let b = Matrix::from_vec(vec![1.1, 1.9], 2, 1);
|
||||
//! let (t, _p) = inferential::t_test(&a, &b);
|
||||
//! assert!(t.abs() < 1.0);
|
||||
//! ```
|
||||
use crate::matrix::{Matrix, SeriesOps};
|
||||
|
||||
use crate::compute::stats::{gamma_cdf, mean, sample_variance};
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
//! Statistical routines for matrices.
|
||||
//!
|
||||
//! Functions are grouped into submodules for descriptive statistics,
|
||||
//! correlations, probability distributions and basic inferential tests.
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::compute::stats;
|
||||
//! use rustframe::matrix::Matrix;
|
||||
//!
|
||||
//! let m = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
|
||||
//! let cov = stats::covariance(&m, &m);
|
||||
//! assert!((cov - 1.25).abs() < 1e-8);
|
||||
//! ```
|
||||
pub mod correlation;
|
||||
pub mod descriptive;
|
||||
pub mod distributions;
|
||||
|
||||
659
src/dataframe/df.rs
Normal file
659
src/dataframe/df.rs
Normal file
@@ -0,0 +1,659 @@
|
||||
use crate::frame::{Frame, RowIndex};
|
||||
use std::any::{Any, TypeId};
|
||||
use std::collections::HashMap;
|
||||
use std::fmt; // Formatting traits used by the `Display`/`Debug` impls and the `SubFrame` bounds
|
||||
|
||||
const DEFAULT_DISPLAY_ROWS: usize = 5;
|
||||
const DEFAULT_DISPLAY_COLS: usize = 10;
|
||||
|
||||
/// Type-erased interface over a [`Frame`], allowing a [`DataFrame`] to hold
/// frames with different element types behind a single trait object.
pub trait SubFrame: Any + fmt::Debug + Send + Sync {
    /// Returns the number of rows of the underlying frame.
    fn rows(&self) -> usize;

    /// Returns the value stored at row `physical_row_idx` of column `col_name`,
    /// rendered as a `String`.
    fn get_value_as_string(&self, physical_row_idx: usize, col_name: &str) -> String;

    /// Returns a boxed copy of this sub-frame as a trait object.
    fn clone_box(&self) -> Box<dyn SubFrame>;

    /// Deletes the column named `col_name` from the underlying frame.
    fn delete_column_from_frame(&mut self, col_name: &str);

    /// Returns the number of columns of the underlying frame.
    fn get_frame_cols(&self) -> usize;

    /// Exposes the value as `&dyn Any` so callers can downcast to the concrete
    /// `Frame<T>`.
    fn as_any(&self) -> &dyn Any;

    /// Mutable counterpart of [`SubFrame::as_any`].
    fn as_any_mut(&mut self) -> &mut dyn Any;
}
|
||||
|
||||
// Implement SubFrame for any Frame<T> that meets the requirements
|
||||
impl<T> SubFrame for Frame<T>
|
||||
where
|
||||
T: Clone + PartialEq + fmt::Display + fmt::Debug + 'static + Send + Sync + Any,
|
||||
{
|
||||
fn rows(&self) -> usize {
|
||||
self.rows()
|
||||
}
|
||||
|
||||
fn get_value_as_string(&self, physical_row_idx: usize, col_name: &str) -> String {
|
||||
self.get_row(physical_row_idx).get(col_name).to_string()
|
||||
}
|
||||
|
||||
fn clone_box(&self) -> Box<dyn SubFrame> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
|
||||
fn delete_column_from_frame(&mut self, col_name: &str) {
|
||||
self.delete_column(col_name);
|
||||
}
|
||||
|
||||
fn get_frame_cols(&self) -> usize {
|
||||
self.cols()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn as_any_mut(&mut self) -> &mut dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DataFrame {
|
||||
frames_by_type: HashMap<TypeId, Box<dyn SubFrame>>, // Maps TypeId to the Frame holding columns of that type
|
||||
column_to_type: HashMap<String, TypeId>, // Maps column name to its TypeId
|
||||
column_names: Vec<String>,
|
||||
index: RowIndex,
|
||||
}
|
||||
|
||||
impl DataFrame {
|
||||
pub fn new() -> Self {
|
||||
DataFrame {
|
||||
frames_by_type: HashMap::new(),
|
||||
column_to_type: HashMap::new(),
|
||||
column_names: Vec::new(),
|
||||
index: RowIndex::Range(0..0), // Initialize with an empty range index
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the number of rows in the DataFrame.
|
||||
pub fn rows(&self) -> usize {
|
||||
self.index.len()
|
||||
}
|
||||
|
||||
/// Returns the number of columns in the DataFrame.
|
||||
pub fn cols(&self) -> usize {
|
||||
self.column_names.len()
|
||||
}
|
||||
|
||||
/// Returns a reference to the vector of column names.
|
||||
pub fn get_column_names(&self) -> &Vec<String> {
|
||||
&self.column_names
|
||||
}
|
||||
|
||||
/// Returns the number of internal Frame objects (one per unique data type).
|
||||
pub fn num_internal_frames(&self) -> usize {
|
||||
self.frames_by_type.len()
|
||||
}
|
||||
|
||||
/// Returns a reference to a column of a specific type, if it exists.
|
||||
pub fn get_column<T>(&self, col_name: &str) -> Option<&[T]>
|
||||
where
|
||||
T: Clone + PartialEq + fmt::Display + fmt::Debug + 'static + Send + Sync + Any,
|
||||
{
|
||||
let expected_type_id = TypeId::of::<T>();
|
||||
if let Some(actual_type_id) = self.column_to_type.get(col_name) {
|
||||
if *actual_type_id == expected_type_id {
|
||||
if let Some(sub_frame_box) = self.frames_by_type.get(actual_type_id) {
|
||||
if let Some(frame) = sub_frame_box.as_any().downcast_ref::<Frame<T>>() {
|
||||
return Some(frame.column(col_name));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Returns a HashMap representing a row, mapping column names to their string values.
|
||||
pub fn get_row(&self, row_idx: usize) -> Option<HashMap<String, String>> {
|
||||
if row_idx >= self.rows() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut row_data = HashMap::new();
|
||||
for col_name in &self.column_names {
|
||||
if let Some(type_id) = self.column_to_type.get(col_name) {
|
||||
if let Some(sub_frame_box) = self.frames_by_type.get(type_id) {
|
||||
let value = sub_frame_box.get_value_as_string(row_idx, col_name);
|
||||
row_data.insert(col_name.clone(), value);
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(row_data)
|
||||
}
|
||||
|
||||
pub fn add_column<T>(&mut self, col_name: &str, data: Vec<T>)
|
||||
where
|
||||
T: Clone + PartialEq + fmt::Display + fmt::Debug + 'static + Send + Sync + Any,
|
||||
{
|
||||
let type_id = TypeId::of::<T>();
|
||||
let col_name_string = col_name.to_string();
|
||||
|
||||
// Check for duplicate column name across the entire DataFrame
|
||||
if self.column_to_type.contains_key(&col_name_string) {
|
||||
panic!(
|
||||
"DataFrame::add_column: duplicate column name: '{}'",
|
||||
col_name_string
|
||||
);
|
||||
}
|
||||
|
||||
// If this is the first column being added, set the DataFrame's index
|
||||
if self.column_names.is_empty() {
|
||||
self.index = RowIndex::Range(0..data.len());
|
||||
} else {
|
||||
// Ensure new column has the same number of rows as existing columns
|
||||
if data.len() != self.index.len() {
|
||||
panic!(
|
||||
"DataFrame::add_column: new column '{}' has {} rows, but existing columns have {} rows",
|
||||
col_name_string,
|
||||
data.len(),
|
||||
self.index.len()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Check if a Frame of this type already exists
|
||||
if let Some(sub_frame_box) = self.frames_by_type.get_mut(&type_id) {
|
||||
// Downcast to the concrete Frame<T> and add the column
|
||||
if let Some(frame) = sub_frame_box.as_any_mut().downcast_mut::<Frame<T>>() {
|
||||
frame.add_column(col_name_string.clone(), data);
|
||||
} else {
|
||||
// This should ideally not happen if TypeId matches, but good for safety
|
||||
panic!(
|
||||
"Type mismatch when downcasting existing SubFrame for TypeId {:?}",
|
||||
type_id
|
||||
);
|
||||
}
|
||||
} else {
|
||||
// No Frame of this type exists, create a new one
|
||||
// The Frame::new constructor expects a Matrix and column names.
|
||||
// We create a Matrix from a single column vector.
|
||||
let new_frame = Frame::new(
|
||||
crate::matrix::Matrix::from_cols(vec![data]),
|
||||
vec![col_name_string.clone()],
|
||||
Some(self.index.clone()), // Pass the DataFrame's index to the new Frame
|
||||
);
|
||||
self.frames_by_type.insert(type_id, Box::new(new_frame));
|
||||
}
|
||||
|
||||
// Update column mappings and names
|
||||
self.column_to_type.insert(col_name_string.clone(), type_id);
|
||||
self.column_names.push(col_name_string);
|
||||
}
|
||||
|
||||
/// Drops a column from the DataFrame.
|
||||
/// Panics if the column does not exist.
|
||||
pub fn drop_column(&mut self, col_name: &str) {
|
||||
let col_name_string = col_name.to_string();
|
||||
|
||||
// 1. Get the TypeId associated with the column
|
||||
let type_id = self
|
||||
.column_to_type
|
||||
.remove(&col_name_string)
|
||||
.unwrap_or_else(|| {
|
||||
panic!(
|
||||
"DataFrame::drop_column: column '{}' not found",
|
||||
col_name_string
|
||||
);
|
||||
});
|
||||
|
||||
// 2. Remove the column name from the ordered list
|
||||
self.column_names.retain(|name| name != &col_name_string);
|
||||
|
||||
// 3. Find the Frame object and delete the column from it
|
||||
if let Some(sub_frame_box) = self.frames_by_type.get_mut(&type_id) {
|
||||
sub_frame_box.delete_column_from_frame(&col_name_string);
|
||||
|
||||
// 4. If the Frame object for this type becomes empty, remove it from frames_by_type
|
||||
if sub_frame_box.get_frame_cols() == 0 {
|
||||
self.frames_by_type.remove(&type_id);
|
||||
}
|
||||
} else {
|
||||
// This should not happen if column_to_type was consistent
|
||||
panic!(
|
||||
"DataFrame::drop_column: internal error, no frame found for type_id {:?}",
|
||||
type_id
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for DataFrame {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
// Display column headers
|
||||
for col_name in self.column_names.iter().take(DEFAULT_DISPLAY_COLS) {
|
||||
write!(f, "{:<15}", col_name)?;
|
||||
}
|
||||
if self.column_names.len() > DEFAULT_DISPLAY_COLS {
|
||||
write!(f, "...")?;
|
||||
}
|
||||
writeln!(f)?;
|
||||
|
||||
// Display data rows
|
||||
let mut displayed_rows = 0;
|
||||
for i in 0..self.index.len() {
|
||||
if displayed_rows >= DEFAULT_DISPLAY_ROWS {
|
||||
writeln!(f, "...")?;
|
||||
break;
|
||||
}
|
||||
for col_name in self.column_names.iter().take(DEFAULT_DISPLAY_COLS) {
|
||||
if let Some(type_id) = self.column_to_type.get(col_name) {
|
||||
if let Some(sub_frame_box) = self.frames_by_type.get(type_id) {
|
||||
write!(f, "{:<15}", sub_frame_box.get_value_as_string(i, col_name))?;
|
||||
} else {
|
||||
// This case indicates an inconsistency: column_to_type has an entry,
|
||||
// but frames_by_type doesn't have the corresponding Frame.
|
||||
write!(f, "{:<15}", "[ERROR]")?;
|
||||
}
|
||||
} else {
|
||||
// This case indicates an inconsistency: column_names has an entry,
|
||||
// but column_to_type doesn't have the corresponding column.
|
||||
write!(f, "{:<15}", "[ERROR]")?;
|
||||
}
|
||||
}
|
||||
if self.column_names.len() > DEFAULT_DISPLAY_COLS {
|
||||
write!(f, "...")?;
|
||||
}
|
||||
writeln!(f)?;
|
||||
displayed_rows += 1;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for DataFrame {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("DataFrame")
|
||||
.field("column_names", &self.column_names)
|
||||
.field("index", &self.index)
|
||||
.field("column_to_type", &self.column_to_type)
|
||||
.field("frames_by_type", &self.frames_by_type)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::frame::Frame;
|
||||
use crate::matrix::Matrix;
|
||||
|
||||
#[test]
|
||||
fn test_dataframe_new() {
|
||||
let df = DataFrame::new();
|
||||
assert_eq!(df.rows(), 0);
|
||||
assert_eq!(df.cols(), 0);
|
||||
assert!(df.get_column_names().is_empty());
|
||||
assert!(df.frames_by_type.is_empty());
|
||||
assert!(df.column_to_type.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dataframe_add_column_initial() {
|
||||
let mut df = DataFrame::new();
|
||||
let data = vec![1, 2, 3];
|
||||
df.add_column("col_int", data.clone());
|
||||
|
||||
assert_eq!(df.rows(), 3);
|
||||
assert_eq!(df.cols(), 1);
|
||||
assert_eq!(df.get_column_names(), &vec!["col_int".to_string()]);
|
||||
assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>()));
|
||||
assert_eq!(df.column_to_type.get("col_int"), Some(&TypeId::of::<i32>()));
|
||||
|
||||
// Verify the underlying frame
|
||||
let sub_frame_box = df.frames_by_type.get(&TypeId::of::<i32>()).unwrap();
|
||||
let frame = sub_frame_box.as_any().downcast_ref::<Frame<i32>>().unwrap();
|
||||
assert_eq!(frame.rows(), 3);
|
||||
assert_eq!(frame.cols(), 1);
|
||||
assert_eq!(frame.columns(), &vec!["col_int".to_string()]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dataframe_add_column_same_type() {
|
||||
let mut df = DataFrame::new();
|
||||
df.add_column("col_int1", vec![1, 2, 3]);
|
||||
df.add_column("col_int2", vec![4, 5, 6]);
|
||||
|
||||
assert_eq!(df.rows(), 3);
|
||||
assert_eq!(df.cols(), 2);
|
||||
assert_eq!(
|
||||
df.get_column_names(),
|
||||
&vec!["col_int1".to_string(), "col_int2".to_string()]
|
||||
);
|
||||
assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>()));
|
||||
assert_eq!(
|
||||
df.column_to_type.get("col_int1"),
|
||||
Some(&TypeId::of::<i32>())
|
||||
);
|
||||
assert_eq!(
|
||||
df.column_to_type.get("col_int2"),
|
||||
Some(&TypeId::of::<i32>())
|
||||
);
|
||||
|
||||
// Verify the underlying frame
|
||||
let sub_frame_box = df.frames_by_type.get(&TypeId::of::<i32>()).unwrap();
|
||||
let frame = sub_frame_box.as_any().downcast_ref::<Frame<i32>>().unwrap();
|
||||
assert_eq!(frame.rows(), 3);
|
||||
assert_eq!(frame.cols(), 2);
|
||||
assert_eq!(
|
||||
frame.columns(),
|
||||
&vec!["col_int1".to_string(), "col_int2".to_string()]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn test_dataframe_add_column_different_type() {
    // One column each of i32, f64 and String: every type must get its own
    // entry in `frames_by_type`, holding only the column of that type.
    let int_type = TypeId::of::<i32>();
    let float_type = TypeId::of::<f64>();
    let string_type = TypeId::of::<String>();

    let mut df = DataFrame::new();
    df.add_column("col_int", vec![1, 2, 3]);
    df.add_column("col_float", vec![1.1, 2.2, 3.3]);
    df.add_column(
        "col_string",
        vec![String::from("a"), String::from("b"), String::from("c")],
    );

    let expected_names = vec![
        String::from("col_int"),
        String::from("col_float"),
        String::from("col_string"),
    ];
    assert_eq!(df.rows(), 3);
    assert_eq!(df.cols(), 3);
    assert_eq!(df.get_column_names(), &expected_names);

    // Type bookkeeping: one frame per type, one type id per column name.
    for type_id in [int_type, float_type, string_type] {
        assert!(df.frames_by_type.contains_key(&type_id));
    }
    for (name, type_id) in [
        ("col_int", int_type),
        ("col_float", float_type),
        ("col_string", string_type),
    ] {
        assert_eq!(df.column_to_type.get(name), Some(&type_id));
    }

    // Each typed frame lists exactly its own column.
    let int_box = df.frames_by_type.get(&int_type).unwrap();
    let int_frame = int_box.as_any().downcast_ref::<Frame<i32>>().unwrap();
    assert_eq!(int_frame.columns(), &vec![String::from("col_int")]);

    let float_box = df.frames_by_type.get(&float_type).unwrap();
    let float_frame = float_box.as_any().downcast_ref::<Frame<f64>>().unwrap();
    assert_eq!(float_frame.columns(), &vec![String::from("col_float")]);

    let string_box = df.frames_by_type.get(&string_type).unwrap();
    let string_frame = string_box
        .as_any()
        .downcast_ref::<Frame<String>>()
        .unwrap();
    assert_eq!(string_frame.columns(), &vec![String::from("col_string")]);
}
|
||||
|
||||
#[test]
fn test_dataframe_get_column() {
    // Typed column lookup: data comes back for the right name and type,
    // `None` for an unknown name or a mismatched type parameter.
    let ints = vec![1, 2, 3];
    let floats = vec![1.1, 2.2, 3.3];
    let strings = vec![String::from("a"), String::from("b"), String::from("c")];

    let mut df = DataFrame::new();
    df.add_column("col_int", ints.clone());
    df.add_column("col_float", floats.clone());
    df.add_column("col_string", strings.clone());

    // Existing columns requested with their actual element type.
    let int_col = df.get_column::<i32>("col_int").unwrap();
    assert_eq!(int_col, ints.as_slice());
    let float_col = df.get_column::<f64>("col_float").unwrap();
    assert_eq!(float_col, floats.as_slice());
    let string_col = df.get_column::<String>("col_string").unwrap();
    assert_eq!(string_col, strings.as_slice());

    // Unknown column name.
    assert!(df.get_column::<i32>("non_existent").is_none());

    // Known column names, wrong element type.
    assert!(df.get_column::<f64>("col_int").is_none());
    assert!(df.get_column::<i32>("col_float").is_none());
}
|
||||
|
||||
#[test]
fn test_dataframe_get_row() {
    // Row lookup yields a name -> stringified-value mapping for valid row
    // indices and `None` past the last row.
    let mut df = DataFrame::new();
    df.add_column("col_int", vec![1, 2, 3]);
    df.add_column("col_float", vec![1.1, 2.2, 3.3]);
    df.add_column(
        "col_string",
        vec![String::from("a"), String::from("b"), String::from("c")],
    );

    // Expected cell text for the first two rows, keyed by column name.
    let expected_rows = [
        (0, [("col_int", "1"), ("col_float", "1.1"), ("col_string", "a")]),
        (1, [("col_int", "2"), ("col_float", "2.2"), ("col_string", "b")]),
    ];
    for (row_idx, cells) in expected_rows {
        let row = df.get_row(row_idx).unwrap();
        for (column, text) in cells {
            assert_eq!(row.get(column), Some(&text.to_string()));
        }
    }

    // Index 3 is one past the end of a three-row frame.
    assert!(df.get_row(3).is_none());
}
|
||||
|
||||
#[test]
#[should_panic(expected = "DataFrame::add_column: duplicate column name: 'col_int'")]
fn test_dataframe_add_column_duplicate_name() {
    // The second insertion reuses the name of the first and is expected to panic.
    let mut df = DataFrame::new();
    for values in [vec![1, 2, 3], vec![4, 5, 6]] {
        df.add_column("col_int", values);
    }
}
|
||||
|
||||
#[test]
#[should_panic(
    expected = "DataFrame::add_column: new column 'col_int2' has 2 rows, but existing columns have 3 rows"
)]
fn test_dataframe_add_column_mismatched_rows() {
    // A three-row column followed by a two-row column: the second call is
    // expected to panic with the row-count message above.
    let three_rows = vec![1, 2, 3];
    let two_rows = vec![4, 5];

    let mut df = DataFrame::new();
    df.add_column("col_int1", three_rows);
    df.add_column("col_int2", two_rows);
}
|
||||
|
||||
// Checks the `Display` rendering of a DataFrame that has six rows and three
// columns (integer, float and string) against a fixed expected string.
#[test]
|
||||
fn test_dataframe_display() {
|
||||
let mut df = DataFrame::new();
|
||||
// Six values per column; the expected text below lists five data rows
// followed by a `...` line.
df.add_column("col_int", vec![1, 2, 3, 4, 5, 6]);
|
||||
df.add_column("col_float", vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6]);
|
||||
df.add_column(
|
||||
"col_string",
|
||||
vec![
|
||||
"a".to_string(),
|
||||
"b".to_string(),
|
||||
"c".to_string(),
|
||||
"d".to_string(),
|
||||
"e".to_string(),
|
||||
"f".to_string(),
|
||||
],
|
||||
);
|
||||
|
||||
// NOTE(review): the column spacing inside this literal looks collapsed in
// this view; confirm the exact padding against the formatter before
// editing the expected text.
let expected_output = "\
|
||||
col_int col_float col_string
|
||||
1 1.1 a
|
||||
2 2.2 b
|
||||
3 3.3 c
|
||||
4 4.4 d
|
||||
5 5.5 e
|
||||
...
|
||||
";
|
||||
// The whole rendered string must match, not just a prefix.
assert_eq!(format!("{}", df), expected_output);
|
||||
}
|
||||
|
||||
#[test]
fn test_dataframe_debug() {
    // The `Debug` rendering must mention the struct name and each field
    // checked below; only substrings are compared, not the full output.
    let mut df = DataFrame::new();
    df.add_column("col_int", vec![1, 2, 3]);
    df.add_column("col_float", vec![1.1, 2.2, 3.3]);

    let rendered = format!("{:?}", df);
    let expected_fragments = [
        "DataFrame {",
        "column_names: [\"col_int\", \"col_float\"]",
        "index: Range(0..3)",
        "column_to_type: {",
        "frames_by_type: {",
    ];
    for fragment in expected_fragments {
        assert!(rendered.contains(fragment));
    }
}
|
||||
|
||||
#[test]
fn test_dataframe_drop_column_single_type() {
    // Dropping i32 columns one at a time: the Frame<i32> must survive while
    // it still holds a column and disappear once its last column is dropped.
    let int_type = TypeId::of::<i32>();
    let float_type = TypeId::of::<f64>();

    let mut df = DataFrame::new();
    df.add_column("col_int1", vec![1, 2, 3]);
    df.add_column("col_int2", vec![4, 5, 6]);
    df.add_column("col_float", vec![1.1, 2.2, 3.3]);

    // Starting state: three columns spread over two typed frames.
    let names_before = vec![
        String::from("col_int1"),
        String::from("col_int2"),
        String::from("col_float"),
    ];
    assert_eq!(df.cols(), 3);
    assert_eq!(df.get_column_names(), &names_before);
    assert!(df.frames_by_type.contains_key(&int_type));
    assert!(df.frames_by_type.contains_key(&float_type));

    // First drop: one i32 column remains, so the i32 frame must still exist.
    df.drop_column("col_int1");

    let names_after_first = vec![String::from("col_int2"), String::from("col_float")];
    assert_eq!(df.cols(), 2);
    assert_eq!(df.get_column_names(), &names_after_first);
    assert_eq!(df.column_to_type.get("col_int1"), None);
    assert!(df.frames_by_type.contains_key(&int_type));
    let int_box = df.frames_by_type.get(&int_type).unwrap();
    let int_frame = int_box.as_any().downcast_ref::<Frame<i32>>().unwrap();
    assert_eq!(int_frame.columns(), &vec![String::from("col_int2")]);

    // Second drop: no i32 columns left, so the i32 frame must be removed.
    df.drop_column("col_int2");

    assert_eq!(df.cols(), 1);
    assert_eq!(df.get_column_names(), &vec![String::from("col_float")]);
    assert_eq!(df.column_to_type.get("col_int2"), None);
    assert!(!df.frames_by_type.contains_key(&int_type));
    assert!(df.frames_by_type.contains_key(&float_type));
}
|
||||
|
||||
#[test]
fn test_dataframe_drop_column_mixed_types() {
    // One column per type: dropping a column must also remove that type's
    // frame, and dropping everything must leave all bookkeeping empty.
    let int_type = TypeId::of::<i32>();
    let float_type = TypeId::of::<f64>();
    let string_type = TypeId::of::<String>();

    let mut df = DataFrame::new();
    df.add_column("col_int", vec![1, 2, 3]);
    df.add_column("col_float", vec![1.1, 2.2, 3.3]);
    df.add_column(
        "col_string",
        vec![String::from("a"), String::from("b"), String::from("c")],
    );

    assert_eq!(df.cols(), 3);
    for type_id in [int_type, float_type, string_type] {
        assert!(df.frames_by_type.contains_key(&type_id));
    }

    // Remove the only f64 column; its frame should go with it.
    df.drop_column("col_float");

    let remaining = vec![String::from("col_int"), String::from("col_string")];
    assert_eq!(df.cols(), 2);
    assert_eq!(df.get_column_names(), &remaining);
    assert_eq!(df.column_to_type.get("col_float"), None);
    assert!(!df.frames_by_type.contains_key(&float_type));
    assert!(df.frames_by_type.contains_key(&int_type));
    assert!(df.frames_by_type.contains_key(&string_type));

    // Remove the rest; nothing should be left behind.
    df.drop_column("col_int");
    df.drop_column("col_string");

    assert_eq!(df.cols(), 0);
    assert!(df.get_column_names().is_empty());
    assert!(df.column_to_type.is_empty());
    assert!(df.frames_by_type.is_empty());
}
|
||||
|
||||
#[test]
#[should_panic(expected = "DataFrame::drop_column: column 'non_existent' not found")]
fn test_dataframe_drop_column_non_existent() {
    // Only "col_int" exists, so dropping any other name is expected to panic.
    let existing = vec![1, 2, 3];

    let mut df = DataFrame::new();
    df.add_column("col_int", existing);
    df.drop_column("non_existent");
}
|
||||
|
||||
#[test]
fn test_dataframe_add_column_reuses_existing_frame() {
    // Adding a second column of an already-present type must extend the
    // existing typed frame rather than create another entry.
    let int_type = TypeId::of::<i32>();
    let float_type = TypeId::of::<f64>();

    let mut df = DataFrame::new();
    df.add_column("col_int1", vec![1, 2, 3]);
    df.add_column("col_float1", vec![1.1, 2.2, 3.3]);

    // Two element types so far, hence two frames.
    assert_eq!(df.frames_by_type.len(), 2);
    assert!(df.frames_by_type.contains_key(&int_type));
    assert!(df.frames_by_type.contains_key(&float_type));

    // A second integer column must land in the existing i32 frame.
    df.add_column("col_int2", vec![4, 5, 6]);

    assert_eq!(df.frames_by_type.len(), 2);
    assert!(df.frames_by_type.contains_key(&int_type));
    assert!(df.frames_by_type.contains_key(&float_type));

    let int_box = df.frames_by_type.get(&int_type).unwrap();
    let int_frame = int_box.as_any().downcast_ref::<Frame<i32>>().unwrap();
    let int_names = vec![String::from("col_int1"), String::from("col_int2")];
    assert_eq!(int_frame.columns(), &int_names);
    assert_eq!(int_frame.cols(), 2);

    // A second float column must land in the existing f64 frame.
    df.add_column("col_float2", vec![4.4, 5.5, 6.6]);

    assert_eq!(df.frames_by_type.len(), 2);
    assert!(df.frames_by_type.contains_key(&int_type));
    assert!(df.frames_by_type.contains_key(&float_type));

    let float_box = df.frames_by_type.get(&float_type).unwrap();
    let float_frame = float_box.as_any().downcast_ref::<Frame<f64>>().unwrap();
    let float_names = vec![String::from("col_float1"), String::from("col_float2")];
    assert_eq!(float_frame.columns(), &float_names);
    assert_eq!(float_frame.cols(), 2);
}
|
||||
}
|
||||
4
src/dataframe/mod.rs
Normal file
4
src/dataframe/mod.rs
Normal file
@@ -0,0 +1,4 @@
|
||||
//! This module provides the DataFrame structure for handling tabular data with mixed types.
|
||||
pub mod df;
|
||||
|
||||
pub use df::{DataFrame, SubFrame};
|
||||
@@ -1,3 +1,19 @@
|
||||
//! Core data-frame structures such as [`Frame`] and [`RowIndex`].
|
||||
//!
|
||||
//! The [`Frame`] type stores column-labelled data with an optional row index
|
||||
//! and builds upon the [`crate::matrix::Matrix`] type.
|
||||
//!
|
||||
//! # Examples
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::frame::{Frame, RowIndex};
|
||||
//! use rustframe::matrix::Matrix;
|
||||
//!
|
||||
//! let data = Matrix::from_cols(vec![vec![1, 2], vec![3, 4]]);
|
||||
//! let frame = Frame::new(data, vec!["L", "R"], Some(RowIndex::Int(vec![10, 20])));
|
||||
//! assert_eq!(frame.columns(), &["L", "R"]);
|
||||
//! assert_eq!(frame.index(), &RowIndex::Int(vec![10, 20]));
|
||||
//! ```
|
||||
use crate::matrix::Matrix;
|
||||
use chrono::NaiveDate;
|
||||
use std::collections::HashMap;
|
||||
@@ -316,7 +332,7 @@ impl<T: Clone + PartialEq> Frame<T> {
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns an immutable slice of the specified column's data.
|
||||
/// Returns an immutable slice of the specified column's data by name.
|
||||
/// Panics if the column name is not found.
|
||||
pub fn column(&self, name: &str) -> &[T] {
|
||||
let idx = self
|
||||
@@ -325,7 +341,13 @@ impl<T: Clone + PartialEq> Frame<T> {
|
||||
self.matrix.column(idx)
|
||||
}
|
||||
|
||||
/// Returns a mutable slice of the specified column's data.
|
||||
/// Returns an immutable slice of the specified column's data by its physical index.
|
||||
/// Panics if the index is out of bounds.
|
||||
pub fn column_by_physical_idx(&self, idx: usize) -> &[T] {
|
||||
self.matrix.column(idx)
|
||||
}
|
||||
|
||||
/// Returns a mutable slice of the specified column's data by name.
|
||||
/// Panics if the column name is not found.
|
||||
pub fn column_mut(&mut self, name: &str) -> &mut [T] {
|
||||
let idx = self
|
||||
@@ -334,6 +356,12 @@ impl<T: Clone + PartialEq> Frame<T> {
|
||||
self.matrix.column_mut(idx)
|
||||
}
|
||||
|
||||
/// Returns a mutable slice of the specified column's data by its physical index.
|
||||
/// Panics if the index is out of bounds.
|
||||
pub fn column_mut_by_physical_idx(&mut self, idx: usize) -> &mut [T] {
|
||||
self.matrix.column_mut(idx)
|
||||
}
|
||||
|
||||
// Row access methods
|
||||
|
||||
/// Returns an immutable view of the row for the given integer key.
|
||||
|
||||
@@ -1,3 +1,21 @@
|
||||
//! High-level interface for working with columnar data and row indices.
|
||||
//!
|
||||
//! The [`Frame`](crate::frame::Frame) type combines a matrix with column labels and a typed row
|
||||
//! index, similar to data frames in other data-analysis libraries.
|
||||
//!
|
||||
//! # Examples
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::frame::{Frame, RowIndex};
|
||||
//! use rustframe::matrix::Matrix;
|
||||
//!
|
||||
//! // Build a frame from two columns labelled "A" and "B".
|
||||
//! let data = Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]);
|
||||
//! let frame = Frame::new(data, vec!["A", "B"], None);
|
||||
//!
|
||||
//! assert_eq!(frame["A"], vec![1.0, 2.0]);
|
||||
//! assert_eq!(frame.index(), &RowIndex::Range(0..2));
|
||||
//! ```
|
||||
pub mod base;
|
||||
pub mod ops;
|
||||
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
//! Trait implementations that allow [`Frame`] to reuse matrix operations.
|
||||
//!
|
||||
//! These modules forward numeric and boolean aggregation methods from the
|
||||
//! underlying [`Matrix`](crate::matrix::Matrix) type so that they can be called
|
||||
//! directly on a [`Frame`].
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::frame::Frame;
|
||||
//! use rustframe::matrix::{Matrix, SeriesOps};
|
||||
//!
|
||||
//! let frame = Frame::new(Matrix::from_cols(vec![vec![1.0, 2.0]]), vec!["A"], None);
|
||||
//! assert_eq!(frame.sum_vertical(), vec![3.0]);
|
||||
//! ```
|
||||
use crate::frame::Frame;
|
||||
use crate::matrix::{Axis, BoolMatrix, BoolOps, FloatMatrix, SeriesOps};
|
||||
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
#![doc = include_str!("../README.md")]
|
||||
|
||||
/// Documentation for the [`crate::dataframe`] module.
|
||||
pub mod dataframe;
|
||||
|
||||
/// Documentation for the [`crate::matrix`] module.
|
||||
pub mod matrix;
|
||||
|
||||
|
||||
@@ -1,3 +1,14 @@
|
||||
//! Logical reductions for boolean matrices.
|
||||
//!
|
||||
//! The [`BoolOps`] trait mirrors common boolean aggregations such as `any` and
|
||||
//! `all` over rows or columns of a [`BoolMatrix`].
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::matrix::{BoolMatrix, BoolOps};
|
||||
//!
|
||||
//! let m = BoolMatrix::from_vec(vec![true, false], 2, 1);
|
||||
//! assert!(m.any());
|
||||
//! ```
|
||||
use crate::matrix::{Axis, BoolMatrix};
|
||||
|
||||
/// Boolean operations on `Matrix<bool>`
|
||||
|
||||
@@ -1,3 +1,18 @@
|
||||
//! Core matrix types and operations.
|
||||
//!
|
||||
//! The [`Matrix`](crate::matrix::Matrix) struct provides a simple column‑major 2D array with a
|
||||
//! suite of numeric helpers. Additional traits like [`SeriesOps`](crate::matrix::SeriesOps) and
|
||||
//! [`BoolOps`](crate::matrix::BoolOps) extend functionality for common statistics and logical reductions.
|
||||
//!
|
||||
//! # Examples
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::matrix::Matrix;
|
||||
//!
|
||||
//! let m = Matrix::from_cols(vec![vec![1, 2], vec![3, 4]]);
|
||||
//! assert_eq!(m.shape(), (2, 2));
|
||||
//! assert_eq!(m[(0,1)], 3);
|
||||
//! ```
|
||||
pub mod boolops;
|
||||
pub mod mat;
|
||||
pub mod seriesops;
|
||||
|
||||
@@ -1,3 +1,14 @@
|
||||
//! Numeric reductions and transformations over matrix axes.
|
||||
//!
|
||||
//! [`SeriesOps`] provides methods like [`SeriesOps::sum_vertical`] or
|
||||
//! [`SeriesOps::map`] that operate on [`FloatMatrix`] values.
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::matrix::{Matrix, SeriesOps};
|
||||
//!
|
||||
//! let m = Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]);
|
||||
//! assert_eq!(m.sum_horizontal(), vec![4.0, 6.0]);
|
||||
//! ```
|
||||
use crate::matrix::{Axis, BoolMatrix, FloatMatrix};
|
||||
|
||||
/// "Series-like" helpers that work along a single axis.
|
||||
|
||||
@@ -1,3 +1,13 @@
|
||||
//! Cryptographically secure random number generator.
|
||||
//!
|
||||
//! On Unix systems this reads from `/dev/urandom`; on Windows it uses the
|
||||
//! system's preferred CNG provider.
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::random::{crypto_rng, Rng};
|
||||
//! let mut rng = crypto_rng();
|
||||
//! let _v = rng.next_u64();
|
||||
//! ```
|
||||
#[cfg(unix)]
|
||||
use std::{fs::File, io::Read};
|
||||
|
||||
|
||||
@@ -1,3 +1,18 @@
|
||||
//! Random number generation utilities.
|
||||
//!
|
||||
//! Provides both a simple pseudo-random generator [`Prng`](crate::random::Prng) and a
|
||||
//! cryptographically secure alternative [`CryptoRng`](crate::random::CryptoRng). The
|
||||
//! [`SliceRandom`](crate::random::SliceRandom) trait offers shuffling of slices using any RNG
|
||||
//! implementing [`Rng`](crate::random::Rng).
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::random::{rng, SliceRandom};
|
||||
//!
|
||||
//! let mut rng = rng();
|
||||
//! let mut data = [1, 2, 3, 4];
|
||||
//! data.shuffle(&mut rng);
|
||||
//! assert_eq!(data.len(), 4);
|
||||
//! ```
|
||||
pub mod crypto;
|
||||
pub mod prng;
|
||||
pub mod random_core;
|
||||
|
||||
@@ -1,3 +1,11 @@
|
||||
//! A tiny XorShift64-based pseudo random number generator.
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::random::{rng, Rng};
|
||||
//! let mut rng = rng();
|
||||
//! let x = rng.next_u64();
|
||||
//! assert!(x >= 0);
|
||||
//! ```
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
use crate::random::Rng;
|
||||
|
||||
@@ -1,3 +1,11 @@
|
||||
//! Core traits for random number generators and sampling ranges.
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::random::{rng, Rng};
|
||||
//! let mut r = rng();
|
||||
//! let value: f64 = r.random_range(0.0..1.0);
|
||||
//! assert!(value >= 0.0 && value < 1.0);
|
||||
//! ```
|
||||
use std::f64::consts::PI;
|
||||
use std::ops::Range;
|
||||
|
||||
|
||||
@@ -1,3 +1,11 @@
|
||||
//! Extensions for shuffling slices with a random number generator.
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::random::{rng, SliceRandom};
|
||||
//! let mut data = [1, 2, 3];
|
||||
//! data.shuffle(&mut rng());
|
||||
//! assert_eq!(data.len(), 3);
|
||||
//! ```
|
||||
use crate::random::Rng;
|
||||
|
||||
/// Trait for randomizing slices.
|
||||
|
||||
@@ -1,3 +1,10 @@
|
||||
//! Generation and manipulation of calendar date sequences.
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::utils::dateutils::dates::{DateFreq, DatesList};
|
||||
//! let list = DatesList::new("2024-01-01".into(), "2024-01-03".into(), DateFreq::Daily);
|
||||
//! assert_eq!(list.count().unwrap(), 3);
|
||||
//! ```
|
||||
use chrono::{Datelike, Duration, NaiveDate, Weekday};
|
||||
use std::collections::HashMap;
|
||||
use std::error::Error;
|
||||
|
||||
@@ -1,3 +1,13 @@
|
||||
//! Generators for sequences of calendar and business dates.
|
||||
//!
|
||||
//! See [`dates`] for all-day calendars and [`bdates`] for business-day aware
|
||||
//! variants.
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::utils::dateutils::{DatesList, DateFreq};
|
||||
//! let list = DatesList::new("2024-01-01".into(), "2024-01-02".into(), DateFreq::Daily);
|
||||
//! assert_eq!(list.count().unwrap(), 2);
|
||||
//! ```
|
||||
pub mod bdates;
|
||||
pub mod dates;
|
||||
|
||||
|
||||
@@ -1,3 +1,14 @@
|
||||
//! Assorted helper utilities.
|
||||
//!
|
||||
//! Currently this module exposes date generation utilities in [`dateutils`](crate::utils::dateutils),
|
||||
//! including calendar and business date sequences.
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::utils::DatesList;
|
||||
//! use rustframe::utils::DateFreq;
|
||||
//! let dates = DatesList::new("2024-01-01".into(), "2024-01-03".into(), DateFreq::Daily);
|
||||
//! assert_eq!(dates.count().unwrap(), 3);
|
||||
//! ```
|
||||
pub mod dateutils;
|
||||
|
||||
pub use dateutils::{BDateFreq, BDatesGenerator, BDatesList};
|
||||
|
||||
Reference in New Issue
Block a user