rustframe

📚 Docs | 🐙 GitHub | 🌐 Gitea mirror | 🦀 Crates.io | 🔖 docs.rs

codecov Coverage


Rustframe: A lightweight dataframe & math toolkit for Rust

Rustframe provides intuitive dataframe, matrix, and series operations small-to-mid scale data analysis and manipulation.

Rustframe keeps things simple, safe, and readable. It is handy for quick numeric experiments and small analytical tasks, but it is not meant to compete with powerhouse crates like polars or ndarray.

What it offers

  • Math that reads like math - elementwise +, , ×, ÷ on entire frames or scalars.
  • Broadcast & reduce - sum, product, any/all across rows or columns without boilerplate.
  • Boolean masks made simple - chain comparisons, combine with &/|, get a tidy BoolMatrix back.
  • Datecentric row index - businessday ranges and calendar slicing built in.
  • Pure safe Rust - 100% safe, zero unsafe.

Heads up

  • Not memoryefficient (yet) - footprint needs work.
  • Feature set still small - expect missing pieces.

On the horizon

  • Optional GPU help (Vulkan or similar) for heavier workloads.
  • Straightforward Python bindings using pyo3.

Quick start

use chrono::NaiveDate;
use rustframe::{
    frame::{Frame, RowIndex},
    matrix::{BoolOps, Matrix, SeriesOps},
    utils::{DateFreq, BDatesList},
};

let n_periods = 4;

// Four business days starting 20240102
let dates: Vec<NaiveDate> =
    BDatesList::from_n_periods("2024-01-02".to_string(), DateFreq::Daily, n_periods)
        .unwrap()
        .list().unwrap();

let col_names: Vec<String> = vec!["a".to_string(), "b".to_string()];

let ma: Matrix<f64> =
    Matrix::from_cols(vec![vec![1.0, 2.0, 3.0, 4.0], vec![5.0, 6.0, 7.0, 8.0]]);
let mb: Matrix<f64> =
    Matrix::from_cols(vec![vec![4.0, 3.0, 2.0, 1.0], vec![8.0, 7.0, 6.0, 5.0]]);

let fa: Frame<f64> = Frame::new(
    ma.clone(),
    col_names.clone(),
    Some(RowIndex::Date(dates.clone())),
);
let fb: Frame<f64> = Frame::new(mb, col_names, Some(RowIndex::Date(dates)));

// Math that reads like math
let result: Frame<f64> = &fa * &fb; // elementwise multiply
let total: f64 = result.sum_vertical().iter().sum::<f64>();
assert_eq!(total, 184.0);

// broadcast & reduce
let result: Matrix<f64> = ma.clone() + 1.0; // add scalar
let result: Matrix<f64> = result + &ma - &ma; // add matrix
let result: Matrix<f64> = result - 1.0; // subtract scalar
let result: Matrix<f64> = result * 2.0; // multiply by scalar
let result: Matrix<f64> = result / 2.0; // divide by scalar

let check: bool = result.eq_elem(ma.clone()).all();
assert!(check);

// The above math can also be written as:
let check: bool = (&(&(&(&ma + 1.0) - 1.0) * 2.0) / 2.0)
    .eq_elem(ma.clone())
    .all();
assert!(check);

// The above math can also be written as:
let check: bool = ((((ma.clone() + 1.0) - 1.0) * 2.0) / 2.0)
    .eq_elem(ma.clone())
    .all();
assert!(check);

// Matrix multiplication
let mc: Matrix<f64> = Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]);
let md: Matrix<f64> = Matrix::from_cols(vec![vec![5.0, 6.0], vec![7.0, 8.0]]);
let mul_result: Matrix<f64> = mc.matrix_mul(&md);
// Expected:
// 1*5 + 3*6 = 5 + 18 = 23
// 2*5 + 4*6 = 10 + 24 = 34
// 1*7 + 3*8 = 7 + 24 = 31
// 2*7 + 4*8 = 14 + 32 = 46
assert_eq!(mul_result.data(), &[23.0, 34.0, 31.0, 46.0]);

// Dot product (alias for matrix_mul for FloatMatrix)
let dot_result: Matrix<f64> = mc.dot(&md);
assert_eq!(dot_result, mul_result);

// Transpose
let original_matrix: Matrix<f64> = Matrix::from_cols(vec![vec![1.0, 2.0, 3.0], vec![4.0, 5.0, 6.0]]);
// Original:
// 1 4
// 2 5
// 3 6
let transposed_matrix: Matrix<f64> = original_matrix.transpose();
// Transposed:
// 1 2 3
// 4 5 6
assert_eq!(transposed_matrix.rows(), 2);
assert_eq!(transposed_matrix.cols(), 3);
assert_eq!(transposed_matrix.data(), &[1.0, 4.0, 2.0, 5.0, 3.0, 6.0]);

// Map
let matrix = Matrix::from_cols(vec![vec![1.0, 2.0, 3.0], vec![4.0, 5.0, 6.0]]);
// Map function to double each value
let mapped_matrix = matrix.map(|x| x * 2.0);
// Expected data after mapping
// 2 8
// 4 10
// 6 12
assert_eq!(mapped_matrix.data(), &[2.0, 4.0, 6.0, 8.0, 10.0, 12.0]);

// Zip
let a = Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]); // 2x2 matrix
let b = Matrix::from_cols(vec![vec![5.0, 6.0], vec![7.0, 8.0]]); // 2x2 matrix
                                                                   // Zip function to add corresponding elements
let zipped_matrix = a.zip(&b, |x, y| x + y);
// Expected data after zipping
// 6 10
// 8 12
assert_eq!(zipped_matrix.data(), &[6.0, 8.0, 10.0, 12.0]);

DataFrame Usage Example

use chrono::NaiveDate;
use rustframe::dataframe::DataFrame;
use rustframe::utils::{BDateFreq, BDatesList};
use std::any::TypeId;
use std::collections::HashMap;

// Helper for NaiveDate
fn d(y: i32, m: u32, d: u32) -> NaiveDate {
    NaiveDate::from_ymd_opt(y, m, d).unwrap()
}

// Create a new DataFrame
let mut df = DataFrame::new();

// Add columns of different types
df.add_column("col_int1", vec![1, 2, 3, 4, 5]);
df.add_column("col_float1", vec![1.1, 2.2, 3.3, 4.4, 5.5]);
df.add_column(
    "col_string",
    vec![
        "apple".to_string(),
        "banana".to_string(),
        "cherry".to_string(),
        "date".to_string(),
        "elderberry".to_string(),
    ],
);
df.add_column("col_bool", vec![true, false, true, false, true]);
// df.add_column("col_date", vec![d(2023,1,1), d(2023,1,2), d(2023,1,3), d(2023,1,4), d(2023,1,5)]);
df.add_column(
    "col_date",
    BDatesList::from_n_periods("2023-01-01".to_string(), BDateFreq::Daily, 5)
        .unwrap()
        .list()
        .unwrap(),
);

println!("DataFrame after initial column additions:\n{}", df);

// Demonstrate frame re-use when adding columns of existing types
let initial_frames_count = df.num_internal_frames();
println!(
    "\nInitial number of internal frames: {}",
    initial_frames_count
);

df.add_column("col_int2", vec![6, 7, 8, 9, 10]);
df.add_column("col_float2", vec![6.6, 7.7, 8.8, 9.9, 10.0]);

let frames_after_reuse = df.num_internal_frames();
println!(
    "Number of internal frames after adding more columns of existing types: {}",
    frames_after_reuse
);
assert_eq!(initial_frames_count, frames_after_reuse); // Should be equal, demonstrating re-use

println!(
    "\nDataFrame after adding more columns of existing types:\n{}",
    df
);

// Get number of rows and columns
println!("Rows: {}", df.rows()); // Output: Rows: 5
println!("Columns: {}", df.cols()); // Output: Columns: 5

// Get column names
println!("Column names: {:?}", df.get_column_names());
// Output: Column names: ["col_int", "col_float", "col_string", "col_bool", "col_date"]

// Get a specific column by name and type
let int_col = df.get_column::<i32>("col_int1").unwrap();
// Output: Integer column: [1, 2, 3, 4, 5]
println!("Integer column (col_int1): {:?}", int_col);

let int_col2 = df.get_column::<i32>("col_int2").unwrap();
// Output: Integer column: [6, 7, 8, 9, 10]
println!("Integer column (col_int2): {:?}", int_col2);

let float_col = df.get_column::<f64>("col_float1").unwrap();
// Output: Float column: [1.1, 2.2, 3.3, 4.4, 5.5]
println!("Float column (col_float1): {:?}", float_col);

// Attempt to get a column with incorrect type (returns None)
let wrong_type_col = df.get_column::<bool>("col_int1");
// Output: Wrong type column: None
println!("Wrong type column: {:?}", wrong_type_col);

// Get a row by index
let row_0 = df.get_row(0).unwrap();
println!("Row 0: {:?}", row_0);
// Output: Row 0: {"col_int1": "1", "col_float1": "1.1", "col_string": "apple", "col_bool": "true", "col_date": "2023-01-01", "col_int2": "6", "col_float2": "6.6"}

let row_2 = df.get_row(2).unwrap();
println!("Row 2: {:?}", row_2);
// Output: Row 2: {"col_int1": "3", "col_float1": "3.3", "col_string": "cherry", "col_bool": "true", "col_date": "2023-01-03", "col_int2": "8", "col_float2": "8.8"}

// Attempt to get an out-of-bounds row (returns None)
let row_out_of_bounds = df.get_row(10);
// Output: Row out of bounds: None
println!("Row out of bounds: {:?}", row_out_of_bounds);

// Drop a column
df.drop_column("col_bool");
println!("\nDataFrame after dropping 'col_bool':\n{}", df);

println!("Columns after drop: {}", df.cols());
println!("Column names after drop: {:?}", df.get_column_names());

// Drop another column, ensuring the underlying Frame is removed if empty
df.drop_column("col_float1");
println!("\nDataFrame after dropping 'col_float1':\n{}", df);

println!("Columns after second drop: {}", df.cols());
println!(
    "Column names after second drop: {:?}",
    df.get_column_names()
);

// Attempt to drop a non-existent column (will panic)
// df.drop_column("non_existent_col"); // Uncomment to see panic

More examples

See the examples directory for some demonstrations of Rustframe's syntax and functionality.

To run the examples, use:

cargo run --example <example_name>

E.g. to run the game_of_life example:

cargo run --example game_of_life
Languages
Rust 100%