Refactor main.rs by removing unused functions and optimizing array generation; add cumulative sum operations for performance measurement

This commit is contained in:
Palash Tyagi 2025-05-08 22:49:27 +01:00
parent 3a288d27a5
commit 54ca7f797f

View File

@@ -1,206 +1,73 @@
use chrono::NaiveDate;
use rand::Rng; // Import thread_rng for random number generation
use rustframe::frame::{Frame, RowIndex};
use rustframe::matrix::{BoolMatrix, BoolOps, Broadcastable, Matrix, SeriesOps}; // Explicitly list used items
use rustframe::utils::{BDateFreq, BDatesList};
use std::time::Instant; // Use Instant for timing
use rustframe::{
frame::{Frame, RowIndex},
matrix::{BoolOps, Matrix, SeriesOps},
utils::{BDateFreq, BDatesList},
};
// Helper function to generate a random f64 between 0.0 and 1.0
/// Draws one `f64` from a uniform distribution built with
/// `Uniform::new(0.0, 1.0)`, using the thread-local generator.
///
/// # Panics
///
/// Panics if `Uniform::new` returns an error for the given bounds.
fn generate_random_float() -> f64 {
    // Build the distribution first, then sample it once from the thread-local RNG.
    let unit_interval = rand::distr::Uniform::new(0.0, 1.0).unwrap();
    rand::rng().sample(&unit_interval)
}
// Helper function to generate column labels
/// Produces `num_cols` labels of the form `col_<index>`, with indices
/// counting up from 0. Returns an empty vector when `num_cols` is 0.
fn generate_column_labels(num_cols: usize) -> Vec<String> {
    let mut labels = Vec::with_capacity(num_cols);
    for index in 0..num_cols {
        labels.push(format!("col_{}", index));
    }
    labels
}
// Helper function to generate a Frame with random data
/// Builds a `Frame<f64>` filled with values from `generate_random_float`.
///
/// One column is generated per entry in `column_labels`, and each column has
/// one value per entry in `dates_vec`. Both vectors are moved into the frame:
/// the labels become the column names and the dates become the row index.
fn generate_random_frame(column_labels: Vec<String>, dates_vec: Vec<NaiveDate>) -> Frame<f64> {
    let row_count = dates_vec.len();

    // Columns are filled one after another, each from top to bottom.
    let columns: Vec<Vec<f64>> = (0..column_labels.len())
        .map(|_| (0..row_count).map(|_| generate_random_float()).collect())
        .collect();

    Frame::new(
        Matrix::from_cols(columns),
        column_labels,
        Some(RowIndex::Date(dates_vec)),
    )
}
// NOTE(review): from here on the listing appears to interleave two
// definitions without diff markers: the opening of `series_operations` and
// the whole of `generate_array`. Statements are kept exactly as listed;
// confirm against the repository before editing either function.
//
// `series_operations` (opening part): builds a `Frame` over a matrix of
// `num_rows * num_cols` ones, labelled by `generate_column_labels` and indexed
// by the dates returned from `BDatesList::from_n_periods(...).list()`.
fn series_operations() {
// create a frame of 1s of len 1000 with 1000 columns
let num_cols = 1000;
let num_rows = 1000;
let frame = Frame::new(
// Dimensions are passed here as (num_rows, num_cols).
Matrix::from_vec(vec![1.0; num_cols * num_rows], num_rows, num_cols),
generate_column_labels(num_cols),
Some(RowIndex::Date(
BDatesList::from_n_periods("2000-01-01".to_string(), BDateFreq::Daily, num_rows)
.unwrap()
.list()
.unwrap(),
)),
// `generate_array`: returns a `Matrix<f64>` built from a vector holding
// 0, 1, ..., n_cols * n_rows - 1, each converted to f64.
// NOTE(review): the dimensions are passed as (n_cols, n_rows), whereas the
// `Matrix::from_vec` call above passes (num_rows, num_cols) — confirm which
// argument order `from_vec` expects.
fn generate_array(n_cols: usize, n_rows: usize) -> Matrix<f64> {
let matrix = Matrix::from_vec(
(0..n_cols * n_rows).map(|x| x as f64).collect::<Vec<f64>>(),
n_cols,
n_rows,
);
// (presumably `series_operations`) Start a timer, then wrap the result of
// `cumsum_horizontal()` in a new Frame that reuses the source frame's
// columns and index.
let cum_sum_start_time = Instant::now();
let cum_sum = Frame::new(
frame.matrix().cumsum_horizontal(),
frame.columns().to_vec(),
Some(frame.index().clone()),
// (presumably `generate_array`) The constructed matrix is the return value.
matrix
}
// NOTE(review): this span appears to interleave `test_a` with the remaining
// statements of `series_operations` (the cumulative-sum timing and checks),
// without diff markers. Statements are kept exactly as listed; confirm
// against the repository before editing.
//
// `test_a`: builds two 4-row, 2-column frames over the same date index and
// checks elementwise frame multiplication and scalar arithmetic on a matrix.
fn test_a() {
let n_periods = 4;
// Four business days starting 20240102
let dates: Vec<NaiveDate> =
BDatesList::from_n_periods("2024-01-02".to_string(), BDateFreq::Daily, n_periods)
.unwrap()
.list()
.unwrap();
let col_names: Vec<String> = vec!["a".to_string(), "b".to_string()];
// Two columns of four values each; `mb` holds each column of `ma` reversed.
let ma: Matrix<f64> =
Matrix::from_cols(vec![vec![1.0, 2.0, 3.0, 4.0], vec![5.0, 6.0, 7.0, 8.0]]);
let mb: Matrix<f64> =
Matrix::from_cols(vec![vec![4.0, 3.0, 2.0, 1.0], vec![8.0, 7.0, 6.0, 5.0]]);
// `ma`, `col_names` and `dates` are cloned here because they are used again below.
let fa: Frame<f64> = Frame::new(
ma.clone(),
col_names.clone(),
Some(RowIndex::Date(dates.clone())),
);
// (presumably `series_operations`) Report the time elapsed since `cum_sum_start_time`.
let cum_sum_duration = cum_sum_start_time.elapsed();
println!("Cumulative sum duration: {:?}", cum_sum_duration);
let fb: Frame<f64> = Frame::new(mb, col_names, Some(RowIndex::Date(dates)));
// Check if the cumulative sum is correct
// (presumably `series_operations`) Column i of the cumulative sum is asserted
// to equal (i + 1) in every one of its `num_rows` entries.
let check_start_time = Instant::now();
cum_sum.columns().iter().enumerate().for_each(|(i, col)| {
assert_eq!(cum_sum.column(col), vec![(i + 1) as f64; num_rows]);
});
let check_duration = check_start_time.elapsed();
println!("Cumulative sum check duration: {:?}", check_duration);
// Math that reads like math
let result: Frame<f64> = &fa * &fb; // elementwise multiply
// Column "a" contributes 1*4 + 2*3 + 3*2 + 4*1 = 20 and column "b"
// contributes 5*8 + 6*7 + 7*6 + 8*5 = 164, giving 184 in total.
let total: f64 = result.sum_vertical().iter().sum::<f64>();
assert_eq!(total, 184.0);
// (presumably `series_operations`) Same cumulative sum and per-column check
// as above, timed as a single step and iterating the matrix columns directly.
let cum_sum_start_time = Instant::now();
Frame::new(
frame.matrix().cumsum_horizontal(),
frame.columns().to_vec(),
Some(frame.index().clone()),
)
.matrix()
.iter_columns()
.enumerate()
.for_each(|(i, col)| {
assert_eq!(col, vec![(i + 1) as f64; num_rows]);
});
let cum_sum_duration = cum_sum_start_time.elapsed();
println!("Cumulative sum and check duration: {:?}", cum_sum_duration);
// broadcast & reduce
// +1 then -1, *2 then /2: the code asserts the result equals `ma` elementwise.
let result: Matrix<f64> = &ma + 1.0; // add scalar
let result: Matrix<f64> = &result - 1.0; // subtract scalar
let result: Matrix<f64> = &result * 2.0; // multiply by scalar
let result: Matrix<f64> = &result / 2.0; // divide by scalar
let check: bool = result.eq_elem(ma.clone()).all();
assert!(check);
// The above math can also be written as:
// `ma` is moved into `eq_elem` here, which is its last use.
let check: bool = (&(&(&(&ma + 1.0) - 1.0) * 2.0) / 2.0).eq_elem(ma).all();
assert!(check);
}
// Entry point.
// NOTE(review): this listing appears to interleave two versions of `main`
// without diff markers: a short benchmark that times
// `(&ma * &ma).sum_vertical()` on a generated matrix, and a longer demo that
// times data setup, frame creation, arithmetic, boolean and series
// operations. Statements are kept exactly as listed; confirm against the
// repository before editing.
fn main() {
// Start overall timing
let total_start_time = Instant::now();
// test with 1000x1000
let n_cols = 1000;
let n_rows = 1000;
let ma = generate_array(n_cols, n_rows);
// time the operation
// Only the multiply-and-reduce expression sits between `start` and
// `elapsed()`; building `ma` above is outside the timed window.
let start = std::time::Instant::now();
// let result = Matrix::from_vec((&ma * &ma).sum_vertical(), n_rows, 1).sum_vertical();
// NOTE(review): `result` is referenced only by the commented-out println
// near the end of this block.
let result = (&ma * &ma).sum_vertical();
let duration = start.elapsed();
// --- Configuration ---
let start_date_str = "2000-01-01".to_string();
let num_periods = 10_000;
let num_columns = 1_000;
let frequency = BDateFreq::Daily;
println!("--- Demo Parameters ---");
println!("Start Date: {}", start_date_str);
println!("Number of Periods (Dates): {}", num_periods);
println!("Number of Columns: {}", num_columns);
println!("Frequency: {:?}", frequency);
println!("-----------------------");
println!("--- Timing Results ---");
// --- Setup Data (Dates and Labels) ---
// `start_date_str` and `frequency` are moved into `from_n_periods` here,
// after they have been printed above.
let setup_data_start_time = Instant::now();
let dates_vec = BDatesList::from_n_periods(start_date_str, frequency, num_periods)
.unwrap()
.list()
.unwrap();
let column_labels = generate_column_labels(num_columns);
let setup_data_duration = setup_data_start_time.elapsed();
println!(
"Setup data (dates/labels) duration: {:?}",
setup_data_duration
);
// --- Frame Creation ---
let frame_creation_start_time = Instant::now();
// Create two frames with the same structure but different random data
// Clone labels and dates because generate_random_frame consumes them
let frame_a = generate_random_frame(column_labels.clone(), dates_vec.clone());
let frame_b = generate_random_frame(column_labels.clone(), dates_vec.clone());
let frame_creation_duration = frame_creation_start_time.elapsed();
println!(
"Frame size: dates: {}, cols: {}",
frame_a.rows(),
frame_a.cols()
);
println!("Frame creation duration: {:?}", frame_creation_duration);
// --- Arithmetic Operations and Timing ---
// Each step below is timed on its own; addition, division and subtraction
// are chained through `frame_r`, which is shadowed at every step.
// Multiplication
let mul_start_time = Instant::now();
let _result_mul = &frame_a * &frame_b; // Store result, even if unused, as the operation happens
let mul_duration = mul_start_time.elapsed();
println!("Multiplication duration: {:?}", mul_duration);
// Addition
let add_start_time = Instant::now();
let frame_r = &frame_a + &frame_b;
let add_duration = add_start_time.elapsed();
println!("Addition duration: {:?}", add_duration);
// Division (using the result of addition)
let div_start_time = Instant::now();
let frame_r = &frame_r / &frame_b;
let div_duration = div_start_time.elapsed();
println!("Division duration: {:?}", div_duration);
// Subtraction (using the result of division)
let sub_start_time = Instant::now();
let frame_r = &frame_r - &frame_a;
let sub_duration = sub_start_time.elapsed();
println!("Subtraction duration: {:?}", sub_duration);
// --- Boolean Operations and Timing ---
// Element-wise comparison (e.g., less than)
let bool_mat_start_time = Instant::now();
// Check elements in the subtraction result that are less than a small value
// From here `frame_r` holds the result of `lt_elementwise`, not a frame.
let frame_r = frame_r.matrix().lt_elementwise(0.001);
let bool_mat_duration = bool_mat_start_time.elapsed();
println!("LT operation duration: {:?}", bool_mat_duration);
// Reduction operation (e.g., check if 'any' element is true)
let any_start_time = Instant::now();
let any_result = frame_r.any();
let any_duration = any_start_time.elapsed();
println!("Any operation duration: {:?}", any_duration);
println!("Any operation result: {:?}", any_result);
// Complex operation
// Evaluates ((((a * b) / b) - a) + a) - a on the frames, compares every
// element of the result's matrix against 0.0000001 and reduces with `all()`.
let complex_start_time = Instant::now();
let frame_r = (&(&(&(&(&frame_a * &frame_b) / &frame_b) - &frame_a) + &frame_a) - &frame_a)
.matrix()
.lt_elementwise(0.0000001)
.all();
let complex_result = frame_r;
let complex_duration = complex_start_time.elapsed();
println!("Complex operation duration: {:?}", complex_duration);
println!(
"Complex operation result (expected true): {:?}",
complex_result
);
// --- Series Operations ---
let series_ops_start_time = Instant::now();
series_operations();
let series_ops_duration = series_ops_start_time.elapsed();
println!("Series operations duration: {:?}", series_ops_duration);
println!("-----------------------");
// End overall timing
let total_duration = total_start_time.elapsed();
println!(
"Total execution duration (including setup and ops): {:?}",
total_duration
);
// println!("Result: {:?}", result);
// Prints the time measured around `(&ma * &ma).sum_vertical()` above.
println!("Duration: {:?}", duration);
}