Merge branch 'main' into dataframe

Merge pull request #72 from Magnus167/release/a20250805
Bump version to 0.0.1-a.20250805 in Cargo.toml
2025-11-19 15:26:10 +00:00 · 2025-08-24 21:29:31 +01:00 · 2025-08-05 00:11:57 +01:00 · 2025-08-05 00:08:27 +01:00 · 2025-08-05 00:06:49 +01:00 · 2025-08-04 23:27:12 +01:00
9 changed files with 1024 additions and 11 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,11 +1,12 @@
 [package]
 name = "rustframe"
 authors = ["Palash Tyagi (https://github.com/Magnus167)"]
-version = "0.0.1-a.20250716"
+version = "0.0.1-a.20250805"
 edition = "2021"
 license = "GPL-3.0-or-later"
 readme = "README.md"
-description = "A simple dataframe library"
+description = "A simple dataframe and math toolkit"
 documentation = "https://magnus167.github.io/rustframe/"
 [lib]
 name = "rustframe"
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # rustframe
-📚 [Docs](https://magnus167.github.io/rustframe/) | 🐙 [GitHub](https://github.com/Magnus167/rustframe) | 🦀 [Crates.io](https://crates.io/crates/rustframe) | 🔖 [docs.rs](https://docs.rs/rustframe/latest/rustframe/)
+🐙 [GitHub](https://github.com/Magnus167/rustframe) | 📚 [Docs](https://magnus167.github.io/rustframe/) | 📖 [User Guide](https://magnus167.github.io/rustframe/user-guide/) | 🦀 [Crates.io](https://crates.io/crates/rustframe) | 🔖 [docs.rs](https://docs.rs/rustframe/latest/rustframe/)
 <!-- [![Last commit](https://img.shields.io/endpoint?url=https://magnus167.github.io/rustframe/rustframe/last-commit-date.json)](https://github.com/Magnus167/rustframe) -->
@@ -153,6 +153,133 @@ let zipped_matrix = a.zip(&b, |x, y| x + y);
 assert_eq!(zipped_matrix.data(), &[6.0, 8.0, 10.0, 12.0]);
 ```
 ---
 ## DataFrame Usage Example
 ```rust
 use chrono::NaiveDate;
 use rustframe::dataframe::DataFrame;
 use rustframe::utils::{BDateFreq, BDatesList};
 use std::any::TypeId;
 use std::collections::HashMap;
 // Helper that builds a NaiveDate from year, month and day.
 // Panics (via `unwrap`) if `NaiveDate::from_ymd_opt` returns `None`.
 fn d(y: i32, m: u32, d: u32) -> NaiveDate {
    NaiveDate::from_ymd_opt(y, m, d).unwrap()
 }
 // Create a new DataFrame
 let mut df = DataFrame::new();
 // Add columns of different types
 df.add_column("col_int1", vec![1, 2, 3, 4, 5]);
 df.add_column("col_float1", vec![1.1, 2.2, 3.3, 4.4, 5.5]);
 df.add_column(
    "col_string",
    vec![
        "apple".to_string(),
        "banana".to_string(),
        "cherry".to_string(),
        "date".to_string(),
        "elderberry".to_string(),
    ],
 );
 df.add_column("col_bool", vec![true, false, true, false, true]);
 // df.add_column("col_date", vec![d(2023,1,1), d(2023,1,2), d(2023,1,3), d(2023,1,4), d(2023,1,5)]);
 df.add_column(
    "col_date",
    BDatesList::from_n_periods("2023-01-01".to_string(), BDateFreq::Daily, 5)
        .unwrap()
        .list()
        .unwrap(),
 );
 println!("DataFrame after initial column additions:\n{}", df);
 // Demonstrate frame re-use when adding columns of existing types
 let initial_frames_count = df.num_internal_frames();
 println!(
    "\nInitial number of internal frames: {}",
    initial_frames_count
 );
 df.add_column("col_int2", vec![6, 7, 8, 9, 10]);
 df.add_column("col_float2", vec![6.6, 7.7, 8.8, 9.9, 10.0]);
 let frames_after_reuse = df.num_internal_frames();
 println!(
    "Number of internal frames after adding more columns of existing types: {}",
    frames_after_reuse
 );
 assert_eq!(initial_frames_count, frames_after_reuse); // Should be equal, demonstrating re-use
 println!(
    "\nDataFrame after adding more columns of existing types:\n{}",
    df
 );
 // Get number of rows and columns
 println!("Rows: {}", df.rows()); // Output: Rows: 5
 println!("Columns: {}", df.cols()); // Output: Columns: 7
 // Get column names
 println!("Column names: {:?}", df.get_column_names());
 // Output: Column names: ["col_int1", "col_float1", "col_string", "col_bool", "col_date", "col_int2", "col_float2"]
 // Get a specific column by name and type
 let int_col = df.get_column::<i32>("col_int1").unwrap();
 // Output: Integer column (col_int1): [1, 2, 3, 4, 5]
 println!("Integer column (col_int1): {:?}", int_col);
 let int_col2 = df.get_column::<i32>("col_int2").unwrap();
 // Output: Integer column (col_int2): [6, 7, 8, 9, 10]
 println!("Integer column (col_int2): {:?}", int_col2);
 let float_col = df.get_column::<f64>("col_float1").unwrap();
 // Output: Float column (col_float1): [1.1, 2.2, 3.3, 4.4, 5.5]
 println!("Float column (col_float1): {:?}", float_col);
 // Attempt to get a column with incorrect type (returns None)
 let wrong_type_col = df.get_column::<bool>("col_int1");
 // Output: Wrong type column: None
 println!("Wrong type column: {:?}", wrong_type_col);
 // Get a row by index
 let row_0 = df.get_row(0).unwrap();
 println!("Row 0: {:?}", row_0);
 // Output: Row 0: {"col_int1": "1", "col_float1": "1.1", "col_string": "apple", "col_bool": "true", "col_date": "2023-01-01", "col_int2": "6", "col_float2": "6.6"}
 let row_2 = df.get_row(2).unwrap();
 println!("Row 2: {:?}", row_2);
 // Output: Row 2: {"col_int1": "3", "col_float1": "3.3", "col_string": "cherry", "col_bool": "true", "col_date": "2023-01-03", "col_int2": "8", "col_float2": "8.8"}
 // Attempt to get an out-of-bounds row (returns None)
 let row_out_of_bounds = df.get_row(10);
 // Output: Row out of bounds: None
 println!("Row out of bounds: {:?}", row_out_of_bounds);
 // Drop a column
 df.drop_column("col_bool");
 println!("\nDataFrame after dropping 'col_bool':\n{}", df);
 println!("Columns after drop: {}", df.cols());
 println!("Column names after drop: {:?}", df.get_column_names());
 // Drop another column, ensuring the underlying Frame is removed if empty
 df.drop_column("col_float1");
 println!("\nDataFrame after dropping 'col_float1':\n{}", df);
 println!("Columns after second drop: {}", df.cols());
 println!(
    "Column names after second drop: {:?}",
    df.get_column_names()
 );
 // Attempt to drop a non-existent column (will panic)
 // df.drop_column("non_existent_col"); // Uncomment to see panic
 ```
 ## More examples
 See the [examples](./examples/) directory for some demonstrations of Rustframe's syntax and functionality.
--- a/docs/src/compute.md
+++ b/docs/src/compute.md
@@ -70,6 +70,77 @@ assert!((corr - 1.0).abs() < 1e-8);
 assert!((cov - 2.5).abs() < 1e-8);
 ```
 ## Covariance
 ### `covariance`
 Computes the population covariance between two equally sized matrices by flattening
 their values.
 ```rust
 # extern crate rustframe;
 use rustframe::compute::stats::covariance;
 use rustframe::matrix::Matrix;
 let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
 let y = Matrix::from_vec(vec![2.0, 4.0, 6.0, 8.0], 2, 2);
 let cov = covariance(&x, &y);
 assert!((cov - 2.5).abs() < 1e-8);
 ```
 ### `covariance_vertical`
 Evaluates covariance between columns (i.e. across rows) and returns a matrix of
 column pair covariances.
 ```rust
 # extern crate rustframe;
 use rustframe::compute::stats::covariance_vertical;
 use rustframe::matrix::Matrix;
 let m = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
 let cov = covariance_vertical(&m);
 assert_eq!(cov.shape(), (2, 2));
 assert!(cov.data().iter().all(|&v| (v - 1.0).abs() < 1e-8));
 ```
 ### `covariance_horizontal`
 Computes covariance between rows (i.e. across columns) returning a matrix that
 describes how each pair of rows varies together.
 ```rust
 # extern crate rustframe;
 use rustframe::compute::stats::covariance_horizontal;
 use rustframe::matrix::Matrix;
 let m = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
 let cov = covariance_horizontal(&m);
 assert_eq!(cov.shape(), (2, 2));
 assert!(cov.data().iter().all(|&v| (v - 0.25).abs() < 1e-8));
 ```
 ### `covariance_matrix`
 Builds a covariance matrix either between columns (`Axis::Col`) or rows
 (`Axis::Row`). Each entry represents how two series co-vary.
 ```rust
 # extern crate rustframe;
 use rustframe::compute::stats::covariance_matrix;
 use rustframe::matrix::{Axis, Matrix};
 let data = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
 // Covariance between columns
 let cov_cols = covariance_matrix(&data, Axis::Col);
 assert!((cov_cols.get(0, 0) - 2.0).abs() < 1e-8);
 // Covariance between rows
 let cov_rows = covariance_matrix(&data, Axis::Row);
 assert!((cov_rows.get(0, 1) + 0.5).abs() < 1e-8);
 ```
 ## Distributions
 Probability distribution helpers are available for common PDFs and CDFs.
--- a/docs/src/introduction.md
+++ b/docs/src/introduction.md
@@ -1,6 +1,6 @@
 # Introduction
-📚 [Docs](https://magnus167.github.io/rustframe/) | 🐙 [GitHub](https://github.com/Magnus167/rustframe) | 🦀 [Crates.io](https://crates.io/crates/rustframe) | 🔖 [docs.rs](https://docs.rs/rustframe/latest/rustframe/)
+🐙 [GitHub](https://github.com/Magnus167/rustframe) | 📚 [Docs](https://magnus167.github.io/rustframe/) | 📖 [User Guide](https://magnus167.github.io/rustframe/user-guide/) | 🦀 [Crates.io](https://crates.io/crates/rustframe) | 🔖 [docs.rs](https://docs.rs/rustframe/latest/rustframe/)
 Welcome to the **Rustframe User Guide**. Rustframe is a lightweight dataframe
 and math toolkit for Rust written in 100% safe Rust. It focuses on keeping the
--- a/docs/src/machine-learning.md
+++ b/docs/src/machine-learning.md
@@ -41,9 +41,6 @@ let new_point = Matrix::from_vec(vec![0.0, 0.0], 1, 2);
 let cluster = model.predict(&new_point)[0];
 ```
 For helper functions and upcoming modules, visit the
 [utilities](./utilities.md) section.
 ## Logistic Regression
 ```rust
@@ -72,7 +69,7 @@ let transformed = pca.transform(&data);
 assert_eq!(transformed.cols(), 1);
 ```
-### Gaussian Naive Bayes
+## Gaussian Naive Bayes
 Gaussian Naive Bayes classifier for continuous features:
@@ -101,7 +98,7 @@ let predictions = model.predict(&x);
 assert_eq!(predictions.rows(), 4);
 ```
-### Dense Neural Networks
+## Dense Neural Networks
 Simple fully connected neural network:
@@ -142,5 +139,144 @@ let predictions = model.predict(&x);
 assert_eq!(predictions.rows(), 4);
 ```
 ## Real-world Examples
 ### Housing Price Prediction
 ```rust
 # extern crate rustframe;
 use rustframe::compute::models::linreg::LinReg;
 use rustframe::matrix::Matrix;
 // Features: square feet and bedrooms
 let features = Matrix::from_rows_vec(vec![
    2100.0, 3.0,
    1600.0, 2.0,
    2400.0, 4.0,
    1400.0, 2.0,
 ], 4, 2);
 // Sale prices
 let target = Matrix::from_vec(vec![400_000.0, 330_000.0, 369_000.0, 232_000.0], 4, 1);
 let mut model = LinReg::new(2);
 model.fit(&features, &target, 1e-8, 10_000);
 // Predict price of a new home
 let new_home = Matrix::from_vec(vec![2000.0, 3.0], 1, 2);
 let predicted_price = model.predict(&new_home);
 println!("Predicted price: ${}", predicted_price.data()[0]);
 ```
 ### Spam Detection
 ```rust
 # extern crate rustframe;
 use rustframe::compute::models::logreg::LogReg;
 use rustframe::matrix::Matrix;
 // 20 e-mails × 5 features = 100 numbers (row-major, spam first)
 let x = Matrix::from_rows_vec(
    vec![
        // ─────────── spam examples ───────────
        2.0, 1.0, 1.0, 1.0, 1.0, // "You win a FREE offer - click for money-back bonus!"
        1.0, 0.0, 1.0, 1.0, 0.0, // "FREE offer! Click now!"
        0.0, 2.0, 0.0, 1.0, 1.0, // "Win win win - money inside, click…"
        1.0, 1.0, 0.0, 0.0, 1.0, // "Limited offer to win easy money…"
        1.0, 0.0, 1.0, 0.0, 1.0, // ...
        0.0, 1.0, 1.0, 1.0, 0.0, // ...
        2.0, 0.0, 0.0, 1.0, 1.0, // ...
        0.0, 1.0, 1.0, 0.0, 1.0, // ...
        1.0, 1.0, 1.0, 1.0, 0.0, // ...
        1.0, 0.0, 0.0, 1.0, 1.0, // ...
        // ─────────── ham examples ───────────
        0.0, 0.0, 0.0, 0.0, 0.0, // "See you at the meeting tomorrow."
        0.0, 0.0, 0.0, 1.0, 0.0, // "Here's the Zoom click-link."
        0.0, 0.0, 0.0, 0.0, 1.0, // "Expense report: money attached."
        0.0, 0.0, 0.0, 1.0, 1.0, // ...
        0.0, 1.0, 0.0, 0.0, 0.0, // "Did we win the bid?"
        0.0, 0.0, 0.0, 0.0, 0.0, // ...
        0.0, 0.0, 0.0, 1.0, 0.0, // ...
        1.0, 0.0, 0.0, 0.0, 0.0, // "Special offer for staff lunch."
        0.0, 0.0, 0.0, 0.0, 0.0, // ...
        0.0, 0.0, 0.0, 1.0, 0.0,
    ],
    20,
    5,
 );
 // Labels: 1 = spam, 0 = ham
 let y = Matrix::from_vec(
    vec![
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, // 10 spam
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, // 10 ham
    ],
    20,
    1,
 );
 // Train
 let mut model = LogReg::new(5);
 model.fit(&x, &y, 0.01, 5000);
 // Predict
 // e.g. "free money offer"
 let email_data = vec![1.0, 0.0, 1.0, 0.0, 1.0];
 let email = Matrix::from_vec(email_data, 1, 5);
 let prob_spam = model.predict_proba(&email);
 println!("Probability of spam: {:.4}", prob_spam.data()[0]);
 ```
 ### Iris Flower Classification
 ```rust
 # extern crate rustframe;
 use rustframe::compute::models::gaussian_nb::GaussianNB;
 use rustframe::matrix::Matrix;
 // Features: sepal length and petal length
 let x = Matrix::from_rows_vec(vec![
    5.1, 1.4, // setosa
    4.9, 1.4, // setosa
    6.2, 4.5, // versicolor
    5.9, 5.1, // virginica
 ], 4, 2);
 let y = Matrix::from_vec(vec![0.0, 0.0, 1.0, 2.0], 4, 1);
 let names = vec!["setosa", "versicolor", "virginica"];
 let mut model = GaussianNB::new(1e-9, true);
 model.fit(&x, &y);
 let sample = Matrix::from_vec(vec![5.0, 1.5], 1, 2);
 let predicted_class = model.predict(&sample);
 let class_name = names[predicted_class.data()[0] as usize];
 println!("Predicted class: {} ({:?})", class_name, predicted_class.data()[0]);
 ```
 ### Customer Segmentation
 ```rust
 # extern crate rustframe;
 use rustframe::compute::models::k_means::KMeans;
 use rustframe::matrix::Matrix;
 // Each row: [age, annual_income]
 let customers = Matrix::from_rows_vec(
    vec![
        25.0, 40_000.0, 34.0, 52_000.0, 58.0, 95_000.0, 45.0, 70_000.0,
    ],
    4,
    2,
 );
 let (model, labels) = KMeans::fit(&customers, 2, 20, 1e-4);
 let new_customer = Matrix::from_vec(vec![30.0, 50_000.0], 1, 2);
 let cluster = model.predict(&new_customer)[0];
 println!("New customer belongs to cluster: {}", cluster);
 println!("Cluster labels: {:?}", labels);
 ```
 For helper functions and upcoming modules, visit the
 [utilities](./utilities.md) section.
--- a/src/dataframe/df.rs
+++ b/src/dataframe/df.rs
@@ -0,0 +1,659 @@
 use crate::frame::{Frame, RowIndex};
 use std::any::{Any, TypeId}; // TypeId is the key used to group columns by element type
 use std::collections::HashMap;
 use std::fmt;
 /// Maximum number of data rows the `Display` impl prints before emitting a `...` line.
 const DEFAULT_DISPLAY_ROWS: usize = 5;
 /// Maximum number of columns the `Display` impl prints before appending `...` to a line.
 const DEFAULT_DISPLAY_COLS: usize = 10;
 /// Type-erased interface over a `Frame<T>`.
 ///
 /// `DataFrame` stores one boxed `SubFrame` per element type, so it can work with
 /// frames of different `T` without knowing the concrete type at the call site.
 pub trait SubFrame: Send + Sync + fmt::Debug + Any {
    /// Number of rows in the underlying frame.
    fn rows(&self) -> usize;
    /// Value stored at row `physical_row_idx` of column `col_name`, rendered as a `String`.
    fn get_value_as_string(&self, physical_row_idx: usize, col_name: &str) -> String;
    /// Clones the underlying frame into a fresh boxed trait object.
    fn clone_box(&self) -> Box<dyn SubFrame>;
    /// Removes the column named `col_name` from the underlying frame.
    fn delete_column_from_frame(&mut self, col_name: &str);
    /// Number of columns currently held by the underlying frame.
    fn get_frame_cols(&self) -> usize;
    // Methods for downcasting to concrete types
    /// Shared `Any` view, used with `downcast_ref::<Frame<T>>()` to recover the concrete frame.
    fn as_any(&self) -> &dyn Any;
    /// Mutable `Any` view, used with `downcast_mut::<Frame<T>>()` to recover the concrete frame.
    fn as_any_mut(&mut self) -> &mut dyn Any;
 }
 // Implement SubFrame for any Frame<T> that meets the requirements
 impl<T> SubFrame for Frame<T>
 where
    T: Clone + PartialEq + fmt::Display + fmt::Debug + 'static + Send + Sync + Any,
 {
    fn rows(&self) -> usize {
        self.rows()
    }
    fn get_value_as_string(&self, physical_row_idx: usize, col_name: &str) -> String {
        self.get_row(physical_row_idx).get(col_name).to_string()
    }
    fn clone_box(&self) -> Box<dyn SubFrame> {
        Box::new(self.clone())
    }
    fn delete_column_from_frame(&mut self, col_name: &str) {
        self.delete_column(col_name);
    }
    fn get_frame_cols(&self) -> usize {
        self.cols()
    }
    fn as_any(&self) -> &dyn Any {
        self
    }
    fn as_any_mut(&mut self) -> &mut dyn Any {
        self
    }
 }
 /// Table whose columns may have different element types.
 ///
 /// Columns are grouped by element type: all columns of type `T` live in a single
 /// `Frame<T>`, stored type-erased behind `Box<dyn SubFrame>` and keyed by `TypeId`.
 pub struct DataFrame {
    frames_by_type: HashMap<TypeId, Box<dyn SubFrame>>, // Maps TypeId to the Frame holding columns of that type
    column_to_type: HashMap<String, TypeId>,            // Maps column name to its TypeId
    column_names: Vec<String>, // Column names in the order they were added
    index: RowIndex, // Row index; its length is what `rows()` reports
 }
 impl DataFrame {
    pub fn new() -> Self {
        DataFrame {
            frames_by_type: HashMap::new(),
            column_to_type: HashMap::new(),
            column_names: Vec::new(),
            index: RowIndex::Range(0..0), // Initialize with an empty range index
        }
    }
    /// Returns the number of rows in the DataFrame.
    ///
    /// This is the length of the DataFrame's row index, which `add_column` sets
    /// from the length of the first column added.
    pub fn rows(&self) -> usize {
        self.index.len()
    }
    /// Returns the number of columns in the DataFrame, counted across all
    /// element types.
    pub fn cols(&self) -> usize {
        self.column_names.len()
    }
    /// Returns a reference to the vector of column names.
    ///
    /// Names appear in the order the columns were added, minus any that were dropped.
    pub fn get_column_names(&self) -> &Vec<String> {
        &self.column_names
    }
    /// Returns the number of internal Frame objects (one per unique data type).
    ///
    /// Adding a column whose type is already present reuses the existing frame,
    /// so this count only grows when a new element type is introduced.
    pub fn num_internal_frames(&self) -> usize {
        self.frames_by_type.len()
    }
    /// Returns the values of column `col_name` as a slice of `T`.
    ///
    /// Yields `None` when no column has that name, or when the column exists
    /// but was stored with an element type other than `T`.
    pub fn get_column<T>(&self, col_name: &str) -> Option<&[T]>
    where
        T: Clone + PartialEq + fmt::Display + fmt::Debug + 'static + Send + Sync + Any,
    {
        let stored_type = self.column_to_type.get(col_name)?;
        // A column registered under a different element type is reported as absent.
        if *stored_type != TypeId::of::<T>() {
            return None;
        }
        let typed_frame = self
            .frames_by_type
            .get(stored_type)?
            .as_any()
            .downcast_ref::<Frame<T>>()?;
        Some(typed_frame.column(col_name))
    }
    /// Builds a map from column name to the string form of the value found at
    /// `row_idx`.
    ///
    /// Returns `None` when `row_idx` is not smaller than `rows()`. A column whose
    /// type or backing frame cannot be found is left out of the map.
    pub fn get_row(&self, row_idx: usize) -> Option<HashMap<String, String>> {
        if row_idx >= self.rows() {
            return None;
        }
        let cells: HashMap<String, String> = self
            .column_names
            .iter()
            .filter_map(|name| {
                let type_id = self.column_to_type.get(name)?;
                let frame = self.frames_by_type.get(type_id)?;
                let text = frame.get_value_as_string(row_idx, name);
                Some((name.clone(), text))
            })
            .collect();
        Some(cells)
    }
    /// Appends a column named `col_name` holding `data`.
    ///
    /// The first column added defines the row index as `0..data.len()`. Columns
    /// are grouped by element type: if a `Frame<T>` already exists the column is
    /// added to it, otherwise a new single-column `Frame<T>` is created with the
    /// DataFrame's index.
    ///
    /// # Panics
    ///
    /// Panics if `col_name` is already used by any column, if `data.len()` differs
    /// from the current number of rows (when at least one column exists), or if
    /// the frame stored for `T`'s `TypeId` cannot be downcast to `Frame<T>`.
    pub fn add_column<T>(&mut self, col_name: &str, data: Vec<T>)
    where
        T: Clone + PartialEq + fmt::Display + fmt::Debug + 'static + Send + Sync + Any,
    {
        // Names must be unique across every typed frame, not only within one type.
        assert!(
            !self.column_to_type.contains_key(col_name),
            "DataFrame::add_column: duplicate column name: '{}'",
            col_name
        );

        if self.column_names.is_empty() {
            // The first column decides how many rows the DataFrame has.
            self.index = RowIndex::Range(0..data.len());
        } else {
            assert!(
                data.len() == self.index.len(),
                "DataFrame::add_column: new column '{}' has {} rows, but existing columns have {} rows",
                col_name,
                data.len(),
                self.index.len()
            );
        }

        let key = TypeId::of::<T>();
        let owned_name = col_name.to_string();

        match self.frames_by_type.get_mut(&key) {
            // A frame for this element type already exists: reuse it.
            Some(existing) => {
                existing
                    .as_any_mut()
                    .downcast_mut::<Frame<T>>()
                    .unwrap_or_else(|| {
                        // Not expected when the TypeId matches; kept as a safety net.
                        panic!(
                            "Type mismatch when downcasting existing SubFrame for TypeId {:?}",
                            key
                        )
                    })
                    .add_column(owned_name.clone(), data);
            }
            // First column of this element type: build a one-column frame that
            // shares the DataFrame's index.
            None => {
                let single_column = crate::matrix::Matrix::from_cols(vec![data]);
                let fresh_frame = Frame::new(
                    single_column,
                    vec![owned_name.clone()],
                    Some(self.index.clone()),
                );
                self.frames_by_type.insert(key, Box::new(fresh_frame));
            }
        }

        // Record the column only after the typed frame accepted it.
        self.column_to_type.insert(owned_name.clone(), key);
        self.column_names.push(owned_name);
    }
    /// Removes the column named `col_name`.
    ///
    /// When the removed column was the last one of its element type, the
    /// now-empty internal frame for that type is discarded as well.
    ///
    /// # Panics
    ///
    /// Panics if no column is named `col_name`, or if the internal frame for the
    /// column's type is missing.
    pub fn drop_column(&mut self, col_name: &str) {
        // Unregister the name first; this also tells us which typed frame holds it.
        let type_id = match self.column_to_type.remove(col_name) {
            Some(found) => found,
            None => panic!("DataFrame::drop_column: column '{}' not found", col_name),
        };

        // Keep the ordered name list in sync.
        self.column_names
            .retain(|existing| existing.as_str() != col_name);

        // Delete the data, then note whether the typed frame is left without columns.
        let frame_now_empty = match self.frames_by_type.get_mut(&type_id) {
            Some(frame) => {
                frame.delete_column_from_frame(col_name);
                frame.get_frame_cols() == 0
            }
            // Only reachable if `column_to_type` and `frames_by_type` disagree.
            None => panic!(
                "DataFrame::drop_column: internal error, no frame found for type_id {:?}",
                type_id
            ),
        };

        if frame_now_empty {
            self.frames_by_type.remove(&type_id);
        }
    }
 }
 /// Plain-text table rendering: a header line followed by up to
 /// `DEFAULT_DISPLAY_ROWS` data lines, each cell left-aligned in 15 characters.
 /// At most `DEFAULT_DISPLAY_COLS` columns are shown; truncation in either
 /// direction is marked with `...`.
 impl fmt::Display for DataFrame {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let total_cols = self.column_names.len();
        let cols_truncated = total_cols > DEFAULT_DISPLAY_COLS;
        let shown_cols = &self.column_names[..total_cols.min(DEFAULT_DISPLAY_COLS)];

        // Header line.
        for name in shown_cols {
            write!(f, "{:<15}", name)?;
        }
        if cols_truncated {
            write!(f, "...")?;
        }
        writeln!(f)?;

        // Data lines, capped at DEFAULT_DISPLAY_ROWS.
        let total_rows = self.index.len();
        for row in 0..total_rows.min(DEFAULT_DISPLAY_ROWS) {
            for name in shown_cols {
                let cell = self
                    .column_to_type
                    .get(name)
                    .and_then(|type_id| self.frames_by_type.get(type_id))
                    .map(|frame| frame.get_value_as_string(row, name));
                match cell {
                    Some(text) => write!(f, "{:<15}", text)?,
                    // The name is listed but its type or frame is missing:
                    // the internal maps are out of sync.
                    None => write!(f, "{:<15}", "[ERROR]")?,
                }
            }
            if cols_truncated {
                write!(f, "...")?;
            }
            writeln!(f)?;
        }

        // Marker line for rows that were not printed.
        if total_rows > DEFAULT_DISPLAY_ROWS {
            writeln!(f, "...")?;
        }
        Ok(())
    }
 }
 /// Struct-style debug output listing the column names, the index, the
 /// name-to-type map and the per-type frames, in that order.
 impl fmt::Debug for DataFrame {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut builder = f.debug_struct("DataFrame");
        builder.field("column_names", &self.column_names);
        builder.field("index", &self.index);
        builder.field("column_to_type", &self.column_to_type);
        builder.field("frames_by_type", &self.frames_by_type);
        builder.finish()
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 #[cfg(test)]
 mod tests {
    use super::*;
    use crate::frame::Frame;
    use crate::matrix::Matrix;
    #[test]
    fn test_dataframe_new() {
        let df = DataFrame::new();
        assert_eq!(df.rows(), 0);
        assert_eq!(df.cols(), 0);
        assert!(df.get_column_names().is_empty());
        assert!(df.frames_by_type.is_empty());
        assert!(df.column_to_type.is_empty());
    }
    #[test]
    fn test_dataframe_add_column_initial() {
        let mut df = DataFrame::new();
        let data = vec![1, 2, 3];
        df.add_column("col_int", data.clone());
        assert_eq!(df.rows(), 3);
        assert_eq!(df.cols(), 1);
        assert_eq!(df.get_column_names(), &vec!["col_int".to_string()]);
        assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>()));
        assert_eq!(df.column_to_type.get("col_int"), Some(&TypeId::of::<i32>()));
        // Verify the underlying frame
        let sub_frame_box = df.frames_by_type.get(&TypeId::of::<i32>()).unwrap();
        let frame = sub_frame_box.as_any().downcast_ref::<Frame<i32>>().unwrap();
        assert_eq!(frame.rows(), 3);
        assert_eq!(frame.cols(), 1);
        assert_eq!(frame.columns(), &vec!["col_int".to_string()]);
    }
    #[test]
    fn test_dataframe_add_column_same_type() {
        let mut df = DataFrame::new();
        df.add_column("col_int1", vec![1, 2, 3]);
        df.add_column("col_int2", vec![4, 5, 6]);
        assert_eq!(df.rows(), 3);
        assert_eq!(df.cols(), 2);
        assert_eq!(
            df.get_column_names(),
            &vec!["col_int1".to_string(), "col_int2".to_string()]
        );
        assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>()));
        assert_eq!(
            df.column_to_type.get("col_int1"),
            Some(&TypeId::of::<i32>())
        );
        assert_eq!(
            df.column_to_type.get("col_int2"),
            Some(&TypeId::of::<i32>())
        );
        // Verify the underlying frame
        let sub_frame_box = df.frames_by_type.get(&TypeId::of::<i32>()).unwrap();
        let frame = sub_frame_box.as_any().downcast_ref::<Frame<i32>>().unwrap();
        assert_eq!(frame.rows(), 3);
        assert_eq!(frame.cols(), 2);
        assert_eq!(
            frame.columns(),
            &vec!["col_int1".to_string(), "col_int2".to_string()]
        );
    }
    #[test]
    fn test_dataframe_add_column_different_type() {
        let mut df = DataFrame::new();
        df.add_column("col_int", vec![1, 2, 3]);
        df.add_column("col_float", vec![1.1, 2.2, 3.3]);
        df.add_column(
            "col_string",
            vec!["a".to_string(), "b".to_string(), "c".to_string()],
        );
        assert_eq!(df.rows(), 3);
        assert_eq!(df.cols(), 3);
        assert_eq!(
            df.get_column_names(),
            &vec![
                "col_int".to_string(),
                "col_float".to_string(),
                "col_string".to_string()
            ]
        );
        assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>()));
        assert!(df.frames_by_type.contains_key(&TypeId::of::<f64>()));
        assert!(df.frames_by_type.contains_key(&TypeId::of::<String>()));
        assert_eq!(df.column_to_type.get("col_int"), Some(&TypeId::of::<i32>()));
        assert_eq!(
            df.column_to_type.get("col_float"),
            Some(&TypeId::of::<f64>())
        );
        assert_eq!(
            df.column_to_type.get("col_string"),
            Some(&TypeId::of::<String>())
        );
        // Verify underlying frames
        let int_frame = df
            .frames_by_type
            .get(&TypeId::of::<i32>())
            .unwrap()
            .as_any()
            .downcast_ref::<Frame<i32>>()
            .unwrap();
        assert_eq!(int_frame.columns(), &vec!["col_int".to_string()]);
        let float_frame = df
            .frames_by_type
            .get(&TypeId::of::<f64>())
            .unwrap()
            .as_any()
            .downcast_ref::<Frame<f64>>()
            .unwrap();
        assert_eq!(float_frame.columns(), &vec!["col_float".to_string()]);
        let string_frame = df
            .frames_by_type
            .get(&TypeId::of::<String>())
            .unwrap()
            .as_any()
            .downcast_ref::<Frame<String>>()
            .unwrap();
        assert_eq!(string_frame.columns(), &vec!["col_string".to_string()]);
    }
    #[test]
    fn test_dataframe_get_column() {
        let mut df = DataFrame::new();
        df.add_column("col_int", vec![1, 2, 3]);
        df.add_column("col_float", vec![1.1, 2.2, 3.3]);
        df.add_column(
            "col_string",
            vec!["a".to_string(), "b".to_string(), "c".to_string()],
        );
        // Test getting existing columns with correct type
        assert_eq!(
            df.get_column::<i32>("col_int").unwrap(),
            vec![1, 2, 3].as_slice()
        );
        assert_eq!(
            df.get_column::<f64>("col_float").unwrap(),
            vec![1.1, 2.2, 3.3].as_slice()
        );
        assert_eq!(
            df.get_column::<String>("col_string").unwrap(),
            vec!["a".to_string(), "b".to_string(), "c".to_string()].as_slice()
        );
        // Test getting non-existent column
        assert_eq!(df.get_column::<i32>("non_existent"), None);
        // Test getting existing column with incorrect type
        assert_eq!(df.get_column::<f64>("col_int"), None);
        assert_eq!(df.get_column::<i32>("col_float"), None);
    }
    #[test]
    fn test_dataframe_get_row() {
        let mut df = DataFrame::new();
        df.add_column("col_int", vec![1, 2, 3]);
        df.add_column("col_float", vec![1.1, 2.2, 3.3]);
        df.add_column(
            "col_string",
            vec!["a".to_string(), "b".to_string(), "c".to_string()],
        );
        // Test getting an existing row
        let row0 = df.get_row(0).unwrap();
        assert_eq!(row0.get("col_int"), Some(&"1".to_string()));
        assert_eq!(row0.get("col_float"), Some(&"1.1".to_string()));
        assert_eq!(row0.get("col_string"), Some(&"a".to_string()));
        let row1 = df.get_row(1).unwrap();
        assert_eq!(row1.get("col_int"), Some(&"2".to_string()));
        assert_eq!(row1.get("col_float"), Some(&"2.2".to_string()));
        assert_eq!(row1.get("col_string"), Some(&"b".to_string()));
        // Test getting an out-of-bounds row
        assert_eq!(df.get_row(3), None);
    }
    #[test]
    #[should_panic(expected = "DataFrame::add_column: duplicate column name: 'col_int'")]
    fn test_dataframe_add_column_duplicate_name() {
        let mut df = DataFrame::new();
        df.add_column("col_int", vec![1, 2, 3]);
        df.add_column("col_int", vec![4, 5, 6]);
    }
    #[test]
    #[should_panic(
        expected = "DataFrame::add_column: new column 'col_int2' has 2 rows, but existing columns have 3 rows"
    )]
    fn test_dataframe_add_column_mismatched_rows() {
        let mut df = DataFrame::new();
        df.add_column("col_int1", vec![1, 2, 3]);
        df.add_column("col_int2", vec![4, 5]);
    }
    /// Golden-output check for the `Display` rendering of a six-row, three-column frame.
    ///
    /// The expected text holds a header line, the first five data rows and a trailing
    /// `...` line; the sixth row (`6`, `6.6`, `f`) does not appear in it.
    #[test]
    fn test_dataframe_display() {
        let mut df = DataFrame::new();
        df.add_column("col_int", vec![1, 2, 3, 4, 5, 6]);
        df.add_column("col_float", vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6]);
        df.add_column(
            "col_string",
            vec![
                "a".to_string(),
                "b".to_string(),
                "c".to_string(),
                "d".to_string(),
                "e".to_string(),
                "f".to_string(),
            ],
        );
        // The whitespace inside this literal, including the padding after the last cell
        // of every row, is part of the expected value; do not let an editor trim it.
        // The opening `\` is a string continuation: Rust drops the newline and any
        // leading whitespace on the following line, so the content starts at `col_int`.
        let expected_output = "\
 col_int        col_float      col_string     
 1              1.1            a              
 2              2.2            b              
 3              3.3            c              
 4              4.4            d              
 5              5.5            e              
 ...
 ";
        // Compare the full rendering byte-for-byte against the golden text.
        assert_eq!(format!("{}", df), expected_output);
    }
    /// Checks that the `Debug` rendering contains the struct name, the column names,
    /// the index, and the `column_to_type` / `frames_by_type` field labels.
    #[test]
    fn test_dataframe_debug() {
        let mut table = DataFrame::new();
        table.add_column("col_int", vec![1, 2, 3]);
        table.add_column("col_float", vec![1.1, 2.2, 3.3]);
        let rendered = format!("{:?}", table);
        // Small helper so each expectation reads as a single substring check.
        let mentions = |needle: &str| rendered.contains(needle);
        assert!(mentions("DataFrame {"));
        assert!(mentions("column_names: [\"col_int\", \"col_float\"]"));
        assert!(mentions("index: Range(0..3)"));
        assert!(mentions("column_to_type: {"));
        assert!(mentions("frames_by_type: {"));
    }
    /// Drops columns one at a time from a frame holding two `i32` columns and one `f64`
    /// column: the `i32` entry in `frames_by_type` must survive the first drop and be
    /// gone after the second.
    #[test]
    fn test_dataframe_drop_column_single_type() {
        let int_id = TypeId::of::<i32>();
        let float_id = TypeId::of::<f64>();
        let mut table = DataFrame::new();
        table.add_column("col_int1", vec![1, 2, 3]);
        table.add_column("col_int2", vec![4, 5, 6]);
        table.add_column("col_float", vec![1.1, 2.2, 3.3]);
        // Starting point: three columns, with both element types registered.
        let initial_names = vec![
            String::from("col_int1"),
            String::from("col_int2"),
            String::from("col_float"),
        ];
        assert_eq!(table.cols(), 3);
        assert_eq!(table.get_column_names(), &initial_names);
        assert!(table.frames_by_type.contains_key(&int_id));
        assert!(table.frames_by_type.contains_key(&float_id));
        // Remove one of the two i32 columns; the shared Frame<i32> must remain.
        table.drop_column("col_int1");
        let after_first_drop = vec![String::from("col_int2"), String::from("col_float")];
        assert_eq!(table.cols(), 2);
        assert_eq!(table.get_column_names(), &after_first_drop);
        assert!(table.column_to_type.get("col_int1").is_none());
        assert!(table.frames_by_type.contains_key(&int_id));
        let remaining_ints = table
            .frames_by_type
            .get(&int_id)
            .unwrap()
            .as_any()
            .downcast_ref::<Frame<i32>>()
            .unwrap();
        let expected_int_columns = vec![String::from("col_int2")];
        assert_eq!(remaining_ints.columns(), &expected_int_columns);
        // Remove the last i32 column; the Frame<i32> entry itself must disappear.
        table.drop_column("col_int2");
        let after_second_drop = vec![String::from("col_float")];
        assert_eq!(table.cols(), 1);
        assert_eq!(table.get_column_names(), &after_second_drop);
        assert!(table.column_to_type.get("col_int2").is_none());
        assert!(!table.frames_by_type.contains_key(&int_id));
        assert!(table.frames_by_type.contains_key(&float_id));
    }
    /// Drops columns from a frame with one column each of `i32`, `f64` and `String`,
    /// checking that a typed frame is removed once its only column is dropped and that
    /// all bookkeeping is empty after the last drop.
    #[test]
    fn test_dataframe_drop_column_mixed_types() {
        let int_id = TypeId::of::<i32>();
        let float_id = TypeId::of::<f64>();
        let string_id = TypeId::of::<String>();
        let mut table = DataFrame::new();
        table.add_column("col_int", vec![1, 2, 3]);
        table.add_column("col_float", vec![1.1, 2.2, 3.3]);
        table.add_column(
            "col_string",
            vec![String::from("a"), String::from("b"), String::from("c")],
        );
        // One typed frame per column to begin with.
        assert_eq!(table.cols(), 3);
        assert!(table.frames_by_type.contains_key(&int_id));
        assert!(table.frames_by_type.contains_key(&float_id));
        assert!(table.frames_by_type.contains_key(&string_id));
        // Dropping the only f64 column must also remove the Frame<f64> entry.
        table.drop_column("col_float");
        let survivors = vec![String::from("col_int"), String::from("col_string")];
        assert_eq!(table.cols(), 2);
        assert_eq!(table.get_column_names(), &survivors);
        assert!(table.column_to_type.get("col_float").is_none());
        assert!(!table.frames_by_type.contains_key(&float_id));
        assert!(table.frames_by_type.contains_key(&int_id));
        assert!(table.frames_by_type.contains_key(&string_id));
        // With every column gone, names and both lookup tables must be empty.
        table.drop_column("col_int");
        table.drop_column("col_string");
        assert_eq!(table.cols(), 0);
        assert!(table.get_column_names().is_empty());
        assert!(table.frames_by_type.is_empty());
        assert!(table.column_to_type.is_empty());
    }
    /// Dropping a column name that was never added must panic with the not-found message.
    #[test]
    #[should_panic(expected = "DataFrame::drop_column: column 'non_existent' not found")]
    fn test_dataframe_drop_column_non_existent() {
        let missing_name = "non_existent";
        let mut table = DataFrame::new();
        // Populate the frame so the lookup runs against a non-empty set of columns.
        table.add_column("col_int", vec![1, 2, 3]);
        // This is the call expected to panic.
        table.drop_column(missing_name);
    }
    /// Adding a second column of an element type that is already present must extend the
    /// existing typed frame instead of creating another entry in `frames_by_type`.
    #[test]
    fn test_dataframe_add_column_reuses_existing_frame() {
        let int_id = TypeId::of::<i32>();
        let float_id = TypeId::of::<f64>();
        let mut table = DataFrame::new();
        table.add_column("col_int1", vec![1, 2, 3]);
        table.add_column("col_float1", vec![1.1, 2.2, 3.3]);
        // Baseline: exactly two typed frames, one for i32 and one for f64.
        assert_eq!(table.frames_by_type.len(), 2);
        assert!(table.frames_by_type.contains_key(&int_id));
        assert!(table.frames_by_type.contains_key(&float_id));
        // A second i32 column must not change the number of typed frames.
        table.add_column("col_int2", vec![4, 5, 6]);
        assert_eq!(table.frames_by_type.len(), 2);
        assert!(table.frames_by_type.contains_key(&int_id));
        assert!(table.frames_by_type.contains_key(&float_id));
        // The i32 frame now carries both integer columns, in insertion order.
        let ints = table
            .frames_by_type
            .get(&int_id)
            .unwrap()
            .as_any()
            .downcast_ref::<Frame<i32>>()
            .unwrap();
        let expected_int_columns = vec![String::from("col_int1"), String::from("col_int2")];
        assert_eq!(ints.columns(), &expected_int_columns);
        assert_eq!(ints.cols(), 2);
        // Same expectation for a second f64 column.
        table.add_column("col_float2", vec![4.4, 5.5, 6.6]);
        assert_eq!(table.frames_by_type.len(), 2);
        assert!(table.frames_by_type.contains_key(&int_id));
        assert!(table.frames_by_type.contains_key(&float_id));
        // The f64 frame now carries both float columns, in insertion order.
        let floats = table
            .frames_by_type
            .get(&float_id)
            .unwrap()
            .as_any()
            .downcast_ref::<Frame<f64>>()
            .unwrap();
        let expected_float_columns =
            vec![String::from("col_float1"), String::from("col_float2")];
        assert_eq!(floats.columns(), &expected_float_columns);
        assert_eq!(floats.cols(), 2);
    }
 }
--- a/src/dataframe/mod.rs
+++ b/src/dataframe/mod.rs
@@ -0,0 +1,4 @@
 //! This module provides the DataFrame structure for handling tabular data with mixed types.
 pub mod df;
 pub use df::{DataFrame, SubFrame};
--- a/src/frame/base.rs
+++ b/src/frame/base.rs
@@ -332,7 +332,7 @@ impl<T: Clone + PartialEq> Frame<T> {
        )
    }
-    /// Returns an immutable slice of the specified column's data.
+    /// Returns an immutable slice of the specified column's data by name.
    /// Panics if the column name is not found.
    pub fn column(&self, name: &str) -> &[T] {
        let idx = self
@@ -341,7 +341,13 @@ impl<T: Clone + PartialEq> Frame<T> {
        self.matrix.column(idx)
    }
-    /// Returns a mutable slice of the specified column's data.
+    /// Returns an immutable slice of the specified column's data by its physical index.
    /// Panics if the index is out of bounds.
    pub fn column_by_physical_idx(&self, idx: usize) -> &[T] {
        // No name lookup here: `idx` is handed straight to the underlying matrix.
        self.matrix.column(idx)
    }
    /// Returns a mutable slice of the specified column's data by name.
    /// Panics if the column name is not found.
    pub fn column_mut(&mut self, name: &str) -> &mut [T] {
        let idx = self
@@ -350,6 +356,12 @@ impl<T: Clone + PartialEq> Frame<T> {
        self.matrix.column_mut(idx)
    }
    /// Returns a mutable slice of the specified column's data by its physical index.
    ///
    /// No name lookup is performed; `idx` is handed straight to the underlying matrix.
    ///
    /// # Panics
    ///
    /// Panics if the index is out of bounds.
    pub fn column_mut_by_physical_idx(&mut self, idx: usize) -> &mut [T] {
        self.matrix.column_mut(idx)
    }
    // Row access methods
    /// Returns an immutable view of the row for the given integer key.
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,8 @@
 #![doc = include_str!("../README.md")]
 /// Documentation for the [`crate::dataframe`] module.
 pub mod dataframe;
 /// Documentation for the [`crate::matrix`] module.
 pub mod matrix;
Author	SHA1	Message	Date
Palash Tyagi	811c153eaf	Merge branch 'main' into dataframe	2025-08-24 21:29:31 +01:00
Palash Tyagi	c53693fa7b	Merge pull request #72 from Magnus167/release/a20250805 Bump version to 0.0.1-a.20250805 in Cargo.toml	2025-08-05 00:11:57 +01:00
Palash Tyagi	109d39b248	Merge branch 'main' into release/a20250805	2025-08-05 00:08:27 +01:00
Palash Tyagi	18ad6c689a	Bump version to 0.0.1-a.20250805 in Cargo.toml	2025-08-05 00:06:49 +01:00
Palash Tyagi	1fead78b69	Merge pull request #71 from Magnus167/prep-release-20250804 Update package version and enhance description in Cargo.toml	2025-08-04 23:27:12 +01:00
Palash Tyagi	6fb32e743c	Update package version and enhance description in Cargo.toml	2025-08-04 23:15:24 +01:00
Palash Tyagi	2cb4e46217	Merge pull request #69 from Magnus167/user-guide Add user guide mdbook	2025-08-04 22:22:55 +01:00
Palash Tyagi	a53ba63f30	Rearrange links in the introduction for improved visibility	2025-08-04 22:20:58 +01:00
Palash Tyagi	dae60ea1bd	Rearrange links in the README for improved visibility	2025-08-04 22:15:42 +01:00
Palash Tyagi	755dee58e7	Refactor machine learning user-guide	2025-08-04 22:14:17 +01:00
Palash Tyagi	9e6e22fc37	Add covariance functions and examples to documentation	2025-08-04 20:37:27 +01:00
Palash Tyagi	39a95e63d9	Merge branch 'main' into dataframe	2025-07-16 01:54:37 +01:00
Palash Tyagi	1de8ba4f2d	Merge branch 'main' into dataframe	2025-07-06 11:35:08 +01:00
Palash Tyagi	74bec4b69e	Merge branch 'main' into dataframe	2025-07-06 11:05:14 +01:00
Palash Tyagi	58b38311b5	Merge branch 'main' into dataframe	2025-07-06 01:04:19 +01:00
Palash Tyagi	4ed23069fc	Merge branch 'main' into dataframe	2025-07-06 00:47:15 +01:00
Palash Tyagi	7d7794627b	Refactor DataFrame usage example in README.md for clarity and consistency	2025-07-04 20:15:47 +01:00
Palash Tyagi	d9bdf8ee96	Merge branch 'main' into dataframe	2025-07-04 00:59:57 +01:00
Palash Tyagi	a61ff8a4e1	Merge branch 'main' into dataframe	2025-07-04 00:55:16 +01:00
Palash Tyagi	26ee580710	Refactor README: update DataFrame usage example	2025-07-04 00:46:12 +01:00
Palash Tyagi	96934cd89f	update DataFrame module exports	2025-07-04 00:45:45 +01:00
Palash Tyagi	27ab1ac129	reimplement dataframe functionality from scratch	2025-07-04 00:45:28 +01:00
Palash Tyagi	eb4fefe363	Enhance DataFrame display: implement column ellipsis for large datasets; improve row and column index calculations for better output formatting.	2025-07-02 23:45:43 +01:00
Palash Tyagi	60cc97e702	Enhance DataFrame display: implement row truncation with ellipsis for large datasets; improve column width calculations and formatting for better readability.	2025-07-02 23:33:34 +01:00
Palash Tyagi	7e2a5ec18d	Enhance DataFrame display: update head and tail methods for improved row retrieval and formatting; refine display output for empty DataFrames and adjust column width calculations.	2025-07-02 22:18:09 +01:00
Palash Tyagi	4038d25b07	applied formatting	2025-07-02 00:25:45 +01:00
Palash Tyagi	aa15248b58	Rename variable for clarity in DataFrame display formatting	2025-07-02 00:25:31 +01:00
Palash Tyagi	fa392ec631	Add head_n and tail_n methods to DataFrame for row retrieval; enhance display formatting	2025-07-02 00:22:52 +01:00
Palash Tyagi	8b6f16236a	Refactor TypedFrame methods using macros for common functionality and improve column accessors	2025-07-01 23:26:57 +01:00
Palash Tyagi	58acea8467	Add DataFrame usage examples to README.md	2025-06-22 21:16:06 +01:00
Palash Tyagi	2607d9c3b0	Add pub use statement for DataFrame, DataFrameColumn, and TypedFrame in mod.rs	2025-06-22 21:15:12 +01:00
Palash Tyagi	57ed06f79b	Reimplemented dataframe class with TypedFrame interface	2025-06-22 19:47:12 +01:00
Palash Tyagi	01a132264f	Remove unused imports and clean up test module in DataFrame implementation	2025-06-22 05:44:24 +01:00
Palash Tyagi	ff4535c56b	Implement column renaming in DataFrame, updating both logical names and underlying Frame references.	2025-06-22 05:35:48 +01:00
Palash Tyagi	9b480e8130	Merge branch 'main' into dataframe	2025-06-22 05:22:06 +01:00
Palash Tyagi	fe666a4ddb	First draft: Implement DataFrame and DataFrameColumn structures	2025-06-22 05:01:19 +01:00
Palash Tyagi	b80d5ab381	Add documentation for the DataFrame module and include it in the library	2025-06-22 05:00:59 +01:00
Palash Tyagi	49f7558225	Enhance column access methods to clarify usage by name and physical index	2025-06-22 05:00:42 +01:00