Update CI workflows to include 'test' and 'develop' branches for pull requests

Implement CI checks and remove deprecated PR checks script
2025-11-19 17:26:09 +00:00 · 2025-08-24 21:40:28 +01:00 · 2025-08-07 22:28:08 +01:00
11 changed files with 112 additions and 1078 deletions
--- a/.github/.archive/pr-checks.yml
+++ b/.github/.archive/pr-checks.yml
@@ -1,34 +0,0 @@
-# name: pr-checks
-
-# on:
-#   pull_request:
-#     branches: [pr_checks_disabled_for_now]
-#     types:
-#       - opened
-#       # - synchronize
-#       - reopened
-#       - edited
-#       - ready_for_review
-
-# concurrency:
-#   group: pr-checks-${{ github.event.number }}
-
-# permissions:
-#   contents: read
-#   pull-requests: read
-#   checks: write
-
-# jobs:
-#   pr-checks:
-#     name: pr-checks
-#     runs-on: ubuntu-latest
-#     steps:
-#       - uses: actions/checkout@v4
-
-#       - name: Run PR checks
-#         shell: bash
-#         env:
-#           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-#           PR_NUMBER: ${{ github.event.number }}
-#         run: |
-#           python .github/scripts/pr_checks.py $PR_NUMBER
--- a/.github/scripts/ci_checks.py
+++ b/.github/scripts/ci_checks.py
@@ -0,0 +1,64 @@
+import os
+import sys
+from typing import Any, Dict, Optional
+import tomllib
+import packaging.version
+import requests
+
+sys.path.append(os.getcwd())
+
+ACCESS_TOKEN: Optional[str] = os.getenv("GH_TOKEN", None)  # None when GH_TOKEN is unset
+
+GITHUB_REQUEST_CONFIG = {  # HTTP headers that carry the GitHub token (a secret)
+    "Accept": "application/vnd.github.v3+json",
+    "Authorization": f"token {ACCESS_TOKEN}",  # NOTE(review): renders "token None" when unset
+    "X-GitHub-Api-Version": "2022-11-28",
+}
+REPO_OWNER_USERNAME: str = "Magnus167"
+REPO_NAME: str = "rustframe"
+REPOSITORY_WEB_LINK: str = f"github.com/{REPO_OWNER_USERNAME}/{REPO_NAME}"
+
+CARGO_TOML_PATH: str = "Cargo.toml"  # relative path, resolved against the working directory
+
+
+def load_cargo_toml() -> Dict[str, Any]:
+    if not os.path.exists(CARGO_TOML_PATH):
+        raise FileNotFoundError(f"{CARGO_TOML_PATH} does not exist.")
+
+    with open(CARGO_TOML_PATH, "rb") as file:
+        return tomllib.load(file)
+
+def get_latest_crates_io_version() -> str:
+    url = "https://crates.io/api/v1/crates/rustframe"
+    try:
+        response = requests.get(url, headers=GITHUB_REQUEST_CONFIG)
+        response.raise_for_status()
+        data = response.json()
+        return data["crate"]["max_version"]
+    except requests.RequestException as e:
+        raise RuntimeError(f"Failed to fetch latest version from crates.io: {e}")
+
+
+def get_current_version() -> str:
+    cargo_toml = load_cargo_toml()
+    version = cargo_toml.get("package", {}).get("version", None)
+    if not version:
+        raise ValueError("Version not found in Cargo.toml")
+    return version
+
+
+def check_version() -> None:
+    latest_version = get_latest_crates_io_version()
+    latest_version_tuple = packaging.version.parse(latest_version)
+    current_version = get_current_version()
+    current_version_tuple = packaging.version.parse(current_version)
+
+    # if the current version is >= latest, exit 1
+    if latest_version_tuple >= current_version_tuple:
+        sys.exit(1)
+
+    print(f"Current version: {current_version_tuple}")
+
+
+if __name__ == "__main__":
+    check_version()  # terminates the process with a non-zero status when the check fails
--- a/.github/scripts/pr_checks.py
+++ b/.github/scripts/pr_checks.py
@@ -1,236 +0,0 @@
-import os
-import sys
-import urllib.request
-import urllib.error
-import json
-from typing import Any, Dict, List, Optional, Tuple
-import warnings
-import urllib.parse
-
-from time import sleep
-
-sys.path.append(os.getcwd())
-
-ACCESS_TOKEN: Optional[str] = os.getenv("GH_TOKEN", None)
-
-REQUEST_CONFIG = {
-    "Accept": "application/vnd.github.v3+json",
-    "Authorization": f"token {ACCESS_TOKEN}",
-    "X-GitHub-Api-Version": "2022-11-28",
-}
-REPO_OWNER_USERNAME: str = "Magnus167"
-REPO_NAME: str = "rustframe"
-REPOSITORY_WEB_LINK: str = f"github.com/{REPO_OWNER_USERNAME}/{REPO_NAME}"
-
-
-def perform_api_call(
-    target_url: str,
-    call_headers: Optional[dict] = REQUEST_CONFIG,
-    query_parameters: Dict[str, Any] = {},
-    http_method: str = "GET",
-    maximum_attempts: int = 5,
-) -> Any:
-    assert http_method in ["GET", "DELETE", "POST", "PATCH", "PUT"]
-
-    attempt_count = 0
-    while attempt_count < maximum_attempts:
-        try:
-            if query_parameters:
-                encoded_parameters = urllib.parse.urlencode(query_parameters)
-                target_url = f"{target_url}?{encoded_parameters}"
-
-            http_request_object = urllib.request.Request(target_url, method=http_method)
-
-            if call_headers:
-                for key, value in call_headers.items():
-                    http_request_object.add_header(key, value)
-
-            with urllib.request.urlopen(http_request_object) as server_response:
-                if server_response.status == 404:
-                    raise Exception(f"404: {target_url} not found.")
-
-                return json.loads(server_response.read().decode())
-
-        except urllib.error.HTTPError as error_details:
-            unrecoverable_codes = [403, 404, 422]
-            if error_details.code in unrecoverable_codes:
-                raise Exception(f"Request failed: {error_details}")
-
-            print(f"Request failed: {error_details}")
-            attempt_count += 1
-            sleep(1)
-
-        except Exception as error_details:
-            print(f"Request failed: {error_details}")
-            attempt_count += 1
-            sleep(1)
-
-    raise Exception("Request failed")
-
-
-valid_title_prefixes: List[str] = [
-    "Feature:",
-    "Bugfix:",
-    "Documentation:",
-    "CI/CD:",
-    "Misc:",
-    "Suggestion:",
-]
-
-
-def validate_title_format(
-    item_title: str,
-) -> bool:
-    estr = "Skipping PR title validation"
-    for _ in range(5):
-        warnings.warn(estr)
-        print(estr)
-    return True
-
-    is_format_correct: bool = False
-    for prefix_pattern in valid_title_prefixes:
-        cleaned_input: str = item_title.strip()
-        if cleaned_input.startswith(prefix_pattern):
-            is_format_correct = True
-            break
-
-    if not is_format_correct:
-        issue_message: str = (
-            f"PR title '{item_title}' does not match any "
-            f"of the accepted patterns: {valid_title_prefixes}"
-        )
-        raise ValueError(issue_message)
-
-    return is_format_correct
-
-
-def _locate_segment_indices(
-    content_string: str,
-    search_pattern: str,
-    expect_numeric_segment: bool = False,
-) -> Tuple[int, int]:
-    numeric_characters: List[str] = list(map(str, range(10))) + ["."]
-    assert bool(content_string)
-    assert bool(search_pattern)
-    assert search_pattern in content_string
-    start_index: int = content_string.find(search_pattern)
-    end_index: int = content_string.find("-", start_index)
-    if end_index == -1 and not expect_numeric_segment:
-        return (start_index, len(content_string))
-
-    if expect_numeric_segment:
-        start_index = start_index + len(search_pattern)
-        for char_index, current_character in enumerate(content_string[start_index:]):
-            if current_character not in numeric_characters:
-                break
-        end_index = start_index + char_index
-
-    return (start_index, end_index)
-
-
-def _verify_no_merge_flag(
-    content_string: str,
-) -> bool:
-    assert bool(content_string)
-    return "DO-NOT-MERGE" not in content_string
-
-
-def _verify_merge_dependency(
-    content_string: str,
-) -> bool:
-    assert bool(content_string)
-    dependency_marker: str = "MERGE-AFTER-#"
-
-    if dependency_marker not in content_string:
-        return True
-
-    start_index, end_index = _locate_segment_indices(
-        content_string=content_string, pattern=dependency_marker, numeric=True
-    )
-    dependent_item_id: str = content_string[start_index:end_index].strip()
-    try:
-        dependent_item_id = int(dependent_item_id)
-    except ValueError:
-        issue_message: str = f"PR number '{dependent_item_id}' is not an integer."
-        raise ValueError(issue_message)
-
-    dependent_item_data: Dict[str, Any] = fetch_item_details(
-        item_identifier=dependent_item_id
-    )
-    is_dependent_item_closed: bool = dependent_item_data["state"] == "closed"
-    return is_dependent_item_closed
-
-
-def evaluate_merge_conditions(
-    item_details: Dict[str, Any],
-) -> bool:
-    item_body_content: str = item_details["body"]
-
-    if item_body_content is None:
-        return True
-
-    item_body_content = item_body_content.strip().replace(" ", "-").upper()
-    item_body_content = f" {item_body_content} "
-
-    condition_outcomes: List[bool] = [
-        _verify_no_merge_flag(content_string=item_body_content),
-        _verify_merge_dependency(content_string=item_body_content),
-    ]
-
-    return all(condition_outcomes)
-
-
-def validate_item_for_merge(
-    item_data: Dict[str, Any],
-) -> bool:
-    assert set(["number", "title", "state", "body"]).issubset(item_data.keys())
-    accumulated_issues: str = ""
-    if not validate_title_format(item_title=item_data["title"]):
-        accumulated_issues += (
-            f"PR #{item_data['number']} is not mergable due to invalid title.\n"
-        )
-
-    if not evaluate_merge_conditions(item_details=item_data):
-        accumulated_issues += (
-            f"PR #{item_data['number']} is not mergable due to merge restrictions"
-            " specified in the PR body."
-        )
-
-    if accumulated_issues:
-        raise ValueError(accumulated_issues.strip())
-
-    return True
-
-
-def fetch_item_details(
-    item_identifier: int,
-):
-    api_request_url: str = f"https://api.github.com/repos/{REPO_OWNER_USERNAME}/{REPO_NAME}/pulls/{item_identifier}"
-
-    raw_api_response_data: Dict[str, Any] = perform_api_call(target_url=api_request_url)
-
-    extracted_item_info: Dict[str, Any] = {
-        "number": raw_api_response_data["number"],
-        "title": raw_api_response_data["title"],
-        "state": raw_api_response_data["state"],
-        "body": raw_api_response_data["body"],
-    }
-
-    return extracted_item_info
-
-
-def process_item_request(requested_item_id: int):
-    extracted_item_info: Dict[str, Any] = fetch_item_details(
-        item_identifier=requested_item_id
-    )
-    if not validate_item_for_merge(item_data=extracted_item_info):
-        raise ValueError("PR is not mergable.")
-
-    print("PR is mergable.")
-
-    return True
-
-
-if __name__ == "__main__":
-    requested_item_id: int = int(sys.argv[1])
-    process_item_request(requested_item_id=requested_item_id)
--- a/.github/workflows/ci-checks.yml
+++ b/.github/workflows/ci-checks.yml
@@ -0,0 +1,40 @@
+name: ci-checks  # keep unique: github.workflow feeds the concurrency group below

+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true

+on:
+  push:
+    branches: [main]
+  pull_request:
+    types: [review_requested, ready_for_review, synchronize, opened, reopened]
+    branches:
+      - main
+      - test
+      - develop

+  workflow_dispatch:

+permissions:
+  contents: read  # the job only checks out the repo and queries crates.io

+jobs:
+  ci-checks:
+    runs-on: ubuntu-latest

+    steps:
+      - uses: actions/checkout@v4

+      - name: Install Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.12"  # ci_checks.py imports tomllib (stdlib since 3.11)
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+      - name: Install dependencies  # --system: this job creates no virtualenv
+        run: |
+          uv pip install --system requests packaging
+      - name: Run CI checks
+        run: |
+          python .github/scripts/ci_checks.py
--- a/.github/workflows/run-benchmarks.yml
+++ b/.github/workflows/run-benchmarks.yml
@@ -2,9 +2,12 @@ name: run-benchmarks

 on:
  workflow_dispatch:
-  push:
+  pull_request:
    branches:
      - main
+  push:
+    branches:
+      - test

 jobs:
  pick-runner:
--- a/.github/workflows/run-unit-tests.yml
+++ b/.github/workflows/run-unit-tests.yml
@@ -5,6 +5,8 @@ on:
    types: [review_requested, ready_for_review, synchronize, opened, reopened]
    branches:
      - main
+      - test
+      - develop

 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
--- a/README.md
+++ b/README.md
@@ -153,133 +153,6 @@ let zipped_matrix = a.zip(&b, |x, y| x + y);
 assert_eq!(zipped_matrix.data(), &[6.0, 8.0, 10.0, 12.0]);
 ```

---
-
-## DataFrame Usage Example
-
-```rust
-use chrono::NaiveDate;
-use rustframe::dataframe::DataFrame;
-use rustframe::utils::{BDateFreq, BDatesList};
-use std::any::TypeId;
-use std::collections::HashMap;
-
-// Helper for NaiveDate
-fn d(y: i32, m: u32, d: u32) -> NaiveDate {
-    NaiveDate::from_ymd_opt(y, m, d).unwrap()
-}
-
-// Create a new DataFrame
-let mut df = DataFrame::new();
-
-// Add columns of different types
-df.add_column("col_int1", vec![1, 2, 3, 4, 5]);
-df.add_column("col_float1", vec![1.1, 2.2, 3.3, 4.4, 5.5]);
-df.add_column(
-    "col_string",
-    vec![
-        "apple".to_string(),
-        "banana".to_string(),
-        "cherry".to_string(),
-        "date".to_string(),
-        "elderberry".to_string(),
-    ],
-);
-df.add_column("col_bool", vec![true, false, true, false, true]);
-// df.add_column("col_date", vec![d(2023,1,1), d(2023,1,2), d(2023,1,3), d(2023,1,4), d(2023,1,5)]);
-df.add_column(
-    "col_date",
-    BDatesList::from_n_periods("2023-01-01".to_string(), BDateFreq::Daily, 5)
-        .unwrap()
-        .list()
-        .unwrap(),
-);
-
-println!("DataFrame after initial column additions:\n{}", df);
-
-// Demonstrate frame re-use when adding columns of existing types
-let initial_frames_count = df.num_internal_frames();
-println!(
-    "\nInitial number of internal frames: {}",
-    initial_frames_count
-);
-
-df.add_column("col_int2", vec![6, 7, 8, 9, 10]);
-df.add_column("col_float2", vec![6.6, 7.7, 8.8, 9.9, 10.0]);
-
-let frames_after_reuse = df.num_internal_frames();
-println!(
-    "Number of internal frames after adding more columns of existing types: {}",
-    frames_after_reuse
-);
-assert_eq!(initial_frames_count, frames_after_reuse); // Should be equal, demonstrating re-use
-
-println!(
-    "\nDataFrame after adding more columns of existing types:\n{}",
-    df
-);
-
-// Get number of rows and columns
-println!("Rows: {}", df.rows()); // Output: Rows: 5
-println!("Columns: {}", df.cols()); // Output: Columns: 5
-
-// Get column names
-println!("Column names: {:?}", df.get_column_names());
-// Output: Column names: ["col_int", "col_float", "col_string", "col_bool", "col_date"]
-
-// Get a specific column by name and type
-let int_col = df.get_column::<i32>("col_int1").unwrap();
-// Output: Integer column: [1, 2, 3, 4, 5]
-println!("Integer column (col_int1): {:?}", int_col);
-
-let int_col2 = df.get_column::<i32>("col_int2").unwrap();
-// Output: Integer column: [6, 7, 8, 9, 10]
-println!("Integer column (col_int2): {:?}", int_col2);
-
-let float_col = df.get_column::<f64>("col_float1").unwrap();
-// Output: Float column: [1.1, 2.2, 3.3, 4.4, 5.5]
-println!("Float column (col_float1): {:?}", float_col);
-
-// Attempt to get a column with incorrect type (returns None)
-let wrong_type_col = df.get_column::<bool>("col_int1");
-// Output: Wrong type column: None
-println!("Wrong type column: {:?}", wrong_type_col);
-
-// Get a row by index
-let row_0 = df.get_row(0).unwrap();
-println!("Row 0: {:?}", row_0);
-// Output: Row 0: {"col_int1": "1", "col_float1": "1.1", "col_string": "apple", "col_bool": "true", "col_date": "2023-01-01", "col_int2": "6", "col_float2": "6.6"}
-
-let row_2 = df.get_row(2).unwrap();
-println!("Row 2: {:?}", row_2);
-// Output: Row 2: {"col_int1": "3", "col_float1": "3.3", "col_string": "cherry", "col_bool": "true", "col_date": "2023-01-03", "col_int2": "8", "col_float2": "8.8"}
-
-// Attempt to get an out-of-bounds row (returns None)
-let row_out_of_bounds = df.get_row(10);
-// Output: Row out of bounds: None
-println!("Row out of bounds: {:?}", row_out_of_bounds);
-
-// Drop a column
-df.drop_column("col_bool");
-println!("\nDataFrame after dropping 'col_bool':\n{}", df);
-
-println!("Columns after drop: {}", df.cols());
-println!("Column names after drop: {:?}", df.get_column_names());
-
-// Drop another column, ensuring the underlying Frame is removed if empty
-df.drop_column("col_float1");
-println!("\nDataFrame after dropping 'col_float1':\n{}", df);
-
-println!("Columns after second drop: {}", df.cols());
-println!(
-    "Column names after second drop: {:?}",
-    df.get_column_names()
-);
-
-// Attempt to drop a non-existent column (will panic)
-// df.drop_column("non_existent_col"); // Uncomment to see panic
-```
-
 ## More examples

 See the [examples](./examples/) directory for some demonstrations of Rustframe's syntax and functionality.
--- a/src/dataframe/df.rs
+++ b/src/dataframe/df.rs
@@ -1,659 +0,0 @@
-use crate::frame::{Frame, RowIndex};
-use std::any::{Any, TypeId};
-use std::collections::HashMap;
-use std::fmt; // Import TypeId
-
-const DEFAULT_DISPLAY_ROWS: usize = 5;
-const DEFAULT_DISPLAY_COLS: usize = 10;
-
-// Trait to enable type-agnostic operations on Frame objects within DataFrame
-pub trait SubFrame: Send + Sync + fmt::Debug + Any {
-    fn rows(&self) -> usize;
-    fn get_value_as_string(&self, physical_row_idx: usize, col_name: &str) -> String;
-    fn clone_box(&self) -> Box<dyn SubFrame>;
-    fn delete_column_from_frame(&mut self, col_name: &str);
-    fn get_frame_cols(&self) -> usize; // Add a method to get the number of columns in the underlying frame
-
-    // Methods for downcasting to concrete types
-    fn as_any(&self) -> &dyn Any;
-    fn as_any_mut(&mut self) -> &mut dyn Any;
-}
-
-// Implement SubFrame for any Frame<T> that meets the requirements
-impl<T> SubFrame for Frame<T>
-where
-    T: Clone + PartialEq + fmt::Display + fmt::Debug + 'static + Send + Sync + Any,
-{
-    fn rows(&self) -> usize {
-        self.rows()
-    }
-
-    fn get_value_as_string(&self, physical_row_idx: usize, col_name: &str) -> String {
-        self.get_row(physical_row_idx).get(col_name).to_string()
-    }
-
-    fn clone_box(&self) -> Box<dyn SubFrame> {
-        Box::new(self.clone())
-    }
-
-    fn delete_column_from_frame(&mut self, col_name: &str) {
-        self.delete_column(col_name);
-    }
-
-    fn get_frame_cols(&self) -> usize {
-        self.cols()
-    }
-
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn as_any_mut(&mut self) -> &mut dyn Any {
-        self
-    }
-}
-
-pub struct DataFrame {
-    frames_by_type: HashMap<TypeId, Box<dyn SubFrame>>, // Maps TypeId to the Frame holding columns of that type
-    column_to_type: HashMap<String, TypeId>,            // Maps column name to its TypeId
-    column_names: Vec<String>,
-    index: RowIndex,
-}
-
-impl DataFrame {
-    pub fn new() -> Self {
-        DataFrame {
-            frames_by_type: HashMap::new(),
-            column_to_type: HashMap::new(),
-            column_names: Vec::new(),
-            index: RowIndex::Range(0..0), // Initialize with an empty range index
-        }
-    }
-
-    /// Returns the number of rows in the DataFrame.
-    pub fn rows(&self) -> usize {
-        self.index.len()
-    }
-
-    /// Returns the number of columns in the DataFrame.
-    pub fn cols(&self) -> usize {
-        self.column_names.len()
-    }
-
-    /// Returns a reference to the vector of column names.
-    pub fn get_column_names(&self) -> &Vec<String> {
-        &self.column_names
-    }
-
-    /// Returns the number of internal Frame objects (one per unique data type).
-    pub fn num_internal_frames(&self) -> usize {
-        self.frames_by_type.len()
-    }
-
-    /// Returns a reference to a column of a specific type, if it exists.
-    pub fn get_column<T>(&self, col_name: &str) -> Option<&[T]>
-    where
-        T: Clone + PartialEq + fmt::Display + fmt::Debug + 'static + Send + Sync + Any,
-    {
-        let expected_type_id = TypeId::of::<T>();
-        if let Some(actual_type_id) = self.column_to_type.get(col_name) {
-            if *actual_type_id == expected_type_id {
-                if let Some(sub_frame_box) = self.frames_by_type.get(actual_type_id) {
-                    if let Some(frame) = sub_frame_box.as_any().downcast_ref::<Frame<T>>() {
-                        return Some(frame.column(col_name));
-                    }
-                }
-            }
-        }
-        None
-    }
-
-    /// Returns a HashMap representing a row, mapping column names to their string values.
-    pub fn get_row(&self, row_idx: usize) -> Option<HashMap<String, String>> {
-        if row_idx >= self.rows() {
-            return None;
-        }
-
-        let mut row_data = HashMap::new();
-        for col_name in &self.column_names {
-            if let Some(type_id) = self.column_to_type.get(col_name) {
-                if let Some(sub_frame_box) = self.frames_by_type.get(type_id) {
-                    let value = sub_frame_box.get_value_as_string(row_idx, col_name);
-                    row_data.insert(col_name.clone(), value);
-                }
-            }
-        }
-        Some(row_data)
-    }
-
-    pub fn add_column<T>(&mut self, col_name: &str, data: Vec<T>)
-    where
-        T: Clone + PartialEq + fmt::Display + fmt::Debug + 'static + Send + Sync + Any,
-    {
-        let type_id = TypeId::of::<T>();
-        let col_name_string = col_name.to_string();
-
-        // Check for duplicate column name across the entire DataFrame
-        if self.column_to_type.contains_key(&col_name_string) {
-            panic!(
-                "DataFrame::add_column: duplicate column name: '{}'",
-                col_name_string
-            );
-        }
-
-        // If this is the first column being added, set the DataFrame's index
-        if self.column_names.is_empty() {
-            self.index = RowIndex::Range(0..data.len());
-        } else {
-            // Ensure new column has the same number of rows as existing columns
-            if data.len() != self.index.len() {
-                panic!(
-                    "DataFrame::add_column: new column '{}' has {} rows, but existing columns have {} rows",
-                    col_name_string,
-                    data.len(),
-                    self.index.len()
-                );
-            }
-        }
-
-        // Check if a Frame of this type already exists
-        if let Some(sub_frame_box) = self.frames_by_type.get_mut(&type_id) {
-            // Downcast to the concrete Frame<T> and add the column
-            if let Some(frame) = sub_frame_box.as_any_mut().downcast_mut::<Frame<T>>() {
-                frame.add_column(col_name_string.clone(), data);
-            } else {
-                // This should ideally not happen if TypeId matches, but good for safety
-                panic!(
-                    "Type mismatch when downcasting existing SubFrame for TypeId {:?}",
-                    type_id
-                );
-            }
-        } else {
-            // No Frame of this type exists, create a new one
-            // The Frame::new constructor expects a Matrix and column names.
-            // We create a Matrix from a single column vector.
-            let new_frame = Frame::new(
-                crate::matrix::Matrix::from_cols(vec![data]),
-                vec![col_name_string.clone()],
-                Some(self.index.clone()), // Pass the DataFrame's index to the new Frame
-            );
-            self.frames_by_type.insert(type_id, Box::new(new_frame));
-        }
-
-        // Update column mappings and names
-        self.column_to_type.insert(col_name_string.clone(), type_id);
-        self.column_names.push(col_name_string);
-    }
-
-    /// Drops a column from the DataFrame.
-    /// Panics if the column does not exist.
-    pub fn drop_column(&mut self, col_name: &str) {
-        let col_name_string = col_name.to_string();
-
-        // 1. Get the TypeId associated with the column
-        let type_id = self
-            .column_to_type
-            .remove(&col_name_string)
-            .unwrap_or_else(|| {
-                panic!(
-                    "DataFrame::drop_column: column '{}' not found",
-                    col_name_string
-                );
-            });
-
-        // 2. Remove the column name from the ordered list
-        self.column_names.retain(|name| name != &col_name_string);
-
-        // 3. Find the Frame object and delete the column from it
-        if let Some(sub_frame_box) = self.frames_by_type.get_mut(&type_id) {
-            sub_frame_box.delete_column_from_frame(&col_name_string);
-
-            // 4. If the Frame object for this type becomes empty, remove it from frames_by_type
-            if sub_frame_box.get_frame_cols() == 0 {
-                self.frames_by_type.remove(&type_id);
-            }
-        } else {
-            // This should not happen if column_to_type was consistent
-            panic!(
-                "DataFrame::drop_column: internal error, no frame found for type_id {:?}",
-                type_id
-            );
-        }
-    }
-}
-
-impl fmt::Display for DataFrame {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        // Display column headers
-        for col_name in self.column_names.iter().take(DEFAULT_DISPLAY_COLS) {
-            write!(f, "{:<15}", col_name)?;
-        }
-        if self.column_names.len() > DEFAULT_DISPLAY_COLS {
-            write!(f, "...")?;
-        }
-        writeln!(f)?;
-
-        // Display data rows
-        let mut displayed_rows = 0;
-        for i in 0..self.index.len() {
-            if displayed_rows >= DEFAULT_DISPLAY_ROWS {
-                writeln!(f, "...")?;
-                break;
-            }
-            for col_name in self.column_names.iter().take(DEFAULT_DISPLAY_COLS) {
-                if let Some(type_id) = self.column_to_type.get(col_name) {
-                    if let Some(sub_frame_box) = self.frames_by_type.get(type_id) {
-                        write!(f, "{:<15}", sub_frame_box.get_value_as_string(i, col_name))?;
-                    } else {
-                        // This case indicates an inconsistency: column_to_type has an entry,
-                        // but frames_by_type doesn't have the corresponding Frame.
-                        write!(f, "{:<15}", "[ERROR]")?;
-                    }
-                } else {
-                    // This case indicates an inconsistency: column_names has an entry,
-                    // but column_to_type doesn't have the corresponding column.
-                    write!(f, "{:<15}", "[ERROR]")?;
-                }
-            }
-            if self.column_names.len() > DEFAULT_DISPLAY_COLS {
-                write!(f, "...")?;
-            }
-            writeln!(f)?;
-            displayed_rows += 1;
-        }
-        Ok(())
-    }
-}
-
-impl fmt::Debug for DataFrame {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_struct("DataFrame")
-            .field("column_names", &self.column_names)
-            .field("index", &self.index)
-            .field("column_to_type", &self.column_to_type)
-            .field("frames_by_type", &self.frames_by_type)
-            .finish()
-    }
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::frame::Frame;
-    use crate::matrix::Matrix;
-
-    #[test]
-    fn test_dataframe_new() {
-        let df = DataFrame::new();
-        assert_eq!(df.rows(), 0);
-        assert_eq!(df.cols(), 0);
-        assert!(df.get_column_names().is_empty());
-        assert!(df.frames_by_type.is_empty());
-        assert!(df.column_to_type.is_empty());
-    }
-
-    #[test]
-    fn test_dataframe_add_column_initial() {
-        let mut df = DataFrame::new();
-        let data = vec![1, 2, 3];
-        df.add_column("col_int", data.clone());
-
-        assert_eq!(df.rows(), 3);
-        assert_eq!(df.cols(), 1);
-        assert_eq!(df.get_column_names(), &vec!["col_int".to_string()]);
-        assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>()));
-        assert_eq!(df.column_to_type.get("col_int"), Some(&TypeId::of::<i32>()));
-
-        // Verify the underlying frame
-        let sub_frame_box = df.frames_by_type.get(&TypeId::of::<i32>()).unwrap();
-        let frame = sub_frame_box.as_any().downcast_ref::<Frame<i32>>().unwrap();
-        assert_eq!(frame.rows(), 3);
-        assert_eq!(frame.cols(), 1);
-        assert_eq!(frame.columns(), &vec!["col_int".to_string()]);
-    }
-
-    #[test]
-    fn test_dataframe_add_column_same_type() {
-        let mut df = DataFrame::new();
-        df.add_column("col_int1", vec![1, 2, 3]);
-        df.add_column("col_int2", vec![4, 5, 6]);
-
-        assert_eq!(df.rows(), 3);
-        assert_eq!(df.cols(), 2);
-        assert_eq!(
-            df.get_column_names(),
-            &vec!["col_int1".to_string(), "col_int2".to_string()]
-        );
-        assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>()));
-        assert_eq!(
-            df.column_to_type.get("col_int1"),
-            Some(&TypeId::of::<i32>())
-        );
-        assert_eq!(
-            df.column_to_type.get("col_int2"),
-            Some(&TypeId::of::<i32>())
-        );
-
-        // Verify the underlying frame
-        let sub_frame_box = df.frames_by_type.get(&TypeId::of::<i32>()).unwrap();
-        let frame = sub_frame_box.as_any().downcast_ref::<Frame<i32>>().unwrap();
-        assert_eq!(frame.rows(), 3);
-        assert_eq!(frame.cols(), 2);
-        assert_eq!(
-            frame.columns(),
-            &vec!["col_int1".to_string(), "col_int2".to_string()]
-        );
-    }
-
-    #[test]
-    fn test_dataframe_add_column_different_type() {
-        let mut df = DataFrame::new();
-        df.add_column("col_int", vec![1, 2, 3]);
-        df.add_column("col_float", vec![1.1, 2.2, 3.3]);
-        df.add_column(
-            "col_string",
-            vec!["a".to_string(), "b".to_string(), "c".to_string()],
-        );
-
-        assert_eq!(df.rows(), 3);
-        assert_eq!(df.cols(), 3);
-        assert_eq!(
-            df.get_column_names(),
-            &vec![
-                "col_int".to_string(),
-                "col_float".to_string(),
-                "col_string".to_string()
-            ]
-        );
-
-        assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>()));
-        assert!(df.frames_by_type.contains_key(&TypeId::of::<f64>()));
-        assert!(df.frames_by_type.contains_key(&TypeId::of::<String>()));
-
-        assert_eq!(df.column_to_type.get("col_int"), Some(&TypeId::of::<i32>()));
-        assert_eq!(
-            df.column_to_type.get("col_float"),
-            Some(&TypeId::of::<f64>())
-        );
-        assert_eq!(
-            df.column_to_type.get("col_string"),
-            Some(&TypeId::of::<String>())
-        );
-
-        // Verify underlying frames
-        let int_frame = df
-            .frames_by_type
-            .get(&TypeId::of::<i32>())
-            .unwrap()
-            .as_any()
-            .downcast_ref::<Frame<i32>>()
-            .unwrap();
-        assert_eq!(int_frame.columns(), &vec!["col_int".to_string()]);
-
-        let float_frame = df
-            .frames_by_type
-            .get(&TypeId::of::<f64>())
-            .unwrap()
-            .as_any()
-            .downcast_ref::<Frame<f64>>()
-            .unwrap();
-        assert_eq!(float_frame.columns(), &vec!["col_float".to_string()]);
-
-        let string_frame = df
-            .frames_by_type
-            .get(&TypeId::of::<String>())
-            .unwrap()
-            .as_any()
-            .downcast_ref::<Frame<String>>()
-            .unwrap();
-        assert_eq!(string_frame.columns(), &vec!["col_string".to_string()]);
-    }
-
-    #[test]
-    fn test_dataframe_get_column() {
-        let mut df = DataFrame::new();
-        df.add_column("col_int", vec![1, 2, 3]);
-        df.add_column("col_float", vec![1.1, 2.2, 3.3]);
-        df.add_column(
-            "col_string",
-            vec!["a".to_string(), "b".to_string(), "c".to_string()],
-        );
-
-        // Test getting existing columns with correct type
-        assert_eq!(
-            df.get_column::<i32>("col_int").unwrap(),
-            vec![1, 2, 3].as_slice()
-        );
-        assert_eq!(
-            df.get_column::<f64>("col_float").unwrap(),
-            vec![1.1, 2.2, 3.3].as_slice()
-        );
-        assert_eq!(
-            df.get_column::<String>("col_string").unwrap(),
-            vec!["a".to_string(), "b".to_string(), "c".to_string()].as_slice()
-        );
-
-        // Test getting non-existent column
-        assert_eq!(df.get_column::<i32>("non_existent"), None);
-
-        // Test getting existing column with incorrect type
-        assert_eq!(df.get_column::<f64>("col_int"), None);
-        assert_eq!(df.get_column::<i32>("col_float"), None);
-    }
-
-    #[test]
-    fn test_dataframe_get_row() {
-        let mut df = DataFrame::new();
-        df.add_column("col_int", vec![1, 2, 3]);
-        df.add_column("col_float", vec![1.1, 2.2, 3.3]);
-        df.add_column(
-            "col_string",
-            vec!["a".to_string(), "b".to_string(), "c".to_string()],
-        );
-
-        // Test getting an existing row
-        let row0 = df.get_row(0).unwrap();
-        assert_eq!(row0.get("col_int"), Some(&"1".to_string()));
-        assert_eq!(row0.get("col_float"), Some(&"1.1".to_string()));
-        assert_eq!(row0.get("col_string"), Some(&"a".to_string()));
-
-        let row1 = df.get_row(1).unwrap();
-        assert_eq!(row1.get("col_int"), Some(&"2".to_string()));
-        assert_eq!(row1.get("col_float"), Some(&"2.2".to_string()));
-        assert_eq!(row1.get("col_string"), Some(&"b".to_string()));
-
-        // Test getting an out-of-bounds row
-        assert_eq!(df.get_row(3), None);
-    }
-
-    #[test]
-    #[should_panic(expected = "DataFrame::add_column: duplicate column name: 'col_int'")]
-    fn test_dataframe_add_column_duplicate_name() {
-        let mut df = DataFrame::new();
-        df.add_column("col_int", vec![1, 2, 3]);
-        df.add_column("col_int", vec![4, 5, 6]);
-    }
-
-    #[test]
-    #[should_panic(
-        expected = "DataFrame::add_column: new column 'col_int2' has 2 rows, but existing columns have 3 rows"
-    )]
-    fn test_dataframe_add_column_mismatched_rows() {
-        let mut df = DataFrame::new();
-        df.add_column("col_int1", vec![1, 2, 3]);
-        df.add_column("col_int2", vec![4, 5]);
-    }
-
-    #[test]
-    fn test_dataframe_display() {
-        let mut df = DataFrame::new();
-        df.add_column("col_int", vec![1, 2, 3, 4, 5, 6]);
-        df.add_column("col_float", vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6]);
-        df.add_column(
-            "col_string",
-            vec![
-                "a".to_string(),
-                "b".to_string(),
-                "c".to_string(),
-                "d".to_string(),
-                "e".to_string(),
-                "f".to_string(),
-            ],
-        );
-
-        let expected_output = "\
-col_int        col_float      col_string     
-1              1.1            a              
-2              2.2            b              
-3              3.3            c              
-4              4.4            d              
-5              5.5            e              
-...
-";
-        assert_eq!(format!("{}", df), expected_output);
-    }
-
-    #[test]
-    fn test_dataframe_debug() {
-        let mut df = DataFrame::new();
-        df.add_column("col_int", vec![1, 2, 3]);
-        df.add_column("col_float", vec![1.1, 2.2, 3.3]);
-
-        let debug_output = format!("{:?}", df);
-        assert!(debug_output.contains("DataFrame {"));
-        assert!(debug_output.contains("column_names: [\"col_int\", \"col_float\"]"));
-        assert!(debug_output.contains("index: Range(0..3)"));
-        assert!(debug_output.contains("column_to_type: {"));
-        assert!(debug_output.contains("frames_by_type: {"));
-    }
-
-    #[test]
-    fn test_dataframe_drop_column_single_type() {
-        let mut df = DataFrame::new();
-        df.add_column("col_int1", vec![1, 2, 3]);
-        df.add_column("col_int2", vec![4, 5, 6]);
-        df.add_column("col_float", vec![1.1, 2.2, 3.3]);
-
-        assert_eq!(df.cols(), 3);
-        assert_eq!(
-            df.get_column_names(),
-            &vec![
-                "col_int1".to_string(),
-                "col_int2".to_string(),
-                "col_float".to_string()
-            ]
-        );
-        assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>()));
-        assert!(df.frames_by_type.contains_key(&TypeId::of::<f64>()));
-
-        df.drop_column("col_int1");
-
-        assert_eq!(df.cols(), 2);
-        assert_eq!(
-            df.get_column_names(),
-            &vec!["col_int2".to_string(), "col_float".to_string()]
-        );
-        assert!(df.column_to_type.get("col_int1").is_none());
-        assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>())); // Frame<i32> should still exist
-        let int_frame = df
-            .frames_by_type
-            .get(&TypeId::of::<i32>())
-            .unwrap()
-            .as_any()
-            .downcast_ref::<Frame<i32>>()
-            .unwrap();
-        assert_eq!(int_frame.columns(), &vec!["col_int2".to_string()]);
-
-        df.drop_column("col_int2");
-
-        assert_eq!(df.cols(), 1);
-        assert_eq!(df.get_column_names(), &vec!["col_float".to_string()]);
-        assert!(df.column_to_type.get("col_int2").is_none());
-        assert!(!df.frames_by_type.contains_key(&TypeId::of::<i32>())); // Frame<i32> should be removed
-        assert!(df.frames_by_type.contains_key(&TypeId::of::<f64>()));
-    }
-
-    #[test]
-    fn test_dataframe_drop_column_mixed_types() {
-        let mut df = DataFrame::new();
-        df.add_column("col_int", vec![1, 2, 3]);
-        df.add_column("col_float", vec![1.1, 2.2, 3.3]);
-        df.add_column(
-            "col_string",
-            vec!["a".to_string(), "b".to_string(), "c".to_string()],
-        );
-
-        assert_eq!(df.cols(), 3);
-        assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>()));
-        assert!(df.frames_by_type.contains_key(&TypeId::of::<f64>()));
-        assert!(df.frames_by_type.contains_key(&TypeId::of::<String>()));
-
-        df.drop_column("col_float");
-
-        assert_eq!(df.cols(), 2);
-        assert_eq!(
-            df.get_column_names(),
-            &vec!["col_int".to_string(), "col_string".to_string()]
-        );
-        assert!(df.column_to_type.get("col_float").is_none());
-        assert!(!df.frames_by_type.contains_key(&TypeId::of::<f64>())); // Frame<f64> should be removed
-        assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>()));
-        assert!(df.frames_by_type.contains_key(&TypeId::of::<String>()));
-
-        df.drop_column("col_int");
-        df.drop_column("col_string");
-
-        assert_eq!(df.cols(), 0);
-        assert!(df.get_column_names().is_empty());
-        assert!(df.frames_by_type.is_empty());
-        assert!(df.column_to_type.is_empty());
-    }
-
-    #[test]
-    #[should_panic(expected = "DataFrame::drop_column: column 'non_existent' not found")]
-    fn test_dataframe_drop_column_non_existent() {
-        let mut df = DataFrame::new();
-        df.add_column("col_int", vec![1, 2, 3]);
-        df.drop_column("non_existent");
-    }
-
-    #[test]
-    fn test_dataframe_add_column_reuses_existing_frame() {
-        let mut df = DataFrame::new();
-        df.add_column("col_int1", vec![1, 2, 3]);
-        df.add_column("col_float1", vec![1.1, 2.2, 3.3]);
-
-        // Initially, there should be two frames (one for i32, one for f64)
-        assert_eq!(df.frames_by_type.len(), 2);
-        assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>()));
-        assert!(df.frames_by_type.contains_key(&TypeId::of::<f64>()));
-
-        // Add another integer column
-        df.add_column("col_int2", vec![4, 5, 6]);
-
-        // The number of frames should still be 2, as the existing i32 frame should be reused
-        assert_eq!(df.frames_by_type.len(), 2);
-        assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>()));
-        assert!(df.frames_by_type.contains_key(&TypeId::of::<f64>()));
-
-        // Verify the i32 frame now contains both integer columns
-        let int_frame = df.frames_by_type.get(&TypeId::of::<i32>()).unwrap().as_any().downcast_ref::<Frame<i32>>().unwrap();
-        assert_eq!(int_frame.columns(), &vec!["col_int1".to_string(), "col_int2".to_string()]);
-        assert_eq!(int_frame.cols(), 2);
-
-        // Add another float column
-        df.add_column("col_float2", vec![4.4, 5.5, 6.6]);
-
-        // The number of frames should still be 2, as the existing f64 frame should be reused
-        assert_eq!(df.frames_by_type.len(), 2);
-        assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>()));
-        assert!(df.frames_by_type.contains_key(&TypeId::of::<f64>()));
-
-        // Verify the f64 frame now contains both float columns
-        let float_frame = df.frames_by_type.get(&TypeId::of::<f64>()).unwrap().as_any().downcast_ref::<Frame<f64>>().unwrap();
-        assert_eq!(float_frame.columns(), &vec!["col_float1".to_string(), "col_float2".to_string()]);
-        assert_eq!(float_frame.cols(), 2);
-    }
-}
--- a/src/dataframe/mod.rs
+++ b/src/dataframe/mod.rs
@@ -1,4 +0,0 @@
-//! This module provides the DataFrame structure for handling tabular data with mixed types.
-pub mod df;
-
-pub use df::{DataFrame, SubFrame};
--- a/src/frame/base.rs
+++ b/src/frame/base.rs
@@ -332,7 +332,7 @@ impl<T: Clone + PartialEq> Frame<T> {
        )
    }

-    /// Returns an immutable slice of the specified column's data by name.
+    /// Returns an immutable slice of the specified column's data.
    /// Panics if the column name is not found.
    pub fn column(&self, name: &str) -> &[T] {
        let idx = self
@@ -341,13 +341,7 @@ impl<T: Clone + PartialEq> Frame<T> {
        self.matrix.column(idx)
    }

-    /// Returns an immutable slice of the specified column's data by its physical index.
-    /// Panics if the index is out of bounds.
-    pub fn column_by_physical_idx(&self, idx: usize) -> &[T] {
-        self.matrix.column(idx)
-    }
-
-    /// Returns a mutable slice of the specified column's data by name.
+    /// Returns a mutable slice of the specified column's data.
    /// Panics if the column name is not found.
    pub fn column_mut(&mut self, name: &str) -> &mut [T] {
        let idx = self
@@ -356,12 +350,6 @@ impl<T: Clone + PartialEq> Frame<T> {
        self.matrix.column_mut(idx)
    }

-    /// Returns a mutable slice of the specified column's data by its physical index.
-    /// Panics if the index is out of bounds.
-    pub fn column_mut_by_physical_idx(&mut self, idx: usize) -> &mut [T] {
-        self.matrix.column_mut(idx)
-    }
-
    // Row access methods

    /// Returns an immutable view of the row for the given integer key.
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,8 +1,5 @@
 #![doc = include_str!("../README.md")]

-/// Documentation for the [`crate::dataframe`] module.
-pub mod dataframe;
-
 /// Documentation for the [`crate::matrix`] module.
 pub mod matrix;
Author	SHA1	Message	Date
Palash Tyagi	7f45b32806	Update CI workflows to include 'test' and 'develop' branches for pull requests	2025-08-24 21:40:28 +01:00
Palash Tyagi	0346c59d9a	Implement CI checks and remove deprecated PR checks script	2025-08-07 22:28:08 +01:00