mirror of
https://github.com/Magnus167/rustframe.git
synced 2025-11-19 17:26:09 +00:00
Compare commits
2 Commits
dataframe
...
7f45b32806
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7f45b32806 | ||
|
|
0346c59d9a |
34
.github/.archive/pr-checks.yml
vendored
34
.github/.archive/pr-checks.yml
vendored
@@ -1,34 +0,0 @@
|
||||
# name: pr-checks
|
||||
|
||||
# on:
|
||||
# pull_request:
|
||||
# branches: [pr_checks_disabled_for_now]
|
||||
# types:
|
||||
# - opened
|
||||
# # - synchronize
|
||||
# - reopened
|
||||
# - edited
|
||||
# - ready_for_review
|
||||
|
||||
# concurrency:
|
||||
# group: pr-checks-${{ github.event.number }}
|
||||
|
||||
# permissions:
|
||||
# contents: read
|
||||
# pull-requests: read
|
||||
# checks: write
|
||||
|
||||
# jobs:
|
||||
# pr-checks:
|
||||
# name: pr-checks
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - uses: actions/checkout@v4
|
||||
|
||||
# - name: Run PR checks
|
||||
# shell: bash
|
||||
# env:
|
||||
# GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
# PR_NUMBER: ${{ github.event.number }}
|
||||
# run: |
|
||||
# python .github/scripts/pr_checks.py $PR_NUMBER
|
||||
64
.github/scripts/ci_checks.py
vendored
Normal file
64
.github/scripts/ci_checks.py
vendored
Normal file
@@ -0,0 +1,64 @@
|
||||
import os
|
||||
import sys
|
||||
from typing import Any, Dict, Optional
|
||||
import tomllib
|
||||
import packaging.version
|
||||
import requests
|
||||
|
||||
sys.path.append(os.getcwd())
|
||||
|
||||
ACCESS_TOKEN: Optional[str] = os.getenv("GH_TOKEN", None)
|
||||
|
||||
GITHUB_REQUEST_CONFIG = {
|
||||
"Accept": "application/vnd.github.v3+json",
|
||||
"Authorization": f"token {ACCESS_TOKEN}",
|
||||
"X-GitHub-Api-Version": "2022-11-28",
|
||||
}
|
||||
REPO_OWNER_USERNAME: str = "Magnus167"
|
||||
REPO_NAME: str = "rustframe"
|
||||
REPOSITORY_WEB_LINK: str = f"github.com/{REPO_OWNER_USERNAME}/{REPO_NAME}"
|
||||
|
||||
CARGO_TOML_PATH: str = "Cargo.toml"
|
||||
|
||||
|
||||
def load_cargo_toml() -> Dict[str, Any]:
    """Parse the manifest at ``CARGO_TOML_PATH`` and return it as a dict.

    Raises:
        FileNotFoundError: if no file exists at ``CARGO_TOML_PATH``.
    """
    manifest_present = os.path.exists(CARGO_TOML_PATH)
    if manifest_present:
        # tomllib requires a binary file handle.
        with open(CARGO_TOML_PATH, "rb") as manifest:
            parsed = tomllib.load(manifest)
        return parsed

    raise FileNotFoundError(f"{CARGO_TOML_PATH} does not exist.")
|
||||
|
||||
def get_latest_crates_io_version() -> str:
    """Return the ``max_version`` crates.io reports for the ``rustframe`` crate.

    Raises:
        RuntimeError: if the HTTP request fails or returns an error status.
    """
    url = "https://crates.io/api/v1/crates/rustframe"
    # crates.io is a third party: do NOT send GITHUB_REQUEST_CONFIG here, it
    # carries the GitHub token in its Authorization header. An identifying
    # User-Agent is sent instead, as crates.io asks of API clients.
    headers = {
        "Accept": "application/json",
        "User-Agent": f"{REPO_NAME}-ci-checks ({REPOSITORY_WEB_LINK})",
    }
    try:
        # A timeout keeps a stalled connection from hanging the CI job.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        data = response.json()
        return data["crate"]["max_version"]
    except requests.RequestException as e:
        raise RuntimeError(
            f"Failed to fetch latest version from crates.io: {e}"
        ) from e
|
||||
|
||||
|
||||
def get_current_version() -> str:
    """Return the ``[package].version`` string declared in Cargo.toml.

    Raises:
        ValueError: if the manifest has no (or an empty) package version.
    """
    manifest = load_cargo_toml()
    package_table = manifest.get("package", {})
    declared = package_table.get("version", None)
    if declared:
        return declared
    raise ValueError("Version not found in Cargo.toml")
|
||||
|
||||
|
||||
def check_version() -> None:
    """Exit with status 1 unless Cargo.toml is ahead of the published crate.

    The version published on crates.io is compared with the version declared
    in Cargo.toml. If the published version is greater than or equal to the
    local one (i.e. the version was not bumped), the process exits with
    status 1; otherwise the local version is printed.
    """
    latest_version = get_latest_crates_io_version()
    latest_version_tuple = packaging.version.parse(latest_version)
    current_version = get_current_version()
    current_version_tuple = packaging.version.parse(current_version)

    # Fail when the published version is >= the Cargo.toml version.
    if latest_version_tuple >= current_version_tuple:
        print(
            f"Cargo.toml version {current_version_tuple} is not newer than "
            f"the version published on crates.io ({latest_version_tuple}).",
            file=sys.stderr,
        )
        sys.exit(1)

    print(f"Current version: {current_version_tuple}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
check_version()
|
||||
236
.github/scripts/pr_checks.py
vendored
236
.github/scripts/pr_checks.py
vendored
@@ -1,236 +0,0 @@
|
||||
import os
|
||||
import sys
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import json
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
import warnings
|
||||
import urllib.parse
|
||||
|
||||
from time import sleep
|
||||
|
||||
sys.path.append(os.getcwd())
|
||||
|
||||
ACCESS_TOKEN: Optional[str] = os.getenv("GH_TOKEN", None)
|
||||
|
||||
REQUEST_CONFIG = {
|
||||
"Accept": "application/vnd.github.v3+json",
|
||||
"Authorization": f"token {ACCESS_TOKEN}",
|
||||
"X-GitHub-Api-Version": "2022-11-28",
|
||||
}
|
||||
REPO_OWNER_USERNAME: str = "Magnus167"
|
||||
REPO_NAME: str = "rustframe"
|
||||
REPOSITORY_WEB_LINK: str = f"github.com/{REPO_OWNER_USERNAME}/{REPO_NAME}"
|
||||
|
||||
|
||||
def perform_api_call(
    target_url: str,
    call_headers: Optional[dict] = REQUEST_CONFIG,
    query_parameters: Optional[Dict[str, Any]] = None,
    http_method: str = "GET",
    maximum_attempts: int = 5,
) -> Any:
    """Call a JSON HTTP endpoint, retrying on transient failures.

    Args:
        target_url: URL to request.
        call_headers: headers added to the request; ``None`` sends none.
        query_parameters: optional mapping URL-encoded into the query string.
        http_method: one of GET, DELETE, POST, PATCH, PUT.
        maximum_attempts: how many times the request is tried before giving up.

    Returns:
        The response body decoded as JSON.

    Raises:
        Exception: immediately on HTTP 403/404/422, or after
            ``maximum_attempts`` failed attempts.
    """
    assert http_method in ["GET", "DELETE", "POST", "PATCH", "PUT"]

    # Build the final URL exactly once. Appending the query string inside the
    # retry loop would duplicate it on every retry.
    request_url = target_url
    if query_parameters:
        separator = "&" if "?" in target_url else "?"
        encoded_parameters = urllib.parse.urlencode(query_parameters)
        request_url = f"{target_url}{separator}{encoded_parameters}"

    unrecoverable_codes = (403, 404, 422)

    for _ in range(maximum_attempts):
        try:
            http_request_object = urllib.request.Request(
                request_url, method=http_method
            )
            for key, value in (call_headers or {}).items():
                http_request_object.add_header(key, value)

            # urlopen raises HTTPError for 4xx/5xx responses, so no explicit
            # status check is needed on the success path.
            with urllib.request.urlopen(http_request_object) as server_response:
                return json.loads(server_response.read().decode())

        except urllib.error.HTTPError as error_details:
            if error_details.code in unrecoverable_codes:
                raise Exception(f"Request failed: {error_details}") from error_details
            print(f"Request failed: {error_details}")

        except Exception as error_details:
            print(f"Request failed: {error_details}")

        # Brief pause before the next attempt.
        sleep(1)

    raise Exception("Request failed")
|
||||
|
||||
|
||||
valid_title_prefixes: List[str] = [
|
||||
"Feature:",
|
||||
"Bugfix:",
|
||||
"Documentation:",
|
||||
"CI/CD:",
|
||||
"Misc:",
|
||||
"Suggestion:",
|
||||
]
|
||||
|
||||
|
||||
def validate_title_format(
    item_title: str,
    enforce: bool = False,
) -> bool:
    """Check that a PR title starts with one of ``valid_title_prefixes``.

    Title validation is disabled by default: with ``enforce=False`` the
    function only emits a "skipping" notice and returns ``True``. The prefix
    check is kept reachable behind ``enforce=True`` so it can be switched
    back on without restoring dead code.

    Args:
        item_title: the PR title to check.
        enforce: when true, actually validate the title.

    Returns:
        ``True`` when validation is skipped or the title is accepted.

    Raises:
        ValueError: if ``enforce`` is true and no accepted prefix matches.
    """
    if not enforce:
        estr = "Skipping PR title validation"
        # Repeated on purpose so the notice is hard to miss in CI logs.
        for _ in range(5):
            warnings.warn(estr)
            print(estr)
        return True

    cleaned_input: str = item_title.strip()
    if cleaned_input.startswith(tuple(valid_title_prefixes)):
        return True

    issue_message: str = (
        f"PR title '{item_title}' does not match any "
        f"of the accepted patterns: {valid_title_prefixes}"
    )
    raise ValueError(issue_message)
|
||||
|
||||
|
||||
def _locate_segment_indices(
|
||||
content_string: str,
|
||||
search_pattern: str,
|
||||
expect_numeric_segment: bool = False,
|
||||
) -> Tuple[int, int]:
|
||||
numeric_characters: List[str] = list(map(str, range(10))) + ["."]
|
||||
assert bool(content_string)
|
||||
assert bool(search_pattern)
|
||||
assert search_pattern in content_string
|
||||
start_index: int = content_string.find(search_pattern)
|
||||
end_index: int = content_string.find("-", start_index)
|
||||
if end_index == -1 and not expect_numeric_segment:
|
||||
return (start_index, len(content_string))
|
||||
|
||||
if expect_numeric_segment:
|
||||
start_index = start_index + len(search_pattern)
|
||||
for char_index, current_character in enumerate(content_string[start_index:]):
|
||||
if current_character not in numeric_characters:
|
||||
break
|
||||
end_index = start_index + char_index
|
||||
|
||||
return (start_index, end_index)
|
||||
|
||||
|
||||
def _verify_no_merge_flag(
|
||||
content_string: str,
|
||||
) -> bool:
|
||||
assert bool(content_string)
|
||||
return "DO-NOT-MERGE" not in content_string
|
||||
|
||||
|
||||
def _verify_merge_dependency(
|
||||
content_string: str,
|
||||
) -> bool:
|
||||
assert bool(content_string)
|
||||
dependency_marker: str = "MERGE-AFTER-#"
|
||||
|
||||
if dependency_marker not in content_string:
|
||||
return True
|
||||
|
||||
start_index, end_index = _locate_segment_indices(
|
||||
content_string=content_string, pattern=dependency_marker, numeric=True
|
||||
)
|
||||
dependent_item_id: str = content_string[start_index:end_index].strip()
|
||||
try:
|
||||
dependent_item_id = int(dependent_item_id)
|
||||
except ValueError:
|
||||
issue_message: str = f"PR number '{dependent_item_id}' is not an integer."
|
||||
raise ValueError(issue_message)
|
||||
|
||||
dependent_item_data: Dict[str, Any] = fetch_item_details(
|
||||
item_identifier=dependent_item_id
|
||||
)
|
||||
is_dependent_item_closed: bool = dependent_item_data["state"] == "closed"
|
||||
return is_dependent_item_closed
|
||||
|
||||
|
||||
def evaluate_merge_conditions(
    item_details: Dict[str, Any],
) -> bool:
    """Evaluate the merge restrictions written in a PR body.

    A PR with no body (``None``) has no restrictions. Otherwise the body is
    trimmed, spaces become ``-``, the text is upper-cased and padded with a
    space on each side before both restriction checks are run on it.
    """
    raw_body = item_details["body"]
    if raw_body is None:
        return True

    normalised = raw_body.strip().replace(" ", "-").upper()
    padded = f" {normalised} "

    # Both checks are always executed, then combined.
    no_block_flag = _verify_no_merge_flag(content_string=padded)
    dependency_satisfied = _verify_merge_dependency(content_string=padded)
    return all((no_block_flag, dependency_satisfied))
|
||||
|
||||
|
||||
def validate_item_for_merge(
    item_data: Dict[str, Any],
) -> bool:
    """Run every merge check on a PR record.

    Args:
        item_data: mapping holding at least ``number``, ``title``, ``state``
            and ``body``.

    Returns:
        ``True`` when all checks pass.

    Raises:
        ValueError: listing every failed check.
    """
    required_keys = {"number", "title", "state", "body"}
    assert required_keys.issubset(item_data.keys())

    problems: List[str] = []

    title_ok = validate_title_format(item_title=item_data["title"])
    if not title_ok:
        problems.append(
            f"PR #{item_data['number']} is not mergable due to invalid title.\n"
        )

    conditions_ok = evaluate_merge_conditions(item_details=item_data)
    if not conditions_ok:
        problems.append(
            f"PR #{item_data['number']} is not mergable due to merge restrictions"
            " specified in the PR body."
        )

    report = "".join(problems)
    if report:
        raise ValueError(report.strip())

    return True
|
||||
|
||||
|
||||
def fetch_item_details(
    item_identifier: int,
):
    """Fetch a pull request from the GitHub API and keep the fields we use.

    Returns:
        A dict with the ``number``, ``title``, ``state`` and ``body`` of the
        pull request identified by ``item_identifier``.
    """
    endpoint: str = f"https://api.github.com/repos/{REPO_OWNER_USERNAME}/{REPO_NAME}/pulls/{item_identifier}"
    payload: Dict[str, Any] = perform_api_call(target_url=endpoint)

    wanted_fields = ("number", "title", "state", "body")
    return {field: payload[field] for field in wanted_fields}
|
||||
|
||||
|
||||
def process_item_request(requested_item_id: int):
    """Fetch the given PR and verify that it may be merged.

    Returns:
        ``True`` after printing a confirmation when the PR passes validation.

    Raises:
        ValueError: if validation reports the PR as not mergeable.
    """
    details: Dict[str, Any] = fetch_item_details(item_identifier=requested_item_id)
    passed = validate_item_for_merge(item_data=details)
    if passed:
        print("PR is mergable.")
        return True

    raise ValueError("PR is not mergable.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
requested_item_id: int = int(sys.argv[1])
|
||||
process_item_request(requested_item_id=requested_item_id)
|
||||
40
.github/workflows/ci-checks.yml
vendored
Normal file
40
.github/workflows/ci-checks.yml
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
# Runs .github/scripts/ci_checks.py (crate version bump check).
# NOTE(review): the workflow/job were named "docs-and-testcov", which does not
# match this file and would share a concurrency group with any workflow of that
# name (the group is keyed on github.workflow). Confirm no required status
# check depends on the old name.
name: ci-checks

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

on:
  push:
    branches: [main]
  pull_request:
    types: [review_requested, ready_for_review, synchronize, opened, reopened]
    branches:
      - main
      - test
      - develop

  workflow_dispatch:

# The check only reads the repository; no write scopes are needed.
permissions:
  contents: read

jobs:
  ci-checks:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Install Python
        uses: actions/setup-python@v4
        with:
          # ci_checks.py imports tomllib, which requires Python 3.11+.
          python-version: "3.12"
      - name: Install uv
        uses: astral-sh/setup-uv@v5
      - name: Install dependencies
        # --system: there is no virtual environment on the runner.
        # packaging is imported by ci_checks.py alongside requests.
        run: |
          uv pip install --system requests packaging
      - name: Run CI checks
        run: |
          python .github/scripts/ci_checks.py
|
||||
5
.github/workflows/run-benchmarks.yml
vendored
5
.github/workflows/run-benchmarks.yml
vendored
@@ -2,9 +2,12 @@ name: run-benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
push:
|
||||
branches:
|
||||
- test
|
||||
|
||||
jobs:
|
||||
pick-runner:
|
||||
|
||||
2
.github/workflows/run-unit-tests.yml
vendored
2
.github/workflows/run-unit-tests.yml
vendored
@@ -5,6 +5,8 @@ on:
|
||||
types: [review_requested, ready_for_review, synchronize, opened, reopened]
|
||||
branches:
|
||||
- main
|
||||
- test
|
||||
- develop
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
|
||||
127
README.md
127
README.md
@@ -153,133 +153,6 @@ let zipped_matrix = a.zip(&b, |x, y| x + y);
|
||||
assert_eq!(zipped_matrix.data(), &[6.0, 8.0, 10.0, 12.0]);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## DataFrame Usage Example
|
||||
|
||||
```rust
|
||||
use chrono::NaiveDate;
|
||||
use rustframe::dataframe::DataFrame;
|
||||
use rustframe::utils::{BDateFreq, BDatesList};
|
||||
use std::any::TypeId;
|
||||
use std::collections::HashMap;
|
||||
|
||||
// Helper for NaiveDate
|
||||
fn d(y: i32, m: u32, d: u32) -> NaiveDate {
|
||||
NaiveDate::from_ymd_opt(y, m, d).unwrap()
|
||||
}
|
||||
|
||||
// Create a new DataFrame
|
||||
let mut df = DataFrame::new();
|
||||
|
||||
// Add columns of different types
|
||||
df.add_column("col_int1", vec![1, 2, 3, 4, 5]);
|
||||
df.add_column("col_float1", vec![1.1, 2.2, 3.3, 4.4, 5.5]);
|
||||
df.add_column(
|
||||
"col_string",
|
||||
vec![
|
||||
"apple".to_string(),
|
||||
"banana".to_string(),
|
||||
"cherry".to_string(),
|
||||
"date".to_string(),
|
||||
"elderberry".to_string(),
|
||||
],
|
||||
);
|
||||
df.add_column("col_bool", vec![true, false, true, false, true]);
|
||||
// df.add_column("col_date", vec![d(2023,1,1), d(2023,1,2), d(2023,1,3), d(2023,1,4), d(2023,1,5)]);
|
||||
df.add_column(
|
||||
"col_date",
|
||||
BDatesList::from_n_periods("2023-01-01".to_string(), BDateFreq::Daily, 5)
|
||||
.unwrap()
|
||||
.list()
|
||||
.unwrap(),
|
||||
);
|
||||
|
||||
println!("DataFrame after initial column additions:\n{}", df);
|
||||
|
||||
// Demonstrate frame re-use when adding columns of existing types
|
||||
let initial_frames_count = df.num_internal_frames();
|
||||
println!(
|
||||
"\nInitial number of internal frames: {}",
|
||||
initial_frames_count
|
||||
);
|
||||
|
||||
df.add_column("col_int2", vec![6, 7, 8, 9, 10]);
|
||||
df.add_column("col_float2", vec![6.6, 7.7, 8.8, 9.9, 10.0]);
|
||||
|
||||
let frames_after_reuse = df.num_internal_frames();
|
||||
println!(
|
||||
"Number of internal frames after adding more columns of existing types: {}",
|
||||
frames_after_reuse
|
||||
);
|
||||
assert_eq!(initial_frames_count, frames_after_reuse); // Should be equal, demonstrating re-use
|
||||
|
||||
println!(
|
||||
"\nDataFrame after adding more columns of existing types:\n{}",
|
||||
df
|
||||
);
|
||||
|
||||
// Get number of rows and columns
|
||||
println!("Rows: {}", df.rows()); // Output: Rows: 5
|
||||
println!("Columns: {}", df.cols()); // Output: Columns: 7
|
||||
|
||||
// Get column names
|
||||
println!("Column names: {:?}", df.get_column_names());
|
||||
// Output: Column names: ["col_int1", "col_float1", "col_string", "col_bool", "col_date", "col_int2", "col_float2"]
|
||||
|
||||
// Get a specific column by name and type
|
||||
let int_col = df.get_column::<i32>("col_int1").unwrap();
|
||||
// Output: Integer column: [1, 2, 3, 4, 5]
|
||||
println!("Integer column (col_int1): {:?}", int_col);
|
||||
|
||||
let int_col2 = df.get_column::<i32>("col_int2").unwrap();
|
||||
// Output: Integer column: [6, 7, 8, 9, 10]
|
||||
println!("Integer column (col_int2): {:?}", int_col2);
|
||||
|
||||
let float_col = df.get_column::<f64>("col_float1").unwrap();
|
||||
// Output: Float column: [1.1, 2.2, 3.3, 4.4, 5.5]
|
||||
println!("Float column (col_float1): {:?}", float_col);
|
||||
|
||||
// Attempt to get a column with incorrect type (returns None)
|
||||
let wrong_type_col = df.get_column::<bool>("col_int1");
|
||||
// Output: Wrong type column: None
|
||||
println!("Wrong type column: {:?}", wrong_type_col);
|
||||
|
||||
// Get a row by index
|
||||
let row_0 = df.get_row(0).unwrap();
|
||||
println!("Row 0: {:?}", row_0);
|
||||
// Output: Row 0: {"col_int1": "1", "col_float1": "1.1", "col_string": "apple", "col_bool": "true", "col_date": "2023-01-01", "col_int2": "6", "col_float2": "6.6"}
|
||||
|
||||
let row_2 = df.get_row(2).unwrap();
|
||||
println!("Row 2: {:?}", row_2);
|
||||
// Output: Row 2: {"col_int1": "3", "col_float1": "3.3", "col_string": "cherry", "col_bool": "true", "col_date": "2023-01-03", "col_int2": "8", "col_float2": "8.8"}
|
||||
|
||||
// Attempt to get an out-of-bounds row (returns None)
|
||||
let row_out_of_bounds = df.get_row(10);
|
||||
// Output: Row out of bounds: None
|
||||
println!("Row out of bounds: {:?}", row_out_of_bounds);
|
||||
|
||||
// Drop a column
|
||||
df.drop_column("col_bool");
|
||||
println!("\nDataFrame after dropping 'col_bool':\n{}", df);
|
||||
|
||||
println!("Columns after drop: {}", df.cols());
|
||||
println!("Column names after drop: {:?}", df.get_column_names());
|
||||
|
||||
// Drop another column, ensuring the underlying Frame is removed if empty
|
||||
df.drop_column("col_float1");
|
||||
println!("\nDataFrame after dropping 'col_float1':\n{}", df);
|
||||
|
||||
println!("Columns after second drop: {}", df.cols());
|
||||
println!(
|
||||
"Column names after second drop: {:?}",
|
||||
df.get_column_names()
|
||||
);
|
||||
|
||||
// Attempt to drop a non-existent column (will panic)
|
||||
// df.drop_column("non_existent_col"); // Uncomment to see panic
|
||||
```
|
||||
|
||||
## More examples
|
||||
|
||||
See the [examples](./examples/) directory for some demonstrations of Rustframe's syntax and functionality.
|
||||
|
||||
@@ -1,659 +0,0 @@
|
||||
use crate::frame::{Frame, RowIndex};
|
||||
use std::any::{Any, TypeId};
|
||||
use std::collections::HashMap;
|
||||
use std::fmt; // Import TypeId
|
||||
|
||||
const DEFAULT_DISPLAY_ROWS: usize = 5;
|
||||
const DEFAULT_DISPLAY_COLS: usize = 10;
|
||||
|
||||
// Trait to enable type-agnostic operations on Frame objects within DataFrame
|
||||
/// Object-safe, type-erased interface over a `Frame<T>`, so that frames with
/// different element types can be stored together behind `Box<dyn SubFrame>`.
pub trait SubFrame: Any + fmt::Debug + Send + Sync {
    /// Number of rows in the underlying frame.
    fn rows(&self) -> usize;

    /// Number of columns in the underlying frame.
    fn get_frame_cols(&self) -> usize;

    /// The value at (`physical_row_idx`, `col_name`) rendered as a `String`.
    fn get_value_as_string(&self, physical_row_idx: usize, col_name: &str) -> String;

    /// Removes the column called `col_name` from the underlying frame.
    fn delete_column_from_frame(&mut self, col_name: &str);

    /// Clones the frame into a fresh boxed trait object.
    fn clone_box(&self) -> Box<dyn SubFrame>;

    /// Shared `Any` view, used to downcast back to the concrete frame type.
    fn as_any(&self) -> &dyn Any;

    /// Mutable `Any` view, used to downcast back to the concrete frame type.
    fn as_any_mut(&mut self) -> &mut dyn Any;
}
|
||||
|
||||
// Implement SubFrame for any Frame<T> that meets the requirements
|
||||
impl<T> SubFrame for Frame<T>
|
||||
where
|
||||
T: Clone + PartialEq + fmt::Display + fmt::Debug + 'static + Send + Sync + Any,
|
||||
{
|
||||
fn rows(&self) -> usize {
|
||||
self.rows()
|
||||
}
|
||||
|
||||
fn get_value_as_string(&self, physical_row_idx: usize, col_name: &str) -> String {
|
||||
self.get_row(physical_row_idx).get(col_name).to_string()
|
||||
}
|
||||
|
||||
fn clone_box(&self) -> Box<dyn SubFrame> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
|
||||
fn delete_column_from_frame(&mut self, col_name: &str) {
|
||||
self.delete_column(col_name);
|
||||
}
|
||||
|
||||
fn get_frame_cols(&self) -> usize {
|
||||
self.cols()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn as_any_mut(&mut self) -> &mut dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DataFrame {
|
||||
frames_by_type: HashMap<TypeId, Box<dyn SubFrame>>, // Maps TypeId to the Frame holding columns of that type
|
||||
column_to_type: HashMap<String, TypeId>, // Maps column name to its TypeId
|
||||
column_names: Vec<String>,
|
||||
index: RowIndex,
|
||||
}
|
||||
|
||||
impl DataFrame {
|
||||
pub fn new() -> Self {
|
||||
DataFrame {
|
||||
frames_by_type: HashMap::new(),
|
||||
column_to_type: HashMap::new(),
|
||||
column_names: Vec::new(),
|
||||
index: RowIndex::Range(0..0), // Initialize with an empty range index
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the number of rows in the DataFrame.
|
||||
pub fn rows(&self) -> usize {
|
||||
self.index.len()
|
||||
}
|
||||
|
||||
/// Returns the number of columns in the DataFrame.
|
||||
pub fn cols(&self) -> usize {
|
||||
self.column_names.len()
|
||||
}
|
||||
|
||||
/// Returns a reference to the vector of column names.
|
||||
pub fn get_column_names(&self) -> &Vec<String> {
|
||||
&self.column_names
|
||||
}
|
||||
|
||||
/// Returns the number of internal Frame objects (one per unique data type).
|
||||
pub fn num_internal_frames(&self) -> usize {
|
||||
self.frames_by_type.len()
|
||||
}
|
||||
|
||||
/// Returns a reference to a column of a specific type, if it exists.
|
||||
pub fn get_column<T>(&self, col_name: &str) -> Option<&[T]>
|
||||
where
|
||||
T: Clone + PartialEq + fmt::Display + fmt::Debug + 'static + Send + Sync + Any,
|
||||
{
|
||||
let expected_type_id = TypeId::of::<T>();
|
||||
if let Some(actual_type_id) = self.column_to_type.get(col_name) {
|
||||
if *actual_type_id == expected_type_id {
|
||||
if let Some(sub_frame_box) = self.frames_by_type.get(actual_type_id) {
|
||||
if let Some(frame) = sub_frame_box.as_any().downcast_ref::<Frame<T>>() {
|
||||
return Some(frame.column(col_name));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Returns a HashMap representing a row, mapping column names to their string values.
|
||||
pub fn get_row(&self, row_idx: usize) -> Option<HashMap<String, String>> {
|
||||
if row_idx >= self.rows() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut row_data = HashMap::new();
|
||||
for col_name in &self.column_names {
|
||||
if let Some(type_id) = self.column_to_type.get(col_name) {
|
||||
if let Some(sub_frame_box) = self.frames_by_type.get(type_id) {
|
||||
let value = sub_frame_box.get_value_as_string(row_idx, col_name);
|
||||
row_data.insert(col_name.clone(), value);
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(row_data)
|
||||
}
|
||||
|
||||
pub fn add_column<T>(&mut self, col_name: &str, data: Vec<T>)
|
||||
where
|
||||
T: Clone + PartialEq + fmt::Display + fmt::Debug + 'static + Send + Sync + Any,
|
||||
{
|
||||
let type_id = TypeId::of::<T>();
|
||||
let col_name_string = col_name.to_string();
|
||||
|
||||
// Check for duplicate column name across the entire DataFrame
|
||||
if self.column_to_type.contains_key(&col_name_string) {
|
||||
panic!(
|
||||
"DataFrame::add_column: duplicate column name: '{}'",
|
||||
col_name_string
|
||||
);
|
||||
}
|
||||
|
||||
// If this is the first column being added, set the DataFrame's index
|
||||
if self.column_names.is_empty() {
|
||||
self.index = RowIndex::Range(0..data.len());
|
||||
} else {
|
||||
// Ensure new column has the same number of rows as existing columns
|
||||
if data.len() != self.index.len() {
|
||||
panic!(
|
||||
"DataFrame::add_column: new column '{}' has {} rows, but existing columns have {} rows",
|
||||
col_name_string,
|
||||
data.len(),
|
||||
self.index.len()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Check if a Frame of this type already exists
|
||||
if let Some(sub_frame_box) = self.frames_by_type.get_mut(&type_id) {
|
||||
// Downcast to the concrete Frame<T> and add the column
|
||||
if let Some(frame) = sub_frame_box.as_any_mut().downcast_mut::<Frame<T>>() {
|
||||
frame.add_column(col_name_string.clone(), data);
|
||||
} else {
|
||||
// This should ideally not happen if TypeId matches, but good for safety
|
||||
panic!(
|
||||
"Type mismatch when downcasting existing SubFrame for TypeId {:?}",
|
||||
type_id
|
||||
);
|
||||
}
|
||||
} else {
|
||||
// No Frame of this type exists, create a new one
|
||||
// The Frame::new constructor expects a Matrix and column names.
|
||||
// We create a Matrix from a single column vector.
|
||||
let new_frame = Frame::new(
|
||||
crate::matrix::Matrix::from_cols(vec![data]),
|
||||
vec![col_name_string.clone()],
|
||||
Some(self.index.clone()), // Pass the DataFrame's index to the new Frame
|
||||
);
|
||||
self.frames_by_type.insert(type_id, Box::new(new_frame));
|
||||
}
|
||||
|
||||
// Update column mappings and names
|
||||
self.column_to_type.insert(col_name_string.clone(), type_id);
|
||||
self.column_names.push(col_name_string);
|
||||
}
|
||||
|
||||
/// Drops a column from the DataFrame.
|
||||
/// Panics if the column does not exist.
|
||||
pub fn drop_column(&mut self, col_name: &str) {
|
||||
let col_name_string = col_name.to_string();
|
||||
|
||||
// 1. Get the TypeId associated with the column
|
||||
let type_id = self
|
||||
.column_to_type
|
||||
.remove(&col_name_string)
|
||||
.unwrap_or_else(|| {
|
||||
panic!(
|
||||
"DataFrame::drop_column: column '{}' not found",
|
||||
col_name_string
|
||||
);
|
||||
});
|
||||
|
||||
// 2. Remove the column name from the ordered list
|
||||
self.column_names.retain(|name| name != &col_name_string);
|
||||
|
||||
// 3. Find the Frame object and delete the column from it
|
||||
if let Some(sub_frame_box) = self.frames_by_type.get_mut(&type_id) {
|
||||
sub_frame_box.delete_column_from_frame(&col_name_string);
|
||||
|
||||
// 4. If the Frame object for this type becomes empty, remove it from frames_by_type
|
||||
if sub_frame_box.get_frame_cols() == 0 {
|
||||
self.frames_by_type.remove(&type_id);
|
||||
}
|
||||
} else {
|
||||
// This should not happen if column_to_type was consistent
|
||||
panic!(
|
||||
"DataFrame::drop_column: internal error, no frame found for type_id {:?}",
|
||||
type_id
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for DataFrame {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
// Display column headers
|
||||
for col_name in self.column_names.iter().take(DEFAULT_DISPLAY_COLS) {
|
||||
write!(f, "{:<15}", col_name)?;
|
||||
}
|
||||
if self.column_names.len() > DEFAULT_DISPLAY_COLS {
|
||||
write!(f, "...")?;
|
||||
}
|
||||
writeln!(f)?;
|
||||
|
||||
// Display data rows
|
||||
let mut displayed_rows = 0;
|
||||
for i in 0..self.index.len() {
|
||||
if displayed_rows >= DEFAULT_DISPLAY_ROWS {
|
||||
writeln!(f, "...")?;
|
||||
break;
|
||||
}
|
||||
for col_name in self.column_names.iter().take(DEFAULT_DISPLAY_COLS) {
|
||||
if let Some(type_id) = self.column_to_type.get(col_name) {
|
||||
if let Some(sub_frame_box) = self.frames_by_type.get(type_id) {
|
||||
write!(f, "{:<15}", sub_frame_box.get_value_as_string(i, col_name))?;
|
||||
} else {
|
||||
// This case indicates an inconsistency: column_to_type has an entry,
|
||||
// but frames_by_type doesn't have the corresponding Frame.
|
||||
write!(f, "{:<15}", "[ERROR]")?;
|
||||
}
|
||||
} else {
|
||||
// This case indicates an inconsistency: column_names has an entry,
|
||||
// but column_to_type doesn't have the corresponding column.
|
||||
write!(f, "{:<15}", "[ERROR]")?;
|
||||
}
|
||||
}
|
||||
if self.column_names.len() > DEFAULT_DISPLAY_COLS {
|
||||
write!(f, "...")?;
|
||||
}
|
||||
writeln!(f)?;
|
||||
displayed_rows += 1;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for DataFrame {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("DataFrame")
|
||||
.field("column_names", &self.column_names)
|
||||
.field("index", &self.index)
|
||||
.field("column_to_type", &self.column_to_type)
|
||||
.field("frames_by_type", &self.frames_by_type)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::frame::Frame;
|
||||
use crate::matrix::Matrix;
|
||||
|
||||
#[test]
|
||||
fn test_dataframe_new() {
|
||||
let df = DataFrame::new();
|
||||
assert_eq!(df.rows(), 0);
|
||||
assert_eq!(df.cols(), 0);
|
||||
assert!(df.get_column_names().is_empty());
|
||||
assert!(df.frames_by_type.is_empty());
|
||||
assert!(df.column_to_type.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dataframe_add_column_initial() {
|
||||
let mut df = DataFrame::new();
|
||||
let data = vec![1, 2, 3];
|
||||
df.add_column("col_int", data.clone());
|
||||
|
||||
assert_eq!(df.rows(), 3);
|
||||
assert_eq!(df.cols(), 1);
|
||||
assert_eq!(df.get_column_names(), &vec!["col_int".to_string()]);
|
||||
assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>()));
|
||||
assert_eq!(df.column_to_type.get("col_int"), Some(&TypeId::of::<i32>()));
|
||||
|
||||
// Verify the underlying frame
|
||||
let sub_frame_box = df.frames_by_type.get(&TypeId::of::<i32>()).unwrap();
|
||||
let frame = sub_frame_box.as_any().downcast_ref::<Frame<i32>>().unwrap();
|
||||
assert_eq!(frame.rows(), 3);
|
||||
assert_eq!(frame.cols(), 1);
|
||||
assert_eq!(frame.columns(), &vec!["col_int".to_string()]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dataframe_add_column_same_type() {
|
||||
let mut df = DataFrame::new();
|
||||
df.add_column("col_int1", vec![1, 2, 3]);
|
||||
df.add_column("col_int2", vec![4, 5, 6]);
|
||||
|
||||
assert_eq!(df.rows(), 3);
|
||||
assert_eq!(df.cols(), 2);
|
||||
assert_eq!(
|
||||
df.get_column_names(),
|
||||
&vec!["col_int1".to_string(), "col_int2".to_string()]
|
||||
);
|
||||
assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>()));
|
||||
assert_eq!(
|
||||
df.column_to_type.get("col_int1"),
|
||||
Some(&TypeId::of::<i32>())
|
||||
);
|
||||
assert_eq!(
|
||||
df.column_to_type.get("col_int2"),
|
||||
Some(&TypeId::of::<i32>())
|
||||
);
|
||||
|
||||
// Verify the underlying frame
|
||||
let sub_frame_box = df.frames_by_type.get(&TypeId::of::<i32>()).unwrap();
|
||||
let frame = sub_frame_box.as_any().downcast_ref::<Frame<i32>>().unwrap();
|
||||
assert_eq!(frame.rows(), 3);
|
||||
assert_eq!(frame.cols(), 2);
|
||||
assert_eq!(
|
||||
frame.columns(),
|
||||
&vec!["col_int1".to_string(), "col_int2".to_string()]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dataframe_add_column_different_type() {
|
||||
let mut df = DataFrame::new();
|
||||
df.add_column("col_int", vec![1, 2, 3]);
|
||||
df.add_column("col_float", vec![1.1, 2.2, 3.3]);
|
||||
df.add_column(
|
||||
"col_string",
|
||||
vec!["a".to_string(), "b".to_string(), "c".to_string()],
|
||||
);
|
||||
|
||||
assert_eq!(df.rows(), 3);
|
||||
assert_eq!(df.cols(), 3);
|
||||
assert_eq!(
|
||||
df.get_column_names(),
|
||||
&vec![
|
||||
"col_int".to_string(),
|
||||
"col_float".to_string(),
|
||||
"col_string".to_string()
|
||||
]
|
||||
);
|
||||
|
||||
assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>()));
|
||||
assert!(df.frames_by_type.contains_key(&TypeId::of::<f64>()));
|
||||
assert!(df.frames_by_type.contains_key(&TypeId::of::<String>()));
|
||||
|
||||
assert_eq!(df.column_to_type.get("col_int"), Some(&TypeId::of::<i32>()));
|
||||
assert_eq!(
|
||||
df.column_to_type.get("col_float"),
|
||||
Some(&TypeId::of::<f64>())
|
||||
);
|
||||
assert_eq!(
|
||||
df.column_to_type.get("col_string"),
|
||||
Some(&TypeId::of::<String>())
|
||||
);
|
||||
|
||||
// Verify underlying frames
|
||||
let int_frame = df
|
||||
.frames_by_type
|
||||
.get(&TypeId::of::<i32>())
|
||||
.unwrap()
|
||||
.as_any()
|
||||
.downcast_ref::<Frame<i32>>()
|
||||
.unwrap();
|
||||
assert_eq!(int_frame.columns(), &vec!["col_int".to_string()]);
|
||||
|
||||
let float_frame = df
|
||||
.frames_by_type
|
||||
.get(&TypeId::of::<f64>())
|
||||
.unwrap()
|
||||
.as_any()
|
||||
.downcast_ref::<Frame<f64>>()
|
||||
.unwrap();
|
||||
assert_eq!(float_frame.columns(), &vec!["col_float".to_string()]);
|
||||
|
||||
let string_frame = df
|
||||
.frames_by_type
|
||||
.get(&TypeId::of::<String>())
|
||||
.unwrap()
|
||||
.as_any()
|
||||
.downcast_ref::<Frame<String>>()
|
||||
.unwrap();
|
||||
assert_eq!(string_frame.columns(), &vec!["col_string".to_string()]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dataframe_get_column() {
|
||||
let mut df = DataFrame::new();
|
||||
df.add_column("col_int", vec![1, 2, 3]);
|
||||
df.add_column("col_float", vec![1.1, 2.2, 3.3]);
|
||||
df.add_column(
|
||||
"col_string",
|
||||
vec!["a".to_string(), "b".to_string(), "c".to_string()],
|
||||
);
|
||||
|
||||
// Test getting existing columns with correct type
|
||||
assert_eq!(
|
||||
df.get_column::<i32>("col_int").unwrap(),
|
||||
vec![1, 2, 3].as_slice()
|
||||
);
|
||||
assert_eq!(
|
||||
df.get_column::<f64>("col_float").unwrap(),
|
||||
vec![1.1, 2.2, 3.3].as_slice()
|
||||
);
|
||||
assert_eq!(
|
||||
df.get_column::<String>("col_string").unwrap(),
|
||||
vec!["a".to_string(), "b".to_string(), "c".to_string()].as_slice()
|
||||
);
|
||||
|
||||
// Test getting non-existent column
|
||||
assert_eq!(df.get_column::<i32>("non_existent"), None);
|
||||
|
||||
// Test getting existing column with incorrect type
|
||||
assert_eq!(df.get_column::<f64>("col_int"), None);
|
||||
assert_eq!(df.get_column::<i32>("col_float"), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dataframe_get_row() {
|
||||
let mut df = DataFrame::new();
|
||||
df.add_column("col_int", vec![1, 2, 3]);
|
||||
df.add_column("col_float", vec![1.1, 2.2, 3.3]);
|
||||
df.add_column(
|
||||
"col_string",
|
||||
vec!["a".to_string(), "b".to_string(), "c".to_string()],
|
||||
);
|
||||
|
||||
// Test getting an existing row
|
||||
let row0 = df.get_row(0).unwrap();
|
||||
assert_eq!(row0.get("col_int"), Some(&"1".to_string()));
|
||||
assert_eq!(row0.get("col_float"), Some(&"1.1".to_string()));
|
||||
assert_eq!(row0.get("col_string"), Some(&"a".to_string()));
|
||||
|
||||
let row1 = df.get_row(1).unwrap();
|
||||
assert_eq!(row1.get("col_int"), Some(&"2".to_string()));
|
||||
assert_eq!(row1.get("col_float"), Some(&"2.2".to_string()));
|
||||
assert_eq!(row1.get("col_string"), Some(&"b".to_string()));
|
||||
|
||||
// Test getting an out-of-bounds row
|
||||
assert_eq!(df.get_row(3), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(expected = "DataFrame::add_column: duplicate column name: 'col_int'")]
|
||||
fn test_dataframe_add_column_duplicate_name() {
|
||||
let mut df = DataFrame::new();
|
||||
df.add_column("col_int", vec![1, 2, 3]);
|
||||
df.add_column("col_int", vec![4, 5, 6]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(
|
||||
expected = "DataFrame::add_column: new column 'col_int2' has 2 rows, but existing columns have 3 rows"
|
||||
)]
|
||||
fn test_dataframe_add_column_mismatched_rows() {
|
||||
let mut df = DataFrame::new();
|
||||
df.add_column("col_int1", vec![1, 2, 3]);
|
||||
df.add_column("col_int2", vec![4, 5]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dataframe_display() {
|
||||
let mut df = DataFrame::new();
|
||||
df.add_column("col_int", vec![1, 2, 3, 4, 5, 6]);
|
||||
df.add_column("col_float", vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6]);
|
||||
df.add_column(
|
||||
"col_string",
|
||||
vec![
|
||||
"a".to_string(),
|
||||
"b".to_string(),
|
||||
"c".to_string(),
|
||||
"d".to_string(),
|
||||
"e".to_string(),
|
||||
"f".to_string(),
|
||||
],
|
||||
);
|
||||
|
||||
let expected_output = "\
|
||||
col_int col_float col_string
|
||||
1 1.1 a
|
||||
2 2.2 b
|
||||
3 3.3 c
|
||||
4 4.4 d
|
||||
5 5.5 e
|
||||
...
|
||||
";
|
||||
assert_eq!(format!("{}", df), expected_output);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dataframe_debug() {
|
||||
let mut df = DataFrame::new();
|
||||
df.add_column("col_int", vec![1, 2, 3]);
|
||||
df.add_column("col_float", vec![1.1, 2.2, 3.3]);
|
||||
|
||||
let debug_output = format!("{:?}", df);
|
||||
assert!(debug_output.contains("DataFrame {"));
|
||||
assert!(debug_output.contains("column_names: [\"col_int\", \"col_float\"]"));
|
||||
assert!(debug_output.contains("index: Range(0..3)"));
|
||||
assert!(debug_output.contains("column_to_type: {"));
|
||||
assert!(debug_output.contains("frames_by_type: {"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dataframe_drop_column_single_type() {
|
||||
let mut df = DataFrame::new();
|
||||
df.add_column("col_int1", vec![1, 2, 3]);
|
||||
df.add_column("col_int2", vec![4, 5, 6]);
|
||||
df.add_column("col_float", vec![1.1, 2.2, 3.3]);
|
||||
|
||||
assert_eq!(df.cols(), 3);
|
||||
assert_eq!(
|
||||
df.get_column_names(),
|
||||
&vec![
|
||||
"col_int1".to_string(),
|
||||
"col_int2".to_string(),
|
||||
"col_float".to_string()
|
||||
]
|
||||
);
|
||||
assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>()));
|
||||
assert!(df.frames_by_type.contains_key(&TypeId::of::<f64>()));
|
||||
|
||||
df.drop_column("col_int1");
|
||||
|
||||
assert_eq!(df.cols(), 2);
|
||||
assert_eq!(
|
||||
df.get_column_names(),
|
||||
&vec!["col_int2".to_string(), "col_float".to_string()]
|
||||
);
|
||||
assert!(df.column_to_type.get("col_int1").is_none());
|
||||
assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>())); // Frame<i32> should still exist
|
||||
let int_frame = df
|
||||
.frames_by_type
|
||||
.get(&TypeId::of::<i32>())
|
||||
.unwrap()
|
||||
.as_any()
|
||||
.downcast_ref::<Frame<i32>>()
|
||||
.unwrap();
|
||||
assert_eq!(int_frame.columns(), &vec!["col_int2".to_string()]);
|
||||
|
||||
df.drop_column("col_int2");
|
||||
|
||||
assert_eq!(df.cols(), 1);
|
||||
assert_eq!(df.get_column_names(), &vec!["col_float".to_string()]);
|
||||
assert!(df.column_to_type.get("col_int2").is_none());
|
||||
assert!(!df.frames_by_type.contains_key(&TypeId::of::<i32>())); // Frame<i32> should be removed
|
||||
assert!(df.frames_by_type.contains_key(&TypeId::of::<f64>()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dataframe_drop_column_mixed_types() {
|
||||
let mut df = DataFrame::new();
|
||||
df.add_column("col_int", vec![1, 2, 3]);
|
||||
df.add_column("col_float", vec![1.1, 2.2, 3.3]);
|
||||
df.add_column(
|
||||
"col_string",
|
||||
vec!["a".to_string(), "b".to_string(), "c".to_string()],
|
||||
);
|
||||
|
||||
assert_eq!(df.cols(), 3);
|
||||
assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>()));
|
||||
assert!(df.frames_by_type.contains_key(&TypeId::of::<f64>()));
|
||||
assert!(df.frames_by_type.contains_key(&TypeId::of::<String>()));
|
||||
|
||||
df.drop_column("col_float");
|
||||
|
||||
assert_eq!(df.cols(), 2);
|
||||
assert_eq!(
|
||||
df.get_column_names(),
|
||||
&vec!["col_int".to_string(), "col_string".to_string()]
|
||||
);
|
||||
assert!(df.column_to_type.get("col_float").is_none());
|
||||
assert!(!df.frames_by_type.contains_key(&TypeId::of::<f64>())); // Frame<f64> should be removed
|
||||
assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>()));
|
||||
assert!(df.frames_by_type.contains_key(&TypeId::of::<String>()));
|
||||
|
||||
df.drop_column("col_int");
|
||||
df.drop_column("col_string");
|
||||
|
||||
assert_eq!(df.cols(), 0);
|
||||
assert!(df.get_column_names().is_empty());
|
||||
assert!(df.frames_by_type.is_empty());
|
||||
assert!(df.column_to_type.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(expected = "DataFrame::drop_column: column 'non_existent' not found")]
|
||||
fn test_dataframe_drop_column_non_existent() {
|
||||
let mut df = DataFrame::new();
|
||||
df.add_column("col_int", vec![1, 2, 3]);
|
||||
df.drop_column("non_existent");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dataframe_add_column_reuses_existing_frame() {
|
||||
let mut df = DataFrame::new();
|
||||
df.add_column("col_int1", vec![1, 2, 3]);
|
||||
df.add_column("col_float1", vec![1.1, 2.2, 3.3]);
|
||||
|
||||
// Initially, there should be two frames (one for i32, one for f64)
|
||||
assert_eq!(df.frames_by_type.len(), 2);
|
||||
assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>()));
|
||||
assert!(df.frames_by_type.contains_key(&TypeId::of::<f64>()));
|
||||
|
||||
// Add another integer column
|
||||
df.add_column("col_int2", vec![4, 5, 6]);
|
||||
|
||||
// The number of frames should still be 2, as the existing i32 frame should be reused
|
||||
assert_eq!(df.frames_by_type.len(), 2);
|
||||
assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>()));
|
||||
assert!(df.frames_by_type.contains_key(&TypeId::of::<f64>()));
|
||||
|
||||
// Verify the i32 frame now contains both integer columns
|
||||
let int_frame = df.frames_by_type.get(&TypeId::of::<i32>()).unwrap().as_any().downcast_ref::<Frame<i32>>().unwrap();
|
||||
assert_eq!(int_frame.columns(), &vec!["col_int1".to_string(), "col_int2".to_string()]);
|
||||
assert_eq!(int_frame.cols(), 2);
|
||||
|
||||
// Add another float column
|
||||
df.add_column("col_float2", vec![4.4, 5.5, 6.6]);
|
||||
|
||||
// The number of frames should still be 2, as the existing f64 frame should be reused
|
||||
assert_eq!(df.frames_by_type.len(), 2);
|
||||
assert!(df.frames_by_type.contains_key(&TypeId::of::<i32>()));
|
||||
assert!(df.frames_by_type.contains_key(&TypeId::of::<f64>()));
|
||||
|
||||
// Verify the f64 frame now contains both float columns
|
||||
let float_frame = df.frames_by_type.get(&TypeId::of::<f64>()).unwrap().as_any().downcast_ref::<Frame<f64>>().unwrap();
|
||||
assert_eq!(float_frame.columns(), &vec!["col_float1".to_string(), "col_float2".to_string()]);
|
||||
assert_eq!(float_frame.cols(), 2);
|
||||
}
|
||||
}
|
||||
@@ -1,4 +0,0 @@
|
||||
//! This module provides the DataFrame structure for handling tabular data with mixed types.
|
||||
pub mod df;
|
||||
|
||||
pub use df::{DataFrame, SubFrame};
|
||||
@@ -332,7 +332,7 @@ impl<T: Clone + PartialEq> Frame<T> {
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns an immutable slice of the specified column's data by name.
|
||||
/// Returns an immutable slice of the specified column's data.
|
||||
/// Panics if the column name is not found.
|
||||
pub fn column(&self, name: &str) -> &[T] {
|
||||
let idx = self
|
||||
@@ -341,13 +341,7 @@ impl<T: Clone + PartialEq> Frame<T> {
|
||||
self.matrix.column(idx)
|
||||
}
|
||||
|
||||
/// Returns an immutable slice of the specified column's data by its physical index.
|
||||
/// Panics if the index is out of bounds.
|
||||
pub fn column_by_physical_idx(&self, idx: usize) -> &[T] {
|
||||
self.matrix.column(idx)
|
||||
}
|
||||
|
||||
/// Returns a mutable slice of the specified column's data by name.
|
||||
/// Returns a mutable slice of the specified column's data.
|
||||
/// Panics if the column name is not found.
|
||||
pub fn column_mut(&mut self, name: &str) -> &mut [T] {
|
||||
let idx = self
|
||||
@@ -356,12 +350,6 @@ impl<T: Clone + PartialEq> Frame<T> {
|
||||
self.matrix.column_mut(idx)
|
||||
}
|
||||
|
||||
/// Returns a mutable slice of the specified column's data by its physical index.
|
||||
/// Panics if the index is out of bounds.
|
||||
pub fn column_mut_by_physical_idx(&mut self, idx: usize) -> &mut [T] {
|
||||
self.matrix.column_mut(idx)
|
||||
}
|
||||
|
||||
// Row access methods
|
||||
|
||||
/// Returns an immutable view of the row for the given integer key.
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
#![doc = include_str!("../README.md")]
|
||||
|
||||
/// Documentation for the [`crate::dataframe`] module.
|
||||
pub mod dataframe;
|
||||
|
||||
/// Documentation for the [`crate::matrix`] module.
|
||||
pub mod matrix;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user