mirror of https://github.com/Magnus167/rustframe.git
synced 2025-11-19 23:16:11 +00:00

Compare commits: d023acbaf7 ... csv (23 commits)

- ef25e77f04
- 4ba5cfea18
- 23367c7ca3
- df8c1d2a12
- 1381c77eaf
- c56574f0f3
- cd3aa84e60
- 27275e2479
- 9ef719316a
- 960fd345c2
- 325e75419c
- b1dc18d05b
- 8cbb957764
- b937ed1cdf
- 2e071a6974
- 689169bab2
- a45a5ecf4e
- 84e1b423f4
- 197739bc2f
- d2c2ebca0f
- f5f3f2c100
- 9fcb1ea2cf
- 623303cf72
.github/.archive/pr-checks.yml (vendored, new file, 34 lines):

```yaml
# name: pr-checks

# on:
#   pull_request:
#     branches: [pr_checks_disabled_for_now]
#     types:
#       - opened
#       # - synchronize
#       - reopened
#       - edited
#       - ready_for_review

# concurrency:
#   group: pr-checks-${{ github.event.number }}

# permissions:
#   contents: read
#   pull-requests: read
#   checks: write

# jobs:
#   pr-checks:
#     name: pr-checks
#     runs-on: ubuntu-latest
#     steps:
#       - uses: actions/checkout@v4

#       - name: Run PR checks
#         shell: bash
#         env:
#           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
#           PR_NUMBER: ${{ github.event.number }}
#         run: |
#           python .github/scripts/pr_checks.py $PR_NUMBER
```
.github/scripts/ci_checks.py (vendored, deleted, 64 lines):

```python
import os
import sys
from typing import Any, Dict, Optional

import tomllib
import packaging.version
import requests

sys.path.append(os.getcwd())

ACCESS_TOKEN: Optional[str] = os.getenv("GH_TOKEN", None)

GITHUB_REQUEST_CONFIG = {
    "Accept": "application/vnd.github.v3+json",
    "Authorization": f"token {ACCESS_TOKEN}",
    "X-GitHub-Api-Version": "2022-11-28",
}
REPO_OWNER_USERNAME: str = "Magnus167"
REPO_NAME: str = "rustframe"
REPOSITORY_WEB_LINK: str = f"github.com/{REPO_OWNER_USERNAME}/{REPO_NAME}"

CARGO_TOML_PATH: str = "Cargo.toml"


def load_cargo_toml() -> Dict[str, Any]:
    if not os.path.exists(CARGO_TOML_PATH):
        raise FileNotFoundError(f"{CARGO_TOML_PATH} does not exist.")

    with open(CARGO_TOML_PATH, "rb") as file:
        return tomllib.load(file)


def get_latest_crates_io_version() -> str:
    url = "https://crates.io/api/v1/crates/rustframe"
    try:
        response = requests.get(url, headers=GITHUB_REQUEST_CONFIG)
        response.raise_for_status()
        data = response.json()
        return data["crate"]["max_version"]
    except requests.RequestException as e:
        raise RuntimeError(f"Failed to fetch latest version from crates.io: {e}")


def get_current_version() -> str:
    cargo_toml = load_cargo_toml()
    version = cargo_toml.get("package", {}).get("version", None)
    if not version:
        raise ValueError("Version not found in Cargo.toml")
    return version


def check_version() -> None:
    latest_version = get_latest_crates_io_version()
    latest_version_tuple = packaging.version.parse(latest_version)
    current_version = get_current_version()
    current_version_tuple = packaging.version.parse(current_version)

    if latest_version_tuple >= current_version_tuple:
        print(
            f"Current version {current_version_tuple} is less than or equal to "
            f"latest version {latest_version_tuple} on crates.io."
        )
        sys.exit(1)

    print(f"Current version: {current_version_tuple}")


if __name__ == "__main__":
    check_version()
```
.github/scripts/pr_checks.py (vendored, new file, 236 lines):

```python
import os
import sys
import urllib.request
import urllib.error
import json
from typing import Any, Dict, List, Optional, Tuple
import warnings
import urllib.parse

from time import sleep

sys.path.append(os.getcwd())

ACCESS_TOKEN: Optional[str] = os.getenv("GH_TOKEN", None)

REQUEST_CONFIG = {
    "Accept": "application/vnd.github.v3+json",
    "Authorization": f"token {ACCESS_TOKEN}",
    "X-GitHub-Api-Version": "2022-11-28",
}
REPO_OWNER_USERNAME: str = "Magnus167"
REPO_NAME: str = "rustframe"
REPOSITORY_WEB_LINK: str = f"github.com/{REPO_OWNER_USERNAME}/{REPO_NAME}"


def perform_api_call(
    target_url: str,
    call_headers: Optional[dict] = REQUEST_CONFIG,
    query_parameters: Dict[str, Any] = {},
    http_method: str = "GET",
    maximum_attempts: int = 5,
) -> Any:
    assert http_method in ["GET", "DELETE", "POST", "PATCH", "PUT"]

    # Encode query parameters once, before the retry loop, so the URL is not
    # extended with a second query string on every retry.
    if query_parameters:
        encoded_parameters = urllib.parse.urlencode(query_parameters)
        target_url = f"{target_url}?{encoded_parameters}"

    attempt_count = 0
    while attempt_count < maximum_attempts:
        try:
            http_request_object = urllib.request.Request(target_url, method=http_method)

            if call_headers:
                for key, value in call_headers.items():
                    http_request_object.add_header(key, value)

            with urllib.request.urlopen(http_request_object) as server_response:
                if server_response.status == 404:
                    raise Exception(f"404: {target_url} not found.")

                return json.loads(server_response.read().decode())

        except urllib.error.HTTPError as error_details:
            unrecoverable_codes = [403, 404, 422]
            if error_details.code in unrecoverable_codes:
                raise Exception(f"Request failed: {error_details}")

            print(f"Request failed: {error_details}")
            attempt_count += 1
            sleep(1)

        except Exception as error_details:
            print(f"Request failed: {error_details}")
            attempt_count += 1
            sleep(1)

    raise Exception("Request failed")


valid_title_prefixes: List[str] = [
    "Feature:",
    "Bugfix:",
    "Documentation:",
    "CI/CD:",
    "Misc:",
    "Suggestion:",
]


def validate_title_format(
    item_title: str,
) -> bool:
    # Title validation is currently disabled: warn loudly and accept any
    # title. The prefix checks below are unreachable until this early
    # return is removed.
    estr = "Skipping PR title validation"
    for _ in range(5):
        warnings.warn(estr)
        print(estr)
    return True

    is_format_correct: bool = False
    for prefix_pattern in valid_title_prefixes:
        cleaned_input: str = item_title.strip()
        if cleaned_input.startswith(prefix_pattern):
            is_format_correct = True
            break

    if not is_format_correct:
        issue_message: str = (
            f"PR title '{item_title}' does not match any "
            f"of the accepted patterns: {valid_title_prefixes}"
        )
        raise ValueError(issue_message)

    return is_format_correct


def _locate_segment_indices(
    content_string: str,
    search_pattern: str,
    expect_numeric_segment: bool = False,
) -> Tuple[int, int]:
    numeric_characters: List[str] = list(map(str, range(10))) + ["."]
    assert bool(content_string)
    assert bool(search_pattern)
    assert search_pattern in content_string
    start_index: int = content_string.find(search_pattern)
    end_index: int = content_string.find("-", start_index)
    if end_index == -1 and not expect_numeric_segment:
        return (start_index, len(content_string))

    if expect_numeric_segment:
        start_index = start_index + len(search_pattern)
        for char_index, current_character in enumerate(content_string[start_index:]):
            if current_character not in numeric_characters:
                break
            end_index = start_index + char_index

    return (start_index, end_index)


def _verify_no_merge_flag(
    content_string: str,
) -> bool:
    assert bool(content_string)
    return "DO-NOT-MERGE" not in content_string


def _verify_merge_dependency(
    content_string: str,
) -> bool:
    assert bool(content_string)
    dependency_marker: str = "MERGE-AFTER-#"

    if dependency_marker not in content_string:
        return True

    # Keyword names follow the signature of _locate_segment_indices above.
    start_index, end_index = _locate_segment_indices(
        content_string=content_string,
        search_pattern=dependency_marker,
        expect_numeric_segment=True,
    )
    dependent_item_id: str = content_string[start_index:end_index].strip()
    try:
        dependent_item_id = int(dependent_item_id)
    except ValueError:
        issue_message: str = f"PR number '{dependent_item_id}' is not an integer."
        raise ValueError(issue_message)

    dependent_item_data: Dict[str, Any] = fetch_item_details(
        item_identifier=dependent_item_id
    )
    is_dependent_item_closed: bool = dependent_item_data["state"] == "closed"
    return is_dependent_item_closed


def evaluate_merge_conditions(
    item_details: Dict[str, Any],
) -> bool:
    item_body_content: str = item_details["body"]

    if item_body_content is None:
        return True

    item_body_content = item_body_content.strip().replace(" ", "-").upper()
    item_body_content = f" {item_body_content} "

    condition_outcomes: List[bool] = [
        _verify_no_merge_flag(content_string=item_body_content),
        _verify_merge_dependency(content_string=item_body_content),
    ]

    return all(condition_outcomes)


def validate_item_for_merge(
    item_data: Dict[str, Any],
) -> bool:
    assert set(["number", "title", "state", "body"]).issubset(item_data.keys())
    accumulated_issues: str = ""
    if not validate_title_format(item_title=item_data["title"]):
        accumulated_issues += (
            f"PR #{item_data['number']} is not mergable due to invalid title.\n"
        )

    if not evaluate_merge_conditions(item_details=item_data):
        accumulated_issues += (
            f"PR #{item_data['number']} is not mergable due to merge restrictions"
            " specified in the PR body."
        )

    if accumulated_issues:
        raise ValueError(accumulated_issues.strip())

    return True


def fetch_item_details(
    item_identifier: int,
):
    api_request_url: str = (
        f"https://api.github.com/repos/{REPO_OWNER_USERNAME}/{REPO_NAME}"
        f"/pulls/{item_identifier}"
    )

    raw_api_response_data: Dict[str, Any] = perform_api_call(target_url=api_request_url)

    extracted_item_info: Dict[str, Any] = {
        "number": raw_api_response_data["number"],
        "title": raw_api_response_data["title"],
        "state": raw_api_response_data["state"],
        "body": raw_api_response_data["body"],
    }

    return extracted_item_info


def process_item_request(requested_item_id: int):
    extracted_item_info: Dict[str, Any] = fetch_item_details(
        item_identifier=requested_item_id
    )
    if not validate_item_for_merge(item_data=extracted_item_info):
        raise ValueError("PR is not mergable.")

    print("PR is mergable.")

    return True


if __name__ == "__main__":
    requested_item_id: int = int(sys.argv[1])
    process_item_request(requested_item_id=requested_item_id)
```
.github/workflows/ci-checks.yml (vendored, deleted, 41 lines):

```yaml
name: ci-checks

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

on:
  push:
    branches: [main]
  pull_request:
    types: [review_requested, ready_for_review, synchronize, opened, reopened]
    branches:
      - main
      - test
      - develop

  workflow_dispatch:

permissions:
  contents: read
  id-token: write
  pages: write

jobs:
  ci-checks:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Install Python
        uses: actions/setup-python@v5
      - name: Install uv
        uses: astral-sh/setup-uv@v6
      - name: Install dependencies
        run: |
          uv venv
          uv pip install requests packaging
      - name: Run CI checks
        run: |
          uv run .github/scripts/ci_checks.py
```
.github/workflows/run-benchmarks.yml (vendored, modified):

```diff
@@ -2,12 +2,9 @@ name: run-benchmarks
 
 on:
   workflow_dispatch:
-  pull_request:
-    branches:
-      - main
   push:
     branches:
-      - test
+      - main
 
 jobs:
   pick-runner:
@@ -37,9 +34,9 @@ jobs:
           toolchain: stable
 
       - name: Install Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v4
       - name: Install uv
-        uses: astral-sh/setup-uv@v6
+        uses: astral-sh/setup-uv@v5
       - name: Setup venv
         run: |
           uv venv
```
.github/workflows/run-unit-tests.yml (vendored, modified):

```diff
@@ -5,8 +5,6 @@ on:
     types: [review_requested, ready_for_review, synchronize, opened, reopened]
     branches:
       - main
-      - test
-      - develop
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
```
src/csv/csv_core.rs (new file, 411 lines):

```rust
use chrono::{NaiveDate, NaiveDateTime};
use std::collections::HashMap;
use std::fs::File;
use std::io::{self, BufRead, BufReader};
use std::path::Path;

/// Represents the target type for a CSV column.
#[derive(Debug, Clone)]
pub enum DataType {
    Int,
    Float,
    Bool,
    UInt,
    String,
    Date,
    DateTime,
}

/// Represents a value parsed from the CSV.
#[derive(Debug, Clone, PartialEq)]
pub enum Value {
    Int(i64),
    Float(f64),
    Bool(bool),
    UInt(u64),
    String(String),
    Date(NaiveDate),
    DateTime(NaiveDateTime),
}

/// Convenience alias for a parsed CSV record.
pub type Record = HashMap<String, Value>;

/// A simple CSV reader that reads records line by line.
pub struct CsvReader<R: BufRead> {
    reader: R,
    separators: Vec<char>,
    headers: Vec<String>,
    types: Option<HashMap<String, DataType>>,
}

/// Builder for [`CsvReader`] allowing chained configuration of headers, types, and separators.
pub struct CsvReaderBuilder<R: BufRead> {
    reader: R,
    separators: Vec<char>,
    headers: Vec<String>,
    types: Option<HashMap<String, DataType>>,
}

impl<R: BufRead> CsvReader<R> {
    /// Create a new CSV reader from a [`BufRead`] source.
    /// The first line is expected to contain headers.
    /// `separators` is a list of characters considered as field separators.
    /// `types` optionally maps column names to target data types.
    pub fn new(
        mut reader: R,
        separators: Vec<char>,
        types: Option<HashMap<String, DataType>>,
    ) -> io::Result<Self> {
        let mut first_line = String::new();
        reader.read_line(&mut first_line)?;
        let headers = parse_line(&first_line, &separators);
        Ok(Self {
            reader,
            separators,
            headers,
            types,
        })
    }

    /// Create a reader with default settings (comma separator, automatic typing).
    pub fn new_default(reader: R) -> io::Result<Self> {
        Self::new(reader, vec![','], None)
    }

    /// Create a reader with default separators and explicit type mapping.
    pub fn new_with_types(reader: R, types: HashMap<String, DataType>) -> io::Result<Self> {
        Self::new(reader, vec![','], Some(types))
    }

    /// Start building a reader from a source that lacks headers.
    pub fn new_with_headers(reader: R, headers: Vec<String>) -> CsvReaderBuilder<R> {
        CsvReaderBuilder {
            reader,
            separators: vec![','],
            headers,
            types: None,
        }
    }

    /// Return the headers of the CSV file.
    pub fn headers(&self) -> &[String] {
        &self.headers
    }

    /// Read the next record. Returns `Ok(None)` on EOF.
    pub fn read_record(&mut self) -> io::Result<Option<Record>> {
        let mut line = String::new();
        if self.reader.read_line(&mut line)? == 0 {
            return Ok(None);
        }
        let fields = parse_line(&line, &self.separators);
        let mut record = HashMap::new();

        for (i, header) in self.headers.iter().enumerate() {
            let field = fields.get(i).cloned().unwrap_or_default();
            let value = match &self.types {
                Some(map) => {
                    if let Some(dt) = map.get(header) {
                        parse_with_type(&field, dt)
                    } else {
                        Value::String(field)
                    }
                }
                None => parse_auto(&field),
            };
            record.insert(header.clone(), value);
        }

        Ok(Some(record))
    }
}

impl<R: BufRead> Iterator for CsvReader<R> {
    type Item = io::Result<Record>;

    fn next(&mut self) -> Option<Self::Item> {
        match self.read_record() {
            Ok(Some(rec)) => Some(Ok(rec)),
            Ok(None) => None,
            Err(e) => Some(Err(e)),
        }
    }
}

impl<R: BufRead> CsvReaderBuilder<R> {
    /// Override field separators for the upcoming reader.
    pub fn separators(mut self, separators: Vec<char>) -> Self {
        self.separators = separators;
        self
    }

    /// Finalize the builder with an explicit type mapping.
    pub fn new_with_types(mut self, types: HashMap<String, DataType>) -> CsvReader<R> {
        self.types = Some(types);
        self.build()
    }

    /// Finalize the builder without specifying types.
    pub fn build(self) -> CsvReader<R> {
        CsvReader {
            reader: self.reader,
            separators: self.separators,
            headers: self.headers,
            types: self.types,
        }
    }
}

impl<R: BufRead> CsvReader<R> {
    /// Read all remaining records into a vector.
    pub fn read_all(&mut self) -> io::Result<Vec<Record>> {
        let mut records = Vec::new();
        while let Some(rec) = self.read_record()? {
            records.push(rec);
        }
        Ok(records)
    }
}

impl CsvReader<BufReader<File>> {
    /// Create a [`CsvReader`] from a file path using comma separators and
    /// automatic type detection.
    ///
    /// # Examples
    ///
    /// ```
    /// use rustframe::csv::{CsvReader, Value};
    /// # let path = std::env::temp_dir().join("from_path_auto.csv");
    /// # std::fs::write(&path, "a,b\n1,true\n").unwrap();
    /// let mut reader = CsvReader::from_path_auto(&path).unwrap();
    /// let rec = reader.next().unwrap().unwrap();
    /// assert_eq!(rec.get("a"), Some(&Value::Int(1)));
    /// assert_eq!(rec.get("b"), Some(&Value::Bool(true)));
    /// # std::fs::remove_file(path).unwrap();
    /// ```
    pub fn from_path_auto<P: AsRef<Path>>(path: P) -> io::Result<Self> {
        let file = File::open(path)?;
        let reader = BufReader::new(file);
        CsvReader::new_default(reader)
    }
}

/// Create an iterator over records from a file path using default settings.
pub fn reader<P: AsRef<Path>>(path: P) -> io::Result<CsvReader<BufReader<File>>> {
    reader_with(path, vec![','], None)
}

/// Create an iterator over records from a file path with custom separators and type mapping.
pub fn reader_with<P: AsRef<Path>>(
    path: P,
    separators: Vec<char>,
    types: Option<HashMap<String, DataType>>,
) -> io::Result<CsvReader<BufReader<File>>> {
    let file = File::open(path)?;
    let reader = BufReader::new(file);
    CsvReader::new(reader, separators, types)
}

/// Read an entire CSV file into memory using default settings.
pub fn read_file<P: AsRef<Path>>(path: P) -> io::Result<Vec<Record>> {
    read_file_with(path, vec![','], None)
}

/// Read an entire CSV file into memory with custom separators and type mapping.
pub fn read_file_with<P: AsRef<Path>>(
    path: P,
    separators: Vec<char>,
    types: Option<HashMap<String, DataType>>,
) -> io::Result<Vec<Record>> {
    let mut reader = reader_with(path, separators, types)?;
    reader.read_all()
}

fn parse_with_type(s: &str, ty: &DataType) -> Value {
    match ty {
        DataType::Int => s
            .parse::<i64>()
            .map(Value::Int)
            .unwrap_or_else(|_| Value::String(s.to_string())),
        DataType::Float => s
            .parse::<f64>()
            .map(Value::Float)
            .unwrap_or_else(|_| Value::String(s.to_string())),
        DataType::Bool => s
            .parse::<bool>()
            .map(Value::Bool)
            .unwrap_or_else(|_| Value::String(s.to_string())),
        DataType::UInt => s
            .parse::<u64>()
            .map(Value::UInt)
            .unwrap_or_else(|_| Value::String(s.to_string())),
        DataType::String => Value::String(s.to_string()),
        DataType::Date => s
            .parse::<NaiveDate>()
            .map(Value::Date)
            .unwrap_or_else(|_| Value::String(s.to_string())),
        DataType::DateTime => NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S")
            .map(Value::DateTime)
            .unwrap_or_else(|_| Value::String(s.to_string())),
    }
}

fn parse_auto(s: &str) -> Value {
    if let Ok(i) = s.parse::<i64>() {
        Value::Int(i)
    } else if let Ok(f) = s.parse::<f64>() {
        Value::Float(f)
    } else if let Ok(b) = s.parse::<bool>() {
        Value::Bool(b)
    } else if let Ok(dt) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") {
        Value::DateTime(dt)
    } else if let Ok(d) = NaiveDate::parse_from_str(s, "%Y-%m-%d") {
        Value::Date(d)
    } else {
        Value::String(s.to_string())
    }
}

fn parse_line(line: &str, separators: &[char]) -> Vec<String> {
    let mut fields = Vec::new();
    let mut current = String::new();
    let mut in_quotes: Option<char> = None;
    let chars: Vec<char> = line.chars().collect();
    let mut i = 0;

    while i < chars.len() {
        let c = chars[i];
        if let Some(q) = in_quotes {
            if c == q {
                if i + 1 < chars.len() && chars[i + 1] == q {
                    current.push(q);
                    i += 1; // skip escaped quote
                } else {
                    in_quotes = None;
                }
            } else {
                current.push(c);
            }
        } else if c == '"' || c == '\'' {
            in_quotes = Some(c);
        } else if separators.contains(&c) {
            fields.push(current.clone());
            current.clear();
        } else if c == '\r' {
            // Ignore carriage returns
        } else if c == '\n' {
            break;
        } else {
            current.push(c);
        }
        i += 1;
    }

    fields.push(current);
    fields
}

#[cfg(test)]
mod tests {
    use super::*;
    use chrono::{NaiveDate, NaiveDateTime};
    use std::io::Cursor;

    #[test]
    fn test_parse_line() {
        let line = "a,'b,c',\"d\"\"e\",f";
        let fields = parse_line(line, &[',']);
        assert_eq!(fields, vec!["a", "b,c", "d\"e", "f"]);
    }

    #[test]
    fn test_reader_auto() {
        let data = "a,b,c\n1,2.5,true\n4,5.0,false\n";
        let cursor = Cursor::new(data);
        let mut reader = CsvReader::new_default(cursor).unwrap();
        let rec = reader.next().unwrap().unwrap();
        assert_eq!(rec.get("a"), Some(&Value::Int(1)));
        assert_eq!(rec.get("b"), Some(&Value::Float(2.5)));
        assert_eq!(rec.get("c"), Some(&Value::Bool(true)));
    }

    #[test]
    fn test_reader_with_types() {
        let data = "a,b,c\n1,2,3\n";
        let cursor = Cursor::new(data);
        let mut types = HashMap::new();
        types.insert("a".to_string(), DataType::Int);
        types.insert("b".to_string(), DataType::Int);
        types.insert("c".to_string(), DataType::String);
        let mut reader = CsvReader::new_with_types(cursor, types).unwrap();
        let rec = reader.next().unwrap().unwrap();
        assert_eq!(rec.get("a"), Some(&Value::Int(1)));
        assert_eq!(rec.get("b"), Some(&Value::Int(2)));
        assert_eq!(rec.get("c"), Some(&Value::String("3".to_string())));
    }

    #[test]
    fn test_chain_headers_and_types() {
        let data = "1,2\n3,4\n";
        let cursor = Cursor::new(data);
        let headers = vec!["x".to_string(), "y".to_string()];
        let mut types = HashMap::new();
        types.insert("x".to_string(), DataType::Int);
        types.insert("y".to_string(), DataType::UInt);
        let mut reader = CsvReader::new_with_headers(cursor, headers).new_with_types(types);
        let rec = reader.next().unwrap().unwrap();
        assert_eq!(rec.get("x"), Some(&Value::Int(1)));
        assert_eq!(rec.get("y"), Some(&Value::UInt(2)));
    }

    #[test]
    fn test_date_types() {
        let data = "d,dt\n2024-01-01,2024-01-01 12:00:00\n";
        let cursor = Cursor::new(data);
        let mut types = HashMap::new();
        types.insert("d".to_string(), DataType::Date);
        types.insert("dt".to_string(), DataType::DateTime);
        let mut reader = CsvReader::new_with_types(cursor, types).unwrap();
        let rec = reader.next().unwrap().unwrap();
        let date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let datetime: NaiveDateTime = NaiveDate::from_ymd_opt(2024, 1, 1)
            .unwrap()
            .and_hms_opt(12, 0, 0)
            .unwrap();
        assert_eq!(rec.get("d"), Some(&Value::Date(date)));
        assert_eq!(rec.get("dt"), Some(&Value::DateTime(datetime)));
    }

    #[test]
    fn test_read_file_all() {
        let path = std::env::temp_dir().join("csv_full_test.csv");
        std::fs::write(&path, "a,b\n1,2\n3,4\n").unwrap();
        let records = read_file(&path).unwrap();
        assert_eq!(records.len(), 2);
        assert_eq!(records[1].get("b"), Some(&Value::Int(4)));
        std::fs::remove_file(path).unwrap();
    }

    #[test]
    fn test_reader_from_path() {
        let path = std::env::temp_dir().join("csv_iter_test.csv");
        std::fs::write(&path, "a,b\n5,6\n").unwrap();
        let mut iter = reader(&path).unwrap();
        let rec = iter.next().unwrap().unwrap();
        assert_eq!(rec.get("a"), Some(&Value::Int(5)));
        assert_eq!(rec.get("b"), Some(&Value::Int(6)));
        std::fs::remove_file(path).unwrap();
    }

    #[test]
    fn test_from_path_auto_method() {
        let path = std::env::temp_dir().join("csv_method_auto.csv");
        std::fs::write(&path, "a,b\n7,true\n").unwrap();
        let mut reader = CsvReader::from_path_auto(&path).unwrap();
        let rec = reader.next().unwrap().unwrap();
        assert_eq!(rec.get("a"), Some(&Value::Int(7)));
        assert_eq!(rec.get("b"), Some(&Value::Bool(true)));
        std::fs::remove_file(path).unwrap();
    }
}
```
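For orientation, a minimal usage sketch combining the `reader_with` helper above with a custom separator and an explicit type map. The file path, column names, and sample data are illustrative, not part of the crate:

```rust
use rustframe::csv::{reader_with, DataType, Value};
use std::collections::HashMap;

fn main() -> std::io::Result<()> {
    // Illustrative input: a semicolon-separated file with "id" and "score" columns.
    let path = std::env::temp_dir().join("scores_example.csv");
    std::fs::write(&path, "id;score\n1;0.5\n2;0.75\n")?;

    // Map "id" to Int and "score" to Float; unmapped columns stay Strings.
    let mut types = HashMap::new();
    types.insert("id".to_string(), DataType::Int);
    types.insert("score".to_string(), DataType::Float);

    // reader_with yields io::Result<Record> items, so each record is checked.
    for rec in reader_with(&path, vec![';'], Some(types))? {
        let rec = rec?;
        if let (Some(&Value::Int(id)), Some(&Value::Float(score))) =
            (rec.get("id"), rec.get("score"))
        {
            println!("id={id} score={score}");
        }
    }

    std::fs::remove_file(path)?;
    Ok(())
}
```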
src/csv/mod.rs (new file, 69 lines):

```rust
//! CSV handling utilities.
//!
//! The [`csv`] module offers a flexible [`CsvReader`] with automatic type
//! detection and optional builders for custom headers and types.
//!
//! # Examples
//!
//! Read from a file with auto type detection:
//!
//! ```
//! use rustframe::csv::CsvReader;
//! # let path = std::env::temp_dir().join("docs_auto.csv");
//! # std::fs::write(&path, "a,b\n1,true\n").unwrap();
//! let mut reader = CsvReader::from_path_auto(&path).unwrap();
//! for rec in reader {
//!     let rec = rec.unwrap();
//!     println!("{:?}", rec);
//! }
//! # std::fs::remove_file(path).unwrap();
//! ```
//!
//! Specify column types explicitly:
//!
//! ```
//! use rustframe::csv::{CsvReader, DataType, Value};
//! use std::collections::HashMap;
//! use std::io::Cursor;
//! let data = "a,b\n1,2\n";
//! let mut types = HashMap::new();
//! types.insert("a".into(), DataType::Int);
//! types.insert("b".into(), DataType::Float);
//! let mut reader = CsvReader::new_with_types(Cursor::new(data), types).unwrap();
//! let rec = reader.next().unwrap().unwrap();
//! assert_eq!(rec.get("b"), Some(&Value::Float(2.0)));
//! ```
//!
//! Building from custom headers and types:
//!
//! ```
//! use rustframe::csv::{CsvReader, DataType, Value};
//! use std::collections::HashMap;
//! use std::io::Cursor;
//! let data = "1,2\n";
//! let headers = vec!["x".to_string(), "y".to_string()];
//! let mut types = HashMap::new();
//! types.insert("x".into(), DataType::Int);
//! types.insert("y".into(), DataType::UInt);
//! let mut reader = CsvReader::new_with_headers(Cursor::new(data), headers).new_with_types(types);
//! let rec = reader.next().unwrap().unwrap();
//! assert_eq!(rec.get("y"), Some(&Value::UInt(2)));
//! ```
//!
//! Reading an entire file into memory:
//!
//! ```
//! use rustframe::csv::read_file;
//! # let path = std::env::temp_dir().join("docs_full.csv");
//! # std::fs::write(&path, "a,b\n1,2\n3,4\n").unwrap();
//! let records = read_file(&path).unwrap();
//! assert_eq!(records.len(), 2);
//! # std::fs::remove_file(path).unwrap();
//! ```

pub mod csv_core;

pub use csv_core::{
    CsvReader, CsvReaderBuilder, DataType, Record, Value, reader, reader_with,
    read_file, read_file_with,
};
```
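One detail worth noting from `parse_auto` in csv_core.rs: automatic typing probes candidate types in a fixed order (integer, float, bool, datetime, then date) before falling back to a string. A small sketch of the resulting behavior; the column names and data are illustrative:

```rust
use rustframe::csv::{CsvReader, Value};
use std::io::Cursor;

fn main() {
    // "42" parses as Int before the Float probe runs; "2024-01-01" only
    // matches the Date probe; "hello" falls through every probe and
    // remains a String.
    let data = "n,when,note\n42,2024-01-01,hello\n";
    let mut reader = CsvReader::new_default(Cursor::new(data)).unwrap();
    let rec = reader.next().unwrap().unwrap();
    assert!(matches!(rec.get("n"), Some(Value::Int(42))));
    assert!(matches!(rec.get("when"), Some(Value::Date(_))));
    assert!(matches!(rec.get("note"), Some(Value::String(_))));
}
```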
Crate root module declarations (3 lines added):

```diff
@@ -14,3 +14,6 @@ pub mod compute;
 
 /// Documentation for the [`crate::random`] module.
 pub mod random;
+
+/// Documentation for the [`crate::csv`] module.
+pub mod csv;
```