mirror of
https://github.com/Magnus167/rustframe.git
synced 2025-11-19 06:16:11 +00:00
Compare commits
2 Commits
23367c7ca3
...
csv
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ef25e77f04 | ||
|
|
4ba5cfea18 |
@@ -1,3 +1,4 @@
|
||||
use chrono::{NaiveDate, NaiveDateTime};
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufRead, BufReader};
|
||||
@@ -9,7 +10,10 @@ pub enum DataType {
|
||||
Int,
|
||||
Float,
|
||||
Bool,
|
||||
UInt,
|
||||
String,
|
||||
Date,
|
||||
DateTime,
|
||||
}
|
||||
|
||||
/// Represents a value parsed from the CSV.
|
||||
@@ -18,7 +22,10 @@ pub enum Value {
|
||||
Int(i64),
|
||||
Float(f64),
|
||||
Bool(bool),
|
||||
UInt(u64),
|
||||
String(String),
|
||||
Date(NaiveDate),
|
||||
DateTime(NaiveDateTime),
|
||||
}
|
||||
|
||||
/// Convenience alias for a parsed CSV record.
|
||||
@@ -32,16 +39,53 @@ pub struct CsvReader<R: BufRead> {
|
||||
types: Option<HashMap<String, DataType>>,
|
||||
}
|
||||
|
||||
/// Builder for [`CsvReader`] allowing chained configuration of headers, types, and separators.
|
||||
pub struct CsvReaderBuilder<R: BufRead> {
|
||||
reader: R,
|
||||
separators: Vec<char>,
|
||||
headers: Vec<String>,
|
||||
types: Option<HashMap<String, DataType>>,
|
||||
}
|
||||
|
||||
impl<R: BufRead> CsvReader<R> {
|
||||
/// Create a new CSV reader from a [`BufRead`] source.
|
||||
/// The first line is expected to contain headers.
|
||||
/// `separators` is a list of characters considered as field separators.
|
||||
/// `types` optionally maps column names to target data types.
|
||||
pub fn new(mut reader: R, separators: Vec<char>, types: Option<HashMap<String, DataType>>) -> io::Result<Self> {
|
||||
pub fn new(
|
||||
mut reader: R,
|
||||
separators: Vec<char>,
|
||||
types: Option<HashMap<String, DataType>>,
|
||||
) -> io::Result<Self> {
|
||||
let mut first_line = String::new();
|
||||
reader.read_line(&mut first_line)?;
|
||||
let headers = parse_line(&first_line, &separators);
|
||||
Ok(Self { reader, separators, headers, types })
|
||||
Ok(Self {
|
||||
reader,
|
||||
separators,
|
||||
headers,
|
||||
types,
|
||||
})
|
||||
}
|
||||
|
||||
/// Create a reader with default settings (comma separator, automatic typing).
|
||||
pub fn new_default(reader: R) -> io::Result<Self> {
|
||||
Self::new(reader, vec![','], None)
|
||||
}
|
||||
|
||||
/// Create a reader with default separators and explicit type mapping.
|
||||
pub fn new_with_types(reader: R, types: HashMap<String, DataType>) -> io::Result<Self> {
|
||||
Self::new(reader, vec![','], Some(types))
|
||||
}
|
||||
|
||||
/// Start building a reader from a source that lacks headers.
|
||||
pub fn new_with_headers(reader: R, headers: Vec<String>) -> CsvReaderBuilder<R> {
|
||||
CsvReaderBuilder {
|
||||
reader,
|
||||
separators: vec![','],
|
||||
headers,
|
||||
types: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the headers of the CSV file.
|
||||
@@ -89,6 +133,30 @@ impl<R: BufRead> Iterator for CsvReader<R> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: BufRead> CsvReaderBuilder<R> {
|
||||
/// Override field separators for the upcoming reader.
|
||||
pub fn separators(mut self, separators: Vec<char>) -> Self {
|
||||
self.separators = separators;
|
||||
self
|
||||
}
|
||||
|
||||
/// Finalize the builder with an explicit type mapping.
|
||||
pub fn new_with_types(mut self, types: HashMap<String, DataType>) -> CsvReader<R> {
|
||||
self.types = Some(types);
|
||||
self.build()
|
||||
}
|
||||
|
||||
/// Finalize the builder without specifying types.
|
||||
pub fn build(self) -> CsvReader<R> {
|
||||
CsvReader {
|
||||
reader: self.reader,
|
||||
separators: self.separators,
|
||||
headers: self.headers,
|
||||
types: self.types,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: BufRead> CsvReader<R> {
|
||||
/// Read all remaining records into a vector.
|
||||
pub fn read_all(&mut self) -> io::Result<Vec<Record>> {
|
||||
@@ -100,13 +168,40 @@ impl<R: BufRead> CsvReader<R> {
|
||||
}
|
||||
}
|
||||
|
||||
impl CsvReader<BufReader<File>> {
|
||||
/// Create a [`CsvReader`] from a file path using comma separators and
|
||||
/// automatic type detection.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use rustframe::csv::{CsvReader, Value};
|
||||
/// # let path = std::env::temp_dir().join("from_path_auto.csv");
|
||||
/// # std::fs::write(&path, "a,b\n1,true\n").unwrap();
|
||||
/// let mut reader = CsvReader::from_path_auto(&path).unwrap();
|
||||
/// let rec = reader.next().unwrap().unwrap();
|
||||
/// assert_eq!(rec.get("a"), Some(&Value::Int(1)));
|
||||
/// assert_eq!(rec.get("b"), Some(&Value::Bool(true)));
|
||||
/// # std::fs::remove_file(path).unwrap();
|
||||
/// ```
|
||||
pub fn from_path_auto<P: AsRef<Path>>(path: P) -> io::Result<Self> {
|
||||
let file = File::open(path)?;
|
||||
let reader = BufReader::new(file);
|
||||
CsvReader::new_default(reader)
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an iterator over records from a file path using default settings.
|
||||
pub fn reader<P: AsRef<Path>>(path: P) -> io::Result<CsvReader<BufReader<File>>> {
|
||||
reader_with(path, vec![',' ], None)
|
||||
reader_with(path, vec![','], None)
|
||||
}
|
||||
|
||||
/// Create an iterator over records from a file path with custom separators and type mapping.
|
||||
pub fn reader_with<P: AsRef<Path>>(path: P, separators: Vec<char>, types: Option<HashMap<String, DataType>>) -> io::Result<CsvReader<BufReader<File>>> {
|
||||
pub fn reader_with<P: AsRef<Path>>(
|
||||
path: P,
|
||||
separators: Vec<char>,
|
||||
types: Option<HashMap<String, DataType>>,
|
||||
) -> io::Result<CsvReader<BufReader<File>>> {
|
||||
let file = File::open(path)?;
|
||||
let reader = BufReader::new(file);
|
||||
CsvReader::new(reader, separators, types)
|
||||
@@ -114,21 +209,45 @@ pub fn reader_with<P: AsRef<Path>>(path: P, separators: Vec<char>, types: Option
|
||||
|
||||
/// Read an entire CSV file into memory using default settings.
|
||||
pub fn read_file<P: AsRef<Path>>(path: P) -> io::Result<Vec<Record>> {
|
||||
read_file_with(path, vec![',' ], None)
|
||||
read_file_with(path, vec![','], None)
|
||||
}
|
||||
|
||||
/// Read an entire CSV file into memory with custom separators and type mapping.
|
||||
pub fn read_file_with<P: AsRef<Path>>(path: P, separators: Vec<char>, types: Option<HashMap<String, DataType>>) -> io::Result<Vec<Record>> {
|
||||
pub fn read_file_with<P: AsRef<Path>>(
|
||||
path: P,
|
||||
separators: Vec<char>,
|
||||
types: Option<HashMap<String, DataType>>,
|
||||
) -> io::Result<Vec<Record>> {
|
||||
let mut reader = reader_with(path, separators, types)?;
|
||||
reader.read_all()
|
||||
}
|
||||
|
||||
fn parse_with_type(s: &str, ty: &DataType) -> Value {
|
||||
match ty {
|
||||
DataType::Int => s.parse::<i64>().map(Value::Int).unwrap_or_else(|_| Value::String(s.to_string())),
|
||||
DataType::Float => s.parse::<f64>().map(Value::Float).unwrap_or_else(|_| Value::String(s.to_string())),
|
||||
DataType::Bool => s.parse::<bool>().map(Value::Bool).unwrap_or_else(|_| Value::String(s.to_string())),
|
||||
DataType::Int => s
|
||||
.parse::<i64>()
|
||||
.map(Value::Int)
|
||||
.unwrap_or_else(|_| Value::String(s.to_string())),
|
||||
DataType::Float => s
|
||||
.parse::<f64>()
|
||||
.map(Value::Float)
|
||||
.unwrap_or_else(|_| Value::String(s.to_string())),
|
||||
DataType::Bool => s
|
||||
.parse::<bool>()
|
||||
.map(Value::Bool)
|
||||
.unwrap_or_else(|_| Value::String(s.to_string())),
|
||||
DataType::UInt => s
|
||||
.parse::<u64>()
|
||||
.map(Value::UInt)
|
||||
.unwrap_or_else(|_| Value::String(s.to_string())),
|
||||
DataType::String => Value::String(s.to_string()),
|
||||
DataType::Date => s
|
||||
.parse::<NaiveDate>()
|
||||
.map(Value::Date)
|
||||
.unwrap_or_else(|_| Value::String(s.to_string())),
|
||||
DataType::DateTime => NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S")
|
||||
.map(Value::DateTime)
|
||||
.unwrap_or_else(|_| Value::String(s.to_string())),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -139,6 +258,10 @@ fn parse_auto(s: &str) -> Value {
|
||||
Value::Float(f)
|
||||
} else if let Ok(b) = s.parse::<bool>() {
|
||||
Value::Bool(b)
|
||||
} else if let Ok(dt) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") {
|
||||
Value::DateTime(dt)
|
||||
} else if let Ok(d) = NaiveDate::parse_from_str(s, "%Y-%m-%d") {
|
||||
Value::Date(d)
|
||||
} else {
|
||||
Value::String(s.to_string())
|
||||
}
|
||||
@@ -186,6 +309,7 @@ fn parse_line(line: &str, separators: &[char]) -> Vec<String> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use chrono::{NaiveDate, NaiveDateTime};
|
||||
use std::io::Cursor;
|
||||
|
||||
#[test]
|
||||
@@ -199,7 +323,7 @@ mod tests {
|
||||
fn test_reader_auto() {
|
||||
let data = "a,b,c\n1,2.5,true\n4,5.0,false\n";
|
||||
let cursor = Cursor::new(data);
|
||||
let mut reader = CsvReader::new(cursor, vec![','], None).unwrap();
|
||||
let mut reader = CsvReader::new_default(cursor).unwrap();
|
||||
let rec = reader.next().unwrap().unwrap();
|
||||
assert_eq!(rec.get("a"), Some(&Value::Int(1)));
|
||||
assert_eq!(rec.get("b"), Some(&Value::Float(2.5)));
|
||||
@@ -208,19 +332,51 @@ mod tests {
|
||||
|
||||
#[test]
fn test_reader_with_types() {
    // Comma-separated input, since `new_with_types` always uses the `,` separator.
    let data = "a,b,c\n1,2,3\n";
    let cursor = Cursor::new(data);
    let mut types = HashMap::new();
    types.insert("a".to_string(), DataType::Int);
    types.insert("b".to_string(), DataType::Int);
    types.insert("c".to_string(), DataType::String);
    let mut reader = CsvReader::new_with_types(cursor, types).unwrap();
    let rec = reader.next().unwrap().unwrap();
    assert_eq!(rec.get("a"), Some(&Value::Int(1)));
    assert_eq!(rec.get("b"), Some(&Value::Int(2)));
    // Column `c` is typed as String, so the digit stays textual.
    assert_eq!(rec.get("c"), Some(&Value::String("3".to_string())));
}
|
||||
|
||||
#[test]
fn test_chain_headers_and_types() {
    // The input has no header row; column names come from the builder.
    let source = Cursor::new("1,2\n3,4\n");
    let headers: Vec<String> = ["x", "y"].iter().map(|name| name.to_string()).collect();
    let types = HashMap::from([
        ("x".to_string(), DataType::Int),
        ("y".to_string(), DataType::UInt),
    ]);

    let mut reader = CsvReader::new_with_headers(source, headers).new_with_types(types);
    let first = reader.next().unwrap().unwrap();

    assert_eq!(first.get("x"), Some(&Value::Int(1)));
    assert_eq!(first.get("y"), Some(&Value::UInt(2)));
}
|
||||
|
||||
#[test]
fn test_date_types() {
    let source = Cursor::new("d,dt\n2024-01-01,2024-01-01 12:00:00\n");
    let types = HashMap::from([
        ("d".to_string(), DataType::Date),
        ("dt".to_string(), DataType::DateTime),
    ]);

    let mut reader = CsvReader::new_with_types(source, types).unwrap();
    let row = reader.next().unwrap().unwrap();

    // Expected values: 2024-01-01 and the same day at noon.
    let expected_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let expected_datetime: NaiveDateTime = expected_date.and_hms_opt(12, 0, 0).unwrap();

    assert_eq!(row.get("d"), Some(&Value::Date(expected_date)));
    assert_eq!(row.get("dt"), Some(&Value::DateTime(expected_datetime)));
}
|
||||
|
||||
#[test]
|
||||
fn test_read_file_all() {
|
||||
let path = std::env::temp_dir().join("csv_full_test.csv");
|
||||
@@ -241,5 +397,15 @@ mod tests {
|
||||
assert_eq!(rec.get("b"), Some(&Value::Int(6)));
|
||||
std::fs::remove_file(path).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn test_from_path_auto_method() {
    // Write a small fixture into the temp directory, read it back, then clean up.
    let csv_path = std::env::temp_dir().join("csv_method_auto.csv");
    std::fs::write(&csv_path, "a,b\n7,true\n").unwrap();

    let mut reader = CsvReader::from_path_auto(&csv_path).unwrap();
    let first = reader.next().unwrap().unwrap();

    for (column, expected) in [("a", Value::Int(7)), ("b", Value::Bool(true))] {
        assert_eq!(first.get(column), Some(&expected));
    }

    std::fs::remove_file(csv_path).unwrap();
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,69 @@
|
||||
//! CSV handling utilities.
|
||||
//!
|
||||
//! The [`csv`](self) module offers a flexible [`CsvReader`] with automatic type
|
||||
//! detection and optional builders for custom headers and types.
|
||||
//!
|
||||
//! # Examples
|
||||
//!
|
||||
//! Read from a file with auto type detection:
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::csv::CsvReader;
|
||||
//! # let path = std::env::temp_dir().join("docs_auto.csv");
|
||||
//! # std::fs::write(&path, "a,b\n1,true\n").unwrap();
|
||||
//! let mut reader = CsvReader::from_path_auto(&path).unwrap();
|
||||
//! for rec in reader {
|
||||
//! let rec = rec.unwrap();
|
||||
//! println!("{:?}", rec);
|
||||
//! }
|
||||
//! # std::fs::remove_file(path).unwrap();
|
||||
//! ```
|
||||
//!
|
||||
//! Specify column types explicitly:
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::csv::{CsvReader, DataType, Value};
|
||||
//! use std::collections::HashMap;
|
||||
//! use std::io::Cursor;
|
||||
//! let data = "a,b\n1,2\n";
|
||||
//! let mut types = HashMap::new();
|
||||
//! types.insert("a".into(), DataType::Int);
|
||||
//! types.insert("b".into(), DataType::Float);
|
||||
//! let mut reader = CsvReader::new_with_types(Cursor::new(data), types).unwrap();
|
||||
//! let rec = reader.next().unwrap().unwrap();
|
||||
//! assert_eq!(rec.get("b"), Some(&Value::Float(2.0)));
|
||||
//! ```
|
||||
//!
|
||||
//! Building from custom headers and types:
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::csv::{CsvReader, DataType, Value};
|
||||
//! use std::collections::HashMap;
|
||||
//! use std::io::Cursor;
|
||||
//! let data = "1,2\n";
|
||||
//! let headers = vec!["x".to_string(), "y".to_string()];
|
||||
//! let mut types = HashMap::new();
|
||||
//! types.insert("x".into(), DataType::Int);
|
||||
//! types.insert("y".into(), DataType::UInt);
|
||||
//! let mut reader = CsvReader::new_with_headers(Cursor::new(data), headers).new_with_types(types);
|
||||
//! let rec = reader.next().unwrap().unwrap();
|
||||
//! assert_eq!(rec.get("y"), Some(&Value::UInt(2)));
|
||||
//! ```
|
||||
//!
|
||||
//! Reading an entire file into memory:
|
||||
//!
|
||||
//! ```
|
||||
//! use rustframe::csv::read_file;
|
||||
//! # let path = std::env::temp_dir().join("docs_full.csv");
|
||||
//! # std::fs::write(&path, "a,b\n1,2\n3,4\n").unwrap();
|
||||
//! let records = read_file(&path).unwrap();
|
||||
//! assert_eq!(records.len(), 2);
|
||||
//! # std::fs::remove_file(path).unwrap();
|
||||
//! ```
|
||||
|
||||
pub mod csv_core;
|
||||
|
||||
pub use csv_core::CsvReader;
|
||||
pub use csv_core::{
|
||||
CsvReader, CsvReaderBuilder, DataType, Record, Value, reader, reader_with,
|
||||
read_file, read_file_with,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user