From 4ba5cfea18d92dcee3d0b01a614a08125ffabfae Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Sun, 24 Aug 2025 19:51:47 +0100 Subject: [PATCH] Enhance CSV reader with support for UInt, Date, and DateTime types; add builder methods for easier configuration --- src/csv/csv_core.rs | 192 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 179 insertions(+), 13 deletions(-) diff --git a/src/csv/csv_core.rs b/src/csv/csv_core.rs index bf66e9e..e2f2697 100644 --- a/src/csv/csv_core.rs +++ b/src/csv/csv_core.rs @@ -1,3 +1,4 @@ +use chrono::{NaiveDate, NaiveDateTime}; use std::collections::HashMap; use std::fs::File; use std::io::{self, BufRead, BufReader}; @@ -9,7 +10,10 @@ pub enum DataType { Int, Float, Bool, + UInt, String, + Date, + DateTime, } /// Represents a value parsed from the CSV. @@ -18,7 +22,10 @@ pub enum Value { Int(i64), Float(f64), Bool(bool), + UInt(u64), String(String), + Date(NaiveDate), + DateTime(NaiveDateTime), } /// Convenience alias for a parsed CSV record. @@ -32,16 +39,53 @@ pub struct CsvReader { types: Option>, } +/// Builder for [`CsvReader`] allowing chained configuration of headers, types, and separators. +pub struct CsvReaderBuilder { + reader: R, + separators: Vec, + headers: Vec, + types: Option>, +} + impl CsvReader { /// Create a new CSV reader from a [`BufRead`] source. /// The first line is expected to contain headers. /// `separators` is a list of characters considered as field separators. /// `types` optionally maps column names to target data types. - pub fn new(mut reader: R, separators: Vec, types: Option>) -> io::Result { + pub fn new( + mut reader: R, + separators: Vec, + types: Option>, + ) -> io::Result { let mut first_line = String::new(); reader.read_line(&mut first_line)?; let headers = parse_line(&first_line, &separators); - Ok(Self { reader, separators, headers, types }) + Ok(Self { + reader, + separators, + headers, + types, + }) + } + + /// Create a reader with default settings (comma separator, automatic typing). + pub fn new_default(reader: R) -> io::Result { + Self::new(reader, vec![','], None) + } + + /// Create a reader with default separators and explicit type mapping. + pub fn new_with_types(reader: R, types: HashMap) -> io::Result { + Self::new(reader, vec![','], Some(types)) + } + + /// Start building a reader from a source that lacks headers. + pub fn new_with_headers(reader: R, headers: Vec) -> CsvReaderBuilder { + CsvReaderBuilder { + reader, + separators: vec![','], + headers, + types: None, + } } /// Return the headers of the CSV file. @@ -89,6 +133,30 @@ impl Iterator for CsvReader { } } +impl CsvReaderBuilder { + /// Override field separators for the upcoming reader. + pub fn separators(mut self, separators: Vec) -> Self { + self.separators = separators; + self + } + + /// Finalize the builder with an explicit type mapping. + pub fn new_with_types(mut self, types: HashMap) -> CsvReader { + self.types = Some(types); + self.build() + } + + /// Finalize the builder without specifying types. + pub fn build(self) -> CsvReader { + CsvReader { + reader: self.reader, + separators: self.separators, + headers: self.headers, + types: self.types, + } + } +} + impl CsvReader { /// Read all remaining records into a vector. pub fn read_all(&mut self) -> io::Result> { @@ -100,13 +168,40 @@ impl CsvReader { } } +impl CsvReader> { + /// Create a [`CsvReader`] from a file path using comma separators and + /// automatic type detection. + /// + /// # Examples + /// + /// ``` + /// use rustframe::csv::{CsvReader, Value}; + /// # let path = std::env::temp_dir().join("from_path_auto.csv"); + /// # std::fs::write(&path, "a,b\n1,true\n").unwrap(); + /// let mut reader = CsvReader::from_path_auto(&path).unwrap(); + /// let rec = reader.next().unwrap().unwrap(); + /// assert_eq!(rec.get("a"), Some(&Value::Int(1))); + /// assert_eq!(rec.get("b"), Some(&Value::Bool(true))); + /// # std::fs::remove_file(path).unwrap(); + /// ``` + pub fn from_path_auto>(path: P) -> io::Result { + let file = File::open(path)?; + let reader = BufReader::new(file); + CsvReader::new_default(reader) + } +} + /// Create an iterator over records from a file path using default settings. pub fn reader>(path: P) -> io::Result>> { - reader_with(path, vec![',' ], None) + reader_with(path, vec![','], None) } /// Create an iterator over records from a file path with custom separators and type mapping. -pub fn reader_with>(path: P, separators: Vec, types: Option>) -> io::Result>> { +pub fn reader_with>( + path: P, + separators: Vec, + types: Option>, +) -> io::Result>> { let file = File::open(path)?; let reader = BufReader::new(file); CsvReader::new(reader, separators, types) @@ -114,21 +209,45 @@ pub fn reader_with>(path: P, separators: Vec, types: Option /// Read an entire CSV file into memory using default settings. pub fn read_file>(path: P) -> io::Result> { - read_file_with(path, vec![',' ], None) + read_file_with(path, vec![','], None) } /// Read an entire CSV file into memory with custom separators and type mapping. -pub fn read_file_with>(path: P, separators: Vec, types: Option>) -> io::Result> { +pub fn read_file_with>( + path: P, + separators: Vec, + types: Option>, +) -> io::Result> { let mut reader = reader_with(path, separators, types)?; reader.read_all() } fn parse_with_type(s: &str, ty: &DataType) -> Value { match ty { - DataType::Int => s.parse::().map(Value::Int).unwrap_or_else(|_| Value::String(s.to_string())), - DataType::Float => s.parse::().map(Value::Float).unwrap_or_else(|_| Value::String(s.to_string())), - DataType::Bool => s.parse::().map(Value::Bool).unwrap_or_else(|_| Value::String(s.to_string())), + DataType::Int => s + .parse::() + .map(Value::Int) + .unwrap_or_else(|_| Value::String(s.to_string())), + DataType::Float => s + .parse::() + .map(Value::Float) + .unwrap_or_else(|_| Value::String(s.to_string())), + DataType::Bool => s + .parse::() + .map(Value::Bool) + .unwrap_or_else(|_| Value::String(s.to_string())), + DataType::UInt => s + .parse::() + .map(Value::UInt) + .unwrap_or_else(|_| Value::String(s.to_string())), DataType::String => Value::String(s.to_string()), + DataType::Date => s + .parse::() + .map(Value::Date) + .unwrap_or_else(|_| Value::String(s.to_string())), + DataType::DateTime => NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") + .map(Value::DateTime) + .unwrap_or_else(|_| Value::String(s.to_string())), } } @@ -139,6 +258,10 @@ fn parse_auto(s: &str) -> Value { Value::Float(f) } else if let Ok(b) = s.parse::() { Value::Bool(b) + } else if let Ok(dt) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") { + Value::DateTime(dt) + } else if let Ok(d) = NaiveDate::parse_from_str(s, "%Y-%m-%d") { + Value::Date(d) } else { Value::String(s.to_string()) } @@ -186,6 +309,7 @@ fn parse_line(line: &str, separators: &[char]) -> Vec { #[cfg(test)] mod tests { use super::*; + use chrono::{NaiveDate, NaiveDateTime}; use std::io::Cursor; #[test] @@ -199,7 +323,7 @@ mod tests { fn test_reader_auto() { let data = "a,b,c\n1,2.5,true\n4,5.0,false\n"; let cursor = Cursor::new(data); - let mut reader = CsvReader::new(cursor, vec![','], None).unwrap(); + let mut reader = CsvReader::new_default(cursor).unwrap(); let rec = reader.next().unwrap().unwrap(); assert_eq!(rec.get("a"), Some(&Value::Int(1))); assert_eq!(rec.get("b"), Some(&Value::Float(2.5))); @@ -208,19 +332,51 @@ mod tests { #[test] fn test_reader_with_types() { - let data = "a;b;c\n1;2;3\n"; + let data = "a,b,c\n1,2,3\n"; let cursor = Cursor::new(data); let mut types = HashMap::new(); types.insert("a".to_string(), DataType::Int); types.insert("b".to_string(), DataType::Int); types.insert("c".to_string(), DataType::String); - let mut reader = CsvReader::new(cursor, vec![';', ','], Some(types)).unwrap(); + let mut reader = CsvReader::new_with_types(cursor, types).unwrap(); let rec = reader.next().unwrap().unwrap(); assert_eq!(rec.get("a"), Some(&Value::Int(1))); assert_eq!(rec.get("b"), Some(&Value::Int(2))); assert_eq!(rec.get("c"), Some(&Value::String("3".to_string()))); } + #[test] + fn test_chain_headers_and_types() { + let data = "1,2\n3,4\n"; + let cursor = Cursor::new(data); + let headers = vec!["x".to_string(), "y".to_string()]; + let mut types = HashMap::new(); + types.insert("x".to_string(), DataType::Int); + types.insert("y".to_string(), DataType::UInt); + let mut reader = CsvReader::new_with_headers(cursor, headers).new_with_types(types); + let rec = reader.next().unwrap().unwrap(); + assert_eq!(rec.get("x"), Some(&Value::Int(1))); + assert_eq!(rec.get("y"), Some(&Value::UInt(2))); + } + + #[test] + fn test_date_types() { + let data = "d,dt\n2024-01-01,2024-01-01 12:00:00\n"; + let cursor = Cursor::new(data); + let mut types = HashMap::new(); + types.insert("d".to_string(), DataType::Date); + types.insert("dt".to_string(), DataType::DateTime); + let mut reader = CsvReader::new_with_types(cursor, types).unwrap(); + let rec = reader.next().unwrap().unwrap(); + let date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(); + let datetime: NaiveDateTime = NaiveDate::from_ymd_opt(2024, 1, 1) + .unwrap() + .and_hms_opt(12, 0, 0) + .unwrap(); + assert_eq!(rec.get("d"), Some(&Value::Date(date))); + assert_eq!(rec.get("dt"), Some(&Value::DateTime(datetime))); + } + #[test] fn test_read_file_all() { let path = std::env::temp_dir().join("csv_full_test.csv"); @@ -241,5 +397,15 @@ mod tests { assert_eq!(rec.get("b"), Some(&Value::Int(6))); std::fs::remove_file(path).unwrap(); } -} + #[test] + fn test_from_path_auto_method() { + let path = std::env::temp_dir().join("csv_method_auto.csv"); + std::fs::write(&path, "a,b\n7,true\n").unwrap(); + let mut reader = CsvReader::from_path_auto(&path).unwrap(); + let rec = reader.next().unwrap().unwrap(); + assert_eq!(rec.get("a"), Some(&Value::Int(7))); + assert_eq!(rec.get("b"), Some(&Value::Bool(true))); + std::fs::remove_file(path).unwrap(); + } +}