Merge pull request #44 from Magnus167/date_utils

Refactor date grouping methods for clarity and modularity
This commit is contained in:
Palash Tyagi 2025-05-15 00:12:32 +01:00 committed by GitHub
commit afe0e15ede
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 96 additions and 132 deletions

View File

@ -5,7 +5,7 @@ use criterion::{criterion_group, criterion_main, Criterion};
use rustframe::{ use rustframe::{
frame::{Frame, RowIndex}, frame::{Frame, RowIndex},
matrix::{BoolMatrix, Matrix, SeriesOps}, matrix::{BoolMatrix, Matrix, SeriesOps},
utils::{BDateFreq, BDatesList}, utils::{BDatesList, BDateFreq},
}; };
use std::time::Duration; use std::time::Duration;
@ -254,7 +254,6 @@ fn config_large_arrays() -> Criterion {
.warm_up_time(Duration::from_millis(200)) .warm_up_time(Duration::from_millis(200))
} }
criterion_group!( criterion_group!(
name = benches_small_arrays; name = benches_small_arrays;
config = config_small_arrays(); config = config_small_arrays();

View File

@ -1,13 +1,18 @@
//! This module provides functionality for generating and manipulating business dates.
//! It includes `BDatesList`, which mirrors the `DatesList` structure and its properties.
//! It builds on `DatesList` and `DatesGenerator`, adjusting their output to business dates.
use chrono::{Datelike, Duration, NaiveDate, Weekday}; use chrono::{Datelike, Duration, NaiveDate, Weekday};
use std::collections::HashMap;
use std::error::Error; use std::error::Error;
use std::hash::Hash;
use std::result::Result; use std::result::Result;
use crate::utils::dateutils::dates::{find_next_date, AggregationType, DateFreq, DatesGenerator}; use crate::utils::dateutils::dates::{find_next_date, AggregationType, DateFreq, DatesGenerator};
use crate::utils::dateutils::dates; use crate::utils::dateutils::dates;
/// Type alias for `DateFreq` to represent business date frequency.
pub type BDateFreq = DateFreq;
/// Represents a list of business dates generated between a start and end date /// Represents a list of business dates generated between a start and end date
/// at a specified frequency. Provides methods to retrieve the full list, /// at a specified frequency. Provides methods to retrieve the full list,
/// count, or dates grouped by period. /// count, or dates grouped by period.
@ -16,19 +21,6 @@ pub struct BDatesList {
start_date_str: String, start_date_str: String,
end_date_str: String, end_date_str: String,
freq: DateFreq, freq: DateFreq,
// TODO: cache the generated date list to reduce repeated computation.
// Currently, list(), count(), and groups() regenerate the list on every invocation.
// cached_list: Option<Vec<NaiveDate>>,
}
// Enumeration of period keys used for grouping dates.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
enum GroupKey {
Daily(NaiveDate), // Daily grouping: use the exact date
Weekly(i32, u32), // Weekly grouping: use year and ISO week number
Monthly(i32, u32), // Monthly grouping: use year and month (1-12)
Quarterly(i32, u32), // Quarterly grouping: use year and quarter (1-4)
Yearly(i32), // Yearly grouping: use year
} }
/// Represents a collection of business dates generated according to specific rules. /// Represents a collection of business dates generated according to specific rules.
@ -219,45 +211,8 @@ impl BDatesList {
/// ///
/// Returns an error if the start or end date strings cannot be parsed. /// Returns an error if the start or end date strings cannot be parsed.
pub fn groups(&self) -> Result<Vec<Vec<NaiveDate>>, Box<dyn Error>> { pub fn groups(&self) -> Result<Vec<Vec<NaiveDate>>, Box<dyn Error>> {
// Retrieve all business dates in chronological order.
let dates = self.list()?; let dates = self.list()?;
dates::group_dates_helper(dates, self.freq)
// Aggregate dates into buckets keyed by period.
let mut groups: HashMap<GroupKey, Vec<NaiveDate>> = HashMap::new();
for date in dates {
// Derive the appropriate GroupKey for the current date based on the configured frequency.
let key = match self.freq {
DateFreq::Daily => GroupKey::Daily(date),
DateFreq::WeeklyMonday | DateFreq::WeeklyFriday => {
let iso_week = date.iso_week();
GroupKey::Weekly(iso_week.year(), iso_week.week())
}
DateFreq::MonthStart | DateFreq::MonthEnd => {
GroupKey::Monthly(date.year(), date.month())
}
DateFreq::QuarterStart | DateFreq::QuarterEnd => {
GroupKey::Quarterly(date.year(), dates::month_to_quarter(date.month()))
}
DateFreq::YearStart | DateFreq::YearEnd => GroupKey::Yearly(date.year()),
};
// Append the date to its period group.
groups.entry(key).or_insert_with(Vec::new).push(date);
}
// Transform the group map into a vector of (GroupKey, Vec<NaiveDate>) tuples.
let mut sorted_groups: Vec<(GroupKey, Vec<NaiveDate>)> = groups.into_iter().collect();
// Sort groups chronologically using the derived `Ord` implementation on `GroupKey`.
sorted_groups.sort_by(|(k1, _), (k2, _)| k1.cmp(k2));
// Note: Dates within each group remain sorted due to initial ordered input.
// Discard group keys to return only the list of date vectors.
let result_groups = sorted_groups.into_iter().map(|(_, dates)| dates).collect();
Ok(result_groups)
} }
/// Returns the start date parsed as a `NaiveDate`. /// Returns the start date parsed as a `NaiveDate`.
@ -441,7 +396,6 @@ impl Iterator for BDatesGenerator {
DateFreq::WeeklyMonday | DateFreq::WeeklyFriday => next_date, DateFreq::WeeklyMonday | DateFreq::WeeklyFriday => next_date,
DateFreq::MonthEnd | DateFreq::QuarterEnd | DateFreq::YearEnd => { DateFreq::MonthEnd | DateFreq::QuarterEnd | DateFreq::YearEnd => {
// Adjust to the last business date of the month, quarter, or year.
let adjusted_date = iter_reverse_till_bdate(next_date); let adjusted_date = iter_reverse_till_bdate(next_date);
if self.start_date > adjusted_date { if self.start_date > adjusted_date {
// Skip this iteration if the adjusted date is before the start date. // Skip this iteration if the adjusted date is before the start date.
@ -1229,4 +1183,4 @@ mod tests {
); );
Ok(()) Ok(())
} }
} // end mod tests }

View File

@ -5,8 +5,6 @@ use std::hash::Hash;
use std::result::Result; use std::result::Result;
use std::str::FromStr; use std::str::FromStr;
// --- Core Enums ---
/// Represents the frequency at which calendar dates should be generated. /// Represents the frequency at which calendar dates should be generated.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DateFreq { pub enum DateFreq {
@ -124,8 +122,6 @@ impl FromStr for DateFreq {
} }
} }
// --- DatesList Struct ---
/// Represents a list of calendar dates generated between a start and end date /// Represents a list of calendar dates generated between a start and end date
/// at a specified frequency. Provides methods to retrieve the full list, /// at a specified frequency. Provides methods to retrieve the full list,
/// count, or dates grouped by period. /// count, or dates grouped by period.
@ -340,32 +336,7 @@ impl DatesList {
/// Returns an error if the start or end date strings cannot be parsed. /// Returns an error if the start or end date strings cannot be parsed.
pub fn groups(&self) -> Result<Vec<Vec<NaiveDate>>, Box<dyn Error>> { pub fn groups(&self) -> Result<Vec<Vec<NaiveDate>>, Box<dyn Error>> {
let dates = self.list()?; let dates = self.list()?;
let mut groups: HashMap<GroupKey, Vec<NaiveDate>> = HashMap::new(); group_dates_helper(dates, self.freq)
for date in dates {
let key = match self.freq {
DateFreq::Daily => GroupKey::Daily(date),
DateFreq::WeeklyMonday | DateFreq::WeeklyFriday => {
let iso_week = date.iso_week();
GroupKey::Weekly(iso_week.year(), iso_week.week())
}
DateFreq::MonthStart | DateFreq::MonthEnd => {
GroupKey::Monthly(date.year(), date.month())
}
DateFreq::QuarterStart | DateFreq::QuarterEnd => {
GroupKey::Quarterly(date.year(), month_to_quarter(date.month()))
}
DateFreq::YearStart | DateFreq::YearEnd => GroupKey::Yearly(date.year()),
};
groups.entry(key).or_insert_with(Vec::new).push(date);
}
let mut sorted_groups: Vec<(GroupKey, Vec<NaiveDate>)> = groups.into_iter().collect();
sorted_groups.sort_by(|(k1, _), (k2, _)| k1.cmp(k2));
// Dates within groups are already sorted because they came from the sorted `self.list()`.
let result_groups = sorted_groups.into_iter().map(|(_, dates)| dates).collect();
Ok(result_groups)
} }
/// Returns the start date parsed as a `NaiveDate`. /// Returns the start date parsed as a `NaiveDate`.
@ -407,8 +378,6 @@ impl DatesList {
} }
} }
// --- Dates Generator (Iterator) ---
/// An iterator that generates a sequence of calendar dates based on a start date, /// An iterator that generates a sequence of calendar dates based on a start date,
/// frequency, and a specified number of periods. /// frequency, and a specified number of periods.
/// ///
@ -561,7 +530,39 @@ impl Iterator for DatesGenerator {
} }
} }
// --- Internal helper functions --- // Internal helper functions
/// Groups `dates` into period buckets determined by `freq`.
///
/// Each date is assigned a `GroupKey` derived from the frequency:
/// - `Daily`: the date itself,
/// - `WeeklyMonday` / `WeeklyFriday`: ISO week-year and ISO week number,
/// - `MonthStart` / `MonthEnd`: calendar year and month,
/// - `QuarterStart` / `QuarterEnd`: calendar year and quarter (via `month_to_quarter`),
/// - `YearStart` / `YearEnd`: calendar year.
///
/// The returned groups are ordered by `GroupKey`'s `Ord` implementation, and
/// the dates inside each group keep the relative order they had in `dates`
/// (so a sorted input yields sorted groups).
///
/// # Errors
///
/// The current implementation always returns `Ok`; the `Result` return type
/// is kept so callers can propagate it with `?` alongside other date helpers.
pub fn group_dates_helper(
    dates: Vec<NaiveDate>,
    freq: DateFreq,
) -> Result<Vec<Vec<NaiveDate>>, Box<dyn Error + 'static>> {
    let mut groups: HashMap<GroupKey, Vec<NaiveDate>> = HashMap::new();

    for date in dates {
        let key = match freq {
            DateFreq::Daily => GroupKey::Daily(date),
            DateFreq::WeeklyMonday | DateFreq::WeeklyFriday => {
                // Use the ISO week-year (not the calendar year) so that weeks
                // straddling a year boundary stay in a single bucket.
                let iso_week = date.iso_week();
                GroupKey::Weekly(iso_week.year(), iso_week.week())
            }
            DateFreq::MonthStart | DateFreq::MonthEnd => {
                GroupKey::Monthly(date.year(), date.month())
            }
            DateFreq::QuarterStart | DateFreq::QuarterEnd => {
                GroupKey::Quarterly(date.year(), month_to_quarter(date.month()))
            }
            DateFreq::YearStart | DateFreq::YearEnd => GroupKey::Yearly(date.year()),
        };
        groups.entry(key).or_default().push(date);
    }

    // `HashMap` iteration order is arbitrary, so order the buckets by key.
    // Keys are unique, which makes the unstable sort equivalent to a stable one.
    let mut sorted_groups: Vec<(GroupKey, Vec<NaiveDate>)> = groups.into_iter().collect();
    sorted_groups.sort_unstable_by(|(k1, _), (k2, _)| k1.cmp(k2));

    // Dates within each bucket were pushed in input order; drop the keys and
    // return only the buckets.
    Ok(sorted_groups
        .into_iter()
        .map(|(_, bucket)| bucket)
        .collect())
}
/// Generates the flat list of dates for the given range and frequency. /// Generates the flat list of dates for the given range and frequency.
/// Assumes the `collect_*` functions return sorted dates. /// Assumes the `collect_*` functions return sorted dates.
@ -601,7 +602,7 @@ pub fn get_dates_list_with_freq(
Ok(dates) Ok(dates)
} }
/* ---------------------- Low-Level Date Collection Functions (Internal) ---------------------- */ // Low-Level Date Collection Functions (Internal)
// These functions generate dates within a *range* [start_date, end_date] // These functions generate dates within a *range* [start_date, end_date]
/// Returns all calendar days day-by-day within the range. /// Returns all calendar days day-by-day within the range.
@ -733,6 +734,9 @@ fn collect_quarterly(
Ok(result) Ok(result)
} }
/// Returns a list of dates between the given start and end dates, inclusive,
/// at the specified frequency.
/// This function is a convenience wrapper around `get_dates_list_with_freq`.
pub fn get_dates_list_with_freq_from_naive_date( pub fn get_dates_list_with_freq_from_naive_date(
start_date: NaiveDate, start_date: NaiveDate,
end_date: NaiveDate, end_date: NaiveDate,
@ -774,8 +778,6 @@ fn collect_yearly(
Ok(result) Ok(result)
} }
/* ---------------------- Core Date Utility Functions (Internal) ---------------------- */
/// Given a date and a `target_weekday`, returns the date that is the first /// Given a date and a `target_weekday`, returns the date that is the first
/// `target_weekday` on or after the given date. /// `target_weekday` on or after the given date.
fn move_to_day_of_week_on_or_after( fn move_to_day_of_week_on_or_after(
@ -1467,8 +1469,6 @@ mod tests {
Ok(()) Ok(())
} }
// --- Tests for internal helper functions ---
#[test] #[test]
fn test_move_to_day_of_week_on_or_after() -> Result<(), Box<dyn Error>> { fn test_move_to_day_of_week_on_or_after() -> Result<(), Box<dyn Error>> {
assert_eq!( assert_eq!(
@ -1520,12 +1520,15 @@ mod tests {
fn test_days_in_month() -> Result<(), Box<dyn Error>> { fn test_days_in_month() -> Result<(), Box<dyn Error>> {
assert_eq!(days_in_month(2023, 1)?, 31); assert_eq!(days_in_month(2023, 1)?, 31);
assert_eq!(days_in_month(2023, 2)?, 28); assert_eq!(days_in_month(2023, 2)?, 28);
assert_eq!(days_in_month(2024, 2)?, 29); // Leap // Leap
assert_eq!(days_in_month(2024, 2)?, 29);
assert_eq!(days_in_month(2023, 4)?, 30); assert_eq!(days_in_month(2023, 4)?, 30);
assert_eq!(days_in_month(2023, 12)?, 31); assert_eq!(days_in_month(2023, 12)?, 31);
assert!(days_in_month(2023, 0).is_err()); // Invalid month 0 // Invalid month 0
assert!(days_in_month(2023, 13).is_err()); // Invalid month 13 assert!(days_in_month(2023, 0).is_err());
// Test near max date year overflow - Use MAX.year() // Invalid month 13
// Test near max date year overflow - Use MAX.year()
assert!(days_in_month(2023, 13).is_err());
assert!(days_in_month(NaiveDate::MAX.year(), 12).is_err()); assert!(days_in_month(NaiveDate::MAX.year(), 12).is_err());
Ok(()) Ok(())
} }
@ -1535,9 +1538,12 @@ mod tests {
assert_eq!(last_day_of_month(2023, 11)?, date(2023, 11, 30)); assert_eq!(last_day_of_month(2023, 11)?, date(2023, 11, 30));
assert_eq!(last_day_of_month(2024, 2)?, date(2024, 2, 29)); // Leap assert_eq!(last_day_of_month(2024, 2)?, date(2024, 2, 29)); // Leap
assert_eq!(last_day_of_month(2023, 12)?, date(2023, 12, 31)); assert_eq!(last_day_of_month(2023, 12)?, date(2023, 12, 31));
assert!(last_day_of_month(2023, 0).is_err()); // Invalid month 0 // Invalid month 0
assert!(last_day_of_month(2023, 13).is_err()); // Invalid month 13 assert!(last_day_of_month(2023, 0).is_err());
// Test near max date year overflow - use MAX.year() // Invalid month 13
// Test near max date year overflow - use MAX.year()
assert!(last_day_of_month(2023, 13).is_err());
assert!(last_day_of_month(NaiveDate::MAX.year(), 12).is_err()); assert!(last_day_of_month(NaiveDate::MAX.year(), 12).is_err());
Ok(()) Ok(())
} }
@ -1581,7 +1587,8 @@ mod tests {
assert_eq!(first_day_of_quarter(2023, 2)?, date(2023, 4, 1)); assert_eq!(first_day_of_quarter(2023, 2)?, date(2023, 4, 1));
assert_eq!(first_day_of_quarter(2023, 3)?, date(2023, 7, 1)); assert_eq!(first_day_of_quarter(2023, 3)?, date(2023, 7, 1));
assert_eq!(first_day_of_quarter(2023, 4)?, date(2023, 10, 1)); assert_eq!(first_day_of_quarter(2023, 4)?, date(2023, 10, 1));
assert!(first_day_of_quarter(2023, 5).is_err()); // Invalid quarter // Invalid quarter
assert!(first_day_of_quarter(2023, 5).is_err());
Ok(()) Ok(())
} }
@ -1601,9 +1608,11 @@ mod tests {
assert_eq!(last_day_of_quarter(2023, 2)?, date(2023, 6, 30)); assert_eq!(last_day_of_quarter(2023, 2)?, date(2023, 6, 30));
assert_eq!(last_day_of_quarter(2023, 3)?, date(2023, 9, 30)); assert_eq!(last_day_of_quarter(2023, 3)?, date(2023, 9, 30));
assert_eq!(last_day_of_quarter(2023, 4)?, date(2023, 12, 31)); assert_eq!(last_day_of_quarter(2023, 4)?, date(2023, 12, 31));
assert_eq!(last_day_of_quarter(2024, 1)?, date(2024, 3, 31)); // Leap year doesn't affect March end // Leap year doesn't affect March end
assert!(last_day_of_quarter(2023, 5).is_err()); // Invalid quarter assert_eq!(last_day_of_quarter(2024, 1)?, date(2024, 3, 31));
// Test overflow propagation - use MAX.year() // Invalid quarter
// Test overflow propagation - use MAX.year()
assert!(last_day_of_quarter(2023, 5).is_err());
assert!(last_day_of_quarter(NaiveDate::MAX.year(), 4).is_err()); assert!(last_day_of_quarter(NaiveDate::MAX.year(), 4).is_err());
Ok(()) Ok(())
} }
@ -1620,16 +1629,13 @@ mod tests {
#[test] #[test]
fn test_last_day_of_year() -> Result<(), Box<dyn Error>> { fn test_last_day_of_year() -> Result<(), Box<dyn Error>> {
assert_eq!(last_day_of_year(2023)?, date(2023, 12, 31)); assert_eq!(last_day_of_year(2023)?, date(2023, 12, 31));
assert_eq!(last_day_of_year(2024)?, date(2024, 12, 31)); // Leap year doesn't affect Dec 31st existence // Leap year doesn't affect Dec 31st existence
// Test MAX year - should be okay since MAX is Dec 31 // Test MAX year - should be okay since MAX is Dec 31
assert_eq!(last_day_of_year(2024)?, date(2024, 12, 31));
assert_eq!(last_day_of_year(NaiveDate::MAX.year())?, NaiveDate::MAX); assert_eq!(last_day_of_year(NaiveDate::MAX.year())?, NaiveDate::MAX);
Ok(()) Ok(())
} }
// Overflow tests for collect_* removed as they were misleading
// --- Tests for Generator Helper Functions ---
#[test] #[test]
fn test_find_first_date_on_or_after() -> Result<(), Box<dyn Error>> { fn test_find_first_date_on_or_after() -> Result<(), Box<dyn Error>> {
// Daily // Daily
@ -1637,10 +1643,11 @@ mod tests {
find_first_date_on_or_after(date(2023, 11, 8), DateFreq::Daily)?, find_first_date_on_or_after(date(2023, 11, 8), DateFreq::Daily)?,
date(2023, 11, 8) date(2023, 11, 8)
); );
// Sat -> Sat
assert_eq!( assert_eq!(
find_first_date_on_or_after(date(2023, 11, 11), DateFreq::Daily)?, find_first_date_on_or_after(date(2023, 11, 11), DateFreq::Daily)?,
date(2023, 11, 11) date(2023, 11, 11)
); // Sat -> Sat );
// Weekly Mon // Weekly Mon
assert_eq!( assert_eq!(
@ -1651,10 +1658,11 @@ mod tests {
find_first_date_on_or_after(date(2023, 11, 13), DateFreq::WeeklyMonday)?, find_first_date_on_or_after(date(2023, 11, 13), DateFreq::WeeklyMonday)?,
date(2023, 11, 13) date(2023, 11, 13)
); );
// Sun -> Mon
assert_eq!( assert_eq!(
find_first_date_on_or_after(date(2023, 11, 12), DateFreq::WeeklyMonday)?, find_first_date_on_or_after(date(2023, 11, 12), DateFreq::WeeklyMonday)?,
date(2023, 11, 13) date(2023, 11, 13)
); // Sun -> Mon );
// Weekly Fri // Weekly Fri
assert_eq!( assert_eq!(
@ -1683,10 +1691,11 @@ mod tests {
find_first_date_on_or_after(date(2023, 12, 15), DateFreq::MonthStart)?, find_first_date_on_or_after(date(2023, 12, 15), DateFreq::MonthStart)?,
date(2024, 1, 1) date(2024, 1, 1)
); );
// Oct 1 -> Oct 1
assert_eq!( assert_eq!(
find_first_date_on_or_after(date(2023, 10, 1), DateFreq::MonthStart)?, find_first_date_on_or_after(date(2023, 10, 1), DateFreq::MonthStart)?,
date(2023, 10, 1) date(2023, 10, 1)
); // Oct 1 -> Oct 1 );
// Month End // Month End
assert_eq!( assert_eq!(
@ -1697,18 +1706,21 @@ mod tests {
find_first_date_on_or_after(date(2023, 11, 15), DateFreq::MonthEnd)?, find_first_date_on_or_after(date(2023, 11, 15), DateFreq::MonthEnd)?,
date(2023, 11, 30) date(2023, 11, 30)
); );
// Dec 31 -> Dec 31
assert_eq!( assert_eq!(
find_first_date_on_or_after(date(2023, 12, 31), DateFreq::MonthEnd)?, find_first_date_on_or_after(date(2023, 12, 31), DateFreq::MonthEnd)?,
date(2023, 12, 31) date(2023, 12, 31)
); // Dec 31 -> Dec 31 );
// Mid Feb (Leap) -> Feb 29
assert_eq!( assert_eq!(
find_first_date_on_or_after(date(2024, 2, 15), DateFreq::MonthEnd)?, find_first_date_on_or_after(date(2024, 2, 15), DateFreq::MonthEnd)?,
date(2024, 2, 29) date(2024, 2, 29)
); // Mid Feb (Leap) -> Feb 29 );
// Feb 29 -> Feb 29
assert_eq!( assert_eq!(
find_first_date_on_or_after(date(2024, 2, 29), DateFreq::MonthEnd)?, find_first_date_on_or_after(date(2024, 2, 29), DateFreq::MonthEnd)?,
date(2024, 2, 29) date(2024, 2, 29)
); // Feb 29 -> Feb 29 );
// Quarter Start // Quarter Start
assert_eq!( assert_eq!(
@ -2139,12 +2151,15 @@ mod tests {
// find_first returns start_date (YE MAX-1) // find_first returns start_date (YE MAX-1)
assert_eq!(generator.next(), Some(start_date)); assert_eq!(generator.next(), Some(start_date));
// find_next finds YE(MAX) // find_next finds YE(MAX)
assert_eq!(generator.next(), Some(last_day_of_year(start_year)?)); // Should be MAX assert_eq!(generator.next(), Some(last_day_of_year(start_year)?));
// find_next tries YE(MAX+1) - this call to find_next_date fails internally // Should be MAX
assert_eq!(generator.next(), None); // Returns None because internal find_next_date failed // find_next tries YE(MAX+1) - this call to find_next_date fails internally
assert_eq!(generator.next(), None);
// Returns None because internal find_next_date failed
// State after the *first* None is returned: // State after the *first* None is returned:
assert_eq!(generator.periods_remaining, 0); // Corrected assertion // Corrected assertion
assert_eq!(generator.periods_remaining, 0);
assert!(generator.next_date_candidate.is_none()); assert!(generator.next_date_candidate.is_none());
// Calling next() again should also return None // Calling next() again should also return None

View File

@ -1,8 +1,5 @@
pub mod bdates; pub mod bdates;
// pub use bdates::{BDateFreq, BDatesList, BDatesGenerator};
pub mod dates; pub mod dates;
// pub use dates::{DateFreq, DatesList, DatesGenerator};
// pub mod base; pub use bdates::{BDateFreq, BDatesGenerator, BDatesList};
// pub use base::{BDatesGenerator, BDatesList}; pub use dates::{DateFreq, DatesGenerator, DatesList};
// pub use base::{DateFreq, DatesGenerator, DatesList};

View File

@ -1,5 +1,4 @@
pub mod dateutils; pub mod dateutils;
pub use dateutils::bdates::{BDatesGenerator, BDatesList};
pub use dateutils::dates::{DateFreq, DatesGenerator, DatesList}; pub use dateutils::{BDateFreq, BDatesGenerator, BDatesList};
// pub use dateutils::{BDatesGenerator, BDatesList}; pub use dateutils::{DateFreq, DatesGenerator, DatesList};
// pub use dateutils::{DateFreq, DatesGenerator, DatesList};