use chrono::NaiveDate;
use chrono::{Datelike, Weekday};
use polars::prelude::*;
use std::collections::HashMap;
use std::error::Error;

/// Get the minimum and maximum dates from a date column in a DataFrame.
///
/// # Arguments
/// * `df` - DataFrame that holds the column.
/// * `date_col` - Name of the column to inspect; it must have dtype `Date`.
///
/// # Returns
/// `(min_date, max_date)` of the column.
///
/// # Errors
/// * the column lookup or the cast to `Int32` fails,
/// * the column is not of `Date` dtype,
/// * no minimum / maximum value is found,
/// * the minimum or maximum cannot be represented as a `NaiveDate`.
pub fn get_min_max_real_dates(
    df: &DataFrame,
    date_col: &str,
) -> Result<(NaiveDate, NaiveDate), Box<dyn Error>> {
    let date_series = df.column(date_col)?;

    // Guard clause: anything that is not a `Date` column is rejected up front.
    if !matches!(date_series.dtype(), DataType::Date) {
        return Err(Box::new(polars::error::PolarsError::ComputeError(
            "The column is not of Date type".into(),
        )));
    }

    // Convert the `date` series to an i32 (days since 1970-01-01).
    let date_as_days = date_series.cast(&DataType::Int32)?;
    let days = date_as_days.i32()?;
    let min_days = days.min().ok_or("No minimum value found")?;
    let max_days = days.max().ok_or("No maximum value found")?;

    // Convert the day offsets back to `NaiveDate`, using a single shared epoch.
    let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).expect("1970-01-01 is a valid calendar date");
    let min_date = epoch
        .checked_add_signed(chrono::Duration::days(i64::from(min_days)))
        .ok_or("Invalid minimum date")?;
    let max_date = epoch
        .checked_add_signed(chrono::Duration::days(i64::from(max_days)))
        .ok_or("Invalid maximum date")?;

    Ok((min_date, max_date))
}

/// Get the business dates between two dates.
pub fn get_bdates_list( start_date: String, end_date: String, ) -> Result, Box> { let start_date = NaiveDate::parse_from_str(&start_date, "%Y-%m-%d")?; let end_date = NaiveDate::parse_from_str(&end_date, "%Y-%m-%d")?; let mut business_days = Vec::new(); let mut current_date = start_date; while current_date <= end_date { // Check if the current date is a business day (not Saturday or Sunday) if current_date.weekday() != Weekday::Sat && current_date.weekday() != Weekday::Sun { business_days.push(current_date); } current_date = current_date.succ_opt().ok_or(format!( "Failed to get the next day for : {:?}", current_date ))?; } Ok(business_days) } #[derive(Debug, Clone, Copy)] pub enum BDateFreq { Daily, WeeklyMonday, MonthStart, QuarterStart, YearStart, MonthEnd, QuarterEnd, WeeklyFriday, YearEnd, } impl BDateFreq { pub fn from_str(freq: &str) -> Result> { match freq { "D" => Ok(BDateFreq::Daily), "W" => Ok(BDateFreq::WeeklyMonday), "M" => Ok(BDateFreq::MonthStart), "Q" => Ok(BDateFreq::QuarterStart), "A" => Ok(BDateFreq::YearStart), "ME" => Ok(BDateFreq::MonthEnd), "QE" => Ok(BDateFreq::QuarterEnd), "WF" => Ok(BDateFreq::WeeklyFriday), "YE" => Ok(BDateFreq::YearEnd), _ => Err("Invalid frequency specified".into()), } } pub fn agg_type(&self) -> AggregationType { match self { BDateFreq::Daily | BDateFreq::WeeklyMonday | BDateFreq::MonthStart | BDateFreq::QuarterStart | BDateFreq::YearStart => AggregationType::Start, BDateFreq::WeeklyFriday | BDateFreq::MonthEnd | BDateFreq::QuarterEnd | BDateFreq::YearEnd => AggregationType::End, } } } #[derive(Debug, Clone, Copy)] pub enum AggregationType { Start, // Indicates picking the first date in a group. End, // Indicates picking the last date in a group. } // Map a BDateFreq to an AggregationType. fn compute_group_key(d: NaiveDate, freq: BDateFreq) -> String { match freq { // For Daily, each date is its own group. BDateFreq::Daily => format!("{}", d), // For weekly grouping, we use ISO week information. 
BDateFreq::WeeklyMonday | BDateFreq::WeeklyFriday => { let iso = d.iso_week(); format!("{}-W{:02}", iso.year(), iso.week()) } // Group by Year-Month. BDateFreq::MonthStart | BDateFreq::MonthEnd => { format!("{}-M{:02}", d.year(), d.month()) } // Group by Year-Quarter. BDateFreq::QuarterStart | BDateFreq::QuarterEnd => { let quarter = (d.month() - 1) / 3 + 1; format!("{}-Q{}", d.year(), quarter) } // Group by Year. BDateFreq::YearStart | BDateFreq::YearEnd => format!("{}", d.year()), } } pub fn get_bdates_series_default( start_date: String, end_date: String, freq: Option, ) -> Result> { let freq = freq.unwrap_or_else(|| "D".to_string()); let freq = BDateFreq::from_str(&freq)?; get_bdates_series(start_date, end_date, freq) } /// Get the business dates between two dates as a Series. pub fn get_bdates_series( start_date: String, end_date: String, freq: BDateFreq, ) -> Result> { let business_days = get_bdates_list(start_date, end_date)?; let group_keys: Vec = business_days .iter() .map(|&d| compute_group_key(d, freq)) .collect(); let df = DataFrame::new(vec![ Column::new("bdates".into(), business_days), Column::new("group".into(), group_keys), ])?; let gb = df.lazy().group_by(["group"]); let aggx = match freq.agg_type() { AggregationType::Start => gb.agg([col("bdates").first()]), AggregationType::End => gb.agg([col("bdates").last()]), }; let result = aggx.collect()?; let result = result .column("bdates")? .as_series() .ok_or("Column 'bdates' not found")? .clone(); let result = result.sort(SortOptions { descending: false, nulls_last: false, multithreaded: false, maintain_order: false, })?; Ok(result) } /// Get the business dates from a date column in a DataFrame. /// Identify business days, bucket them by period, and pick the first available date from each period. 
pub fn get_bdates_from_col(date_col: &Series, freq: &str) -> Result> { // Ensure the column is of Date type if date_col.dtype() != &DataType::Date { return Err("The column is not of Date type".into()); } // Step 1: Identify business days (exclude weekends) let date_as_days = date_col.cast(&DataType::Int32)?; let business_days: Vec = date_as_days .i32()? .into_iter() .filter_map(|opt_days| { opt_days.map(|days| { NaiveDate::from_ymd_opt(1970, 1, 1).unwrap() + chrono::Duration::days(days as i64) }) }) .filter(|date| { // Exclude weekends (Saturday and Sunday) let weekday = date.weekday(); weekday != Weekday::Sat && weekday != Weekday::Sun }) .collect(); // Step 2: Bucket dates by period let mut buckets: HashMap> = HashMap::new(); for date in &business_days { let bucket_key = match freq { "D" => date.format("%Y-%m-%d").to_string(), "W" => format!("{}-W{:02}", date.year(), date.iso_week().week()), "M" => date.format("%Y-%m").to_string(), "Q" => format!("{}-Q{}", date.year(), (date.month() - 1) / 3 + 1), "A" => date.year().to_string(), _ => return Err("Invalid frequency specified".into()), }; buckets.entry(bucket_key).or_default().push(*date); } // Step 3: Pick the first available date from each bucket let mut selected_dates: Vec = Vec::new(); for (_, mut dates) in buckets { dates.sort(); // Ensure dates are sorted within the bucket if let Some(first_date) = dates.first() { selected_dates.push(*first_date); } } // Step 4: Convert selected dates back to a Series of Date type let bdates_series = Series::new( "bdates".into(), selected_dates .into_iter() .map(|date| date.format("%Y-%m-%d").to_string()) // Format as strings .collect::>(), ) .cast(&DataType::Date)?; // Cast to Date type Ok(bdates_series) }