diff --git a/src/utils/misc.rs b/src/utils/misc.rs index 1e28163..ded3a79 100644 --- a/src/utils/misc.rs +++ b/src/utils/misc.rs @@ -43,53 +43,65 @@ pub fn get_min_max_real_dates( ))) } } - /// Get the business dates from a date column in a DataFrame. +/// Identify business days, bucket them by period, and pick the first available date from each period. pub fn get_bdates_from_col(date_col: &Series, freq: &str) -> Result> { - let dates = date_col - .date()? - .into_iter() - .filter_map(|opt| opt.map(|d| NaiveDate::from_num_days_from_ce_opt(d as i32))) - .filter(|d| { - if let Some(date) = d { - let wd = date.weekday(); - wd != Weekday::Sat && wd != Weekday::Sun - } else { - false - } - }) - .filter_map(|opt| opt) // Filter out None and unwrap Some - .collect::>(); + // Ensure the column is of Date type + if date_col.dtype() != &DataType::Date { + return Err("The column is not of Date type".into()); + } - let aligned_dates: Vec = dates - .iter() - .map(|date| match freq { - "D" => *date, - "W" => { - let weekday = date.weekday().num_days_from_monday(); // Get the weekday directly - *date - chrono::Duration::days(weekday as i64) - } - "M" => NaiveDate::from_ymd_opt(date.year(), date.month(), 1) - .unwrap_or_else(|| NaiveDate::from_ymd_opt(1970, 1, 1).unwrap()), - "Q" => { - let quarter = (date.month0() / 3) + 1; - let month = match quarter { - 1 => 1, - 2 => 4, - 3 => 7, - 4 => 10, - _ => unreachable!(), - }; - NaiveDate::from_ymd_opt(date.year(), month, 1) - .unwrap_or_else(|| NaiveDate::from_ymd_opt(1970, 1, 1).unwrap()) - } - "A" => NaiveDate::from_ymd_opt(date.year(), 1, 1) - .unwrap_or_else(|| NaiveDate::from_ymd_opt(1970, 1, 1).unwrap()), - _ => *date, // fallback + // Step 1: Identify business days (exclude weekends) + let date_as_days = date_col.cast(&DataType::Int32)?; + let business_days: Vec = date_as_days + .i32()? + .into_iter() + .filter_map(|opt_days| { + opt_days.map(|days| { + NaiveDate::from_ymd_opt(1970, 1, 1).unwrap() + chrono::Duration::days(days as i64) + }) + }) + .filter(|date| { + // Exclude weekends (Saturday and Sunday) + let weekday = date.weekday(); + weekday != Weekday::Sat && weekday != Weekday::Sun }) .collect(); - Ok(DateChunked::from_naive_date(date_col.name().clone(), aligned_dates).into_series()) + // Step 2: Bucket dates by period + let mut buckets: HashMap> = HashMap::new(); + for date in &business_days { + let bucket_key = match freq { + "D" => date.format("%Y-%m-%d").to_string(), + "W" => format!("{}-W{:02}", date.year(), date.iso_week().week()), + "M" => date.format("%Y-%m").to_string(), + "Q" => format!("{}-Q{}", date.year(), (date.month() - 1) / 3 + 1), + "A" => date.year().to_string(), + _ => return Err("Invalid frequency specified".into()), + }; + buckets.entry(bucket_key).or_default().push(*date); + } + + // Step 3: Pick the first available date from each bucket + let mut selected_dates: Vec = Vec::new(); + for (_, mut dates) in buckets { + dates.sort(); // Ensure dates are sorted within the bucket + if let Some(first_date) = dates.first() { + selected_dates.push(*first_date); + } + } + + // Step 4: Convert selected dates back to a Series of Date type + let bdates_series = Series::new( + "bdates".into(), + selected_dates + .into_iter() + .map(|date| date.format("%Y-%m-%d").to_string()) // Format as strings + .collect::>(), + ) + .cast(&DataType::Date)?; // Cast to Date type + + Ok(bdates_series) } /// Get the `cid` from a ticker string.