diff --git a/src/utils/dateutils.rs b/src/utils/dateutils.rs
new file mode 100644
index 0000000..9f4c388
--- /dev/null
+++ b/src/utils/dateutils.rs
@@ -0,0 +1,167 @@
+use chrono::NaiveDate;
+use chrono::{Datelike, Weekday};
+use polars::prelude::*;
+use std::collections::HashMap;
+use std::error::Error;
+
+/// Converts a count of days since 1970-01-01 into a `NaiveDate`.
+///
+/// Returns `None` when the offset falls outside the range `NaiveDate` can represent.
+fn date_from_epoch_days(days: i32) -> Option<NaiveDate> {
+    NaiveDate::from_ymd_opt(1970, 1, 1)?
+        .checked_add_signed(chrono::Duration::days(i64::from(days)))
+}
+
+/// Returns `true` unless `date` falls on a Saturday or Sunday.
+fn is_business_day(date: NaiveDate) -> bool {
+    !matches!(date.weekday(), Weekday::Sat | Weekday::Sun)
+}
+
+/// Period granularity accepted by `get_bdates_from_col`.
+#[derive(Clone, Copy)]
+enum BucketFreq {
+    Day,
+    Week,
+    Month,
+    Quarter,
+    Year,
+}
+
+impl BucketFreq {
+    /// Parses the codes "D", "W", "M", "Q" and "A"; any other code yields `None`.
+    fn parse(freq: &str) -> Option<Self> {
+        match freq {
+            "D" => Some(Self::Day),
+            "W" => Some(Self::Week),
+            "M" => Some(Self::Month),
+            "Q" => Some(Self::Quarter),
+            "A" => Some(Self::Year),
+            _ => None,
+        }
+    }
+
+    /// Returns a key shared by every date in the same period as `date`.
+    fn key(self, date: NaiveDate) -> (i32, u32) {
+        match self {
+            Self::Day => (date.year(), date.ordinal()),
+            Self::Week => {
+                // Pair the ISO week number with the ISO week-year, not the calendar year:
+                // dates around New Year can belong to a week of the adjacent year.
+                let iso_week = date.iso_week();
+                (iso_week.year(), iso_week.week())
+            }
+            Self::Month => (date.year(), date.month()),
+            Self::Quarter => (date.year(), (date.month() - 1) / 3 + 1),
+            Self::Year => (date.year(), 0),
+        }
+    }
+}
+
+/// Get the minimum and maximum dates from a date column in a DataFrame.
+///
+/// # Errors
+///
+/// Fails when `date_col` cannot be looked up, is not of `Date` type, has no minimum or
+/// maximum value, or holds a day offset that cannot be converted to a `NaiveDate`.
+pub fn get_min_max_real_dates(
+    df: &DataFrame,
+    date_col: &str,
+) -> Result<(NaiveDate, NaiveDate), Box<dyn Error>> {
+    let date_series = df.column(date_col)?;
+    if !matches!(date_series.dtype(), DataType::Date) {
+        return Err(Box::new(polars::error::PolarsError::ComputeError(
+            "The column is not of Date type".into(),
+        )));
+    }
+
+    // Casting to Int32 exposes each date as days since 1970-01-01.
+    let date_as_days = date_series.cast(&DataType::Int32)?;
+    let days = date_as_days.i32()?;
+    let min_days = days.min().ok_or("No minimum value found")?;
+    let max_days = days.max().ok_or("No maximum value found")?;
+
+    let min_date = date_from_epoch_days(min_days).ok_or("Invalid minimum date")?;
+    let max_date = date_from_epoch_days(max_days).ok_or("Invalid maximum date")?;
+    Ok((min_date, max_date))
+}
+
+/// Get the business dates between two dates.
+///
+/// Both bounds are `%Y-%m-%d` strings and are inclusive. The result is empty when
+/// `start_date` is after `end_date`.
+///
+/// # Errors
+///
+/// Fails when either bound cannot be parsed as `%Y-%m-%d`.
+pub fn get_bdates_list(
+    start_date: String,
+    end_date: String,
+) -> Result<Vec<NaiveDate>, Box<dyn Error>> {
+    let start_date = NaiveDate::parse_from_str(&start_date, "%Y-%m-%d")?;
+    let end_date = NaiveDate::parse_from_str(&end_date, "%Y-%m-%d")?;
+
+    let mut business_days = Vec::new();
+    let mut current_date = start_date;
+    while current_date <= end_date {
+        if is_business_day(current_date) {
+            business_days.push(current_date);
+        }
+        // Running out of representable dates ends the range; it is not a failure.
+        match current_date.succ_opt() {
+            Some(next_date) => current_date = next_date,
+            None => break,
+        }
+    }
+    Ok(business_days)
+}
+
+/// Get the business dates between two dates as a Series.
+///
+/// The returned Series is named `business_dates`; see [`get_bdates_list`] for the bounds
+/// and error conditions.
+pub fn get_bdates_series(start_date: String, end_date: String) -> Result<Series, Box<dyn Error>> {
+    let business_days = get_bdates_list(start_date, end_date)?;
+    Ok(Series::new("business_dates".into(), business_days))
+}
+
+/// Get the business dates from a date column in a DataFrame.
+///
+/// Weekend dates and nulls are dropped, the remaining dates are bucketed by `freq`
+/// ("D", "W", "M", "Q" or "A"), and the earliest date of each bucket is kept. The result
+/// is named `bdates`, cast to `Date`, and sorted in ascending order.
+///
+/// # Errors
+///
+/// Fails when `date_col` is not of `Date` type, when `freq` is not a supported code, or
+/// when a value cannot be converted to a `NaiveDate`.
+pub fn get_bdates_from_col(date_col: &Series, freq: &str) -> Result<Series, Box<dyn Error>> {
+    // Ensure the column is of Date type
+    if date_col.dtype() != &DataType::Date {
+        return Err("The column is not of Date type".into());
+    }
+    // Validate once up front so an unsupported code is rejected even for an empty column.
+    let frequency = BucketFreq::parse(freq).ok_or("Invalid frequency specified")?;
+
+    // Track only the earliest business day (as days since 1970-01-01) seen per period.
+    let date_as_days = date_col.cast(&DataType::Int32)?;
+    let mut earliest: HashMap<(i32, u32), i32> = HashMap::new();
+    for days in date_as_days.i32()?.into_iter().flatten() {
+        let date = date_from_epoch_days(days).ok_or("Invalid date in column")?;
+        if !is_business_day(date) {
+            continue;
+        }
+        earliest
+            .entry(frequency.key(date))
+            .and_modify(|first| *first = (*first).min(days))
+            .or_insert(days);
+    }
+
+    // HashMap iteration order is unspecified, so sort to make the output deterministic.
+    let mut selected_days: Vec<i32> = earliest.into_values().collect();
+    selected_days.sort_unstable();
+
+    // Casting the day offsets back to Date reverses the Int32 cast above without a
+    // round trip through formatted strings.
+    let bdates_series = Series::new("bdates".into(), selected_days).cast(&DataType::Date)?;
+    Ok(bdates_series)
+}
diff --git a/src/utils/misc.rs b/src/utils/misc.rs index ded3a79..79daf59 100644 --- a/src/utils/misc.rs +++ b/src/utils/misc.rs @@ -1,5 +1,3 @@ -use chrono::NaiveDate; -use chrono::{Datelike, Weekday}; use polars::prelude::*; use std::collections::HashMap; use std::error::Error; @@ -14,96 +12,6 @@ pub fn split_ticker(ticker: String) -> Result<(String, String), Box> Ok((parts[0].to_string(), parts[1].to_string())) } -/// Get the minimum and maximum dates from a date column in a DataFrame. 
-pub fn get_min_max_real_dates( - df: &DataFrame, - date_col: &str, -) -> Result<(NaiveDate, NaiveDate), Box> { - let date_series = df.column(date_col)?; - if let DataType::Date = date_series.dtype() { - // Convert the `date` series to an i32 (days since 1970-01-01) - let date_as_days = date_series.cast(&DataType::Int32)?; - let min_days = date_as_days.i32()?.min().ok_or("No minimum value found")?; - let max_days = date_as_days.i32()?.max().ok_or("No maximum value found")?; - - // Convert the days back to `NaiveDate` - let min_date = NaiveDate::from_ymd_opt(1970, 1, 1) - .unwrap() - .checked_add_signed(chrono::Duration::days(min_days as i64)) - .ok_or("Invalid minimum date")?; - let max_date = NaiveDate::from_ymd_opt(1970, 1, 1) - .unwrap() - .checked_add_signed(chrono::Duration::days(max_days as i64)) - .ok_or("Invalid maximum date")?; - - Ok((min_date, max_date)) - } else { - Err(Box::new(polars::error::PolarsError::ComputeError( - "The column is not of Date type".into(), - ))) - } -} -/// Get the business dates from a date column in a DataFrame. -/// Identify business days, bucket them by period, and pick the first available date from each period. -pub fn get_bdates_from_col(date_col: &Series, freq: &str) -> Result> { - // Ensure the column is of Date type - if date_col.dtype() != &DataType::Date { - return Err("The column is not of Date type".into()); - } - - // Step 1: Identify business days (exclude weekends) - let date_as_days = date_col.cast(&DataType::Int32)?; - let business_days: Vec = date_as_days - .i32()? 
- .into_iter() - .filter_map(|opt_days| { - opt_days.map(|days| { - NaiveDate::from_ymd_opt(1970, 1, 1).unwrap() + chrono::Duration::days(days as i64) - }) - }) - .filter(|date| { - // Exclude weekends (Saturday and Sunday) - let weekday = date.weekday(); - weekday != Weekday::Sat && weekday != Weekday::Sun - }) - .collect(); - - // Step 2: Bucket dates by period - let mut buckets: HashMap> = HashMap::new(); - for date in &business_days { - let bucket_key = match freq { - "D" => date.format("%Y-%m-%d").to_string(), - "W" => format!("{}-W{:02}", date.year(), date.iso_week().week()), - "M" => date.format("%Y-%m").to_string(), - "Q" => format!("{}-Q{}", date.year(), (date.month() - 1) / 3 + 1), - "A" => date.year().to_string(), - _ => return Err("Invalid frequency specified".into()), - }; - buckets.entry(bucket_key).or_default().push(*date); - } - - // Step 3: Pick the first available date from each bucket - let mut selected_dates: Vec = Vec::new(); - for (_, mut dates) in buckets { - dates.sort(); // Ensure dates are sorted within the bucket - if let Some(first_date) = dates.first() { - selected_dates.push(*first_date); - } - } - - // Step 4: Convert selected dates back to a Series of Date type - let bdates_series = Series::new( - "bdates".into(), - selected_dates - .into_iter() - .map(|date| date.format("%Y-%m-%d").to_string()) // Format as strings - .collect::>(), - ) - .cast(&DataType::Date)?; // Cast to Date type - - Ok(bdates_series) -} - /// Get the `cid` from a ticker string. #[allow(dead_code)] pub fn get_cid(ticker: String) -> Result> {