mirror of
https://github.com/Magnus167/msyrs.git
synced 2025-08-20 04:20:00 +00:00
Remove date utility functions from misc.rs and move them to dateutils.rs
This commit is contained in:
parent
b4bc443f64
commit
6c72a6ef0a
126
src/utils/dateutils.rs
Normal file
126
src/utils/dateutils.rs
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
use chrono::NaiveDate;
|
||||||
|
use chrono::{Datelike, Weekday};
|
||||||
|
use polars::prelude::*;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::error::Error;
|
||||||
|
|
||||||
|
/// Get the minimum and maximum dates from a date column in a DataFrame.
|
||||||
|
pub fn get_min_max_real_dates(
|
||||||
|
df: &DataFrame,
|
||||||
|
date_col: &str,
|
||||||
|
) -> Result<(NaiveDate, NaiveDate), Box<dyn Error>> {
|
||||||
|
let date_series = df.column(date_col)?;
|
||||||
|
if let DataType::Date = date_series.dtype() {
|
||||||
|
// Convert the `date` series to an i32 (days since 1970-01-01)
|
||||||
|
let date_as_days = date_series.cast(&DataType::Int32)?;
|
||||||
|
let min_days = date_as_days.i32()?.min().ok_or("No minimum value found")?;
|
||||||
|
let max_days = date_as_days.i32()?.max().ok_or("No maximum value found")?;
|
||||||
|
|
||||||
|
// Convert the days back to `NaiveDate`
|
||||||
|
let min_date = NaiveDate::from_ymd_opt(1970, 1, 1)
|
||||||
|
.unwrap()
|
||||||
|
.checked_add_signed(chrono::Duration::days(min_days as i64))
|
||||||
|
.ok_or("Invalid minimum date")?;
|
||||||
|
let max_date = NaiveDate::from_ymd_opt(1970, 1, 1)
|
||||||
|
.unwrap()
|
||||||
|
.checked_add_signed(chrono::Duration::days(max_days as i64))
|
||||||
|
.ok_or("Invalid maximum date")?;
|
||||||
|
|
||||||
|
Ok((min_date, max_date))
|
||||||
|
} else {
|
||||||
|
Err(Box::new(polars::error::PolarsError::ComputeError(
|
||||||
|
"The column is not of Date type".into(),
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the business dates between two dates.
|
||||||
|
pub fn get_bdates_list(
|
||||||
|
start_date: String,
|
||||||
|
end_date: String,
|
||||||
|
) -> Result<Vec<NaiveDate>, Box<dyn Error>> {
|
||||||
|
let start_date = NaiveDate::parse_from_str(&start_date, "%Y-%m-%d")?;
|
||||||
|
let end_date = NaiveDate::parse_from_str(&end_date, "%Y-%m-%d")?;
|
||||||
|
|
||||||
|
let mut business_days = Vec::new();
|
||||||
|
let mut current_date = start_date;
|
||||||
|
while current_date <= end_date {
|
||||||
|
// Check if the current date is a business day (not Saturday or Sunday)
|
||||||
|
if current_date.weekday() != Weekday::Sat && current_date.weekday() != Weekday::Sun {
|
||||||
|
business_days.push(current_date);
|
||||||
|
}
|
||||||
|
current_date = current_date.succ_opt().ok_or(format!(
|
||||||
|
"Failed to get the next day for : {:?}",
|
||||||
|
current_date
|
||||||
|
))?;
|
||||||
|
}
|
||||||
|
Ok(business_days)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the business dates between two dates as a Series.
|
||||||
|
pub fn get_bdates_series(start_date: String, end_date: String) -> Result<Series, Box<dyn Error>> {
|
||||||
|
let business_days = get_bdates_list(start_date, end_date)?;
|
||||||
|
let series = Series::new("business_dates".into(), business_days);
|
||||||
|
Ok(series)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the business dates from a date column in a DataFrame.
|
||||||
|
/// Identify business days, bucket them by period, and pick the first available date from each period.
|
||||||
|
pub fn get_bdates_from_col(date_col: &Series, freq: &str) -> Result<Series, Box<dyn Error>> {
|
||||||
|
// Ensure the column is of Date type
|
||||||
|
if date_col.dtype() != &DataType::Date {
|
||||||
|
return Err("The column is not of Date type".into());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 1: Identify business days (exclude weekends)
|
||||||
|
let date_as_days = date_col.cast(&DataType::Int32)?;
|
||||||
|
let business_days: Vec<NaiveDate> = date_as_days
|
||||||
|
.i32()?
|
||||||
|
.into_iter()
|
||||||
|
.filter_map(|opt_days| {
|
||||||
|
opt_days.map(|days| {
|
||||||
|
NaiveDate::from_ymd_opt(1970, 1, 1).unwrap() + chrono::Duration::days(days as i64)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.filter(|date| {
|
||||||
|
// Exclude weekends (Saturday and Sunday)
|
||||||
|
let weekday = date.weekday();
|
||||||
|
weekday != Weekday::Sat && weekday != Weekday::Sun
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Step 2: Bucket dates by period
|
||||||
|
let mut buckets: HashMap<String, Vec<NaiveDate>> = HashMap::new();
|
||||||
|
for date in &business_days {
|
||||||
|
let bucket_key = match freq {
|
||||||
|
"D" => date.format("%Y-%m-%d").to_string(),
|
||||||
|
"W" => format!("{}-W{:02}", date.year(), date.iso_week().week()),
|
||||||
|
"M" => date.format("%Y-%m").to_string(),
|
||||||
|
"Q" => format!("{}-Q{}", date.year(), (date.month() - 1) / 3 + 1),
|
||||||
|
"A" => date.year().to_string(),
|
||||||
|
_ => return Err("Invalid frequency specified".into()),
|
||||||
|
};
|
||||||
|
buckets.entry(bucket_key).or_default().push(*date);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 3: Pick the first available date from each bucket
|
||||||
|
let mut selected_dates: Vec<NaiveDate> = Vec::new();
|
||||||
|
for (_, mut dates) in buckets {
|
||||||
|
dates.sort(); // Ensure dates are sorted within the bucket
|
||||||
|
if let Some(first_date) = dates.first() {
|
||||||
|
selected_dates.push(*first_date);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 4: Convert selected dates back to a Series of Date type
|
||||||
|
let bdates_series = Series::new(
|
||||||
|
"bdates".into(),
|
||||||
|
selected_dates
|
||||||
|
.into_iter()
|
||||||
|
.map(|date| date.format("%Y-%m-%d").to_string()) // Format as strings
|
||||||
|
.collect::<Vec<String>>(),
|
||||||
|
)
|
||||||
|
.cast(&DataType::Date)?; // Cast to Date type
|
||||||
|
|
||||||
|
Ok(bdates_series)
|
||||||
|
}
|
@ -1,5 +1,3 @@
|
|||||||
use chrono::NaiveDate;
|
|
||||||
use chrono::{Datelike, Weekday};
|
|
||||||
use polars::prelude::*;
|
use polars::prelude::*;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
@ -14,96 +12,6 @@ pub fn split_ticker(ticker: String) -> Result<(String, String), Box<dyn Error>>
|
|||||||
Ok((parts[0].to_string(), parts[1].to_string()))
|
Ok((parts[0].to_string(), parts[1].to_string()))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get the minimum and maximum dates from a date column in a DataFrame.
|
|
||||||
pub fn get_min_max_real_dates(
|
|
||||||
df: &DataFrame,
|
|
||||||
date_col: &str,
|
|
||||||
) -> Result<(NaiveDate, NaiveDate), Box<dyn Error>> {
|
|
||||||
let date_series = df.column(date_col)?;
|
|
||||||
if let DataType::Date = date_series.dtype() {
|
|
||||||
// Convert the `date` series to an i32 (days since 1970-01-01)
|
|
||||||
let date_as_days = date_series.cast(&DataType::Int32)?;
|
|
||||||
let min_days = date_as_days.i32()?.min().ok_or("No minimum value found")?;
|
|
||||||
let max_days = date_as_days.i32()?.max().ok_or("No maximum value found")?;
|
|
||||||
|
|
||||||
// Convert the days back to `NaiveDate`
|
|
||||||
let min_date = NaiveDate::from_ymd_opt(1970, 1, 1)
|
|
||||||
.unwrap()
|
|
||||||
.checked_add_signed(chrono::Duration::days(min_days as i64))
|
|
||||||
.ok_or("Invalid minimum date")?;
|
|
||||||
let max_date = NaiveDate::from_ymd_opt(1970, 1, 1)
|
|
||||||
.unwrap()
|
|
||||||
.checked_add_signed(chrono::Duration::days(max_days as i64))
|
|
||||||
.ok_or("Invalid maximum date")?;
|
|
||||||
|
|
||||||
Ok((min_date, max_date))
|
|
||||||
} else {
|
|
||||||
Err(Box::new(polars::error::PolarsError::ComputeError(
|
|
||||||
"The column is not of Date type".into(),
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/// Get the business dates from a date column in a DataFrame.
|
|
||||||
/// Identify business days, bucket them by period, and pick the first available date from each period.
|
|
||||||
pub fn get_bdates_from_col(date_col: &Series, freq: &str) -> Result<Series, Box<dyn Error>> {
|
|
||||||
// Ensure the column is of Date type
|
|
||||||
if date_col.dtype() != &DataType::Date {
|
|
||||||
return Err("The column is not of Date type".into());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Step 1: Identify business days (exclude weekends)
|
|
||||||
let date_as_days = date_col.cast(&DataType::Int32)?;
|
|
||||||
let business_days: Vec<NaiveDate> = date_as_days
|
|
||||||
.i32()?
|
|
||||||
.into_iter()
|
|
||||||
.filter_map(|opt_days| {
|
|
||||||
opt_days.map(|days| {
|
|
||||||
NaiveDate::from_ymd_opt(1970, 1, 1).unwrap() + chrono::Duration::days(days as i64)
|
|
||||||
})
|
|
||||||
})
|
|
||||||
.filter(|date| {
|
|
||||||
// Exclude weekends (Saturday and Sunday)
|
|
||||||
let weekday = date.weekday();
|
|
||||||
weekday != Weekday::Sat && weekday != Weekday::Sun
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
// Step 2: Bucket dates by period
|
|
||||||
let mut buckets: HashMap<String, Vec<NaiveDate>> = HashMap::new();
|
|
||||||
for date in &business_days {
|
|
||||||
let bucket_key = match freq {
|
|
||||||
"D" => date.format("%Y-%m-%d").to_string(),
|
|
||||||
"W" => format!("{}-W{:02}", date.year(), date.iso_week().week()),
|
|
||||||
"M" => date.format("%Y-%m").to_string(),
|
|
||||||
"Q" => format!("{}-Q{}", date.year(), (date.month() - 1) / 3 + 1),
|
|
||||||
"A" => date.year().to_string(),
|
|
||||||
_ => return Err("Invalid frequency specified".into()),
|
|
||||||
};
|
|
||||||
buckets.entry(bucket_key).or_default().push(*date);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Step 3: Pick the first available date from each bucket
|
|
||||||
let mut selected_dates: Vec<NaiveDate> = Vec::new();
|
|
||||||
for (_, mut dates) in buckets {
|
|
||||||
dates.sort(); // Ensure dates are sorted within the bucket
|
|
||||||
if let Some(first_date) = dates.first() {
|
|
||||||
selected_dates.push(*first_date);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Step 4: Convert selected dates back to a Series of Date type
|
|
||||||
let bdates_series = Series::new(
|
|
||||||
"bdates".into(),
|
|
||||||
selected_dates
|
|
||||||
.into_iter()
|
|
||||||
.map(|date| date.format("%Y-%m-%d").to_string()) // Format as strings
|
|
||||||
.collect::<Vec<String>>(),
|
|
||||||
)
|
|
||||||
.cast(&DataType::Date)?; // Cast to Date type
|
|
||||||
|
|
||||||
Ok(bdates_series)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get the `cid` from a ticker string.
|
/// Get the `cid` from a ticker string.
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
pub fn get_cid(ticker: String) -> Result<String, Box<dyn Error>> {
|
pub fn get_cid(ticker: String) -> Result<String, Box<dyn Error>> {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user