Refactor frequency calculation functions to use estimated frequency and improve date handling

This commit is contained in:
Palash Tyagi 2025-04-06 05:13:41 +01:00
parent 3bca3a931f
commit d897965dd2

View File

@ -1,6 +1,7 @@
use crate::utils::misc::*;
use crate::utils::qdf::pivots::*; use crate::utils::qdf::pivots::*;
use crate::utils::qdf::reduce_df::*; use crate::utils::qdf::reduce_df::*;
use chrono::{Datelike, NaiveDate}; use chrono::NaiveDate;
use ndarray::{s, Array, Array1, Zip}; use ndarray::{s, Array, Array1, Zip};
use polars::prelude::*; use polars::prelude::*;
use polars::series::Series; // Series struct use polars::series::Series; // Series struct
@ -136,7 +137,7 @@ fn freq_period_calc(
half_life: Option<f64>, half_life: Option<f64>,
remove_zeros: bool, remove_zeros: bool,
nan_tolerance: f64, nan_tolerance: f64,
period: &str, est_freq: &str,
) -> Result<DataFrame, Box<dyn std::error::Error>> { ) -> Result<DataFrame, Box<dyn std::error::Error>> {
if lback_method == "xma" { if lback_method == "xma" {
assert!( assert!(
@ -146,9 +147,9 @@ fn freq_period_calc(
} }
println!("Calculating historic volatility with the following parameters:"); println!("Calculating historic volatility with the following parameters:");
println!("lback_periods: {:?}, lback_method: {:?}, half_life: {:?}, remove_zeros: {:?}, nan_tolerance: {:?}, period: {:?}", lback_periods, lback_method, half_life, remove_zeros, nan_tolerance, period); println!("lback_periods: {:?}, lback_method: {:?}, half_life: {:?}, remove_zeros: {:?}, nan_tolerance: {:?}, period: {:?}", lback_periods, lback_method, half_life, remove_zeros, nan_tolerance, est_freq);
let period_indices: Vec<usize> = get_period_indices(dfw, period)?; let period_indices: Vec<usize> = get_period_indices(dfw, est_freq)?;
// new_df = dfw['real_date'].iloc[period_indices].copy() // new_df = dfw['real_date'].iloc[period_indices].copy()
let idx = UInt32Chunked::from_vec( let idx = UInt32Chunked::from_vec(
@ -176,6 +177,7 @@ fn freq_period_calc(
if i >= lback_periods - 1 { if i >= lback_periods - 1 {
let window = values.slice(s![i + 1 - lback_periods..=i]); let window = values.slice(s![i + 1 - lback_periods..=i]);
let std = flat_std(&window.to_owned(), remove_zeros); let std = flat_std(&window.to_owned(), remove_zeros);
let std = std * annualization_factor();
result.push(std); result.push(std);
} else { } else {
result.push(f64::NAN); result.push(f64::NAN);
@ -191,6 +193,7 @@ fn freq_period_calc(
if i >= lback_periods - 1 { if i >= lback_periods - 1 {
let window = values.slice(s![i + 1 - lback_periods..=i]); let window = values.slice(s![i + 1 - lback_periods..=i]);
let std = expo_std(&window.to_owned(), &weights, remove_zeros); let std = expo_std(&window.to_owned(), &weights, remove_zeros);
let std = std * annualization_factor();
result.push(std); result.push(std);
} else { } else {
result.push(f64::NAN); result.push(f64::NAN);
@ -210,56 +213,39 @@ fn freq_period_calc(
Ok(new_df) Ok(new_df)
} }
/// Runs `get_bdates_from_col` on the `real_date` column of `dfw`.
///
/// # Arguments
/// * `dfw` - data frame that must contain a `real_date` column.
/// * `est_freq` - frequency code, forwarded unchanged to `get_bdates_from_col`.
///
/// # Errors
/// Returns an error when the `real_date` column is missing, when that column
/// cannot be viewed as a `Series`, or when `get_bdates_from_col` itself fails.
pub fn get_bdates_from_col_hv(
    dfw: &DataFrame,
    est_freq: &str,
) -> Result<Series, Box<dyn std::error::Error>> {
    // `as_series` returns an `Option`; report the `None` case as an error
    // instead of panicking, since this function already returns `Result`.
    let date_series = dfw
        .column("real_date")?
        .as_series()
        .ok_or("column 'real_date' is not backed by a Series")?;
    // `?` converts the helper's error type into the boxed error returned here.
    Ok(get_bdates_from_col(date_series, est_freq)?)
}
/// Public wrapper around the module-private `get_period_indices`.
///
/// Forwards `dfw` and `est_freq` unchanged and hands back the resulting
/// vector of row indices, or propagates the error from the inner call.
pub fn get_period_indices_hv(
    dfw: &DataFrame,
    est_freq: &str,
) -> Result<Vec<usize>, Box<dyn std::error::Error>> {
    let period_indices = get_period_indices(dfw, est_freq)?;
    Ok(period_indices)
}
fn get_period_indices( fn get_period_indices(
dfw: &DataFrame, dfw: &DataFrame,
period: &str, est_freq: &str,
) -> Result<Vec<usize>, Box<dyn std::error::Error>> { ) -> Result<Vec<usize>, Box<dyn std::error::Error>> {
let date_series: &Logical<DateType, Int32Type> = dfw.column("real_date")?.date()?; // let date_series: &Logical<DateType, Int32Type> = dfw.column("real_date")?.date()?;
let date_series = dfw.column("real_date")?.as_series().unwrap();
let mut indices = Vec::new(); let mut indices = Vec::new();
match period { let bdates: Series = get_bdates_from_col(date_series, est_freq)?;
"weekly" => {
for (i, date) in date_series.into_iter().enumerate() { for bdate in bdates.iter() {
if let Some(date) = date { if let Some(index) = date_series.iter().position(|date| date == bdate) {
if let Some(naive_date) = NaiveDate::from_num_days_from_ce_opt(date) { indices.push(index);
if naive_date.weekday() == chrono::Weekday::Fri {
indices.push(i);
}
}
}
}
} }
"monthly" => {
let mut current_month = None;
for (i, date) in date_series.into_iter().enumerate() {
if let Some(date) = date {
if let Some(naive_date) = NaiveDate::from_num_days_from_ce_opt(date) {
if current_month.is_none() || current_month.unwrap() != naive_date.month() {
current_month = Some(naive_date.month());
if i > 0 {
indices.push(i - 1);
}
}
}
}
}
if let Some(_) = current_month {
indices.push(date_series.len() - 1);
}
}
_ => return Err("Invalid period specified.".into()),
} }
Ok(indices) Ok(indices)
} }
// #[allow(dead_code)]
// fn single_calc(
// // end_date: -- naive datetime
// end_date: NaiveDate,
// wide_df: &DataFrame,
// lback_periods: usize,
// lback_method: &str,
// nan_tolerance: f64,
/// Calculate historic volatility. /// Calculate historic volatility.
/// Arguments: /// Arguments:
@ -366,17 +352,10 @@ pub fn historic_vol(
println!("Successfully filtered the DataFrame."); println!("Successfully filtered the DataFrame.");
let period = match est_freq.as_str() {
"D" => "daily",
"W" => "weekly",
"M" => "monthly",
_ => return Err("Invalid frequency specified.".into()),
};
println!("Successfully got period."); println!("Successfully got period.");
let mut dfw = match est_freq.as_str() { let mut dfw = match est_freq.as_str() {
"D" => freq_daily_calc( "X" => freq_daily_calc(
&dfw, &dfw,
lback_periods, lback_periods,
&lback_method, &lback_method,
@ -391,7 +370,7 @@ pub fn historic_vol(
half_life, half_life,
remove_zeros, remove_zeros,
nan_tolerance, nan_tolerance,
&period, &est_freq,
)?, )?,
}; };