wip: hist vol

This commit is contained in:
Palash Tyagi 2024-11-26 23:59:45 +00:00
parent 82fed27116
commit 6fb9b0f076
2 changed files with 284 additions and 781 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,8 @@
use chrono::NaiveDate;
use ndarray::{Array, Array1, Zip};
use crate::utils::qdf::pivots::*;
use chrono::{Datelike, NaiveDate};
use ndarray::{s, Array, Array1, Zip};
use polars::prelude::*;
// use polars::time::Duration;
/// Returns the annualization factor for 252 trading days.
/// (SQRT(252))
@ -56,6 +58,7 @@ fn flat_std(x: &Array1<f64>, remove_zeros: bool) -> f64 {
filtered_x.mapv(f64::abs).mean().unwrap_or(0.0)
}
#[allow(unused_variables)]
fn freq_daily_calc(
dfw: &DataFrame,
lback_periods: usize,
@ -71,14 +74,155 @@ fn freq_daily_calc(
);
}
let mut new_df = DataFrame::new(vec![])?;
match lback_method {
"ma" => Ok(new_df),
"xma" => Ok(new_df),
_ => Err("Invalid lookback method.".into()),
let mut new_df = dfw.clone();
for col_name in dfw.get_column_names() {
let series = dfw.column(col_name)?;
let values: Array1<f64> = series
.f64()?
.into_iter()
.map(|opt| opt.unwrap_or(0.0))
.collect();
let result_series = match lback_method {
"ma" => {
let mut result = Vec::new();
for i in 0..(values.len() - lback_periods + 1) {
let window = values.slice(s![i..i + lback_periods]);
let std = flat_std(&window.to_owned(), remove_zeros);
result.push(std);
}
Series::new(col_name.clone(), result)
}
"xma" => {
let half_life = half_life.unwrap();
let weights = expo_weights(lback_periods, half_life);
let mut result = Vec::new();
for i in 0..(values.len() - lback_periods + 1) {
let window = values.slice(s![i..i + lback_periods]);
let std = expo_std(&window.to_owned(), &weights, remove_zeros);
result.push(std);
}
Series::new(col_name.clone(), result)
}
_ => return Err("Invalid lookback method.".into()),
};
new_df.with_column(result_series)?;
}
Ok(new_df)
}
/// Computes lookback volatility estimates at period-end rows only.
///
/// For every value column of `dfw` (every column except `real_date`), an
/// estimate is computed at each row index returned by
/// [`get_period_indices`] for `period`, using the trailing window of
/// `lback_periods` rows that ends on that row (inclusive).
///
/// - `lback_method == "ma"` uses `flat_std` on the window.
/// - `lback_method == "xma"` uses `expo_std` with weights from
///   `expo_weights(lback_periods, half_life)`.
///
/// The returned frame is a copy of `dfw` in which each value column is
/// replaced by a same-length column holding the estimate on estimation rows
/// and `NaN` everywhere else (including estimation rows that do not yet have
/// a full window behind them).
///
/// Null inputs are treated as `0.0` before windowing. `nan_tolerance` is
/// accepted for signature compatibility but is not applied yet.
///
/// # Errors
///
/// Returns an error if `lback_periods` is zero, if `lback_method` is neither
/// `"ma"` nor `"xma"`, if `period` is rejected by `get_period_indices`, or if
/// a value column is not of type `f64`.
///
/// # Panics
///
/// Panics if `lback_method` is `"xma"` and `half_life` is `None`.
#[allow(unused_variables)] // `nan_tolerance` is not used yet.
fn freq_period_calc(
    dfw: &DataFrame,
    lback_periods: usize,
    lback_method: &str,
    half_life: Option<f64>,
    remove_zeros: bool,
    nan_tolerance: f64,
    period: &str,
) -> Result<DataFrame, Box<dyn std::error::Error>> {
    if lback_method == "xma" {
        assert!(
            half_life.is_some(),
            "If lback_method is 'xma', half_life must be provided."
        );
    }
    // A zero-length window is meaningless and would underflow the `usize`
    // window arithmetic below.
    if lback_periods == 0 {
        return Err("lback_periods must be at least 1.".into());
    }

    // The weights depend only on the arguments, so build them once.
    // `None` selects the flat ("ma") estimator.
    let weights = match lback_method {
        "ma" => None,
        "xma" => Some(expo_weights(lback_periods, half_life.unwrap())),
        _ => return Err("Invalid lookback method.".into()),
    };

    // The estimation rows are shared by all columns.
    let period_indices = get_period_indices(dfw, period)?;

    let mut new_df = dfw.clone();
    for col_name in dfw.get_column_names() {
        // The date index is not a value column; casting it to f64 would fail.
        let name: &str = col_name;
        if name == "real_date" {
            continue;
        }

        let values: Array1<f64> = dfw
            .column(col_name)?
            .f64()?
            .into_iter()
            .map(|opt| opt.unwrap_or(0.0))
            .collect();

        // Keep the output aligned with the frame's rows so that the column
        // can replace the original one; non-estimation rows stay NaN.
        let mut result = vec![f64::NAN; values.len()];
        for &i in &period_indices {
            // Not enough history for a full window yet.
            if i + 1 < lback_periods {
                continue;
            }
            let window = values.slice(s![i + 1 - lback_periods..=i]).to_owned();
            result[i] = match &weights {
                Some(w) => expo_std(&window, w, remove_zeros),
                None => flat_std(&window, remove_zeros),
            };
        }

        new_df.with_column(Series::new(col_name.clone(), result))?;
    }

    Ok(new_df)
}
fn get_period_indices(
dfw: &DataFrame,
period: &str,
) -> Result<Vec<usize>, Box<dyn std::error::Error>> {
let date_series: &Logical<DateType, Int32Type> = dfw.column("real_date")?.date()?;
let mut indices = Vec::new();
match period {
"weekly" => {
for (i, date) in date_series.into_iter().enumerate() {
if let Some(date) = date {
if let Some(naive_date) = NaiveDate::from_num_days_from_ce_opt(date) {
if naive_date.weekday() == chrono::Weekday::Fri {
indices.push(i);
}
}
}
}
}
"monthly" => {
let mut current_month = None;
for (i, date) in date_series.into_iter().enumerate() {
if let Some(date) = date {
if let Some(naive_date) = NaiveDate::from_num_days_from_ce_opt(date) {
if current_month.is_none() || current_month.unwrap() != naive_date.month() {
current_month = Some(naive_date.month());
if i > 0 {
indices.push(i - 1);
}
}
}
}
}
if let Some(_) = current_month {
indices.push(date_series.len() - 1);
}
}
_ => return Err("Invalid period specified.".into()),
}
Ok(indices)
}
// #[allow(dead_code)]
// fn single_calc(
// // end_date: -- naive datetime
@ -105,6 +249,8 @@ fn freq_daily_calc(
/// - `remove_zeros`: Whether to remove zero values from the calculation. Defaults to False.
/// - `postfix`: A string to append to XCAT of the result series.
/// - `nan_tolerance`: The maximum proportion of NaN values allowed in the calculation.
///
#[allow(unused_variables)]
pub fn historic_vol(
df: polars::prelude::DataFrame,
xcat: String,
@ -121,6 +267,62 @@ pub fn historic_vol(
) -> Result<DataFrame, Box<dyn std::error::Error>> {
println!("Calculating historic volatility with the following parameters:");
println!("xcat: {:?},\ncids: {:?},\nlback_periods: {:?},lback_method: {:?},\nhalf_life: {:?},\nstart: {:?},\nend: {:?},\nest_freq: {:?},\nremove_zeros: {:?},\npostfix: {:?},\nnan_tolerance: {:?}", xcat, cids, lback_periods,lback_method, half_life, start, end, est_freq, remove_zeros, postfix, nan_tolerance);
let mut dfw = pivot_dataframe_by_ticker(df.clone(), Some("value".to_string()))?;
Ok(df.to_owned())
let lback_periods = lback_periods.unwrap_or(20);
let lback_method = lback_method.unwrap_or("ma".to_string());
let half_life = half_life;
let start = start.unwrap_or(dfw.column("real_date")?.date()?.min().unwrap().to_string());
let end = end.unwrap_or(dfw.column("real_date")?.date()?.max().unwrap().to_string());
let est_freq = est_freq.unwrap_or("D".to_string());
let remove_zeros = remove_zeros.unwrap_or(false);
let postfix = postfix.unwrap_or("_HISTVOL".to_string());
let nan_tolerance = nan_tolerance.unwrap_or(0.25);
let start_date = NaiveDate::parse_from_str(&start, "%Y-%m-%d")?;
let end_date = NaiveDate::parse_from_str(&end, "%Y-%m-%d")?;
dfw = dfw
.lazy()
.filter(
col("real_date")
.lt_eq(lit(end_date))
.alias("real_date")
.into(),
)
.filter(
col("real_date")
.gt_eq(lit(start_date))
.alias("real_date")
.into(),
)
.collect()?;
let period = match est_freq.as_str() {
"W" => "weekly",
"M" => "monthly",
_ => return Err("Invalid frequency specified.".into()),
};
let dfw = match est_freq.as_str() {
"D" => freq_daily_calc(
&dfw,
lback_periods,
&lback_method,
half_life,
remove_zeros,
nan_tolerance,
)?,
_ => freq_period_calc(
&dfw,
lback_periods,
&lback_method,
half_life,
remove_zeros,
nan_tolerance,
&period,
)?,
};
Ok(dfw)
}