Compare commits

9 Commits

Author        SHA1        Message                                                                 Date
Palash Tyagi  ed9d5d01a2  update notebook                                                         2025-04-15 19:33:29 +01:00
Palash Tyagi  dda6e3e12f  wip: blacklist util                                                     2025-04-15 19:31:15 +01:00
Palash Tyagi  5ef2c7e6c7  refactor: simplify type annotations in reduce_dataframe function        2025-04-15 00:00:45 +01:00
Palash Tyagi  5c3862c297  refactor: reorganize module exports for clarity                         2025-04-14 00:25:23 +01:00
Palash Tyagi  3559a90ad2  adding blacklist ability                                                2025-04-14 00:25:09 +01:00
Palash Tyagi  b7368a366e  add weight_xcat capability                                              2025-04-14 00:24:26 +01:00
Palash Tyagi  24a4176e17  fix: correct import statement for reduce_dataframe in historic_vol.rs  2025-04-14 00:24:02 +01:00
Palash Tyagi  165e1c19e4  updating linear composite binding                                       2025-04-14 00:22:59 +01:00
Palash Tyagi  fefe849394  updating notebook                                                       2025-04-14 00:22:38 +01:00
16 changed files with 2866 additions and 697 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -68,7 +68,7 @@ pub fn get_period_indices_hv(dfw: PyDataFrame, est_freq: &str) -> PyResult<Vec<u
cids,
weights = None,
signs = None,
weight_xcats = None,
weight_xcat = None,
normalize_weights = false,
start = None,
end = None,
@@ -84,7 +84,7 @@ pub fn linear_composite(
cids: Vec<String>,
weights: Option<Vec<f64>>,
signs: Option<Vec<f64>>,
weight_xcats: Option<Vec<String>>,
weight_xcat: Option<String>,
normalize_weights: bool,
start: Option<String>,
end: Option<String>,
@@ -101,7 +101,7 @@ pub fn linear_composite(
cids,
weights,
signs,
weight_xcats,
weight_xcat,
normalize_weights,
start,
end,

View File

@@ -1,4 +1,5 @@
use pyo3::prelude::*;
use pyo3::types::IntoPyDict;
use pyo3::{prelude::*, types::PyDict};
use pyo3_polars::{PyDataFrame, PySeries};
/// Python wrapper for [`crate::utils::qdf`] module.
@@ -7,6 +8,7 @@ use pyo3_polars::{PyDataFrame, PySeries};
pub fn utils(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(get_bdates_series_default_pl, m)?)?;
m.add_function(wrap_pyfunction!(get_bdates_series_default_opt, m)?)?;
m.add_function(wrap_pyfunction!(create_blacklist_from_qdf, m)?)?;
Ok(())
}
@@ -33,3 +35,19 @@ pub fn get_bdates_series_default_opt(
.map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(format!("{}", e)))?,
))
}
#[allow(deprecated)]
#[pyfunction(signature = (df, metric = None))]
pub fn create_blacklist_from_qdf(df: PyDataFrame, metric: Option<String>) -> PyResult<PyObject> {
let result = crate::utils::qdf::blacklist::create_blacklist_from_qdf(&df.into(), metric)
.map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(format!("{}", e)))?;
Python::with_gil(|py| {
let dict = PyDict::new(py);
// for (key, (start_date, end_date)) in result {
// dict.set_item(key, (start_date, end_date))
for (key, dates) in result {
dict.set_item(key, dates)?;
}
Ok(dict.into())
})
}
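// Illustration: the `Python::with_gil` + `PyDict` pattern above is the usual
// pyo3 route for handing a Rust HashMap back to Python. A minimal sketch,
// using the same pyo3 calls as this file (`map_to_pydict` is hypothetical):
fn map_to_pydict(map: std::collections::HashMap<String, Vec<String>>) -> PyResult<PyObject> {
    Python::with_gil(|py| {
        let dict = PyDict::new(py);
        for (key, values) in map {
            // A Vec<String> value converts to a Python list of str.
            dict.set_item(key, values)?;
        }
        Ok(dict.into())
    })
}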

View File

@@ -1,281 +0,0 @@
//! # DateSeries and BDateSeries Implementations
//!
//! This module provides two date-handling types using the [`chrono`](https://docs.rs/chrono) crate:
//!
//! - [`DateSeries`]: Stores any set of calendar dates and allows adding/subtracting *calendar days*.
//! - [`BDateSeries`]: Stores only Monday–Friday business days and interprets add/sub as *business day* shifts,
//! skipping weekends (e.g., adding 1 to Friday goes to Monday).
//!
//! Both types also provide a [`from_iso8601_range`](#method.from_iso8601_range) constructor
//! that builds a date series (or business-date series) from start/end strings (YYYY-MM-DD).
use chrono::{Datelike, Duration, NaiveDate, ParseResult};
use std::ops::{Add, Sub};
/// Determines if the date is Saturday or Sunday.
fn is_weekend(date: NaiveDate) -> bool {
matches!(date.weekday(), chrono::Weekday::Sat | chrono::Weekday::Sun)
}
/// A `DateSeries` stores a list of [`NaiveDate`] values and shifts by **calendar days**.
///
/// ## Example Usage
///
/// ```
/// use chrono::NaiveDate;
/// use msyrs::core::dateseries::DateSeries;
///
/// // Create from explicit dates
/// let ds = DateSeries::new(vec![
/// NaiveDate::from_ymd_opt(2023, 7, 14).unwrap(), // a Friday
/// NaiveDate::from_ymd_opt(2023, 7, 15).unwrap(), // a Saturday
/// ]);
///
/// // Shift forward by 5 calendar days
/// let ds_plus = ds + 5;
/// // 2023-07-14 + 5 => 2023-07-19 (Wednesday)
/// // 2023-07-15 + 5 => 2023-07-20 (Thursday)
///
/// assert_eq!(ds_plus.data()[0], NaiveDate::from_ymd_opt(2023, 7, 19).unwrap());
/// assert_eq!(ds_plus.data()[1], NaiveDate::from_ymd_opt(2023, 7, 20).unwrap());
/// ```
///
#[derive(Debug, Clone)]
pub struct DateSeries {
data: Vec<NaiveDate>,
}
impl DateSeries {
/// Creates a new `DateSeries` from a vector of [`NaiveDate`] values.
///
/// # Panics
/// - Never panics; this type accepts any valid date, including weekends.
pub fn new(data: Vec<NaiveDate>) -> Self {
Self { data }
}
/// Constructs a `DateSeries` by parsing ISO 8601 start/end strings (YYYY-MM-DD)
/// and including **every calendar date** from start to end (inclusive).
///
/// # Errors
/// - Returns a [`chrono::ParseError`](chrono::ParseError) if parsing fails.
/// - Panics if `start` > `end` chronologically.
///
/// # Examples
///
/// ```
/// use msyrs::core::dateseries::DateSeries;
/// # fn main() -> Result<(), chrono::ParseError> {
/// let ds = DateSeries::from_iso8601_range("2023-07-14", "2023-07-16")?;
/// assert_eq!(ds.data().len(), 3);
/// # Ok(())
/// # }
/// ```
pub fn from_iso8601_range(start: &str, end: &str) -> ParseResult<Self> {
let start_date = NaiveDate::parse_from_str(start, "%Y-%m-%d")?;
let end_date = NaiveDate::parse_from_str(end, "%Y-%m-%d")?;
assert!(
start_date <= end_date,
"start date cannot be after end date"
);
let mut dates = Vec::new();
let mut current = start_date;
while current <= end_date {
dates.push(current);
current = current
.checked_add_signed(Duration::days(1))
.expect("Date overflow in from_iso8601_range");
}
Ok(Self::new(dates))
}
/// Returns a reference to the underlying slice of dates.
pub fn data(&self) -> &[NaiveDate] {
&self.data
}
/// Internal helper applying a function to each date.
fn apply<F>(&self, op: F) -> Self
where
F: Fn(NaiveDate) -> NaiveDate,
{
let new_data = self.data.iter().map(|&date| op(date)).collect();
Self { data: new_data }
}
}
/// Implements adding calendar days to each `NaiveDate`.
///
/// If the shifted date goes out of chrono's valid range, it panics.
impl Add<i64> for DateSeries {
type Output = Self;
fn add(self, rhs: i64) -> Self::Output {
self.apply(|date| {
date.checked_add_signed(Duration::days(rhs))
.expect("Overflow in date addition")
})
}
}
/// Implements subtracting calendar days from each `NaiveDate`.
///
/// If the shifted date goes out of chrono's valid range, it panics.
impl Sub<i64> for DateSeries {
type Output = Self;
fn sub(self, rhs: i64) -> Self::Output {
self.apply(|date| {
date.checked_sub_signed(Duration::days(rhs))
.expect("Overflow in date subtraction")
})
}
}
/// A “Business Date Series” for Monday–Friday only.
///
/// 1. The constructor disallows weekend dates (panics if any date is Sat/Sun).
/// 2. Adding or subtracting an `i64` interprets that integer as *business days*, skipping weekends.
/// For example, adding 1 to a Friday yields the following Monday.
///
/// ## Example Usage
///
/// ```
/// use chrono::NaiveDate;
/// use msyrs::core::dateseries::BDateSeries;
///
/// // Friday
/// let friday = NaiveDate::from_ymd_opt(2023, 7, 14).unwrap();
/// let mut bds = BDateSeries::new(vec![friday]);
///
/// // Adding 1 “business day” => next Monday, 2023-07-17
/// bds = bds + 1;
/// assert_eq!(bds.data()[0], NaiveDate::from_ymd_opt(2023, 7, 17).unwrap());
/// ```
#[derive(Debug, Clone)]
pub struct BDateSeries {
data: Vec<NaiveDate>,
}
impl BDateSeries {
/// Creates a new `BDateSeries`, panicking if any of the supplied dates is on Saturday/Sunday.
pub fn new(data: Vec<NaiveDate>) -> Self {
for &d in &data {
if is_weekend(d) {
panic!("BDateSeries cannot contain weekend dates: {}", d);
}
}
Self { data }
}
/// Constructs a `BDateSeries` by parsing ISO 8601 start/end strings (YYYY-MM-DD).
///
/// Only Monday–Friday dates within `[start, end]` are included in the series.
///
/// # Errors
/// - Returns a [`chrono::ParseError`](chrono::ParseError) if parsing fails.
/// - Panics if `start` > `end` chronologically.
///
/// # Examples
///
/// ```
/// use msyrs::core::dateseries::BDateSeries;
/// # fn main() -> Result<(), chrono::ParseError> {
/// let bds = BDateSeries::from_iso8601_range("2023-07-14", "2023-07-18")?;
/// // 2023-07-14 (Friday), 2023-07-15 (Saturday) => skipped,
/// // 2023-07-16 (Sunday) => skipped,
/// // 2023-07-17 (Monday), 2023-07-18 (Tuesday)
/// // so total 3 valid business days
/// assert_eq!(bds.data().len(), 3);
/// # Ok(())
/// # }
/// ```
pub fn from_iso8601_range(start: &str, end: &str) -> ParseResult<Self> {
let start_date = NaiveDate::parse_from_str(start, "%Y-%m-%d")?;
let end_date = NaiveDate::parse_from_str(end, "%Y-%m-%d")?;
assert!(
start_date <= end_date,
"start date cannot be after end date"
);
let mut dates = Vec::new();
let mut current = start_date;
while current <= end_date {
if !is_weekend(current) {
dates.push(current);
}
current = current
.checked_add_signed(Duration::days(1))
.expect("Date overflow in from_iso8601_range");
}
Ok(Self::new(dates))
}
/// Returns a reference to the underlying slice of dates.
pub fn data(&self) -> &[NaiveDate] {
&self.data
}
/// Internal helper that tries to shift a date forward or backward by one day at a time,
/// skipping weekends, for a total of `delta` business days.
fn shift_business_days(date: NaiveDate, delta: i64) -> NaiveDate {
if delta == 0 {
return date;
}
let step = if delta > 0 { 1 } else { -1 };
let abs_delta = delta.abs();
let mut new_date = date;
for _ in 0..abs_delta {
// Move by 1 day in the correct direction
new_date = new_date
.checked_add_signed(Duration::days(step))
.expect("Overflow in BDateSeries add/sub");
// If we land on weekend, keep moving until Monday..Friday
while is_weekend(new_date) {
new_date = new_date
.checked_add_signed(Duration::days(step))
.expect("Overflow in BDateSeries skipping weekend");
}
}
new_date
}
/// Internal helper to apply a shift of `delta` business days to each date.
fn apply(&self, delta: i64) -> Self {
let new_data = self
.data
.iter()
.map(|&date| Self::shift_business_days(date, delta))
.collect();
Self { data: new_data }
}
}
/// Implement *business day* addition for `BDateSeries`.
///
/// # Panics
/// - If the resulting date(s) overflow `NaiveDate` range.
/// - `BDateSeries` is guaranteed to remain Monday..Friday after the shift.
impl Add<i64> for BDateSeries {
type Output = Self;
fn add(self, rhs: i64) -> Self::Output {
self.apply(rhs)
}
}
/// Implement *business day* subtraction for `BDateSeries`.
///
/// # Panics
/// - If the resulting date(s) overflow `NaiveDate`.
/// - `BDateSeries` is guaranteed to remain Monday..Friday after the shift.
impl Sub<i64> for BDateSeries {
type Output = Self;
fn sub(self, rhs: i64) -> Self::Output {
self.apply(-rhs)
}
}
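// Illustration of the business-day semantics above (a sketch against this
// now-removed API): subtracting one business day from a Monday skips the
// weekend and lands on the previous Friday.
fn _bdate_shift_example() {
    let monday = NaiveDate::from_ymd_opt(2023, 7, 17).unwrap();
    let shifted = BDateSeries::new(vec![monday]) - 1;
    assert_eq!(shifted.data()[0], NaiveDate::from_ymd_opt(2023, 7, 14).unwrap());
}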

View File

View File

@@ -1,3 +0,0 @@
pub mod df;
pub mod xseries;
pub mod dateseries;

View File

@@ -1,223 +0,0 @@
use std::ops::{Add, Div, Mul, Sub};
//
// 1) Define a float series: FSeries
//
#[derive(Debug, Clone)]
pub struct FSeries {
data: Vec<f64>,
}
impl FSeries {
/// Create a new FSeries from a vector of f64 values.
pub fn new(data: Vec<f64>) -> Self {
Self { data }
}
pub fn len(&self) -> usize {
self.data.len()
}
/// Elementwise helper applying an operation between two FSeries.
pub fn apply<F>(&self, other: &Self, op: F) -> Self
where
F: Fn(f64, f64) -> f64,
{
assert!(
self.len() == other.len(),
"FSeries must have the same length to apply operations."
);
let data = self
.data
.iter()
.zip(other.data.iter())
.map(|(&a, &b)| op(a, b))
.collect();
FSeries { data }
}
/// Access to the underlying data
pub fn data(&self) -> &[f64] {
&self.data
}
}
// Macros for float series arithmetic (elementwise)
macro_rules! impl_fseries_bin_op {
($trait:ident, $method:ident, $op:tt) => {
impl $trait for FSeries {
type Output = Self;
fn $method(self, rhs: Self) -> Self::Output {
self.apply(&rhs, |a, b| a $op b)
}
}
};
}
impl_fseries_bin_op!(Add, add, +);
impl_fseries_bin_op!(Sub, sub, -);
impl_fseries_bin_op!(Mul, mul, *);
impl_fseries_bin_op!(Div, div, /);
macro_rules! impl_fseries_scalar_op {
($trait:ident, $method:ident, $op:tt) => {
impl $trait<f64> for FSeries {
type Output = Self;
fn $method(mut self, scalar: f64) -> Self::Output {
for x in self.data.iter_mut() {
*x = *x $op scalar;
}
self
}
}
};
}
impl_fseries_scalar_op!(Add, add, +);
impl_fseries_scalar_op!(Sub, sub, -);
impl_fseries_scalar_op!(Mul, mul, *);
impl_fseries_scalar_op!(Div, div, /);
//
// 2) Define an integer series: ISeries
//
#[derive(Debug, Clone)]
pub struct ISeries {
data: Vec<i64>,
}
impl ISeries {
/// Create an ISeries from a vector of i64 values.
pub fn new(data: Vec<i64>) -> Self {
Self { data }
}
pub fn len(&self) -> usize {
self.data.len()
}
pub fn data(&self) -> &[i64] {
&self.data
}
/// Elementwise helper for integer series.
pub fn apply<F>(&self, other: &Self, op: F) -> Self
where
F: Fn(i64, i64) -> i64,
{
assert!(
self.len() == other.len(),
"ISeries must have the same length to apply operations."
);
let data = self
.data
.iter()
.zip(other.data.iter())
.map(|(&a, &b)| op(a, b))
.collect();
ISeries { data }
}
}
// Macros for integer series arithmetic (elementwise)
macro_rules! impl_iseries_bin_op {
($trait:ident, $method:ident, $op:tt) => {
impl $trait for ISeries {
type Output = Self;
fn $method(self, rhs: Self) -> Self::Output {
self.apply(&rhs, |a, b| a $op b)
}
}
};
}
impl_iseries_bin_op!(Add, add, +);
impl_iseries_bin_op!(Sub, sub, -);
impl_iseries_bin_op!(Mul, mul, *);
impl_iseries_bin_op!(Div, div, /); // integer division (truncates toward zero)
// Optional scalar operations (for i64)
macro_rules! impl_iseries_scalar_op {
($trait:ident, $method:ident, $op:tt) => {
impl $trait<i64> for ISeries {
type Output = Self;
fn $method(mut self, scalar: i64) -> Self::Output {
for x in self.data.iter_mut() {
*x = *x $op scalar;
}
self
}
}
};
}
impl_iseries_scalar_op!(Add, add, +);
impl_iseries_scalar_op!(Sub, sub, -);
impl_iseries_scalar_op!(Mul, mul, *);
impl_iseries_scalar_op!(Div, div, /); // truncating division by scalar
/// A boolean series: BSeries
///
#[derive(Debug, Clone)]
pub struct BSeries {
data: Vec<bool>,
}
impl BSeries {
pub fn new(data: Vec<bool>) -> Self {
Self { data }
}
pub fn len(&self) -> usize {
self.data.len()
}
pub fn data(&self) -> &[bool] {
&self.data
}
}
/// Convert an FSeries to ISeries by truncation (`as i64` casts toward zero).
impl From<FSeries> for ISeries {
fn from(fseries: FSeries) -> Self {
let data = fseries
.data
.into_iter()
.map(|val| val as i64) // trunc cast
.collect();
ISeries::new(data)
}
}
/// Implement conversion from ISeries to FSeries by casting to f64.
impl From<ISeries> for FSeries {
fn from(iseries: ISeries) -> Self {
let data = iseries.data.into_iter().map(|val| val as f64).collect();
FSeries::new(data)
}
}
/// Convert an ISeries to BSeries by checking if each value is non-zero.
impl From<ISeries> for BSeries {
fn from(iseries: ISeries) -> Self {
let data = iseries.data.into_iter().map(|val| val != 0).collect();
BSeries::new(data)
}
}
impl From<BSeries> for ISeries {
fn from(bseries: BSeries) -> Self {
let data = bseries
.data
.into_iter()
.map(|val| if val { 1 } else { 0 })
.collect();
ISeries::new(data)
}
}
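// Illustration of the elementwise arithmetic and conversions defined above
// (a sketch against this now-removed API):
fn _xseries_example() {
    let a = FSeries::new(vec![1.5, 2.5, 3.5]);
    let b = FSeries::new(vec![0.5, 0.5, 0.5]);
    let sum = a + b;                   // [2.0, 3.0, 4.0]
    let scaled = sum * 10.0;           // [20.0, 30.0, 40.0]
    let ints: ISeries = scaled.into(); // truncating cast: [20, 30, 40]
    assert_eq!(ints.data().to_vec(), vec![20i64, 30, 40]);
}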

View File

@@ -102,7 +102,7 @@ impl Default for JPMaQSDownloadGetIndicatorArgs {
/// Struct for downloading data from the JPMaQS data from JPMorgan DataQuery API.
///
/// ## Example Usage
/// ```ignore
/// ```rust
/// use msyrs::download::jpmaqsdownload::JPMaQSDownload;
/// use msyrs::download::jpmaqsdownload::JPMaQSDownloadGetIndicatorArgs;
/// use polars::prelude::*;
@@ -140,7 +140,7 @@ impl Default for JPMaQSDownloadGetIndicatorArgs {
/// Ok(_) => println!("Saved indicators to disk"),
/// Err(e) => println!("Error saving indicators: {:?}", e),
/// }
/// ```
///
#[derive(Debug, Clone)]
pub struct JPMaQSDownload {
requester: DQRequester,
@@ -277,7 +277,7 @@ impl JPMaQSDownload {
///
/// Usage:
///
/// ```ignore
/// ```rust
/// use msyrs::download::jpmaqsdownload::JPMaQSDownload;
/// use msyrs::download::jpmaqsdownload::JPMaQSDownloadGetIndicatorArgs;
/// let mut jpamqs_download = JPMaQSDownload::default();

View File

@@ -1,5 +1,4 @@
// #![doc = include_str!("../README.md")]
// uncomment the above line to include the README.md file in the documentation
//! # msyrs
//!
@@ -19,9 +18,6 @@
/// Documentation and type-stubs for the `msyrs` Python API.
pub mod _py;
/// Implementation for the `core` module.
pub mod core;
/// Implementation for the `download` module.
pub mod download;

View File

@@ -56,3 +56,5 @@ class utils:
def get_bdates_series_default_pl(*args, **kwargs) -> Series: ...
@staticmethod
def get_bdates_series_default_opt(*args, **kwargs) -> Series: ...
@staticmethod
def create_blacklist_from_qdf(*args, **kwargs) -> dict: ...

View File

@@ -1,6 +1,6 @@
use crate::utils::dateutils::{get_bdates_from_col, get_min_max_real_dates};
use crate::utils::qdf::pivots::*;
use crate::utils::qdf::reduce_df::*;
use crate::utils::qdf::reduce_dataframe;
use chrono::NaiveDate;
use ndarray::{s, Array, Array1, Zip};
use polars::prelude::*;

View File

@@ -1,6 +1,6 @@
use crate::utils::qdf::check_quantamental_dataframe;
use crate::utils::qdf::pivots::*;
use crate::utils::qdf::reduce_df::*;
use crate::utils::qdf::pivots::{pivot_dataframe_by_ticker, pivot_wide_dataframe_to_qdf};
use crate::utils::qdf::reduce_df::reduce_dataframe;
use polars::prelude::*;
use std::collections::HashMap;
const TOLERANCE: f64 = 1e-8;
@@ -108,14 +108,42 @@ fn _form_agg_nan_mask_series(nan_mask_dfw: &DataFrame) -> Result<Series, PolarsE
Ok(combined.into_series())
}
/// Form the weights DataFrame
fn _form_agg_weights_dfw(
agg_weights_map: &HashMap<String, Vec<f64>>,
data_dfw: DataFrame,
agg_weights_map: &HashMap<String, (WeightValue, f64)>,
dfw: &DataFrame,
) -> Result<DataFrame, PolarsError> {
let mut weights_dfw = DataFrame::new(vec![])?;
for (agg_targ, weight_signs) in agg_weights_map.iter() {
let wgt = weight_signs[0] * weight_signs[1];
let wgt_series = Series::new(agg_targ.into(), vec![wgt; data_dfw.height()]);
// let wgt = weight_signs[0] * weight_signs[1];
let wgt_series = match &weight_signs.0 {
WeightValue::F64(val) => {
let wgt = val * weight_signs.1;
Series::new(agg_targ.into(), vec![wgt; dfw.height()])
}
WeightValue::Str(vstr) => {
// Look up the `vstr` column in `dfw`; otherwise raise a weight-specification error
if !dfw.get_column_names().contains(&&PlSmallStr::from(vstr)) {
return Err(PolarsError::ComputeError(
format!(
"The column {} does not exist in the DataFrame. {:?}",
vstr, agg_weights_map
)
.into(),
));
}
let vstr_series = dfw.column(vstr)?;
let multiplied_series = vstr_series * weight_signs.1;
let mut multiplied_series =
multiplied_series.as_series().cloned().ok_or_else(|| {
PolarsError::ComputeError(
"Failed to convert multiplied_series to Series".into(),
)
})?;
multiplied_series.rename(agg_targ.into());
multiplied_series
}
};
weights_dfw.with_column(wgt_series)?;
}
Ok(weights_dfw)
@@ -143,14 +171,14 @@ fn perform_single_group_agg(
dfw: &DataFrame,
agg_on: &String,
agg_targs: &Vec<String>,
agg_weights_map: &HashMap<String, Vec<f64>>,
agg_weights_map: &HashMap<String, (WeightValue, f64)>,
normalize_weights: bool,
complete: bool,
) -> Result<Column, PolarsError> {
let data_dfw = _form_agg_data_dfw(dfw, agg_targs)?;
let nan_mask_dfw = _form_agg_nan_mask_dfw(&data_dfw)?;
let nan_mask_series = _form_agg_nan_mask_series(&nan_mask_dfw)?;
let weights_dfw = _form_agg_weights_dfw(agg_weights_map, data_dfw.clone())?;
let weights_dfw = _form_agg_weights_dfw(agg_weights_map, dfw)?;
let weights_dfw = match normalize_weights {
true => normalize_weights_with_nan_mask(weights_dfw, nan_mask_dfw)?,
false => weights_dfw,
@@ -192,7 +220,7 @@ fn perform_single_group_agg(
fn perform_multiplication(
dfw: &DataFrame,
mult_targets: &HashMap<String, Vec<String>>,
weights_map: &HashMap<String, HashMap<String, Vec<f64>>>,
weights_map: &HashMap<String, HashMap<String, (WeightValue, f64)>>,
complete: bool,
normalize_weights: bool,
) -> Result<DataFrame, PolarsError> {
@@ -200,6 +228,7 @@ fn perform_multiplication(
// let mut new_dfw = DataFrame::new(vec![real_date])?;
let mut new_dfw = DataFrame::new(vec![])?;
assert!(!mult_targets.is_empty(), "mult_targets is empty");
for (agg_on, agg_targs) in mult_targets.iter() {
// perform_single_group_agg
let cols_len = new_dfw.get_column_names().len();
@@ -288,76 +317,122 @@ fn get_mul_targets(
Ok(mul_targets)
}
/// Builds a map of the shape:
/// `HashMap<String, HashMap<String, (WeightValue, f64)>>`
/// where only one of `weights` or `weight_xcat` can be provided.
/// If neither is provided, weights default to 1.0.
/// Each tuple is `(WeightValue, f64) = (weight, sign)`.
fn form_weights_and_signs_map(
cids: Vec<String>,
xcats: Vec<String>,
weights: Option<Vec<f64>>,
weight_xcat: Option<String>,
signs: Option<Vec<f64>>,
) -> Result<HashMap<String, HashMap<String, Vec<f64>>>, Box<dyn std::error::Error>> {
let _agg_xcats_for_cid = agg_xcats_for_cid(cids.clone(), xcats.clone());
) -> Result<HashMap<String, HashMap<String, (WeightValue, f64)>>, Box<dyn std::error::Error>> {
// Infer the aggregation direction and targets from helpers:
let agg_xcats_for_cid = agg_xcats_for_cid(cids.clone(), xcats.clone());
let (agg_on, agg_targ) = get_agg_on_agg_targs(cids.clone(), xcats.clone());
// if weights are None, create a vector of 1s of the same length as agg_targ
let weights = weights.unwrap_or(vec![1.0 / agg_targ.len() as f64; agg_targ.len()]);
let signs = signs.unwrap_or(vec![1.0; agg_targ.len()]);
// Determine if each weight option has non-empty values.
let weights_provided = weights.as_ref().map_or(false, |v| !v.is_empty());
let weight_xcats_provided = weight_xcat.as_ref().map_or(false, |v| !v.is_empty());
// check that the lengths of weights and signs match the length of agg_targ
check_weights_signs_lengths(
weights.clone(),
signs.clone(),
_agg_xcats_for_cid,
agg_targ.len(),
)?;
// Enforce that only one of weights or weight_xcats is specified.
if weights_provided && weight_xcats_provided {
return Err("Only one of `weights` and `weight_xcats` may be specified.".into());
}
let mut weights_map = HashMap::new();
// 1) Build the "actual_weights" vector as WeightValue.
let actual_weights: Vec<WeightValue> = if weights_provided {
weights.unwrap().into_iter().map(WeightValue::F64).collect()
} else if weight_xcats_provided {
vec![WeightValue::Str(weight_xcat.unwrap()); agg_targ.len()]
} else {
// Default to numeric 1.0 if neither is provided
vec![WeightValue::F64(1.0); agg_targ.len()]
};
// 2) Build the "signs" vector; default to 1.0 if not provided
let signs = signs.unwrap_or_else(|| vec![1.0; agg_targ.len()]);
// 3) Optional: check lengths & zero values (only numeric weights).
check_weights_signs_lengths(&actual_weights, &signs, agg_xcats_for_cid, agg_targ.len())?;
// 4) Build the final nested HashMap
let mut weights_map: HashMap<String, HashMap<String, (WeightValue, f64)>> = HashMap::new();
for agg_o in agg_on {
let mut agg_t_map = HashMap::new();
for (i, agg_t) in agg_targ.iter().enumerate() {
let ticker = match _agg_xcats_for_cid {
true => format!("{}_{}", agg_o, agg_t),
false => format!("{}_{}", agg_t, agg_o),
// Format the ticker
let ticker = if agg_xcats_for_cid {
format!("{}_{}", agg_o, agg_t)
} else {
format!("{}_{}", agg_t, agg_o)
};
let weight_signs = vec![weights[i], signs[i]];
agg_t_map.insert(ticker, weight_signs);
// Build the tuple (WeightValue, f64)
let weight_sign_tuple = match &actual_weights[i] {
WeightValue::F64(val) => (WeightValue::F64(*val), signs[i]),
WeightValue::Str(vstr) => {
let new_str = format!("{}_{}", agg_t, vstr);
(WeightValue::Str(new_str), signs[i])
}
};
agg_t_map.insert(ticker, weight_sign_tuple);
}
weights_map.insert(agg_o.clone(), agg_t_map);
}
Ok(weights_map)
}
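// Illustration of the returned shape for a hypothetical call with
// cids = ["USD", "EUR"], xcats = ["XR"], weight_xcat = Some("WGT"), no signs.
// The "{cid}_{xcat}" ticker keys and "{cid}_WGT" weight columns mirror the
// formatting logic above; the exact names here are assumptions.
fn _illustrate_weights_map() -> HashMap<String, HashMap<String, (WeightValue, f64)>> {
    let mut inner: HashMap<String, (WeightValue, f64)> = HashMap::new();
    inner.insert("USD_XR".to_string(), (WeightValue::Str("USD_WGT".to_string()), 1.0));
    inner.insert("EUR_XR".to_string(), (WeightValue::Str("EUR_WGT".to_string()), 1.0));
    let mut weights_map = HashMap::new();
    weights_map.insert("XR".to_string(), inner);
    weights_map
}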
/// Checks that the given slices have the expected length and that:
/// - numeric weights are non-zero,
/// - signs are non-zero.
fn check_weights_signs_lengths(
weights_vec: Vec<f64>,
signs_vec: Vec<f64>,
_agg_xcats_for_cid: bool,
weights_vec: &[WeightValue],
signs_vec: &[f64],
agg_xcats_for_cid: bool,
agg_targ_len: usize,
) -> Result<(), Box<dyn std::error::Error>> {
// for vx, vname in ...
let agg_targ = match _agg_xcats_for_cid {
true => "xcats",
false => "cids",
};
for (vx, vname) in vec![
(weights_vec.clone(), "weights"),
(signs_vec.clone(), "signs"),
] {
for (i, v) in vx.iter().enumerate() {
if *v == 0.0 {
return Err(format!("The {} at index {} is 0.0", vname, i).into());
// For diagnostics, decide what to call the dimension
let agg_targ = if agg_xcats_for_cid { "xcats" } else { "cids" };
// 1) Check numeric weights for zeroes.
for (i, weight) in weights_vec.iter().enumerate() {
if let WeightValue::F64(val) = weight {
if *val == 0.0 {
return Err(format!("The weight at index {} is 0.0", i).into());
}
}
if vx.len() != agg_targ_len {
return Err(format!(
"The length of {} ({}) does not match the length of {} ({})",
vname,
vx.len(),
agg_targ,
agg_targ_len
)
.into());
}
// 2) Ensure the weights vector is the expected length.
if weights_vec.len() != agg_targ_len {
return Err(format!(
"The length of weights ({}) does not match the length of {} ({})",
weights_vec.len(),
agg_targ,
agg_targ_len
)
.into());
}
// 3) Check signs for zero.
for (i, sign) in signs_vec.iter().enumerate() {
if *sign == 0.0 {
return Err(format!("The sign at index {} is 0.0", i).into());
}
}
// 4) Ensure the signs vector is the expected length.
if signs_vec.len() != agg_targ_len {
return Err(format!(
"The length of signs ({}) does not match the length of {} ({})",
signs_vec.len(),
agg_targ,
agg_targ_len
)
.into());
}
Ok(())
}
fn rename_result_dfw_cols(
@@ -393,6 +468,36 @@ fn agg_xcats_for_cid(cids: Vec<String>, xcats: Vec<String>) -> bool {
xcats.len() > 1
}
/// Represents a weight value that can be a string (a weight-column reference) or a float; integers convert via `From<i32>`.
#[derive(Debug, Clone, PartialEq)]
pub enum WeightValue {
Str(String),
F64(f64),
}
impl From<String> for WeightValue {
fn from(s: String) -> Self {
WeightValue::Str(s)
}
}
impl<'a> From<&'a str> for WeightValue {
fn from(s: &'a str) -> Self {
WeightValue::Str(s.to_string())
}
}
impl From<f64> for WeightValue {
fn from(f: f64) -> Self {
WeightValue::F64(f)
}
}
impl From<i32> for WeightValue {
fn from(i: i32) -> Self {
WeightValue::F64(i as f64)
}
}
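// Illustration: numeric and string weights both coerce into WeightValue
// ("WGT" is a made-up category name).
fn _weight_value_examples() {
    let w_num: WeightValue = 0.5f64.into();  // WeightValue::F64(0.5)
    let w_int: WeightValue = 2i32.into();    // WeightValue::F64(2.0)
    let w_col: WeightValue = "WGT".into();   // WeightValue::Str("WGT")
    assert_eq!(w_num, WeightValue::F64(0.5));
    assert_eq!(w_int, WeightValue::F64(2.0));
    assert_eq!(w_col, WeightValue::Str("WGT".to_string()));
}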
/// Weighted linear combinations of cross sections or categories
/// # Arguments
/// * `df` - QDF DataFrame
@@ -417,7 +522,7 @@ pub fn linear_composite(
cids: Vec<String>,
weights: Option<Vec<f64>>,
signs: Option<Vec<f64>>,
weight_xcats: Option<Vec<String>>,
weight_xcat: Option<String>,
normalize_weights: bool,
start: Option<String>,
end: Option<String>,
@@ -429,10 +534,28 @@ pub fn linear_composite(
) -> Result<DataFrame, Box<dyn std::error::Error>> {
// Check if the DataFrame is a Quantamental DataFrame
check_quantamental_dataframe(df)?;
if agg_xcats_for_cid(cids.clone(), xcats.clone()) {
if weight_xcat.is_some() {
return Err(
format!(
"Using xcats as weights is not supported when aggregating cids for a single xcat. {:?} {:?}",
cids, xcats
)
.into(),
);
}
}
let mut rxcats = xcats.clone();
if let Some(wx) = &weight_xcat {
    rxcats.push(wx.clone());
}
let rdf = reduce_dataframe(
df.clone(),
Some(cids.clone()),
Some(xcats.clone()),
Some(rxcats.clone()),
Some(vec!["value".to_string()]),
start.clone(),
end.clone(),
@@ -443,10 +566,11 @@ pub fn linear_composite(
let new_xcat = new_xcat.unwrap_or_else(|| "COMPOSITE".to_string());
let new_cid = new_cid.unwrap_or_else(|| "GLB".to_string());
let dfw = pivot_dataframe_by_ticker(rdf.clone(), Some("value".to_string())).unwrap();
let dfw = pivot_dataframe_by_ticker(rdf, Some("value".to_string())).unwrap();
let mul_targets = get_mul_targets(cids.clone(), xcats.clone())?;
let weights_map = form_weights_and_signs_map(cids.clone(), xcats.clone(), weights, signs)?;
let weights_map =
form_weights_and_signs_map(cids.clone(), xcats.clone(), weights, weight_xcat, signs)?;
for (ticker, targets) in mul_targets.iter() {
println!("ticker: {}, targets: {:?}", ticker, targets);

src/utils/qdf/blacklist.rs (new file, 161 lines)
View File

@@ -0,0 +1,161 @@
use crate::utils::bdates::{self, get_bdates_list_with_freq, BDateFreq};
use crate::utils::dateutils::{get_bdates_series_default_opt, get_min_max_real_dates};
use crate::utils::qdf::core::check_quantamental_dataframe;
use chrono::NaiveDate;
use polars::prelude::*;
use std::collections::HashMap;
use std::error::Error;
pub fn create_blacklist_from_qdf(
df: &DataFrame,
metric: Option<String>,
// ) -> Result<HashMap<String, (String, String)>, Box<dyn Error>> {
) -> Result<HashMap<String, Vec<String>>, Box<dyn Error>> {
// Verify that the DataFrame follows the Quantamental structure.
check_quantamental_dataframe(df)?;
let mut blacklist: HashMap<String, Vec<String>> = HashMap::new();
// let mut blacklist: HashMap<String, (String, String)> = HashMap::new();
let mut blk: HashMap<String, Vec<NaiveDate>> = HashMap::new();
// Use the provided metric or default to "value".
let metric = metric.unwrap_or_else(|| "value".into());
let (min_date, max_date) = get_min_max_real_dates(df, "real_date".into())?;
let min_date_str = min_date.format("%Y-%m-%d").to_string();
let max_date_str = max_date.format("%Y-%m-%d").to_string();
// let all_bdates = get_bdates_series_default_opt(min_date_str, max_date_str, None)?;
let all_bdates = get_bdates_list_with_freq(
min_date_str.as_str(),
max_date_str.as_str(),
BDateFreq::Daily,
)?;
// filter df
let null_mask = df.column(metric.as_str())?.is_null();
let nan_mask = df.column(metric.as_str())?.is_nan()?;
let null_mask = null_mask | nan_mask;
let df = df.filter(&null_mask)?;
let df = df
.lazy()
.filter(
col(metric.as_str())
.is_null()
.or(col(metric.as_str()).is_nan()),
)
.sort(
["cid", "xcat"],
SortMultipleOptions::default().with_maintain_order(true),
)
.group_by([col("cid"), col("xcat")])
// .agg([col("real_date").sort(SortOptions::default())])
.agg([col("real_date")
.dt()
.strftime("%Y-%m-%d")
.sort(SortOptions::default())])
.select([
concat_str([col("cid"), col("xcat")], "_", true).alias("ticker"),
col("real_date").alias("real_dates"),
])
.collect()?;
// assert!(0 == 1, "{:?}", df);
let ticker_vec = df
.column("ticker")?
.str()?
.into_iter()
.filter_map(|opt| opt.map(|s| s.to_string()))
.collect::<Vec<String>>();
let rdt = get_vec_of_vec_of_dates_from_df(df)?;
// assert!(0 == 1, "{:?}", rdt);
for (tkr, dates) in ticker_vec.iter().zip(rdt.iter()) {
blacklist.insert(tkr.to_string(), dates.clone());
}
Ok(blacklist)
}
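// Illustration (hypothetical usage): `df` is assumed to be a valid
// Quantamental DataFrame whose `value` column may contain nulls/NaNs.
fn _example_usage(df: &DataFrame) -> Result<(), Box<dyn Error>> {
    let blk = create_blacklist_from_qdf(df, None)?;
    for (ticker, dates) in &blk {
        println!("{}: {} blacklisted dates", ticker, dates.len());
    }
    Ok(())
}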
/// WIP stub: intended to collapse each ticker's blacklist dates into
/// contiguous business-date ranges; currently returns an empty map.
fn convert_dates_list_to_date_ranges(
    _blacklist: HashMap<String, Vec<String>>,
    all_bdates: Vec<NaiveDate>,
) -> HashMap<String, (String, String)> {
    // Calendar rendered as "%Y-%m-%d" strings for matching blacklist entries.
    let _bdates = all_bdates
        .iter()
        .map(|date| date.format("%Y-%m-%d").to_string())
        .collect::<Vec<String>>();
    HashMap::new()
}
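// One plausible completion of the stub above, assuming (per the commented-out
// return type near the top of this file) that each ticker's sorted "%Y-%m-%d"
// dates should collapse into (start, end) ranges of consecutive business
// days, where adjacency is judged by position in the `all_bdates` calendar:
fn _collapse_to_ranges(dates: &[String], all_bdates: &[NaiveDate]) -> Vec<(String, String)> {
    // Index each calendar date string for O(1) adjacency checks.
    let index: HashMap<String, usize> = all_bdates
        .iter()
        .enumerate()
        .map(|(i, d)| (d.format("%Y-%m-%d").to_string(), i))
        .collect();
    let mut ranges: Vec<(String, String)> = Vec::new();
    let mut current: Option<(usize, String, String)> = None; // (last idx, start, end)
    for d in dates {
        let Some(&i) = index.get(d) else { continue };
        current = match current.take() {
            // Consecutive business day: extend the open range.
            Some((last, start, _)) if i == last + 1 => Some((i, start, d.clone())),
            // Gap: close the open range and start a new one.
            Some((_, start, end)) => {
                ranges.push((start, end));
                Some((i, d.clone(), d.clone()))
            }
            None => Some((i, d.clone(), d.clone())),
        };
    }
    if let Some((_, start, end)) = current {
        ranges.push((start, end));
    }
    ranges
}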
fn get_vec_of_vec_of_dates_from_df(df: DataFrame) -> Result<Vec<Vec<String>>, Box<dyn Error>> {
let rdt = df
.column("real_dates")?
// .clone()
.as_series()
.unwrap()
.list()?
.into_iter()
.filter_map(|opt| opt)
.collect::<Vec<Series>>()
.iter()
.map(|s| {
s.str()
.unwrap()
.into_iter()
.filter_map(|opt| opt.map(|s| s.to_string()))
.collect::<Vec<String>>()
})
.collect::<Vec<Vec<String>>>();
Ok(rdt)
}
fn get_vec_of_vec_of_naivedates_from_df(
df: DataFrame,
) -> Result<Vec<Vec<NaiveDate>>, Box<dyn Error>> {
let rdt = df
.column("real_dates")?
// .clone()
.as_series()
.unwrap()
.list()?
.into_iter()
.filter_map(|opt| opt)
.collect::<Vec<Series>>()
.iter()
.map(|s| {
s.date()
.unwrap()
.into_iter()
// polars Date values count days since the Unix epoch (1970-01-01), while
// chrono's from_num_days_from_ce counts from 0001-01-01 (offset 719_163).
.filter_map(|opt| opt.and_then(|days| NaiveDate::from_num_days_from_ce_opt(days + 719_163)))
.collect::<Vec<NaiveDate>>()
})
.collect::<Vec<Vec<NaiveDate>>>();
Ok(rdt)
}
// fn get_vec_of_vec_of_dates_from_df(df: DataFrame) -> Result<Vec<Vec<String>>, Box<dyn Error>> {
// let real_dates_column = df.column("real_dates")?.clone();
// let series = real_dates_column.as_series().unwrap().clone();
// let rdt = series.list()?.clone();
// let rdt = rdt
// .into_iter()
// .filter_map(|opt| opt)
// .collect::<Vec<Series>>();
// let rdt = rdt
// .iter()
// .map(|s| {
// s.str()
// .unwrap()
// .into_iter()
// .filter_map(|opt| opt.map(|s| s.to_string()))
// .collect::<Vec<String>>()
// })
// .collect::<Vec<Vec<String>>>();
// Ok(rdt)
// }

View File

@@ -1,11 +1,12 @@
pub mod blacklist;
pub mod core;
pub mod update_df;
pub mod load;
pub mod reduce_df;
pub mod pivots;
pub mod reduce_df;
pub mod update_df;
// Re-export submodules for easier access
pub use core::*;
pub use update_df::*;
pub use load::*;
pub use reduce_df::*;
pub use update_df::*;

View File

@@ -30,12 +30,12 @@ pub fn reduce_dataframe(
let df_size = df.shape();
let mut new_df = df.clone();
let ticker_col: Column = get_ticker_column_for_quantamental_dataframe(&new_df)?;
let ticker_col = get_ticker_column_for_quantamental_dataframe(&new_df)?;
// if cids is not provided, get all unique cids
let u_cids: Vec<String> = get_unique_cids(&new_df)?;
let u_xcats: Vec<String> = get_unique_xcats(&new_df)?;
let u_tickers: Vec<String> = _get_unique_strs_from_str_column_object(&ticker_col)?;
let u_cids = get_unique_cids(&new_df)?;
let u_xcats = get_unique_xcats(&new_df)?;
let u_tickers = _get_unique_strs_from_str_column_object(&ticker_col)?;
let cids_vec = cids.unwrap_or_else(|| u_cids.clone());
let specified_cids: Vec<&str> = cids_vec.iter().map(AsRef::as_ref).collect();