From 2b55b199d6c3995450b15441237341733765e0da Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Tue, 19 Nov 2024 00:32:21 +0000 Subject: [PATCH] feat: add pivot functionality for Quantamental DataFrame --- src/utils/qdf/mod.rs | 1 + src/utils/qdf/pivots.rs | 73 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 src/utils/qdf/pivots.rs diff --git a/src/utils/qdf/mod.rs b/src/utils/qdf/mod.rs index cea06c3..6e3a19f 100644 --- a/src/utils/qdf/mod.rs +++ b/src/utils/qdf/mod.rs @@ -2,6 +2,7 @@ pub mod core; pub mod update_df; pub mod load; pub mod reduce_df; +pub mod pivots; // Re-export submodules for easier access pub use core::*; pub use update_df::*; diff --git a/src/utils/qdf/pivots.rs b/src/utils/qdf/pivots.rs new file mode 100644 index 0000000..fe148c2 --- /dev/null +++ b/src/utils/qdf/pivots.rs @@ -0,0 +1,73 @@ +use crate::utils::misc::*; +use crate::utils::qdf::core::*; +use polars::prelude::*; +use std::collections::HashMap; +use std::error::Error; + +/// The required columns for a Quantamental DataFrame. +const QDF_INDEX_COLUMNS: [&str; 3] = ["real_date", "cid", "xcat"]; + +/// Pivots a dataframe to a format where each ticker is a column. 
+///
+/// The wide pivot itself is not implemented yet: for now the function validates
+/// `df`, resolves the metric column, and returns the long-format frame narrowed
+/// to the index columns plus that metric.
+///
+/// # Arguments
+///
+/// * `df` - frame that must pass `check_quantamental_dataframe`.
+/// * `metric` - column to keep; defaults to `"value"`. If `df` has no column with
+///   that name, the first column that is not an index column is used instead.
+///
+/// # Errors
+///
+/// Returns an error if `df` fails validation, if it has no column besides the
+/// index columns to fall back on, or if the column selection fails.
+pub fn pivot_dataframe_by_ticker(
+    df: DataFrame,
+    metric: Option<String>,
+) -> Result<DataFrame, Box<dyn Error>> {
+    check_quantamental_dataframe(&df)?;
+
+    // If no metric is provided, default to 'value'.
+    let requested = metric.unwrap_or_else(|| "value".to_string());
+    let column_names = df.get_column_names();
+    let metric = if column_names.iter().any(|name| name.as_str() == requested) {
+        requested
+    } else {
+        // Fall back to the first non-index column. Searching by name (rather than
+        // taking position 3) cannot panic on a narrow frame and does not assume
+        // that the index columns come first.
+        column_names
+            .iter()
+            .map(|name| name.as_str())
+            .find(|name| !QDF_INDEX_COLUMNS.contains(name))
+            .ok_or("DataFrame has no metric column besides the index columns")?
+            .to_string()
+    };
+
+    // Keep only the index columns and the metric column.
+    let keep_cols: Vec<String> = QDF_INDEX_COLUMNS
+        .iter()
+        .map(|name| name.to_string())
+        .chain(std::iter::once(metric))
+        .collect();
+    let narrowed = df.select(keep_cols)?;
+
+    // TODO: finish the pivot. Remaining steps sketched so far: attach a ticker
+    // column (see `get_ticker_column_for_quantamental_dataframe`), drop the `cid`
+    // and `xcat` columns, and lay the tickers out against `real_date`.
+
+    // Hand back the narrowed frame rather than the untouched input.
+    Ok(narrowed)
+}
+
+/// Splits a Quantamental DataFrame into one frame per ticker.
+///
+/// Returns a map keyed by `"{cid}_{xcat}"` whose values hold the rows of `df`
+/// where the `cid` and `xcat` columns equal that pair.
+///
+/// # Errors
+///
+/// Returns an error if `df` fails `check_quantamental_dataframe`, if the unique
+/// tickers cannot be collected or split, or if filtering fails.
+fn split_df_by_tickers(df: &DataFrame) -> Result<HashMap<String, DataFrame>, Box<dyn Error>> {
+    check_quantamental_dataframe(df)?;
+
+    let mut df_outs = HashMap::new();
+
+    for ticker in get_unique_tickers(df)?.iter() {
+        let (cid, xcat) = split_ticker(ticker.to_string())?;
+
+        let filter = col("cid")
+            .eq(lit(cid.clone()))
+            .and(col("xcat").eq(lit(xcat.clone())));
+
+        // `lazy()` consumes the frame, so filter a clone and leave the caller's
+        // `df` untouched.
+        let df_out = df.clone().lazy().filter(filter).collect()?;
+        df_outs.insert(format!("{}_{}", cid, xcat), df_out);
+    }
+
+    Ok(df_outs)
+}