feat: add pivot functionality for Quantamental DataFrame

This commit is contained in:
Palash Tyagi 2024-11-19 00:32:21 +00:00
parent 0a400fb5b2
commit 2b55b199d6
2 changed files with 74 additions and 0 deletions

View File

@ -2,6 +2,7 @@ pub mod core;
pub mod update_df;
pub mod load;
pub mod reduce_df;
pub mod pivots;
// Re-export submodules for easier access
pub use core::*;
pub use update_df::*;

73
src/utils/qdf/pivots.rs Normal file
View File

@ -0,0 +1,73 @@
use crate::utils::misc::*;
use crate::utils::qdf::core::*;
use polars::prelude::*;
use std::collections::HashMap;
use std::error::Error;
/// The required index columns of a Quantamental DataFrame.
///
/// Within this module these names are the columns that are always retained
/// (together with the chosen metric column) when a frame is reduced.
const QDF_INDEX_COLUMNS: [&str; 3] = ["real_date", "cid", "xcat"];
/// Prepares a Quantamental DataFrame for pivoting into a format where each
/// ticker is a column.
///
/// The frame is validated and then reduced to the index columns
/// (`real_date`, `cid`, `xcat`) plus a single metric column. The reshaping
/// into one column per ticker is not implemented yet (see the TODO in the
/// body), so the reduced long-format frame is what gets returned.
///
/// # Arguments
/// * `df` - The frame to reduce; it must pass `check_quantamental_dataframe`.
/// * `metric` - Name of the metric column to keep. Defaults to `"value"`.
///   When the requested column is not present, the first column that is not
///   one of the index columns is used instead.
///
/// # Errors
/// Returns an error if validation fails, if the frame has no column other
/// than the index columns, or if the column selection fails.
pub fn pivot_dataframe_by_ticker(
    df: DataFrame,
    metric: Option<String>,
) -> Result<DataFrame, Box<dyn Error>> {
    check_quantamental_dataframe(&df)?;

    let requested = metric.unwrap_or_else(|| "value".to_string());
    let column_names = df.get_column_names();
    let metric = if column_names
        .iter()
        .any(|name| name.as_str() == requested.as_str())
    {
        requested
    } else {
        // Fall back by name rather than by position so that an unexpected
        // column order (or a frame with too few columns) cannot cause a panic
        // or silently pick an index column.
        column_names
            .iter()
            .map(|name| name.as_str())
            .find(|name| !QDF_INDEX_COLUMNS.contains(name))
            .ok_or("DataFrame has no metric column besides the index columns")?
            .to_string()
    };

    // Keep only the index columns and the metric column.
    let keep_cols: Vec<String> = QDF_INDEX_COLUMNS
        .iter()
        .map(|name| name.to_string())
        .chain(std::iter::once(metric))
        .collect();
    let reduced = df.select(keep_cols)?;

    // TODO: build a ticker column from `cid`/`xcat`, drop those two columns,
    // and spread the metric values into one column per ticker keyed by
    // `real_date`.
    Ok(reduced)
}
fn split_df_by_tickers(df: &DataFrame) -> Result<HashMap<String, DataFrame>, Box<dyn Error>> {
check_quantamental_dataframe(df)?;
let mut df_outs = HashMap::new();
let unique_tickers = get_unique_tickers(df)?;
let mut icids = Vec::new();
let mut ixcats = Vec::new();
for ticker in unique_tickers.iter() {
let (cid, xcat) = split_ticker(ticker.to_string())?;
icids.push(cid);
ixcats.push(xcat);
}
for (cid, xcat) in icids.iter().zip(ixcats.iter()) {
// Apply filter while borrowing df and avoid moving ownership.
let filter = col("cid")
.eq(lit(cid.clone()))
.and(col("xcat").eq(lit(xcat.clone())));
let df_out = df.clone().lazy().filter(filter).collect()?;
df_outs.insert(format!("{}_{}", cid, xcat), df_out);
}
Ok(df_outs)
}