mirror of
https://github.com/Magnus167/msyrs.git
synced 2025-08-20 07:20:01 +00:00
wip
This commit is contained in:
parent
7ea8aa6dd8
commit
fdc5f7d95f
@ -37,21 +37,11 @@ pub fn is_quantamental_dataframe(df: &DataFrame) -> bool {
|
||||
check_quantamental_dataframe(df).is_ok()
|
||||
}
|
||||
|
||||
/// Sort the columns of a Quantamental DataFrame.
|
||||
/// The first columns are `real_date`, `cid`, and `xcat`.
|
||||
/// These are followed by any available JPMAQS metrics, 'value', 'grading', 'eop_lag', 'mop_lag',
|
||||
/// (**in that order**), followed by any other metrics (in alphabetical order).
|
||||
pub fn sort_qdf_columns(qdf: &mut DataFrame) -> Result<(), Box<dyn Error>> {
|
||||
pub fn get_sorted_qdf_columns(columns: Vec<String>) -> Vec<String> {
|
||||
let index_columns = ["real_date", "cid", "xcat"];
|
||||
let known_metrics = ["value", "grading", "eop_lag", "mop_lag"];
|
||||
|
||||
let df_columns = qdf
|
||||
.get_column_names()
|
||||
.into_iter()
|
||||
.map(|s| s.clone().into_string())
|
||||
.collect::<Vec<String>>();
|
||||
|
||||
let mut unknown_metrics: Vec<String> = df_columns
|
||||
let mut unknown_metrics: Vec<String> = columns
|
||||
.iter()
|
||||
.filter(|&m| !known_metrics.contains(&m.as_str()))
|
||||
.filter(|&m| !index_columns.contains(&m.as_str()))
|
||||
@ -61,13 +51,30 @@ pub fn sort_qdf_columns(qdf: &mut DataFrame) -> Result<(), Box<dyn Error>> {
|
||||
let mut new_columns: Vec<String> = vec![];
|
||||
new_columns.extend(index_columns.iter().map(|s| s.to_string()));
|
||||
for &colname in &known_metrics {
|
||||
if df_columns.contains(&colname.into()) {
|
||||
if columns.contains(&colname.into()) {
|
||||
new_columns.push(colname.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
unknown_metrics.sort();
|
||||
new_columns.extend(unknown_metrics);
|
||||
|
||||
new_columns
|
||||
}
|
||||
|
||||
/// Sort the columns of a Quantamental DataFrame.
|
||||
/// The first columns are `real_date`, `cid`, and `xcat`.
|
||||
/// These are followed by any available JPMAQS metrics, 'value', 'grading', 'eop_lag', 'mop_lag',
|
||||
/// (**in that order**), followed by any other metrics (in alphabetical order).
|
||||
pub fn sort_qdf_columns(qdf: &mut DataFrame) -> Result<(), Box<dyn Error>> {
|
||||
let df_columns = qdf
|
||||
.get_column_names()
|
||||
.into_iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
|
||||
let new_columns = get_sorted_qdf_columns(df_columns);
|
||||
|
||||
*qdf = qdf
|
||||
.select(new_columns.clone())
|
||||
.expect("Failed to select columns");
|
||||
@ -142,21 +149,23 @@ pub fn get_unique_xcats(df: &DataFrame) -> Result<Vec<String>, Box<dyn Error>> {
|
||||
get_unique_from_str_column(df, "xcat")
|
||||
}
|
||||
|
||||
pub fn get_unique_metrics(df: &DataFrame) -> Result<Vec<String>, Box<dyn Error>> {
|
||||
// return a list of all columns that are not 'real_date', 'cid', 'xcat'
|
||||
let columns = df
|
||||
.get_column_names()
|
||||
.iter()
|
||||
.map(|s| s.as_str().to_string())
|
||||
.collect();
|
||||
|
||||
let sorted_cols = get_sorted_qdf_columns(columns);
|
||||
|
||||
// return sorted_cols[3..].to_vec()
|
||||
Ok(sorted_cols[3..].to_vec())
|
||||
}
|
||||
|
||||
/// Get the unique dates as a polars Column from a Quantamental DataFrame.
|
||||
pub fn get_unique_dates(df: &DataFrame) -> Result<Column, Box<dyn Error>> {
|
||||
let date_col = df.column("real_date")?;
|
||||
let unique_dates = date_col.unique()?.sort(SortOptions::default())?;
|
||||
Ok(unique_dates)
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -27,8 +27,8 @@ pub fn pivot_dataframe_by_ticker(
|
||||
// set metric to the first non-index column
|
||||
metric = df.get_column_names()[3].to_string();
|
||||
}
|
||||
let unique_dates: Column = get_unique_dates(&df)?;
|
||||
|
||||
// let mut new_df = df.clone();
|
||||
// keep only the index columns and the metric column
|
||||
let mut keep_cols = QDF_INDEX_COLUMNS
|
||||
.to_vec()
|
||||
@ -37,23 +37,24 @@ pub fn pivot_dataframe_by_ticker(
|
||||
.collect::<Vec<String>>();
|
||||
keep_cols.push(metric.clone());
|
||||
|
||||
return Err("Not implemented".into());
|
||||
let out_dfs = split_df_by_tickers(&df, Some(vec![metric.clone()]))?;
|
||||
|
||||
// create a new dataframe with the unique dates, and iteratively add the metric columns
|
||||
let mut new_df = DataFrame::new(vec![unique_dates])?;
|
||||
|
||||
|
||||
|
||||
// new_df = new_df.select(keep_cols)?;
|
||||
// let ticker_col = get_ticker_column_for_quantamental_dataframe(&new_df)?;
|
||||
|
||||
// new_df.with_column(ticker_col)?;
|
||||
// // drop the cid and xcat columns
|
||||
// new_df = new_df.drop_many(&["cid".to_string(), "xcat".to_string()]);
|
||||
|
||||
// let dates_col = df.column("real_date")?;
|
||||
|
||||
// Ok(df)
|
||||
}
|
||||
|
||||
/// Splits a dataframe by ticker.
|
||||
#[allow(dead_code)]
|
||||
fn split_df_by_tickers(df: &DataFrame) -> Result<HashMap<String, DataFrame>, Box<dyn Error>> {
|
||||
fn split_df_by_tickers(
|
||||
df: &DataFrame,
|
||||
metrics: Option<Vec<String>>,
|
||||
) -> Result<HashMap<String, DataFrame>, Box<dyn Error>> {
|
||||
check_quantamental_dataframe(df)?;
|
||||
|
||||
let mut df_outs = HashMap::new();
|
||||
@ -66,6 +67,15 @@ fn split_df_by_tickers(df: &DataFrame) -> Result<HashMap<String, DataFrame>, Box
|
||||
icids.push(cid);
|
||||
ixcats.push(xcat);
|
||||
}
|
||||
let metrics = metrics.unwrap_or_else(|| get_unique_metrics(df).unwrap());
|
||||
// let keep_cols:Vec<String> = QDF_INDEX_COLUMNS + metrics;
|
||||
let mut keep_cols = QDF_INDEX_COLUMNS
|
||||
.to_vec()
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
|
||||
keep_cols.extend(metrics);
|
||||
|
||||
for (cid, xcat) in icids.iter().zip(ixcats.iter()) {
|
||||
// Apply filter while borrowing df and avoid moving ownership.
|
||||
@ -74,17 +84,39 @@ fn split_df_by_tickers(df: &DataFrame) -> Result<HashMap<String, DataFrame>, Box
|
||||
.and(col("xcat").eq(lit(xcat.clone())));
|
||||
|
||||
let df_out = df.clone().lazy().filter(filter).collect()?;
|
||||
// select keep_cols
|
||||
let df_out = df_out.select(keep_cols.clone())?;
|
||||
df_outs.insert(format!("{}_{}", cid, xcat), df_out);
|
||||
}
|
||||
|
||||
Ok(df_outs)
|
||||
}
|
||||
|
||||
fn single_ticker_qdf_to_timeseries(df: &DataFrame) -> Result<Vec<DataFrame>, Box<dyn Error>> {
|
||||
fn single_ticker_qdf_to_timeseries(mut df: DataFrame) -> Result<Vec<DataFrame>, Box<dyn Error>> {
|
||||
let mut df_vec = Vec::new();
|
||||
|
||||
// copy the date col
|
||||
// let date_col =
|
||||
// Since we own `df`, we can remove the "real_date" column, consuming it.
|
||||
let date_col = df.drop_in_place("real_date")?;
|
||||
|
||||
Ok(vec![df.to_owned()])
|
||||
// Assuming `get_unique_metrics` only needs a reference to `df`
|
||||
let metrics = get_unique_metrics(&df)?;
|
||||
|
||||
for metric in metrics.iter() {
|
||||
// Remove the metric column from `df`, consuming it.
|
||||
let metric_col = df.drop_in_place(metric)?;
|
||||
|
||||
// Create a new DataFrame with the date column and the metric column.
|
||||
let new_df = DataFrame::new(vec![
|
||||
date_col.clone(), // Clone because `date_col` is used multiple times
|
||||
metric_col, // Move the metric column (no clone needed)
|
||||
])?;
|
||||
df_vec.push(new_df);
|
||||
}
|
||||
// At this point, `df` has had its metric columns removed and is effectively consumed.
|
||||
// The "date_col" has been moved out of `df` as well.
|
||||
|
||||
// If you need to use `df` after this function, consider redesigning the function
|
||||
// to avoid consuming it.
|
||||
|
||||
Ok(df_vec)
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user