wip

2025-11-19 14:16:10 +00:00 · 2024-11-20 01:44:15 +00:00
parent 7ea8aa6dd8
commit fdc5f7d95f
2 changed files with 80 additions and 39 deletions
--- a/src/utils/qdf/core.rs
+++ b/src/utils/qdf/core.rs
@@ -37,21 +37,11 @@ pub fn is_quantamental_dataframe(df: &DataFrame) -> bool {
    check_quantamental_dataframe(df).is_ok()
 }
-/// Sort the columns of a Quantamental DataFrame.
+pub fn get_sorted_qdf_columns(columns: Vec<String>) -> Vec<String> {
 /// The first columns are `real_date`, `cid`, and `xcat`.
 /// These are followed by any available JPMAQS metrics, 'value', 'grading', 'eop_lag', 'mop_lag',
 /// (**in that order**), followed by any other metrics (in alphabetical order).
 pub fn sort_qdf_columns(qdf: &mut DataFrame) -> Result<(), Box<dyn Error>> {
    let index_columns = ["real_date", "cid", "xcat"];
    let known_metrics = ["value", "grading", "eop_lag", "mop_lag"];
-    let df_columns = qdf
+    let mut unknown_metrics: Vec<String> = columns
        .get_column_names()
        .into_iter()
        .map(|s| s.clone().into_string())
        .collect::<Vec<String>>();
    let mut unknown_metrics: Vec<String> = df_columns
        .iter()
        .filter(|&m| !known_metrics.contains(&m.as_str()))
        .filter(|&m| !index_columns.contains(&m.as_str()))
@@ -61,13 +51,30 @@ pub fn sort_qdf_columns(qdf: &mut DataFrame) -> Result<(), Box<dyn Error>> {
    let mut new_columns: Vec<String> = vec![];
    new_columns.extend(index_columns.iter().map(|s| s.to_string()));
    for &colname in &known_metrics {
-        if df_columns.contains(&colname.into()) {
+        if columns.contains(&colname.into()) {
            new_columns.push(colname.to_string());
        }
    }
    unknown_metrics.sort();
    new_columns.extend(unknown_metrics);
    new_columns
 }
 /// Sort the columns of a Quantamental DataFrame.
 /// The first columns are `real_date`, `cid`, and `xcat`.
 /// These are followed by any available JPMAQS metrics, 'value', 'grading', 'eop_lag', 'mop_lag',
 /// (**in that order**), followed by any other metrics (in alphabetical order).
 pub fn sort_qdf_columns(qdf: &mut DataFrame) -> Result<(), Box<dyn Error>> {
    let df_columns = qdf
        .get_column_names()
        .into_iter()
        .map(|s| s.to_string())
        .collect::<Vec<String>>();
    let new_columns = get_sorted_qdf_columns(df_columns);
    *qdf = qdf
        .select(new_columns.clone())
        .expect("Failed to select columns");
@@ -142,21 +149,23 @@ pub fn get_unique_xcats(df: &DataFrame) -> Result<Vec<String>, Box<dyn Error>> {
    get_unique_from_str_column(df, "xcat")
 }
 /// Lists the metric column names of a Quantamental DataFrame.
 ///
 /// The column names of `df` are put in order by `get_sorted_qdf_columns`, and every
 /// entry after the first three of that ordering is returned. The first three are
 /// expected to be the index columns `real_date`, `cid` and `xcat`.
 pub fn get_unique_metrics(df: &DataFrame) -> Result<Vec<String>, Box<dyn Error>> {
    let column_names: Vec<String> = df
        .get_column_names()
        .into_iter()
        .map(|name| name.as_str().to_owned())
        .collect();
    let mut ordered = get_sorted_qdf_columns(column_names);
    // Detach everything behind the three leading entries; that tail is the metric list.
    let metrics = ordered.split_off(3);
    Ok(metrics)
 }
 /// Returns the distinct values of the `real_date` column of a Quantamental DataFrame
 /// as a polars `Column`, sorted with `SortOptions::default()`.
 ///
 /// Any failure to look up the column, de-duplicate it, or sort it is propagated to
 /// the caller.
 pub fn get_unique_dates(df: &DataFrame) -> Result<Column, Box<dyn Error>> {
    let sorted_unique = df
        .column("real_date")?
        .unique()?
        .sort(SortOptions::default())?;
    Ok(sorted_unique)
 }
--- a/src/utils/qdf/pivots.rs
+++ b/src/utils/qdf/pivots.rs
@@ -27,8 +27,8 @@ pub fn pivot_dataframe_by_ticker(
        // set metric to the first non-index column
        metric = df.get_column_names()[3].to_string();
    }
    let unique_dates: Column = get_unique_dates(&df)?;
    // let mut new_df = df.clone();
    // keep only the index columns and the metric column
    let mut keep_cols = QDF_INDEX_COLUMNS
        .to_vec()
@@ -37,23 +37,24 @@ pub fn pivot_dataframe_by_ticker(
        .collect::<Vec<String>>();
    keep_cols.push(metric.clone());
-    return Err("Not implemented".into());
+    let out_dfs = split_df_by_tickers(&df, Some(vec![metric.clone()]))?;
    // create a new dataframe with the unique dates, and iteratively add the metric columns
    let mut new_df = DataFrame::new(vec![unique_dates])?;
    // new_df = new_df.select(keep_cols)?;
    // let ticker_col = get_ticker_column_for_quantamental_dataframe(&new_df)?;
    // new_df.with_column(ticker_col)?;
    // // drop the cid and xcat columns
    // new_df = new_df.drop_many(&["cid".to_string(), "xcat".to_string()]);
    // let dates_col = df.column("real_date")?;
    // Ok(df)
 }
 /// Splits a dataframe by ticker.
 #[allow(dead_code)]
-fn split_df_by_tickers(df: &DataFrame) -> Result<HashMap<String, DataFrame>, Box<dyn Error>> {
+fn split_df_by_tickers(
    df: &DataFrame,
    metrics: Option<Vec<String>>,
 ) -> Result<HashMap<String, DataFrame>, Box<dyn Error>> {
    check_quantamental_dataframe(df)?;
    let mut df_outs = HashMap::new();
@@ -66,6 +67,15 @@ fn split_df_by_tickers(df: &DataFrame) -> Result<HashMap<String, DataFrame>, Box
        icids.push(cid);
        ixcats.push(xcat);
    }
    let metrics = metrics.unwrap_or_else(|| get_unique_metrics(df).unwrap());
    // let keep_cols:Vec<String> = QDF_INDEX_COLUMNS + metrics;
    let mut keep_cols = QDF_INDEX_COLUMNS
        .to_vec()
        .iter()
        .map(|s| s.to_string())
        .collect::<Vec<String>>();
    keep_cols.extend(metrics);
    for (cid, xcat) in icids.iter().zip(ixcats.iter()) {
        // Apply filter while borrowing df and avoid moving ownership.
@@ -74,17 +84,39 @@ fn split_df_by_tickers(df: &DataFrame) -> Result<HashMap<String, DataFrame>, Box
            .and(col("xcat").eq(lit(xcat.clone())));
        let df_out = df.clone().lazy().filter(filter).collect()?;
        // select keep_cols
        let df_out = df_out.select(keep_cols.clone())?;
        df_outs.insert(format!("{}_{}", cid, xcat), df_out);
    }
    Ok(df_outs)
 }
-fn single_ticker_qdf_to_timeseries(df: &DataFrame) -> Result<Vec<DataFrame>, Box<dyn Error>> {
+fn single_ticker_qdf_to_timeseries(mut df: DataFrame) -> Result<Vec<DataFrame>, Box<dyn Error>> {
    let mut df_vec = Vec::new();
-    // copy the date col
+    // Since we own `df`, we can remove the "real_date" column, consuming it.
-    // let date_col =
+    let date_col = df.drop_in_place("real_date")?;
-    Ok(vec![df.to_owned()])
+    // Assuming `get_unique_metrics` only needs a reference to `df`
    let metrics = get_unique_metrics(&df)?;
    for metric in metrics.iter() {
        // Remove the metric column from `df`, consuming it.
        let metric_col = df.drop_in_place(metric)?;
        // Create a new DataFrame with the date column and the metric column.
        let new_df = DataFrame::new(vec![
            date_col.clone(), // Clone because `date_col` is used multiple times
            metric_col,       // Move the metric column (no clone needed)
        ])?;
        df_vec.push(new_df);
    }
    // At this point, `df` has had its metric columns removed and is effectively consumed.
    // The "date_col" has been moved out of `df` as well.
    // If you need to use `df` after this function, consider redesigning the function
    // to avoid consuming it.
    Ok(df_vec)
 }