wip

2025-11-19 14:06:11 +00:00 · 2024-11-20 18:21:48 +00:00
parent fdc5f7d95f
commit 99a33237d6
4 changed files with 51 additions and 14 deletions
--- a/notebooks/python-notebook.ipynb
+++ b/notebooks/python-notebook.ipynb
@@ -437,11 +437,32 @@
   "source": [
    "new_df.tail(10)"
   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "PanicException",
+     "evalue": "called `Result::unwrap()` on an `Err` value: Duplicate(ErrString(\"unable to hstack, column with name \\\"value_right\\\" already exists\"))",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mPanicException\u001b[0m                            Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[16], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mmsyrs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mqdf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpivot_dataframe_by_ticker\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnew_df\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mhead(\u001b[38;5;241m10\u001b[39m)\n",
+      "\u001b[1;31mPanicException\u001b[0m: called `Result::unwrap()` on an `Err` value: Duplicate(ErrString(\"unable to hstack, column with name \\\"value_right\\\" already exists\"))"
+     ]
+    }
+   ],
+   "source": [
+    "msyrs.qdf.pivot_dataframe_by_ticker(df=new_df).head(10)"
+   ]
  }
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
--- a/src/py/qdf.pyi
+++ b/src/py/qdf.pyi
@@ -8,17 +8,16 @@ __all__ = [
 ]

 # qdf/load_qdf.pyi
-def load_qdf(*args, **kwargs) -> DataFrame: 
-    ...
+def load_qdf(*args, **kwargs) -> DataFrame: ...

 # qdf/load_qdf_from_download_bank.pyi
-def load_qdf_from_download_bank(*args, **kwargs) -> DataFrame: 
-    ...
+def load_qdf_from_download_bank(*args, **kwargs) -> DataFrame: ...

 # qdf/reduce_dataframe.pyi
-def reduce_dataframe(*args, **kwargs) -> DataFrame: 
-    ...
+def reduce_dataframe(*args, **kwargs) -> DataFrame: ...

 # qdf/update_dataframe.pyi
-def update_dataframe(*args, **kwargs) -> DataFrame: 
-    ...
+def update_dataframe(*args, **kwargs) -> DataFrame: ...
+
+# qdf/pivots.pyi
+def pivot_dataframe_by_ticker(*args, **kwargs) -> DataFrame: ...
--- a/src/py/qdf.rs
+++ b/src/py/qdf.rs
@@ -9,6 +9,7 @@ pub fn qdf(_py: Python, m: &PyModule) -> PyResult<()> {
    m.add_function(wrap_pyfunction!(load_qdf_from_download_bank, m)?)?;
    m.add_function(wrap_pyfunction!(reduce_dataframe, m)?)?;
    m.add_function(wrap_pyfunction!(update_dataframe, m)?)?;
+    m.add_function(wrap_pyfunction!(pivot_dataframe_by_ticker, m)?)?;
    Ok(())
 }

@@ -76,3 +77,12 @@ pub fn update_dataframe(
            .unwrap(),
    ))
 }
+
+/// Python wrapper for pivoting a dataframe by ticker.
+/// See [`crate::utils::qdf::pivots::pivot_dataframe_by_ticker`] for full documentation.
+///
+/// # Errors
+/// Raises a Python `RuntimeError` carrying the underlying error message when the
+/// pivot fails, instead of panicking with a `PanicException`.
+#[pyfunction]
+pub fn pivot_dataframe_by_ticker(df: PyDataFrame, metric: Option<String>) -> PyResult<PyDataFrame> {
+    crate::utils::qdf::pivots::pivot_dataframe_by_ticker(df.into(), metric)
+        .map(PyDataFrame)
+        // Surface failures (e.g. duplicate column names) as a catchable Python exception.
+        .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(e.to_string()))
+}
--- a/src/utils/qdf/pivots.rs
+++ b/src/utils/qdf/pivots.rs
@@ -37,16 +37,20 @@ pub fn pivot_dataframe_by_ticker(
        .collect::<Vec<String>>();
    keep_cols.push(metric.clone());

-    let out_dfs = split_df_by_tickers(&df, Some(vec![metric.clone()]))?;
+    let mut out_dfs = split_df_by_tickers(&df, Some(vec![metric.clone()]))?;
+    for (_, odf) in out_dfs.iter_mut() {
+        // select keep_cols
+        *odf = odf.select(vec!["real_date", &*metric.clone()])?;
+    }

    // create a new dataframe with the unique dates, and iteratively add the metric columns
    let mut new_df = DataFrame::new(vec![unique_dates])?;

-    
-
-
-
+    for (_, odf) in out_dfs.iter() {
+        new_df = new_df.left_join(odf, ["real_date"], ["real_date"])?;
+    }

+    Ok(new_df)
 }

 /// Splits a dataframe by ticker.
@@ -92,6 +96,9 @@ fn split_df_by_tickers(
    Ok(df_outs)
 }

+/// Splits the QDF data for a single ticker into a Vec of DataFrames (one per metric).
+/// The resulting DataFrames will have the "real_date" column and the metric column.
+#[allow(dead_code)]
 fn single_ticker_qdf_to_timeseries(mut df: DataFrame) -> Result<Vec<DataFrame>, Box<dyn Error>> {
    let mut df_vec = Vec::new();