diff --git a/notebooks/python-notebook.ipynb b/notebooks/python-notebook.ipynb index e973adc..b66eace 100644 --- a/notebooks/python-notebook.ipynb +++ b/notebooks/python-notebook.ipynb @@ -437,11 +437,32 @@ "source": [ "new_df.tail(10)" ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "ename": "PanicException", + "evalue": "called `Result::unwrap()` on an `Err` value: Duplicate(ErrString(\"unable to hstack, column with name \\\"value_right\\\" already exists\"))", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mPanicException\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[16], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mmsyrs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mqdf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpivot_dataframe_by_ticker\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnew_df\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mhead(\u001b[38;5;241m10\u001b[39m)\n", + "\u001b[1;31mPanicException\u001b[0m: called `Result::unwrap()` on an `Err` value: Duplicate(ErrString(\"unable to hstack, column with name \\\"value_right\\\" already exists\"))" + ] + } + ], + "source": [ + "msyrs.qdf.pivot_dataframe_by_ticker(df=new_df).head(10)" + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": ".venv", "language": "python", "name": "python3" }, diff --git a/src/py/qdf.pyi b/src/py/qdf.pyi index 3030965..960d281 100644 --- a/src/py/qdf.pyi +++ b/src/py/qdf.pyi @@ -8,17 +8,16 @@ __all__ = [ ] # qdf/load_qdf.pyi -def load_qdf(*args, **kwargs) -> DataFrame: - ... +def load_qdf(*args, **kwargs) -> DataFrame: ... # qdf/load_qdf_from_download_bank.pyi -def load_qdf_from_download_bank(*args, **kwargs) -> DataFrame: - ... +def load_qdf_from_download_bank(*args, **kwargs) -> DataFrame: ... # qdf/reduce_dataframe.pyi -def reduce_dataframe(*args, **kwargs) -> DataFrame: - ... +def reduce_dataframe(*args, **kwargs) -> DataFrame: ... # qdf/update_dataframe.pyi -def update_dataframe(*args, **kwargs) -> DataFrame: - ... +def update_dataframe(*args, **kwargs) -> DataFrame: ... + +# qdf/pivots.pyi +def pivot_dataframe_by_ticker(*args, **kwargs) -> DataFrame: ... diff --git a/src/py/qdf.rs b/src/py/qdf.rs index 4273584..ea83597 100644 --- a/src/py/qdf.rs +++ b/src/py/qdf.rs @@ -9,6 +9,7 @@ pub fn qdf(_py: Python, m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(load_qdf_from_download_bank, m)?)?; m.add_function(wrap_pyfunction!(reduce_dataframe, m)?)?; m.add_function(wrap_pyfunction!(update_dataframe, m)?)?; + m.add_function(wrap_pyfunction!(pivot_dataframe_by_ticker, m)?)?; Ok(()) } @@ -76,3 +77,12 @@ pub fn update_dataframe( .unwrap(), )) } + +/// Python wrapper for pivoting a dataframe by ticker. +/// See [`crate::utils::qdf::pivots::pivot_dataframe_by_ticker`] for full documentation. +#[pyfunction] +pub fn pivot_dataframe_by_ticker(df: PyDataFrame, metric: Option) -> PyResult { + Ok(PyDataFrame( + crate::utils::qdf::pivots::pivot_dataframe_by_ticker(df.into(), metric).unwrap(), + )) +} diff --git a/src/utils/qdf/pivots.rs b/src/utils/qdf/pivots.rs index 4699870..7d6bbb3 100644 --- a/src/utils/qdf/pivots.rs +++ b/src/utils/qdf/pivots.rs @@ -37,16 +37,20 @@ pub fn pivot_dataframe_by_ticker( .collect::>(); keep_cols.push(metric.clone()); - let out_dfs = split_df_by_tickers(&df, Some(vec![metric.clone()]))?; + let mut out_dfs = split_df_by_tickers(&df, Some(vec![metric.clone()]))?; + for (_, odf) in out_dfs.iter_mut() { + // select keep_cols + *odf = odf.select(vec!["real_date", &*metric.clone()])?; + } // create a new dataframe with the unique dates, and iteratively add add the metric columns let mut new_df = DataFrame::new(vec![unique_dates])?; - - - - + for (_, odf) in out_dfs.iter() { + new_df = new_df.left_join(odf, ["real_date"], ["real_date"])?; + } + Ok(new_df) } /// Splits a dataframe by ticker. @@ -92,6 +96,9 @@ fn split_df_by_tickers( Ok(df_outs) } +/// Splits a QDF container data for a single ticker into a Vec of DataFrames (one per metric). +/// The resulting DataFrames will have the "real_date" column and the metric column. +#[allow(dead_code)] fn single_ticker_qdf_to_timeseries(mut df: DataFrame) -> Result, Box> { let mut df_vec = Vec::new();