From 7ea8aa6dd88d78fc9ddadbddaeaf144df3e1388a Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Tue, 19 Nov 2024 17:27:18 +0000 Subject: [PATCH] wip --- notebooks/python-notebook.ipynb | 279 ++++++++++++++++++++++++++++---- src/utils/qdf/mod.rs | 1 + src/utils/qdf/pivots.rs | 9 ++ 3 files changed, 254 insertions(+), 35 deletions(-) diff --git a/notebooks/python-notebook.ipynb b/notebooks/python-notebook.ipynb index 7311e77..e973adc 100644 --- a/notebooks/python-notebook.ipynb +++ b/notebooks/python-notebook.ipynb @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -62,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -71,21 +71,53 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "DATA_FOLDER_PATH = \"E:/Work/jpmaqs-data\"\n", - "# DATA_FOLDER_PATH = \"C:/Users/PalashTyagi/Code/go-dataquery/jpmaqs-data\"\n", + "# DATA_FOLDER_PATH = \"E:/Work/jpmaqs-data\"\n", + "DATA_FOLDER_PATH = \"C:/Users/PalashTyagi/Code/go-dataquery/jpmaqs-data\"\n", "DQ_CLIENT_ID = os.getenv(\"DQ_CLIENT_ID\")\n", "DQ_CLIENT_SECRET = os.getenv(\"DQ_CLIENT_SECRET\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64i64i64
2010-03-03"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0333
2010-03-04"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0434
2010-03-05"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0535
2010-03-08"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0838
2010-03-09"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0939
" + ], + "text/plain": [ + "shape: (5, 7)\n", + "┌────────────┬─────┬────────────────────┬───────────┬─────────┬─────────┬─────────┐\n", + "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n", + "╞════════════╪═════╪════════════════════╪═══════════╪═════════╪═════════╪═════════╡\n", + "│ 2010-03-03 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 3 ┆ 33 │\n", + "│ 2010-03-04 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 4 ┆ 34 │\n", + "│ 2010-03-05 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 5 ┆ 35 │\n", + "│ 2010-03-08 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 8 ┆ 38 │\n", + "│ 2010-03-09 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 9 ┆ 39 │\n", + "└────────────┴─────┴────────────────────┴───────────┴─────────┴─────────┴─────────┘" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "dfpath = f\"{DATA_FOLDER_PATH}/data/ADPEMPL_SA_P1M1ML1/USD_ADPEMPL_SA_P1M1ML1.csv\"\n", "\n", @@ -96,7 +128,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -119,53 +151,118 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ - "downloaded_df: pl.DataFrame = msyrs.download.download_jpmaqs_indicators_as_df(\n", - " client_id=DQ_CLIENT_ID,\n", - " client_secret=DQ_CLIENT_SECRET,\n", - " tickers=tickers,\n", - ")\n", - "downloaded_df.head(5)" + "# downloaded_df: pl.DataFrame = msyrs.download.download_jpmaqs_indicators_as_df(\n", + "# client_id=DQ_CLIENT_ID,\n", + "# client_secret=DQ_CLIENT_SECRET,\n", + "# tickers=tickers,\n", + "# )\n", + "# downloaded_df.head(5)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64i64i64
1990-04-26"AUD""CPIC_SA_P1M1ML12"6.4345992.026223
1990-04-27"AUD""CPIC_SA_P1M1ML12"6.4345992.027224
1990-04-30"AUD""CPIC_SA_P1M1ML12"6.4345992.030227
1990-05-01"AUD""CPIC_SA_P1M1ML12"6.4345992.031228
1990-05-02"AUD""CPIC_SA_P1M1ML12"6.4345992.032229
" + ], + "text/plain": [ + "shape: (5, 7)\n", + "┌────────────┬─────┬──────────────────┬──────────┬─────────┬─────────┬─────────┐\n", + "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n", + "╞════════════╪═════╪══════════════════╪══════════╪═════════╪═════════╪═════════╡\n", + "│ 1990-04-26 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 26 ┆ 223 │\n", + "│ 1990-04-27 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 27 ┆ 224 │\n", + "│ 1990-04-30 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 30 ┆ 227 │\n", + "│ 1990-05-01 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 31 ┆ 228 │\n", + "│ 1990-05-02 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 32 ┆ 229 │\n", + "└────────────┴─────┴──────────────────┴──────────┴─────────┴─────────┴─────────┘" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "big_df: pl.DataFrame = msyrs.qdf.load_qdf_from_download_bank(\n", - " folder_path=DATA_FOLDER_PATH, tickers=tickers\n", + " folder_path=DATA_FOLDER_PATH, xcats=xcats\n", ")\n", "big_df.head(5)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "275.7717933654785" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "big_df.estimated_size(\"mb\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "870.6678009033203" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "big_df.to_pandas().memory_usage(deep=True).sum() / 1024**2" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "211.74823188781738" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "macrosynergy.management.types.QuantamentalDataFrame(big_df.to_pandas()).memory_usage(\n", " deep=True\n", @@ -174,7 +271,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -184,9 +281,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (30, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64i64i64
2024-11-14"AUD""EQXR_NSA"0.3291881.000
2024-11-15"AUD""EQXR_NSA"0.8263461.000
2024-11-18"AUD""EQXR_NSA"0.1566831.000
2024-11-14"CAD""EQXR_NSA"0.1994021.000
2024-11-15"CAD""EQXR_NSA"-0.6965171.000
2024-11-15"GBP""EQXR_VT10"-0.0687781.000
2024-11-18"GBP""EQXR_VT10"0.4886261.000
2024-11-14"USD""EQXR_VT10"-0.5499831.000
2024-11-15"USD""EQXR_VT10"-1.1985441.000
2024-11-18"USD""EQXR_VT10"0.3493121.000
" + ], + "text/plain": [ + "shape: (30, 7)\n", + "┌────────────┬─────┬───────────┬───────────┬─────────┬─────────┬─────────┐\n", + "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n", + "╞════════════╪═════╪═══════════╪═══════════╪═════════╪═════════╪═════════╡\n", + "│ 2024-11-14 ┆ AUD ┆ EQXR_NSA ┆ 0.329188 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-15 ┆ AUD ┆ EQXR_NSA ┆ 0.826346 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-18 ┆ AUD ┆ EQXR_NSA ┆ 0.156683 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-14 ┆ CAD ┆ EQXR_NSA ┆ 0.199402 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-15 ┆ CAD ┆ EQXR_NSA ┆ -0.696517 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 2024-11-15 ┆ GBP ┆ EQXR_VT10 ┆ -0.068778 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-18 ┆ GBP ┆ EQXR_VT10 ┆ 0.488626 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-14 ┆ USD ┆ EQXR_VT10 ┆ -0.549983 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-15 ┆ USD ┆ EQXR_VT10 ┆ -1.198544 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-18 ┆ USD ┆ EQXR_VT10 ┆ 0.349312 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "└────────────┴─────┴───────────┴───────────┴─────────┴─────────┴─────────┘" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "eq_df = msyrs.qdf.reduce_dataframe(\n", " df=big_df,\n", @@ -199,7 +334,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -211,9 +346,46 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (10, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64i64i64
2024-11-14"AUD""EQXR_NSA"0.3291881.000
2024-11-15"AUD""EQXR_NSA"0.8263461.000
2024-11-18"AUD""EQXR_NSA"0.1566831.000
2024-11-14"CAD""EQXR_NSA"0.1994021.000
2024-11-15"CAD""EQXR_NSA"-0.6965171.000
2024-11-18"CAD""EQXR_NSA"0.1469611.000
2024-11-14"EUR""EQXR_NSA"2.0248891.000
2024-11-15"EUR""EQXR_NSA"-0.6615671.000
2024-11-18"EUR""EQXR_NSA"-0.1456821.000
2024-11-14"GBP""EQXR_NSA"0.5965331.000
" + ], + "text/plain": [ + "shape: (10, 7)\n", + "┌────────────┬─────┬──────────┬───────────┬─────────┬─────────┬─────────┐\n", + "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n", + "╞════════════╪═════╪══════════╪═══════════╪═════════╪═════════╪═════════╡\n", + "│ 2024-11-14 ┆ AUD ┆ EQXR_NSA ┆ 0.329188 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-15 ┆ AUD ┆ EQXR_NSA ┆ 0.826346 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-18 ┆ AUD ┆ EQXR_NSA ┆ 0.156683 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-14 ┆ CAD ┆ EQXR_NSA ┆ 0.199402 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-15 ┆ CAD ┆ EQXR_NSA ┆ -0.696517 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-18 ┆ CAD ┆ EQXR_NSA ┆ 0.146961 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-14 ┆ EUR ┆ EQXR_NSA ┆ 2.024889 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-15 ┆ EUR ┆ EQXR_NSA ┆ -0.661567 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-18 ┆ EUR ┆ EQXR_NSA ┆ -0.145682 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-14 ┆ GBP ┆ EQXR_NSA ┆ 0.596533 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "└────────────┴─────┴──────────┴───────────┴─────────┴─────────┴─────────┘" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "new_df: pl.DataFrame = msyrs.qdf.update_dataframe(df=eq_df, df_add=fx_df)\n", "\n", @@ -222,9 +394,46 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (10, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64i64i64
2024-11-18"AUD""FXXR_VT10"0.5185011.000
2024-11-14"CAD""FXXR_VT10"-1.1323141.000
2024-11-15"CAD""FXXR_VT10"-1.7556051.000
2024-11-18"CAD""FXXR_VT10"0.7516751.000
2024-11-14"EUR""FXXR_VT10"-0.2924221.000
2024-11-15"EUR""FXXR_VT10"-0.8551081.000
2024-11-18"EUR""FXXR_VT10"0.7918661.000
2024-11-14"GBP""FXXR_VT10"-0.1105261.000
2024-11-15"GBP""FXXR_VT10"-0.7009771.000
2024-11-18"GBP""FXXR_VT10"-0.1408051.000
" + ], + "text/plain": [ + "shape: (10, 7)\n", + "┌────────────┬─────┬───────────┬───────────┬─────────┬─────────┬─────────┐\n", + "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n", + "╞════════════╪═════╪═══════════╪═══════════╪═════════╪═════════╪═════════╡\n", + "│ 2024-11-18 ┆ AUD ┆ FXXR_VT10 ┆ 0.518501 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-14 ┆ CAD ┆ FXXR_VT10 ┆ -1.132314 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-15 ┆ CAD ┆ FXXR_VT10 ┆ -1.755605 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-18 ┆ CAD ┆ FXXR_VT10 ┆ 0.751675 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-14 ┆ EUR ┆ FXXR_VT10 ┆ -0.292422 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-15 ┆ EUR ┆ FXXR_VT10 ┆ -0.855108 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-18 ┆ EUR ┆ FXXR_VT10 ┆ 0.791866 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-14 ┆ GBP ┆ FXXR_VT10 ┆ -0.110526 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-15 ┆ GBP ┆ FXXR_VT10 ┆ -0.700977 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-18 ┆ GBP ┆ FXXR_VT10 ┆ -0.140805 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "└────────────┴─────┴───────────┴───────────┴─────────┴─────────┴─────────┘" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "new_df.tail(10)" ] @@ -232,7 +441,7 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -246,9 +455,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.7" + "version": "3.11.0" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/src/utils/qdf/mod.rs b/src/utils/qdf/mod.rs index 6e3a19f..e2386f0 100644 --- a/src/utils/qdf/mod.rs +++ b/src/utils/qdf/mod.rs @@ -3,6 +3,7 @@ pub mod update_df; pub mod load; pub mod reduce_df; pub mod pivots; + // Re-export submodules for easier access pub use core::*; pub use update_df::*; diff --git a/src/utils/qdf/pivots.rs b/src/utils/qdf/pivots.rs index 197b097..acf7acf 100644 --- a/src/utils/qdf/pivots.rs +++ b/src/utils/qdf/pivots.rs @@ -79,3 +79,12 @@ fn split_df_by_tickers(df: &DataFrame) -> Result, Box Ok(df_outs) } + +fn single_ticker_qdf_to_timeseries(df: &DataFrame) -> Result, Box> { + let mut df_vec = Vec::new(); + + // copy the date col + // let date_col = + + Ok(vec![df.to_owned()]) +}