diff --git a/notebooks/python-notebook.ipynb b/notebooks/python-notebook.ipynb index 8942e28..8335b3c 100644 --- a/notebooks/python-notebook.ipynb +++ b/notebooks/python-notebook.ipynb @@ -8,10 +8,8 @@ "\n", "First patch `pyo3-polars`:\n", "\n", - "\n", "- Use [this diff](https://github.com/pola-rs/pyo3-polars/compare/main...Magnus167:pyo3-polars:main) to make changes to the `pyo3-polars` package.\n", "\n", - "\n", "Install the package:\n", "\n", "```bash\n", @@ -81,7 +79,8 @@ "outputs": [], "source": [ "import time\n", - "startime = time.time()" + "\n", + "nb_start_time = time.time()" ] }, { @@ -89,6 +88,13 @@ "execution_count": 5, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Time taken to load qdf: 0.007575511932373047\n" + ] + }, { "data": { "text/html": [ @@ -124,8 +130,9 @@ "source": [ "dfpath = f\"{DATA_FOLDER_PATH}/data/ADPEMPL_SA_P1M1ML1/USD_ADPEMPL_SA_P1M1ML1.csv\"\n", "\n", - "\n", + "starttime = time.time()\n", "ldf: pl.DataFrame = msyrs.qdf.load_qdf(dfpath)\n", + "print(f\"Time taken to load qdf: {time.time() - starttime}\")\n", "ldf.head(5)" ] }, @@ -171,6 +178,57 @@ "execution_count": 8, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading the JPMAQS catalogue from DataQuery...\n", + "Downloaded JPMAQS catalogue with 18711 tickers.\n", + "Removed 21/600 expressions that are not in the JPMaQS catalogue.\n", + "Downloading data from JPMaQS.\n", + "Timestamp UTC: 2024-11-22 17:13:07\n", + "Connection successful!\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Requesting data: 100%|██████████| 29/29 [00:05<00:00, 4.91it/s]\n", + "Downloading data: 100%|██████████| 29/29 [00:22<00:00, 1.26it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Some dates are missing from the downloaded data. \n", + "2 out of 9107 dates are missing.\n" + ] + } + ], + "source": [ + "pddf = macrosynergy.download.JPMaQSDownload().download(\n", + " tickers=tickers,\n", + " get_catalogue=True,\n", + " show_progress=True,\n", + " start_date=\"1990-01-01\",\n", + ")\n", + "pddf = macrosynergy.management.types.QuantamentalDataFrame(pddf)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Time taken to load qdf batch: 1.8986454010009766\n" + ] + }, { "data": { "text/html": [ @@ -198,37 +256,26 @@ "└────────────┴─────┴──────────────────┴──────────┴─────────┴─────────┴─────────┘" ] }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "big_df: pl.DataFrame = msyrs.qdf.load_qdf_from_download_bank(\n", - " folder_path=DATA_FOLDER_PATH, xcats=xcats\n", - " # folder_path=DATA_FOLDER_PATH, cids=cids\n", - ")\n", - "big_df.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "277.8842191696167" - ] - }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "big_df.estimated_size(\"mb\")" + "starttime = time.time()\n", + "\n", + "big_df: pl.DataFrame = msyrs.qdf.load_qdf_from_download_bank(\n", + "\n", + " folder_path=DATA_FOLDER_PATH,\n", + " xcats=xcats,\n", + "\n", + " # folder_path=DATA_FOLDER_PATH, cids=cids\n", + "\n", + ")\n", + "print(f\"Time taken to load qdf batch: {time.time() - starttime}\")\n", + "\n", + "\n", + "big_df.head(5)" ] }, { @@ -239,7 +286,7 @@ { "data": { "text/plain": [ - "877.1654348373413" + "275.89989376068115" ] }, "execution_count": 10, @@ -248,7 +295,7 @@ } ], "source": [ - "big_df.to_pandas().memory_usage(deep=True).sum() / 1024**2" + "big_df.estimated_size(\"mb\")" ] }, { @@ -259,7 +306,7 @@ { "data": { "text/plain": [ - "213.30906867980957" + "871.0723962783813" ] }, "execution_count": 11, @@ -267,6 +314,26 @@ "output_type": "execute_result" } ], + "source": [ + "big_df.to_pandas().memory_usage(deep=True).sum() / 1024**2" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "211.8466453552246" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "macrosynergy.management.types.QuantamentalDataFrame(big_df.to_pandas()).memory_usage(\n", " deep=True\n", @@ -275,7 +342,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -285,9 +352,16 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Time taken to reduce qdf: 0.34674978256225586\n" + ] + }, { "data": { "text/html": [ @@ -321,38 +395,97 @@ "└────────────┴─────┴───────────┴───────────┴─────────┴─────────┴─────────┘" ] }, - "execution_count": 13, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "starttime = time.time()\n", "eq_df = msyrs.qdf.reduce_dataframe(\n", " df=big_df,\n", " cids=sel_cids,\n", " xcats=[\"EQXR_NSA\", \"EQXR_VT10\"],\n", + "\n", " start=start,\n", ")\n", + "print(f\"Time taken to reduce qdf: {time.time() - starttime}\")\n", "eq_df" ] }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "fx_xcats = [xc for xc in xcats if xc.startswith(\"FX\")]\n", - "fx_df = msyrs.qdf.reduce_dataframe(\n", - " df=big_df, cids=sel_cids, start=start, xcats=fx_xcats, intersect=True\n", - ")" - ] - }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Time taken to reduce qdf: 0.13223624229431152\n" + ] + } + ], + "source": [ + "starttime = time.time()\n", + "eq_pd_df = pddf.reduce_df(cids=sel_cids, xcats=[\"EQXR_NSA\", \"EQXR_VT10\"], start=start)\n", + "print(f\"Time taken to reduce qdf: {time.time() - starttime}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Time taken to reduce qdf: 0.3902719020843506\n" + ] + } + ], + "source": [ + "fx_xcats = [xc for xc in xcats if xc.startswith(\"FX\")]\n", + "starttime = time.time()\n", + "\n", + "fx_df = msyrs.qdf.reduce_dataframe(\n", + " df=big_df, cids=sel_cids, start=start, xcats=fx_xcats, intersect=True\n", + ")\n", + "print(f\"Time taken to reduce qdf: {time.time() - starttime}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Time taken to reduce qdf: 0.171736478805542\n" + ] + } + ], + "source": [ + "starttime = time.time()\n", + "fx_pd_df = pddf.reduce_df(cids=sel_cids, xcats=fx_xcats, start=start, intersect=True)\n", + "print(f\"Time taken to reduce qdf: {time.time() - starttime}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Time taken: 0.024325132369995117\n" + ] + }, { "data": { "text/html": [ @@ -385,20 +518,40 @@ "└────────────┴─────┴──────────┴───────────┴─────────┴─────────┴─────────┘" ] }, - "execution_count": 15, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "starttime = time.time()\n", "new_df: pl.DataFrame = msyrs.qdf.update_dataframe(df=eq_df, df_add=fx_df)\n", - "\n", + "print(\"Time taken: \", time.time() - starttime)\n", "new_df.head(10)" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Time taken: 0.8326597213745117\n" + ] + } + ], + "source": [ + "starttime = time.time()\n", + "new_pd_df = pddf.update_df(df_add=eq_pd_df,)\n", + "print(\"Time taken: \", time.time() - starttime)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -433,7 +586,7 @@ "└────────────┴─────┴───────────┴───────────┴─────────┴─────────┴─────────┘" ] }, - "execution_count": 16, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -444,77 +597,117 @@ }, { "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
real_date | EUR_EQXR_NSA | EUR_FXXR_NSA | EUR_FXXR_VT10 | GBP_FXCRR_NSA | CAD_FXCRR_NSA | GBP_FXTARGETED_NSA | CAD_FXTARGETED_NSA | AUD_FXXR_VT10 | AUD_EQXR_VT10 | EUR_FXTARGETED_NSA | USD_EQXR_NSA | EUR_FXUNTRADABLE_NSA | AUD_EQXR_NSA | AUD_FXCRR_NSA | USD_EQXR_VT10 | CAD_FXXR_NSA | CAD_EQXR_NSA | GBP_EQXR_NSA | EUR_FXCRR_NSA | CAD_FXXR_VT10 | GBP_FXXR_VT10 | GBP_FXUNTRADABLE_NSA | AUD_FXTARGETED_NSA | CAD_FXUNTRADABLE_NSA | GBP_FXXR_NSA | AUD_FXUNTRADABLE_NSA | GBP_EQXR_VT10 | EUR_EQXR_VT10 | AUD_FXXR_NSA | CAD_EQXR_VT10 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
date | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 |
1999-11-15 | 1.280812 | -0.044278 | -0.047169 | null | null | 0.0 | 0.0 | 0.162526 | null | 0.0 | 0.099886 | 0.0 | null | null | 0.042075 | -0.16911 | 1.59292 | 0.566529 | null | -0.399282 | 0.737643 | 0.0 | 0.0 | 0.0 | 0.39863 | 0.0 | 0.261301 | 0.607996 | 0.147108 | 0.782683 |
1999-11-16 | 0.596516 | 0.067358 | 0.071755 | null | null | 0.0 | 0.0 | -0.650292 | null | 0.0 | 1.995723 | 0.0 | null | null | 0.840653 | 0.100898 | -0.783972 | 0.746041 | null | 0.238229 | -0.587343 | 0.0 | 0.0 | 0.0 | -0.317407 | 0.0 | 0.344098 | 0.283164 | -0.588599 | -0.385205 |
1999-11-17 | 0.735294 | 0.860897 | 0.917107 | null | null | 0.0 | 0.0 | -0.413651 | null | 0.0 | -0.908456 | 0.0 | null | null | -0.382666 | -0.368379 | -0.054873 | -0.740517 | null | -0.869773 | -0.015875 | 0.0 | 0.0 | 0.0 | -0.008579 | 0.0 | -0.34155 | 0.349041 | -0.374409 | -0.026962 |
1999-11-18 | 1.012479 | -1.147048 | -1.221942 | null | null | 0.0 | 0.0 | -0.107377 | null | 0.0 | 0.747532 | 0.0 | null | null | 0.314881 | 0.215459 | 0.614912 | 0.045676 | null | 0.508717 | -0.040124 | 0.0 | 0.0 | 0.0 | -0.021683 | 0.0 | 0.021067 | 0.48062 | -0.097191 | 0.302137 |
1999-11-19 | 0.34965 | 0.077241 | 0.082284 | null | null | 0.0 | 0.0 | 0.067403 | null | 0.0 | -0.076998 | 0.0 | null | null | -0.032434 | 0.267814 | 0.622067 | -0.989195 | null | 0.63233 | 0.465899 | 0.0 | 0.0 | 0.0 | 0.251777 | 0.0 | -0.456249 | 0.165977 | 0.061008 | 0.305653 |
1999-11-22 | -2.090592 | 0.222742 | 0.237285 | null | null | 0.0 | 0.0 | -0.623692 | null | 0.0 | -0.021016 | 0.0 | null | null | -0.008852 | -0.029578 | -0.824295 | -0.814633 | null | -0.069836 | -0.077427 | 0.0 | 0.0 | 0.0 | -0.041842 | 0.0 | -0.375735 | -0.992395 | -0.564523 | -0.405018 |
1999-11-23 | 0.54567 | -0.484609 | -0.51625 | null | null | 0.0 | 0.0 | -0.1249 | null | 0.0 | -1.065022 | 0.0 | null | null | -0.448616 | -0.234997 | 1.049869 | 1.62715 | null | -0.554847 | 0.907404 | 0.0 | 0.0 | 0.0 | 0.490371 | 0.0 | 0.750494 | 0.259027 | -0.113051 | 0.515854 |
1999-11-24 | 0.0 | -0.846988 | -0.902291 | null | null | 0.0 | 0.0 | -0.53942 | null | 0.0 | 0.672805 | 0.0 | null | null | 0.283403 | -0.003023 | 0.757576 | 0.465081 | null | -0.007137 | -0.866831 | 0.0 | 0.0 | 0.0 | -0.468445 | 0.0 | 0.21451 | 0.0 | -0.488246 | 0.372235 |
1999-11-26 | 0.780712 | -0.353428 | -0.376504 | null | null | 0.0 | 0.0 | -0.279707 | null | 0.0 | -0.485403 | 0.0 | null | null | -0.204465 | -0.137121 | 0.297556 | -0.044776 | null | -0.323755 | -0.553951 | 0.0 | 0.0 | 0.0 | -0.299362 | 0.0 | -0.020652 | 0.3706 | -0.253172 | 0.146204 |
1999-11-29 | -0.615174 | -0.696737 | -0.742228 | null | null | 0.0 | 0.0 | 0.224152 | null | 0.0 | -0.296904 | 0.0 | null | null | -0.125064 | -0.423601 | -1.970757 | 0.029864 | null | -1.000157 | 0.516482 | 0.0 | 0.0 | 0.0 | 0.279113 | 0.0 | 0.013774 | -0.292021 | 0.202887 | -0.968333 |