From 6fb9b0f076b27a94bd35fb426a7bb6ef88a0508a Mon Sep 17 00:00:00 2001 From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com> Date: Tue, 26 Nov 2024 23:59:45 +0000 Subject: [PATCH] wip: hist vol --- notebooks/python-notebook.ipynb | 847 +++----------------------------- src/panel/historic_vol.rs | 218 +++++++- 2 files changed, 284 insertions(+), 781 deletions(-) diff --git a/notebooks/python-notebook.ipynb b/notebooks/python-notebook.ipynb index a5f11d2..0b3f0e3 100644 --- a/notebooks/python-notebook.ipynb +++ b/notebooks/python-notebook.ipynb @@ -33,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -53,7 +53,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -62,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -74,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -85,48 +85,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time taken to load qdf: 0.0040130615234375\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "shape: (5, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64i64i64
2010-03-03"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0333
2010-03-04"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0434
2010-03-05"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0535
2010-03-08"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0838
2010-03-09"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0939
" - ], - "text/plain": [ - "shape: (5, 7)\n", - "┌────────────┬─────┬────────────────────┬───────────┬─────────┬─────────┬─────────┐\n", - "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n", - "╞════════════╪═════╪════════════════════╪═══════════╪═════════╪═════════╪═════════╡\n", - "│ 2010-03-03 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 3 ┆ 33 │\n", - "│ 2010-03-04 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 4 ┆ 34 │\n", - "│ 2010-03-05 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 5 ┆ 35 │\n", - "│ 2010-03-08 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 8 ┆ 38 │\n", - "│ 2010-03-09 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 9 ┆ 39 │\n", - "└────────────┴─────┴────────────────────┴───────────┴─────────┴─────────┴─────────┘" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "dfpath = f\"{DATA_FOLDER_PATH}/data/ADPEMPL_SA_P1M1ML1/USD_ADPEMPL_SA_P1M1ML1.csv\"\n", "\n", @@ -138,7 +99,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -161,53 +122,21 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (5, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64f64f64
1990-04-26"AUD""CPIC_SA_P1M1ML12"6.4345992.026.0223.0
1990-04-27"AUD""CPIC_SA_P1M1ML12"6.4345992.027.0224.0
1990-04-30"AUD""CPIC_SA_P1M1ML12"6.4345992.030.0227.0
1990-05-01"AUD""CPIC_SA_P1M1ML12"6.4345992.031.0228.0
1990-05-02"AUD""CPIC_SA_P1M1ML12"6.4345992.032.0229.0
" - ], - "text/plain": [ - "shape: (5, 7)\n", - "┌────────────┬─────┬──────────────────┬──────────┬─────────┬─────────┬─────────┐\n", - "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", - "╞════════════╪═════╪══════════════════╪══════════╪═════════╪═════════╪═════════╡\n", - "│ 1990-04-26 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 26.0 ┆ 223.0 │\n", - "│ 1990-04-27 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 27.0 ┆ 224.0 │\n", - "│ 1990-04-30 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 30.0 ┆ 227.0 │\n", - "│ 1990-05-01 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 31.0 ┆ 228.0 │\n", - "│ 1990-05-02 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 32.0 ┆ 229.0 │\n", - "└────────────┴─────┴──────────────────┴──────────┴─────────┴─────────┴─────────┘" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "downloaded_df: pl.DataFrame = msyrs.download.download_jpmaqs_indicators_as_df(\n", - " client_id=DQ_CLIENT_ID,\n", - " client_secret=DQ_CLIENT_SECRET,\n", - " tickers=tickers,\n", - ")\n", - "downloaded_df.head(5)" + "# downloaded_df: pl.DataFrame = msyrs.download.download_jpmaqs_indicators_as_df(\n", + "# client_id=DQ_CLIENT_ID,\n", + "# client_secret=DQ_CLIENT_SECRET,\n", + "# tickers=tickers,\n", + "# )\n", + "# downloaded_df.head(5)" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -216,92 +145,24 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Downloading the JPMAQS catalogue from DataQuery...\n", - "Downloaded JPMAQS catalogue with 18711 tickers.\n", - "Removed 21/600 expressions that are not in the JPMaQS catalogue.\n", - "Downloading data from JPMaQS.\n", - "Timestamp UTC: 2024-11-22 20:35:07\n", - "Connection successful!\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Requesting data: 100%|██████████| 29/29 [00:05<00:00, 4.93it/s]\n", - "Downloading data: 100%|██████████| 29/29 [00:16<00:00, 1.80it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Some dates are missing from the downloaded data. \n", - "2 out of 9107 dates are missing.\n" - ] - } - ], + "outputs": [], "source": [ - "pddf = macrosynergy.download.JPMaQSDownload().download(\n", - " tickers=tickers,\n", - " get_catalogue=True,\n", - " show_progress=True,\n", - " start_date=\"1990-01-01\",\n", - ")\n", - "pddf = macrosynergy.management.types.QuantamentalDataFrame(pddf)" + "# pddf = macrosynergy.download.JPMaQSDownload().download(\n", + "# tickers=tickers,\n", + "# get_catalogue=True,\n", + "# show_progress=True,\n", + "# start_date=\"1990-01-01\",\n", + "# )\n", + "# pddf = macrosynergy.management.types.QuantamentalDataFrame(pddf)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time taken to load qdf batch: 1.1382660865783691\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "shape: (5, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64i64i64
1990-04-26"AUD""CPIC_SA_P1M1ML12"6.4345992.026223
1990-04-27"AUD""CPIC_SA_P1M1ML12"6.4345992.027224
1990-04-30"AUD""CPIC_SA_P1M1ML12"6.4345992.030227
1990-05-01"AUD""CPIC_SA_P1M1ML12"6.4345992.031228
1990-05-02"AUD""CPIC_SA_P1M1ML12"6.4345992.032229
" - ], - "text/plain": [ - "shape: (5, 7)\n", - "┌────────────┬─────┬──────────────────┬──────────┬─────────┬─────────┬─────────┐\n", - "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n", - "╞════════════╪═════╪══════════════════╪══════════╪═════════╪═════════╪═════════╡\n", - "│ 1990-04-26 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 26 ┆ 223 │\n", - "│ 1990-04-27 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 27 ┆ 224 │\n", - "│ 1990-04-30 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 30 ┆ 227 │\n", - "│ 1990-05-01 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 31 ┆ 228 │\n", - "│ 1990-05-02 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 32 ┆ 229 │\n", - "└────────────┴─────┴──────────────────┴──────────┴─────────┴─────────┴─────────┘" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "starttime = time.time()\n", "\n", @@ -321,414 +182,45 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "275.7299613952637" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "big_df.estimated_size(\"mb\")" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
real_datecidxcatvaluegradingeop_lagmop_lag
01990-04-26AUDCPIC_SA_P1M1ML126.4345992.026223
11990-04-27AUDCPIC_SA_P1M1ML126.4345992.027224
21990-04-30AUDCPIC_SA_P1M1ML126.4345992.030227
31990-05-01AUDCPIC_SA_P1M1ML126.4345992.031228
41990-05-02AUDCPIC_SA_P1M1ML126.4345992.032229
........................
52855772024-11-08ZARRYLDIRS05Y_NSA3.0975341.000
52855782024-11-12ZARRYLDIRS05Y_NSA3.1898841.000
52855792024-11-13ZARRYLDIRS05Y_NSA3.1080951.000
52855802024-11-14ZARRYLDIRS05Y_NSA3.0609681.000
52855812024-11-15ZARRYLDIRS05Y_NSA3.1020661.000
\n", - "

5285582 rows × 7 columns

\n", - "
" - ], - "text/plain": [ - " real_date cid xcat value grading eop_lag mop_lag\n", - "0 1990-04-26 AUD CPIC_SA_P1M1ML12 6.434599 2.0 26 223\n", - "1 1990-04-27 AUD CPIC_SA_P1M1ML12 6.434599 2.0 27 224\n", - "2 1990-04-30 AUD CPIC_SA_P1M1ML12 6.434599 2.0 30 227\n", - "3 1990-05-01 AUD CPIC_SA_P1M1ML12 6.434599 2.0 31 228\n", - "4 1990-05-02 AUD CPIC_SA_P1M1ML12 6.434599 2.0 32 229\n", - "... ... ... ... ... ... ... ...\n", - "5285577 2024-11-08 ZAR RYLDIRS05Y_NSA 3.097534 1.0 0 0\n", - "5285578 2024-11-12 ZAR RYLDIRS05Y_NSA 3.189884 1.0 0 0\n", - "5285579 2024-11-13 ZAR RYLDIRS05Y_NSA 3.108095 1.0 0 0\n", - "5285580 2024-11-14 ZAR RYLDIRS05Y_NSA 3.060968 1.0 0 0\n", - "5285581 2024-11-15 ZAR RYLDIRS05Y_NSA 3.102066 1.0 0 0\n", - "\n", - "[5285582 rows x 7 columns]" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "big_df.to_pandas()" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "789.8839149475098" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "big_df.to_pandas().memory_usage(deep=True).sum() / 1024**2" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
real_datecidxcatvaluegradingeop_lagmop_lag
01990-04-26AUDCPIC_SA_P1M1ML126.4345992.026223
11990-04-27AUDCPIC_SA_P1M1ML126.4345992.027224
21990-04-30AUDCPIC_SA_P1M1ML126.4345992.030227
31990-05-01AUDCPIC_SA_P1M1ML126.4345992.031228
41990-05-02AUDCPIC_SA_P1M1ML126.4345992.032229
........................
52855772024-11-08ZARRYLDIRS05Y_NSA3.0975341.000
52855782024-11-12ZARRYLDIRS05Y_NSA3.1898841.000
52855792024-11-13ZARRYLDIRS05Y_NSA3.1080951.000
52855802024-11-14ZARRYLDIRS05Y_NSA3.0609681.000
52855812024-11-15ZARRYLDIRS05Y_NSA3.1020661.000
\n", - "

5285582 rows × 7 columns

\n", - "
" - ], - "text/plain": [ - " real_date cid xcat value grading eop_lag mop_lag\n", - "0 1990-04-26 AUD CPIC_SA_P1M1ML12 6.434599 2.0 26 223\n", - "1 1990-04-27 AUD CPIC_SA_P1M1ML12 6.434599 2.0 27 224\n", - "2 1990-04-30 AUD CPIC_SA_P1M1ML12 6.434599 2.0 30 227\n", - "3 1990-05-01 AUD CPIC_SA_P1M1ML12 6.434599 2.0 31 228\n", - "4 1990-05-02 AUD CPIC_SA_P1M1ML12 6.434599 2.0 32 229\n", - "... ... ... ... ... ... ... ...\n", - "5285577 2024-11-08 ZAR RYLDIRS05Y_NSA 3.097534 1.0 0 0\n", - "5285578 2024-11-12 ZAR RYLDIRS05Y_NSA 3.189884 1.0 0 0\n", - "5285579 2024-11-13 ZAR RYLDIRS05Y_NSA 3.108095 1.0 0 0\n", - "5285580 2024-11-14 ZAR RYLDIRS05Y_NSA 3.060968 1.0 0 0\n", - "5285581 2024-11-15 ZAR RYLDIRS05Y_NSA 3.102066 1.0 0 0\n", - "\n", - "[5285582 rows x 7 columns]" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "macrosynergy.management.types.QuantamentalDataFrame(big_df.to_pandas())" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "211.71554374694824" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "macrosynergy.management.types.QuantamentalDataFrame(big_df.to_pandas()).memory_usage(\n", " deep=True\n", @@ -737,7 +229,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -747,54 +239,9 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time taken to reduce qdf: 0.2668800354003906\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "shape: (73_069, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64i64i64
2000-05-04"AUD""EQXR_NSA"-1.2516051.000
2000-05-05"AUD""EQXR_NSA"1.7874551.000
2000-05-08"AUD""EQXR_NSA"-0.5747131.000
2000-05-09"AUD""EQXR_NSA"-0.9312781.000
2000-05-10"AUD""EQXR_NSA"-1.5235011.000
2024-11-08"USD""EQXR_VT10"0.3138751.000
2024-11-12"USD""EQXR_VT10"-0.2724571.000
2024-11-13"USD""EQXR_VT10"0.0437291.000
2024-11-14"USD""EQXR_VT10"-0.5499831.000
2024-11-15"USD""EQXR_VT10"-1.1985441.000
" - ], - "text/plain": [ - "shape: (73_069, 7)\n", - "┌────────────┬─────┬───────────┬───────────┬─────────┬─────────┬─────────┐\n", - "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n", - "╞════════════╪═════╪═══════════╪═══════════╪═════════╪═════════╪═════════╡\n", - "│ 2000-05-04 ┆ AUD ┆ EQXR_NSA ┆ -1.251605 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2000-05-05 ┆ AUD ┆ EQXR_NSA ┆ 1.787455 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2000-05-08 ┆ AUD ┆ EQXR_NSA ┆ -0.574713 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2000-05-09 ┆ AUD ┆ EQXR_NSA ┆ -0.931278 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2000-05-10 ┆ AUD ┆ EQXR_NSA ┆ -1.523501 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 2024-11-08 ┆ USD ┆ EQXR_VT10 ┆ 0.313875 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2024-11-12 ┆ USD ┆ EQXR_VT10 ┆ -0.272457 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2024-11-13 ┆ USD ┆ EQXR_VT10 ┆ 0.043729 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2024-11-14 ┆ USD ┆ EQXR_VT10 ┆ -0.549983 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2024-11-15 ┆ USD ┆ EQXR_VT10 ┆ -1.198544 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "└────────────┴─────┴───────────┴───────────┴─────────┴─────────┴─────────┘" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "starttime = time.time()\n", "eq_df = msyrs.qdf.reduce_dataframe(\n", @@ -810,36 +257,20 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time taken to reduce qdf: 0.057086944580078125\n" - ] - } - ], + "outputs": [], "source": [ - "starttime = time.time()\n", - "eq_pd_df = pddf.reduce_df(cids=sel_cids, xcats=[\"EQXR_NSA\", \"EQXR_VT10\"], start=start)\n", - "print(f\"Time taken to reduce qdf: {time.time() - starttime}\")" + "# starttime = time.time()\n", + "# eq_pd_df = pddf.reduce_df(cids=sel_cids, xcats=[\"EQXR_NSA\", \"EQXR_VT10\"], start=start)\n", + "# print(f\"Time taken to reduce qdf: {time.time() - starttime}\")" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time taken to reduce qdf: 0.298140287399292\n" - ] - } - ], + "outputs": [], "source": [ "fx_xcats = [xc for xc in xcats if xc.startswith(\"FX\")]\n", "starttime = time.time()\n", @@ -852,72 +283,20 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time taken to reduce qdf: 0.08732080459594727\n" - ] - } - ], + "outputs": [], "source": [ - "starttime = time.time()\n", - "fx_pd_df = pddf.reduce_df(cids=sel_cids, xcats=fx_xcats, start=start, intersect=True)\n", - "print(f\"Time taken to reduce qdf: {time.time() - starttime}\")" + "# starttime = time.time()\n", + "# fx_pd_df = pddf.reduce_df(cids=sel_cids, xcats=fx_xcats, start=start, intersect=True)\n", + "# print(f\"Time taken to reduce qdf: {time.time() - starttime}\")" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time taken: 0.01651310920715332\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "shape: (10, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64i64i64
2000-05-04"AUD""EQXR_NSA"-1.2516051.000
2000-05-05"AUD""EQXR_NSA"1.7874551.000
2000-05-08"AUD""EQXR_NSA"-0.5747131.000
2000-05-09"AUD""EQXR_NSA"-0.9312781.000
2000-05-10"AUD""EQXR_NSA"-1.5235011.000
2000-05-11"AUD""EQXR_NSA"-1.5799871.000
2000-05-12"AUD""EQXR_NSA"1.806021.000
2000-05-15"AUD""EQXR_NSA"0.2956641.000
2000-05-16"AUD""EQXR_NSA"1.3101871.000
2000-05-17"AUD""EQXR_NSA"-0.7112841.000
" - ], - "text/plain": [ - "shape: (10, 7)\n", - "┌────────────┬─────┬──────────┬───────────┬─────────┬─────────┬─────────┐\n", - "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n", - "╞════════════╪═════╪══════════╪═══════════╪═════════╪═════════╪═════════╡\n", - "│ 2000-05-04 ┆ AUD ┆ EQXR_NSA ┆ -1.251605 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2000-05-05 ┆ AUD ┆ EQXR_NSA ┆ 1.787455 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2000-05-08 ┆ AUD ┆ EQXR_NSA ┆ -0.574713 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2000-05-09 ┆ AUD ┆ EQXR_NSA ┆ -0.931278 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2000-05-10 ┆ AUD ┆ EQXR_NSA ┆ -1.523501 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2000-05-11 ┆ AUD ┆ EQXR_NSA ┆ -1.579987 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2000-05-12 ┆ AUD ┆ EQXR_NSA ┆ 1.80602 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2000-05-15 ┆ AUD ┆ EQXR_NSA ┆ 0.295664 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2000-05-16 ┆ AUD ┆ EQXR_NSA ┆ 1.310187 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2000-05-17 ┆ AUD ┆ EQXR_NSA ┆ -0.711284 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "└────────────┴─────┴──────────┴───────────┴─────────┴─────────┴─────────┘" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "starttime = time.time()\n", "new_df: pl.DataFrame = msyrs.qdf.update_dataframe(df=eq_df, df_add=fx_df)\n", @@ -927,83 +306,29 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time taken: 0.4808037281036377\n" - ] - } - ], + "outputs": [], "source": [ - "starttime = time.time()\n", - "new_pd_df = pddf.update_df(df_add=eq_pd_df,)\n", - "print(\"Time taken: \", time.time() - starttime)" + "# starttime = time.time()\n", + "# new_pd_df = pddf.update_df(df_add=eq_pd_df,)\n", + "# print(\"Time taken: \", time.time() - starttime)" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (10, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64i64i64
2024-11-01"GBP""FXXR_VT10"1.378051.000
2024-11-04"GBP""FXXR_VT10"-0.1119311.000
2024-11-05"GBP""FXXR_VT10"0.252941.000
2024-11-06"GBP""FXXR_VT10"-0.1751611.000
2024-11-07"GBP""FXXR_VT10"0.8066821.000
2024-11-08"GBP""FXXR_VT10"-0.2473461.000
2024-11-12"GBP""FXXR_VT10"-1.0831371.000
2024-11-13"GBP""FXXR_VT10"-0.3289581.000
2024-11-14"GBP""FXXR_VT10"-0.1105261.000
2024-11-15"GBP""FXXR_VT10"-0.7009771.000
" - ], - "text/plain": [ - "shape: (10, 7)\n", - "┌────────────┬─────┬───────────┬───────────┬─────────┬─────────┬─────────┐\n", - "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n", - "╞════════════╪═════╪═══════════╪═══════════╪═════════╪═════════╪═════════╡\n", - "│ 2024-11-01 ┆ GBP ┆ FXXR_VT10 ┆ 1.37805 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2024-11-04 ┆ GBP ┆ FXXR_VT10 ┆ -0.111931 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2024-11-05 ┆ GBP ┆ FXXR_VT10 ┆ 0.25294 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2024-11-06 ┆ GBP ┆ FXXR_VT10 ┆ -0.175161 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2024-11-07 ┆ GBP ┆ FXXR_VT10 ┆ 0.806682 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2024-11-08 ┆ GBP ┆ FXXR_VT10 ┆ -0.247346 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2024-11-12 ┆ GBP ┆ FXXR_VT10 ┆ -1.083137 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2024-11-13 ┆ GBP ┆ FXXR_VT10 ┆ -0.328958 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2024-11-14 ┆ GBP ┆ FXXR_VT10 ┆ -0.110526 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "│ 2024-11-15 ┆ GBP ┆ FXXR_VT10 ┆ -0.700977 ┆ 1.0 ┆ 0 ┆ 0 │\n", - "└────────────┴─────┴───────────┴───────────┴─────────┴─────────┴─────────┘" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "new_df.tail(10)" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time taken: 0.0010075569152832031\n", - "Time taken: 0.0\n" - ] - } - ], + "outputs": [], "source": [ "# df: polars::prelude::DataFrame,\n", "# xcat: String,\n", @@ -1042,17 +367,9 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time taken: 0.05028486251831055\n" - ] - } - ], + "outputs": [], "source": [ "starttime = time.time()\n", "msyrs.qdf.pivot_dataframe_by_ticker(df=new_df).head(10)\n", @@ -1061,45 +378,29 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "new_pd_df = macrosynergy.management.types.QuantamentalDataFrame(new_pd_df)" + "# new_pd_df = macrosynergy.management.types.QuantamentalDataFrame(new_pd_df)" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time taken: 2.0133273601531982\n" - ] - } - ], + "outputs": [], "source": [ - "starttime = time.time()\n", - "new_pd_df.to_wide()\n", - "print(\"Time taken: \", time.time() - starttime)" + "# starttime = time.time()\n", + "# new_pd_df.to_wide()\n", + "# print(\"Time taken: \", time.time() - starttime)" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time taken: 130.01306796073914 seconds\n" - ] - } - ], + "outputs": [], "source": [ "end_time = time.time()\n", "print(f\"Time taken: {end_time - nb_start_time} seconds\")" diff --git a/src/panel/historic_vol.rs b/src/panel/historic_vol.rs index a7f6077..d89dcef 100644 --- a/src/panel/historic_vol.rs +++ b/src/panel/historic_vol.rs @@ -1,6 +1,8 @@ -use chrono::NaiveDate; -use ndarray::{Array, Array1, Zip}; +use crate::utils::qdf::pivots::*; +use chrono::{Datelike, NaiveDate}; +use ndarray::{s, Array, Array1, Zip}; use polars::prelude::*; +// use polars::time::Duration; /// Returns the annualization factor for 252 trading days. /// (SQRT(252)) @@ -56,6 +58,7 @@ fn flat_std(x: &Array1, remove_zeros: bool) -> f64 { filtered_x.mapv(f64::abs).mean().unwrap_or(0.0) } +#[allow(unused_variables)] fn freq_daily_calc( dfw: &DataFrame, lback_periods: usize, @@ -71,14 +74,155 @@ fn freq_daily_calc( ); } - let mut new_df = DataFrame::new(vec![])?; - match lback_method { - "ma" => Ok(new_df), - "xma" => Ok(new_df), - _ => Err("Invalid lookback method.".into()), + let mut new_df = dfw.clone(); + + for col_name in dfw.get_column_names() { + let series = dfw.column(col_name)?; + let values: Array1 = series + .f64()? + .into_iter() + .map(|opt| opt.unwrap_or(0.0)) + .collect(); + + let result_series = match lback_method { + "ma" => { + let mut result = Vec::new(); + for i in 0..(values.len() - lback_periods + 1) { + let window = values.slice(s![i..i + lback_periods]); + let std = flat_std(&window.to_owned(), remove_zeros); + result.push(std); + } + Series::new(col_name.clone(), result) + } + "xma" => { + let half_life = half_life.unwrap(); + let weights = expo_weights(lback_periods, half_life); + let mut result = Vec::new(); + for i in 0..(values.len() - lback_periods + 1) { + let window = values.slice(s![i..i + lback_periods]); + let std = expo_std(&window.to_owned(), &weights, remove_zeros); + result.push(std); + } + Series::new(col_name.clone(), result) + } + _ => return Err("Invalid lookback method.".into()), + }; + + new_df.with_column(result_series)?; } + + Ok(new_df) +} +#[allow(unused_variables)] + +fn freq_period_calc( + dfw: &DataFrame, + lback_periods: usize, + lback_method: &str, + half_life: Option, + remove_zeros: bool, + nan_tolerance: f64, + period: &str, +) -> Result> { + if lback_method == "xma" { + assert!( + half_life.is_some(), + "If lback_method is 'xma', half_life must be provided." + ); + } + + let mut new_df = dfw.clone(); + + for col_name in dfw.get_column_names() { + let series = dfw.column(col_name)?; + let values: Array1 = series + .f64()? + .into_iter() + .map(|opt| opt.unwrap_or(0.0)) + .collect(); + + let result_series = match lback_method { + "ma" => { + let mut result = Vec::new(); + let period_indices = get_period_indices(dfw, period)?; + for &i in &period_indices { + if i >= lback_periods - 1 { + let window = values.slice(s![i + 1 - lback_periods..=i]); + let std = flat_std(&window.to_owned(), remove_zeros); + result.push(std); + } else { + result.push(f64::NAN); + } + } + Series::new(col_name.clone(), result) + } + "xma" => { + let half_life = half_life.unwrap(); + let weights = expo_weights(lback_periods, half_life); + let mut result = Vec::new(); + let period_indices = get_period_indices(dfw, period)?; + for &i in &period_indices { + if i >= lback_periods - 1 { + let window = values.slice(s![i + 1 - lback_periods..=i]); + let std = expo_std(&window.to_owned(), &weights, remove_zeros); + result.push(std); + } else { + result.push(f64::NAN); + } + } + Series::new(col_name.clone(), result) + } + _ => return Err("Invalid lookback method.".into()), + }; + + new_df.with_column(result_series)?; + } + + Ok(new_df) } +fn get_period_indices( + dfw: &DataFrame, + period: &str, +) -> Result, Box> { + let date_series: &Logical = dfw.column("real_date")?.date()?; + let mut indices = Vec::new(); + + match period { + "weekly" => { + for (i, date) in date_series.into_iter().enumerate() { + if let Some(date) = date { + if let Some(naive_date) = NaiveDate::from_num_days_from_ce_opt(date) { + if naive_date.weekday() == chrono::Weekday::Fri { + indices.push(i); + } + } + } + } + } + "monthly" => { + let mut current_month = None; + for (i, date) in date_series.into_iter().enumerate() { + if let Some(date) = date { + if let Some(naive_date) = NaiveDate::from_num_days_from_ce_opt(date) { + if current_month.is_none() || current_month.unwrap() != naive_date.month() { + current_month = Some(naive_date.month()); + if i > 0 { + indices.push(i - 1); + } + } + } + } + } + if let Some(_) = current_month { + indices.push(date_series.len() - 1); + } + } + _ => return Err("Invalid period specified.".into()), + } + + Ok(indices) +} // #[allow(dead_code)] // fn single_calc( // // end_date: -- naive datetime @@ -105,6 +249,8 @@ fn freq_daily_calc( /// - `remove_zeros`: Whether to remove zero values from the calculation. Defaults to False. /// - `postfix`: A string to append to XCAT of the result series. /// - `nan_tolerance`: The maximum proportion of NaN values allowed in the calculation. +/// +#[allow(unused_variables)] pub fn historic_vol( df: polars::prelude::DataFrame, xcat: String, @@ -121,6 +267,62 @@ pub fn historic_vol( ) -> Result> { println!("Calculating historic volatility with the following parameters:"); println!("xcat: {:?},\ncids: {:?},\nlback_periods: {:?},lback_method: {:?},\nhalf_life: {:?},\nstart: {:?},\nend: {:?},\nest_freq: {:?},\nremove_zeros: {:?},\npostfix: {:?},\nnan_tolerance: {:?}", xcat, cids, lback_periods,lback_method, half_life, start, end, est_freq, remove_zeros, postfix, nan_tolerance); + let mut dfw = pivot_dataframe_by_ticker(df.clone(), Some("value".to_string()))?; - Ok(df.to_owned()) + let lback_periods = lback_periods.unwrap_or(20); + let lback_method = lback_method.unwrap_or("ma".to_string()); + let half_life = half_life; + let start = start.unwrap_or(dfw.column("real_date")?.date()?.min().unwrap().to_string()); + let end = end.unwrap_or(dfw.column("real_date")?.date()?.max().unwrap().to_string()); + let est_freq = est_freq.unwrap_or("D".to_string()); + let remove_zeros = remove_zeros.unwrap_or(false); + let postfix = postfix.unwrap_or("_HISTVOL".to_string()); + let nan_tolerance = nan_tolerance.unwrap_or(0.25); + + let start_date = NaiveDate::parse_from_str(&start, "%Y-%m-%d")?; + let end_date = NaiveDate::parse_from_str(&end, "%Y-%m-%d")?; + + dfw = dfw + .lazy() + .filter( + col("real_date") + .lt_eq(lit(end_date)) + .alias("real_date") + .into(), + ) + .filter( + col("real_date") + .gt_eq(lit(start_date)) + .alias("real_date") + .into(), + ) + .collect()?; + + let period = match est_freq.as_str() { + "W" => "weekly", + "M" => "monthly", + _ => return Err("Invalid frequency specified.".into()), + }; + + let dfw = match est_freq.as_str() { + "D" => freq_daily_calc( + &dfw, + lback_periods, + &lback_method, + half_life, + remove_zeros, + nan_tolerance, + )?, + _ => freq_period_calc( + &dfw, + lback_periods, + &lback_method, + half_life, + remove_zeros, + nan_tolerance, + &period, + )?, + }; + + Ok(dfw) }