{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Build and install the package\n", "\n", "First patch `pyo3-polars`:\n", "\n", "- Apply the changes from [this diff](https://github.com/pola-rs/pyo3-polars/compare/main...Magnus167:pyo3-polars:main) to the `pyo3-polars` package.\n", "\n", "Then create a virtual environment and install the package into it:\n", "\n", "```bash\n", "python -m venv .venv\n", "\n", "# Linux/macOS: source .venv/bin/activate\n", "# Windows:\n", "./.venv/Scripts/activate\n", "\n", "pip install maturin ipywidgets\n", "\n", "maturin develop --release\n", "```\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Import Python packages\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import time\n", "\n", "import macrosynergy\n", "import numpy as np\n", "import pandas as pd\n", "import polars as pl" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Import Python bindings - `msyrs`\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import msyrs" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Root folder of the local JPMaQS data bank read by the loading cells below.\n", "# Set the JPMAQS_DATA_FOLDER environment variable to override the default location.\n", "DATA_FOLDER_PATH = os.getenv(\"JPMAQS_DATA_FOLDER\", \"E:/Work/jpmaqs-data\")\n", "\n", "# DataQuery credentials come from the environment so they are never stored in the notebook.\n", "# os.getenv returns None when a variable is unset.\n", "DQ_CLIENT_ID = os.getenv(\"DQ_CLIENT_ID\")\n", "DQ_CLIENT_SECRET = os.getenv(\"DQ_CLIENT_SECRET\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Start of the whole-notebook wall-clock timer (reported by the final cell).\n", "nb_start_time = time.time()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Time taken to load qdf: 0.0040130615234375\n" ] }, { "data": { "text/html": [ "
\n", "shape: (5, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64i64i64
2010-03-03"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0333
2010-03-04"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0434
2010-03-05"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0535
2010-03-08"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0838
2010-03-09"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0939
" ], "text/plain": [ "shape: (5, 7)\n", "┌────────────┬─────┬────────────────────┬───────────┬─────────┬─────────┬─────────┐\n", "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n", "╞════════════╪═════╪════════════════════╪═══════════╪═════════╪═════════╪═════════╡\n", "│ 2010-03-03 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 3 ┆ 33 │\n", "│ 2010-03-04 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 4 ┆ 34 │\n", "│ 2010-03-05 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 5 ┆ 35 │\n", "│ 2010-03-08 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 8 ┆ 38 │\n", "│ 2010-03-09 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 9 ┆ 39 │\n", "└────────────┴─────┴────────────────────┴───────────┴─────────┴─────────┴─────────┘" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfpath = f\"{DATA_FOLDER_PATH}/data/ADPEMPL_SA_P1M1ML1/USD_ADPEMPL_SA_P1M1ML1.csv\"\n", "\n", "starttime = time.time()\n", "ldf: pl.DataFrame = msyrs.qdf.load_qdf(dfpath)\n", "print(f\"Time taken to load qdf: {time.time() - starttime}\")\n", "ldf.head(5)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "cids_dm = \"AUD.CAD.CHF.EUR.GBP.JPY.NOK.NZD.SEK.USD\".split(\".\")\n", "cids_em = \"CLP.COP.CZK.HUF.IDR.ILS.INR.KRW.MXN.PLN.THB.TRY.TWD.ZAR\".split(\".\")\n", "cids = cids_dm + cids_em\n", "cids_dux = list(set(cids) - set([\"IDR\", \"NZD\"]))\n", "ecos = \"CPIC_SA_P1M1ML12.CPIC_SJA_P3M3ML3AR.CPIC_SJA_P6M6ML6AR.CPIH_SA_P1M1ML12.CPIH_SJA_P3M3ML3AR.CPIH_SJA_P6M6ML6AR.INFTEFF_NSA.INTRGDP_NSA_P1M1ML12_3MMA.INTRGDPv5Y_NSA_P1M1ML12_3MMA.PCREDITGDP_SJA_D1M1ML12.RGDP_SA_P1Q1QL4_20QMA.RYLDIRS02Y_NSA.RYLDIRS05Y_NSA.PCREDITBN_SJA_P1M1ML12\".split(\n", " \".\"\n", ")\n", "\n", "\n", "mkts = 
\"DU02YXR_NSA.DU05YXR_NSA.DU02YXR_VT10.DU05YXR_VT10.EQXR_NSA.EQXR_VT10.FXXR_NSA.FXXR_VT10.FXCRR_NSA.FXTARGETED_NSA.FXUNTRADABLE_NSA\".split(\n", " \".\"\n", ")\n", "xcats = ecos + mkts\n", "\n", "tickers = [f\"{c}_{x}\" for c in cids for x in xcats]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64f64f64
1990-04-26"AUD""CPIC_SA_P1M1ML12"6.4345992.026.0223.0
1990-04-27"AUD""CPIC_SA_P1M1ML12"6.4345992.027.0224.0
1990-04-30"AUD""CPIC_SA_P1M1ML12"6.4345992.030.0227.0
1990-05-01"AUD""CPIC_SA_P1M1ML12"6.4345992.031.0228.0
1990-05-02"AUD""CPIC_SA_P1M1ML12"6.4345992.032.0229.0
" ], "text/plain": [ "shape: (5, 7)\n", "┌────────────┬─────┬──────────────────┬──────────┬─────────┬─────────┬─────────┐\n", "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞════════════╪═════╪══════════════════╪══════════╪═════════╪═════════╪═════════╡\n", "│ 1990-04-26 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 26.0 ┆ 223.0 │\n", "│ 1990-04-27 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 27.0 ┆ 224.0 │\n", "│ 1990-04-30 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 30.0 ┆ 227.0 │\n", "│ 1990-05-01 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 31.0 ┆ 228.0 │\n", "│ 1990-05-02 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 32.0 ┆ 229.0 │\n", "└────────────┴─────┴──────────────────┴──────────┴─────────┴─────────┴─────────┘" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "downloaded_df: pl.DataFrame = msyrs.download.download_jpmaqs_indicators_as_df(\n", " client_id=DQ_CLIENT_ID,\n", " client_secret=DQ_CLIENT_SECRET,\n", " tickers=tickers,\n", ")\n", "downloaded_df.head(5)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "downloaded_df = None" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Downloading the JPMAQS catalogue from DataQuery...\n", "Downloaded JPMAQS catalogue with 18711 tickers.\n", "Removed 21/600 expressions that are not in the JPMaQS catalogue.\n", "Downloading data from JPMaQS.\n", "Timestamp UTC: 2024-11-22 20:35:07\n", "Connection successful!\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Requesting data: 100%|██████████| 29/29 [00:05<00:00, 4.93it/s]\n", "Downloading data: 100%|██████████| 29/29 [00:16<00:00, 1.80it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Some dates are missing from the downloaded data. 
\n", "2 out of 9107 dates are missing.\n" ] } ], "source": [ "pddf = macrosynergy.download.JPMaQSDownload().download(\n", " tickers=tickers,\n", " get_catalogue=True,\n", " show_progress=True,\n", " start_date=\"1990-01-01\",\n", ")\n", "pddf = macrosynergy.management.types.QuantamentalDataFrame(pddf)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Time taken to load qdf batch: 1.1382660865783691\n" ] }, { "data": { "text/html": [ "
\n", "shape: (5, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64i64i64
1990-04-26"AUD""CPIC_SA_P1M1ML12"6.4345992.026223
1990-04-27"AUD""CPIC_SA_P1M1ML12"6.4345992.027224
1990-04-30"AUD""CPIC_SA_P1M1ML12"6.4345992.030227
1990-05-01"AUD""CPIC_SA_P1M1ML12"6.4345992.031228
1990-05-02"AUD""CPIC_SA_P1M1ML12"6.4345992.032229
" ], "text/plain": [ "shape: (5, 7)\n", "┌────────────┬─────┬──────────────────┬──────────┬─────────┬─────────┬─────────┐\n", "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n", "╞════════════╪═════╪══════════════════╪══════════╪═════════╪═════════╪═════════╡\n", "│ 1990-04-26 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 26 ┆ 223 │\n", "│ 1990-04-27 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 27 ┆ 224 │\n", "│ 1990-04-30 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 30 ┆ 227 │\n", "│ 1990-05-01 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 31 ┆ 228 │\n", "│ 1990-05-02 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 32 ┆ 229 │\n", "└────────────┴─────┴──────────────────┴──────────┴─────────┴─────────┴─────────┘" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "starttime = time.time()\n", "\n", "big_df: pl.DataFrame = msyrs.qdf.load_qdf_from_download_bank(\n", "\n", " folder_path=DATA_FOLDER_PATH,\n", " xcats=xcats,\n", "\n", " # folder_path=DATA_FOLDER_PATH, cids=cids\n", "\n", ")\n", "print(f\"Time taken to load qdf batch: {time.time() - starttime}\")\n", "\n", "\n", "big_df.head(5)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "275.7299613952637" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "big_df.estimated_size(\"mb\")" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
real_datecidxcatvaluegradingeop_lagmop_lag
01990-04-26AUDCPIC_SA_P1M1ML126.4345992.026223
11990-04-27AUDCPIC_SA_P1M1ML126.4345992.027224
21990-04-30AUDCPIC_SA_P1M1ML126.4345992.030227
31990-05-01AUDCPIC_SA_P1M1ML126.4345992.031228
41990-05-02AUDCPIC_SA_P1M1ML126.4345992.032229
........................
52855772024-11-08ZARRYLDIRS05Y_NSA3.0975341.000
52855782024-11-12ZARRYLDIRS05Y_NSA3.1898841.000
52855792024-11-13ZARRYLDIRS05Y_NSA3.1080951.000
52855802024-11-14ZARRYLDIRS05Y_NSA3.0609681.000
52855812024-11-15ZARRYLDIRS05Y_NSA3.1020661.000
\n", "

5285582 rows × 7 columns

\n", "
" ], "text/plain": [ " real_date cid xcat value grading eop_lag mop_lag\n", "0 1990-04-26 AUD CPIC_SA_P1M1ML12 6.434599 2.0 26 223\n", "1 1990-04-27 AUD CPIC_SA_P1M1ML12 6.434599 2.0 27 224\n", "2 1990-04-30 AUD CPIC_SA_P1M1ML12 6.434599 2.0 30 227\n", "3 1990-05-01 AUD CPIC_SA_P1M1ML12 6.434599 2.0 31 228\n", "4 1990-05-02 AUD CPIC_SA_P1M1ML12 6.434599 2.0 32 229\n", "... ... ... ... ... ... ... ...\n", "5285577 2024-11-08 ZAR RYLDIRS05Y_NSA 3.097534 1.0 0 0\n", "5285578 2024-11-12 ZAR RYLDIRS05Y_NSA 3.189884 1.0 0 0\n", "5285579 2024-11-13 ZAR RYLDIRS05Y_NSA 3.108095 1.0 0 0\n", "5285580 2024-11-14 ZAR RYLDIRS05Y_NSA 3.060968 1.0 0 0\n", "5285581 2024-11-15 ZAR RYLDIRS05Y_NSA 3.102066 1.0 0 0\n", "\n", "[5285582 rows x 7 columns]" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "big_df.to_pandas()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "789.8839149475098" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "big_df.to_pandas().memory_usage(deep=True).sum() / 1024**2" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
real_datecidxcatvaluegradingeop_lagmop_lag
01990-04-26AUDCPIC_SA_P1M1ML126.4345992.026223
11990-04-27AUDCPIC_SA_P1M1ML126.4345992.027224
21990-04-30AUDCPIC_SA_P1M1ML126.4345992.030227
31990-05-01AUDCPIC_SA_P1M1ML126.4345992.031228
41990-05-02AUDCPIC_SA_P1M1ML126.4345992.032229
........................
52855772024-11-08ZARRYLDIRS05Y_NSA3.0975341.000
52855782024-11-12ZARRYLDIRS05Y_NSA3.1898841.000
52855792024-11-13ZARRYLDIRS05Y_NSA3.1080951.000
52855802024-11-14ZARRYLDIRS05Y_NSA3.0609681.000
52855812024-11-15ZARRYLDIRS05Y_NSA3.1020661.000
\n", "

5285582 rows × 7 columns

\n", "
" ], "text/plain": [ " real_date cid xcat value grading eop_lag mop_lag\n", "0 1990-04-26 AUD CPIC_SA_P1M1ML12 6.434599 2.0 26 223\n", "1 1990-04-27 AUD CPIC_SA_P1M1ML12 6.434599 2.0 27 224\n", "2 1990-04-30 AUD CPIC_SA_P1M1ML12 6.434599 2.0 30 227\n", "3 1990-05-01 AUD CPIC_SA_P1M1ML12 6.434599 2.0 31 228\n", "4 1990-05-02 AUD CPIC_SA_P1M1ML12 6.434599 2.0 32 229\n", "... ... ... ... ... ... ... ...\n", "5285577 2024-11-08 ZAR RYLDIRS05Y_NSA 3.097534 1.0 0 0\n", "5285578 2024-11-12 ZAR RYLDIRS05Y_NSA 3.189884 1.0 0 0\n", "5285579 2024-11-13 ZAR RYLDIRS05Y_NSA 3.108095 1.0 0 0\n", "5285580 2024-11-14 ZAR RYLDIRS05Y_NSA 3.060968 1.0 0 0\n", "5285581 2024-11-15 ZAR RYLDIRS05Y_NSA 3.102066 1.0 0 0\n", "\n", "[5285582 rows x 7 columns]" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "macrosynergy.management.types.QuantamentalDataFrame(big_df.to_pandas())" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "211.71554374694824" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "macrosynergy.management.types.QuantamentalDataFrame(big_df.to_pandas()).memory_usage(\n", " deep=True\n", ").sum() / 1024**2" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "sel_cids = [\"USD\", \"EUR\", \"GBP\", \"AUD\", \"CAD\"]\n", "start = \"1990-01-01\"" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Time taken to reduce qdf: 0.2668800354003906\n" ] }, { "data": { "text/html": [ "
\n", "shape: (73_069, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64i64i64
2000-05-04"AUD""EQXR_NSA"-1.2516051.000
2000-05-05"AUD""EQXR_NSA"1.7874551.000
2000-05-08"AUD""EQXR_NSA"-0.5747131.000
2000-05-09"AUD""EQXR_NSA"-0.9312781.000
2000-05-10"AUD""EQXR_NSA"-1.5235011.000
2024-11-08"USD""EQXR_VT10"0.3138751.000
2024-11-12"USD""EQXR_VT10"-0.2724571.000
2024-11-13"USD""EQXR_VT10"0.0437291.000
2024-11-14"USD""EQXR_VT10"-0.5499831.000
2024-11-15"USD""EQXR_VT10"-1.1985441.000
" ], "text/plain": [ "shape: (73_069, 7)\n", "┌────────────┬─────┬───────────┬───────────┬─────────┬─────────┬─────────┐\n", "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n", "╞════════════╪═════╪═══════════╪═══════════╪═════════╪═════════╪═════════╡\n", "│ 2000-05-04 ┆ AUD ┆ EQXR_NSA ┆ -1.251605 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2000-05-05 ┆ AUD ┆ EQXR_NSA ┆ 1.787455 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2000-05-08 ┆ AUD ┆ EQXR_NSA ┆ -0.574713 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2000-05-09 ┆ AUD ┆ EQXR_NSA ┆ -0.931278 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2000-05-10 ┆ AUD ┆ EQXR_NSA ┆ -1.523501 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", "│ 2024-11-08 ┆ USD ┆ EQXR_VT10 ┆ 0.313875 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2024-11-12 ┆ USD ┆ EQXR_VT10 ┆ -0.272457 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2024-11-13 ┆ USD ┆ EQXR_VT10 ┆ 0.043729 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2024-11-14 ┆ USD ┆ EQXR_VT10 ┆ -0.549983 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2024-11-15 ┆ USD ┆ EQXR_VT10 ┆ -1.198544 ┆ 1.0 ┆ 0 ┆ 0 │\n", "└────────────┴─────┴───────────┴───────────┴─────────┴─────────┴─────────┘" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "starttime = time.time()\n", "eq_df = msyrs.qdf.reduce_dataframe(\n", " df=big_df,\n", " cids=sel_cids,\n", " xcats=[\"EQXR_NSA\", \"EQXR_VT10\"],\n", "\n", " start=start,\n", ")\n", "print(f\"Time taken to reduce qdf: {time.time() - starttime}\")\n", "eq_df" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Time taken to reduce qdf: 0.057086944580078125\n" ] } ], "source": [ "starttime = time.time()\n", "eq_pd_df = pddf.reduce_df(cids=sel_cids, xcats=[\"EQXR_NSA\", \"EQXR_VT10\"], start=start)\n", "print(f\"Time taken to reduce qdf: {time.time() - starttime}\")" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": 
[ "Time taken to reduce qdf: 0.298140287399292\n" ] } ], "source": [ "fx_xcats = [xc for xc in xcats if xc.startswith(\"FX\")]\n", "starttime = time.time()\n", "\n", "fx_df = msyrs.qdf.reduce_dataframe(\n", " df=big_df, cids=sel_cids, start=start, xcats=fx_xcats, intersect=True\n", ")\n", "print(f\"Time taken to reduce qdf: {time.time() - starttime}\")" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Time taken to reduce qdf: 0.08732080459594727\n" ] } ], "source": [ "starttime = time.time()\n", "fx_pd_df = pddf.reduce_df(cids=sel_cids, xcats=fx_xcats, start=start, intersect=True)\n", "print(f\"Time taken to reduce qdf: {time.time() - starttime}\")" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Time taken: 0.01651310920715332\n" ] }, { "data": { "text/html": [ "
\n", "shape: (10, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64i64i64
2000-05-04"AUD""EQXR_NSA"-1.2516051.000
2000-05-05"AUD""EQXR_NSA"1.7874551.000
2000-05-08"AUD""EQXR_NSA"-0.5747131.000
2000-05-09"AUD""EQXR_NSA"-0.9312781.000
2000-05-10"AUD""EQXR_NSA"-1.5235011.000
2000-05-11"AUD""EQXR_NSA"-1.5799871.000
2000-05-12"AUD""EQXR_NSA"1.806021.000
2000-05-15"AUD""EQXR_NSA"0.2956641.000
2000-05-16"AUD""EQXR_NSA"1.3101871.000
2000-05-17"AUD""EQXR_NSA"-0.7112841.000
" ], "text/plain": [ "shape: (10, 7)\n", "┌────────────┬─────┬──────────┬───────────┬─────────┬─────────┬─────────┐\n", "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n", "╞════════════╪═════╪══════════╪═══════════╪═════════╪═════════╪═════════╡\n", "│ 2000-05-04 ┆ AUD ┆ EQXR_NSA ┆ -1.251605 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2000-05-05 ┆ AUD ┆ EQXR_NSA ┆ 1.787455 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2000-05-08 ┆ AUD ┆ EQXR_NSA ┆ -0.574713 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2000-05-09 ┆ AUD ┆ EQXR_NSA ┆ -0.931278 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2000-05-10 ┆ AUD ┆ EQXR_NSA ┆ -1.523501 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2000-05-11 ┆ AUD ┆ EQXR_NSA ┆ -1.579987 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2000-05-12 ┆ AUD ┆ EQXR_NSA ┆ 1.80602 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2000-05-15 ┆ AUD ┆ EQXR_NSA ┆ 0.295664 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2000-05-16 ┆ AUD ┆ EQXR_NSA ┆ 1.310187 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2000-05-17 ┆ AUD ┆ EQXR_NSA ┆ -0.711284 ┆ 1.0 ┆ 0 ┆ 0 │\n", "└────────────┴─────┴──────────┴───────────┴─────────┴─────────┴─────────┘" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "starttime = time.time()\n", "new_df: pl.DataFrame = msyrs.qdf.update_dataframe(df=eq_df, df_add=fx_df)\n", "print(\"Time taken: \", time.time() - starttime)\n", "new_df.head(10)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Time taken: 0.4808037281036377\n" ] } ], "source": [ "starttime = time.time()\n", "new_pd_df = pddf.update_df(df_add=eq_pd_df,)\n", "print(\"Time taken: \", time.time() - starttime)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (10, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64i64i64
2024-11-01"GBP""FXXR_VT10"1.378051.000
2024-11-04"GBP""FXXR_VT10"-0.1119311.000
2024-11-05"GBP""FXXR_VT10"0.252941.000
2024-11-06"GBP""FXXR_VT10"-0.1751611.000
2024-11-07"GBP""FXXR_VT10"0.8066821.000
2024-11-08"GBP""FXXR_VT10"-0.2473461.000
2024-11-12"GBP""FXXR_VT10"-1.0831371.000
2024-11-13"GBP""FXXR_VT10"-0.3289581.000
2024-11-14"GBP""FXXR_VT10"-0.1105261.000
2024-11-15"GBP""FXXR_VT10"-0.7009771.000
" ], "text/plain": [ "shape: (10, 7)\n", "┌────────────┬─────┬───────────┬───────────┬─────────┬─────────┬─────────┐\n", "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n", "╞════════════╪═════╪═══════════╪═══════════╪═════════╪═════════╪═════════╡\n", "│ 2024-11-01 ┆ GBP ┆ FXXR_VT10 ┆ 1.37805 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2024-11-04 ┆ GBP ┆ FXXR_VT10 ┆ -0.111931 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2024-11-05 ┆ GBP ┆ FXXR_VT10 ┆ 0.25294 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2024-11-06 ┆ GBP ┆ FXXR_VT10 ┆ -0.175161 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2024-11-07 ┆ GBP ┆ FXXR_VT10 ┆ 0.806682 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2024-11-08 ┆ GBP ┆ FXXR_VT10 ┆ -0.247346 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2024-11-12 ┆ GBP ┆ FXXR_VT10 ┆ -1.083137 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2024-11-13 ┆ GBP ┆ FXXR_VT10 ┆ -0.328958 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2024-11-14 ┆ GBP ┆ FXXR_VT10 ┆ -0.110526 ┆ 1.0 ┆ 0 ┆ 0 │\n", "│ 2024-11-15 ┆ GBP ┆ FXXR_VT10 ┆ -0.700977 ┆ 1.0 ┆ 0 ┆ 0 │\n", "└────────────┴─────┴───────────┴───────────┴─────────┴─────────┴─────────┘" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_df.tail(10)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Time taken: 0.0010075569152832031\n", "Time taken: 0.0\n" ] } ], "source": [ "# df: polars::prelude::DataFrame,\n", "# xcat: String,\n", "# cids: Option>,\n", "# lback_periods: Option,\n", "# lback_method: Option,\n", "# half_life: Option,\n", "# start: Option,\n", "# end: Option,\n", "# est_freq: Option,\n", "# remove_zeros: Option,\n", "# postfix: Option,\n", "# nan_tolerance: Option,\n", "\n", "starttime = time.time()\n", "hv = msyrs.panel.historic_vol(\n", " df=new_df,\n", " xcat=\"EQXR_NSA\",\n", " cids=None,\n", " lback_periods=252,\n", " lback_method=\"calendar\",\n", " half_life=None,\n", " start=None,\n", " end=None,\n", " est_freq=None,\n", " 
remove_zeros=None,\n", " postfix=\"_HV\",\n", " nan_tolerance=None,\n", ")\n", "print(f\"Time taken: {time.time() - starttime}\")\n", "\n", "starttime = time.time()\n", "a = 1 + 5\n", "print(\"Time taken: \", time.time() - starttime)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Time taken: 0.05028486251831055\n" ] } ], "source": [ "starttime = time.time()\n", "msyrs.qdf.pivot_dataframe_by_ticker(df=new_df).head(10)\n", "print(\"Time taken: \", time.time() - starttime)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "new_pd_df = macrosynergy.management.types.QuantamentalDataFrame(new_pd_df)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Time taken: 2.0133273601531982\n" ] } ], "source": [ "starttime = time.time()\n", "new_pd_df.to_wide()\n", "print(\"Time taken: \", time.time() - starttime)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Time taken: 130.01306796073914 seconds\n" ] } ], "source": [ "end_time = time.time()\n", "print(f\"Time taken: {end_time - nb_start_time} seconds\")" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.7" } }, "nbformat": 4, "nbformat_minor": 4 }