{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# ! uv pip install /home/palash/Code/msyrs --upgrade" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Import Python packages\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import macrosynergy\n", "import pandas as pd\n", "import numpy as np\n", "import polars as pl\n", "import os\n", "\n", "from macrosynergy.panel import view_timelines\n", "from macrosynergy.management.types import QuantamentalDataFrame\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Import Python bindings - `msyrs`\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import msyrs" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "DATA_FOLDER_PATH = \"E:/Work/jpmaqs-data\"\n", "DATA_FOLDER_PATH = os.path.abspath(os.path.expanduser(\"~/Code/go-dataquery/jpmaqs-data\"))\n", "# DATA_FOLDER_PATH = \"C:/Users/PalashTyagi/Code/go-dataquery/jpmaqs-data\"\n", "DQ_CLIENT_ID = os.getenv(\"DQ_CLIENT_ID\")\n", "DQ_CLIENT_SECRET = os.getenv(\"DQ_CLIENT_SECRET\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "import time\n", "\n", "nb_start_time = time.time()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Time taken to load qdf: 0.006810665130615234\n" ] }, { "data": { "text/html": [ "
\n", "shape: (5, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64i64i64
2010-03-03"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0333
2010-03-04"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0434
2010-03-05"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0535
2010-03-08"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0838
2010-03-09"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0939
" ], "text/plain": [ "shape: (5, 7)\n", "┌────────────┬─────┬────────────────────┬───────────┬─────────┬─────────┬─────────┐\n", "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n", "╞════════════╪═════╪════════════════════╪═══════════╪═════════╪═════════╪═════════╡\n", "│ 2010-03-03 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 3 ┆ 33 │\n", "│ 2010-03-04 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 4 ┆ 34 │\n", "│ 2010-03-05 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 5 ┆ 35 │\n", "│ 2010-03-08 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 8 ┆ 38 │\n", "│ 2010-03-09 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 9 ┆ 39 │\n", "└────────────┴─────┴────────────────────┴───────────┴─────────┴─────────┴─────────┘" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfpath = f\"{DATA_FOLDER_PATH}/data/ADPEMPL_SA_P1M1ML1/USD_ADPEMPL_SA_P1M1ML1.csv\"\n", "\n", "starttime = time.time()\n", "ldf: pl.DataFrame = msyrs.qdf.load_qdf(dfpath)\n", "print(f\"Time taken to load qdf: {time.time() - starttime}\")\n", "ldf.head(5)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "cids_dm = \"AUD.CAD.CHF.EUR.GBP.JPY.NOK.NZD.SEK.USD\".split(\".\")\n", "cids_em = \"CLP.COP.CZK.HUF.IDR.ILS.INR.KRW.MXN.PLN.THB.TRY.TWD.ZAR\".split(\".\")\n", "cids = cids_dm + cids_em\n", "cids_dux = list(set(cids) - set([\"IDR\", \"NZD\"]))\n", "ecos = (\n", " \"CPIC_SA_P1M1ML12.CPIC_SJA_P3M3ML3AR.CPIC_SJA_P6M6ML6AR.CPIH_SA_P1M1ML12.\"\n", " \"CPIH_SJA_P3M3ML3AR.CPIH_SJA_P6M6ML6AR.INFTEFF_NSA.INTRGDP_NSA_P1M1ML12_3MMA.\"\n", " \"INTRGDPv5Y_NSA_P1M1ML12_3MMA.PCREDITGDP_SJA_D1M1ML12.RGDP_SA_P1Q1QL4_20QMA.\"\n", " \"RYLDIRS02Y_NSA.RYLDIRS05Y_NSA.PCREDITBN_SJA_P1M1ML12\".split(\".\")\n", ")\n", "\n", "mkts = (\n", " \"DU02YXR_NSA.DU05YXR_NSA.DU02YXR_VT10.DU05YXR_VT10.EQXR_NSA.EQXR_VT10.\"\n", " \"FXXR_NSA.FXXR_VT10.FXCRR_NSA.FXTARGETED_NSA.FXUNTRADABLE_NSA\".split(\".\")\n", ")\n", "xcats = ecos + mkts\n", "\n", "tickers = [f\"{c}_{x}\" for c in cids for x in xcats]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Time taken to load qdf batch: 1.4180326461791992\n" ] } ], "source": [ "starttime = time.time()\n", "\n", "big_df: pl.DataFrame = msyrs.qdf.load_qdf_from_download_bank(\n", " folder_path=DATA_FOLDER_PATH,\n", " xcats=xcats,\n", ")\n", "print(f\"Time taken to load qdf batch: {time.time() - starttime}\")\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "286.95422172546387" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "big_df.estimated_size(\"mb\")" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Reduced DataFrame from 5484608 to 2091732 rows\n", "Time taken to reduce qdf: 1.9222838878631592\n", "99.42372608184814\n" ] } ], "source": [ "starttime = time.time()\n", "\n", "test_df = msyrs.qdf.reduce_dataframe(df=big_df, xcats=mkts)\n", "\n", "print(f\"Time taken to reduce qdf: {time.time() - starttime}\")\n", "print(test_df.estimated_size(\"mb\"))\n", "test_df = None" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "sel_cids = [\"USD\", \"EUR\", \"GBP\", \"AUD\", \"CAD\", \"CHF\", \"JPY\", \"INR\"]\n", "start = \"2010-01-01\"\n", "end = \"2011-01-01\"" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Reduced DataFrame from 5484608 to 4000 rows\n", "Time taken to reduce qdf: 0.819500207901001\n" ] } ], "source": [ "eq_xcats = [xc for xc in xcats if xc.startswith(\"EQ\")]\n", "starttime = time.time()\n", "\n", "eq_df = msyrs.qdf.reduce_dataframe(\n", " df=big_df, cids=sel_cids, xcats=eq_xcats, start=start, end=end\n", ")\n", "\n", "print(f\"Time taken to reduce qdf: {time.time() - starttime}\")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Reduced DataFrame from 5484608 to 8750 rows\n", "Time taken to reduce qdf: 0.9090185165405273\n", "xcat_replace not implemented yet (passed value: false)\n" ] } ], "source": [ "fx_xcats = [xc for xc in xcats if xc.startswith(\"FX\")]\n", "\n", "starttime = time.time()\n", "\n", "fx_df = msyrs.qdf.reduce_dataframe(\n", " df=big_df, cids=sel_cids, xcats=fx_xcats, start=start, end=end\n", ")\n", "new_df: pl.DataFrame = msyrs.qdf.update_dataframe(df=eq_df, df_add=fx_df)\n", "\n", "print(f\"Time taken to reduce qdf: {time.time() - starttime}\")" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Time taken to update qdf: 0.00896310806274414\n", "xcat_replace not implemented yet (passed value: false)\n" ] } ], "source": [ "starttime = time.time()\n", "\n", "new_df: pl.DataFrame = msyrs.qdf.update_dataframe(df=eq_df, df_add=fx_df)\n", "\n", "print(f\"Time taken to update qdf: {time.time() - starttime}\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "wdf = QuantamentalDataFrame(new_df.to_pandas().fillna(0)).to_wide()\n", "\n", "for i in range(4):\n", " col = wdf.columns[np.random.randint(0, len(wdf.columns))]\n", " dates = sorted(np.random.choice(wdf.index, 2, replace=False))\n", " dtr = pd.bdate_range(dates[0], dates[1]) \n", " wdf.loc[dtr[0]:dtr[-1], col] = np.nan\n", "\n", "wdf = QuantamentalDataFrame.from_wide(wdf, categorical=False)\n", "# cast column 'real_date' to pl.Date\n", "new_df = pl.DataFrame(wdf).with_columns(pl.col(\"real_date\").cast(pl.Date, strict=True))" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{}" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "msyrs.utils.create_blacklist_from_qdf(new_df)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "# view_timelines(df=new_df.to_pandas())" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Date" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "msyrs.utils.get_bdates_series_default_pl(start_date='2000-01-01', end_date='2020-05-01', freq='D').dtype" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (18_088,)
bdates
date
1971-01-01
1971-01-04
1971-01-05
1971-01-06
1971-01-07
2040-04-25
2040-04-26
2040-04-27
2040-04-30
2040-05-01
" ], "text/plain": [ "shape: (18_088,)\n", "Series: 'bdates' [date]\n", "[\n", "\t1971-01-01\n", "\t1971-01-04\n", "\t1971-01-05\n", "\t1971-01-06\n", "\t1971-01-07\n", "\t…\n", "\t2040-04-25\n", "\t2040-04-26\n", "\t2040-04-27\n", "\t2040-04-30\n", "\t2040-05-01\n", "]" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "msyrs.utils.get_bdates_series_default_opt(start_date='1971-01-01', end_date='2040-05-01', freq='D')" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Time taken: 5.6950860023498535 seconds\n" ] } ], "source": [ "end_time = time.time()\n", "print(f\"Time taken: {end_time - nb_start_time} seconds\")" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.9" } }, "nbformat": 4, "nbformat_minor": 4 }