msyrs/notebooks/funcwise/linear_composites.ipynb
2025-04-09 00:22:01 +01:00

490 lines
15 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Import Python packages\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[2mUsing Python 3.12.4 environment at: E:\\Work\\ruzt\\msyrs\\.venv\u001b[0m\n",
"\u001b[2mResolved \u001b[1m34 packages\u001b[0m \u001b[2min 121ms\u001b[0m\u001b[0m\n",
" \u001b[36m\u001b[1mBuilding\u001b[0m\u001b[39m msyrs\u001b[2m @ file:///E:/Work/ruzt/msyrs\u001b[0m\n",
" \u001b[32m\u001b[1mBuilt\u001b[0m\u001b[39m msyrs\u001b[2m @ file:///E:/Work/ruzt/msyrs\u001b[0m\n",
"\u001b[2mPrepared \u001b[1m1 package\u001b[0m \u001b[2min 14.72s\u001b[0m\u001b[0m\n",
"\u001b[2mUninstalled \u001b[1m1 package\u001b[0m \u001b[2min 4ms\u001b[0m\u001b[0m\n",
"\u001b[1m\u001b[33mwarning\u001b[39m\u001b[0m\u001b[1m:\u001b[0m \u001b[1mFailed to hardlink files; falling back to full copy. This may lead to degraded performance.\n",
" If the cache and target directories are on different filesystems, hardlinking may not be supported.\n",
" If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\u001b[0m\n",
"\u001b[2mInstalled \u001b[1m1 package\u001b[0m \u001b[2min 30ms\u001b[0m\u001b[0m\n",
" \u001b[33m~\u001b[39m \u001b[1mmsyrs\u001b[0m\u001b[2m==0.0.1 (from file:///E:/Work/ruzt/msyrs)\u001b[0m\n"
]
}
],
"source": [
"! uv pip install E:\\Work\\ruzt\\msyrs --upgrade"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import macrosynergy\n",
"import pandas as pd\n",
"import numpy as np\n",
"import polars as pl\n",
"import os\n",
"\n",
"from macrosynergy.panel import view_timelines\n",
"from macrosynergy.management.types import QuantamentalDataFrame\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Import Python bindings - `msyrs`\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import msyrs"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"DATA_FOLDER_PATH = \"E:/Work/jpmaqs-data\"\n",
"# DATA_FOLDER_PATH = \"C:/Users/PalashTyagi/Code/go-dataquery/jpmaqs-data\"\n",
"DQ_CLIENT_ID = os.getenv(\"DQ_CLIENT_ID\")\n",
"DQ_CLIENT_SECRET = os.getenv(\"DQ_CLIENT_SECRET\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"\n",
"nb_start_time = time.time()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Time taken to load qdf: 0.004000425338745117\n"
]
},
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr,\n",
".dataframe > tbody > tr {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (5, 7)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>real_date</th><th>cid</th><th>xcat</th><th>value</th><th>grading</th><th>eop_lag</th><th>mop_lag</th></tr><tr><td>date</td><td>str</td><td>str</td><td>f64</td><td>f64</td><td>i64</td><td>i64</td></tr></thead><tbody><tr><td>2010-03-03</td><td>&quot;USD&quot;</td><td>&quot;ADPEMPL_SA_P1M1ML1&quot;</td><td>-0.173806</td><td>3.0</td><td>3</td><td>33</td></tr><tr><td>2010-03-04</td><td>&quot;USD&quot;</td><td>&quot;ADPEMPL_SA_P1M1ML1&quot;</td><td>-0.173806</td><td>3.0</td><td>4</td><td>34</td></tr><tr><td>2010-03-05</td><td>&quot;USD&quot;</td><td>&quot;ADPEMPL_SA_P1M1ML1&quot;</td><td>-0.173806</td><td>3.0</td><td>5</td><td>35</td></tr><tr><td>2010-03-08</td><td>&quot;USD&quot;</td><td>&quot;ADPEMPL_SA_P1M1ML1&quot;</td><td>-0.173806</td><td>3.0</td><td>8</td><td>38</td></tr><tr><td>2010-03-09</td><td>&quot;USD&quot;</td><td>&quot;ADPEMPL_SA_P1M1ML1&quot;</td><td>-0.173806</td><td>3.0</td><td>9</td><td>39</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (5, 7)\n",
"┌────────────┬─────┬────────────────────┬───────────┬─────────┬─────────┬─────────┐\n",
"│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n",
"╞════════════╪═════╪════════════════════╪═══════════╪═════════╪═════════╪═════════╡\n",
"│ 2010-03-03 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 3 ┆ 33 │\n",
"│ 2010-03-04 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 4 ┆ 34 │\n",
"│ 2010-03-05 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 5 ┆ 35 │\n",
"│ 2010-03-08 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 8 ┆ 38 │\n",
"│ 2010-03-09 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 9 ┆ 39 │\n",
"└────────────┴─────┴────────────────────┴───────────┴─────────┴─────────┴─────────┘"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dfpath = f\"{DATA_FOLDER_PATH}/data/ADPEMPL_SA_P1M1ML1/USD_ADPEMPL_SA_P1M1ML1.csv\"\n",
"\n",
"starttime = time.time()\n",
"ldf: pl.DataFrame = msyrs.qdf.load_qdf(dfpath)\n",
"print(f\"Time taken to load qdf: {time.time() - starttime}\")\n",
"ldf.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"cids_dm = \"AUD.CAD.CHF.EUR.GBP.JPY.NOK.NZD.SEK.USD\".split(\".\")\n",
"cids_em = \"CLP.COP.CZK.HUF.IDR.ILS.INR.KRW.MXN.PLN.THB.TRY.TWD.ZAR\".split(\".\")\n",
"cids = cids_dm + cids_em\n",
"cids_dux = list(set(cids) - set([\"IDR\", \"NZD\"]))\n",
"ecos = \"CPIC_SA_P1M1ML12.CPIC_SJA_P3M3ML3AR.CPIC_SJA_P6M6ML6AR.CPIH_SA_P1M1ML12.CPIH_SJA_P3M3ML3AR.CPIH_SJA_P6M6ML6AR.INFTEFF_NSA.INTRGDP_NSA_P1M1ML12_3MMA.INTRGDPv5Y_NSA_P1M1ML12_3MMA.PCREDITGDP_SJA_D1M1ML12.RGDP_SA_P1Q1QL4_20QMA.RYLDIRS02Y_NSA.RYLDIRS05Y_NSA.PCREDITBN_SJA_P1M1ML12\".split(\n",
" \".\"\n",
")\n",
"\n",
"\n",
"mkts = \"DU02YXR_NSA.DU05YXR_NSA.DU02YXR_VT10.DU05YXR_VT10.EQXR_NSA.EQXR_VT10.FXXR_NSA.FXXR_VT10.FXCRR_NSA.FXTARGETED_NSA.FXUNTRADABLE_NSA\".split(\n",
" \".\"\n",
")\n",
"xcats = ecos + mkts\n",
"\n",
"tickers = [f\"{c}_{x}\" for c in cids for x in xcats]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Time taken to load qdf batch: 1.3058679103851318\n"
]
}
],
"source": [
"starttime = time.time()\n",
"\n",
"big_df: pl.DataFrame = msyrs.qdf.load_qdf_from_download_bank(\n",
" folder_path=DATA_FOLDER_PATH,\n",
" xcats=xcats,\n",
")\n",
"print(f\"Time taken to load qdf batch: {time.time() - starttime}\")\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"286.69339656829834"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"big_df.estimated_size(\"mb\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"sel_cids = [\"USD\", \"EUR\", \"GBP\", \"AUD\", \"CAD\"]\n",
"start = \"1990-01-01\""
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Time taken to reduce qdf: 0.9705278873443604\n"
]
}
],
"source": [
"fx_xcats = [xc for xc in xcats if xc.startswith(\"FX\")]\n",
"eq_xcats = [xc for xc in xcats if xc.startswith(\"EQ\")]\n",
"starttime = time.time()\n",
"\n",
"eq_df = msyrs.qdf.reduce_dataframe(\n",
" df=big_df,\n",
" cids=sel_cids,\n",
" xcats=fx_xcats + eq_xcats,\n",
" start=start,\n",
")\n",
"\n",
"fx_df = msyrs.qdf.reduce_dataframe(\n",
" df=big_df, cids=sel_cids, start=start, xcats=fx_xcats, intersect=True\n",
")\n",
"new_df: pl.DataFrame = msyrs.qdf.update_dataframe(df=eq_df, df_add=fx_df)\n",
"\n",
"print(f\"Time taken to reduce qdf: {time.time() - starttime}\")\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Time taken: 2.3365907669067383 seconds\n"
]
}
],
"source": [
"end_time = time.time()\n",
"print(f\"Time taken: {end_time - nb_start_time} seconds\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"e:\\Work\\ruzt\\msyrs\\.venv\\Lib\\site-packages\\macrosynergy\\panel\\linear_composite.py:437: UserWarning: USD does not have complete xcat data for ['FXXR_NSA']. These will be filled with NaNs for the calculation.\n",
" warnings.warn(wrn_msg.format(cidx=cidx, missing_xcats=missing_xcats))\n"
]
}
],
"source": [
"_cids = [\"USD\", \"CAD\"]\n",
"mx = macrosynergy.panel.linear_composite(\n",
" df=new_df.to_pandas(),\n",
" xcats=[\"EQXR_NSA\", \"FXXR_NSA\"], \n",
" cids=_cids,\n",
" weights=None,\n",
" signs=None,\n",
" normalize_weights=False,\n",
" start=None,\n",
" end=None,\n",
" blacklist=None,\n",
" complete_xcats=False,\n",
" complete_cids=False,\n",
" new_xcat=\"COMPOSITE\",\n",
" new_cid=\"GLB\",\n",
")\n",
"# view_timelines(QuantamentalDataFrame(mx), cids=_cids)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"_cids = [\"USD\", \"CAD\"]\n",
"x = msyrs.panel.linear_composite(\n",
" df=new_df,\n",
" xcats=[\"EQXR_NSA\", \"FXXR_NSA\"],\n",
" cids=_cids,\n",
" weights=None,\n",
" signs=None,\n",
" weight_xcats=None,\n",
" normalize_weights=False,\n",
" start=None,\n",
" end=None,\n",
" blacklist=None,\n",
" complete_xcats=False,\n",
" complete_cids=False,\n",
" new_xcat=\"COMPOSITE\",\n",
" new_cid=\"GLB\",\n",
")\n",
"# view_timelines(QuantamentalDataFrame(x.to_pandas()), cids=_cids)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mwide = QuantamentalDataFrame(mx).to_wide().sort_index()\n",
"rwide = QuantamentalDataFrame(x.to_pandas()).to_wide().sort_index()\n",
"np.allclose((mwide - rwide).sum(axis=1), 0)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"e:\\Work\\ruzt\\msyrs\\.venv\\Lib\\site-packages\\macrosynergy\\panel\\linear_composite.py:437: UserWarning: USD does not have complete xcat data for ['FXXR_NSA']. These will be filled with NaNs for the calculation.\n",
" warnings.warn(wrn_msg.format(cidx=cidx, missing_xcats=missing_xcats))\n"
]
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"_cids = [\"USD\", \"CAD\", \"EUR\", \"AUD\"]\n",
"_xcats = [\"EQXR_NSA\", \"FXXR_NSA\"]\n",
"\n",
"mx = macrosynergy.panel.linear_composite(\n",
" df=new_df.to_pandas(),\n",
" xcats=_xcats,\n",
" cids=_cids,\n",
" weights=[1, 9],\n",
" normalize_weights=False,\n",
" new_xcat=\"COMPOSITE\",\n",
" new_cid=\"GLB\",\n",
")\n",
"x = msyrs.panel.linear_composite(\n",
" df=new_df,\n",
" xcats=_xcats,\n",
" cids=_cids,\n",
" weights=[1, 9],\n",
" normalize_weights=False,\n",
" new_xcat=\"COMPOSITE\",\n",
" new_cid=\"GLB\",\n",
")\n",
"mwide = QuantamentalDataFrame(mx).to_wide().sort_index()\n",
"rwide = QuantamentalDataFrame(x.to_pandas()).to_wide().sort_index()\n",
"np.allclose((mwide - rwide).sum(axis=1), 0)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>real_date</th>\n",
" <th>value</th>\n",
" <th>cid</th>\n",
" <th>xcat</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [real_date, value, cid, xcat]\n",
"Index: []"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"x = msyrs.panel.linear_composite(\n",
" df=new_df,\n",
" xcats=_xcats,\n",
" cids=_cids,\n",
" weights=[1, 9],\n",
" normalize_weights=True,\n",
" new_xcat=\"COMPOSITE\",\n",
" new_cid=\"GLB\",\n",
")\n",
"x.to_pandas().dropna(how=\"any\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}