updating linear composites notebook

This commit is contained in:
Palash Tyagi 2025-04-10 00:33:28 +01:00
parent f63eedb50a
commit f84879119b

View File

@ -9,34 +9,16 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[2mUsing Python 3.12.4 environment at: E:\\Work\\ruzt\\msyrs\\.venv\u001b[0m\n",
"\u001b[2mResolved \u001b[1m34 packages\u001b[0m \u001b[2min 121ms\u001b[0m\u001b[0m\n",
" \u001b[36m\u001b[1mBuilding\u001b[0m\u001b[39m msyrs\u001b[2m @ file:///E:/Work/ruzt/msyrs\u001b[0m\n",
" \u001b[32m\u001b[1mBuilt\u001b[0m\u001b[39m msyrs\u001b[2m @ file:///E:/Work/ruzt/msyrs\u001b[0m\n",
"\u001b[2mPrepared \u001b[1m1 package\u001b[0m \u001b[2min 14.72s\u001b[0m\u001b[0m\n",
"\u001b[2mUninstalled \u001b[1m1 package\u001b[0m \u001b[2min 4ms\u001b[0m\u001b[0m\n",
"\u001b[1m\u001b[33mwarning\u001b[39m\u001b[0m\u001b[1m:\u001b[0m \u001b[1mFailed to hardlink files; falling back to full copy. This may lead to degraded performance.\n",
" If the cache and target directories are on different filesystems, hardlinking may not be supported.\n",
" If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\u001b[0m\n",
"\u001b[2mInstalled \u001b[1m1 package\u001b[0m \u001b[2min 30ms\u001b[0m\u001b[0m\n",
" \u001b[33m~\u001b[39m \u001b[1mmsyrs\u001b[0m\u001b[2m==0.0.1 (from file:///E:/Work/ruzt/msyrs)\u001b[0m\n"
]
}
],
"source": [ "source": [
"! uv pip install E:\\Work\\ruzt\\msyrs --upgrade" "# ! uv pip install E:\\Work\\ruzt\\msyrs --upgrade"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -44,10 +26,11 @@
"import pandas as pd\n", "import pandas as pd\n",
"import numpy as np\n", "import numpy as np\n",
"import polars as pl\n", "import polars as pl\n",
"import time\n",
"import os\n", "import os\n",
"\n", "\n",
"from macrosynergy.panel import view_timelines\n", "from macrosynergy.panel import view_timelines\n",
"from macrosynergy.management.types import QuantamentalDataFrame\n" "from macrosynergy.management.types import QuantamentalDataFrame"
] ]
}, },
{ {
@ -59,7 +42,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -68,83 +51,16 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"DATA_FOLDER_PATH = \"E:/Work/jpmaqs-data\"\n", "DATA_FOLDER_PATH = \"E:/Work/jpmaqs-data\""
"# DATA_FOLDER_PATH = \"C:/Users/PalashTyagi/Code/go-dataquery/jpmaqs-data\"\n",
"DQ_CLIENT_ID = os.getenv(\"DQ_CLIENT_ID\")\n",
"DQ_CLIENT_SECRET = os.getenv(\"DQ_CLIENT_SECRET\")"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"\n",
"nb_start_time = time.time()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Time taken to load qdf: 0.004000425338745117\n"
]
},
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr,\n",
".dataframe > tbody > tr {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (5, 7)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>real_date</th><th>cid</th><th>xcat</th><th>value</th><th>grading</th><th>eop_lag</th><th>mop_lag</th></tr><tr><td>date</td><td>str</td><td>str</td><td>f64</td><td>f64</td><td>i64</td><td>i64</td></tr></thead><tbody><tr><td>2010-03-03</td><td>&quot;USD&quot;</td><td>&quot;ADPEMPL_SA_P1M1ML1&quot;</td><td>-0.173806</td><td>3.0</td><td>3</td><td>33</td></tr><tr><td>2010-03-04</td><td>&quot;USD&quot;</td><td>&quot;ADPEMPL_SA_P1M1ML1&quot;</td><td>-0.173806</td><td>3.0</td><td>4</td><td>34</td></tr><tr><td>2010-03-05</td><td>&quot;USD&quot;</td><td>&quot;ADPEMPL_SA_P1M1ML1&quot;</td><td>-0.173806</td><td>3.0</td><td>5</td><td>35</td></tr><tr><td>2010-03-08</td><td>&quot;USD&quot;</td><td>&quot;ADPEMPL_SA_P1M1ML1&quot;</td><td>-0.173806</td><td>3.0</td><td>8</td><td>38</td></tr><tr><td>2010-03-09</td><td>&quot;USD&quot;</td><td>&quot;ADPEMPL_SA_P1M1ML1&quot;</td><td>-0.173806</td><td>3.0</td><td>9</td><td>39</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (5, 7)\n",
"┌────────────┬─────┬────────────────────┬───────────┬─────────┬─────────┬─────────┐\n",
"│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n",
"╞════════════╪═════╪════════════════════╪═══════════╪═════════╪═════════╪═════════╡\n",
"│ 2010-03-03 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 3 ┆ 33 │\n",
"│ 2010-03-04 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 4 ┆ 34 │\n",
"│ 2010-03-05 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 5 ┆ 35 │\n",
"│ 2010-03-08 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 8 ┆ 38 │\n",
"│ 2010-03-09 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 9 ┆ 39 │\n",
"└────────────┴─────┴────────────────────┴───────────┴─────────┴─────────┴─────────┘"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dfpath = f\"{DATA_FOLDER_PATH}/data/ADPEMPL_SA_P1M1ML1/USD_ADPEMPL_SA_P1M1ML1.csv\"\n",
"\n",
"starttime = time.time()\n",
"ldf: pl.DataFrame = msyrs.qdf.load_qdf(dfpath)\n",
"print(f\"Time taken to load qdf: {time.time() - starttime}\")\n",
"ldf.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -162,22 +78,18 @@
")\n", ")\n",
"xcats = ecos + mkts\n", "xcats = ecos + mkts\n",
"\n", "\n",
"cpi_xcats = \"CPIC_SA_P1M1ML12.CPIC_SJA_P3M3ML3AR.CPIC_SJA_P6M6ML6AR.CPIH_SA_P1M1ML12.CPIH_SJA_P3M3ML3AR.CPIH_SJA_P6M6ML6AR\".split(\n",
" \".\"\n",
")\n",
"\n",
"tickers = [f\"{c}_{x}\" for c in cids for x in xcats]" "tickers = [f\"{c}_{x}\" for c in cids for x in xcats]"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"Time taken to load qdf batch: 1.3058679103851318\n"
]
}
],
"source": [ "source": [
"starttime = time.time()\n", "starttime = time.time()\n",
"\n", "\n",
@ -185,52 +97,41 @@
" folder_path=DATA_FOLDER_PATH,\n", " folder_path=DATA_FOLDER_PATH,\n",
" xcats=xcats,\n", " xcats=xcats,\n",
")\n", ")\n",
"print(f\"Time taken to load qdf batch: {time.time() - starttime}\")\n" "print(f\"Time taken to load qdf batch: {time.time() - starttime}\")"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"data": {
"text/plain": [
"286.69339656829834"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"big_df.estimated_size(\"mb\")" "big_df.estimated_size(\"mb\")"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"sel_cids = [\"USD\", \"EUR\", \"GBP\", \"AUD\", \"CAD\"]\n", "sel_cids = [\"USD\", \"EUR\", \"GBP\", \"AUD\", \"CAD\"]\n",
"start = \"1990-01-01\"" "start = \"1990-01-01\"\n",
"nb_start_time = time.time()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Running with uniform weights, 2 xcats, 5 cids\n"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"Time taken to reduce qdf: 0.9705278873443604\n"
]
}
],
"source": [ "source": [
"fx_xcats = [xc for xc in xcats if xc.startswith(\"FX\")]\n", "fx_xcats = [xc for xc in xcats if xc.startswith(\"FX\")]\n",
"eq_xcats = [xc for xc in xcats if xc.startswith(\"EQ\")]\n", "eq_xcats = [xc for xc in xcats if xc.startswith(\"EQ\")]\n",
@ -239,7 +140,7 @@
"eq_df = msyrs.qdf.reduce_dataframe(\n", "eq_df = msyrs.qdf.reduce_dataframe(\n",
" df=big_df,\n", " df=big_df,\n",
" cids=sel_cids,\n", " cids=sel_cids,\n",
" xcats=fx_xcats + eq_xcats,\n", " xcats=fx_xcats + eq_xcats + cpi_xcats,\n",
" start=start,\n", " start=start,\n",
")\n", ")\n",
"\n", "\n",
@ -248,133 +149,107 @@
")\n", ")\n",
"new_df: pl.DataFrame = msyrs.qdf.update_dataframe(df=eq_df, df_add=fx_df)\n", "new_df: pl.DataFrame = msyrs.qdf.update_dataframe(df=eq_df, df_add=fx_df)\n",
"\n", "\n",
"print(f\"Time taken to reduce qdf: {time.time() - starttime}\")\n" "print(f\"Time taken to reduce qdf: {time.time() - starttime}\")"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"Time taken: 2.3365907669067383 seconds\n"
]
}
],
"source": [
"end_time = time.time()\n",
"print(f\"Time taken: {end_time - nb_start_time} seconds\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"e:\\Work\\ruzt\\msyrs\\.venv\\Lib\\site-packages\\macrosynergy\\panel\\linear_composite.py:437: UserWarning: USD does not have complete xcat data for ['FXXR_NSA']. These will be filled with NaNs for the calculation.\n",
" warnings.warn(wrn_msg.format(cidx=cidx, missing_xcats=missing_xcats))\n"
]
}
],
"source": [ "source": [
"_cids = [\"USD\", \"CAD\"]\n", "_cids = [\"USD\", \"CAD\"]\n",
"\n",
"_df = new_df.to_pandas()\n",
"starttime = time.time()\n",
"\n",
"\n",
"mx = macrosynergy.panel.linear_composite(\n", "mx = macrosynergy.panel.linear_composite(\n",
" df=new_df.to_pandas(),\n", "\n",
" xcats=[\"EQXR_NSA\", \"FXXR_NSA\"], \n", " df=_df,\n",
"\n",
" xcats=[\"EQXR_NSA\", \"FXXR_NSA\"],\n",
" cids=_cids,\n", " cids=_cids,\n",
"\n",
" weights=None,\n", " weights=None,\n",
"\n",
" signs=None,\n", " signs=None,\n",
"\n",
" normalize_weights=False,\n", " normalize_weights=False,\n",
" start=None,\n", " start=None,\n",
" end=None,\n", " end=None,\n",
"\n",
" blacklist=None,\n", " blacklist=None,\n",
"\n",
" complete_xcats=False,\n", " complete_xcats=False,\n",
"\n",
" complete_cids=False,\n", " complete_cids=False,\n",
"\n",
" new_xcat=\"COMPOSITE\",\n", " new_xcat=\"COMPOSITE\",\n",
"\n",
" new_cid=\"GLB\",\n", " new_cid=\"GLB\",\n",
"\n",
")\n", ")\n",
"print(f\"Time taken to run linear composite: {time.time() - starttime}\")\n",
"\n",
"\n",
"# view_timelines(QuantamentalDataFrame(mx), cids=_cids)" "# view_timelines(QuantamentalDataFrame(mx), cids=_cids)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"_cids = [\"USD\", \"CAD\"]\n", "_cids = [\"USD\", \"CAD\"]\n",
"starttime = time.time()\n",
"\n",
"x = msyrs.panel.linear_composite(\n", "x = msyrs.panel.linear_composite(\n",
"\n",
" df=new_df,\n", " df=new_df,\n",
"\n",
" xcats=[\"EQXR_NSA\", \"FXXR_NSA\"],\n", " xcats=[\"EQXR_NSA\", \"FXXR_NSA\"],\n",
" cids=_cids,\n", " cids=_cids,\n",
"\n",
" weights=None,\n", " weights=None,\n",
"\n",
" signs=None,\n", " signs=None,\n",
"\n",
" weight_xcats=None,\n", " weight_xcats=None,\n",
"\n",
" normalize_weights=False,\n", " normalize_weights=False,\n",
" start=None,\n", " start=None,\n",
" end=None,\n", " end=None,\n",
"\n",
" blacklist=None,\n", " blacklist=None,\n",
"\n",
" complete_xcats=False,\n", " complete_xcats=False,\n",
"\n",
" complete_cids=False,\n", " complete_cids=False,\n",
"\n",
" new_xcat=\"COMPOSITE\",\n", " new_xcat=\"COMPOSITE\",\n",
"\n",
" new_cid=\"GLB\",\n", " new_cid=\"GLB\",\n",
"\n",
")\n", ")\n",
"print(f\"Time taken to run linear composite rs: {time.time() - starttime}\")\n",
"\n",
"# view_timelines(QuantamentalDataFrame(x.to_pandas()), cids=_cids)" "# view_timelines(QuantamentalDataFrame(x.to_pandas()), cids=_cids)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "markdown",
"execution_count": 15,
"metadata": {}, "metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"mwide = QuantamentalDataFrame(mx).to_wide().sort_index()\n", "### Running with variable weights\n"
"rwide = QuantamentalDataFrame(x.to_pandas()).to_wide().sort_index()\n",
"np.allclose((mwide - rwide).sum(axis=1), 0)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stderr",
"output_type": "stream",
"text": [
"e:\\Work\\ruzt\\msyrs\\.venv\\Lib\\site-packages\\macrosynergy\\panel\\linear_composite.py:437: UserWarning: USD does not have complete xcat data for ['FXXR_NSA']. These will be filled with NaNs for the calculation.\n",
" warnings.warn(wrn_msg.format(cidx=cidx, missing_xcats=missing_xcats))\n"
]
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"_cids = [\"USD\", \"CAD\", \"EUR\", \"AUD\"]\n", "_cids = [\"USD\", \"CAD\", \"EUR\", \"AUD\"]\n",
"_xcats = [\"EQXR_NSA\", \"FXXR_NSA\"]\n", "_xcats = [\"EQXR_NSA\", \"FXXR_NSA\"]\n",
@ -397,72 +272,90 @@
" new_xcat=\"COMPOSITE\",\n", " new_xcat=\"COMPOSITE\",\n",
" new_cid=\"GLB\",\n", " new_cid=\"GLB\",\n",
")\n", ")\n",
"view_timelines(QuantamentalDataFrame(mx), cids=_cids)\n",
"mwide = QuantamentalDataFrame(mx).to_wide().sort_index()\n", "mwide = QuantamentalDataFrame(mx).to_wide().sort_index()\n",
"rwide = QuantamentalDataFrame(x.to_pandas()).to_wide().sort_index()\n", "rwide = QuantamentalDataFrame(x.to_pandas()).to_wide().sort_index()\n",
"np.allclose((mwide - rwide).sum(axis=1), 0)" "np.allclose((mwide - rwide).sum(axis=1), 0)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "markdown",
"execution_count": 17,
"metadata": {}, "metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>real_date</th>\n",
" <th>value</th>\n",
" <th>cid</th>\n",
" <th>xcat</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [real_date, value, cid, xcat]\n",
"Index: []"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"\n", "### Running with variable weights, normalized\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x = msyrs.panel.linear_composite(\n", "x = msyrs.panel.linear_composite(\n",
" df=new_df,\n", " df=new_df,\n",
" xcats=_xcats,\n", " xcats=cpi_xcats,\n",
" cids=_cids,\n", " cids=_cids,\n",
" weights=[1, 9],\n", " weights=list(range(1, len(cpi_xcats) + 1)),\n",
" normalize_weights=True,\n", " normalize_weights=True,\n",
" new_xcat=\"COMPOSITE\",\n", " new_xcat=\"COMPOSITE\",\n",
" new_cid=\"GLB\",\n", " new_cid=\"GLB\",\n",
")\n", ")\n",
"x.to_pandas().dropna(how=\"any\")" "x.to_pandas().dropna(how=\"any\")"
] ]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"view_timelines(x.to_pandas().dropna(how=\"any\"), cids=_cids)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mx = macrosynergy.panel.linear_composite(\n",
" df=new_df.to_pandas(),\n",
" xcats=cpi_xcats,\n",
" cids=_cids,\n",
" weights=list(range(1, len(cpi_xcats) + 1)),\n",
" normalize_weights=True,\n",
" new_xcat=\"COMPOSITE\",\n",
" new_cid=\"GLB\",\n",
")\n",
"view_timelines(mx.dropna(how=\"any\"), cids=_cids)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mwide = QuantamentalDataFrame(mx).to_wide().sort_index()\n",
"rwide = QuantamentalDataFrame(x.to_pandas()).to_wide().sort_index()\n",
"np.allclose((mwide - rwide).sum(axis=1), 0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Running with categorical weights, normalized\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raise NotImplementedError(\"Not implemented yet\")"
]
} }
], ],
"metadata": { "metadata": {