From f84879119b88d0754aa77291c130d869040d6045 Mon Sep 17 00:00:00 2001
From: Palash Tyagi <23239946+Magnus167@users.noreply.github.com>
Date: Thu, 10 Apr 2025 00:33:28 +0100
Subject: [PATCH] updating linear composites notebook
---
notebooks/funcwise/linear_composites.ipynb | 395 ++++++++-------------
1 file changed, 144 insertions(+), 251 deletions(-)
diff --git a/notebooks/funcwise/linear_composites.ipynb b/notebooks/funcwise/linear_composites.ipynb
index 86feb2f..8f6522f 100644
--- a/notebooks/funcwise/linear_composites.ipynb
+++ b/notebooks/funcwise/linear_composites.ipynb
@@ -9,34 +9,16 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\u001b[2mUsing Python 3.12.4 environment at: E:\\Work\\ruzt\\msyrs\\.venv\u001b[0m\n",
- "\u001b[2mResolved \u001b[1m34 packages\u001b[0m \u001b[2min 121ms\u001b[0m\u001b[0m\n",
- " \u001b[36m\u001b[1mBuilding\u001b[0m\u001b[39m msyrs\u001b[2m @ file:///E:/Work/ruzt/msyrs\u001b[0m\n",
- " \u001b[32m\u001b[1mBuilt\u001b[0m\u001b[39m msyrs\u001b[2m @ file:///E:/Work/ruzt/msyrs\u001b[0m\n",
- "\u001b[2mPrepared \u001b[1m1 package\u001b[0m \u001b[2min 14.72s\u001b[0m\u001b[0m\n",
- "\u001b[2mUninstalled \u001b[1m1 package\u001b[0m \u001b[2min 4ms\u001b[0m\u001b[0m\n",
- "\u001b[1m\u001b[33mwarning\u001b[39m\u001b[0m\u001b[1m:\u001b[0m \u001b[1mFailed to hardlink files; falling back to full copy. This may lead to degraded performance.\n",
- " If the cache and target directories are on different filesystems, hardlinking may not be supported.\n",
- " If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\u001b[0m\n",
- "\u001b[2mInstalled \u001b[1m1 package\u001b[0m \u001b[2min 30ms\u001b[0m\u001b[0m\n",
- " \u001b[33m~\u001b[39m \u001b[1mmsyrs\u001b[0m\u001b[2m==0.0.1 (from file:///E:/Work/ruzt/msyrs)\u001b[0m\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
- "! uv pip install E:\\Work\\ruzt\\msyrs --upgrade"
+ "# ! uv pip install E:\\Work\\ruzt\\msyrs --upgrade"
]
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -44,10 +26,11 @@
"import pandas as pd\n",
"import numpy as np\n",
"import polars as pl\n",
+ "import time\n",
"import os\n",
"\n",
"from macrosynergy.panel import view_timelines\n",
- "from macrosynergy.management.types import QuantamentalDataFrame\n"
+ "from macrosynergy.management.types import QuantamentalDataFrame"
]
},
{
@@ -59,7 +42,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -68,83 +51,16 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "DATA_FOLDER_PATH = \"E:/Work/jpmaqs-data\"\n",
- "# DATA_FOLDER_PATH = \"C:/Users/PalashTyagi/Code/go-dataquery/jpmaqs-data\"\n",
- "DQ_CLIENT_ID = os.getenv(\"DQ_CLIENT_ID\")\n",
- "DQ_CLIENT_SECRET = os.getenv(\"DQ_CLIENT_SECRET\")"
+ "DATA_FOLDER_PATH = os.getenv(\"DATA_FOLDER_PATH\", \"E:/Work/jpmaqs-data\")  # env var overrides the local default path"
]
},
{
"cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "import time\n",
- "\n",
- "nb_start_time = time.time()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Time taken to load qdf: 0.004000425338745117\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "
\n",
- "
shape: (5, 7)real_date | cid | xcat | value | grading | eop_lag | mop_lag |
---|
date | str | str | f64 | f64 | i64 | i64 |
2010-03-03 | "USD" | "ADPEMPL_SA_P1M1ML1" | -0.173806 | 3.0 | 3 | 33 |
2010-03-04 | "USD" | "ADPEMPL_SA_P1M1ML1" | -0.173806 | 3.0 | 4 | 34 |
2010-03-05 | "USD" | "ADPEMPL_SA_P1M1ML1" | -0.173806 | 3.0 | 5 | 35 |
2010-03-08 | "USD" | "ADPEMPL_SA_P1M1ML1" | -0.173806 | 3.0 | 8 | 38 |
2010-03-09 | "USD" | "ADPEMPL_SA_P1M1ML1" | -0.173806 | 3.0 | 9 | 39 |
"
- ],
- "text/plain": [
- "shape: (5, 7)\n",
- "┌────────────┬─────┬────────────────────┬───────────┬─────────┬─────────┬─────────┐\n",
- "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n",
- "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
- "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n",
- "╞════════════╪═════╪════════════════════╪═══════════╪═════════╪═════════╪═════════╡\n",
- "│ 2010-03-03 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 3 ┆ 33 │\n",
- "│ 2010-03-04 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 4 ┆ 34 │\n",
- "│ 2010-03-05 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 5 ┆ 35 │\n",
- "│ 2010-03-08 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 8 ┆ 38 │\n",
- "│ 2010-03-09 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 9 ┆ 39 │\n",
- "└────────────┴─────┴────────────────────┴───────────┴─────────┴─────────┴─────────┘"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "dfpath = f\"{DATA_FOLDER_PATH}/data/ADPEMPL_SA_P1M1ML1/USD_ADPEMPL_SA_P1M1ML1.csv\"\n",
- "\n",
- "starttime = time.time()\n",
- "ldf: pl.DataFrame = msyrs.qdf.load_qdf(dfpath)\n",
- "print(f\"Time taken to load qdf: {time.time() - starttime}\")\n",
- "ldf.head(5)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -162,22 +78,18 @@
")\n",
"xcats = ecos + mkts\n",
"\n",
+ "cpi_xcats = \"CPIC_SA_P1M1ML12.CPIC_SJA_P3M3ML3AR.CPIC_SJA_P6M6ML6AR.CPIH_SA_P1M1ML12.CPIH_SJA_P3M3ML3AR.CPIH_SJA_P6M6ML6AR\".split(\n",
+ " \".\"\n",
+ ")\n",
+ "\n",
"tickers = [f\"{c}_{x}\" for c in cids for x in xcats]"
]
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Time taken to load qdf batch: 1.3058679103851318\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"starttime = time.time()\n",
"\n",
@@ -185,52 +97,41 @@
" folder_path=DATA_FOLDER_PATH,\n",
" xcats=xcats,\n",
")\n",
- "print(f\"Time taken to load qdf batch: {time.time() - starttime}\")\n"
+ "print(f\"Time taken to load qdf batch: {time.time() - starttime}\")"
]
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "286.69339656829834"
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"big_df.estimated_size(\"mb\")"
]
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sel_cids = [\"USD\", \"EUR\", \"GBP\", \"AUD\", \"CAD\"]\n",
- "start = \"1990-01-01\""
+ "start = \"1990-01-01\"\n",
+ "nb_start_time = time.time()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Running with uniform weights, 2 xcats, 2 cids\n"
]
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Time taken to reduce qdf: 0.9705278873443604\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"fx_xcats = [xc for xc in xcats if xc.startswith(\"FX\")]\n",
"eq_xcats = [xc for xc in xcats if xc.startswith(\"EQ\")]\n",
@@ -239,7 +140,7 @@
"eq_df = msyrs.qdf.reduce_dataframe(\n",
" df=big_df,\n",
" cids=sel_cids,\n",
- " xcats=fx_xcats + eq_xcats,\n",
+ " xcats=fx_xcats + eq_xcats + cpi_xcats,\n",
" start=start,\n",
")\n",
"\n",
@@ -248,133 +149,107 @@
")\n",
"new_df: pl.DataFrame = msyrs.qdf.update_dataframe(df=eq_df, df_add=fx_df)\n",
"\n",
- "print(f\"Time taken to reduce qdf: {time.time() - starttime}\")\n"
+ "print(f\"Time taken to reduce qdf: {time.time() - starttime}\")"
]
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Time taken: 2.3365907669067383 seconds\n"
- ]
- }
- ],
- "source": [
- "end_time = time.time()\n",
- "print(f\"Time taken: {end_time - nb_start_time} seconds\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "e:\\Work\\ruzt\\msyrs\\.venv\\Lib\\site-packages\\macrosynergy\\panel\\linear_composite.py:437: UserWarning: USD does not have complete xcat data for ['FXXR_NSA']. These will be filled with NaNs for the calculation.\n",
- " warnings.warn(wrn_msg.format(cidx=cidx, missing_xcats=missing_xcats))\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"_cids = [\"USD\", \"CAD\"]\n",
+ "\n",
+ "_df = new_df.to_pandas()\n",
+ "starttime = time.time()\n",
+ "\n",
+ "\n",
"mx = macrosynergy.panel.linear_composite(\n",
- " df=new_df.to_pandas(),\n",
- " xcats=[\"EQXR_NSA\", \"FXXR_NSA\"], \n",
+ "\n",
+ " df=_df,\n",
+ "\n",
+ " xcats=[\"EQXR_NSA\", \"FXXR_NSA\"],\n",
" cids=_cids,\n",
+ "\n",
" weights=None,\n",
+ "\n",
" signs=None,\n",
+ "\n",
" normalize_weights=False,\n",
" start=None,\n",
" end=None,\n",
+ "\n",
" blacklist=None,\n",
+ "\n",
" complete_xcats=False,\n",
+ "\n",
" complete_cids=False,\n",
+ "\n",
" new_xcat=\"COMPOSITE\",\n",
+ "\n",
" new_cid=\"GLB\",\n",
+ "\n",
")\n",
+ "print(f\"Time taken to run linear composite: {time.time() - starttime}\")\n",
+ "\n",
+ "\n",
"# view_timelines(QuantamentalDataFrame(mx), cids=_cids)"
]
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"_cids = [\"USD\", \"CAD\"]\n",
+ "starttime = time.time()\n",
+ "\n",
"x = msyrs.panel.linear_composite(\n",
+ "\n",
" df=new_df,\n",
+ "\n",
" xcats=[\"EQXR_NSA\", \"FXXR_NSA\"],\n",
" cids=_cids,\n",
+ "\n",
" weights=None,\n",
+ "\n",
" signs=None,\n",
+ "\n",
" weight_xcats=None,\n",
+ "\n",
" normalize_weights=False,\n",
" start=None,\n",
" end=None,\n",
+ "\n",
" blacklist=None,\n",
+ "\n",
" complete_xcats=False,\n",
+ "\n",
" complete_cids=False,\n",
+ "\n",
" new_xcat=\"COMPOSITE\",\n",
+ "\n",
" new_cid=\"GLB\",\n",
+ "\n",
")\n",
+ "print(f\"Time taken to run linear composite rs: {time.time() - starttime}\")\n",
+ "\n",
"# view_timelines(QuantamentalDataFrame(x.to_pandas()), cids=_cids)"
]
},
{
- "cell_type": "code",
- "execution_count": 15,
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "True"
- ]
- },
- "execution_count": 15,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
"source": [
- "mwide = QuantamentalDataFrame(mx).to_wide().sort_index()\n",
- "rwide = QuantamentalDataFrame(x.to_pandas()).to_wide().sort_index()\n",
- "np.allclose((mwide - rwide).sum(axis=1), 0)"
+ "### Running with variable weights\n"
]
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "e:\\Work\\ruzt\\msyrs\\.venv\\Lib\\site-packages\\macrosynergy\\panel\\linear_composite.py:437: UserWarning: USD does not have complete xcat data for ['FXXR_NSA']. These will be filled with NaNs for the calculation.\n",
- " warnings.warn(wrn_msg.format(cidx=cidx, missing_xcats=missing_xcats))\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "True"
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"_cids = [\"USD\", \"CAD\", \"EUR\", \"AUD\"]\n",
"_xcats = [\"EQXR_NSA\", \"FXXR_NSA\"]\n",
@@ -397,72 +272,90 @@
" new_xcat=\"COMPOSITE\",\n",
" new_cid=\"GLB\",\n",
")\n",
+ "view_timelines(QuantamentalDataFrame(mx), cids=_cids)\n",
"mwide = QuantamentalDataFrame(mx).to_wide().sort_index()\n",
"rwide = QuantamentalDataFrame(x.to_pandas()).to_wide().sort_index()\n",
"np.allclose((mwide - rwide).sum(axis=1), 0)"
]
},
{
- "cell_type": "code",
- "execution_count": 17,
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " real_date | \n",
- " value | \n",
- " cid | \n",
- " xcat | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- "Empty DataFrame\n",
- "Columns: [real_date, value, cid, xcat]\n",
- "Index: []"
- ]
- },
- "execution_count": 17,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
"source": [
- "\n",
+ "### Running with variable weights, normalized\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
"x = msyrs.panel.linear_composite(\n",
" df=new_df,\n",
- " xcats=_xcats,\n",
+ " xcats=cpi_xcats,\n",
" cids=_cids,\n",
- " weights=[1, 9],\n",
+ " weights=list(range(1, len(cpi_xcats) + 1)),\n",
" normalize_weights=True,\n",
" new_xcat=\"COMPOSITE\",\n",
" new_cid=\"GLB\",\n",
")\n",
"x.to_pandas().dropna(how=\"any\")"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "view_timelines(x.to_pandas().dropna(how=\"any\"), cids=_cids)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "mx = macrosynergy.panel.linear_composite(\n",
+ " df=new_df.to_pandas(),\n",
+ " xcats=cpi_xcats,\n",
+ " cids=_cids,\n",
+ " weights=list(range(1, len(cpi_xcats) + 1)),\n",
+ " normalize_weights=True,\n",
+ " new_xcat=\"COMPOSITE\",\n",
+ " new_cid=\"GLB\",\n",
+ ")\n",
+ "view_timelines(mx.dropna(how=\"any\"), cids=_cids)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "mwide = QuantamentalDataFrame(mx).to_wide().sort_index()\n",
+ "rwide = QuantamentalDataFrame(x.to_pandas()).to_wide().sort_index()\n",
+ "np.allclose((mwide - rwide).abs().sum(axis=1), 0)  # abs() so offsetting per-column differences cannot cancel out"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Running with categorical weights, normalized\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# TODO: add the categorical-weights (normalized) example -- not implemented yet"
+ ]
}
],
"metadata": {