diff --git a/notebooks/funcwise/linear_composites.ipynb b/notebooks/funcwise/linear_composites.ipynb index 86feb2f..8f6522f 100644 --- a/notebooks/funcwise/linear_composites.ipynb +++ b/notebooks/funcwise/linear_composites.ipynb @@ -9,34 +9,16 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[2mUsing Python 3.12.4 environment at: E:\\Work\\ruzt\\msyrs\\.venv\u001b[0m\n", - "\u001b[2mResolved \u001b[1m34 packages\u001b[0m \u001b[2min 121ms\u001b[0m\u001b[0m\n", - " \u001b[36m\u001b[1mBuilding\u001b[0m\u001b[39m msyrs\u001b[2m @ file:///E:/Work/ruzt/msyrs\u001b[0m\n", - " \u001b[32m\u001b[1mBuilt\u001b[0m\u001b[39m msyrs\u001b[2m @ file:///E:/Work/ruzt/msyrs\u001b[0m\n", - "\u001b[2mPrepared \u001b[1m1 package\u001b[0m \u001b[2min 14.72s\u001b[0m\u001b[0m\n", - "\u001b[2mUninstalled \u001b[1m1 package\u001b[0m \u001b[2min 4ms\u001b[0m\u001b[0m\n", - "\u001b[1m\u001b[33mwarning\u001b[39m\u001b[0m\u001b[1m:\u001b[0m \u001b[1mFailed to hardlink files; falling back to full copy. This may lead to degraded performance.\n", - " If the cache and target directories are on different filesystems, hardlinking may not be supported.\n", - " If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\u001b[0m\n", - "\u001b[2mInstalled \u001b[1m1 package\u001b[0m \u001b[2min 30ms\u001b[0m\u001b[0m\n", - " \u001b[33m~\u001b[39m \u001b[1mmsyrs\u001b[0m\u001b[2m==0.0.1 (from file:///E:/Work/ruzt/msyrs)\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ - "! uv pip install E:\\Work\\ruzt\\msyrs --upgrade" + "# ! uv pip install E:\\Work\\ruzt\\msyrs --upgrade" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -44,10 +26,11 @@ "import pandas as pd\n", "import numpy as np\n", "import polars as pl\n", + "import time\n", "import os\n", "\n", "from macrosynergy.panel import view_timelines\n", - "from macrosynergy.management.types import QuantamentalDataFrame\n" + "from macrosynergy.management.types import QuantamentalDataFrame" ] }, { @@ -59,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -68,83 +51,16 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "DATA_FOLDER_PATH = \"E:/Work/jpmaqs-data\"\n", - "# DATA_FOLDER_PATH = \"C:/Users/PalashTyagi/Code/go-dataquery/jpmaqs-data\"\n", - "DQ_CLIENT_ID = os.getenv(\"DQ_CLIENT_ID\")\n", - "DQ_CLIENT_SECRET = os.getenv(\"DQ_CLIENT_SECRET\")" + "DATA_FOLDER_PATH = \"E:/Work/jpmaqs-data\"" ] }, { "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "import time\n", - "\n", - "nb_start_time = time.time()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time taken to load qdf: 0.004000425338745117\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "shape: (5, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64i64i64
2010-03-03"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0333
2010-03-04"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0434
2010-03-05"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0535
2010-03-08"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0838
2010-03-09"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0939
" - ], - "text/plain": [ - "shape: (5, 7)\n", - "┌────────────┬─────┬────────────────────┬───────────┬─────────┬─────────┬─────────┐\n", - "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n", - "╞════════════╪═════╪════════════════════╪═══════════╪═════════╪═════════╪═════════╡\n", - "│ 2010-03-03 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 3 ┆ 33 │\n", - "│ 2010-03-04 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 4 ┆ 34 │\n", - "│ 2010-03-05 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 5 ┆ 35 │\n", - "│ 2010-03-08 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 8 ┆ 38 │\n", - "│ 2010-03-09 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 9 ┆ 39 │\n", - "└────────────┴─────┴────────────────────┴───────────┴─────────┴─────────┴─────────┘" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dfpath = f\"{DATA_FOLDER_PATH}/data/ADPEMPL_SA_P1M1ML1/USD_ADPEMPL_SA_P1M1ML1.csv\"\n", - "\n", - "starttime = time.time()\n", - "ldf: pl.DataFrame = msyrs.qdf.load_qdf(dfpath)\n", - "print(f\"Time taken to load qdf: {time.time() - starttime}\")\n", - "ldf.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -162,22 +78,18 @@ ")\n", "xcats = ecos + mkts\n", "\n", + "cpi_xcats = \"CPIC_SA_P1M1ML12.CPIC_SJA_P3M3ML3AR.CPIC_SJA_P6M6ML6AR.CPIH_SA_P1M1ML12.CPIH_SJA_P3M3ML3AR.CPIH_SJA_P6M6ML6AR\".split(\n", + " \".\"\n", + ")\n", + "\n", "tickers = [f\"{c}_{x}\" for c in cids for x in xcats]" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time taken to load qdf batch: 1.3058679103851318\n" - ] - } - ], + "outputs": [], "source": [ "starttime = time.time()\n", "\n", @@ -185,52 +97,41 @@ " folder_path=DATA_FOLDER_PATH,\n", " xcats=xcats,\n", ")\n", - "print(f\"Time taken to load qdf batch: {time.time() - starttime}\")\n" + "print(f\"Time taken to load qdf batch: {time.time() - starttime}\")" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "286.69339656829834" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "big_df.estimated_size(\"mb\")" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sel_cids = [\"USD\", \"EUR\", \"GBP\", \"AUD\", \"CAD\"]\n", - "start = \"1990-01-01\"" + "start = \"1990-01-01\"\n", + "nb_start_time = time.time()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Running with uniform weights, 2 xcats, 5 cids\n" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time taken to reduce qdf: 0.9705278873443604\n" - ] - } - ], + "outputs": [], "source": [ "fx_xcats = [xc for xc in xcats if xc.startswith(\"FX\")]\n", "eq_xcats = [xc for xc in xcats if xc.startswith(\"EQ\")]\n", @@ -239,7 +140,7 @@ "eq_df = msyrs.qdf.reduce_dataframe(\n", " df=big_df,\n", " cids=sel_cids,\n", - " xcats=fx_xcats + eq_xcats,\n", + " xcats=fx_xcats + eq_xcats + cpi_xcats,\n", " start=start,\n", ")\n", "\n", @@ -248,133 +149,107 @@ ")\n", "new_df: pl.DataFrame = msyrs.qdf.update_dataframe(df=eq_df, df_add=fx_df)\n", "\n", - "print(f\"Time taken to reduce qdf: {time.time() - starttime}\")\n" + "print(f\"Time taken to reduce qdf: {time.time() - starttime}\")" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time taken: 2.3365907669067383 seconds\n" - ] - } - ], - "source": [ - "end_time = time.time()\n", - "print(f\"Time taken: {end_time - nb_start_time} seconds\")" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "e:\\Work\\ruzt\\msyrs\\.venv\\Lib\\site-packages\\macrosynergy\\panel\\linear_composite.py:437: UserWarning: USD does not have complete xcat data for ['FXXR_NSA']. These will be filled with NaNs for the calculation.\n", - " warnings.warn(wrn_msg.format(cidx=cidx, missing_xcats=missing_xcats))\n" - ] - } - ], + "outputs": [], "source": [ "_cids = [\"USD\", \"CAD\"]\n", + "\n", + "_df = new_df.to_pandas()\n", + "starttime = time.time()\n", + "\n", + "\n", "mx = macrosynergy.panel.linear_composite(\n", - " df=new_df.to_pandas(),\n", - " xcats=[\"EQXR_NSA\", \"FXXR_NSA\"], \n", + "\n", + " df=_df,\n", + "\n", + " xcats=[\"EQXR_NSA\", \"FXXR_NSA\"],\n", " cids=_cids,\n", + "\n", " weights=None,\n", + "\n", " signs=None,\n", + "\n", " normalize_weights=False,\n", " start=None,\n", " end=None,\n", + "\n", " blacklist=None,\n", + "\n", " complete_xcats=False,\n", + "\n", " complete_cids=False,\n", + "\n", " new_xcat=\"COMPOSITE\",\n", + "\n", " new_cid=\"GLB\",\n", + "\n", ")\n", + "print(f\"Time taken to run linear composite: {time.time() - starttime}\")\n", + "\n", + "\n", "# view_timelines(QuantamentalDataFrame(mx), cids=_cids)" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "_cids = [\"USD\", \"CAD\"]\n", + "starttime = time.time()\n", + "\n", "x = msyrs.panel.linear_composite(\n", + "\n", " df=new_df,\n", + "\n", " xcats=[\"EQXR_NSA\", \"FXXR_NSA\"],\n", " cids=_cids,\n", + "\n", " weights=None,\n", + "\n", " signs=None,\n", + "\n", " weight_xcats=None,\n", + "\n", " normalize_weights=False,\n", " start=None,\n", " end=None,\n", + "\n", " blacklist=None,\n", + "\n", " complete_xcats=False,\n", + "\n", " complete_cids=False,\n", + "\n", " new_xcat=\"COMPOSITE\",\n", + "\n", " new_cid=\"GLB\",\n", + "\n", ")\n", + "print(f\"Time taken to run linear composite rs: {time.time() - starttime}\")\n", + "\n", "# view_timelines(QuantamentalDataFrame(x.to_pandas()), cids=_cids)" ] }, { - "cell_type": "code", - "execution_count": 15, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "mwide = QuantamentalDataFrame(mx).to_wide().sort_index()\n", - "rwide = QuantamentalDataFrame(x.to_pandas()).to_wide().sort_index()\n", - "np.allclose((mwide - rwide).sum(axis=1), 0)" + "### Running with variable weights\n" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "e:\\Work\\ruzt\\msyrs\\.venv\\Lib\\site-packages\\macrosynergy\\panel\\linear_composite.py:437: UserWarning: USD does not have complete xcat data for ['FXXR_NSA']. These will be filled with NaNs for the calculation.\n", - " warnings.warn(wrn_msg.format(cidx=cidx, missing_xcats=missing_xcats))\n" - ] - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "_cids = [\"USD\", \"CAD\", \"EUR\", \"AUD\"]\n", "_xcats = [\"EQXR_NSA\", \"FXXR_NSA\"]\n", @@ -397,72 +272,90 @@ " new_xcat=\"COMPOSITE\",\n", " new_cid=\"GLB\",\n", ")\n", + "view_timelines(QuantamentalDataFrame(mx), cids=_cids)\n", "mwide = QuantamentalDataFrame(mx).to_wide().sort_index()\n", "rwide = QuantamentalDataFrame(x.to_pandas()).to_wide().sort_index()\n", "np.allclose((mwide - rwide).sum(axis=1), 0)" ] }, { - "cell_type": "code", - "execution_count": 17, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
real_datevaluecidxcat
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: [real_date, value, cid, xcat]\n", - "Index: []" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "\n", + "### Running with variable weights, normalized\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "x = msyrs.panel.linear_composite(\n", " df=new_df,\n", - " xcats=_xcats,\n", + " xcats=cpi_xcats,\n", " cids=_cids,\n", - " weights=[1, 9],\n", + " weights=list(range(1, len(cpi_xcats) + 1)),\n", " normalize_weights=True,\n", " new_xcat=\"COMPOSITE\",\n", " new_cid=\"GLB\",\n", ")\n", "x.to_pandas().dropna(how=\"any\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "view_timelines(x.to_pandas().dropna(how=\"any\"), cids=_cids)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mx = macrosynergy.panel.linear_composite(\n", + " df=new_df.to_pandas(),\n", + " xcats=cpi_xcats,\n", + " cids=_cids,\n", + " weights=list(range(1, len(cpi_xcats) + 1)),\n", + " normalize_weights=True,\n", + " new_xcat=\"COMPOSITE\",\n", + " new_cid=\"GLB\",\n", + ")\n", + "view_timelines(mx.dropna(how=\"any\"), cids=_cids)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mwide = QuantamentalDataFrame(mx).to_wide().sort_index()\n", + "rwide = QuantamentalDataFrame(x.to_pandas()).to_wide().sort_index()\n", + "np.allclose((mwide - rwide).sum(axis=1), 0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Running with categorical weights, normalized\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "raise NotImplementedError(\"Not implemented yet\")" + ] } ], "metadata": {