msyrs/notebooks/funcwise/linear_composites.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Import Python packages\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional: (re)install msyrs from a local path with uv.\n",
    "# The path below is machine-specific; uncomment the line and adjust it before running.\n",
    "# ! uv pip install E:\\Work\\ruzt\\msyrs --upgrade"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import macrosynergy\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import polars as pl\n",
    "import time\n",
    "import os\n",
    "\n",
    "from macrosynergy.panel import view_timelines\n",
    "from macrosynergy.management.types import QuantamentalDataFrame"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Import Python bindings - `msyrs`\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import msyrs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Folder handed to the msyrs loader below. Set the JPMAQS_DATA_FOLDER environment\n",
    "# variable to point at a different location; without it the original path is used.\n",
    "DATA_FOLDER_PATH = os.environ.get(\"JPMAQS_DATA_FOLDER\", \"E:/Work/jpmaqs-data\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cross-section identifiers: two groups (presumably developed / emerging markets) and their union.\n",
    "cids_dm = \"AUD.CAD.CHF.EUR.GBP.JPY.NOK.NZD.SEK.USD\".split(\".\")\n",
    "cids_em = \"CLP.COP.CZK.HUF.IDR.ILS.INR.KRW.MXN.PLN.THB.TRY.TWD.ZAR\".split(\".\")\n",
    "cids = cids_dm + cids_em\n",
    "# All cids except IDR and NZD. An order-preserving filter is used because a set\n",
    "# difference yields an arbitrary order that can change between kernel restarts.\n",
    "cids_dux = [cid for cid in cids if cid not in {\"IDR\", \"NZD\"}]\n",
    "\n",
    "# Category names, split from dot-separated strings.\n",
    "ecos = \"CPIC_SA_P1M1ML12.CPIC_SJA_P3M3ML3AR.CPIC_SJA_P6M6ML6AR.CPIH_SA_P1M1ML12.CPIH_SJA_P3M3ML3AR.CPIH_SJA_P6M6ML6AR.INFTEFF_NSA.INTRGDP_NSA_P1M1ML12_3MMA.INTRGDPv5Y_NSA_P1M1ML12_3MMA.PCREDITGDP_SJA_D1M1ML12.RGDP_SA_P1Q1QL4_20QMA.RYLDIRS02Y_NSA.RYLDIRS05Y_NSA.PCREDITBN_SJA_P1M1ML12\".split(\n",
    "    \".\"\n",
    ")\n",
    "\n",
    "\n",
    "mkts = \"DU02YXR_NSA.DU05YXR_NSA.DU02YXR_VT10.DU05YXR_VT10.EQXR_NSA.EQXR_VT10.FXXR_NSA.FXXR_VT10.FXCRR_NSA.FXTARGETED_NSA.FXUNTRADABLE_NSA\".split(\n",
    "    \".\"\n",
    ")\n",
    "xcats = ecos + mkts\n",
    "\n",
    "# The six CPI categories; these are the first six entries of `ecos`.\n",
    "cpi_xcats = \"CPIC_SA_P1M1ML12.CPIC_SJA_P3M3ML3AR.CPIC_SJA_P6M6ML6AR.CPIH_SA_P1M1ML12.CPIH_SJA_P3M3ML3AR.CPIH_SJA_P6M6ML6AR\".split(\n",
    "    \".\"\n",
    ")\n",
    "\n",
    "# Every <cid>_<xcat> combination.\n",
    "tickers = [f\"{c}_{x}\" for c in cids for x in xcats]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load every category in `xcats` from the data folder and report the wall-clock time.\n",
    "load_started = time.time()\n",
    "\n",
    "big_df: pl.DataFrame = msyrs.qdf.load_qdf_from_download_bank(\n",
    "    xcats=xcats,\n",
    "    folder_path=DATA_FOLDER_PATH,\n",
    ")\n",
    "load_seconds = time.time() - load_started\n",
    "print(f\"Time taken to load qdf batch: {load_seconds}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Estimated size of the loaded frame, in megabytes.\n",
    "big_df_size_mb = big_df.estimated_size(unit=\"mb\")\n",
    "big_df_size_mb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Timestamp taken before the examples below are run.\n",
    "nb_start_time = time.time()\n",
    "\n",
    "# Start date and cross-sections used when reducing the panel.\n",
    "start = \"1990-01-01\"\n",
    "sel_cids = [\n",
    "    \"USD\",\n",
    "    \"EUR\",\n",
    "    \"GBP\",\n",
    "    \"AUD\",\n",
    "    \"CAD\",\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Running with uniform weights, 2 xcats, 2 cids (panel reduced to 5 cids)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pick category groups out of `xcats` by name prefix.\n",
    "fx_xcats = [name for name in xcats if name.startswith(\"FX\")]\n",
    "eq_xcats = [name for name in xcats if name.startswith(\"EQ\")]\n",
    "starttime = time.time()\n",
    "\n",
    "# First reduction: FX, EQ and CPI categories for the selected cross-sections.\n",
    "eq_df = msyrs.qdf.reduce_dataframe(\n",
    "    df=big_df, cids=sel_cids, xcats=fx_xcats + eq_xcats + cpi_xcats, start=start\n",
    ")\n",
    "\n",
    "# Second reduction: FX categories only, with intersect=True.\n",
    "fx_df = msyrs.qdf.reduce_dataframe(\n",
    "    df=big_df,\n",
    "    cids=sel_cids,\n",
    "    start=start,\n",
    "    xcats=fx_xcats,\n",
    "    intersect=True,\n",
    ")\n",
    "\n",
    "# Apply the second frame onto the first; `new_df` feeds every example below.\n",
    "new_df: pl.DataFrame = msyrs.qdf.update_dataframe(df=eq_df, df_add=fx_df)\n",
    "\n",
    "reduce_seconds = time.time() - starttime\n",
    "print(f\"Time taken to reduce qdf: {reduce_seconds}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "_cids = [\"USD\", \"CAD\"]\n",
    "\n",
    "# Convert to pandas before the timer starts so only the composite call is timed.\n",
    "_df = new_df.to_pandas()\n",
    "starttime = time.time()\n",
    "\n",
    "# macrosynergy run with no explicit weights or signs.\n",
    "mx = macrosynergy.panel.linear_composite(\n",
    "    df=_df,\n",
    "    xcats=[\"EQXR_NSA\", \"FXXR_NSA\"],\n",
    "    cids=_cids,\n",
    "    weights=None,\n",
    "    signs=None,\n",
    "    normalize_weights=False,\n",
    "    start=None,\n",
    "    end=None,\n",
    "    blacklist=None,\n",
    "    complete_xcats=False,\n",
    "    complete_cids=False,\n",
    "    new_xcat=\"COMPOSITE\",\n",
    "    new_cid=\"GLB\",\n",
    ")\n",
    "py_seconds = time.time() - starttime\n",
    "print(f\"Time taken to run linear composite: {py_seconds}\")\n",
    "\n",
    "# view_timelines(QuantamentalDataFrame(mx), cids=_cids)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "_cids = [\"USD\", \"CAD\"]\n",
    "starttime = time.time()\n",
    "\n",
    "# msyrs run on the polars frame with the same arguments as the macrosynergy call.\n",
    "x = msyrs.panel.linear_composite(\n",
    "    df=new_df,\n",
    "    xcats=[\"EQXR_NSA\", \"FXXR_NSA\"],\n",
    "    cids=_cids,\n",
    "    weights=None,\n",
    "    signs=None,\n",
    "    weight_xcats=None,\n",
    "    normalize_weights=False,\n",
    "    start=None,\n",
    "    end=None,\n",
    "    blacklist=None,\n",
    "    complete_xcats=False,\n",
    "    complete_cids=False,\n",
    "    new_xcat=\"COMPOSITE\",\n",
    "    new_cid=\"GLB\",\n",
    ")\n",
    "rs_seconds = time.time() - starttime\n",
    "print(f\"Time taken to run linear composite rs: {rs_seconds}\")\n",
    "\n",
    "# view_timelines(QuantamentalDataFrame(x.to_pandas()), cids=_cids)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Running with variable weights\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "_cids = [\"USD\", \"CAD\", \"EUR\", \"AUD\"]\n",
    "_xcats = [\"EQXR_NSA\", \"FXXR_NSA\"]\n",
    "\n",
    "# Same unnormalized weights for both implementations.\n",
    "mx = macrosynergy.panel.linear_composite(\n",
    "    df=new_df.to_pandas(),\n",
    "    xcats=_xcats,\n",
    "    cids=_cids,\n",
    "    weights=[1, 9],\n",
    "    normalize_weights=False,\n",
    "    new_xcat=\"COMPOSITE\",\n",
    "    new_cid=\"GLB\",\n",
    ")\n",
    "x = msyrs.panel.linear_composite(\n",
    "    df=new_df,\n",
    "    xcats=_xcats,\n",
    "    cids=_cids,\n",
    "    weights=[1, 9],\n",
    "    normalize_weights=False,\n",
    "    new_xcat=\"COMPOSITE\",\n",
    "    new_cid=\"GLB\",\n",
    ")\n",
    "view_timelines(QuantamentalDataFrame(mx), cids=_cids)\n",
    "mwide = QuantamentalDataFrame(mx).to_wide().sort_index()\n",
    "rwide = QuantamentalDataFrame(x.to_pandas()).to_wide().sort_index()\n",
    "# Sum absolute differences per row: signed differences of opposite sign would\n",
    "# cancel in the sum and could report a match even though the outputs differ.\n",
    "np.allclose((mwide - rwide).abs().sum(axis=1), 0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Running with variable weights, normalized\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Increasing integer weights 1..len(cpi_xcats), one per CPI category.\n",
    "cpi_weights = list(range(1, len(cpi_xcats) + 1))\n",
    "\n",
    "x = msyrs.panel.linear_composite(\n",
    "    df=new_df,\n",
    "    cids=_cids,\n",
    "    xcats=cpi_xcats,\n",
    "    weights=cpi_weights,\n",
    "    normalize_weights=True,\n",
    "    new_cid=\"GLB\",\n",
    "    new_xcat=\"COMPOSITE\",\n",
    ")\n",
    "\n",
    "# Show only the rows without missing values.\n",
    "x.to_pandas().dropna(how=\"any\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot the msyrs result after dropping rows with missing values.\n",
    "x_complete = x.to_pandas().dropna(how=\"any\")\n",
    "view_timelines(x_complete, cids=_cids)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# macrosynergy run with increasing integer weights 1..len(cpi_xcats), normalized.\n",
    "mx_weights = list(range(1, len(cpi_xcats) + 1))\n",
    "\n",
    "mx = macrosynergy.panel.linear_composite(\n",
    "    df=new_df.to_pandas(),\n",
    "    cids=_cids,\n",
    "    xcats=cpi_xcats,\n",
    "    weights=mx_weights,\n",
    "    normalize_weights=True,\n",
    "    new_cid=\"GLB\",\n",
    "    new_xcat=\"COMPOSITE\",\n",
    ")\n",
    "\n",
    "# Plot only the rows without missing values.\n",
    "mx_complete = mx.dropna(how=\"any\")\n",
    "view_timelines(mx_complete, cids=_cids)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pivot both results to wide format and align them on a sorted index.\n",
    "mwide = QuantamentalDataFrame(mx).to_wide().sort_index()\n",
    "rwide = QuantamentalDataFrame(x.to_pandas()).to_wide().sort_index()\n",
    "# Sum absolute differences per row: signed differences of opposite sign would\n",
    "# cancel in the sum and could report a match even though the outputs differ.\n",
    "np.allclose((mwide - rwide).abs().sum(axis=1), 0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Running with categorical weights, normalized\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# TODO: add the categorical-weights example for this section.\n",
    "# A notice is printed instead of raising so that Restart & Run All completes without an error.\n",
    "print(\"Categorical weights example: not implemented yet\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}