{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Build and install the package\n",
    "\n",
    "First patch `pyo3-polars`:\n",
    "\n",
    "- Use [this diff](https://github.com/pola-rs/pyo3-polars/compare/main...Magnus167:pyo3-polars:main) to make changes to the `pyo3-polars` package.\n",
    "\n",
    "Install the package:\n",
    "\n",
    "```bash\n",
    "python -m venv .venv\n",
    "# source .venv/bin/activate\n",
    "./.venv/Scripts/activate\n",
    "pip install uv\n",
    "uv pip install maturin jupyter notebook ipython ipywidgets pyarrow polars macrosynergy --upgrade\n",
    "\n",
    "maturin develop --release\n",
    "```\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ! uv pip install maturin jupyter notebook ipython ipywidgets pyarrow polars macrosynergy --upgrade\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Import Python packages\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# All imports live here so a Restart & Run All exposes every dependency up front.\n",
    "import os\n",
    "import time\n",
    "\n",
    "import macrosynergy\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import polars as pl\n",
    "\n",
    "from macrosynergy.panel import view_timelines\n",
    "from macrosynergy.panel.historic_vol import historic_vol\n",
    "from macrosynergy.management.types import QuantamentalDataFrame\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Import Python bindings - `msyrs`\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import msyrs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): machine-specific absolute path — adjust for your environment.\n",
    "DATA_FOLDER_PATH = \"E:/Work/jpmaqs-data\"\n",
    "# DATA_FOLDER_PATH = \"C:/Users/PalashTyagi/Code/go-dataquery/jpmaqs-data\"\n",
    "DQ_CLIENT_ID = os.getenv(\"DQ_CLIENT_ID\")\n",
    "DQ_CLIENT_SECRET = os.getenv(\"DQ_CLIENT_SECRET\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Wall-clock start for the whole notebook (reported in the final cell).\n",
    "nb_start_time = time.time()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load a single-ticker QDF CSV through the Rust bindings and time it.\n",
    "dfpath = f\"{DATA_FOLDER_PATH}/data/ADPEMPL_SA_P1M1ML1/USD_ADPEMPL_SA_P1M1ML1.csv\"\n",
    "\n",
    "starttime = time.time()\n",
    "ldf: pl.DataFrame = msyrs.qdf.load_qdf(dfpath)\n",
    "print(f\"Time taken to load qdf: {time.time() - starttime}\")\n",
    "ldf.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Universe definition: developed + emerging market cross-sections, and the\n",
    "# economic/market category codes used throughout the notebook.\n",
    "cids_dm = \"AUD.CAD.CHF.EUR.GBP.JPY.NOK.NZD.SEK.USD\".split(\".\")\n",
    "cids_em = \"CLP.COP.CZK.HUF.IDR.ILS.INR.KRW.MXN.PLN.THB.TRY.TWD.ZAR\".split(\".\")\n",
    "cids = cids_dm + cids_em\n",
    "cids_dux = list(set(cids) - set([\"IDR\", \"NZD\"]))\n",
    "ecos = \"CPIC_SA_P1M1ML12.CPIC_SJA_P3M3ML3AR.CPIC_SJA_P6M6ML6AR.CPIH_SA_P1M1ML12.CPIH_SJA_P3M3ML3AR.CPIH_SJA_P6M6ML6AR.INFTEFF_NSA.INTRGDP_NSA_P1M1ML12_3MMA.INTRGDPv5Y_NSA_P1M1ML12_3MMA.PCREDITGDP_SJA_D1M1ML12.RGDP_SA_P1Q1QL4_20QMA.RYLDIRS02Y_NSA.RYLDIRS05Y_NSA.PCREDITBN_SJA_P1M1ML12\".split(\n",
    "    \".\"\n",
    ")\n",
    "\n",
    "mkts = \"DU02YXR_NSA.DU05YXR_NSA.DU02YXR_VT10.DU05YXR_VT10.EQXR_NSA.EQXR_VT10.FXXR_NSA.FXXR_VT10.FXCRR_NSA.FXTARGETED_NSA.FXUNTRADABLE_NSA\".split(\n",
    "    \".\"\n",
    ")\n",
    "xcats = ecos + mkts\n",
    "\n",
    "tickers = [f\"{c}_{x}\" for c in cids for x in xcats]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Alternative: download via the Rust bindings (needs DQ credentials).\n",
    "# downloaded_df: pl.DataFrame = msyrs.download.download_jpmaqs_indicators_as_df(\n",
    "#     client_id=DQ_CLIENT_ID,\n",
    "#     client_secret=DQ_CLIENT_SECRET,\n",
    "#     tickers=tickers,\n",
    "# )\n",
    "# downloaded_df.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "downloaded_df = None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Alternative: download via the pure-Python macrosynergy client (slow).\n",
    "# pddf = macrosynergy.download.JPMaQSDownload().download(\n",
    "#     tickers=tickers,\n",
    "#     get_catalogue=True,\n",
    "#     show_progress=True,\n",
    "#     start_date=\"1990-01-01\",\n",
    "# )\n",
    "# pddf = macrosynergy.management.types.QuantamentalDataFrame(pddf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Batch-load every requested category from the on-disk download bank.\n",
    "starttime = time.time()\n",
    "big_df: pl.DataFrame = msyrs.qdf.load_qdf_from_download_bank(\n",
    "    folder_path=DATA_FOLDER_PATH,\n",
    "    xcats=xcats,\n",
    "    # folder_path=DATA_FOLDER_PATH, cids=cids\n",
    ")\n",
    "print(f\"Time taken to load qdf batch: {time.time() - starttime}\")\n",
    "\n",
    "big_df.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "big_df.estimated_size(\"mb\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "big_df.to_pandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Memory footprint of the plain-pandas conversion, in MiB.\n",
    "big_df.to_pandas().memory_usage(deep=True).sum() / 1024**2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Memory footprint after QuantamentalDataFrame coercion, in MiB — for comparison.\n",
    "macrosynergy.management.types.QuantamentalDataFrame(big_df.to_pandas()).memory_usage(\n",
    "    deep=True\n",
    ").sum() / 1024**2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sel_cids = [\"USD\", \"EUR\", \"GBP\", \"AUD\", \"CAD\"]\n",
    "start = \"1990-01-01\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "starttime = time.time()\n",
    "eq_df = msyrs.qdf.reduce_dataframe(\n",
    "    df=big_df,\n",
    "    cids=sel_cids,\n",
    "    xcats=[\"EQXR_NSA\", \"EQXR_VT10\"],\n",
    "    start=start,\n",
    ")\n",
    "print(f\"Time taken to reduce qdf: {time.time() - starttime}\")\n",
    "eq_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Python benchmark for the reduce above (requires the pddf download cell).\n",
    "# starttime = time.time()\n",
    "# eq_pd_df = pddf.reduce_df(cids=sel_cids, xcats=[\"EQXR_NSA\", \"EQXR_VT10\"], start=start)\n",
    "# print(f\"Time taken to reduce qdf: {time.time() - starttime}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fx_xcats = [xc for xc in xcats if xc.startswith(\"FX\")]\n",
    "starttime = time.time()\n",
    "\n",
    "fx_df = msyrs.qdf.reduce_dataframe(\n",
    "    df=big_df, cids=sel_cids, start=start, xcats=fx_xcats, intersect=True\n",
    ")\n",
    "print(f\"Time taken to reduce qdf: {time.time() - starttime}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Python benchmark for the FX reduce (requires the pddf download cell).\n",
    "# starttime = time.time()\n",
    "# fx_pd_df = pddf.reduce_df(cids=sel_cids, xcats=fx_xcats, start=start, intersect=True)\n",
    "# print(f\"Time taken to reduce qdf: {time.time() - starttime}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "starttime = time.time()\n",
    "new_df: pl.DataFrame = msyrs.qdf.update_dataframe(df=eq_df, df_add=fx_df)\n",
    "print(\"Time taken: \", time.time() - starttime)\n",
    "new_df.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Python benchmark for update_dataframe (requires the pddf download cell).\n",
    "# starttime = time.time()\n",
    "# new_pd_df = pddf.update_df(df_add=eq_pd_df,)\n",
    "# print(\"Time taken: \", time.time() - starttime)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "new_df.tail(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rust binding signature, for reference:\n",
    "# df: polars::prelude::DataFrame,\n",
    "# xcat: String,\n",
    "# cids: Option>,\n",
    "# lback_periods: Option,\n",
    "# lback_method: Option,\n",
    "# half_life: Option,\n",
    "# start: Option,\n",
    "# end: Option,\n",
    "# est_freq: Option,\n",
    "# remove_zeros: Option,\n",
    "# postfix: Option,\n",
    "# nan_tolerance: Option,\n",
    "\n",
    "starttime = time.time()\n",
    "hv = msyrs.panel.historic_vol(\n",
    "    df=new_df,\n",
    "    xcat=\"EQXR_NSA\",\n",
    "    cids=None,\n",
    "    lback_periods=21,\n",
    "    lback_method=\"xma\",\n",
    "    half_life=11,\n",
    "    start=None,\n",
    "    end=None,\n",
    "    est_freq=\"D\",\n",
    "    remove_zeros=None,\n",
    "    postfix=\"_HV_RS\",\n",
    "    nan_tolerance=None,\n",
    ")\n",
    "print(f\"Time taken: {time.time() - starttime}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Forward-fill the Rust historic-vol output onto a business-day index and plot.\n",
    "hdf = hv.to_pandas()\n",
    "\n",
    "hdf = QuantamentalDataFrame.from_wide(\n",
    "    (\n",
    "        QuantamentalDataFrame(hdf)\n",
    "        .to_wide()\n",
    "        .reindex(\n",
    "            pd.bdate_range(\n",
    "                start=hdf[\"real_date\"].min(),\n",
    "                end=hdf[\"real_date\"].max(),\n",
    "                freq=\"B\",\n",
    "            ),\n",
    "            method=\"ffill\",\n",
    "        )\n",
    "        .dropna(axis=\"rows\", how=\"all\")\n",
    "    ).sort_index(),\n",
    "    categorical=False,\n",
    ")\n",
    "\n",
    "view_timelines(df=hdf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "starttime = time.time()\n",
    "msyrs.qdf.pivot_dataframe_by_ticker(df=new_df).head(10)\n",
    "print(\"Time taken: \", time.time() - starttime)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# new_pd_df = macrosynergy.management.types.QuantamentalDataFrame(new_pd_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pure-Python historic_vol benchmark for comparison against the Rust result.\n",
    "new_df_pd = QuantamentalDataFrame(\n",
    "    new_df.to_pandas(), categorical=False\n",
    ")\n",
    "# Local name: do NOT reuse the global `cids` universe list defined above.\n",
    "hv_cids = new_df_pd['cid'].unique().tolist()\n",
    "\n",
    "starttime = time.time()\n",
    "hv_bench = historic_vol(\n",
    "    df=new_df_pd,\n",
    "    xcat='EQXR_NSA',\n",
    "    cids=hv_cids,\n",
    "    lback_periods=21,\n",
    "    lback_meth=\"xma\",\n",
    "    half_life=11,\n",
    "    est_freq=\"D\",\n",
    "    blacklist=None,\n",
    "    remove_zeros=True,\n",
    "    postfix=\"_HV_PY\",\n",
    "    nan_tolerance=0.001,\n",
    ")\n",
    "print(f\"Time taken: {time.time() - starttime}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "view_timelines(df=hv_bench)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Align the Python benchmark onto the same business-day grid as the Rust output.\n",
    "hv_bench = hv_bench.dropna(axis=\"rows\", how=\"any\").reset_index(drop=True)\n",
    "bdr = pd.bdate_range(new_df_pd.real_date.min(), new_df_pd.real_date.max())\n",
    "hv_bench = QuantamentalDataFrame.from_wide(\n",
    "    QuantamentalDataFrame(hv_bench).to_wide().reindex(bdr, method=\"ffill\"),\n",
    "    categorical=False,\n",
    ").reset_index(drop=True)\n",
    "\n",
    "view_timelines(df=hv_bench)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def zscore_series(s: pd.Series) -> pd.Series:\n",
    "    \"\"\"Standardise a series to zero mean and unit (sample) standard deviation.\"\"\"\n",
    "    return (s - s.mean()) / s.std()\n",
    "\n",
    "\n",
    "# Cross-sectional z-scores (axis=1: per date, across tickers) of both results.\n",
    "a = QuantamentalDataFrame.from_wide(\n",
    "    QuantamentalDataFrame(hv_bench)\n",
    "    .to_wide()\n",
    "    .apply(lambda x: zscore_series(x), axis=1, result_type=\"expand\")\n",
    ")\n",
    "\n",
    "b = QuantamentalDataFrame.from_wide(\n",
    "    QuantamentalDataFrame(hdf)\n",
    "    .to_wide()\n",
    "    .apply(lambda x: zscore_series(x), axis=1, result_type=\"expand\")\n",
    ")\n",
    "\n",
    "view_timelines(QuantamentalDataFrame.from_qdf_list([a, b]).dropna(axis=\"rows\", how=\"any\").reset_index(drop=True))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Relative difference between the two implementations, matched by base ticker\n",
    "# (the _HV_PY / _HV_RS postfix is stripped so the wide columns line up).\n",
    "# a = QuantamentalDataFrame(hv_bench).to_wide().apply(lambda x: zscore_series(x), axis=1, result_type=\"expand\").rename(columns=lambda x: '_'.join(x.split(\"_\")[:-1]))\n",
    "\n",
    "# b = QuantamentalDataFrame(hdf).to_wide().apply(lambda x: zscore_series(x), axis=1, result_type=\"expand\").rename(columns=lambda x: '_'.join(x.split(\"_\")[:-1]))\n",
    "\n",
    "a = QuantamentalDataFrame(hv_bench).to_wide().rename(columns=lambda x: '_'.join(x.split(\"_\")[:-1]))\n",
    "\n",
    "b = QuantamentalDataFrame(hdf).to_wide().rename(columns=lambda x: '_'.join(x.split(\"_\")[:-1]))\n",
    "\n",
    "diff = (a - b) / (a.abs())\n",
    "\n",
    "view_timelines(QuantamentalDataFrame.from_wide(diff))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# starttime = time.time()\n",
    "# new_pd_df.to_wide()\n",
    "# print(\"Time taken: \", time.time() - starttime)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "end_time = time.time()\n",
    "print(f\"Time taken: {end_time - nb_start_time} seconds\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rust binding signature, for reference:\n",
    "# pub fn linear_composite(\n",
    "#     df: PyDataFrame,\n",
    "#     xcats: Vec,\n",
    "#     cids: Vec,\n",
    "#     weights: Option>,\n",
    "#     signs: Option>,\n",
    "#     weight_xcats: Option>,\n",
    "#     normalize_weights: bool,\n",
    "#     start: Option,\n",
    "#     end: Option,\n",
    "#     blacklist: Option>>,\n",
    "#     complete_xcats: bool,\n",
    "#     complete_cids: bool,\n",
    "#     new_xcat: Option,\n",
    "#     new_cid: Option,\n",
    "# ) -> PyResult {\n",
    "\n",
    "msyrs.panel.linear_composite(\n",
    "    df=new_df,\n",
    "    xcats=[\"EQXR_NSA\", \"EQXR_VT10\"],\n",
    "    cids=[\"USD\", \"EUR\", \"GBP\", \"AUD\", \"CAD\"],\n",
    "    weights=None,\n",
    "    signs=None,\n",
    "    weight_xcats=None,\n",
    "    normalize_weights=False,\n",
    "    start=None,\n",
    "    end=None,\n",
    "    blacklist=None,\n",
    "    complete_xcats=False,\n",
    "    complete_cids=False,\n",
    "    new_xcat=\"COMPOSITE\",\n",
    "    new_cid=\"USD\",\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}