msyrs/notebooks/funcwise/basic-utils.ipynb
2025-04-15 19:31:15 +01:00

301 lines
7.3 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ! uv pip install E:\\Work\\ruzt\\msyrs --upgrade"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Import Python packages\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import macrosynergy\n",
"import pandas as pd\n",
"import numpy as np\n",
"import polars as pl\n",
"import os\n",
"\n",
"from macrosynergy.panel import view_timelines\n",
"from macrosynergy.management.types import QuantamentalDataFrame\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Import Python bindings - `msyrs`\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import msyrs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Folder handed to the msyrs loaders. Set the DATA_FOLDER_PATH environment\n",
"# variable to point at your own copy of the data; the previously hard-coded\n",
"# location stays as the fallback so existing setups keep working.\n",
"DATA_FOLDER_PATH = os.getenv(\"DATA_FOLDER_PATH\", \"E:/Work/jpmaqs-data\")\n",
"# DATA_FOLDER_PATH = \"C:/Users/PalashTyagi/Code/go-dataquery/jpmaqs-data\"\n",
"\n",
"# Client credentials come from the environment; both are None when unset.\n",
"DQ_CLIENT_ID = os.getenv(\"DQ_CLIENT_ID\")\n",
"DQ_CLIENT_SECRET = os.getenv(\"DQ_CLIENT_SECRET\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"\n",
"# Wall-clock reference for the whole notebook: the final cell subtracts this\n",
"# from another time.time() reading to report the total run time.\n",
"nb_start_time = time.time()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load one ticker's CSV through msyrs and report how long the call took.\n",
"dfpath = f\"{DATA_FOLDER_PATH}/data/ADPEMPL_SA_P1M1ML1/USD_ADPEMPL_SA_P1M1ML1.csv\"\n",
"\n",
"# perf_counter() is monotonic and high-resolution, so a short load is not\n",
"# rounded to 0.0 or skewed by a system clock adjustment as with time.time().\n",
"starttime = time.perf_counter()\n",
"ldf: pl.DataFrame = msyrs.qdf.load_qdf(dfpath)\n",
"print(f\"Time taken to load qdf: {time.perf_counter() - starttime}\")\n",
"ldf.head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cross-section identifiers, split into two groups (cids_dm / cids_em are\n",
"# presumably developed and emerging markets) and then combined.\n",
"cids_dm = \"AUD.CAD.CHF.EUR.GBP.JPY.NOK.NZD.SEK.USD\".split(\".\")\n",
"cids_em = \"CLP.COP.CZK.HUF.IDR.ILS.INR.KRW.MXN.PLN.THB.TRY.TWD.ZAR\".split(\".\")\n",
"cids = cids_dm + cids_em\n",
"\n",
"# Drop IDR and NZD while keeping the order of `cids`. Going through a set\n",
"# difference would reorder the list differently in every kernel session,\n",
"# because string hashing is randomised per process.\n",
"cids_dux = [cid for cid in cids if cid not in (\"IDR\", \"NZD\")]\n",
"\n",
"# Category codes, written as dot-separated strings and split into lists.\n",
"ecos = (\n",
"    \"CPIC_SA_P1M1ML12.CPIC_SJA_P3M3ML3AR.CPIC_SJA_P6M6ML6AR.CPIH_SA_P1M1ML12.\"\n",
"    \"CPIH_SJA_P3M3ML3AR.CPIH_SJA_P6M6ML6AR.INFTEFF_NSA.INTRGDP_NSA_P1M1ML12_3MMA.\"\n",
"    \"INTRGDPv5Y_NSA_P1M1ML12_3MMA.PCREDITGDP_SJA_D1M1ML12.RGDP_SA_P1Q1QL4_20QMA.\"\n",
"    \"RYLDIRS02Y_NSA.RYLDIRS05Y_NSA.PCREDITBN_SJA_P1M1ML12\".split(\".\")\n",
")\n",
"\n",
"mkts = (\n",
"    \"DU02YXR_NSA.DU05YXR_NSA.DU02YXR_VT10.DU05YXR_VT10.EQXR_NSA.EQXR_VT10.\"\n",
"    \"FXXR_NSA.FXXR_VT10.FXCRR_NSA.FXTARGETED_NSA.FXUNTRADABLE_NSA\".split(\".\")\n",
")\n",
"xcats = ecos + mkts\n",
"\n",
"# One ticker per (cross-section, category) pair, formatted as CID_XCAT.\n",
"tickers = [f\"{c}_{x}\" for c in cids for x in xcats]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load the `xcats` panel from the download-bank folder and time the call.\n",
"# perf_counter() is used instead of time.time() because it is monotonic and\n",
"# high-resolution, which is what an elapsed-time measurement needs.\n",
"starttime = time.perf_counter()\n",
"\n",
"big_df: pl.DataFrame = msyrs.qdf.load_qdf_from_download_bank(\n",
"    folder_path=DATA_FOLDER_PATH,\n",
"    xcats=xcats,\n",
")\n",
"print(f\"Time taken to load qdf batch: {time.perf_counter() - starttime}\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Estimated in-memory size of the loaded frame, reported in \"mb\" units.\n",
"big_df.estimated_size(unit=\"mb\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Time a reduction of the full frame to the `mkts` categories only.\n",
"# perf_counter() is monotonic and high-resolution, unlike time.time().\n",
"starttime = time.perf_counter()\n",
"\n",
"test_df = msyrs.qdf.reduce_dataframe(df=big_df, xcats=mkts)\n",
"\n",
"print(f\"Time taken to reduce qdf: {time.perf_counter() - starttime}\")\n",
"print(test_df.estimated_size(\"mb\"))\n",
"# Drop the reference so the reduced copy can be reclaimed.\n",
"test_df = None"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cross-sections and start date passed to the reduce_dataframe calls below.\n",
"sel_cids = \"USD.EUR.GBP.AUD.CAD.CHF.JPY.INR\".split(\".\")\n",
"start = \"2000-01-01\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Categories whose name starts with \"EQ\"; selected before the timer starts.\n",
"eq_xcats = [xc for xc in xcats if xc.startswith(\"EQ\")]\n",
"\n",
"# perf_counter() is monotonic and high-resolution, unlike time.time().\n",
"starttime = time.perf_counter()\n",
"\n",
"eq_df = msyrs.qdf.reduce_dataframe(\n",
"    df=big_df, cids=sel_cids, xcats=eq_xcats, start=start\n",
")\n",
"\n",
"print(f\"Time taken to reduce qdf: {time.perf_counter() - starttime}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Categories whose name starts with \"FX\"; selected before the timer starts.\n",
"fx_xcats = [xc for xc in xcats if xc.startswith(\"FX\")]\n",
"\n",
"# perf_counter() is monotonic and high-resolution, unlike time.time().\n",
"starttime = time.perf_counter()\n",
"\n",
"fx_df = msyrs.qdf.reduce_dataframe(\n",
"    df=big_df, cids=sel_cids, xcats=fx_xcats, start=start\n",
")\n",
"\n",
"# Only the reduction is timed here. Combining eq_df and fx_df into new_df\n",
"# happens, with its own timer, in the next cell.\n",
"print(f\"Time taken to reduce qdf: {time.perf_counter() - starttime}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Combine eq_df with fx_df via update_dataframe and time the call.\n",
"# perf_counter() is monotonic and high-resolution, unlike time.time().\n",
"starttime = time.perf_counter()\n",
"\n",
"new_df: pl.DataFrame = msyrs.qdf.update_dataframe(df=eq_df, df_add=fx_df)\n",
"\n",
"print(f\"Time taken to update qdf: {time.perf_counter() - starttime}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Date series from the msyrs helper over 1971-2040 (\"bdates\" presumably\n",
"# means business dates); the returned value is shown as the cell output.\n",
"msyrs.utils.get_bdates_series_default_opt(\n",
"    start_date=\"1971-01-01\",\n",
"    end_date=\"2040-05-01\",\n",
"    freq=\"D\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Same date-series helper in its `_pl` variant; only the dtype of the\n",
"# result is displayed.\n",
"msyrs.utils.get_bdates_series_default_pl(\n",
"    start_date=\"2000-01-01\",\n",
"    end_date=\"2020-05-01\",\n",
"    freq=\"D\",\n",
").dtype"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Round-trip new_df through macrosynergy: polars -> pandas-backed\n",
"# QuantamentalDataFrame -> to_wide() -> from_wide().\n",
"wdf = QuantamentalDataFrame(new_df.to_pandas()).to_wide()\n",
"# Disabled experiment: set a random ~0.01% of the wide values to NaN.\n",
"# wdf.values[np.random.rand(*wdf.shape) < 0.0001] = np.nan\n",
"wdf = QuantamentalDataFrame.from_wide(wdf, categorical=False)\n",
"\n",
"# Rebuild the polars frame and cast its 'real_date' column to pl.Date.\n",
"new_df = pl.DataFrame(wdf).with_columns(\n",
"    pl.col(\"real_date\").cast(pl.Date, strict=True),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Run the msyrs blacklist helper on the round-tripped frame; the returned\n",
"# value is shown as the cell output.\n",
"msyrs.utils.create_blacklist_from_qdf(new_df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# view_timelines comes from macrosynergy, so the polars frame is converted\n",
"# to pandas before it is passed in.\n",
"view_timelines(df=new_df.to_pandas())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Total notebook run time. nb_start_time was read with time.time() in an\n",
"# earlier cell, so the end reading has to come from the same clock.\n",
"end_time = time.time()\n",
"print(f\"Time taken: {end_time - nb_start_time} seconds\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}