mirror of
https://github.com/Magnus167/msyrs.git
synced 2025-08-20 09:59:59 +00:00
432 lines
10 KiB
Plaintext
432 lines
10 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Build and install the package\n",
|
|
"\n",
|
|
"First patch `pyo3-polars`:\n",
|
|
"\n",
|
|
"- Use [this diff](https://github.com/pola-rs/pyo3-polars/compare/main...Magnus167:pyo3-polars:main) to make changes to the `pyo3-polars` package.\n",
|
|
"\n",
|
|
"Install the package:\n",
|
|
"\n",
|
|
"```bash\n",
|
|
"python -m venv .venv\n",
|
|
"\n",
|
|
"# Linux/macOS: source .venv/bin/activate  (the command below is the Windows equivalent)\n",
|
|
"./.venv/Scripts/activate\n",
|
|
"\n",
|
|
"pip install maturin ipywidgets\n",
|
|
"\n",
|
|
"maturin develop --release\n",
|
|
"```\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Import Python packages\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import macrosynergy\n",
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"import polars as pl\n",
|
|
"import os"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Import Python bindings - `msyrs`\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import msyrs"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"DATA_FOLDER_PATH = \"E:/Work/jpmaqs-data\"\n",
|
|
"# DATA_FOLDER_PATH = \"C:/Users/PalashTyagi/Code/go-dataquery/jpmaqs-data\"\n",
|
|
"DQ_CLIENT_ID = os.getenv(\"DQ_CLIENT_ID\")\n",
|
|
"DQ_CLIENT_SECRET = os.getenv(\"DQ_CLIENT_SECRET\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import time\n",
|
|
"\n",
|
|
"nb_start_time = time.time()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"dfpath = f\"{DATA_FOLDER_PATH}/data/ADPEMPL_SA_P1M1ML1/USD_ADPEMPL_SA_P1M1ML1.csv\"\n",
|
|
"\n",
|
|
"starttime = time.time()\n",
|
|
"ldf: pl.DataFrame = msyrs.qdf.load_qdf(dfpath)\n",
|
|
"print(f\"Time taken to load qdf: {time.time() - starttime}\")\n",
|
|
"ldf.head(5)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"cids_dm = \"AUD.CAD.CHF.EUR.GBP.JPY.NOK.NZD.SEK.USD\".split(\".\")\n",
|
|
"cids_em = \"CLP.COP.CZK.HUF.IDR.ILS.INR.KRW.MXN.PLN.THB.TRY.TWD.ZAR\".split(\".\")\n",
|
|
"cids = cids_dm + cids_em\n",
|
|
"cids_dux = list(set(cids) - set([\"IDR\", \"NZD\"]))\n",
|
|
"ecos = \"CPIC_SA_P1M1ML12.CPIC_SJA_P3M3ML3AR.CPIC_SJA_P6M6ML6AR.CPIH_SA_P1M1ML12.CPIH_SJA_P3M3ML3AR.CPIH_SJA_P6M6ML6AR.INFTEFF_NSA.INTRGDP_NSA_P1M1ML12_3MMA.INTRGDPv5Y_NSA_P1M1ML12_3MMA.PCREDITGDP_SJA_D1M1ML12.RGDP_SA_P1Q1QL4_20QMA.RYLDIRS02Y_NSA.RYLDIRS05Y_NSA.PCREDITBN_SJA_P1M1ML12\".split(\n",
|
|
" \".\"\n",
|
|
")\n",
|
|
"\n",
|
|
"\n",
|
|
"mkts = \"DU02YXR_NSA.DU05YXR_NSA.DU02YXR_VT10.DU05YXR_VT10.EQXR_NSA.EQXR_VT10.FXXR_NSA.FXXR_VT10.FXCRR_NSA.FXTARGETED_NSA.FXUNTRADABLE_NSA\".split(\n",
|
|
" \".\"\n",
|
|
")\n",
|
|
"xcats = ecos + mkts\n",
|
|
"\n",
|
|
"tickers = [f\"{c}_{x}\" for c in cids for x in xcats]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# downloaded_df: pl.DataFrame = msyrs.download.download_jpmaqs_indicators_as_df(\n",
|
|
"# client_id=DQ_CLIENT_ID,\n",
|
|
"# client_secret=DQ_CLIENT_SECRET,\n",
|
|
"# tickers=tickers,\n",
|
|
"# )\n",
|
|
"# downloaded_df.head(5)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"downloaded_df = None"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# pddf = macrosynergy.download.JPMaQSDownload().download(\n",
|
|
"# tickers=tickers,\n",
|
|
"# get_catalogue=True,\n",
|
|
"# show_progress=True,\n",
|
|
"# start_date=\"1990-01-01\",\n",
|
|
"# )\n",
|
|
"# pddf = macrosynergy.management.types.QuantamentalDataFrame(pddf)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"starttime = time.time()\n",
|
|
"\n",
|
|
"big_df: pl.DataFrame = msyrs.qdf.load_qdf_from_download_bank(\n",
|
|
"\n",
|
|
" folder_path=DATA_FOLDER_PATH,\n",
|
|
" xcats=xcats,\n",
|
|
"\n",
|
|
" # folder_path=DATA_FOLDER_PATH, cids=cids\n",
|
|
"\n",
|
|
")\n",
|
|
"print(f\"Time taken to load qdf batch: {time.time() - starttime}\")\n",
|
|
"\n",
|
|
"\n",
|
|
"big_df.head(5)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"big_df.estimated_size(\"mb\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"big_df.to_pandas()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"big_df.to_pandas().memory_usage(deep=True).sum() / 1024**2"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"macrosynergy.management.types.QuantamentalDataFrame(big_df.to_pandas())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"macrosynergy.management.types.QuantamentalDataFrame(big_df.to_pandas()).memory_usage(\n",
|
|
" deep=True\n",
|
|
").sum() / 1024**2"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"sel_cids = [\"USD\", \"EUR\", \"GBP\", \"AUD\", \"CAD\"]\n",
|
|
"start = \"1990-01-01\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"starttime = time.time()\n",
|
|
"eq_df = msyrs.qdf.reduce_dataframe(\n",
|
|
" df=big_df,\n",
|
|
" cids=sel_cids,\n",
|
|
" xcats=[\"EQXR_NSA\", \"EQXR_VT10\"],\n",
|
|
"\n",
|
|
" start=start,\n",
|
|
")\n",
|
|
"print(f\"Time taken to reduce qdf: {time.time() - starttime}\")\n",
|
|
"eq_df"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# starttime = time.time()\n",
|
|
"# eq_pd_df = pddf.reduce_df(cids=sel_cids, xcats=[\"EQXR_NSA\", \"EQXR_VT10\"], start=start)\n",
|
|
"# print(f\"Time taken to reduce qdf: {time.time() - starttime}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"fx_xcats = [xc for xc in xcats if xc.startswith(\"FX\")]\n",
|
|
"starttime = time.time()\n",
|
|
"\n",
|
|
"fx_df = msyrs.qdf.reduce_dataframe(\n",
|
|
" df=big_df, cids=sel_cids, start=start, xcats=fx_xcats, intersect=True\n",
|
|
")\n",
|
|
"print(f\"Time taken to reduce qdf: {time.time() - starttime}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# starttime = time.time()\n",
|
|
"# fx_pd_df = pddf.reduce_df(cids=sel_cids, xcats=fx_xcats, start=start, intersect=True)\n",
|
|
"# print(f\"Time taken to reduce qdf: {time.time() - starttime}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"starttime = time.time()\n",
|
|
"new_df: pl.DataFrame = msyrs.qdf.update_dataframe(df=eq_df, df_add=fx_df)\n",
|
|
"print(\"Time taken: \", time.time() - starttime)\n",
|
|
"new_df.head(10)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# starttime = time.time()\n",
|
|
"# new_pd_df = pddf.update_df(df_add=eq_pd_df,)\n",
|
|
"# print(\"Time taken: \", time.time() - starttime)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"new_df.tail(10)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# df: polars::prelude::DataFrame,\n",
|
|
"# xcat: String,\n",
|
|
"# cids: Option<Vec<String>>,\n",
|
|
"# lback_periods: Option<usize>,\n",
|
|
"# lback_method: Option<String>,\n",
|
|
"# half_life: Option<f64>,\n",
|
|
"# start: Option<String>,\n",
|
|
"# end: Option<String>,\n",
|
|
"# est_freq: Option<String>,\n",
|
|
"# remove_zeros: Option<bool>,\n",
|
|
"# postfix: Option<String>,\n",
|
|
"# nan_tolerance: Option<f64>,\n",
|
|
"\n",
|
|
"starttime = time.time()\n",
|
|
"hv = msyrs.panel.historic_vol(\n",
|
|
" df=new_df,\n",
|
|
" xcat=\"EQXR_NSA\",\n",
|
|
" cids=None,\n",
|
|
" lback_periods=252,\n",
|
|
" lback_method=\"calendar\",\n",
|
|
" half_life=None,\n",
|
|
" start=None,\n",
|
|
" end=None,\n",
|
|
" est_freq=None,\n",
|
|
" remove_zeros=None,\n",
|
|
" postfix=\"_HV\",\n",
|
|
" nan_tolerance=None,\n",
|
|
")\n",
|
|
"print(f\"Time taken: {time.time() - starttime}\")\n",
|
|
"\n",
|
|
"starttime = time.time()\n",
|
|
"a = 1 + 5\n",
|
|
"print(\"Time taken: \", time.time() - starttime)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"starttime = time.time()\n",
|
|
"msyrs.qdf.pivot_dataframe_by_ticker(df=new_df).head(10)\n",
|
|
"print(\"Time taken: \", time.time() - starttime)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# new_pd_df = macrosynergy.management.types.QuantamentalDataFrame(new_pd_df)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# starttime = time.time()\n",
|
|
"# new_pd_df.to_wide()\n",
|
|
"# print(\"Time taken: \", time.time() - starttime)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"end_time = time.time()\n",
|
|
"print(f\"Time taken: {end_time - nb_start_time} seconds\")"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": ".venv",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|