# Build and install the package

First patch `pyo3-polars`:

- Apply the changes shown in [this diff](https://github.com/pola-rs/pyo3-polars/compare/main...Magnus167:pyo3-polars:main) to your copy of the `pyo3-polars` package.

Install the package:

```bash
python -m venv .venv
# On Linux/macOS use: source .venv/bin/activate
# On Windows:
./.venv/Scripts/activate
pip install uv
uv pip install maturin jupyter notebook ipython ipywidgets pyarrow polars macrosynergy --upgrade

maturin develop --release
```


In [None]:
# ! uv pip install maturin jupyter notebook ipython ipywidgets pyarrow polars macrosynergy --upgrade


### Import Python packages


In [None]:
import macrosynergy
import pandas as pd
import numpy as np
import polars as pl
import os

from macrosynergy.panel import view_timelines
from macrosynergy.management.types import QuantamentalDataFrame


### Import Python bindings - `msyrs`


In [None]:
import msyrs

In [None]:
# Root folder of the local JPMaQS download bank. The location differs per
# machine, so it can be overridden with the DATA_FOLDER_PATH environment
# variable; the previous hard-coded location remains the default.
DATA_FOLDER_PATH = os.getenv("DATA_FOLDER_PATH", "E:/Work/jpmaqs-data")

# DataQuery credentials, read from the environment (None when unset). They
# are only referenced by the download call, which is commented out here.
DQ_CLIENT_ID = os.getenv("DQ_CLIENT_ID")
DQ_CLIENT_SECRET = os.getenv("DQ_CLIENT_SECRET")

In [None]:
import time

# Wall-clock start of the notebook run; read again near the end of the
# notebook to report the total elapsed time.
nb_start_time = time.time()

In [None]:
# Load a single ticker CSV through msyrs and report how long the call took.
dfpath = f"{DATA_FOLDER_PATH}/data/ADPEMPL_SA_P1M1ML1/USD_ADPEMPL_SA_P1M1ML1.csv"

starttime = time.time()
ldf: pl.DataFrame = msyrs.qdf.load_qdf(dfpath)
load_seconds = time.time() - starttime
print(f"Time taken to load qdf: {load_seconds}")

# Preview the first rows (rendered as the cell output).
ldf.head(5)

In [None]:
# Cross-section identifiers, written as dot-separated strings and split
# into lists.
cids_dm = "AUD.CAD.CHF.EUR.GBP.JPY.NOK.NZD.SEK.USD".split(".")
cids_em = "CLP.COP.CZK.HUF.IDR.ILS.INR.KRW.MXN.PLN.THB.TRY.TWD.ZAR".split(".")
cids = cids_dm + cids_em

# Every cross-section except IDR and NZD. Filtering the list (instead of
# taking a set difference) keeps the order of `cids`, so the result is
# identical from run to run.
cids_dux = [cid for cid in cids if cid not in ("IDR", "NZD")]

# Category codes, again as dot-separated strings.
ecos = "CPIC_SA_P1M1ML12.CPIC_SJA_P3M3ML3AR.CPIC_SJA_P6M6ML6AR.CPIH_SA_P1M1ML12.CPIH_SJA_P3M3ML3AR.CPIH_SJA_P6M6ML6AR.INFTEFF_NSA.INTRGDP_NSA_P1M1ML12_3MMA.INTRGDPv5Y_NSA_P1M1ML12_3MMA.PCREDITGDP_SJA_D1M1ML12.RGDP_SA_P1Q1QL4_20QMA.RYLDIRS02Y_NSA.RYLDIRS05Y_NSA.PCREDITBN_SJA_P1M1ML12".split(
    "."
)
mkts = "DU02YXR_NSA.DU05YXR_NSA.DU02YXR_VT10.DU05YXR_VT10.EQXR_NSA.EQXR_VT10.FXXR_NSA.FXXR_VT10.FXCRR_NSA.FXTARGETED_NSA.FXUNTRADABLE_NSA".split(
    "."
)
xcats = ecos + mkts

# One ticker per (cross-section, category) pair, formatted "<cid>_<xcat>".
tickers = [f"{c}_{x}" for c in cids for x in xcats]

In [None]:
# downloaded_df: pl.DataFrame = msyrs.download.download_jpmaqs_indicators_as_df(
# client_id=DQ_CLIENT_ID,
# client_secret=DQ_CLIENT_SECRET,
# tickers=tickers,
# )
# downloaded_df.head(5)

In [None]:
# The msyrs download call is commented out in this notebook, so the name is
# bound to None here instead of to a downloaded frame.
downloaded_df = None

In [None]:
# pddf = macrosynergy.download.JPMaQSDownload().download(
# tickers=tickers,
# get_catalogue=True,
# show_progress=True,
# start_date="1990-01-01",
# )
# pddf = macrosynergy.management.types.QuantamentalDataFrame(pddf)

In [None]:
# Load the `xcats` categories from the local download bank and time the call.
# (Disabled alternative selection: folder_path=DATA_FOLDER_PATH, cids=cids)
starttime = time.time()
big_df: pl.DataFrame = msyrs.qdf.load_qdf_from_download_bank(
    folder_path=DATA_FOLDER_PATH, xcats=xcats
)
batch_seconds = time.time() - starttime
print(f"Time taken to load qdf batch: {batch_seconds}")

# Preview the first rows (rendered as the cell output).
big_df.head(5)

In [None]:
# Size of the polars frame as estimated by polars, requested in "mb".
big_df.estimated_size("mb")

In [None]:
# Convert to pandas and show the result as the cell output.
big_df.to_pandas()

In [None]:
# Deep memory footprint of the pandas copy: total bytes divided by 1024**2.
pandas_bytes = big_df.to_pandas().memory_usage(deep=True).sum()
pandas_bytes / 1024**2

In [None]:
# Same measurement after wrapping the pandas copy in a QuantamentalDataFrame.
qdf_bytes = (
    macrosynergy.management.types.QuantamentalDataFrame(big_df.to_pandas())
    .memory_usage(deep=True)
    .sum()
)
qdf_bytes / 1024**2

In [None]:
# Cross-sections and start date passed to the reduce_dataframe calls that
# build eq_df and fx_df.
sel_cids = ["USD", "EUR", "GBP", "AUD", "CAD"]
start = "1990-01-01"

In [None]:
starttime = time.time()
eq_df = msyrs.qdf.reduce_dataframe(
 df=big_df,
 cids=sel_cids,
 xcats=["EQXR_NSA", "EQXR_VT10"],

 start=start,
)
print(f"Time taken to reduce qdf: {time.time() - starttime}")
eq_df

In [None]:
# starttime = time.time()
# eq_pd_df = pddf.reduce_df(cids=sel_cids, xcats=["EQXR_NSA", "EQXR_VT10"], start=start)
# print(f"Time taken to reduce qdf: {time.time() - starttime}")

In [None]:
# Select the categories whose name starts with "FX", then reduce big_df to
# them (with intersect=True) and time the call.
fx_xcats = [name for name in xcats if name.startswith("FX")]

starttime = time.time()
fx_df = msyrs.qdf.reduce_dataframe(
    df=big_df,
    cids=sel_cids,
    xcats=fx_xcats,
    start=start,
    intersect=True,
)
fx_seconds = time.time() - starttime
print(f"Time taken to reduce qdf: {fx_seconds}")

In [None]:
# starttime = time.time()
# fx_pd_df = pddf.reduce_df(cids=sel_cids, xcats=fx_xcats, start=start, intersect=True)
# print(f"Time taken to reduce qdf: {time.time() - starttime}")

In [None]:
# Combine eq_df with fx_df through update_dataframe and time the call.
starttime = time.time()
new_df: pl.DataFrame = msyrs.qdf.update_dataframe(df=eq_df, df_add=fx_df)
update_seconds = time.time() - starttime
print("Time taken: ", update_seconds)

# Preview the first rows (rendered as the cell output).
new_df.head(10)

In [None]:
# starttime = time.time()
# new_pd_df = pddf.update_df(df_add=eq_pd_df,)
# print("Time taken: ", time.time() - starttime)

In [None]:
# Last ten rows of the combined frame, shown as the cell output.
new_df.tail(10)

In [None]:
# Parameters of the Rust-side `historic_vol`, for reference (generic type
# arguments are not shown): `df` (polars DataFrame) and `xcat` (String) are
# plain arguments; `cids`, `lback_periods`, `lback_method`, `half_life`,
# `start`, `end`, `est_freq`, `remove_zeros`, `postfix` and `nan_tolerance`
# are all `Option`s.
hv_kwargs = dict(
    df=new_df,
    xcat="EQXR_NSA",
    cids=None,
    lback_periods=21,
    lback_method="xma",
    half_life=11,
    start=None,
    end=None,
    est_freq="D",
    remove_zeros=None,
    postfix="_HV_RS",
    nan_tolerance=None,
)

# Time the Rust implementation.
starttime = time.time()
hv = msyrs.panel.historic_vol(**hv_kwargs)
print(f"Time taken: {time.time() - starttime}")

# Time a trivial assignment as well (presumably a baseline for the timer).
starttime = time.time()
a = 1 + 5
print("Time taken: ", time.time() - starttime)


In [None]:

# Bring the Rust result into pandas, reindex its wide form onto the
# business-day range between its first and last real_date (forward-filling),
# drop all-empty rows, and convert back to a QuantamentalDataFrame.
hv_pandas = hv.to_pandas()

hv_wide = QuantamentalDataFrame(hv_pandas).to_wide()
business_days = pd.bdate_range(
    start=hv_pandas["real_date"].min(),
    end=hv_pandas["real_date"].max(),
    freq="B",
)
hv_wide = hv_wide.reindex(business_days, method="ffill")
hv_wide = hv_wide.dropna(axis="rows", how="all").sort_index()

hdf = QuantamentalDataFrame.from_wide(hv_wide, categorical=False)

view_timelines(df=hdf)



In [None]:
# Pivot new_df by ticker and time the call. The ten-row preview is bound to
# a name and placed last so the notebook renders it; as a bare expression in
# the middle of the cell its value was discarded.
starttime = time.time()
pivot_preview = msyrs.qdf.pivot_dataframe_by_ticker(df=new_df).head(10)
print("Time taken: ", time.time() - starttime)

pivot_preview

In [None]:
# new_pd_df = macrosynergy.management.types.QuantamentalDataFrame(new_pd_df)

In [None]:
from macrosynergy.panel.historic_vol import historic_vol

# Benchmark: run macrosynergy's pandas `historic_vol` on the same data.
new_df_pd = QuantamentalDataFrame(new_df.to_pandas(), categorical=False)

# Use a dedicated name for the cross-sections present in new_df_pd.
# Rebinding `cids` here would overwrite the full cross-section list that the
# ticker-building cell defines, which breaks re-running that part of the
# notebook after this cell.
bench_cids = new_df_pd["cid"].unique().tolist()

starttime = time.time()
hv_bench = historic_vol(
    df=new_df_pd,
    xcat="EQXR_NSA",
    cids=bench_cids,
    lback_periods=21,
    lback_meth="xma",
    half_life=11,
    est_freq="D",
    blacklist=None,
    remove_zeros=True,
    postfix="_HV_PY",
    nan_tolerance=0.001,
)
print(f"Time taken: {time.time() - starttime}")

# Time a trivial assignment as well (presumably a baseline for the timer).
starttime = time.time()
a = 1 + 5
print("Time taken: ", time.time() - starttime)

In [None]:
# Pass the benchmark output, as returned by historic_vol, to view_timelines.
view_timelines(df=hv_bench)

In [None]:
# Drop rows containing any missing value, then reindex the benchmark's wide
# form onto the business-day range spanned by new_df_pd (forward-filling)
# and view it again.
hv_bench = hv_bench.dropna(axis="rows", how="any").reset_index(drop=True)
bdr = pd.bdate_range(new_df_pd.real_date.min(), new_df_pd.real_date.max())

bench_wide = QuantamentalDataFrame(hv_bench).to_wide()
bench_wide = bench_wide.reindex(bdr, method="ffill")
hv_bench = QuantamentalDataFrame.from_wide(bench_wide, categorical=False)
hv_bench = hv_bench.reset_index(drop=True)

view_timelines(df=hv_bench)

In [None]:
def zscore_series(s: pd.Series) -> pd.Series:
    """Return *s* with its mean subtracted, divided by its standard deviation.

    Uses ``Series.mean`` and ``Series.std`` with their default arguments.
    """
    centred = s - s.mean()
    return centred / s.std()


# Z-score the benchmark and Rust panels in wide form, then view them together.
# NOTE(review): axis=1 applies zscore_series to each row of the wide frame
# (across its columns), not down each column over time — confirm that this
# is the intended normalisation.
bench_scores = (
    QuantamentalDataFrame(hv_bench)
    .to_wide()
    .apply(zscore_series, axis=1, result_type="expand")
)
a = QuantamentalDataFrame.from_wide(bench_scores)

rust_scores = (
    QuantamentalDataFrame(hdf)
    .to_wide()
    .apply(zscore_series, axis=1, result_type="expand")
)
b = QuantamentalDataFrame.from_wide(rust_scores)

# Stack both panels, discard rows with any missing value, and plot.
combined = QuantamentalDataFrame.from_qdf_list([a, b])
combined = combined.dropna(axis="rows", how="any").reset_index(drop=True)
view_timelines(combined)


In [None]:
# Compare the two outputs column by column. Dropping the last "_"-separated
# token from each column name removes the part of the postfix that differs
# ("_HV_PY" vs "_HV_RS"), so the two wide frames align on matching names.
# (A disabled variant of this cell z-scored each wide frame row-wise with
# zscore_series before the rename.)
def _drop_last_token(name):
    return "_".join(name.split("_")[:-1])


a = QuantamentalDataFrame(hv_bench).to_wide().rename(columns=_drop_last_token)
b = QuantamentalDataFrame(hdf).to_wide().rename(columns=_drop_last_token)

# Difference relative to the absolute benchmark value.
diff = (a - b) / a.abs()

view_timelines(QuantamentalDataFrame.from_wide(diff))

In [None]:
# starttime = time.time()
# new_pd_df.to_wide()
# print("Time taken: ", time.time() - starttime)

In [None]:
# Report the wall-clock time elapsed since nb_start_time was recorded.
end_time = time.time()
total_seconds = end_time - nb_start_time
print(f"Time taken: {total_seconds} seconds")

In [None]:
# Parameters of the Rust-side `linear_composite`, for reference (generic
# type arguments are not shown), in declaration order:
#   df, xcats, cids, weights, signs, weight_xcats, normalize_weights,
#   start, end, blacklist, complete_xcats, complete_cids, new_xcat, new_cid
# `normalize_weights`, `complete_xcats` and `complete_cids` are bools;
# `weights`, `signs`, `weight_xcats`, `start`, `end`, `blacklist`,
# `new_xcat` and `new_cid` are `Option`s.
composite = msyrs.panel.linear_composite(
    df=new_df,
    xcats=["EQXR_NSA", "EQXR_VT10"],
    cids=["USD", "EUR", "GBP", "AUD", "CAD"],
    weights=None,
    signs=None,
    weight_xcats=None,
    normalize_weights=False,
    start=None,
    end=None,
    blacklist=None,
    complete_xcats=False,
    complete_cids=False,
    new_xcat="COMPOSITE",
    new_cid="USD",
)

# Show the result as the cell output.
composite