diff --git a/Cargo.toml b/Cargo.toml index 430b8a4..a547cfe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,8 +31,8 @@ futures = "0.3" # pyo3 = { version = "0.23.1", features = ["extension-module"] } # pyo3 = { version = "0.21.2", features = ["extension-module"] } # pyo3 = { version = "*", features = ["abi3-py38"] } -pyo3 = { version = "*", features = ["extension-module"] } -pyo3-polars = { version = "0.18.0" } +pyo3 = { version = "*", features = ["extension-module", "abi3-py37"] } +pyo3-polars = { version = "*" } polars = { version = "*", features = [ "lazy", "temporal", @@ -40,6 +40,7 @@ polars = { version = "*", features = [ "json", "parquet", "dtype-datetime", + # "dtype-categorical", "strings", "timezones", "ndarray", diff --git a/README.md b/README.md index 5c6841a..e13f367 100644 --- a/README.md +++ b/README.md @@ -2,33 +2,30 @@ A Rust implementation of the [Macrosynergy Python Package](https://github.com/macrosynergy/macrosynergy). -## Running Notebook +## Build and install the Python package ```bash -cargo install evcxr_jupyter -evcxr_jupyter --install -pip install jupyterlab -jupyter lab +python -m venv .venv +# source .venv/bin/activate +./.venv/Scripts/activate +pip install maturin +maturin develop --release ``` -Or try following this guide here: [DataCrayon - Setup Jupyter with Rust](https://datacrayon.com/data-analysis-with-rust-notebooks/setup-anaconda-jupyter-and-rust/) - ## Status -- Download +- [x] Download - - [x] Get Catalogue - - [x] Get Generic DQ Time Series - - [x] Get JPMaQS Indicators as Polars DataFrame - - [ ] Save to disk functionality - - [x] Hacky iterative method - - [ ] Non-hacky way to save to disk + - [ ] Pending: Optimize thread pool - Utils - - [ ] Reduce DF - - [ ] Apply Blacklist - - [ ] Update DF + - [ ] QDF + - [x] Read QDF + - [x] Reduce DF + - [x] Update DF + - [ ] Get Blacklist + - [ ] Apply Blacklist - Panel - [ ] Historic Volatility diff --git a/notebooks/python-notebook.ipynb b/notebooks/python-notebook.ipynb new file mode 100644 index 0000000..c70e3c3 --- /dev/null +++ b/notebooks/python-notebook.ipynb @@ -0,0 +1,364 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Build and install the package\n", + "\n", + "```bash\n", + "python -m venv .venv\n", + "\n", + "# source .venv/bin/activate\n", + "./.venv/Scripts/activate\n", + "\n", + "pip install maturin\n", + "\n", + "maturin develop --release\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "e:\\Work\\ruzt\\msyrs\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "import macrosynergy\n", + "import pandas as pd\n", + "import numpy as np\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import msyrs\n", + "import polars as pl" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "DATA_FOLDER_PATH = \"E:/Work/jpmaqs-data\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64i64i64
2010-03-03"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0333
2010-03-04"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0434
2010-03-05"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0535
2010-03-08"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0838
2010-03-09"USD""ADPEMPL_SA_P1M1ML1"-0.1738063.0939
" + ], + "text/plain": [ + "shape: (5, 7)\n", + "┌────────────┬─────┬────────────────────┬───────────┬─────────┬─────────┬─────────┐\n", + "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n", + "╞════════════╪═════╪════════════════════╪═══════════╪═════════╪═════════╪═════════╡\n", + "│ 2010-03-03 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 3 ┆ 33 │\n", + "│ 2010-03-04 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 4 ┆ 34 │\n", + "│ 2010-03-05 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 5 ┆ 35 │\n", + "│ 2010-03-08 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 8 ┆ 38 │\n", + "│ 2010-03-09 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 9 ┆ 39 │\n", + "└────────────┴─────┴────────────────────┴───────────┴─────────┴─────────┴─────────┘" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfpath = f\"{DATA_FOLDER_PATH}/data/ADPEMPL_SA_P1M1ML1/USD_ADPEMPL_SA_P1M1ML1.csv\"\n", + "\n", + "\n", + "ldf: pl.DataFrame = msyrs.qdf.load_qdf(dfpath)\n", + "ldf.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "cids_dm = \"AUD.CAD.CHF.EUR.GBP.JPY.NOK.NZD.SEK.USD\".split(\".\")\n", + "cids_em = \"CLP.COP.CZK.HUF.IDR.ILS.INR.KRW.MXN.PLN.THB.TRY.TWD.ZAR\".split(\".\")\n", + "cids = cids_dm + cids_em\n", + "cids_dux = list(set(cids) - set([\"IDR\", \"NZD\"]))\n", + "ecos = \"CPIC_SA_P1M1ML12.CPIC_SJA_P3M3ML3AR.CPIC_SJA_P6M6ML6AR.CPIH_SA_P1M1ML12.CPIH_SJA_P3M3ML3AR.CPIH_SJA_P6M6ML6AR.INFTEFF_NSA.INTRGDP_NSA_P1M1ML12_3MMA.INTRGDPv5Y_NSA_P1M1ML12_3MMA.PCREDITGDP_SJA_D1M1ML12.RGDP_SA_P1Q1QL4_20QMA.RYLDIRS02Y_NSA.RYLDIRS05Y_NSA.PCREDITBN_SJA_P1M1ML12\".split(\n", + " \".\"\n", + ")\n", + "[\"CPIC\", \"CPIH\", \"INFTEFF\", \"INTRGDP\", \"INTRGDPv5Y\", \"PCREDITGDP\", \"RGDP\", \"RYLDIRS\", \"PCREDITBN\"]\n", + "[\"\"]\n", + "\n", + "mkts = \"DU02YXR_NSA.DU05YXR_NSA.DU02YXR_VT10.DU05YXR_VT10.EQXR_NSA.EQXR_VT10.FXXR_NSA.FXXR_VT10.FXCRR_NSA.FXTARGETED_NSA.FXUNTRADABLE_NSA\".split(\n", + " \".\"\n", + ")\n", + "xcats = ecos + mkts\n", + "\n", + "tickers = [f\"{c}_{x}\" for c in cids for x in xcats]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64i64i64
1990-04-26"AUD""CPIC_SA_P1M1ML12"6.4345992.026223
1990-04-27"AUD""CPIC_SA_P1M1ML12"6.4345992.027224
1990-04-30"AUD""CPIC_SA_P1M1ML12"6.4345992.030227
1990-05-01"AUD""CPIC_SA_P1M1ML12"6.4345992.031228
1990-05-02"AUD""CPIC_SA_P1M1ML12"6.4345992.032229
" + ], + "text/plain": [ + "shape: (5, 7)\n", + "┌────────────┬─────┬──────────────────┬──────────┬─────────┬─────────┬─────────┐\n", + "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n", + "╞════════════╪═════╪══════════════════╪══════════╪═════════╪═════════╪═════════╡\n", + "│ 1990-04-26 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 26 ┆ 223 │\n", + "│ 1990-04-27 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 27 ┆ 224 │\n", + "│ 1990-04-30 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 30 ┆ 227 │\n", + "│ 1990-05-01 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 31 ┆ 228 │\n", + "│ 1990-05-02 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 32 ┆ 229 │\n", + "└────────────┴─────┴──────────────────┴──────────┴─────────┴─────────┴─────────┘" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "big_df: pl.DataFrame = msyrs.qdf.load_qdf_from_download_bank(\n", + " folder_path=DATA_FOLDER_PATH, tickers=tickers\n", + ")\n", + "big_df.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "sel_cids = [\"USD\", \"EUR\", \"GBP\", \"AUD\", \"CAD\"]\n", + "start = \"2024-11-14\"" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (20, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64i64i64
2024-11-14"AUD""EQXR_NSA"0.3291881.000
2024-11-15"AUD""EQXR_NSA"0.8263461.000
2024-11-14"CAD""EQXR_NSA"0.1994021.000
2024-11-15"CAD""EQXR_NSA"-0.6965171.000
2024-11-14"EUR""EQXR_NSA"2.0248891.000
2024-11-15"EUR""EQXR_VT10"-0.4779011.000
2024-11-14"GBP""EQXR_VT10"0.6642081.000
2024-11-15"GBP""EQXR_VT10"-0.0687781.000
2024-11-14"USD""EQXR_VT10"-0.5499831.000
2024-11-15"USD""EQXR_VT10"-1.1985441.000
" + ], + "text/plain": [ + "shape: (20, 7)\n", + "┌────────────┬─────┬───────────┬───────────┬─────────┬─────────┬─────────┐\n", + "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n", + "╞════════════╪═════╪═══════════╪═══════════╪═════════╪═════════╪═════════╡\n", + "│ 2024-11-14 ┆ AUD ┆ EQXR_NSA ┆ 0.329188 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-15 ┆ AUD ┆ EQXR_NSA ┆ 0.826346 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-14 ┆ CAD ┆ EQXR_NSA ┆ 0.199402 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-15 ┆ CAD ┆ EQXR_NSA ┆ -0.696517 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-14 ┆ EUR ┆ EQXR_NSA ┆ 2.024889 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 2024-11-15 ┆ EUR ┆ EQXR_VT10 ┆ -0.477901 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-14 ┆ GBP ┆ EQXR_VT10 ┆ 0.664208 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-15 ┆ GBP ┆ EQXR_VT10 ┆ -0.068778 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-14 ┆ USD ┆ EQXR_VT10 ┆ -0.549983 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-15 ┆ USD ┆ EQXR_VT10 ┆ -1.198544 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "└────────────┴─────┴───────────┴───────────┴─────────┴─────────┴─────────┘" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "eq_df = msyrs.qdf.reduce_dataframe(\n", + " df=big_df,\n", + " cids=sel_cids,\n", + " xcats=[\"EQXR_NSA\", \"EQXR_VT10\"],\n", + " start=start,\n", + ")\n", + "eq_df" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "fx_xcats = [xc for xc in xcats if xc.startswith(\"FX\")]\n", + "fx_df = msyrs.qdf.reduce_dataframe(\n", + " df=big_df, cids=sel_cids, start=start, xcats=fx_xcats, intersect=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (10, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64i64i64
2024-11-14"AUD""EQXR_NSA"0.3291881.000
2024-11-15"AUD""EQXR_NSA"0.8263461.000
2024-11-14"CAD""EQXR_NSA"0.1994021.000
2024-11-15"CAD""EQXR_NSA"-0.6965171.000
2024-11-14"EUR""EQXR_NSA"2.0248891.000
2024-11-15"EUR""EQXR_NSA"-0.6615671.000
2024-11-14"GBP""EQXR_NSA"0.5965331.000
2024-11-15"GBP""EQXR_NSA"-0.061771.000
2024-11-14"USD""EQXR_NSA"-0.6274931.000
2024-11-15"USD""EQXR_NSA"-1.3674571.000
" + ], + "text/plain": [ + "shape: (10, 7)\n", + "┌────────────┬─────┬──────────┬───────────┬─────────┬─────────┬─────────┐\n", + "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n", + "╞════════════╪═════╪══════════╪═══════════╪═════════╪═════════╪═════════╡\n", + "│ 2024-11-14 ┆ AUD ┆ EQXR_NSA ┆ 0.329188 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-15 ┆ AUD ┆ EQXR_NSA ┆ 0.826346 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-14 ┆ CAD ┆ EQXR_NSA ┆ 0.199402 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-15 ┆ CAD ┆ EQXR_NSA ┆ -0.696517 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-14 ┆ EUR ┆ EQXR_NSA ┆ 2.024889 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-15 ┆ EUR ┆ EQXR_NSA ┆ -0.661567 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-14 ┆ GBP ┆ EQXR_NSA ┆ 0.596533 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-15 ┆ GBP ┆ EQXR_NSA ┆ -0.06177 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-14 ┆ USD ┆ EQXR_NSA ┆ -0.627493 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-15 ┆ USD ┆ EQXR_NSA ┆ -1.367457 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "└────────────┴─────┴──────────┴───────────┴─────────┴─────────┴─────────┘" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_df: pl.DataFrame = msyrs.qdf.update_dataframe(df=eq_df, df_add=fx_df)\n", + "\n", + "new_df.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (10, 7)
real_datecidxcatvaluegradingeop_lagmop_lag
datestrstrf64f64i64i64
2024-11-14"GBP""FXXR_NSA"-0.0678091.000
2024-11-15"GBP""FXXR_NSA"-0.4300551.000
2024-11-14"AUD""FXXR_VT10"-0.42941.000
2024-11-15"AUD""FXXR_VT10"-0.4525351.000
2024-11-14"CAD""FXXR_VT10"-1.1323141.000
2024-11-15"CAD""FXXR_VT10"-1.7556051.000
2024-11-14"EUR""FXXR_VT10"-0.2924221.000
2024-11-15"EUR""FXXR_VT10"-0.8551081.000
2024-11-14"GBP""FXXR_VT10"-0.1105261.000
2024-11-15"GBP""FXXR_VT10"-0.7009771.000
" + ], + "text/plain": [ + "shape: (10, 7)\n", + "┌────────────┬─────┬───────────┬───────────┬─────────┬─────────┬─────────┐\n", + "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n", + "╞════════════╪═════╪═══════════╪═══════════╪═════════╪═════════╪═════════╡\n", + "│ 2024-11-14 ┆ GBP ┆ FXXR_NSA ┆ -0.067809 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-15 ┆ GBP ┆ FXXR_NSA ┆ -0.430055 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-14 ┆ AUD ┆ FXXR_VT10 ┆ -0.4294 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-15 ┆ AUD ┆ FXXR_VT10 ┆ -0.452535 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-14 ┆ CAD ┆ FXXR_VT10 ┆ -1.132314 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-15 ┆ CAD ┆ FXXR_VT10 ┆ -1.755605 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-14 ┆ EUR ┆ FXXR_VT10 ┆ -0.292422 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-15 ┆ EUR ┆ FXXR_VT10 ┆ -0.855108 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-14 ┆ GBP ┆ FXXR_VT10 ┆ -0.110526 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "│ 2024-11-15 ┆ GBP ┆ FXXR_VT10 ┆ -0.700977 ┆ 1.0 ┆ 0 ┆ 0 │\n", + "└────────────┴─────┴───────────┴───────────┴─────────┴─────────┴─────────┘" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_df.tail(10)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/test-notebook.ipynb b/notebooks/rust-notebook.ipynb similarity index 93% rename from notebooks/test-notebook.ipynb rename to notebooks/rust-notebook.ipynb index 62615f3..fe1e332 100644 --- a/notebooks/test-notebook.ipynb +++ b/notebooks/rust-notebook.ipynb @@ -1,5 +1,32 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "31d0d7e3", + "metadata": {}, + "source": [ + "# Running this noteboook\n", + "\n", + "Create a new Python Venv using:\n", + "\n", + "```bash\n", + "python -m venv .venv\n", + "# source .venv/bin/activate\n", + "./.venv/Scripts/activate\n", + "```\n", + "\n", + "Install `evcxr_jupyter` and `jupyterlab` using:\n", + "\n", + "```bash\n", + "cargo install evcxr_jupyter\n", + "evcxr_jupyter --install\n", + "pip install jupyterlab\n", + "jupyter lab\n", + "```\n", + "\n", + "Or try following this guide here: [DataCrayon - Setup Jupyter with Rust](https://datacrayon.com/data-analysis-with-rust-notebooks/setup-anaconda-jupyter-and-rust/)" + ] + }, { "cell_type": "markdown", "id": "8d04a212-4025-41d7-809e-864649b08ab5", @@ -21,7 +48,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "22e1ae9e-14b8-4be4-b852-8f0fb420eaca", "metadata": {}, "outputs": [], @@ -399,7 +426,7 @@ "mimetype": "text/rust", "name": "rust", "pygment_lexer": "rust", - "version": "" + "version": "3.12.7" } }, "nbformat": 4, diff --git a/notebooks/test.py b/notebooks/test.py new file mode 100644 index 0000000..ddbedc3 --- /dev/null +++ b/notebooks/test.py @@ -0,0 +1,44 @@ +import msyrs +import datetime + +# "E:\Work\jpmaqs-data\data\ADPEMPL_SA_P1M1ML1\USD_ADPEMPL_SA_P1M1ML1.csv" + +DATA_FOLDER_PATH = "E:/Work/jpmaqs-data" + +dfpath = f"{DATA_FOLDER_PATH}/data/ADPEMPL_SA_P1M1ML1/USD_ADPEMPL_SA_P1M1ML1.csv" + +print(msyrs.qdf.load_qdf(dfpath)) + +cids_dm = "AUD.CAD.CHF.EUR.GBP.JPY.NOK.NZD.SEK.USD".split(".") +cids_em = "CLP.COP.CZK.HUF.IDR.ILS.INR.KRW.MXN.PLN.THB.TRY.TWD.ZAR".split(".") +cids = cids_dm + cids_em +cids_dux = list(set(cids) - set(["IDR", "NZD"])) +ecos = "CPIC_SA_P1M1ML12.CPIC_SJA_P3M3ML3AR.CPIC_SJA_P6M6ML6AR.CPIH_SA_P1M1ML12.CPIH_SJA_P3M3ML3AR.CPIH_SJA_P6M6ML6AR.INFTEFF_NSA.INTRGDP_NSA_P1M1ML12_3MMA.INTRGDPv5Y_NSA_P1M1ML12_3MMA.PCREDITGDP_SJA_D1M1ML12.RGDP_SA_P1Q1QL4_20QMA.RYLDIRS02Y_NSA.RYLDIRS05Y_NSA.PCREDITBN_SJA_P1M1ML12".split( + "." +) +mkts = "DU02YXR_NSA.DU05YXR_NSA.DU02YXR_VT10.DU05YXR_VT10.EQXR_NSA.EQXR_VT10.FXXR_NSA.FXXR_VT10.FXCRR_NSA.FXTARGETED_NSA.FXUNTRADABLE_NSA".split( + "." +) +xcats = ecos + mkts + +tickers = [f"{c}_{x}" for c in cids for x in xcats] + + +# load_qdf_from_download_bank + +df = msyrs.qdf.load_qdf_from_download_bank( + folder_path=DATA_FOLDER_PATH, tickers=tickers +) +print(df) + +start_date = (datetime.datetime.now() - datetime.timedelta(days=5)).strftime("%Y-%m-%d") + +sel_cids = ["AUD", "USD", "GBP", "CAD", "JPY", "EUR"] +df_eq = msyrs.qdf.reduce_dataframe( + df=df, cids=["AUD"], xcats=["EQXR_NSA"], start=start_date +) +print(df_eq) + +fx_xcats = [xc for xc in xcats if xc.startswith("FX")] +df_fx = msyrs.qdf.reduce_dataframe(df=df, xcats=fx_xcats, intersect=True) +print(df_fx) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..4d60e63 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,7 @@ +[build-system] +requires = ["maturin>=1.0,<2.0"] +build-backend = "maturin" + +[tool.maturin] +# "extension-module" tells pyo3 we want to build an extension module (skips linking against libpython.so) +features = ["pyo3/extension-module"] diff --git a/src/lib.rs b/src/lib.rs index 78ddedf..7e900db 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,30 +1,14 @@ #![doc = include_str!("../README.md")] +/// Documentation for the `msyrs` Python API. +pub mod py; + +/// Documentation for the Rust API. + + +/// Documentation for the `download` module. pub mod download; pub mod utils; -use pyo3::{prelude::*, wrap_pymodule}; -use pyo3_polars::PyDataFrame; -#[pyfunction] -pub fn load_qdf(file_path: &str) -> PyResult { - Ok(PyDataFrame( - utils::qdf::load_quantamental_dataframe(file_path).unwrap(), - )) -} - -// ignore deprecated warning -#[allow(deprecated)] -#[pymodule] -pub fn qdf(_py: Python, m: &PyModule) -> PyResult<()> { - m.add_function(wrap_pyfunction!(load_qdf, m)?)?; - Ok(()) -} - -#[allow(deprecated)] -#[pymodule] -pub fn msyrs(_py: Python, m: &PyModule) -> PyResult<()> { -// add qdf as a submodule - m.add_wrapped(wrap_pymodule!(qdf))?; - Ok(()) -} +pub use py::msyrs; diff --git a/src/py/mod.rs b/src/py/mod.rs new file mode 100644 index 0000000..05e63c1 --- /dev/null +++ b/src/py/mod.rs @@ -0,0 +1,12 @@ + +/// Python API for [`crate::utils::qdf`]. +pub mod qdf; +use pyo3::{prelude::*, wrap_pymodule}; +// use pyo3_polars::PyDataFrame; + +#[allow(deprecated)] +#[pymodule] +pub fn msyrs(_py: Python, m: &PyModule) -> PyResult<()> { + m.add_wrapped(wrap_pymodule!(qdf::qdf))?; + Ok(()) +} diff --git a/src/py/qdf.rs b/src/py/qdf.rs new file mode 100644 index 0000000..4273584 --- /dev/null +++ b/src/py/qdf.rs @@ -0,0 +1,78 @@ +use pyo3::prelude::*; +use pyo3_polars::PyDataFrame; + +/// Python wrapper for [`crate::utils::qdf`] module. +#[allow(deprecated)] +#[pymodule] +pub fn qdf(_py: Python, m: &PyModule) -> PyResult<()> { + m.add_function(wrap_pyfunction!(load_qdf, m)?)?; + m.add_function(wrap_pyfunction!(load_qdf_from_download_bank, m)?)?; + m.add_function(wrap_pyfunction!(reduce_dataframe, m)?)?; + m.add_function(wrap_pyfunction!(update_dataframe, m)?)?; + Ok(()) +} + +/// Python wrapper for loading a Quantamental DataFrame from a CSV file. +/// See [`crate::utils::qdf::load_quantamental_dataframe`] for full documentation. +#[pyfunction] +pub fn load_qdf(file_path: String) -> PyResult { + Ok(PyDataFrame( + crate::utils::qdf::load_quantamental_dataframe(file_path).unwrap(), + )) +} + +/// Python wrapper for loading a Quantamental DataFrame from a download bank. +/// See [`crate::utils::qdf::load::load_qdf_from_download_bank`] for full documentation. +#[pyfunction] +pub fn load_qdf_from_download_bank( + folder_path: String, + cids: Option>, + xcats: Option>, + tickers: Option>, +) -> PyResult { + Ok(PyDataFrame( + crate::utils::qdf::load::load_qdf_from_download_bank(folder_path, cids, xcats, tickers) + .unwrap(), + )) +} + +/// Python wrapper for reduce_dataframe +/// See [`crate::utils::qdf::reduce_df::reduce_dataframe`] for full documentation. +#[pyfunction] +pub fn reduce_dataframe( + df: PyDataFrame, + cids: Option>, + xcats: Option>, + metrics: Option>, + start: Option, + end: Option, + intersect: Option, +) -> PyResult { + Ok(PyDataFrame( + crate::utils::qdf::reduce_df::reduce_dataframe( + df.into(), + cids, + xcats, + metrics, + start, + end, + intersect.unwrap_or(false), + ) + .unwrap(), + )) +} + +/// Python wrapper for update_dataframe +/// See [`crate::utils::qdf::update_df::update_dataframe`] for full documentation. +#[pyfunction] +pub fn update_dataframe( + df: PyDataFrame, + df_add: PyDataFrame, + xcat_replace: Option, +) -> PyResult { + let xcat_replace = xcat_replace.unwrap_or(false); + Ok(PyDataFrame( + crate::utils::qdf::update_df::update_dataframe(&df.into(), &df_add.into(), xcat_replace) + .unwrap(), + )) +} diff --git a/src/utils/qdf/load.rs b/src/utils/qdf/load.rs index 62c1165..277ddc4 100644 --- a/src/utils/qdf/load.rs +++ b/src/utils/qdf/load.rs @@ -23,10 +23,10 @@ fn _file_base_name(file_path: String) -> String { /// The CSV must be named in the format `cid_xcat.csv` (`ticker.csv`). /// The DataFrame must have a `real_date` column along with additional value columns. pub fn load_quantamental_dataframe( - file_path: &str, + file_path: String, ) -> Result> { // get the file base name - let base_file_name = _file_base_name(file_path.into()); + let base_file_name = _file_base_name(file_path.clone().into()); // if filename does not have _ then it is not a Quantamental DataFrame if !base_file_name.contains('_') { @@ -37,7 +37,7 @@ pub fn load_quantamental_dataframe( let (cid, xcat) = split_ticker(ticker.to_string())?; let mut df = CsvReadOptions::default() - .try_into_reader_with_file_path(Some(file_path.into())) + .try_into_reader_with_file_path(Some(file_path.to_string().into())) .unwrap() .finish() .unwrap(); @@ -99,7 +99,7 @@ fn collect_paths_recursively>(path: P) -> std::io::Res } fn _load_qdf_thread_safe(file_path: &str) -> Result> { - let res = load_quantamental_dataframe(file_path); + let res = load_quantamental_dataframe(file_path.to_string()); res.map_err(|e| { anyhow::Error::msg(e.to_string()) .context("Failed to load quantamental dataframe") @@ -107,10 +107,10 @@ fn _load_qdf_thread_safe(file_path: &str) -> Result>, - xcats: Option>, - tickers: Option>, + folder_path: String, + cids: Option>, + xcats: Option>, + tickers: Option>, ) -> Result> { let rcids = cids.unwrap_or_else(|| Vec::new()); let rxcats = xcats.unwrap_or_else(|| Vec::new()); @@ -145,9 +145,9 @@ pub fn load_qdf_from_download_bank( let load_files = rel_files .iter() .filter(|(_, cid, xcat)| { - let f1 = rcids.len() > 0 && rcids.contains(&cid.as_str()); - let f2 = rxcats.len() > 0 && rxcats.contains(&xcat.as_str()); - let f3 = rtickers.len() > 0 && rtickers.contains(&create_ticker(cid, xcat).as_str()); + let f1 = rcids.len() > 0 && rcids.contains(&cid); + let f2 = rxcats.len() > 0 && rxcats.contains(&xcat); + let f3 = rtickers.len() > 0 && rtickers.contains(&create_ticker(cid, xcat)); f1 | f2 | f3 }) .map(|(file, _, _)| *file) @@ -160,7 +160,7 @@ pub fn load_qdf_from_download_bank( return Err("No files to load".into()); } if load_files.len() == 1 { - let dfx = load_quantamental_dataframe(load_files[0]).unwrap(); + let dfx = load_quantamental_dataframe(load_files[0].to_string()).unwrap(); return Ok(dfx); } diff --git a/src/utils/qdf/reduce_df.rs b/src/utils/qdf/reduce_df.rs index a3270bd..d89c01e 100644 --- a/src/utils/qdf/reduce_df.rs +++ b/src/utils/qdf/reduce_df.rs @@ -17,11 +17,11 @@ const QDF_INDEX_COLUMNS: [&str; 3] = ["real_date", "cid", "xcat"]; /// If no filters are provided, the original DataFrame is returned. pub fn reduce_dataframe( df: DataFrame, - cids: Option>, - xcats: Option>, + cids: Option>, + xcats: Option>, metrics: Option>, - start: Option<&str>, - end: Option<&str>, + start: Option, + end: Option, intersect: bool, ) -> Result> { check_quantamental_dataframe(&df)?; @@ -36,10 +36,10 @@ pub fn reduce_dataframe( let u_xcats: Vec = get_unique_xcats(&new_df)?; let u_tickers: Vec = _get_unique_strs_from_str_column_object(&ticker_col)?; - let specified_cids: Vec<&str> = - cids.unwrap_or_else(|| u_cids.iter().map(AsRef::as_ref).collect()); - let specified_xcats: Vec<&str> = - xcats.unwrap_or_else(|| u_xcats.iter().map(AsRef::as_ref).collect()); + let cids_vec = cids.unwrap_or_else(|| u_cids.clone()); + let specified_cids: Vec<&str> = cids_vec.iter().map(AsRef::as_ref).collect(); + let xcats_vec = xcats.unwrap_or_else(|| u_xcats.clone()); + let specified_xcats: Vec<&str> = xcats_vec.iter().map(AsRef::as_ref).collect(); let non_idx_cols: Vec = new_df .get_column_names() @@ -107,7 +107,7 @@ pub fn reduce_dataframe( // Apply date filtering if `start` or `end` is provided if let Some(start) = start { - let start_date = chrono::NaiveDate::parse_from_str(start, "%Y-%m-%d")?; + let start_date = chrono::NaiveDate::parse_from_str(&start, "%Y-%m-%d")?; new_df = new_df .lazy() .filter( @@ -120,7 +120,7 @@ pub fn reduce_dataframe( } if let Some(end) = end { - let end_date = chrono::NaiveDate::parse_from_str(end, "%Y-%m-%d")?; + let end_date = chrono::NaiveDate::parse_from_str(&end, "%Y-%m-%d")?; new_df = new_df .lazy() .filter( diff --git a/src/utils/qdf/update_df.rs b/src/utils/qdf/update_df.rs index bf24965..da711e0 100644 --- a/src/utils/qdf/update_df.rs +++ b/src/utils/qdf/update_df.rs @@ -11,7 +11,7 @@ const QDF_INDEX_COLUMNS: [&str; 3] = ["real_date", "cid", "xcat"]; pub fn update_dataframe( df: &DataFrame, df_add: &DataFrame, - // xcat_replace: Option<&str>, + xcat_replace: bool, ) -> Result> { check_quantamental_dataframe(df)?; check_quantamental_dataframe(df_add)?; @@ -20,7 +20,10 @@ pub fn update_dataframe( } else if df_add.is_empty() { return Ok(df.clone()); }; - + println!( + "xcat_replace not implemented yet (passed value: {})", + xcat_replace + ); // vstack and drop duplicates keeping last let mut new_df = df.vstack(df_add)?; // help? diff --git a/test.py b/test.py deleted file mode 100644 index 05c8bdb..0000000 --- a/test.py +++ /dev/null @@ -1,10 +0,0 @@ -import msyrs - -# "E:\Work\jpmaqs-data\data\ADPEMPL_SA_P1M1ML1\USD_ADPEMPL_SA_P1M1ML1.csv" - -dfpath = "E:/Work/jpmaqs-data/data/ADPEMPL_SA_P1M1ML1/USD_ADPEMPL_SA_P1M1ML1.csv" - -print(msyrs.qdf.load_qdf(dfpath)) - - -