diff --git a/Cargo.toml b/Cargo.toml
index 430b8a4..a547cfe 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -31,8 +31,8 @@ futures = "0.3"
# pyo3 = { version = "0.23.1", features = ["extension-module"] }
# pyo3 = { version = "0.21.2", features = ["extension-module"] }
# pyo3 = { version = "*", features = ["abi3-py38"] }
-pyo3 = { version = "*", features = ["extension-module"] }
-pyo3-polars = { version = "0.18.0" }
+pyo3 = { version = "*", features = ["extension-module", "abi3-py37"] }
+pyo3-polars = { version = "*" }
polars = { version = "*", features = [
"lazy",
"temporal",
@@ -40,6 +40,7 @@ polars = { version = "*", features = [
"json",
"parquet",
"dtype-datetime",
+ # "dtype-categorical",
"strings",
"timezones",
"ndarray",
diff --git a/README.md b/README.md
index 5c6841a..e13f367 100644
--- a/README.md
+++ b/README.md
@@ -2,33 +2,30 @@
A Rust implementation of the [Macrosynergy Python Package](https://github.com/macrosynergy/macrosynergy).
-## Running Notebook
+## Build and install the Python package
```bash
-cargo install evcxr_jupyter
-evcxr_jupyter --install
-pip install jupyterlab
-jupyter lab
+python -m venv .venv
+# Linux/macOS: source .venv/bin/activate   (the next line is the Windows equivalent)
+./.venv/Scripts/activate
+pip install maturin
+maturin develop --release
```
-Or try following this guide here: [DataCrayon - Setup Jupyter with Rust](https://datacrayon.com/data-analysis-with-rust-notebooks/setup-anaconda-jupyter-and-rust/)
-
## Status
-- Download
+- [x] Download
- - [x] Get Catalogue
- - [x] Get Generic DQ Time Series
- - [x] Get JPMaQS Indicators as Polars DataFrame
- - [ ] Save to disk functionality
- - [x] Hacky iterative method
- - [ ] Non-hacky way to save to disk
+ - [ ] Pending: Optimize thread pool
- Utils
- - [ ] Reduce DF
- - [ ] Apply Blacklist
- - [ ] Update DF
+ - [ ] QDF
+ - [x] Read QDF
+ - [x] Reduce DF
+ - [x] Update DF
+ - [ ] Get Blacklist
+ - [ ] Apply Blacklist
- Panel
- [ ] Historic Volatility
diff --git a/notebooks/python-notebook.ipynb b/notebooks/python-notebook.ipynb
new file mode 100644
index 0000000..c70e3c3
--- /dev/null
+++ b/notebooks/python-notebook.ipynb
@@ -0,0 +1,364 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Build and install the package\n",
+ "\n",
+ "```bash\n",
+ "python -m venv .venv\n",
+ "\n",
+ "# source .venv/bin/activate\n",
+ "./.venv/Scripts/activate\n",
+ "\n",
+ "pip install maturin\n",
+ "\n",
+ "maturin develop --release\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "e:\\Work\\ruzt\\msyrs\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+ " from .autonotebook import tqdm as notebook_tqdm\n"
+ ]
+ }
+ ],
+ "source": [
+ "import macrosynergy\n",
+ "import pandas as pd\n",
+ "import numpy as np\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import msyrs\n",
+ "import polars as pl"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "DATA_FOLDER_PATH = \"E:/Work/jpmaqs-data\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "
shape: (5, 7)real_date | cid | xcat | value | grading | eop_lag | mop_lag |
---|
date | str | str | f64 | f64 | i64 | i64 |
2010-03-03 | "USD" | "ADPEMPL_SA_P1M1ML1" | -0.173806 | 3.0 | 3 | 33 |
2010-03-04 | "USD" | "ADPEMPL_SA_P1M1ML1" | -0.173806 | 3.0 | 4 | 34 |
2010-03-05 | "USD" | "ADPEMPL_SA_P1M1ML1" | -0.173806 | 3.0 | 5 | 35 |
2010-03-08 | "USD" | "ADPEMPL_SA_P1M1ML1" | -0.173806 | 3.0 | 8 | 38 |
2010-03-09 | "USD" | "ADPEMPL_SA_P1M1ML1" | -0.173806 | 3.0 | 9 | 39 |
"
+ ],
+ "text/plain": [
+ "shape: (5, 7)\n",
+ "┌────────────┬─────┬────────────────────┬───────────┬─────────┬─────────┬─────────┐\n",
+ "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n",
+ "╞════════════╪═════╪════════════════════╪═══════════╪═════════╪═════════╪═════════╡\n",
+ "│ 2010-03-03 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 3 ┆ 33 │\n",
+ "│ 2010-03-04 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 4 ┆ 34 │\n",
+ "│ 2010-03-05 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 5 ┆ 35 │\n",
+ "│ 2010-03-08 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 8 ┆ 38 │\n",
+ "│ 2010-03-09 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 9 ┆ 39 │\n",
+ "└────────────┴─────┴────────────────────┴───────────┴─────────┴─────────┴─────────┘"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dfpath = f\"{DATA_FOLDER_PATH}/data/ADPEMPL_SA_P1M1ML1/USD_ADPEMPL_SA_P1M1ML1.csv\"\n",
+ "\n",
+ "\n",
+ "ldf: pl.DataFrame = msyrs.qdf.load_qdf(dfpath)\n",
+ "ldf.head(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cids_dm = \"AUD.CAD.CHF.EUR.GBP.JPY.NOK.NZD.SEK.USD\".split(\".\")\n",
+ "cids_em = \"CLP.COP.CZK.HUF.IDR.ILS.INR.KRW.MXN.PLN.THB.TRY.TWD.ZAR\".split(\".\")\n",
+ "cids = cids_dm + cids_em\n",
+ "cids_dux = list(set(cids) - set([\"IDR\", \"NZD\"]))\n",
+ "ecos = \"CPIC_SA_P1M1ML12.CPIC_SJA_P3M3ML3AR.CPIC_SJA_P6M6ML6AR.CPIH_SA_P1M1ML12.CPIH_SJA_P3M3ML3AR.CPIH_SJA_P6M6ML6AR.INFTEFF_NSA.INTRGDP_NSA_P1M1ML12_3MMA.INTRGDPv5Y_NSA_P1M1ML12_3MMA.PCREDITGDP_SJA_D1M1ML12.RGDP_SA_P1Q1QL4_20QMA.RYLDIRS02Y_NSA.RYLDIRS05Y_NSA.PCREDITBN_SJA_P1M1ML12\".split(\n",
+ " \".\"\n",
+ ")\n",
+ "[\"CPIC\", \"CPIH\", \"INFTEFF\", \"INTRGDP\", \"INTRGDPv5Y\", \"PCREDITGDP\", \"RGDP\", \"RYLDIRS\", \"PCREDITBN\"]\n",
+ "[\"\"]\n",
+ "\n",
+ "mkts = \"DU02YXR_NSA.DU05YXR_NSA.DU02YXR_VT10.DU05YXR_VT10.EQXR_NSA.EQXR_VT10.FXXR_NSA.FXXR_VT10.FXCRR_NSA.FXTARGETED_NSA.FXUNTRADABLE_NSA\".split(\n",
+ " \".\"\n",
+ ")\n",
+ "xcats = ecos + mkts\n",
+ "\n",
+ "tickers = [f\"{c}_{x}\" for c in cids for x in xcats]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (5, 7)real_date | cid | xcat | value | grading | eop_lag | mop_lag |
---|
date | str | str | f64 | f64 | i64 | i64 |
1990-04-26 | "AUD" | "CPIC_SA_P1M1ML12" | 6.434599 | 2.0 | 26 | 223 |
1990-04-27 | "AUD" | "CPIC_SA_P1M1ML12" | 6.434599 | 2.0 | 27 | 224 |
1990-04-30 | "AUD" | "CPIC_SA_P1M1ML12" | 6.434599 | 2.0 | 30 | 227 |
1990-05-01 | "AUD" | "CPIC_SA_P1M1ML12" | 6.434599 | 2.0 | 31 | 228 |
1990-05-02 | "AUD" | "CPIC_SA_P1M1ML12" | 6.434599 | 2.0 | 32 | 229 |
"
+ ],
+ "text/plain": [
+ "shape: (5, 7)\n",
+ "┌────────────┬─────┬──────────────────┬──────────┬─────────┬─────────┬─────────┐\n",
+ "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n",
+ "╞════════════╪═════╪══════════════════╪══════════╪═════════╪═════════╪═════════╡\n",
+ "│ 1990-04-26 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 26 ┆ 223 │\n",
+ "│ 1990-04-27 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 27 ┆ 224 │\n",
+ "│ 1990-04-30 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 30 ┆ 227 │\n",
+ "│ 1990-05-01 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 31 ┆ 228 │\n",
+ "│ 1990-05-02 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 32 ┆ 229 │\n",
+ "└────────────┴─────┴──────────────────┴──────────┴─────────┴─────────┴─────────┘"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "big_df: pl.DataFrame = msyrs.qdf.load_qdf_from_download_bank(\n",
+ " folder_path=DATA_FOLDER_PATH, tickers=tickers\n",
+ ")\n",
+ "big_df.head(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sel_cids = [\"USD\", \"EUR\", \"GBP\", \"AUD\", \"CAD\"]\n",
+ "start = \"2024-11-14\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (20, 7)real_date | cid | xcat | value | grading | eop_lag | mop_lag |
---|
date | str | str | f64 | f64 | i64 | i64 |
2024-11-14 | "AUD" | "EQXR_NSA" | 0.329188 | 1.0 | 0 | 0 |
2024-11-15 | "AUD" | "EQXR_NSA" | 0.826346 | 1.0 | 0 | 0 |
2024-11-14 | "CAD" | "EQXR_NSA" | 0.199402 | 1.0 | 0 | 0 |
2024-11-15 | "CAD" | "EQXR_NSA" | -0.696517 | 1.0 | 0 | 0 |
2024-11-14 | "EUR" | "EQXR_NSA" | 2.024889 | 1.0 | 0 | 0 |
… | … | … | … | … | … | … |
2024-11-15 | "EUR" | "EQXR_VT10" | -0.477901 | 1.0 | 0 | 0 |
2024-11-14 | "GBP" | "EQXR_VT10" | 0.664208 | 1.0 | 0 | 0 |
2024-11-15 | "GBP" | "EQXR_VT10" | -0.068778 | 1.0 | 0 | 0 |
2024-11-14 | "USD" | "EQXR_VT10" | -0.549983 | 1.0 | 0 | 0 |
2024-11-15 | "USD" | "EQXR_VT10" | -1.198544 | 1.0 | 0 | 0 |
"
+ ],
+ "text/plain": [
+ "shape: (20, 7)\n",
+ "┌────────────┬─────┬───────────┬───────────┬─────────┬─────────┬─────────┐\n",
+ "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n",
+ "╞════════════╪═════╪═══════════╪═══════════╪═════════╪═════════╪═════════╡\n",
+ "│ 2024-11-14 ┆ AUD ┆ EQXR_NSA ┆ 0.329188 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-15 ┆ AUD ┆ EQXR_NSA ┆ 0.826346 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-14 ┆ CAD ┆ EQXR_NSA ┆ 0.199402 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-15 ┆ CAD ┆ EQXR_NSA ┆ -0.696517 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-14 ┆ EUR ┆ EQXR_NSA ┆ 2.024889 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
+ "│ 2024-11-15 ┆ EUR ┆ EQXR_VT10 ┆ -0.477901 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-14 ┆ GBP ┆ EQXR_VT10 ┆ 0.664208 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-15 ┆ GBP ┆ EQXR_VT10 ┆ -0.068778 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-14 ┆ USD ┆ EQXR_VT10 ┆ -0.549983 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-15 ┆ USD ┆ EQXR_VT10 ┆ -1.198544 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "└────────────┴─────┴───────────┴───────────┴─────────┴─────────┴─────────┘"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "eq_df = msyrs.qdf.reduce_dataframe(\n",
+ " df=big_df,\n",
+ " cids=sel_cids,\n",
+ " xcats=[\"EQXR_NSA\", \"EQXR_VT10\"],\n",
+ " start=start,\n",
+ ")\n",
+ "eq_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fx_xcats = [xc for xc in xcats if xc.startswith(\"FX\")]\n",
+ "fx_df = msyrs.qdf.reduce_dataframe(\n",
+ " df=big_df, cids=sel_cids, start=start, xcats=fx_xcats, intersect=True\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (10, 7)real_date | cid | xcat | value | grading | eop_lag | mop_lag |
---|
date | str | str | f64 | f64 | i64 | i64 |
2024-11-14 | "AUD" | "EQXR_NSA" | 0.329188 | 1.0 | 0 | 0 |
2024-11-15 | "AUD" | "EQXR_NSA" | 0.826346 | 1.0 | 0 | 0 |
2024-11-14 | "CAD" | "EQXR_NSA" | 0.199402 | 1.0 | 0 | 0 |
2024-11-15 | "CAD" | "EQXR_NSA" | -0.696517 | 1.0 | 0 | 0 |
2024-11-14 | "EUR" | "EQXR_NSA" | 2.024889 | 1.0 | 0 | 0 |
2024-11-15 | "EUR" | "EQXR_NSA" | -0.661567 | 1.0 | 0 | 0 |
2024-11-14 | "GBP" | "EQXR_NSA" | 0.596533 | 1.0 | 0 | 0 |
2024-11-15 | "GBP" | "EQXR_NSA" | -0.06177 | 1.0 | 0 | 0 |
2024-11-14 | "USD" | "EQXR_NSA" | -0.627493 | 1.0 | 0 | 0 |
2024-11-15 | "USD" | "EQXR_NSA" | -1.367457 | 1.0 | 0 | 0 |
"
+ ],
+ "text/plain": [
+ "shape: (10, 7)\n",
+ "┌────────────┬─────┬──────────┬───────────┬─────────┬─────────┬─────────┐\n",
+ "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n",
+ "╞════════════╪═════╪══════════╪═══════════╪═════════╪═════════╪═════════╡\n",
+ "│ 2024-11-14 ┆ AUD ┆ EQXR_NSA ┆ 0.329188 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-15 ┆ AUD ┆ EQXR_NSA ┆ 0.826346 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-14 ┆ CAD ┆ EQXR_NSA ┆ 0.199402 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-15 ┆ CAD ┆ EQXR_NSA ┆ -0.696517 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-14 ┆ EUR ┆ EQXR_NSA ┆ 2.024889 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-15 ┆ EUR ┆ EQXR_NSA ┆ -0.661567 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-14 ┆ GBP ┆ EQXR_NSA ┆ 0.596533 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-15 ┆ GBP ┆ EQXR_NSA ┆ -0.06177 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-14 ┆ USD ┆ EQXR_NSA ┆ -0.627493 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-15 ┆ USD ┆ EQXR_NSA ┆ -1.367457 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "└────────────┴─────┴──────────┴───────────┴─────────┴─────────┴─────────┘"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "new_df: pl.DataFrame = msyrs.qdf.update_dataframe(df=eq_df, df_add=fx_df)\n",
+ "\n",
+ "new_df.head(10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
shape: (10, 7)real_date | cid | xcat | value | grading | eop_lag | mop_lag |
---|
date | str | str | f64 | f64 | i64 | i64 |
2024-11-14 | "GBP" | "FXXR_NSA" | -0.067809 | 1.0 | 0 | 0 |
2024-11-15 | "GBP" | "FXXR_NSA" | -0.430055 | 1.0 | 0 | 0 |
2024-11-14 | "AUD" | "FXXR_VT10" | -0.4294 | 1.0 | 0 | 0 |
2024-11-15 | "AUD" | "FXXR_VT10" | -0.452535 | 1.0 | 0 | 0 |
2024-11-14 | "CAD" | "FXXR_VT10" | -1.132314 | 1.0 | 0 | 0 |
2024-11-15 | "CAD" | "FXXR_VT10" | -1.755605 | 1.0 | 0 | 0 |
2024-11-14 | "EUR" | "FXXR_VT10" | -0.292422 | 1.0 | 0 | 0 |
2024-11-15 | "EUR" | "FXXR_VT10" | -0.855108 | 1.0 | 0 | 0 |
2024-11-14 | "GBP" | "FXXR_VT10" | -0.110526 | 1.0 | 0 | 0 |
2024-11-15 | "GBP" | "FXXR_VT10" | -0.700977 | 1.0 | 0 | 0 |
"
+ ],
+ "text/plain": [
+ "shape: (10, 7)\n",
+ "┌────────────┬─────┬───────────┬───────────┬─────────┬─────────┬─────────┐\n",
+ "│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n",
+ "╞════════════╪═════╪═══════════╪═══════════╪═════════╪═════════╪═════════╡\n",
+ "│ 2024-11-14 ┆ GBP ┆ FXXR_NSA ┆ -0.067809 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-15 ┆ GBP ┆ FXXR_NSA ┆ -0.430055 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-14 ┆ AUD ┆ FXXR_VT10 ┆ -0.4294 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-15 ┆ AUD ┆ FXXR_VT10 ┆ -0.452535 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-14 ┆ CAD ┆ FXXR_VT10 ┆ -1.132314 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-15 ┆ CAD ┆ FXXR_VT10 ┆ -1.755605 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-14 ┆ EUR ┆ FXXR_VT10 ┆ -0.292422 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-15 ┆ EUR ┆ FXXR_VT10 ┆ -0.855108 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-14 ┆ GBP ┆ FXXR_VT10 ┆ -0.110526 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "│ 2024-11-15 ┆ GBP ┆ FXXR_VT10 ┆ -0.700977 ┆ 1.0 ┆ 0 ┆ 0 │\n",
+ "└────────────┴─────┴───────────┴───────────┴─────────┴─────────┴─────────┘"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "new_df.tail(10)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/test-notebook.ipynb b/notebooks/rust-notebook.ipynb
similarity index 93%
rename from notebooks/test-notebook.ipynb
rename to notebooks/rust-notebook.ipynb
index 62615f3..fe1e332 100644
--- a/notebooks/test-notebook.ipynb
+++ b/notebooks/rust-notebook.ipynb
@@ -1,5 +1,32 @@
{
"cells": [
+ {
+ "cell_type": "markdown",
+ "id": "31d0d7e3",
+ "metadata": {},
+ "source": [
+ "# Running this notebook\n",
+ "\n",
+ "Create a new Python Venv using:\n",
+ "\n",
+ "```bash\n",
+ "python -m venv .venv\n",
+ "# source .venv/bin/activate\n",
+ "./.venv/Scripts/activate\n",
+ "```\n",
+ "\n",
+ "Install `evcxr_jupyter` and `jupyterlab` using:\n",
+ "\n",
+ "```bash\n",
+ "cargo install evcxr_jupyter\n",
+ "evcxr_jupyter --install\n",
+ "pip install jupyterlab\n",
+ "jupyter lab\n",
+ "```\n",
+ "\n",
+ "Or try following this guide here: [DataCrayon - Setup Jupyter with Rust](https://datacrayon.com/data-analysis-with-rust-notebooks/setup-anaconda-jupyter-and-rust/)"
+ ]
+ },
{
"cell_type": "markdown",
"id": "8d04a212-4025-41d7-809e-864649b08ab5",
@@ -21,7 +48,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"id": "22e1ae9e-14b8-4be4-b852-8f0fb420eaca",
"metadata": {},
"outputs": [],
@@ -399,7 +426,7 @@
"mimetype": "text/rust",
"name": "rust",
"pygment_lexer": "rust",
- "version": ""
+ "version": "3.12.7"
}
},
"nbformat": 4,
diff --git a/notebooks/test.py b/notebooks/test.py
new file mode 100644
index 0000000..ddbedc3
--- /dev/null
+++ b/notebooks/test.py
@@ -0,0 +1,44 @@
+import msyrs
+import datetime
+
+# Exercises the msyrs.qdf bindings and prints each result. Example single-ticker CSV: "E:\Work\jpmaqs-data\data\ADPEMPL_SA_P1M1ML1\USD_ADPEMPL_SA_P1M1ML1.csv"
+# NOTE(review): machine-specific location of the JPMaQS download folder -- adjust locally.
+DATA_FOLDER_PATH = "E:/Work/jpmaqs-data"
+
+dfpath = f"{DATA_FOLDER_PATH}/data/ADPEMPL_SA_P1M1ML1/USD_ADPEMPL_SA_P1M1ML1.csv"
+
+print(msyrs.qdf.load_qdf(dfpath))
+
+cids_dm = "AUD.CAD.CHF.EUR.GBP.JPY.NOK.NZD.SEK.USD".split(".")
+cids_em = "CLP.COP.CZK.HUF.IDR.ILS.INR.KRW.MXN.PLN.THB.TRY.TWD.ZAR".split(".")
+cids = cids_dm + cids_em
+cids_dux = list(set(cids) - set(["IDR", "NZD"]))
+ecos = "CPIC_SA_P1M1ML12.CPIC_SJA_P3M3ML3AR.CPIC_SJA_P6M6ML6AR.CPIH_SA_P1M1ML12.CPIH_SJA_P3M3ML3AR.CPIH_SJA_P6M6ML6AR.INFTEFF_NSA.INTRGDP_NSA_P1M1ML12_3MMA.INTRGDPv5Y_NSA_P1M1ML12_3MMA.PCREDITGDP_SJA_D1M1ML12.RGDP_SA_P1Q1QL4_20QMA.RYLDIRS02Y_NSA.RYLDIRS05Y_NSA.PCREDITBN_SJA_P1M1ML12".split(
+ "."
+)
+mkts = "DU02YXR_NSA.DU05YXR_NSA.DU02YXR_VT10.DU05YXR_VT10.EQXR_NSA.EQXR_VT10.FXXR_NSA.FXXR_VT10.FXCRR_NSA.FXTARGETED_NSA.FXUNTRADABLE_NSA".split(
+ "."
+)
+xcats = ecos + mkts
+
+tickers = [f"{c}_{x}" for c in cids for x in xcats]
+
+
+# Load the requested tickers from the download folder via load_qdf_from_download_bank.
+
+df = msyrs.qdf.load_qdf_from_download_bank(
+ folder_path=DATA_FOLDER_PATH, tickers=tickers
+)
+print(df)
+
+start_date = (datetime.datetime.now() - datetime.timedelta(days=5)).strftime("%Y-%m-%d")
+# NOTE(review): sel_cids is never used; the call below filters on cids=["AUD"] only.
+sel_cids = ["AUD", "USD", "GBP", "CAD", "JPY", "EUR"]
+df_eq = msyrs.qdf.reduce_dataframe(
+ df=df, cids=["AUD"], xcats=["EQXR_NSA"], start=start_date
+)
+print(df_eq)
+
+fx_xcats = [xc for xc in xcats if xc.startswith("FX")]
+df_fx = msyrs.qdf.reduce_dataframe(df=df, xcats=fx_xcats, intersect=True)
+print(df_fx)
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..4d60e63
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,7 @@
+[build-system]
+requires = ["maturin>=1.0,<2.0"]
+build-backend = "maturin"
+
+[tool.maturin]
+# "extension-module" tells pyo3 we want to build an extension module (skips linking against libpython.so)
+features = ["pyo3/extension-module"]
diff --git a/src/lib.rs b/src/lib.rs
index 78ddedf..7e900db 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,30 +1,14 @@
#![doc = include_str!("../README.md")]
+/// Documentation for the `msyrs` Python API.
+pub mod py;
+
+// Rust API modules follow.
+
+
+/// Documentation for the `download` module.
pub mod download;
pub mod utils;
-use pyo3::{prelude::*, wrap_pymodule};
-use pyo3_polars::PyDataFrame;
-#[pyfunction]
-pub fn load_qdf(file_path: &str) -> PyResult {
- Ok(PyDataFrame(
- utils::qdf::load_quantamental_dataframe(file_path).unwrap(),
- ))
-}
-
-// ignore deprecated warning
-#[allow(deprecated)]
-#[pymodule]
-pub fn qdf(_py: Python, m: &PyModule) -> PyResult<()> {
- m.add_function(wrap_pyfunction!(load_qdf, m)?)?;
- Ok(())
-}
-
-#[allow(deprecated)]
-#[pymodule]
-pub fn msyrs(_py: Python, m: &PyModule) -> PyResult<()> {
-// add qdf as a submodule
- m.add_wrapped(wrap_pymodule!(qdf))?;
- Ok(())
-}
+pub use py::msyrs;
diff --git a/src/py/mod.rs b/src/py/mod.rs
new file mode 100644
index 0000000..05e63c1
--- /dev/null
+++ b/src/py/mod.rs
@@ -0,0 +1,12 @@
+
+/// Python API for [`crate::utils::qdf`].
+pub mod qdf;
+use pyo3::{prelude::*, wrap_pymodule};
+
+/// Top-level `msyrs` Python module; registers [`qdf::qdf`] as a submodule.
+#[allow(deprecated)]
+#[pymodule]
+pub fn msyrs(_py: Python, m: &PyModule) -> PyResult<()> {
+ m.add_wrapped(wrap_pymodule!(qdf::qdf))?;
+ Ok(())
+}
diff --git a/src/py/qdf.rs b/src/py/qdf.rs
new file mode 100644
index 0000000..4273584
--- /dev/null
+++ b/src/py/qdf.rs
@@ -0,0 +1,78 @@
+use pyo3::prelude::*;
+use pyo3_polars::PyDataFrame;
+
+/// `qdf` Python submodule wrapping [`crate::utils::qdf`]; registers the four `#[pyfunction]` wrappers listed below.
+#[allow(deprecated)] // NOTE(review): presumably silences pyo3's deprecation of the `&PyModule` signature -- confirm
+#[pymodule]
+pub fn qdf(_py: Python, m: &PyModule) -> PyResult<()> {
+ m.add_function(wrap_pyfunction!(load_qdf, m)?)?;
+ m.add_function(wrap_pyfunction!(load_qdf_from_download_bank, m)?)?;
+ m.add_function(wrap_pyfunction!(reduce_dataframe, m)?)?;
+ m.add_function(wrap_pyfunction!(update_dataframe, m)?)?;
+ Ok(())
+}
+
+/// Loads a Quantamental DataFrame from a CSV file; errors returned by the loader raise `RuntimeError`.
+/// See [`crate::utils::qdf::load_quantamental_dataframe`] for full documentation.
+#[pyfunction]
+pub fn load_qdf(file_path: String) -> PyResult<PyDataFrame> {
+    crate::utils::qdf::load_quantamental_dataframe(file_path)
+        .map(PyDataFrame)
+        .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(e.to_string()))
+}
+
+/// Loads a Quantamental DataFrame from a download bank; errors returned by the loader raise `RuntimeError`.
+/// See [`crate::utils::qdf::load::load_qdf_from_download_bank`] for full documentation.
+#[pyfunction]
+#[pyo3(signature = (folder_path, cids=None, xcats=None, tickers=None))]
+pub fn load_qdf_from_download_bank(
+    folder_path: String,
+    cids: Option<Vec<String>>,
+    xcats: Option<Vec<String>>,
+    tickers: Option<Vec<String>>,
+) -> PyResult<PyDataFrame> {
+    crate::utils::qdf::load::load_qdf_from_download_bank(folder_path, cids, xcats, tickers)
+        .map(PyDataFrame)
+        .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(e.to_string()))
+}
+
+/// Python wrapper for `reduce_dataframe`; errors returned by the reducer raise `RuntimeError`.
+/// See [`crate::utils::qdf::reduce_df::reduce_dataframe`] for full documentation.
+#[pyfunction]
+#[pyo3(signature = (df, cids=None, xcats=None, metrics=None, start=None, end=None, intersect=None))]
+pub fn reduce_dataframe(
+    df: PyDataFrame,
+    cids: Option<Vec<String>>,
+    xcats: Option<Vec<String>>,
+    metrics: Option<Vec<String>>,
+    start: Option<String>,
+    end: Option<String>,
+    intersect: Option<bool>,
+) -> PyResult<PyDataFrame> {
+    crate::utils::qdf::reduce_df::reduce_dataframe(
+        df.into(),
+        cids,
+        xcats,
+        metrics,
+        start,
+        end,
+        intersect.unwrap_or(false), // omitted / `None` from Python means `false`
+    )
+    .map(PyDataFrame)
+    .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(e.to_string()))
+}
+
+/// Python wrapper for `update_dataframe`; errors returned by the updater raise `RuntimeError`.
+/// See [`crate::utils::qdf::update_df::update_dataframe`] for full documentation.
+#[pyfunction]
+#[pyo3(signature = (df, df_add, xcat_replace=None))]
+pub fn update_dataframe(
+    df: PyDataFrame,
+    df_add: PyDataFrame,
+    xcat_replace: Option<bool>,
+) -> PyResult<PyDataFrame> {
+    let xcat_replace = xcat_replace.unwrap_or(false); // omitted / `None` from Python means `false`
+    crate::utils::qdf::update_df::update_dataframe(&df.into(), &df_add.into(), xcat_replace)
+        .map(PyDataFrame)
+        .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(e.to_string()))
+}
diff --git a/src/utils/qdf/load.rs b/src/utils/qdf/load.rs
index 62c1165..277ddc4 100644
--- a/src/utils/qdf/load.rs
+++ b/src/utils/qdf/load.rs
@@ -23,10 +23,10 @@ fn _file_base_name(file_path: String) -> String {
/// The CSV must be named in the format `cid_xcat.csv` (`ticker.csv`).
/// The DataFrame must have a `real_date` column along with additional value columns.
pub fn load_quantamental_dataframe(
- file_path: &str,
+ file_path: String,
) -> Result> {
// get the file base name
- let base_file_name = _file_base_name(file_path.into());
+ let base_file_name = _file_base_name(file_path.clone().into());
// if filename does not have _ then it is not a Quantamental DataFrame
if !base_file_name.contains('_') {
@@ -37,7 +37,7 @@ pub fn load_quantamental_dataframe(
let (cid, xcat) = split_ticker(ticker.to_string())?;
let mut df = CsvReadOptions::default()
- .try_into_reader_with_file_path(Some(file_path.into()))
+ .try_into_reader_with_file_path(Some(file_path.to_string().into()))
.unwrap()
.finish()
.unwrap();
@@ -99,7 +99,7 @@ fn collect_paths_recursively>(path: P) -> std::io::Res
}
fn _load_qdf_thread_safe(file_path: &str) -> Result> {
- let res = load_quantamental_dataframe(file_path);
+ let res = load_quantamental_dataframe(file_path.to_string());
res.map_err(|e| {
anyhow::Error::msg(e.to_string())
.context("Failed to load quantamental dataframe")
@@ -107,10 +107,10 @@ fn _load_qdf_thread_safe(file_path: &str) -> Result>,
- xcats: Option>,
- tickers: Option>,
+ folder_path: String,
+ cids: Option>,
+ xcats: Option>,
+ tickers: Option>,
) -> Result> {
let rcids = cids.unwrap_or_else(|| Vec::new());
let rxcats = xcats.unwrap_or_else(|| Vec::new());
@@ -145,9 +145,9 @@ pub fn load_qdf_from_download_bank(
let load_files = rel_files
.iter()
.filter(|(_, cid, xcat)| {
- let f1 = rcids.len() > 0 && rcids.contains(&cid.as_str());
- let f2 = rxcats.len() > 0 && rxcats.contains(&xcat.as_str());
- let f3 = rtickers.len() > 0 && rtickers.contains(&create_ticker(cid, xcat).as_str());
+ let f1 = rcids.len() > 0 && rcids.contains(&cid);
+ let f2 = rxcats.len() > 0 && rxcats.contains(&xcat);
+ let f3 = rtickers.len() > 0 && rtickers.contains(&create_ticker(cid, xcat));
f1 | f2 | f3
})
.map(|(file, _, _)| *file)
@@ -160,7 +160,7 @@ pub fn load_qdf_from_download_bank(
return Err("No files to load".into());
}
if load_files.len() == 1 {
- let dfx = load_quantamental_dataframe(load_files[0]).unwrap();
+ let dfx = load_quantamental_dataframe(load_files[0].to_string()).unwrap();
return Ok(dfx);
}
diff --git a/src/utils/qdf/reduce_df.rs b/src/utils/qdf/reduce_df.rs
index a3270bd..d89c01e 100644
--- a/src/utils/qdf/reduce_df.rs
+++ b/src/utils/qdf/reduce_df.rs
@@ -17,11 +17,11 @@ const QDF_INDEX_COLUMNS: [&str; 3] = ["real_date", "cid", "xcat"];
/// If no filters are provided, the original DataFrame is returned.
pub fn reduce_dataframe(
df: DataFrame,
- cids: Option>,
- xcats: Option>,
+ cids: Option>,
+ xcats: Option>,
metrics: Option>,
- start: Option<&str>,
- end: Option<&str>,
+ start: Option,
+ end: Option,
intersect: bool,
) -> Result> {
check_quantamental_dataframe(&df)?;
@@ -36,10 +36,10 @@ pub fn reduce_dataframe(
let u_xcats: Vec = get_unique_xcats(&new_df)?;
let u_tickers: Vec = _get_unique_strs_from_str_column_object(&ticker_col)?;
- let specified_cids: Vec<&str> =
- cids.unwrap_or_else(|| u_cids.iter().map(AsRef::as_ref).collect());
- let specified_xcats: Vec<&str> =
- xcats.unwrap_or_else(|| u_xcats.iter().map(AsRef::as_ref).collect());
+ let cids_vec = cids.unwrap_or_else(|| u_cids.clone());
+ let specified_cids: Vec<&str> = cids_vec.iter().map(AsRef::as_ref).collect();
+ let xcats_vec = xcats.unwrap_or_else(|| u_xcats.clone());
+ let specified_xcats: Vec<&str> = xcats_vec.iter().map(AsRef::as_ref).collect();
let non_idx_cols: Vec = new_df
.get_column_names()
@@ -107,7 +107,7 @@ pub fn reduce_dataframe(
// Apply date filtering if `start` or `end` is provided
if let Some(start) = start {
- let start_date = chrono::NaiveDate::parse_from_str(start, "%Y-%m-%d")?;
+ let start_date = chrono::NaiveDate::parse_from_str(&start, "%Y-%m-%d")?;
new_df = new_df
.lazy()
.filter(
@@ -120,7 +120,7 @@ pub fn reduce_dataframe(
}
if let Some(end) = end {
- let end_date = chrono::NaiveDate::parse_from_str(end, "%Y-%m-%d")?;
+ let end_date = chrono::NaiveDate::parse_from_str(&end, "%Y-%m-%d")?;
new_df = new_df
.lazy()
.filter(
diff --git a/src/utils/qdf/update_df.rs b/src/utils/qdf/update_df.rs
index bf24965..da711e0 100644
--- a/src/utils/qdf/update_df.rs
+++ b/src/utils/qdf/update_df.rs
@@ -11,7 +11,7 @@ const QDF_INDEX_COLUMNS: [&str; 3] = ["real_date", "cid", "xcat"];
pub fn update_dataframe(
df: &DataFrame,
df_add: &DataFrame,
- // xcat_replace: Option<&str>,
+ xcat_replace: bool,
) -> Result> {
check_quantamental_dataframe(df)?;
check_quantamental_dataframe(df_add)?;
@@ -20,7 +20,10 @@ pub fn update_dataframe(
} else if df_add.is_empty() {
return Ok(df.clone());
};
-
+ println!(
+ "xcat_replace not implemented yet (passed value: {})",
+ xcat_replace
+ );
// vstack and drop duplicates keeping last
let mut new_df = df.vstack(df_add)?;
// help?
diff --git a/test.py b/test.py
deleted file mode 100644
index 05c8bdb..0000000
--- a/test.py
+++ /dev/null
@@ -1,10 +0,0 @@
-import msyrs
-
-# "E:\Work\jpmaqs-data\data\ADPEMPL_SA_P1M1ML1\USD_ADPEMPL_SA_P1M1ML1.csv"
-
-dfpath = "E:/Work/jpmaqs-data/data/ADPEMPL_SA_P1M1ML1/USD_ADPEMPL_SA_P1M1ML1.csv"
-
-print(msyrs.qdf.load_qdf(dfpath))
-
-
-