python working with notebook!

This commit is contained in:
Palash Tyagi 2024-11-17 23:58:47 +00:00
parent 09f74916e8
commit bba5acd724
13 changed files with 586 additions and 79 deletions

View File

@ -31,8 +31,8 @@ futures = "0.3"
# pyo3 = { version = "0.23.1", features = ["extension-module"] }
# pyo3 = { version = "0.21.2", features = ["extension-module"] }
# pyo3 = { version = "*", features = ["abi3-py38"] }
pyo3 = { version = "*", features = ["extension-module"] }
pyo3-polars = { version = "0.18.0" }
pyo3 = { version = "*", features = ["extension-module", "abi3-py37"] }
pyo3-polars = { version = "*" }
polars = { version = "*", features = [
"lazy",
"temporal",
@ -40,6 +40,7 @@ polars = { version = "*", features = [
"json",
"parquet",
"dtype-datetime",
# "dtype-categorical",
"strings",
"timezones",
"ndarray",

View File

@ -2,33 +2,30 @@
A Rust implementation of the [Macrosynergy Python Package](https://github.com/macrosynergy/macrosynergy).
## Running Notebook
## Build and install the Python package
```bash
cargo install evcxr_jupyter
evcxr_jupyter --install
pip install jupyterlab
jupyter lab
python -m venv .venv
# On Linux/macOS use: source .venv/bin/activate (the next line is the Windows equivalent)
./.venv/Scripts/activate
pip install maturin
maturin develop --release
```
Or try following this guide here: [DataCrayon - Setup Jupyter with Rust](https://datacrayon.com/data-analysis-with-rust-notebooks/setup-anaconda-jupyter-and-rust/)
## Status
- Download
- [x] Download
- [x] Get Catalogue
- [x] Get Generic DQ Time Series
- [x] Get JPMaQS Indicators as Polars DataFrame
- [ ] Save to disk functionality
- [x] Hacky iterative method
- [ ] Non-hacky way to save to disk
- [ ] Pending: Optimize thread pool
- Utils
- [ ] Reduce DF
- [ ] QDF
- [x] Read QDF
- [x] Reduce DF
- [x] Update DF
- [ ] Get Blacklist
- [ ] Apply Blacklist
- [ ] Update DF
- Panel
- [ ] Historic Volatility

View File

@ -0,0 +1,364 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Build and install the package\n",
"\n",
"```bash\n",
"python -m venv .venv\n",
"\n",
"# source .venv/bin/activate\n",
"./.venv/Scripts/activate\n",
"\n",
"pip install maturin\n",
"\n",
"maturin develop --release\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"e:\\Work\\ruzt\\msyrs\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"import macrosynergy\n",
"import pandas as pd\n",
"import numpy as np\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import msyrs\n",
"import polars as pl"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"DATA_FOLDER_PATH = \"E:/Work/jpmaqs-data\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr,\n",
".dataframe > tbody > tr {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (5, 7)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>real_date</th><th>cid</th><th>xcat</th><th>value</th><th>grading</th><th>eop_lag</th><th>mop_lag</th></tr><tr><td>date</td><td>str</td><td>str</td><td>f64</td><td>f64</td><td>i64</td><td>i64</td></tr></thead><tbody><tr><td>2010-03-03</td><td>&quot;USD&quot;</td><td>&quot;ADPEMPL_SA_P1M1ML1&quot;</td><td>-0.173806</td><td>3.0</td><td>3</td><td>33</td></tr><tr><td>2010-03-04</td><td>&quot;USD&quot;</td><td>&quot;ADPEMPL_SA_P1M1ML1&quot;</td><td>-0.173806</td><td>3.0</td><td>4</td><td>34</td></tr><tr><td>2010-03-05</td><td>&quot;USD&quot;</td><td>&quot;ADPEMPL_SA_P1M1ML1&quot;</td><td>-0.173806</td><td>3.0</td><td>5</td><td>35</td></tr><tr><td>2010-03-08</td><td>&quot;USD&quot;</td><td>&quot;ADPEMPL_SA_P1M1ML1&quot;</td><td>-0.173806</td><td>3.0</td><td>8</td><td>38</td></tr><tr><td>2010-03-09</td><td>&quot;USD&quot;</td><td>&quot;ADPEMPL_SA_P1M1ML1&quot;</td><td>-0.173806</td><td>3.0</td><td>9</td><td>39</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (5, 7)\n",
"┌────────────┬─────┬────────────────────┬───────────┬─────────┬─────────┬─────────┐\n",
"│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n",
"╞════════════╪═════╪════════════════════╪═══════════╪═════════╪═════════╪═════════╡\n",
"│ 2010-03-03 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 3 ┆ 33 │\n",
"│ 2010-03-04 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 4 ┆ 34 │\n",
"│ 2010-03-05 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 5 ┆ 35 │\n",
"│ 2010-03-08 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 8 ┆ 38 │\n",
"│ 2010-03-09 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 9 ┆ 39 │\n",
"└────────────┴─────┴────────────────────┴───────────┴─────────┴─────────┴─────────┘"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dfpath = f\"{DATA_FOLDER_PATH}/data/ADPEMPL_SA_P1M1ML1/USD_ADPEMPL_SA_P1M1ML1.csv\"\n",
"\n",
"\n",
"ldf: pl.DataFrame = msyrs.qdf.load_qdf(dfpath)\n",
"ldf.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"cids_dm = \"AUD.CAD.CHF.EUR.GBP.JPY.NOK.NZD.SEK.USD\".split(\".\")\n",
"cids_em = \"CLP.COP.CZK.HUF.IDR.ILS.INR.KRW.MXN.PLN.THB.TRY.TWD.ZAR\".split(\".\")\n",
"cids = cids_dm + cids_em\n",
"cids_dux = list(set(cids) - set([\"IDR\", \"NZD\"]))\n",
"ecos = \"CPIC_SA_P1M1ML12.CPIC_SJA_P3M3ML3AR.CPIC_SJA_P6M6ML6AR.CPIH_SA_P1M1ML12.CPIH_SJA_P3M3ML3AR.CPIH_SJA_P6M6ML6AR.INFTEFF_NSA.INTRGDP_NSA_P1M1ML12_3MMA.INTRGDPv5Y_NSA_P1M1ML12_3MMA.PCREDITGDP_SJA_D1M1ML12.RGDP_SA_P1Q1QL4_20QMA.RYLDIRS02Y_NSA.RYLDIRS05Y_NSA.PCREDITBN_SJA_P1M1ML12\".split(\n",
" \".\"\n",
")\n",
"[\"CPIC\", \"CPIH\", \"INFTEFF\", \"INTRGDP\", \"INTRGDPv5Y\", \"PCREDITGDP\", \"RGDP\", \"RYLDIRS\", \"PCREDITBN\"]\n",
"[\"\"]\n",
"\n",
"mkts = \"DU02YXR_NSA.DU05YXR_NSA.DU02YXR_VT10.DU05YXR_VT10.EQXR_NSA.EQXR_VT10.FXXR_NSA.FXXR_VT10.FXCRR_NSA.FXTARGETED_NSA.FXUNTRADABLE_NSA\".split(\n",
" \".\"\n",
")\n",
"xcats = ecos + mkts\n",
"\n",
"tickers = [f\"{c}_{x}\" for c in cids for x in xcats]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr,\n",
".dataframe > tbody > tr {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (5, 7)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>real_date</th><th>cid</th><th>xcat</th><th>value</th><th>grading</th><th>eop_lag</th><th>mop_lag</th></tr><tr><td>date</td><td>str</td><td>str</td><td>f64</td><td>f64</td><td>i64</td><td>i64</td></tr></thead><tbody><tr><td>1990-04-26</td><td>&quot;AUD&quot;</td><td>&quot;CPIC_SA_P1M1ML12&quot;</td><td>6.434599</td><td>2.0</td><td>26</td><td>223</td></tr><tr><td>1990-04-27</td><td>&quot;AUD&quot;</td><td>&quot;CPIC_SA_P1M1ML12&quot;</td><td>6.434599</td><td>2.0</td><td>27</td><td>224</td></tr><tr><td>1990-04-30</td><td>&quot;AUD&quot;</td><td>&quot;CPIC_SA_P1M1ML12&quot;</td><td>6.434599</td><td>2.0</td><td>30</td><td>227</td></tr><tr><td>1990-05-01</td><td>&quot;AUD&quot;</td><td>&quot;CPIC_SA_P1M1ML12&quot;</td><td>6.434599</td><td>2.0</td><td>31</td><td>228</td></tr><tr><td>1990-05-02</td><td>&quot;AUD&quot;</td><td>&quot;CPIC_SA_P1M1ML12&quot;</td><td>6.434599</td><td>2.0</td><td>32</td><td>229</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (5, 7)\n",
"┌────────────┬─────┬──────────────────┬──────────┬─────────┬─────────┬─────────┐\n",
"│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n",
"╞════════════╪═════╪══════════════════╪══════════╪═════════╪═════════╪═════════╡\n",
"│ 1990-04-26 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 26 ┆ 223 │\n",
"│ 1990-04-27 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 27 ┆ 224 │\n",
"│ 1990-04-30 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 30 ┆ 227 │\n",
"│ 1990-05-01 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 31 ┆ 228 │\n",
"│ 1990-05-02 ┆ AUD ┆ CPIC_SA_P1M1ML12 ┆ 6.434599 ┆ 2.0 ┆ 32 ┆ 229 │\n",
"└────────────┴─────┴──────────────────┴──────────┴─────────┴─────────┴─────────┘"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"big_df: pl.DataFrame = msyrs.qdf.load_qdf_from_download_bank(\n",
" folder_path=DATA_FOLDER_PATH, tickers=tickers\n",
")\n",
"big_df.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"sel_cids = [\"USD\", \"EUR\", \"GBP\", \"AUD\", \"CAD\"]\n",
"start = \"2024-11-14\""
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr,\n",
".dataframe > tbody > tr {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (20, 7)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>real_date</th><th>cid</th><th>xcat</th><th>value</th><th>grading</th><th>eop_lag</th><th>mop_lag</th></tr><tr><td>date</td><td>str</td><td>str</td><td>f64</td><td>f64</td><td>i64</td><td>i64</td></tr></thead><tbody><tr><td>2024-11-14</td><td>&quot;AUD&quot;</td><td>&quot;EQXR_NSA&quot;</td><td>0.329188</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-15</td><td>&quot;AUD&quot;</td><td>&quot;EQXR_NSA&quot;</td><td>0.826346</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-14</td><td>&quot;CAD&quot;</td><td>&quot;EQXR_NSA&quot;</td><td>0.199402</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-15</td><td>&quot;CAD&quot;</td><td>&quot;EQXR_NSA&quot;</td><td>-0.696517</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-14</td><td>&quot;EUR&quot;</td><td>&quot;EQXR_NSA&quot;</td><td>2.024889</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td></tr><tr><td>2024-11-15</td><td>&quot;EUR&quot;</td><td>&quot;EQXR_VT10&quot;</td><td>-0.477901</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-14</td><td>&quot;GBP&quot;</td><td>&quot;EQXR_VT10&quot;</td><td>0.664208</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-15</td><td>&quot;GBP&quot;</td><td>&quot;EQXR_VT10&quot;</td><td>-0.068778</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-14</td><td>&quot;USD&quot;</td><td>&quot;EQXR_VT10&quot;</td><td>-0.549983</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-15</td><td>&quot;USD&quot;</td><td>&quot;EQXR_VT10&quot;</td><td>-1.198544</td><td>1.0</td><td>0</td><td>0</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (20, 7)\n",
"┌────────────┬─────┬───────────┬───────────┬─────────┬─────────┬─────────┐\n",
"│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n",
"╞════════════╪═════╪═══════════╪═══════════╪═════════╪═════════╪═════════╡\n",
"│ 2024-11-14 ┆ AUD ┆ EQXR_NSA ┆ 0.329188 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-15 ┆ AUD ┆ EQXR_NSA ┆ 0.826346 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-14 ┆ CAD ┆ EQXR_NSA ┆ 0.199402 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-15 ┆ CAD ┆ EQXR_NSA ┆ -0.696517 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-14 ┆ EUR ┆ EQXR_NSA ┆ 2.024889 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
"│ 2024-11-15 ┆ EUR ┆ EQXR_VT10 ┆ -0.477901 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-14 ┆ GBP ┆ EQXR_VT10 ┆ 0.664208 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-15 ┆ GBP ┆ EQXR_VT10 ┆ -0.068778 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-14 ┆ USD ┆ EQXR_VT10 ┆ -0.549983 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-15 ┆ USD ┆ EQXR_VT10 ┆ -1.198544 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"└────────────┴─────┴───────────┴───────────┴─────────┴─────────┴─────────┘"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"eq_df = msyrs.qdf.reduce_dataframe(\n",
" df=big_df,\n",
" cids=sel_cids,\n",
" xcats=[\"EQXR_NSA\", \"EQXR_VT10\"],\n",
" start=start,\n",
")\n",
"eq_df"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"fx_xcats = [xc for xc in xcats if xc.startswith(\"FX\")]\n",
"fx_df = msyrs.qdf.reduce_dataframe(\n",
" df=big_df, cids=sel_cids, start=start, xcats=fx_xcats, intersect=True\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr,\n",
".dataframe > tbody > tr {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (10, 7)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>real_date</th><th>cid</th><th>xcat</th><th>value</th><th>grading</th><th>eop_lag</th><th>mop_lag</th></tr><tr><td>date</td><td>str</td><td>str</td><td>f64</td><td>f64</td><td>i64</td><td>i64</td></tr></thead><tbody><tr><td>2024-11-14</td><td>&quot;AUD&quot;</td><td>&quot;EQXR_NSA&quot;</td><td>0.329188</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-15</td><td>&quot;AUD&quot;</td><td>&quot;EQXR_NSA&quot;</td><td>0.826346</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-14</td><td>&quot;CAD&quot;</td><td>&quot;EQXR_NSA&quot;</td><td>0.199402</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-15</td><td>&quot;CAD&quot;</td><td>&quot;EQXR_NSA&quot;</td><td>-0.696517</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-14</td><td>&quot;EUR&quot;</td><td>&quot;EQXR_NSA&quot;</td><td>2.024889</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-15</td><td>&quot;EUR&quot;</td><td>&quot;EQXR_NSA&quot;</td><td>-0.661567</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-14</td><td>&quot;GBP&quot;</td><td>&quot;EQXR_NSA&quot;</td><td>0.596533</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-15</td><td>&quot;GBP&quot;</td><td>&quot;EQXR_NSA&quot;</td><td>-0.06177</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-14</td><td>&quot;USD&quot;</td><td>&quot;EQXR_NSA&quot;</td><td>-0.627493</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-15</td><td>&quot;USD&quot;</td><td>&quot;EQXR_NSA&quot;</td><td>-1.367457</td><td>1.0</td><td>0</td><td>0</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (10, 7)\n",
"┌────────────┬─────┬──────────┬───────────┬─────────┬─────────┬─────────┐\n",
"│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n",
"╞════════════╪═════╪══════════╪═══════════╪═════════╪═════════╪═════════╡\n",
"│ 2024-11-14 ┆ AUD ┆ EQXR_NSA ┆ 0.329188 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-15 ┆ AUD ┆ EQXR_NSA ┆ 0.826346 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-14 ┆ CAD ┆ EQXR_NSA ┆ 0.199402 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-15 ┆ CAD ┆ EQXR_NSA ┆ -0.696517 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-14 ┆ EUR ┆ EQXR_NSA ┆ 2.024889 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-15 ┆ EUR ┆ EQXR_NSA ┆ -0.661567 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-14 ┆ GBP ┆ EQXR_NSA ┆ 0.596533 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-15 ┆ GBP ┆ EQXR_NSA ┆ -0.06177 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-14 ┆ USD ┆ EQXR_NSA ┆ -0.627493 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-15 ┆ USD ┆ EQXR_NSA ┆ -1.367457 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"└────────────┴─────┴──────────┴───────────┴─────────┴─────────┴─────────┘"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"new_df: pl.DataFrame = msyrs.qdf.update_dataframe(df=eq_df, df_add=fx_df)\n",
"\n",
"new_df.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr,\n",
".dataframe > tbody > tr {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (10, 7)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>real_date</th><th>cid</th><th>xcat</th><th>value</th><th>grading</th><th>eop_lag</th><th>mop_lag</th></tr><tr><td>date</td><td>str</td><td>str</td><td>f64</td><td>f64</td><td>i64</td><td>i64</td></tr></thead><tbody><tr><td>2024-11-14</td><td>&quot;GBP&quot;</td><td>&quot;FXXR_NSA&quot;</td><td>-0.067809</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-15</td><td>&quot;GBP&quot;</td><td>&quot;FXXR_NSA&quot;</td><td>-0.430055</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-14</td><td>&quot;AUD&quot;</td><td>&quot;FXXR_VT10&quot;</td><td>-0.4294</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-15</td><td>&quot;AUD&quot;</td><td>&quot;FXXR_VT10&quot;</td><td>-0.452535</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-14</td><td>&quot;CAD&quot;</td><td>&quot;FXXR_VT10&quot;</td><td>-1.132314</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-15</td><td>&quot;CAD&quot;</td><td>&quot;FXXR_VT10&quot;</td><td>-1.755605</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-14</td><td>&quot;EUR&quot;</td><td>&quot;FXXR_VT10&quot;</td><td>-0.292422</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-15</td><td>&quot;EUR&quot;</td><td>&quot;FXXR_VT10&quot;</td><td>-0.855108</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-14</td><td>&quot;GBP&quot;</td><td>&quot;FXXR_VT10&quot;</td><td>-0.110526</td><td>1.0</td><td>0</td><td>0</td></tr><tr><td>2024-11-15</td><td>&quot;GBP&quot;</td><td>&quot;FXXR_VT10&quot;</td><td>-0.700977</td><td>1.0</td><td>0</td><td>0</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (10, 7)\n",
"┌────────────┬─────┬───────────┬───────────┬─────────┬─────────┬─────────┐\n",
"│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n",
"╞════════════╪═════╪═══════════╪═══════════╪═════════╪═════════╪═════════╡\n",
"│ 2024-11-14 ┆ GBP ┆ FXXR_NSA ┆ -0.067809 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-15 ┆ GBP ┆ FXXR_NSA ┆ -0.430055 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-14 ┆ AUD ┆ FXXR_VT10 ┆ -0.4294 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-15 ┆ AUD ┆ FXXR_VT10 ┆ -0.452535 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-14 ┆ CAD ┆ FXXR_VT10 ┆ -1.132314 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-15 ┆ CAD ┆ FXXR_VT10 ┆ -1.755605 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-14 ┆ EUR ┆ FXXR_VT10 ┆ -0.292422 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-15 ┆ EUR ┆ FXXR_VT10 ┆ -0.855108 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-14 ┆ GBP ┆ FXXR_VT10 ┆ -0.110526 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"│ 2024-11-15 ┆ GBP ┆ FXXR_VT10 ┆ -0.700977 ┆ 1.0 ┆ 0 ┆ 0 │\n",
"└────────────┴─────┴───────────┴───────────┴─────────┴─────────┴─────────┘"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"new_df.tail(10)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -1,5 +1,32 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "31d0d7e3",
"metadata": {},
"source": [
"# Running this notebook\n",
"\n",
"Create a new Python Venv using:\n",
"\n",
"```bash\n",
"python -m venv .venv\n",
"# source .venv/bin/activate\n",
"./.venv/Scripts/activate\n",
"```\n",
"\n",
"Install `evcxr_jupyter` and `jupyterlab` using:\n",
"\n",
"```bash\n",
"cargo install evcxr_jupyter\n",
"evcxr_jupyter --install\n",
"pip install jupyterlab\n",
"jupyter lab\n",
"```\n",
"\n",
"Or try following this guide here: [DataCrayon - Setup Jupyter with Rust](https://datacrayon.com/data-analysis-with-rust-notebooks/setup-anaconda-jupyter-and-rust/)"
]
},
{
"cell_type": "markdown",
"id": "8d04a212-4025-41d7-809e-864649b08ab5",
@ -21,7 +48,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "22e1ae9e-14b8-4be4-b852-8f0fb420eaca",
"metadata": {},
"outputs": [],
@ -399,7 +426,7 @@
"mimetype": "text/rust",
"name": "rust",
"pygment_lexer": "rust",
"version": ""
"version": "3.12.7"
}
},
"nbformat": 4,

44
notebooks/test.py Normal file
View File

@ -0,0 +1,44 @@
import msyrs
import datetime
# "E:\Work\jpmaqs-data\data\ADPEMPL_SA_P1M1ML1\USD_ADPEMPL_SA_P1M1ML1.csv"
# Smoke-test script for the `msyrs.qdf` Python bindings.
# It loads a single ticker CSV, loads a larger set of tickers from the local
# download folder, and then exercises `reduce_dataframe` with two filter sets.

# Root of the local data folder (machine-specific path).
DATA_FOLDER_PATH = "E:/Work/jpmaqs-data"

# 1) Load one ticker file directly and print the resulting DataFrame.
dfpath = f"{DATA_FOLDER_PATH}/data/ADPEMPL_SA_P1M1ML1/USD_ADPEMPL_SA_P1M1ML1.csv"
print(msyrs.qdf.load_qdf(dfpath))

# 2) Build the cross-section (cid) and category (xcat) lists.
cids_dm = "AUD.CAD.CHF.EUR.GBP.JPY.NOK.NZD.SEK.USD".split(".")
cids_em = "CLP.COP.CZK.HUF.IDR.ILS.INR.KRW.MXN.PLN.THB.TRY.TWD.ZAR".split(".")
cids = cids_dm + cids_em
cids_dux = list(set(cids) - {"IDR", "NZD"})

ecos = "CPIC_SA_P1M1ML12.CPIC_SJA_P3M3ML3AR.CPIC_SJA_P6M6ML6AR.CPIH_SA_P1M1ML12.CPIH_SJA_P3M3ML3AR.CPIH_SJA_P6M6ML6AR.INFTEFF_NSA.INTRGDP_NSA_P1M1ML12_3MMA.INTRGDPv5Y_NSA_P1M1ML12_3MMA.PCREDITGDP_SJA_D1M1ML12.RGDP_SA_P1Q1QL4_20QMA.RYLDIRS02Y_NSA.RYLDIRS05Y_NSA.PCREDITBN_SJA_P1M1ML12".split(".")
mkts = "DU02YXR_NSA.DU05YXR_NSA.DU02YXR_VT10.DU05YXR_VT10.EQXR_NSA.EQXR_VT10.FXXR_NSA.FXXR_VT10.FXCRR_NSA.FXTARGETED_NSA.FXUNTRADABLE_NSA".split(".")
xcats = ecos + mkts

# Every cid/xcat combination, joined as "<cid>_<xcat>".
tickers = [cid + "_" + xcat for cid in cids for xcat in xcats]

# 3) Load all requested tickers from the download folder.
df = msyrs.qdf.load_qdf_from_download_bank(
    folder_path=DATA_FOLDER_PATH,
    tickers=tickers,
)
print(df)

# 4) Reduce to a single equity-return series starting five days ago.
_five_days_ago = datetime.datetime.now() - datetime.timedelta(days=5)
start_date = _five_days_ago.strftime("%Y-%m-%d")
sel_cids = ["AUD", "USD", "GBP", "CAD", "JPY", "EUR"]
df_eq = msyrs.qdf.reduce_dataframe(
    df=df,
    cids=["AUD"],
    xcats=["EQXR_NSA"],
    start=start_date,
)
print(df_eq)

# 5) Reduce to all FX categories with `intersect=True`.
fx_xcats = [name for name in xcats if name[:2] == "FX"]
df_fx = msyrs.qdf.reduce_dataframe(df=df, xcats=fx_xcats, intersect=True)
print(df_fx)

7
pyproject.toml Normal file
View File

@ -0,0 +1,7 @@
[build-system]
requires = ["maturin>=1.0,<2.0"]
build-backend = "maturin"
[tool.maturin]
# "extension-module" tells pyo3 we want to build an extension module (skips linking against libpython.so)
features = ["pyo3/extension-module"]

View File

@ -1,30 +1,14 @@
#![doc = include_str!("../README.md")]
/// Documentation for the `msyrs` Python API.
pub mod py;
/// Documentation for the Rust API.
/// Documentation for the `download` module.
pub mod download;
pub mod utils;
use pyo3::{prelude::*, wrap_pymodule};
use pyo3_polars::PyDataFrame;
#[pyfunction]
pub fn load_qdf(file_path: &str) -> PyResult<PyDataFrame> {
Ok(PyDataFrame(
utils::qdf::load_quantamental_dataframe(file_path).unwrap(),
))
}
// ignore deprecated warning
#[allow(deprecated)]
#[pymodule]
pub fn qdf(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(load_qdf, m)?)?;
Ok(())
}
#[allow(deprecated)]
#[pymodule]
pub fn msyrs(_py: Python, m: &PyModule) -> PyResult<()> {
// add qdf as a submodule
m.add_wrapped(wrap_pymodule!(qdf))?;
Ok(())
}
pub use py::msyrs;

12
src/py/mod.rs Normal file
View File

@ -0,0 +1,12 @@
/// Python API for [`crate::utils::qdf`].
pub mod qdf;
use pyo3::{prelude::*, wrap_pymodule};
// use pyo3_polars::PyDataFrame;
// Top-level Python module definition: it registers the `qdf` submodule
// (defined in `qdf::qdf`) on the module object handed in by pyo3.
// `deprecated` is allowed to silence the deprecation warning raised for this definition.
#[allow(deprecated)]
#[pymodule]
pub fn msyrs(_py: Python, m: &PyModule) -> PyResult<()> {
    // `add_wrapped` already yields the `PyResult<()>` this function returns.
    m.add_wrapped(wrap_pymodule!(qdf::qdf))
}

78
src/py/qdf.rs Normal file
View File

@ -0,0 +1,78 @@
use pyo3::prelude::*;
use pyo3_polars::PyDataFrame;
/// Python wrapper for [`crate::utils::qdf`] module.
// `deprecated` is allowed to silence the deprecation warning raised for this definition.
#[allow(deprecated)]
#[pymodule]
pub fn qdf(_py: Python, m: &PyModule) -> PyResult<()> {
    // Each `#[pyfunction]` in this file is wrapped and then registered, in
    // the same order as before: loaders first, then the transformations.
    let single_file_loader = wrap_pyfunction!(load_qdf, m)?;
    m.add_function(single_file_loader)?;

    let download_bank_loader = wrap_pyfunction!(load_qdf_from_download_bank, m)?;
    m.add_function(download_bank_loader)?;

    let reducer = wrap_pyfunction!(reduce_dataframe, m)?;
    m.add_function(reducer)?;

    let updater = wrap_pyfunction!(update_dataframe, m)?;
    m.add_function(updater)
}
/// Python wrapper for loading a Quantamental DataFrame from a CSV file.
/// See [`crate::utils::qdf::load_quantamental_dataframe`] for full documentation.
#[pyfunction]
pub fn load_qdf(file_path: String) -> PyResult<PyDataFrame> {
Ok(PyDataFrame(
crate::utils::qdf::load_quantamental_dataframe(file_path).unwrap(),
))
}
/// Python wrapper for loading a Quantamental DataFrame from a download bank.
/// See [`crate::utils::qdf::load::load_qdf_from_download_bank`] for full documentation.
#[pyfunction]
pub fn load_qdf_from_download_bank(
folder_path: String,
cids: Option<Vec<String>>,
xcats: Option<Vec<String>>,
tickers: Option<Vec<String>>,
) -> PyResult<PyDataFrame> {
Ok(PyDataFrame(
crate::utils::qdf::load::load_qdf_from_download_bank(folder_path, cids, xcats, tickers)
.unwrap(),
))
}
/// Python wrapper for reduce_dataframe
/// See [`crate::utils::qdf::reduce_df::reduce_dataframe`] for full documentation.
#[pyfunction]
pub fn reduce_dataframe(
df: PyDataFrame,
cids: Option<Vec<String>>,
xcats: Option<Vec<String>>,
metrics: Option<Vec<String>>,
start: Option<String>,
end: Option<String>,
intersect: Option<bool>,
) -> PyResult<PyDataFrame> {
Ok(PyDataFrame(
crate::utils::qdf::reduce_df::reduce_dataframe(
df.into(),
cids,
xcats,
metrics,
start,
end,
intersect.unwrap_or(false),
)
.unwrap(),
))
}
/// Python wrapper for update_dataframe
/// See [`crate::utils::qdf::update_df::update_dataframe`] for full documentation.
#[pyfunction]
pub fn update_dataframe(
df: PyDataFrame,
df_add: PyDataFrame,
xcat_replace: Option<bool>,
) -> PyResult<PyDataFrame> {
let xcat_replace = xcat_replace.unwrap_or(false);
Ok(PyDataFrame(
crate::utils::qdf::update_df::update_dataframe(&df.into(), &df_add.into(), xcat_replace)
.unwrap(),
))
}

View File

@ -23,10 +23,10 @@ fn _file_base_name(file_path: String) -> String {
/// The CSV must be named in the format `cid_xcat.csv` (`ticker.csv`).
/// The DataFrame must have a `real_date` column along with additional value columns.
pub fn load_quantamental_dataframe(
file_path: &str,
file_path: String,
) -> Result<DataFrame, Box<dyn std::error::Error>> {
// get the file base name
let base_file_name = _file_base_name(file_path.into());
let base_file_name = _file_base_name(file_path.clone().into());
// if filename does not have _ then it is not a Quantamental DataFrame
if !base_file_name.contains('_') {
@ -37,7 +37,7 @@ pub fn load_quantamental_dataframe(
let (cid, xcat) = split_ticker(ticker.to_string())?;
let mut df = CsvReadOptions::default()
.try_into_reader_with_file_path(Some(file_path.into()))
.try_into_reader_with_file_path(Some(file_path.to_string().into()))
.unwrap()
.finish()
.unwrap();
@ -99,7 +99,7 @@ fn collect_paths_recursively<P: AsRef<std::path::Path>>(path: P) -> std::io::Res
}
fn _load_qdf_thread_safe(file_path: &str) -> Result<DataFrame, Box<dyn Error + Send + Sync>> {
let res = load_quantamental_dataframe(file_path);
let res = load_quantamental_dataframe(file_path.to_string());
res.map_err(|e| {
anyhow::Error::msg(e.to_string())
.context("Failed to load quantamental dataframe")
@ -107,10 +107,10 @@ fn _load_qdf_thread_safe(file_path: &str) -> Result<DataFrame, Box<dyn Error + S
})
}
pub fn load_qdf_from_download_bank(
folder_path: &str,
cids: Option<Vec<&str>>,
xcats: Option<Vec<&str>>,
tickers: Option<Vec<&str>>,
folder_path: String,
cids: Option<Vec<String>>,
xcats: Option<Vec<String>>,
tickers: Option<Vec<String>>,
) -> Result<DataFrame, Box<dyn std::error::Error>> {
let rcids = cids.unwrap_or_else(|| Vec::new());
let rxcats = xcats.unwrap_or_else(|| Vec::new());
@ -145,9 +145,9 @@ pub fn load_qdf_from_download_bank(
let load_files = rel_files
.iter()
.filter(|(_, cid, xcat)| {
let f1 = rcids.len() > 0 && rcids.contains(&cid.as_str());
let f2 = rxcats.len() > 0 && rxcats.contains(&xcat.as_str());
let f3 = rtickers.len() > 0 && rtickers.contains(&create_ticker(cid, xcat).as_str());
let f1 = rcids.len() > 0 && rcids.contains(&cid);
let f2 = rxcats.len() > 0 && rxcats.contains(&xcat);
let f3 = rtickers.len() > 0 && rtickers.contains(&create_ticker(cid, xcat));
f1 | f2 | f3
})
.map(|(file, _, _)| *file)
@ -160,7 +160,7 @@ pub fn load_qdf_from_download_bank(
return Err("No files to load".into());
}
if load_files.len() == 1 {
let dfx = load_quantamental_dataframe(load_files[0]).unwrap();
let dfx = load_quantamental_dataframe(load_files[0].to_string()).unwrap();
return Ok(dfx);
}

View File

@ -17,11 +17,11 @@ const QDF_INDEX_COLUMNS: [&str; 3] = ["real_date", "cid", "xcat"];
/// If no filters are provided, the original DataFrame is returned.
pub fn reduce_dataframe(
df: DataFrame,
cids: Option<Vec<&str>>,
xcats: Option<Vec<&str>>,
cids: Option<Vec<String>>,
xcats: Option<Vec<String>>,
metrics: Option<Vec<String>>,
start: Option<&str>,
end: Option<&str>,
start: Option<String>,
end: Option<String>,
intersect: bool,
) -> Result<DataFrame, Box<dyn Error>> {
check_quantamental_dataframe(&df)?;
@ -36,10 +36,10 @@ pub fn reduce_dataframe(
let u_xcats: Vec<String> = get_unique_xcats(&new_df)?;
let u_tickers: Vec<String> = _get_unique_strs_from_str_column_object(&ticker_col)?;
let specified_cids: Vec<&str> =
cids.unwrap_or_else(|| u_cids.iter().map(AsRef::as_ref).collect());
let specified_xcats: Vec<&str> =
xcats.unwrap_or_else(|| u_xcats.iter().map(AsRef::as_ref).collect());
let cids_vec = cids.unwrap_or_else(|| u_cids.clone());
let specified_cids: Vec<&str> = cids_vec.iter().map(AsRef::as_ref).collect();
let xcats_vec = xcats.unwrap_or_else(|| u_xcats.clone());
let specified_xcats: Vec<&str> = xcats_vec.iter().map(AsRef::as_ref).collect();
let non_idx_cols: Vec<String> = new_df
.get_column_names()
@ -107,7 +107,7 @@ pub fn reduce_dataframe(
// Apply date filtering if `start` or `end` is provided
if let Some(start) = start {
let start_date = chrono::NaiveDate::parse_from_str(start, "%Y-%m-%d")?;
let start_date = chrono::NaiveDate::parse_from_str(&start, "%Y-%m-%d")?;
new_df = new_df
.lazy()
.filter(
@ -120,7 +120,7 @@ pub fn reduce_dataframe(
}
if let Some(end) = end {
let end_date = chrono::NaiveDate::parse_from_str(end, "%Y-%m-%d")?;
let end_date = chrono::NaiveDate::parse_from_str(&end, "%Y-%m-%d")?;
new_df = new_df
.lazy()
.filter(

View File

@ -11,7 +11,7 @@ const QDF_INDEX_COLUMNS: [&str; 3] = ["real_date", "cid", "xcat"];
pub fn update_dataframe(
df: &DataFrame,
df_add: &DataFrame,
// xcat_replace: Option<&str>,
xcat_replace: bool,
) -> Result<DataFrame, Box<dyn Error>> {
check_quantamental_dataframe(df)?;
check_quantamental_dataframe(df_add)?;
@ -20,7 +20,10 @@ pub fn update_dataframe(
} else if df_add.is_empty() {
return Ok(df.clone());
};
println!(
"xcat_replace not implemented yet (passed value: {})",
xcat_replace
);
// vstack and drop duplicates keeping last
let mut new_df = df.vstack(df_add)?;
// help?

10
test.py
View File

@ -1,10 +0,0 @@
import msyrs
# "E:\Work\jpmaqs-data\data\ADPEMPL_SA_P1M1ML1\USD_ADPEMPL_SA_P1M1ML1.csv"
dfpath = "E:/Work/jpmaqs-data/data/ADPEMPL_SA_P1M1ML1/USD_ADPEMPL_SA_P1M1ML1.csv"
print(msyrs.qdf.load_qdf(dfpath))