mirror of
https://github.com/Magnus167/msyrs.git
synced 2025-08-20 06:30:00 +00:00
461 lines
14 KiB
Plaintext
Vendored
461 lines
14 KiB
Plaintext
Vendored
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# ! uv pip install /home/palash/Code/msyrs --upgrade"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Import Python packages\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import macrosynergy\n",
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"import polars as pl\n",
|
|
"import os\n",
|
|
"\n",
|
|
"from macrosynergy.panel import view_timelines\n",
|
|
"from macrosynergy.management.types import QuantamentalDataFrame\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Import Python bindings - `msyrs`\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import msyrs"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Root of the local JPMaQS download bank. Set JPMAQS_DATA_PATH to point at another\n",
"# location instead of editing this cell; the default below is used when it is unset.\n",
"DATA_FOLDER_PATH = os.path.abspath(\n",
"    os.path.expanduser(os.getenv(\"JPMAQS_DATA_PATH\", \"~/Code/go-dataquery/jpmaqs-data\"))\n",
")\n",
"# DataQuery credentials are read from the environment so they are never stored in the notebook.\n",
"DQ_CLIENT_ID = os.getenv(\"DQ_CLIENT_ID\")\n",
"DQ_CLIENT_SECRET = os.getenv(\"DQ_CLIENT_SECRET\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import time\n",
|
|
"\n",
|
|
"nb_start_time = time.time()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Time taken to load qdf: 0.006810665130615234\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div><style>\n",
|
|
".dataframe > thead > tr,\n",
|
|
".dataframe > tbody > tr {\n",
|
|
" text-align: right;\n",
|
|
" white-space: pre-wrap;\n",
|
|
"}\n",
|
|
"</style>\n",
|
|
"<small>shape: (5, 7)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>real_date</th><th>cid</th><th>xcat</th><th>value</th><th>grading</th><th>eop_lag</th><th>mop_lag</th></tr><tr><td>date</td><td>str</td><td>str</td><td>f64</td><td>f64</td><td>i64</td><td>i64</td></tr></thead><tbody><tr><td>2010-03-03</td><td>"USD"</td><td>"ADPEMPL_SA_P1M1ML1"</td><td>-0.173806</td><td>3.0</td><td>3</td><td>33</td></tr><tr><td>2010-03-04</td><td>"USD"</td><td>"ADPEMPL_SA_P1M1ML1"</td><td>-0.173806</td><td>3.0</td><td>4</td><td>34</td></tr><tr><td>2010-03-05</td><td>"USD"</td><td>"ADPEMPL_SA_P1M1ML1"</td><td>-0.173806</td><td>3.0</td><td>5</td><td>35</td></tr><tr><td>2010-03-08</td><td>"USD"</td><td>"ADPEMPL_SA_P1M1ML1"</td><td>-0.173806</td><td>3.0</td><td>8</td><td>38</td></tr><tr><td>2010-03-09</td><td>"USD"</td><td>"ADPEMPL_SA_P1M1ML1"</td><td>-0.173806</td><td>3.0</td><td>9</td><td>39</td></tr></tbody></table></div>"
|
|
],
|
|
"text/plain": [
|
|
"shape: (5, 7)\n",
|
|
"┌────────────┬─────┬────────────────────┬───────────┬─────────┬─────────┬─────────┐\n",
|
|
"│ real_date ┆ cid ┆ xcat ┆ value ┆ grading ┆ eop_lag ┆ mop_lag │\n",
|
|
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
|
|
"│ date ┆ str ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n",
|
|
"╞════════════╪═════╪════════════════════╪═══════════╪═════════╪═════════╪═════════╡\n",
|
|
"│ 2010-03-03 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 3 ┆ 33 │\n",
|
|
"│ 2010-03-04 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 4 ┆ 34 │\n",
|
|
"│ 2010-03-05 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 5 ┆ 35 │\n",
|
|
"│ 2010-03-08 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 8 ┆ 38 │\n",
|
|
"│ 2010-03-09 ┆ USD ┆ ADPEMPL_SA_P1M1ML1 ┆ -0.173806 ┆ 3.0 ┆ 9 ┆ 39 │\n",
|
|
"└────────────┴─────┴────────────────────┴───────────┴─────────┴─────────┴─────────┘"
|
|
]
|
|
},
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"dfpath = f\"{DATA_FOLDER_PATH}/data/ADPEMPL_SA_P1M1ML1/USD_ADPEMPL_SA_P1M1ML1.csv\"\n",
|
|
"\n",
|
|
"starttime = time.time()\n",
|
|
"ldf: pl.DataFrame = msyrs.qdf.load_qdf(dfpath)\n",
|
|
"print(f\"Time taken to load qdf: {time.time() - starttime}\")\n",
|
|
"ldf.head(5)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"cids_dm = \"AUD.CAD.CHF.EUR.GBP.JPY.NOK.NZD.SEK.USD\".split(\".\")\n",
"cids_em = \"CLP.COP.CZK.HUF.IDR.ILS.INR.KRW.MXN.PLN.THB.TRY.TWD.ZAR\".split(\".\")\n",
"cids = cids_dm + cids_em\n",
"# Ordered filter rather than a set difference, so the list is identical on every run.\n",
"cids_dux = [cid for cid in cids if cid not in (\"IDR\", \"NZD\")]\n",
"\n",
"# The fragments are concatenated first and split once; closing the parenthesis before\n",
"# .split() makes it explicit that the call applies to the whole string.\n",
"ecos = (\n",
"    \"CPIC_SA_P1M1ML12.CPIC_SJA_P3M3ML3AR.CPIC_SJA_P6M6ML6AR.CPIH_SA_P1M1ML12.\"\n",
"    \"CPIH_SJA_P3M3ML3AR.CPIH_SJA_P6M6ML6AR.INFTEFF_NSA.INTRGDP_NSA_P1M1ML12_3MMA.\"\n",
"    \"INTRGDPv5Y_NSA_P1M1ML12_3MMA.PCREDITGDP_SJA_D1M1ML12.RGDP_SA_P1Q1QL4_20QMA.\"\n",
"    \"RYLDIRS02Y_NSA.RYLDIRS05Y_NSA.PCREDITBN_SJA_P1M1ML12\"\n",
").split(\".\")\n",
"\n",
"mkts = (\n",
"    \"DU02YXR_NSA.DU05YXR_NSA.DU02YXR_VT10.DU05YXR_VT10.EQXR_NSA.EQXR_VT10.\"\n",
"    \"FXXR_NSA.FXXR_VT10.FXCRR_NSA.FXTARGETED_NSA.FXUNTRADABLE_NSA\"\n",
").split(\".\")\n",
"xcats = ecos + mkts\n",
"\n",
"# One ticker per (cross-section, category) pair, formatted as CID_XCAT.\n",
"tickers = [f\"{c}_{x}\" for c in cids for x in xcats]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Time taken to load qdf batch: 1.4180326461791992\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"starttime = time.time()\n",
|
|
"\n",
|
|
"big_df: pl.DataFrame = msyrs.qdf.load_qdf_from_download_bank(\n",
|
|
" folder_path=DATA_FOLDER_PATH,\n",
|
|
" xcats=xcats,\n",
|
|
")\n",
|
|
"print(f\"Time taken to load qdf batch: {time.time() - starttime}\")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"286.95422172546387"
|
|
]
|
|
},
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"big_df.estimated_size(\"mb\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Reduced DataFrame from 5484608 to 2091732 rows\n",
|
|
"Time taken to reduce qdf: 1.9222838878631592\n",
|
|
"99.42372608184814\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"starttime = time.time()\n",
|
|
"\n",
|
|
"test_df = msyrs.qdf.reduce_dataframe(df=big_df, xcats=mkts)\n",
|
|
"\n",
|
|
"print(f\"Time taken to reduce qdf: {time.time() - starttime}\")\n",
|
|
"print(test_df.estimated_size(\"mb\"))\n",
|
|
"test_df = None"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"sel_cids = [\"USD\", \"EUR\", \"GBP\", \"AUD\", \"CAD\", \"CHF\", \"JPY\", \"INR\"]\n",
|
|
"start = \"2010-01-01\"\n",
|
|
"end = \"2011-01-01\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Reduced DataFrame from 5484608 to 4000 rows\n",
|
|
"Time taken to reduce qdf: 0.819500207901001\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"eq_xcats = [xc for xc in xcats if xc.startswith(\"EQ\")]\n",
|
|
"starttime = time.time()\n",
|
|
"\n",
|
|
"eq_df = msyrs.qdf.reduce_dataframe(\n",
|
|
" df=big_df, cids=sel_cids, xcats=eq_xcats, start=start, end=end\n",
|
|
")\n",
|
|
"\n",
|
|
"print(f\"Time taken to reduce qdf: {time.time() - starttime}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Reduced DataFrame from 5484608 to 8750 rows\n",
|
|
"Time taken to reduce qdf: 0.9090185165405273\n",
|
|
"xcat_replace not implemented yet (passed value: false)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"fx_xcats = [xc for xc in xcats if xc.startswith(\"FX\")]\n",
"\n",
"# Time the reduction only; merging fx_df into eq_df is done and timed in the next cell.\n",
"starttime = time.time()\n",
"\n",
"fx_df = msyrs.qdf.reduce_dataframe(\n",
"    df=big_df, cids=sel_cids, xcats=fx_xcats, start=start, end=end\n",
")\n",
"\n",
"print(f\"Time taken to reduce qdf: {time.time() - starttime}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 21,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Time taken to update qdf: 0.00896310806274414\n",
|
|
"xcat_replace not implemented yet (passed value: false)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"starttime = time.time()\n",
|
|
"\n",
|
|
"new_df: pl.DataFrame = msyrs.qdf.update_dataframe(df=eq_df, df_add=fx_df)\n",
|
|
"\n",
|
|
"print(f\"Time taken to update qdf: {time.time() - starttime}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Seeded generator so the injected gaps are the same on every Restart & Run All.\n",
"rng = np.random.default_rng(42)\n",
"\n",
"wide_df = QuantamentalDataFrame(new_df.to_pandas().fillna(0)).to_wide()\n",
"\n",
"# Blank out one random date window in each of four randomly chosen columns.\n",
"for _ in range(4):\n",
"    col = wide_df.columns[rng.integers(0, len(wide_df.columns))]\n",
"    first, last = sorted(rng.choice(wide_df.index, size=2, replace=False))\n",
"    window = pd.bdate_range(first, last)\n",
"    wide_df.loc[window[0]:window[-1], col] = np.nan\n",
"\n",
"gapped_qdf = QuantamentalDataFrame.from_wide(wide_df, categorical=False)\n",
"# cast column 'real_date' to pl.Date\n",
"new_df = pl.DataFrame(gapped_qdf).with_columns(pl.col(\"real_date\").cast(pl.Date, strict=True))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 22,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"{}"
|
|
]
|
|
},
|
|
"execution_count": 22,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"msyrs.utils.create_blacklist_from_qdf(new_df)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# view_timelines(df=new_df.to_pandas())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 18,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"Date"
|
|
]
|
|
},
|
|
"execution_count": 18,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"msyrs.utils.get_bdates_series_default_pl(start_date='2000-01-01', end_date='2020-05-01', freq='D').dtype"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 19,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div><style>\n",
|
|
".dataframe > thead > tr,\n",
|
|
".dataframe > tbody > tr {\n",
|
|
" text-align: right;\n",
|
|
" white-space: pre-wrap;\n",
|
|
"}\n",
|
|
"</style>\n",
|
|
"<small>shape: (18_088,)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>bdates</th></tr><tr><td>date</td></tr></thead><tbody><tr><td>1971-01-01</td></tr><tr><td>1971-01-04</td></tr><tr><td>1971-01-05</td></tr><tr><td>1971-01-06</td></tr><tr><td>1971-01-07</td></tr><tr><td>…</td></tr><tr><td>2040-04-25</td></tr><tr><td>2040-04-26</td></tr><tr><td>2040-04-27</td></tr><tr><td>2040-04-30</td></tr><tr><td>2040-05-01</td></tr></tbody></table></div>"
|
|
],
|
|
"text/plain": [
|
|
"shape: (18_088,)\n",
|
|
"Series: 'bdates' [date]\n",
|
|
"[\n",
|
|
"\t1971-01-01\n",
|
|
"\t1971-01-04\n",
|
|
"\t1971-01-05\n",
|
|
"\t1971-01-06\n",
|
|
"\t1971-01-07\n",
|
|
"\t…\n",
|
|
"\t2040-04-25\n",
|
|
"\t2040-04-26\n",
|
|
"\t2040-04-27\n",
|
|
"\t2040-04-30\n",
|
|
"\t2040-05-01\n",
|
|
"]"
|
|
]
|
|
},
|
|
"execution_count": 19,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"msyrs.utils.get_bdates_series_default_opt(start_date='1971-01-01', end_date='2040-05-01', freq='D')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 20,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Time taken: 5.6950860023498535 seconds\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"end_time = time.time()\n",
|
|
"print(f\"Time taken: {end_time - nb_start_time} seconds\")"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": ".venv",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.9"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|