Merge pull request #1 from Magnus167/qdf

Adding QDF related functionality and exemplar implementation of HistoricVol
This commit is contained in:
Palash Tyagi 2025-04-06 05:16:25 +01:00 committed by GitHub
commit c10b6adfcd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 1151 additions and 4478 deletions

3
.gitignore vendored
View File

@ -8,4 +8,5 @@ __pycache__/
.idea/
/target
build/
data/
data/
Cargo.lock

3926
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -32,436 +32,26 @@ ndarray = { version = "*" }
pyo3 = { version = "*", features = ["extension-module", "abi3-py37"] }
pyo3-polars = { git = "https://github.com/Magnus167/pyo3-polars.git" }
# pyo3-polars = { git = "https://github.com/Magnus167/pyo3-polars.git", features = [
# "dtype-categorical",
# ] }
pyo3-polars = { version = "0.18.0" , features = [
"dtype-categorical",
"dtype-struct",
] }
polars = { version = "*", features = [
polars = { version = "0.44.2", features = [
"lazy",
"temporal",
"describe",
"json",
"parquet",
"dtype-datetime",
# "dtype-categorical",
"dtype-categorical",
"dtype-struct",
"strings",
"timezones",
"ndarray",
"concat_str",
"dynamic_group_by",
] }
# "serde-lazy",
# "parquet",
# "decompress",
# "zip",
# "gzip",
# "dynamic_group_by",
# "rows",
# "cross_join",
# "semi_anti_join",
# "row_hash",
# "diagonal_concat",
# "dataframe_arithmetic",
# "partition_by",
# "is_in",
# "zip_with",
# "round_series",
# "repeat_by",
# "is_first_distinct",
# "is_last_distinct",
# "checked_arithmetic",
# "dot_product",
# "reinterpret",
# "take_opt_iter",
# "mode",
# "cum_agg",
# "rolling_window",
# "interpolate",
# "rank",
# "moment",
# "ewma",
# "abs",
# "product",
# "diff",
# "pct_change",
# "unique_counts",
# "log",
# "list_to_struct",
# "list_count",
# "list_eval",
# "cumulative_eval",
# "arg_where",
# "search_sorted",
# "offset_by",
# "trigonometry",
# "sign",
# "propagate_nans",
##########################################################################################
# hashbrown v0.14.5
# ├── halfbrown v0.2.5
# │ ├── simd-json v0.14.3
# │ │ ├── polars-io v0.44.2
# │ │ │ ├── polars v0.44.2
# │ │ │ │ ├── msyrs v0.0.1 (E:\Work\ruzt\msyrs)
# │ │ │ │ └── pyo3-polars v0.18.0
# │ │ │ │ └── msyrs v0.0.1 (E:\Work\ruzt\msyrs)
# │ │ │ ├── polars-expr v0.44.2
# │ │ │ │ ├── polars-lazy v0.44.2
# │ │ │ │ │ └── polars v0.44.2 (*)
# │ │ │ │ └── polars-mem-engine v0.44.2
# │ │ │ │ └── polars-lazy v0.44.2 (*)
# │ │ │ ├── polars-lazy v0.44.2 (*)
# │ │ │ ├── polars-mem-engine v0.44.2 (*)
# │ │ │ └── polars-plan v0.44.2
# │ │ │ ├── polars-expr v0.44.2 (*)
# │ │ │ ├── polars-lazy v0.44.2 (*)
# │ │ │ └── polars-mem-engine v0.44.2 (*)
# │ │ └── polars-json v0.44.2
# │ │ ├── polars-io v0.44.2 (*)
# │ │ ├── polars-lazy v0.44.2 (*)
# │ │ ├── polars-mem-engine v0.44.2 (*)
# │ │ └── polars-plan v0.44.2 (*)
# │ └── value-trait v0.10.1
# │ └── simd-json v0.14.3 (*)
# └── polars-core v0.44.2
# ├── polars v0.44.2 (*)
# ├── polars-expr v0.44.2 (*)
# ├── polars-io v0.44.2 (*)
# ├── polars-lazy v0.44.2 (*)
# ├── polars-mem-engine v0.44.2 (*)
# ├── polars-ops v0.44.2
# │ ├── polars v0.44.2 (*)
# │ ├── polars-expr v0.44.2 (*)
# │ ├── polars-lazy v0.44.2 (*)
# │ ├── polars-mem-engine v0.44.2 (*)
# │ ├── polars-plan v0.44.2 (*)
# │ └── polars-time v0.44.2
# │ ├── polars v0.44.2 (*)
# │ ├── polars-expr v0.44.2 (*)
# │ ├── polars-io v0.44.2 (*)
# │ ├── polars-lazy v0.44.2 (*)
# │ ├── polars-mem-engine v0.44.2 (*)
# │ └── polars-plan v0.44.2 (*)
# ├── polars-plan v0.44.2 (*)
# ├── polars-time v0.44.2 (*)
# └── pyo3-polars v0.18.0 (*)
# hashbrown v0.15.1
# ├── indexmap v2.6.0
# │ ├── h2 v0.4.6
# │ │ ├── hyper v1.5.0
# │ │ │ ├── hyper-tls v0.6.0
# │ │ │ │ └── reqwest v0.12.9
# │ │ │ │ └── msyrs v0.0.1 (E:\Work\ruzt\msyrs)
# │ │ │ ├── hyper-util v0.1.10
# │ │ │ │ ├── hyper-tls v0.6.0 (*)
# │ │ │ │ └── reqwest v0.12.9 (*)
# │ │ │ └── reqwest v0.12.9 (*)
# │ │ └── reqwest v0.12.9 (*)
# │ ├── polars-core v0.44.2 (*)
# │ ├── polars-json v0.44.2 (*)
# │ ├── polars-ops v0.44.2 (*)
# │ ├── polars-schema v0.44.2
# │ │ ├── polars-arrow v0.44.2
# │ │ │ ├── polars v0.44.2 (*)
# │ │ │ ├── polars-compute v0.44.2
# │ │ │ │ ├── polars-core v0.44.2 (*)
# │ │ │ │ ├── polars-expr v0.44.2 (*)
# │ │ │ │ ├── polars-ops v0.44.2 (*)
# │ │ │ │ └── polars-parquet v0.44.2
# │ │ │ │ ├── polars v0.44.2 (*)
# │ │ │ │ ├── polars-io v0.44.2 (*)
# │ │ │ │ └── polars-plan v0.44.2 (*)
# │ │ │ ├── polars-core v0.44.2 (*)
# │ │ │ ├── polars-expr v0.44.2 (*)
# │ │ │ ├── polars-io v0.44.2 (*)
# │ │ │ ├── polars-json v0.44.2 (*)
# │ │ │ ├── polars-lazy v0.44.2 (*)
# │ │ │ ├── polars-mem-engine v0.44.2 (*)
# │ │ │ ├── polars-ops v0.44.2 (*)
# │ │ │ ├── polars-parquet v0.44.2 (*)
# │ │ │ ├── polars-plan v0.44.2 (*)
# │ │ │ ├── polars-row v0.44.2
# │ │ │ │ ├── polars-core v0.44.2 (*)
# │ │ │ │ └── polars-expr v0.44.2 (*)
# │ │ │ └── polars-time v0.44.2 (*)
# │ │ ├── polars-core v0.44.2 (*)
# │ │ ├── polars-io v0.44.2 (*)
# │ │ └── polars-ops v0.44.2 (*)
# │ └── polars-utils v0.44.2
# │ ├── polars v0.44.2 (*)
# │ ├── polars-arrow v0.44.2 (*)
# │ ├── polars-compute v0.44.2 (*)
# │ ├── polars-core v0.44.2 (*)
# │ ├── polars-expr v0.44.2 (*)
# │ ├── polars-io v0.44.2 (*)
# │ ├── polars-json v0.44.2 (*)
# │ ├── polars-lazy v0.44.2 (*)
# │ ├── polars-mem-engine v0.44.2 (*)
# │ ├── polars-ops v0.44.2 (*)
# │ ├── polars-parquet v0.44.2 (*)
# │ ├── polars-plan v0.44.2 (*)
# │ ├── polars-row v0.44.2 (*)
# │ ├── polars-schema v0.44.2 (*)
# │ └── polars-time v0.44.2 (*)
# ├── polars-arrow v0.44.2 (*)
# ├── polars-core v0.44.2 (*)
# ├── polars-expr v0.44.2 (*)
# ├── polars-io v0.44.2 (*)
# ├── polars-json v0.44.2 (*)
# ├── polars-ops v0.44.2 (*)
# ├── polars-parquet v0.44.2 (*)
# ├── polars-plan v0.44.2 (*)
# └── polars-utils v0.44.2 (*)
# heck v0.4.1
# └── pyo3-macros-backend v0.21.2
# └── pyo3-macros v0.21.2 (proc-macro)
# └── pyo3 v0.21.2
# ├── msyrs v0.0.1 (E:\Work\ruzt\msyrs)
# └── pyo3-polars v0.18.0 (*)
# heck v0.5.0
# └── strum_macros v0.26.4 (proc-macro)
# ├── comfy-table v7.1.3
# │ └── polars-core v0.44.2 (*)
# ├── polars-arrow v0.44.2 (*)
# ├── polars-core v0.44.2 (*)
# ├── polars-ops v0.44.2 (*)
# ├── polars-plan v0.44.2 (*)
# └── polars-time v0.44.2 (*)
# rand v0.8.5
# ├── msyrs v0.0.1 (E:\Work\ruzt\msyrs)
# ├── polars-core v0.44.2 (*)
# ├── polars-expr v0.44.2 (*)
# └── rand_distr v0.4.3
# └── polars-core v0.44.2 (*)
# rand v0.8.5
# └── phf_generator v0.11.2
# └── phf_codegen v0.11.2
# └── chrono-tz-build v0.2.1
# [build-dependencies]
# └── chrono-tz v0.8.6
# ├── polars-arrow v0.44.2 (*)
# ├── polars-core v0.44.2 (*)
# ├── polars-io v0.44.2 (*)
# ├── polars-json v0.44.2 (*)
# ├── polars-ops v0.44.2 (*)
# ├── polars-plan v0.44.2 (*)
# └── polars-time v0.44.2 (*)
# rand_core v0.6.4
# └── rand v0.8.5 (*)
# rand_core v0.6.4
# ├── rand v0.8.5 (*)
# └── rand_chacha v0.3.1
# └── rand v0.8.5 (*)
# regex v1.11.1
# ├── polars-core v0.44.2 (*)
# ├── polars-error v0.44.2
# │ ├── polars v0.44.2 (*)
# │ ├── polars-arrow v0.44.2 (*)
# │ ├── polars-compute v0.44.2 (*)
# │ ├── polars-core v0.44.2 (*)
# │ ├── polars-io v0.44.2 (*)
# │ ├── polars-json v0.44.2 (*)
# │ ├── polars-mem-engine v0.44.2 (*)
# │ ├── polars-ops v0.44.2 (*)
# │ ├── polars-parquet v0.44.2 (*)
# │ ├── polars-row v0.44.2 (*)
# │ ├── polars-schema v0.44.2 (*)
# │ ├── polars-time v0.44.2 (*)
# │ └── polars-utils v0.44.2 (*)
# ├── polars-io v0.44.2 (*)
# ├── polars-ops v0.44.2 (*)
# ├── polars-plan v0.44.2 (*)
# └── polars-time v0.44.2 (*)
# regex v1.11.1
# └── parse-zoneinfo v0.3.1
# └── chrono-tz-build v0.2.1 (*)
# regex-automata v0.4.9
# └── regex v1.11.1 (*)
# regex-automata v0.4.9
# └── regex v1.11.1 (*)
# regex-syntax v0.8.5
# ├── polars-ops v0.44.2 (*)
# ├── regex v1.11.1 (*)
# └── regex-automata v0.4.9 (*)
# regex-syntax v0.8.5
# ├── regex v1.11.1 (*)
# └── regex-automata v0.4.9 (*)
# syn v1.0.109
# └── multiversion-macros v0.7.4 (proc-macro)
# └── multiversion v0.7.4
# └── polars-arrow v0.44.2 (*)
# syn v2.0.87
# ├── bytemuck_derive v1.8.0 (proc-macro)
# │ └── bytemuck v1.19.0
# │ ├── polars-arrow v0.44.2 (*)
# │ ├── polars-compute v0.44.2 (*)
# │ ├── polars-core v0.44.2 (*)
# │ ├── polars-ops v0.44.2 (*)
# │ ├── polars-parquet v0.44.2 (*)
# │ ├── polars-plan v0.44.2 (*)
# │ ├── polars-row v0.44.2 (*)
# │ ├── polars-time v0.44.2 (*)
# │ └── polars-utils v0.44.2 (*)
# ├── displaydoc v0.2.5 (proc-macro)
# │ ├── icu_collections v1.5.0
# │ │ ├── icu_normalizer v1.5.0
# │ │ │ └── idna_adapter v1.2.0
# │ │ │ └── idna v1.0.3
# │ │ │ └── url v2.5.3
# │ │ │ └── reqwest v0.12.9 (*)
# │ │ └── icu_properties v1.5.1
# │ │ ├── icu_normalizer v1.5.0 (*)
# │ │ └── idna_adapter v1.2.0 (*)
# │ ├── icu_locid v1.5.0
# │ │ ├── icu_locid_transform v1.5.0
# │ │ │ └── icu_properties v1.5.1 (*)
# │ │ └── icu_provider v1.5.0
# │ │ ├── icu_locid_transform v1.5.0 (*)
# │ │ ├── icu_normalizer v1.5.0 (*)
# │ │ └── icu_properties v1.5.1 (*)
# │ ├── icu_locid_transform v1.5.0 (*)
# │ ├── icu_normalizer v1.5.0 (*)
# │ ├── icu_properties v1.5.1 (*)
# │ ├── icu_provider v1.5.0 (*)
# │ └── tinystr v0.7.6
# │ ├── icu_locid v1.5.0 (*)
# │ ├── icu_locid_transform v1.5.0 (*)
# │ ├── icu_properties v1.5.1 (*)
# │ └── icu_provider v1.5.0 (*)
# ├── futures-macro v0.3.31 (proc-macro)
# │ └── futures-util v0.3.31
# │ ├── futures v0.3.31
# │ │ └── msyrs v0.0.1 (E:\Work\ruzt\msyrs)
# │ ├── futures-executor v0.3.31
# │ │ └── futures v0.3.31 (*)
# │ ├── http-body-util v0.1.2
# │ │ ├── hyper-tls v0.6.0 (*)
# │ │ └── reqwest v0.12.9 (*)
# │ ├── hyper v1.5.0 (*)
# │ ├── hyper-util v0.1.10 (*)
# │ └── reqwest v0.12.9 (*)
# ├── icu_provider_macros v1.5.0 (proc-macro)
# │ └── icu_provider v1.5.0 (*)
# ├── pyo3-macros v0.21.2 (proc-macro) (*)
# ├── pyo3-macros-backend v0.21.2 (*)
# ├── recursive-proc-macro-impl v0.1.1 (proc-macro)
# │ └── recursive v0.1.1
# │ └── polars-plan v0.44.2 (*)
# ├── ref-cast-impl v1.0.23 (proc-macro)
# │ └── ref-cast v1.0.23
# │ └── simd-json v0.14.3 (*)
# ├── serde_derive v1.0.215 (proc-macro)
# │ └── serde v1.0.215
# │ ├── compact_str v0.8.0
# │ │ └── polars-utils v0.44.2 (*)
# │ ├── halfbrown v0.2.5 (*)
# │ ├── hashbrown v0.14.5 (*)
# │ ├── hashbrown v0.15.1 (*)
# │ ├── indexmap v2.6.0 (*)
# │ ├── msyrs v0.0.1 (E:\Work\ruzt\msyrs)
# │ ├── polars-arrow-format v0.1.0
# │ │ ├── polars-arrow v0.44.2 (*)
# │ │ └── polars-error v0.44.2 (*)
# │ ├── reqwest v0.12.9 (*)
# │ ├── serde_json v1.0.133
# │ │ ├── msyrs v0.0.1 (E:\Work\ruzt\msyrs)
# │ │ ├── reqwest v0.12.9 (*)
# │ │ └── simd-json v0.14.3 (*)
# │ ├── serde_urlencoded v0.7.1
# │ │ ├── msyrs v0.0.1 (E:\Work\ruzt\msyrs)
# │ │ └── reqwest v0.12.9 (*)
# │ └── simd-json v0.14.3 (*)
# ├── strum_macros v0.26.4 (proc-macro) (*)
# ├── synstructure v0.13.1
# │ ├── yoke-derive v0.7.4 (proc-macro)
# │ │ └── yoke v0.7.4
# │ │ ├── icu_collections v1.5.0 (*)
# │ │ ├── icu_provider v1.5.0 (*)
# │ │ └── zerovec v0.10.4
# │ │ ├── icu_collections v1.5.0 (*)
# │ │ ├── icu_locid v1.5.0 (*)
# │ │ ├── icu_locid_transform v1.5.0 (*)
# │ │ ├── icu_normalizer v1.5.0 (*)
# │ │ ├── icu_properties v1.5.1 (*)
# │ │ ├── icu_provider v1.5.0 (*)
# │ │ └── tinystr v0.7.6 (*)
# │ └── zerofrom-derive v0.1.4 (proc-macro)
# │ └── zerofrom v0.1.4
# │ ├── icu_collections v1.5.0 (*)
# │ ├── icu_provider v1.5.0 (*)
# │ ├── yoke v0.7.4 (*)
# │ └── zerovec v0.10.4 (*)
# ├── thiserror-impl v1.0.69 (proc-macro)
# │ └── thiserror v1.0.69
# │ ├── polars-core v0.44.2 (*)
# │ ├── polars-error v0.44.2 (*)
# │ └── pyo3-polars v0.18.0 (*)
# ├── yoke-derive v0.7.4 (proc-macro) (*)
# ├── zerocopy-derive v0.7.35 (proc-macro)
# │ └── zerocopy v0.7.35
# │ ├── ahash v0.8.11
# │ │ ├── hashbrown v0.14.5 (*)
# │ │ ├── polars-arrow v0.44.2 (*)
# │ │ ├── polars-core v0.44.2 (*)
# │ │ ├── polars-expr v0.44.2 (*)
# │ │ ├── polars-io v0.44.2 (*)
# │ │ ├── polars-json v0.44.2 (*)
# │ │ ├── polars-lazy v0.44.2 (*)
# │ │ ├── polars-ops v0.44.2 (*)
# │ │ ├── polars-parquet v0.44.2 (*)
# │ │ ├── polars-plan v0.44.2 (*)
# │ │ ├── polars-utils v0.44.2 (*)
# │ │ └── simd-json v0.14.3 (*)
# │ └── ppv-lite86 v0.2.20
# │ └── rand_chacha v0.3.1 (*)
# ├── zerofrom-derive v0.1.4 (proc-macro) (*)
# └── zerovec-derive v0.10.3 (proc-macro)
# └── zerovec v0.10.4 (*)
# windows-sys v0.52.0
# ├── home v0.5.9
# │ └── polars-io v0.44.2 (*)
# ├── mio v1.0.2
# │ └── tokio v1.41.1
# │ ├── h2 v0.4.6 (*)
# │ ├── hyper v1.5.0 (*)
# │ ├── hyper-tls v0.6.0 (*)
# │ ├── hyper-util v0.1.10 (*)
# │ ├── msyrs v0.0.1 (E:\Work\ruzt\msyrs)
# │ ├── reqwest v0.12.9 (*)
# │ ├── tokio-native-tls v0.3.1
# │ │ ├── hyper-tls v0.6.0 (*)
# │ │ └── reqwest v0.12.9 (*)
# │ └── tokio-util v0.7.12
# │ └── h2 v0.4.6 (*)
# ├── socket2 v0.5.7
# │ ├── hyper-util v0.1.10 (*)
# │ └── tokio v1.41.1 (*)
# └── tokio v1.41.1 (*)
# windows-sys v0.59.0
# ├── schannel v0.1.26
# │ └── native-tls v0.2.12
# │ ├── hyper-tls v0.6.0 (*)
# │ ├── reqwest v0.12.9 (*)
# │ └── tokio-native-tls v0.3.1 (*)
# └── stacker v0.1.17
# ├── polars-utils v0.44.2 (*)
# └── recursive v0.1.1 (*)

View File

@ -4,12 +4,20 @@ A Rust implementation of the [Macrosynergy Python Package](https://github.com/ma
## Build and install the Python package
Create a virtual environment:
```bash
python -m venv .venv
```
```bash
# source .venv/bin/activate
./.venv/Scripts/activate
pip install maturin
maturin develop --release
pip install uv
uv pip install maturin jupyter notebook
uv pip install .
# alternatively, directly use maturin:
# maturin develop --release
```
## Status

File diff suppressed because one or more lines are too long

View File

@ -6,6 +6,13 @@ build-backend = "maturin"
name = "msyrs"
version = "0.0.1"
requires-python = ">=3.7"
dependencies = [
"macrosynergy>=1.2.0",
"polars>=1.0.0",
"pyarrow>=16.0.0",
]
classifiers = [
"Programming Language :: Rust",
"Programming Language :: Python :: Implementation :: CPython",

View File

@ -1,4 +1,3 @@
/// Python wrapper for [`crate::utils::qdf`].
pub mod qdf;

View File

@ -1,11 +1,13 @@
use pyo3::prelude::*;
use pyo3_polars::PyDataFrame;
use pyo3_polars::{PyDataFrame, PySeries};
/// Python wrapper for [`crate::panel`] module.
#[allow(deprecated)]
#[pymodule]
/// Entry point for the Python `panel` submodule: registers every
/// `#[pyfunction]` from this file on the module object `m`.
pub fn panel(_py: Python, m: &PyModule) -> PyResult<()> {
    // Exposed functions: historic volatility plus its two helper
    // routines for inspecting business-date / period-index selection.
    m.add_function(wrap_pyfunction!(historic_vol, m)?)?;
    m.add_function(wrap_pyfunction!(get_bdates_from_col_hv, m)?)?;
    m.add_function(wrap_pyfunction!(get_period_indices_hv, m)?)?;
    Ok(())
}
@ -44,3 +46,18 @@ pub fn historic_vol(
.unwrap(),
))
}
#[pyfunction]
pub fn get_bdates_from_col_hv(dfw: PyDataFrame, est_freq: &str) -> PyResult<PySeries> {
Ok(PySeries(
crate::panel::historic_vol::get_bdates_from_col_hv(&dfw.into(), est_freq)
.unwrap()
.into(),
))
}
#[pyfunction]
pub fn get_period_indices_hv(dfw: PyDataFrame, est_freq: &str) -> PyResult<Vec<usize>> {
Ok(crate::panel::historic_vol::get_period_indices_hv(&dfw.into(), est_freq).unwrap())
}

View File

@ -1,5 +1,6 @@
use polars::error::PolarsError;
use polars::export::chrono::NaiveDate;
// use polars::export::chrono::NaiveDate;
use chrono::NaiveDate;
use polars::prelude::*;
use polars::series::Series;
use serde::Deserialize;

View File

@ -1,4 +1,4 @@
from polars import DataFrame
from polars import DataFrame, Series
__all__ = [
"download",
@ -38,3 +38,10 @@ class panel:
@staticmethod
def historic_vol(*args, **kwargs) -> DataFrame: ...
@staticmethod
def get_bdates_from_col_hv(*args, **kwargs) -> Series: ...
@staticmethod
def get_period_indices_hv(*args, **kwargs) -> Series: ...

View File

@ -1,7 +1,11 @@
use crate::utils::misc::*;
use crate::utils::qdf::pivots::*;
use chrono::{Datelike, NaiveDate};
use crate::utils::qdf::reduce_df::*;
use chrono::NaiveDate;
use ndarray::{s, Array, Array1, Zip};
use polars::prelude::*;
use polars::series::Series; // Series struct
// use polars::time::Duration;
/// Returns the annualization factor for 252 trading days.
@ -74,9 +78,19 @@ fn freq_daily_calc(
);
}
let mut new_df = dfw.clone();
let idx = UInt32Chunked::from_vec(
"idx".into(),
(lback_periods - 1..dfw.height())
.map(|x| x as u32)
.collect(),
);
let real_date_col = dfw.column("real_date")?.take(&idx)?;
let mut new_df = DataFrame::new(vec![real_date_col])?;
for col_name in dfw.get_column_names() {
if col_name == "real_date" {
continue;
}
let series = dfw.column(col_name)?;
let values: Array1<f64> = series
.f64()?
@ -87,8 +101,8 @@ fn freq_daily_calc(
let result_series = match lback_method {
"ma" => {
let mut result = Vec::new();
for i in 0..(values.len() - lback_periods + 1) {
let window = values.slice(s![i..i + lback_periods]);
for i in (lback_periods - 1)..(values.len()) {
let window = values.slice(s![i + 1 - lback_periods..=i]);
let std = flat_std(&window.to_owned(), remove_zeros);
result.push(std);
}
@ -98,8 +112,9 @@ fn freq_daily_calc(
let half_life = half_life.unwrap();
let weights = expo_weights(lback_periods, half_life);
let mut result = Vec::new();
for i in 0..(values.len() - lback_periods + 1) {
let window = values.slice(s![i..i + lback_periods]);
// for i in 0..(values.len() - lback_periods + 1) {
for i in (lback_periods - 1)..(values.len()) {
let window = values.slice(s![i + 1 - lback_periods..=i]);
let std = expo_std(&window.to_owned(), &weights, remove_zeros);
result.push(std);
}
@ -122,7 +137,7 @@ fn freq_period_calc(
half_life: Option<f64>,
remove_zeros: bool,
nan_tolerance: f64,
period: &str,
est_freq: &str,
) -> Result<DataFrame, Box<dyn std::error::Error>> {
if lback_method == "xma" {
assert!(
@ -132,9 +147,17 @@ fn freq_period_calc(
}
println!("Calculating historic volatility with the following parameters:");
println!("lback_periods: {:?}, lback_method: {:?}, half_life: {:?}, remove_zeros: {:?}, nan_tolerance: {:?}, period: {:?}", lback_periods, lback_method, half_life, remove_zeros, nan_tolerance, period);
println!("lback_periods: {:?}, lback_method: {:?}, half_life: {:?}, remove_zeros: {:?}, nan_tolerance: {:?}, period: {:?}", lback_periods, lback_method, half_life, remove_zeros, nan_tolerance, est_freq);
let mut new_df = dfw.clone();
let period_indices: Vec<usize> = get_period_indices(dfw, est_freq)?;
// new_df = dfw['real_date'].iloc[period_indices].copy()
let idx = UInt32Chunked::from_vec(
"idx".into(),
period_indices.iter().map(|&x| x as u32).collect(),
);
let real_date_col = dfw.column("real_date")?.take(&idx)?;
let mut new_df = DataFrame::new(vec![real_date_col])?;
for col_name in dfw.get_column_names() {
if col_name == "real_date" {
@ -150,11 +173,11 @@ fn freq_period_calc(
let result_series = match lback_method {
"ma" => {
let mut result = Vec::new();
let period_indices = get_period_indices(dfw, period)?;
for &i in &period_indices {
if i >= lback_periods - 1 {
let window = values.slice(s![i + 1 - lback_periods..=i]);
let std = flat_std(&window.to_owned(), remove_zeros);
let std = std * annualization_factor();
result.push(std);
} else {
result.push(f64::NAN);
@ -166,11 +189,11 @@ fn freq_period_calc(
let half_life = half_life.unwrap();
let weights = expo_weights(lback_periods, half_life);
let mut result = Vec::new();
let period_indices = get_period_indices(dfw, period)?;
for &i in &period_indices {
if i >= lback_periods - 1 {
let window = values.slice(s![i + 1 - lback_periods..=i]);
let std = expo_std(&window.to_owned(), &weights, remove_zeros);
let std = std * annualization_factor();
result.push(std);
} else {
result.push(f64::NAN);
@ -180,63 +203,49 @@ fn freq_period_calc(
}
_ => return Err("Invalid lookback method.".into()),
};
println!("Successfully calculated result_series for column: {:?}", col_name);
println!(
"Successfully calculated result_series for column: {:?}",
col_name
);
new_df.with_column(result_series)?;
}
Ok(new_df)
}
/// Extract the period-start business dates from the `real_date` column of a
/// wide (ticker-pivoted) DataFrame, bucketed at frequency `est_freq`.
///
/// # Errors
/// Returns an error if the `real_date` column is missing, cannot be viewed
/// as a `Series` (previously an `.unwrap()` panic), or if `est_freq` is not
/// a frequency supported by `get_bdates_from_col`.
pub fn get_bdates_from_col_hv(
    dfw: &DataFrame,
    est_freq: &str,
) -> Result<Series, Box<dyn std::error::Error>> {
    let date_series = dfw
        .column("real_date")?
        .as_series()
        .ok_or("`real_date` column could not be viewed as a Series")?;
    Ok(get_bdates_from_col(date_series, est_freq)?)
}
/// Public re-export shim around the private [`get_period_indices`] helper,
/// so the Python bindings can call it without widening the helper's
/// visibility. Returns the row indices of period-end dates for `est_freq`.
pub fn get_period_indices_hv(
    dfw: &DataFrame,
    est_freq: &str,
) -> Result<Vec<usize>, Box<dyn std::error::Error>> {
    get_period_indices(dfw, est_freq)
}
fn get_period_indices(
dfw: &DataFrame,
period: &str,
est_freq: &str,
) -> Result<Vec<usize>, Box<dyn std::error::Error>> {
let date_series: &Logical<DateType, Int32Type> = dfw.column("real_date")?.date()?;
// let date_series: &Logical<DateType, Int32Type> = dfw.column("real_date")?.date()?;
let date_series = dfw.column("real_date")?.as_series().unwrap();
let mut indices = Vec::new();
match period {
"weekly" => {
for (i, date) in date_series.into_iter().enumerate() {
if let Some(date) = date {
if let Some(naive_date) = NaiveDate::from_num_days_from_ce_opt(date) {
if naive_date.weekday() == chrono::Weekday::Fri {
indices.push(i);
}
}
}
}
let bdates: Series = get_bdates_from_col(date_series, est_freq)?;
for bdate in bdates.iter() {
if let Some(index) = date_series.iter().position(|date| date == bdate) {
indices.push(index);
}
"monthly" => {
let mut current_month = None;
for (i, date) in date_series.into_iter().enumerate() {
if let Some(date) = date {
if let Some(naive_date) = NaiveDate::from_num_days_from_ce_opt(date) {
if current_month.is_none() || current_month.unwrap() != naive_date.month() {
current_month = Some(naive_date.month());
if i > 0 {
indices.push(i - 1);
}
}
}
}
}
if let Some(_) = current_month {
indices.push(date_series.len() - 1);
}
}
_ => return Err("Invalid period specified.".into()),
}
Ok(indices)
}
// #[allow(dead_code)]
// fn single_calc(
// // end_date: -- naive datetime
// end_date: NaiveDate,
// wide_df: &DataFrame,
// lback_periods: usize,
// lback_method: &str,
// nan_tolerance: f64,
/// Calculate historic volatility.
/// Arguments:
@ -273,7 +282,18 @@ pub fn historic_vol(
) -> Result<DataFrame, Box<dyn std::error::Error>> {
println!("Calculating historic volatility with the following parameters:");
println!("xcat: {:?},\ncids: {:?},\nlback_periods: {:?},lback_method: {:?},\nhalf_life: {:?},\nstart: {:?},\nend: {:?},\nest_freq: {:?},\nremove_zeros: {:?},\npostfix: {:?},\nnan_tolerance: {:?}", xcat, cids, lback_periods,lback_method, half_life, start, end, est_freq, remove_zeros, postfix, nan_tolerance);
let mut dfw = pivot_dataframe_by_ticker(df.clone(), Some("value".to_string()))?;
let rdf = reduce_dataframe(
df.clone(),
cids,
Some(vec![xcat]),
None,
start.clone(),
end.clone(),
false,
)?;
let mut dfw = pivot_dataframe_by_ticker(rdf, Some("value".to_string()))?;
println!("Successfully pivoted the DataFrame.");
@ -332,16 +352,10 @@ pub fn historic_vol(
println!("Successfully filtered the DataFrame.");
let period = match est_freq.as_str() {
"W" => "weekly",
"M" => "monthly",
_ => return Err("Invalid frequency specified.".into()),
};
println!("Successfully got period.");
let dfw = match est_freq.as_str() {
"D" => freq_daily_calc(
let mut dfw = match est_freq.as_str() {
"X" => freq_daily_calc(
&dfw,
lback_periods,
&lback_method,
@ -356,9 +370,21 @@ pub fn historic_vol(
half_life,
remove_zeros,
nan_tolerance,
&period,
&est_freq,
)?,
};
// rename each column to include the postfix
for ic in 0..dfw.get_column_names().len() {
let col_name = dfw.get_column_names()[ic].to_string();
if col_name == "real_date" {
continue;
}
let new_col_name = format!("{}{}", col_name, postfix);
dfw.rename(&col_name, new_col_name.into())?;
}
dfw = pivot_wide_dataframe_to_qdf(dfw, Some("value".to_string()))?;
Ok(dfw)
}

0
src/qdf/classes.rs Normal file
View File

View File

@ -1,8 +1,10 @@
use chrono::NaiveDate;
use chrono::{Datelike, Weekday};
use polars::prelude::*;
use std::collections::HashMap;
use std::error::Error;
/// Split a ticker string into `cid` and `xcat`.
pub fn split_ticker(ticker: String) -> Result<(String, String), Box<dyn Error>> {
// split by the first underscore character. return the first and second parts.
let parts: Vec<&str> = ticker.splitn(2, '_').collect();
@ -12,6 +14,7 @@ pub fn split_ticker(ticker: String) -> Result<(String, String), Box<dyn Error>>
Ok((parts[0].to_string(), parts[1].to_string()))
}
/// Get the minimum and maximum dates from a date column in a DataFrame.
pub fn get_min_max_real_dates(
df: &DataFrame,
date_col: &str,
@ -40,22 +43,86 @@ pub fn get_min_max_real_dates(
)))
}
}
/// Get the business dates from a date column in a DataFrame.
/// Identify business days, bucket them by period, and pick the first available date from each period.
/// Identify business days in a `Date`-typed column, bucket them by the
/// period implied by `freq`, and return the first business day of each
/// period as a chronologically sorted `Date` Series.
///
/// Supported frequencies: `"D"` (daily), `"W"` (ISO week), `"M"` (month),
/// `"Q"` (quarter), `"A"` (year).
///
/// # Errors
/// Fails if `date_col` is not of `Date` dtype or `freq` is unrecognized.
pub fn get_bdates_from_col(date_col: &Series, freq: &str) -> Result<Series, Box<dyn Error>> {
    // Ensure the column is of Date type
    if date_col.dtype() != &DataType::Date {
        return Err("The column is not of Date type".into());
    }

    // Step 1: identify business days (exclude weekends). Polars `Date`
    // physically stores days since the Unix epoch (1970-01-01).
    let date_as_days = date_col.cast(&DataType::Int32)?;
    let business_days: Vec<NaiveDate> = date_as_days
        .i32()?
        .into_iter()
        .filter_map(|opt_days| {
            opt_days.map(|days| {
                NaiveDate::from_ymd_opt(1970, 1, 1).unwrap() + chrono::Duration::days(days as i64)
            })
        })
        .filter(|date| {
            // Exclude weekends (Saturday and Sunday)
            let weekday = date.weekday();
            weekday != Weekday::Sat && weekday != Weekday::Sun
        })
        .collect();

    // Step 2: bucket dates by a string period key.
    let mut buckets: HashMap<String, Vec<NaiveDate>> = HashMap::new();
    for date in &business_days {
        let bucket_key = match freq {
            "D" => date.format("%Y-%m-%d").to_string(),
            "W" => format!("{}-W{:02}", date.year(), date.iso_week().week()),
            "M" => date.format("%Y-%m").to_string(),
            "Q" => format!("{}-Q{}", date.year(), (date.month() - 1) / 3 + 1),
            "A" => date.year().to_string(),
            _ => return Err("Invalid frequency specified".into()),
        };
        buckets.entry(bucket_key).or_default().push(*date);
    }

    // Step 3: pick the earliest date from each bucket (min avoids a full
    // per-bucket sort).
    let mut selected_dates: Vec<NaiveDate> = Vec::new();
    for (_, dates) in buckets {
        if let Some(first_date) = dates.into_iter().min() {
            selected_dates.push(first_date);
        }
    }
    // BUGFIX: `HashMap` iteration order is nondeterministic, so the output
    // Series order previously varied between runs, which breaks downstream
    // positional lookups (e.g. `get_period_indices`). Sort chronologically.
    selected_dates.sort();

    // Step 4: convert selected dates back to a Series of Date type.
    let bdates_series = Series::new(
        "bdates".into(),
        selected_dates
            .into_iter()
            .map(|date| date.format("%Y-%m-%d").to_string()) // format as strings
            .collect::<Vec<String>>(),
    )
    .cast(&DataType::Date)?; // cast back to Date dtype

    Ok(bdates_series)
}
/// Return only the `cid` part of a `CID_XCAT` ticker string.
#[allow(dead_code)]
pub fn get_cid(ticker: String) -> Result<String, Box<dyn Error>> {
    let (cid, _xcat) = split_ticker(ticker)?;
    Ok(cid)
}
/// Return only the `xcat` part of a `CID_XCAT` ticker string.
#[allow(dead_code)]
pub fn get_xcat(ticker: String) -> Result<String, Box<dyn Error>> {
    let (_cid, xcat) = split_ticker(ticker)?;
    Ok(xcat)
}
/// Join a `cid` and an `xcat` into a ticker string of the form `CID_XCAT`.
///
/// (The previous doc comment — "Get the `cid` and `xcat` from a ticker
/// string" — described the inverse operation, `split_ticker`.)
pub fn create_ticker(cid: &str, xcat: &str) -> String {
    format!("{}_{}", cid, xcat)
}
pub fn create_interesecting_tickers(cids: &[&str], xcats: &[&str]) -> Vec<String> {
/// Create all possible tickers from a list of `cids` and `xcats`.
pub fn create_intersecting_tickers(cids: &[&str], xcats: &[&str]) -> Vec<String> {
let mut tickers = Vec::new();
for cid in cids {
for xcat in xcats {

View File

@ -1,5 +1,5 @@
use crate::utils::misc::{
_get_unique_strs_from_str_column_object, create_interesecting_tickers,
_get_unique_strs_from_str_column_object, create_intersecting_tickers,
get_intersecting_cids_str_func, get_unique_from_str_column,
};
use polars::datatypes::DataType;
@ -108,7 +108,7 @@ fn get_tickers_interesecting_on_xcat(
.unwrap_or_else(|| get_unique_xcats(df).unwrap());
let rel_cids_str: Vec<&str> = rel_cids.iter().map(AsRef::as_ref).collect();
let rel_xcats_str: Vec<&str> = rel_xcats.iter().map(AsRef::as_ref).collect();
Ok(create_interesecting_tickers(&rel_cids_str, &rel_xcats_str))
Ok(create_intersecting_tickers(&rel_cids_str, &rel_xcats_str))
}
/// Get the unique tickers from a Quantamental DataFrame.

View File

@ -66,6 +66,41 @@ pub fn pivot_dataframe_by_ticker(
Ok(new_df)
}
/// Pivot a wide DataFrame (one column per ticker plus `real_date`) back
/// into Quantamental DataFrame (QDF) long format: for each ticker column,
/// a (`real_date`, `cid`, `xcat`, metric) frame is built and all frames are
/// stacked row-wise. The metric column name defaults to `"value"`.
#[allow(dead_code)]
pub fn pivot_wide_dataframe_to_qdf(
    df: DataFrame,
    metric: Option<String>,
) -> Result<DataFrame, Box<dyn Error>> {
    // df list of dataframes — one long-format frame per ticker column
    let mut df_vec: Vec<DataFrame> = Vec::new();
    // if no metric is provided, set it to 'value'
    let metric = metric.unwrap_or("value".into());
    for tkr in df.get_column_names() {
        // `real_date` is the shared index column, not a ticker
        if tkr == "real_date" {
            continue;
        }
        let mut new_df = DataFrame::new(vec![df.column("real_date")?.clone()])?;
        // copy the column to the new dataframe
        new_df.with_column(df.column(tkr)?.clone())?;
        new_df.rename(tkr, metric.clone().into())?;
        // add the cid and xcat columns (derived from the ticker name)
        let (cid, xcat) = split_ticker(tkr.to_string())?;
        // NOTE(review): these are length-1 Series attached to a multi-row
        // frame — this relies on polars broadcasting unit-length columns in
        // `with_column`; confirm against the pinned polars version.
        new_df.with_column(Series::new("cid".into(), vec![cid]))?;
        new_df.with_column(Series::new("xcat".into(), vec![xcat]))?;
        df_vec.push(new_df);
    }
    // create a new dataframe, concatenating the per-ticker frames row-wise
    let mut new_df = DataFrame::new(vec![])?;
    for df in df_vec.iter() {
        new_df.vstack_mut(df)?;
    }
    // validate the result conforms to the QDF schema before returning
    check_quantamental_dataframe(&new_df)?;
    Ok(new_df)
}
/// Splits a dataframe by ticker.
#[allow(dead_code)]
fn split_df_by_tickers(

View File

@ -51,7 +51,7 @@ pub fn reduce_dataframe(
let specified_metrics: Vec<String> =
metrics.unwrap_or_else(|| non_idx_cols.iter().map(|s| s.to_string()).collect());
let specified_tickers: Vec<String> = create_interesecting_tickers(
let specified_tickers: Vec<String> = create_intersecting_tickers(
&specified_cids
.iter()
.map(AsRef::as_ref)
@ -76,7 +76,7 @@ pub fn reduce_dataframe(
.collect::<Vec<String>>(),
&u_tickers,
);
create_interesecting_tickers(
create_intersecting_tickers(
&int_cids.iter().map(AsRef::as_ref).collect::<Vec<&str>>(),
&specified_xcats
.iter()