python working with notebook!

This commit is contained in:
Palash Tyagi
2024-11-17 23:58:47 +00:00
parent 09f74916e8
commit bba5acd724
13 changed files with 586 additions and 79 deletions

View File

@@ -1,30 +1,14 @@
#![doc = include_str!("../README.md")]
/// Documentation for the `msyrs` Python API.
pub mod py;
/// Documentation for the Rust API.
/// Documentation for the `download` module.
pub mod download;
pub mod utils;
use pyo3::{prelude::*, wrap_pymodule};
use pyo3_polars::PyDataFrame;
#[pyfunction]
pub fn load_qdf(file_path: &str) -> PyResult<PyDataFrame> {
Ok(PyDataFrame(
utils::qdf::load_quantamental_dataframe(file_path).unwrap(),
))
}
// ignore deprecated warning
#[allow(deprecated)]
#[pymodule]
pub fn qdf(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(load_qdf, m)?)?;
Ok(())
}
#[allow(deprecated)]
#[pymodule]
pub fn msyrs(_py: Python, m: &PyModule) -> PyResult<()> {
// add qdf as a submodule
m.add_wrapped(wrap_pymodule!(qdf))?;
Ok(())
}
pub use py::msyrs;

12
src/py/mod.rs Normal file
View File

@@ -0,0 +1,12 @@
/// Python API for [`crate::utils::qdf`].
pub mod qdf;
use pyo3::{prelude::*, wrap_pymodule};
// use pyo3_polars::PyDataFrame;
// Top-level `msyrs` Python module: its only job is to expose the `qdf`
// submodule defined in `qdf::qdf`.
// NOTE(review): the `allow` presumably silences PyO3's deprecation warning for
// the `&PyModule` signature — confirm against the pinned pyo3 version.
#[allow(deprecated)]
#[pymodule]
pub fn msyrs(_py: Python, m: &PyModule) -> PyResult<()> {
    // Registering the submodule is the last step, so its result is returned
    // directly instead of being unwrapped with `?` and re-wrapped in `Ok(())`.
    m.add_wrapped(wrap_pymodule!(qdf::qdf))
}

78
src/py/qdf.rs Normal file
View File

@@ -0,0 +1,78 @@
use pyo3::prelude::*;
use pyo3_polars::PyDataFrame;
/// Python wrapper for [`crate::utils::qdf`] module.
// NOTE(review): the `allow` presumably silences PyO3's deprecation warning for
// the `&PyModule` signature — confirm against the pinned pyo3 version.
#[allow(deprecated)]
#[pymodule]
pub fn qdf(_py: Python, m: &PyModule) -> PyResult<()> {
    // Loaders.
    m.add_function(wrap_pyfunction!(load_qdf, m)?)?;
    m.add_function(wrap_pyfunction!(load_qdf_from_download_bank, m)?)?;
    // DataFrame transforms; the final registration's result is the return value.
    m.add_function(wrap_pyfunction!(reduce_dataframe, m)?)?;
    m.add_function(wrap_pyfunction!(update_dataframe, m)?)
}
/// Python wrapper for loading a Quantamental DataFrame from a CSV file.
/// See [`crate::utils::qdf::load_quantamental_dataframe`] for full documentation.
#[pyfunction]
pub fn load_qdf(file_path: String) -> PyResult<PyDataFrame> {
Ok(PyDataFrame(
crate::utils::qdf::load_quantamental_dataframe(file_path).unwrap(),
))
}
/// Python wrapper for loading a Quantamental DataFrame from a download bank.
/// See [`crate::utils::qdf::load::load_qdf_from_download_bank`] for full documentation.
#[pyfunction]
pub fn load_qdf_from_download_bank(
folder_path: String,
cids: Option<Vec<String>>,
xcats: Option<Vec<String>>,
tickers: Option<Vec<String>>,
) -> PyResult<PyDataFrame> {
Ok(PyDataFrame(
crate::utils::qdf::load::load_qdf_from_download_bank(folder_path, cids, xcats, tickers)
.unwrap(),
))
}
/// Python wrapper for reduce_dataframe
/// See [`crate::utils::qdf::reduce_df::reduce_dataframe`] for full documentation.
#[pyfunction]
pub fn reduce_dataframe(
df: PyDataFrame,
cids: Option<Vec<String>>,
xcats: Option<Vec<String>>,
metrics: Option<Vec<String>>,
start: Option<String>,
end: Option<String>,
intersect: Option<bool>,
) -> PyResult<PyDataFrame> {
Ok(PyDataFrame(
crate::utils::qdf::reduce_df::reduce_dataframe(
df.into(),
cids,
xcats,
metrics,
start,
end,
intersect.unwrap_or(false),
)
.unwrap(),
))
}
/// Python wrapper for update_dataframe
/// See [`crate::utils::qdf::update_df::update_dataframe`] for full documentation.
#[pyfunction]
pub fn update_dataframe(
df: PyDataFrame,
df_add: PyDataFrame,
xcat_replace: Option<bool>,
) -> PyResult<PyDataFrame> {
let xcat_replace = xcat_replace.unwrap_or(false);
Ok(PyDataFrame(
crate::utils::qdf::update_df::update_dataframe(&df.into(), &df_add.into(), xcat_replace)
.unwrap(),
))
}

View File

@@ -23,10 +23,10 @@ fn _file_base_name(file_path: String) -> String {
/// The CSV must be named in the format `cid_xcat.csv` (`ticker.csv`).
/// The DataFrame must have a `real_date` column along with additional value columns.
pub fn load_quantamental_dataframe(
file_path: &str,
file_path: String,
) -> Result<DataFrame, Box<dyn std::error::Error>> {
// get the file base name
let base_file_name = _file_base_name(file_path.into());
let base_file_name = _file_base_name(file_path.clone().into());
// if filename does not have _ then it is not a Quantamental DataFrame
if !base_file_name.contains('_') {
@@ -37,7 +37,7 @@ pub fn load_quantamental_dataframe(
let (cid, xcat) = split_ticker(ticker.to_string())?;
let mut df = CsvReadOptions::default()
.try_into_reader_with_file_path(Some(file_path.into()))
.try_into_reader_with_file_path(Some(file_path.to_string().into()))
.unwrap()
.finish()
.unwrap();
@@ -99,7 +99,7 @@ fn collect_paths_recursively<P: AsRef<std::path::Path>>(path: P) -> std::io::Res
}
fn _load_qdf_thread_safe(file_path: &str) -> Result<DataFrame, Box<dyn Error + Send + Sync>> {
let res = load_quantamental_dataframe(file_path);
let res = load_quantamental_dataframe(file_path.to_string());
res.map_err(|e| {
anyhow::Error::msg(e.to_string())
.context("Failed to load quantamental dataframe")
@@ -107,10 +107,10 @@ fn _load_qdf_thread_safe(file_path: &str) -> Result<DataFrame, Box<dyn Error + S
})
}
pub fn load_qdf_from_download_bank(
folder_path: &str,
cids: Option<Vec<&str>>,
xcats: Option<Vec<&str>>,
tickers: Option<Vec<&str>>,
folder_path: String,
cids: Option<Vec<String>>,
xcats: Option<Vec<String>>,
tickers: Option<Vec<String>>,
) -> Result<DataFrame, Box<dyn std::error::Error>> {
let rcids = cids.unwrap_or_else(|| Vec::new());
let rxcats = xcats.unwrap_or_else(|| Vec::new());
@@ -145,9 +145,9 @@ pub fn load_qdf_from_download_bank(
let load_files = rel_files
.iter()
.filter(|(_, cid, xcat)| {
let f1 = rcids.len() > 0 && rcids.contains(&cid.as_str());
let f2 = rxcats.len() > 0 && rxcats.contains(&xcat.as_str());
let f3 = rtickers.len() > 0 && rtickers.contains(&create_ticker(cid, xcat).as_str());
let f1 = rcids.len() > 0 && rcids.contains(&cid);
let f2 = rxcats.len() > 0 && rxcats.contains(&xcat);
let f3 = rtickers.len() > 0 && rtickers.contains(&create_ticker(cid, xcat));
f1 | f2 | f3
})
.map(|(file, _, _)| *file)
@@ -160,7 +160,7 @@ pub fn load_qdf_from_download_bank(
return Err("No files to load".into());
}
if load_files.len() == 1 {
let dfx = load_quantamental_dataframe(load_files[0]).unwrap();
let dfx = load_quantamental_dataframe(load_files[0].to_string()).unwrap();
return Ok(dfx);
}

View File

@@ -17,11 +17,11 @@ const QDF_INDEX_COLUMNS: [&str; 3] = ["real_date", "cid", "xcat"];
/// If no filters are provided, the original DataFrame is returned.
pub fn reduce_dataframe(
df: DataFrame,
cids: Option<Vec<&str>>,
xcats: Option<Vec<&str>>,
cids: Option<Vec<String>>,
xcats: Option<Vec<String>>,
metrics: Option<Vec<String>>,
start: Option<&str>,
end: Option<&str>,
start: Option<String>,
end: Option<String>,
intersect: bool,
) -> Result<DataFrame, Box<dyn Error>> {
check_quantamental_dataframe(&df)?;
@@ -36,10 +36,10 @@ pub fn reduce_dataframe(
let u_xcats: Vec<String> = get_unique_xcats(&new_df)?;
let u_tickers: Vec<String> = _get_unique_strs_from_str_column_object(&ticker_col)?;
let specified_cids: Vec<&str> =
cids.unwrap_or_else(|| u_cids.iter().map(AsRef::as_ref).collect());
let specified_xcats: Vec<&str> =
xcats.unwrap_or_else(|| u_xcats.iter().map(AsRef::as_ref).collect());
let cids_vec = cids.unwrap_or_else(|| u_cids.clone());
let specified_cids: Vec<&str> = cids_vec.iter().map(AsRef::as_ref).collect();
let xcats_vec = xcats.unwrap_or_else(|| u_xcats.clone());
let specified_xcats: Vec<&str> = xcats_vec.iter().map(AsRef::as_ref).collect();
let non_idx_cols: Vec<String> = new_df
.get_column_names()
@@ -107,7 +107,7 @@ pub fn reduce_dataframe(
// Apply date filtering if `start` or `end` is provided
if let Some(start) = start {
let start_date = chrono::NaiveDate::parse_from_str(start, "%Y-%m-%d")?;
let start_date = chrono::NaiveDate::parse_from_str(&start, "%Y-%m-%d")?;
new_df = new_df
.lazy()
.filter(
@@ -120,7 +120,7 @@ pub fn reduce_dataframe(
}
if let Some(end) = end {
let end_date = chrono::NaiveDate::parse_from_str(end, "%Y-%m-%d")?;
let end_date = chrono::NaiveDate::parse_from_str(&end, "%Y-%m-%d")?;
new_df = new_df
.lazy()
.filter(

View File

@@ -11,7 +11,7 @@ const QDF_INDEX_COLUMNS: [&str; 3] = ["real_date", "cid", "xcat"];
pub fn update_dataframe(
df: &DataFrame,
df_add: &DataFrame,
// xcat_replace: Option<&str>,
xcat_replace: bool,
) -> Result<DataFrame, Box<dyn Error>> {
check_quantamental_dataframe(df)?;
check_quantamental_dataframe(df_add)?;
@@ -20,7 +20,10 @@ pub fn update_dataframe(
} else if df_add.is_empty() {
return Ok(df.clone());
};
println!(
"xcat_replace not implemented yet (passed value: {})",
xcat_replace
);
// vstack and drop duplicates keeping last
let mut new_df = df.vstack(df_add)?;
// help?