python working with notebook!

This commit is contained in:
Palash Tyagi
2024-11-17 23:58:47 +00:00
parent 09f74916e8
commit bba5acd724
13 changed files with 586 additions and 79 deletions

View File

@@ -23,10 +23,10 @@ fn _file_base_name(file_path: String) -> String {
/// The CSV must be named in the format `cid_xcat.csv` (`ticker.csv`).
/// The DataFrame must have a `real_date` column along with additional value columns.
pub fn load_quantamental_dataframe(
file_path: &str,
file_path: String,
) -> Result<DataFrame, Box<dyn std::error::Error>> {
// get the file base name
let base_file_name = _file_base_name(file_path.into());
let base_file_name = _file_base_name(file_path.clone().into());
// a base name without '_' cannot follow the `cid_xcat` naming format, so the file is not a Quantamental DataFrame
if !base_file_name.contains('_') {
@@ -37,7 +37,7 @@ pub fn load_quantamental_dataframe(
let (cid, xcat) = split_ticker(ticker.to_string())?;
let mut df = CsvReadOptions::default()
.try_into_reader_with_file_path(Some(file_path.into()))
.try_into_reader_with_file_path(Some(file_path.to_string().into()))
.unwrap()
.finish()
.unwrap();
@@ -99,7 +99,7 @@ fn collect_paths_recursively<P: AsRef<std::path::Path>>(path: P) -> std::io::Res
}
fn _load_qdf_thread_safe(file_path: &str) -> Result<DataFrame, Box<dyn Error + Send + Sync>> {
let res = load_quantamental_dataframe(file_path);
let res = load_quantamental_dataframe(file_path.to_string());
res.map_err(|e| {
anyhow::Error::msg(e.to_string())
.context("Failed to load quantamental dataframe")
@@ -107,10 +107,10 @@ fn _load_qdf_thread_safe(file_path: &str) -> Result<DataFrame, Box<dyn Error + S
})
}
pub fn load_qdf_from_download_bank(
folder_path: &str,
cids: Option<Vec<&str>>,
xcats: Option<Vec<&str>>,
tickers: Option<Vec<&str>>,
folder_path: String,
cids: Option<Vec<String>>,
xcats: Option<Vec<String>>,
tickers: Option<Vec<String>>,
) -> Result<DataFrame, Box<dyn std::error::Error>> {
let rcids = cids.unwrap_or_else(|| Vec::new());
let rxcats = xcats.unwrap_or_else(|| Vec::new());
@@ -145,9 +145,9 @@ pub fn load_qdf_from_download_bank(
let load_files = rel_files
.iter()
.filter(|(_, cid, xcat)| {
let f1 = rcids.len() > 0 && rcids.contains(&cid.as_str());
let f2 = rxcats.len() > 0 && rxcats.contains(&xcat.as_str());
let f3 = rtickers.len() > 0 && rtickers.contains(&create_ticker(cid, xcat).as_str());
let f1 = rcids.len() > 0 && rcids.contains(&cid);
let f2 = rxcats.len() > 0 && rxcats.contains(&xcat);
let f3 = rtickers.len() > 0 && rtickers.contains(&create_ticker(cid, xcat));
f1 | f2 | f3
})
.map(|(file, _, _)| *file)
@@ -160,7 +160,7 @@ pub fn load_qdf_from_download_bank(
return Err("No files to load".into());
}
if load_files.len() == 1 {
let dfx = load_quantamental_dataframe(load_files[0]).unwrap();
let dfx = load_quantamental_dataframe(load_files[0].to_string()).unwrap();
return Ok(dfx);
}

View File

@@ -17,11 +17,11 @@ const QDF_INDEX_COLUMNS: [&str; 3] = ["real_date", "cid", "xcat"];
/// If no filters are provided, the original DataFrame is returned.
pub fn reduce_dataframe(
df: DataFrame,
cids: Option<Vec<&str>>,
xcats: Option<Vec<&str>>,
cids: Option<Vec<String>>,
xcats: Option<Vec<String>>,
metrics: Option<Vec<String>>,
start: Option<&str>,
end: Option<&str>,
start: Option<String>,
end: Option<String>,
intersect: bool,
) -> Result<DataFrame, Box<dyn Error>> {
check_quantamental_dataframe(&df)?;
@@ -36,10 +36,10 @@ pub fn reduce_dataframe(
let u_xcats: Vec<String> = get_unique_xcats(&new_df)?;
let u_tickers: Vec<String> = _get_unique_strs_from_str_column_object(&ticker_col)?;
let specified_cids: Vec<&str> =
cids.unwrap_or_else(|| u_cids.iter().map(AsRef::as_ref).collect());
let specified_xcats: Vec<&str> =
xcats.unwrap_or_else(|| u_xcats.iter().map(AsRef::as_ref).collect());
let cids_vec = cids.unwrap_or_else(|| u_cids.clone());
let specified_cids: Vec<&str> = cids_vec.iter().map(AsRef::as_ref).collect();
let xcats_vec = xcats.unwrap_or_else(|| u_xcats.clone());
let specified_xcats: Vec<&str> = xcats_vec.iter().map(AsRef::as_ref).collect();
let non_idx_cols: Vec<String> = new_df
.get_column_names()
@@ -107,7 +107,7 @@ pub fn reduce_dataframe(
// Apply date filtering if `start` or `end` is provided
if let Some(start) = start {
let start_date = chrono::NaiveDate::parse_from_str(start, "%Y-%m-%d")?;
let start_date = chrono::NaiveDate::parse_from_str(&start, "%Y-%m-%d")?;
new_df = new_df
.lazy()
.filter(
@@ -120,7 +120,7 @@ pub fn reduce_dataframe(
}
if let Some(end) = end {
let end_date = chrono::NaiveDate::parse_from_str(end, "%Y-%m-%d")?;
let end_date = chrono::NaiveDate::parse_from_str(&end, "%Y-%m-%d")?;
new_df = new_df
.lazy()
.filter(

View File

@@ -11,7 +11,7 @@ const QDF_INDEX_COLUMNS: [&str; 3] = ["real_date", "cid", "xcat"];
pub fn update_dataframe(
df: &DataFrame,
df_add: &DataFrame,
// xcat_replace: Option<&str>,
xcat_replace: bool,
) -> Result<DataFrame, Box<dyn Error>> {
check_quantamental_dataframe(df)?;
check_quantamental_dataframe(df_add)?;
@@ -20,7 +20,10 @@ pub fn update_dataframe(
} else if df_add.is_empty() {
return Ok(df.clone());
};
println!(
"xcat_replace not implemented yet (passed value: {})",
xcat_replace
);
// vstack and drop duplicates keeping last
let mut new_df = df.vstack(df_add)?;
// help?