msyrs/src/utils/dftools.rs
2024-11-14 17:08:57 +00:00

64 lines
1.7 KiB
Rust

use polars::prelude::*;
/// Names of the four standard metrics provided by JPMaQS:
/// `value`, `grading`, `eop_lag` and `mop_lag`.
///
/// These are the metric columns a quantamental DataFrame typically carries
/// in addition to its index columns.
pub const DEFAULT_JPMAQS_METRICS: [&str; 4] = ["value", "grading", "eop_lag", "mop_lag"];
/// Names of the index columns every quantamental DataFrame must contain:
/// `real_date`, `cid` and `xcat`.
///
/// `is_quantamental_dataframe` rejects any DataFrame that is missing one of
/// these columns.
pub const QDF_INDEX_COLUMNS: [&str; 3] = ["real_date", "cid", "xcat"];
/// Check whether a DataFrame is a quantamental DataFrame (QDF).
///
/// A standard quantamental DataFrame has the following index columns
/// (see [`QDF_INDEX_COLUMNS`]):
/// - `real_date`: Date column, stored with the `Date` dtype
/// - `cid`: Column of cross-sectional identifiers
/// - `xcat`: Column of extended categories
///
/// Additionally, the DataFrame must have at least one more column.
/// Typically, this is one (or more) of the default JPMaQS metrics
/// (see [`DEFAULT_JPMAQS_METRICS`]).
///
/// # Returns
/// `true` only when every index column is present, `real_date` has the
/// `Date` dtype, and the frame holds more columns than just the index
/// columns; `false` in every other case. A failed column lookup is treated
/// as "not a QDF" rather than being unwrapped.
pub fn is_quantamental_dataframe(df: &DataFrame) -> bool {
    let columns = df
        .get_column_names()
        .iter()
        .map(|s| s.as_str())
        .collect::<Vec<&str>>();

    // Every index column must be present. This single name check also covers
    // `cid` and `xcat`, so they need no separate lookup afterwards.
    let has_idx_columns = QDF_INDEX_COLUMNS.iter().all(|col| columns.contains(col));
    if !has_idx_columns {
        return false;
    }

    // `real_date` must be stored as a real date type. Borrow the column
    // instead of materialising a one-column DataFrame just to read its dtype.
    let is_date_dtype = df
        .column("real_date")
        .is_ok_and(|col| col.dtype() == &DataType::Date);
    if !is_date_dtype {
        return false;
    }

    // At least one non-index column (e.g. a metric) must exist.
    columns.len() > QDF_INDEX_COLUMNS.len()
}