Palash Tyagi
2024-11-08 16:50:42 +00:00
parent 2b64eb7d25
commit 001c8488b0
8 changed files with 145 additions and 109 deletions

94
src/download/jpmaqsdownload.rs Normal file

@@ -0,0 +1,94 @@
use crate::download::oauth_client::OAuthClient;
use crate::download::requester::DQRequester;
use crate::download::requester::DQTimeseriesRequestArgs;
use crate::download::timeseries::DQTimeSeriesResponse;
use crate::download::timeseries::JPMaQSIndicator;
use std::error::Error;
const DEFAULT_JPMAQS_METRICS: [&str; 4] = ["value", "grading", "eop_lag", "mop_lag"];
fn ticker_to_expressions(ticker: &str) -> Vec<String> {
DEFAULT_JPMAQS_METRICS
.iter()
.map(|metric| format!("DB(JPMAQS,{},{})", ticker, metric))
.collect::<Vec<String>>()
}
fn construct_expressions(tickers: Vec<String>) -> Vec<String> {
tickers
.iter()
.flat_map(|ticker| ticker_to_expressions(ticker))
.collect()
}
fn is_jpmaq_expression(expression: &str) -> bool {
    // Expect exactly `DB(JPMAQS,<ticker>,<metric>)`
    let parts: Vec<&str> = expression.split(',').collect();
    parts.len() == 3 && parts[0] == "DB(JPMAQS" && parts[2].ends_with(')')
}
fn all_jpmaq_expressions(expressions: &[String]) -> bool {
    expressions
        .iter()
        .all(|expression| is_jpmaq_expression(expression))
}
#[derive(Debug, Clone)]
pub struct JPMaQSDownload {
requester: DQRequester,
}
impl Default for JPMaQSDownload {
fn default() -> Self {
let requester = DQRequester::default();
JPMaQSDownload { requester }
}
}
impl JPMaQSDownload {
pub fn new(client_id: String, client_secret: String) -> Self {
        let oauth_client = OAuthClient::new(client_id, client_secret);
let requester = DQRequester::new(oauth_client);
JPMaQSDownload { requester }
}
pub fn check_connection(&mut self) -> Result<(), Box<dyn Error>> {
self.requester.check_connection()
}
pub fn get_catalogue(&mut self) -> Result<Vec<String>, Box<dyn Error>> {
let dq_catalogue = self.requester.get_catalogue("JPMAQS", 1000)?;
Ok(dq_catalogue.all_instruments)
}
pub fn get_expressions(
&mut self,
expressions: Vec<String>,
) -> Result<Vec<DQTimeSeriesResponse>, Box<dyn Error>> {
let dqts_vec = self.requester.get_timeseries(DQTimeseriesRequestArgs {
            expressions,
..Default::default()
})?;
Ok(dqts_vec)
}
    pub fn get_indicators(
        &mut self,
        tickers: Vec<String>,
    ) -> Result<Vec<JPMaQSIndicator>, Box<dyn Error>> {
        let expressions = construct_expressions(tickers);
        // Return an error instead of asserting, so callers can recover
        if !all_jpmaq_expressions(&expressions) {
            return Err("Constructed expressions are not valid JPMaQS expressions".into());
        }
        let dqts_vec = self.get_expressions(expressions)?;
        let indicators = dqts_vec
            .iter()
            .flat_map(|dqts| dqts.get_timeseries_by_ticker())
            .map(JPMaQSIndicator::new)
            .collect::<Result<Vec<JPMaQSIndicator>, Box<dyn Error>>>()?;
        Ok(indicators)
    }
}
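
A minimal usage sketch for the downloader above (illustrative only, not part of this commit). It assumes valid DataQuery credentials in DQ_CLIENT_ID / DQ_CLIENT_SECRET and uses a hypothetical ticker:

use crate::download::jpmaqsdownload::JPMaQSDownload;
use std::error::Error;

fn download_demo() -> Result<(), Box<dyn Error>> {
    // `default()` builds an OAuthClient from the environment variables
    let mut downloader = JPMaQSDownload::default();
    downloader.check_connection()?;
    // Fetches all four default metrics for the (hypothetical) ticker
    let indicators = downloader.get_indicators(vec!["USD_EQXR_NSA".to_string()])?;
    for indicator in &indicators {
        println!("{}: metrics {:?}", indicator.ticker, indicator.metrics);
    }
    Ok(())
}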

4
src/download/mod.rs Normal file

@@ -0,0 +1,4 @@
pub mod jpmaqsdownload;
pub mod oauth_client;
pub mod requester;
pub mod timeseries;

101
src/download/oauth_client.rs Normal file

@@ -0,0 +1,101 @@
use reqwest::blocking::Client;
use serde_json::Value;
use std::collections::HashMap;
use std::env;
use std::error::Error;
use std::time::{Duration, SystemTime};
pub const OAUTH_TOKEN_URL: &str = "https://authe.jpmchase.com/as/token.oauth2";
pub const OAUTH_RESOURCE_ID: &str = "JPMC:URI:RS-06785-DataQueryExternalApi-PROD";
#[derive(Debug, Clone)]
pub struct OAuthClient {
client_id: String,
client_secret: String,
token_url: String,
resource_id: String,
access_token: Option<String>,
expires_at: Option<SystemTime>, // Stores the token's expiration time
}
impl OAuthClient {
pub fn new(client_id: String, client_secret: String) -> Self {
OAuthClient {
client_id,
client_secret,
token_url: OAUTH_TOKEN_URL.to_string(),
resource_id: OAUTH_RESOURCE_ID.to_string(),
access_token: None,
expires_at: None, // Initially None until a token is fetched
}
}
    pub fn fetch_token(&mut self) -> Result<(), Box<dyn Error>> {
        let client = Client::new();
        // Set up the form parameters for the client-credentials grant
        let mut params = HashMap::new();
        params.insert("grant_type", "client_credentials");
        params.insert("client_id", &self.client_id);
        params.insert("client_secret", &self.client_secret);
        params.insert("aud", &self.resource_id);
        // Make the POST request to retrieve the token; surface failures
        // as errors instead of silently returning Ok(())
        let response = client.post(&self.token_url).form(&params).send()?;
        if !response.status().is_success() {
            return Err(format!("Token request failed with status: {}", response.status()).into());
        }
        let json: Value = response.json()?;
        let token = json["access_token"]
            .as_str()
            .ok_or("Failed to retrieve access token from response.")?;
        self.access_token = Some(token.to_string());
        // Default to a one-hour lifetime if the response omits `expires_in`
        let expires_in = json["expires_in"].as_u64().unwrap_or(3600);
        self.expires_at = Some(SystemTime::now() + Duration::from_secs(expires_in));
        Ok(())
    }
fn is_token_expired(&self) -> bool {
match self.expires_at {
Some(expiration) => SystemTime::now() >= expiration,
None => true, // If there's no expiration set, assume it's expired
}
}
pub fn get_headers(&mut self) -> Result<HashMap<String, String>, Box<dyn Error>> {
if self.is_token_expired() {
// println!("Token has expired. Fetching a new token...");
self.fetch_token()?;
}
let mut headers = HashMap::new();
if let Some(token) = &self.access_token {
headers.insert("Authorization".to_string(), format!("Bearer {}", token));
} else {
return Err("No access token available.".into());
}
Ok(headers)
}
}
impl Default for OAuthClient {
    fn default() -> Self {
        OAuthClient {
            client_id: env::var("DQ_CLIENT_ID").expect("DQ_CLIENT_ID must be set"),
            client_secret: env::var("DQ_CLIENT_SECRET").expect("DQ_CLIENT_SECRET must be set"),
            token_url: OAUTH_TOKEN_URL.to_string(),
            resource_id: OAUTH_RESOURCE_ID.to_string(),
            access_token: None,
            expires_at: None,
        }
    }
}
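
A short sketch of the token lifecycle implemented above (illustrative only): callers use get_headers, which lazily fetches the first token and refreshes it once expires_at has passed, so fetch_token rarely needs to be called directly.

use crate::download::oauth_client::OAuthClient;
use std::error::Error;

fn auth_demo() -> Result<(), Box<dyn Error>> {
    // Reads DQ_CLIENT_ID / DQ_CLIENT_SECRET from the environment
    let mut client = OAuthClient::default();
    // First call fetches a token; later calls reuse it until it expires
    let headers = client.get_headers()?;
    assert!(headers["Authorization"].starts_with("Bearer "));
    Ok(())
}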

351
src/download/requester.rs Normal file

@@ -0,0 +1,351 @@
use crate::download::oauth_client::OAuthClient;
use crate::download::timeseries::DQCatalogueResponse;
use crate::download::timeseries::DQCatalogueSingleResponse;
use crate::download::timeseries::DQTimeSeriesResponse;
use reqwest;
use reqwest::blocking::Client;
use reqwest::header::{HeaderMap, HeaderName, HeaderValue};
use std::error::Error;
use std::sync::{Arc, Mutex};
use std::thread;
use std::time::Duration;
const API_BASE_URL: &str = "https://api-developer.jpmorgan.com/research/dataquery-authe/api/v2";
const HEARTBEAT_ENDPOINT: &str = "/services/heartbeat";
const TIMESERIES_ENDPOINT: &str = "/expressions/time-series";
const CATALOGUE_ENDPOINT: &str = "/group/instruments";
// const JPMAQS_CATALOGUE_GROUP: &str = "JPMAQS";
#[derive(Debug, Clone)]
pub struct DQRequester {
oauth_client: OAuthClient,
rqclient: Client,
}
#[derive(Debug, Clone)]
pub struct DQTimeseriesRequestArgs {
pub start_date: String,
pub end_date: String,
pub calendar: String,
pub frequency: String,
pub conversion: String,
pub nan_treatment: String,
pub expressions: Vec<String>,
}
impl DQTimeseriesRequestArgs {
pub fn new(
start_date: &str,
end_date: &str,
calendar: &str,
frequency: &str,
conversion: &str,
nan_treatment: &str,
expressions: Vec<String>,
) -> Self {
DQTimeseriesRequestArgs {
start_date: start_date.to_string(),
end_date: end_date.to_string(),
calendar: calendar.to_string(),
frequency: frequency.to_string(),
conversion: conversion.to_string(),
nan_treatment: nan_treatment.to_string(),
expressions,
}
}
pub fn update_expressions(&mut self, expressions: Vec<String>) {
self.expressions = expressions;
}
pub fn as_query_string(&self) -> String {
let mut params = vec![
("format", "JSON"),
("start-date", &self.start_date),
("end-date", &self.end_date),
("calendar", &self.calendar),
("frequency", &self.frequency),
("conversion", &self.conversion),
("nan_treatment", &self.nan_treatment),
("data", "NO_REFERENCE_DATA"),
];
for expression in &self.expressions {
params.push(("expressions", expression));
}
serde_urlencoded::to_string(&params).unwrap()
}
}
impl Default for DQTimeseriesRequestArgs {
fn default() -> Self {
DQTimeseriesRequestArgs {
start_date: "1990-01-01".to_string(),
end_date: "TODAY+2D".to_string(),
calendar: "CAL_ALLDAYS".to_string(),
frequency: "FREQ_DAY".to_string(),
conversion: "CONV_LASTBUS_ABS".to_string(),
nan_treatment: "NA_NOTHING".to_string(),
expressions: Vec::new(),
}
}
}
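// Illustrative sketch of the query string produced by `as_query_string` with
// the defaults above; the expression value is a hypothetical example. With
// form encoding, `(` `,` `)` become %28, %2C and %29 respectively.
#[cfg(test)]
mod query_string_tests {
    use super::*;

    #[test]
    fn query_string_includes_defaults_and_expressions() {
        let args = DQTimeseriesRequestArgs {
            expressions: vec!["DB(JPMAQS,USD_EQXR_NSA,value)".to_string()],
            ..Default::default()
        };
        let qs = args.as_query_string();
        assert!(qs.contains("start-date=1990-01-01"));
        assert!(qs.contains("expressions=DB%28JPMAQS%2CUSD_EQXR_NSA%2Cvalue%29"));
    }
}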
impl Default for DQRequester {
fn default() -> Self {
DQRequester {
oauth_client: OAuthClient::default(),
rqclient: Client::new(),
}
}
}
impl DQRequester {
pub fn new(oauth_client: OAuthClient) -> Self {
DQRequester {
oauth_client,
rqclient: Client::new(),
}
}
fn _request(
&mut self,
method: reqwest::Method,
endpoint: &str,
) -> Result<reqwest::blocking::Response, Box<dyn Error>> {
let headers_map = self.oauth_client.get_headers()?;
let mut headers = HeaderMap::new();
for (key, value) in headers_map {
let header_name = HeaderName::from_bytes(key.as_bytes())?;
let header_value = HeaderValue::from_str(&value)?;
headers.insert(header_name, header_value);
}
// Construct the full URL
let url = format!("{}{}", API_BASE_URL, endpoint);
let response = self
.rqclient
.request(method, &url)
.headers(headers)
.send()?;
        if response.status().is_success() {
            Ok(response)
        } else {
            // `error_for_status` only yields an error for 4xx/5xx codes, so
            // build the error from the status directly
            Err(format!("Request to {} failed with status {}", url, response.status()).into())
        }
}
pub fn check_connection(&mut self) -> Result<(), Box<dyn Error>> {
let response = self._request(reqwest::Method::GET, HEARTBEAT_ENDPOINT)?;
println!("Connection is successful: {}", response.status());
Ok(())
}
pub fn get_catalogue(
&mut self,
catalogue_group: &str,
page_size: u32,
// ) -> Result<Vec<DQCatalogueSingleResponse>, Box<dyn Error>> {
) -> Result<DQCatalogueResponse, Box<dyn Error>> {
let mut responses: Vec<DQCatalogueSingleResponse> = Vec::new();
if page_size < 1 || page_size > 1000 {
return Err("Page size must be between 1 and 1000".into());
}
let mut next_page = Some(format!(
"{}?group-id={}&limit={}",
CATALOGUE_ENDPOINT, catalogue_group, page_size
));
while let Some(endpoint) = next_page {
std::thread::sleep(std::time::Duration::from_millis(200));
let response = self._request(reqwest::Method::GET, &endpoint)?;
let json: serde_json::Value = response.json()?;
            let dq_catalogue_response: DQCatalogueSingleResponse =
                serde_json::from_value(json.clone())?;
responses.push(dq_catalogue_response);
let links = json.get("links");
let links_array = links.and_then(|links| links.as_array());
let next_link = links_array.and_then(|links| {
links
.iter()
.find_map(|link| link.get("next")?.as_str().map(|s| s.to_string()))
});
next_page = next_link;
}
Ok(DQCatalogueResponse::new(responses))
}
pub fn _fetch_single_timeseries_batch(
&mut self,
args: DQTimeseriesRequestArgs,
) -> Result<reqwest::blocking::Response, Box<dyn Error>> {
log::info!(
"Fetching timeseries batch with {} expressions",
args.expressions.len()
);
if args.expressions.len() < 1 || args.expressions.len() > 20 {
return Err("Number of expressions must be between 1 and 20".into());
}
let query_string = args.as_query_string();
let endpoint = format!("{}?{}", TIMESERIES_ENDPOINT, query_string);
let response = self._request(reqwest::Method::GET, &endpoint)?;
log::info!("Got response: {:?}", response.status());
Ok(response)
}
fn _fetch_timeseries_recursive(
&mut self,
args: DQTimeseriesRequestArgs,
max_retries: u32,
) -> Result<Vec<DQTimeSeriesResponse>, Box<dyn Error>> {
let expression_batches: Vec<Vec<String>> = args
.expressions
.chunks(20)
.map(|chunk| chunk.to_vec())
.collect();
        let okay_responses = Arc::new(Mutex::new(Vec::new()));
        let failed_batches = Arc::new(Mutex::new(Vec::new()));
        let mut handles = vec![];
        let total_batches = expression_batches.len();
        let mut curr_batch = 0;
        for expr_batch in expression_batches {
            curr_batch += 1;
            let mut args = args.clone();
            args.update_expressions(expr_batch.clone());
            let okay_responses = Arc::clone(&okay_responses);
            let failed_batches = Arc::clone(&failed_batches);
            // Give each thread its own client: sharing one behind a Mutex
            // would hold the lock for the whole request and serialise the batches
            let mut client = self.clone();
            log::info!("Dispatching batch {} of {}", curr_batch, total_batches);
            // Stagger dispatches to avoid bursting the API
            thread::sleep(Duration::from_millis(200));
            let handle = thread::spawn(move || {
                let response = client._fetch_single_timeseries_batch(args);
match response {
Ok(r) => {
// Attempt to parse the response text
match serde_json::from_str::<DQTimeSeriesResponse>(&r.text().unwrap()) {
Ok(dq_response) => {
okay_responses.lock().unwrap().push(dq_response);
log::info!("Got batch: {:?}", expr_batch);
}
Err(e) => {
// If parsing fails, treat this as a batch failure
failed_batches.lock().unwrap().push(expr_batch.clone());
log::error!(
"Failed to parse timeseries: {:?} : {:?}",
expr_batch,
e
);
}
}
}
Err(e) => {
// Handle _fetch_single_timeseries_batch error
failed_batches.lock().unwrap().push(expr_batch.clone());
log::error!("Failed to get batch: {:?} : {:?}", expr_batch, e);
}
}
});
handles.push(handle);
}
for handle in handles {
handle.join().unwrap();
}
let mut okay_responses = Arc::try_unwrap(okay_responses)
.unwrap()
.into_inner()
.unwrap();
let failed_batches = Arc::try_unwrap(failed_batches)
.unwrap()
.into_inner()
.unwrap();
if !failed_batches.is_empty() && max_retries == 0 {
return Err("Max retries reached".into());
}
if !failed_batches.is_empty() && max_retries > 0 {
log::info!("Retrying failed batches");
let mut new_args = args.clone();
new_args.update_expressions(failed_batches.concat());
log::info!("Retrying with {} failed expressions", failed_batches.len());
let mut retry_responses =
self._fetch_timeseries_recursive(new_args, max_retries - 1)?;
okay_responses.append(&mut retry_responses);
}
log::info!("Returning {} responses", okay_responses.len());
Ok(okay_responses)
}
pub fn get_timeseries(
&mut self,
args: DQTimeseriesRequestArgs,
) -> Result<Vec<DQTimeSeriesResponse>, Box<dyn Error>> {
let max_retries = 5;
        log::info!(
            "Fetching {} expressions in batches of up to 20",
            args.expressions.len()
        );
self._fetch_timeseries_recursive(args, max_retries)
}
}
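// Illustrative sketch of the catalogue page shape implied by the structs in
// timeseries.rs; the field values are invented. `get_catalogue` keeps
// following each page's `next` link until a page reports none.
#[cfg(test)]
mod catalogue_tests {
    use super::*;

    #[test]
    fn parses_a_single_catalogue_page() {
        let page = r#"{
            "links": [{"self": "/group/instruments?group-id=JPMAQS", "next": null}],
            "items": 1,
            "instruments": [
                {"instrument-id": "ABC123", "instrument-name": "USD_EQXR_NSA", "item": 1}
            ]
        }"#;
        let parsed: DQCatalogueSingleResponse = serde_json::from_str(page).unwrap();
        assert_eq!(parsed.instruments[0].instrument_name, "USD_EQXR_NSA");
    }
}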
#[allow(dead_code)]
fn main() {
    let client_id = std::env::var("DQ_CLIENT_ID").expect("DQ_CLIENT_ID must be set");
    let client_secret = std::env::var("DQ_CLIENT_SECRET").expect("DQ_CLIENT_SECRET must be set");
    let mut oauth_client = OAuthClient::new(client_id, client_secret);
    oauth_client.fetch_token().unwrap();
    let mut requester = DQRequester::new(oauth_client);
    requester.check_connection().unwrap();
    let expressions = vec![
        "DB(JPMAQS,USD_EQXR_NSA,value)",
        "DB(JPMAQS,USD_EQXR_NSA,grading)",
        "DB(JPMAQS,USD_EQXR_NSA,eop_lag)",
        "DB(JPMAQS,USD_EQXR_NSA,mop_lag)",
        "DB(JPMAQS,GBP_EQXR_NSA,value)",
        "DB(JPMAQS,GBP_EQXR_NSA,grading)",
        "DB(JPMAQS,GBP_EQXR_NSA,eop_lag)",
        "DB(JPMAQS,GBP_EQXR_NSA,mop_lag)",
    ]
    .into_iter()
    .map(String::from)
    .collect();
    let responses = requester
        .get_timeseries(DQTimeseriesRequestArgs {
            expressions,
            ..Default::default()
        })
        .unwrap();
    println!("Fetched {} response batches", responses.len());
}

377
src/download/timeseries.rs Normal file

@@ -0,0 +1,377 @@
use polars::error::PolarsError;
use polars::export::chrono::NaiveDate;
use polars::prelude::*;
use polars::series::Series;
use serde::Deserialize;
use std::collections::HashMap;
use std::collections::HashSet;
use std::error::Error;
use std::fs::File;
/// Response from the DataQuery API
#[derive(Deserialize, Debug)]
pub struct DQTimeSeriesResponse {
instruments: Vec<Instrument>,
}
#[derive(Deserialize, Debug)]
pub struct DQCatalogueResponse {
pub items: u32,
pub catalogue_responses: Vec<DQCatalogueSingleResponse>,
pub all_instruments: Vec<String>,
}
impl DQCatalogueResponse {
pub fn new(catalogue_responses: Vec<DQCatalogueSingleResponse>) -> Self {
let all_instruments: Vec<String> = catalogue_responses
.iter()
.flat_map(|response| {
response
.instruments
.iter()
.map(|instrument| instrument.instrument_name.clone())
})
.collect();
        DQCatalogueResponse {
            items: all_instruments.len() as u32,
            catalogue_responses,
            all_instruments,
        }
}
}
#[derive(Deserialize, Debug)]
pub struct DQCatalogueSingleResponse {
pub links: Vec<HashMap<String, Option<String>>>,
pub items: u32,
pub instruments: Vec<DQCatalogueInstrument>,
}
#[derive(Deserialize, Debug)]
pub struct DQCatalogueInstrument {
#[serde(rename = "instrument-id")]
pub instrument_id: String,
#[serde(rename = "instrument-name")]
pub instrument_name: String,
pub item: u32,
}
/// Representation of DQResponse.Instrument
#[derive(Deserialize, Debug)]
struct Instrument {
attributes: Vec<Attribute>,
}
/// Representation of DQResponse.Instrument.Attribute
#[derive(Deserialize, Debug)]
struct Attribute {
expression: String,
#[serde(rename = "time-series")]
time_series: Vec<(String, Option<f64>)>,
}
/// Representation of a single time series
#[derive(Debug)]
pub struct DQTimeSeries {
expression: String,
time_series: Vec<(String, Option<f64>)>,
}
#[derive(Debug)]
pub struct JPMaQSIndicator {
pub df: DataFrame,
pub ticker: String,
pub metrics: Vec<String>,
}
impl DQTimeSeries {
/// Get the ticker from the expression
pub fn get_ticker(&self) -> Result<String, Box<dyn Error>> {
if !self.expression.starts_with("DB(JPMAQS,") {
return Err("Expression does not start with 'DB(JPMAQS,'".into());
}
let ticker = self.expression.split(',').nth(1).unwrap();
if ticker.is_empty() {
return Err("Ticker is empty".into());
}
Ok(ticker.to_string())
}
/// Get the metric from the expression
pub fn get_metric(&self) -> Result<String, Box<dyn Error>> {
if !self.expression.starts_with("DB(JPMAQS,") {
return Err("Expression does not start with 'DB(JPMAQS,'".into());
}
let metric = self
.expression
.trim_end_matches(')')
.split(',')
.last()
.unwrap();
if metric.is_empty() {
return Err("Metric is empty".into());
}
Ok(metric.to_string())
}
/// Convert the time series to a Polars DataFrame
pub fn to_dataframe(&self) -> Result<DataFrame, PolarsError> {
let dates: Vec<NaiveDate> = self
.time_series
.iter()
.map(|(date_str, _)| NaiveDate::parse_from_str(date_str, "%Y%m%d").unwrap())
.collect();
let values: Vec<Option<f64>> = self.time_series.iter().map(|(_, value)| *value).collect();
let date_series = Series::new("date".into(), &dates);
let value_series = Float64Chunked::new("value".into(), &values);
df!(
"real_date" => date_series,
self.expression.clone() => value_series
)
}
}
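// Illustrative sketch of the expression parsing above: for a JPMaQS
// expression `DB(JPMAQS,<ticker>,<metric>)`, `get_ticker` yields the middle
// segment and `get_metric` the trailing one. Values here are hypothetical.
#[cfg(test)]
mod expression_parsing_tests {
    use super::*;

    #[test]
    fn splits_expression_into_ticker_and_metric() {
        let ts = DQTimeSeries {
            expression: "DB(JPMAQS,USD_EQXR_NSA,value)".to_string(),
            time_series: vec![("20220103".to_string(), Some(1.0))],
        };
        assert_eq!(ts.get_ticker().unwrap(), "USD_EQXR_NSA");
        assert_eq!(ts.get_metric().unwrap(), "value");
    }
}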
impl DQTimeSeriesResponse {
/// Return a list of all expressions in the response
pub fn list_expressions(&self) -> Vec<String> {
self.instruments
.iter()
.flat_map(|instrument| {
instrument
.attributes
.iter()
.map(|attribute| attribute.expression.clone())
})
.collect()
}
/// Return a list of all DQTimeSeries in the response
pub fn get_all_timeseries(&self) -> Vec<DQTimeSeries> {
self.instruments
.iter()
.flat_map(|instrument| {
instrument.attributes.iter().map(|attribute| DQTimeSeries {
expression: attribute.expression.clone(),
time_series: attribute.time_series.clone(),
})
})
.collect()
}
/// Return a list of lists of DQTimeSeries, where each list contains all the timeseries for each ticker
pub fn get_timeseries_by_ticker(&self) -> Vec<Vec<DQTimeSeries>> {
let timeseries_by_ticker = self
.instruments
.iter()
.flat_map(|instrument| {
instrument.attributes.iter().map(|attribute| DQTimeSeries {
expression: attribute.expression.clone(),
time_series: attribute.time_series.clone(),
})
})
.fold(HashMap::new(), |mut acc, ts| {
let ticker = ts.get_ticker().unwrap();
acc.entry(ticker).or_insert_with(Vec::new).push(ts);
acc
});
timeseries_by_ticker.into_iter().map(|(_, v)| v).collect()
}
}
impl JPMaQSIndicator {
pub fn new(timeseries_list: Vec<DQTimeSeries>) -> Result<Self, Box<dyn Error>> {
let found_tickers = timeseries_list
.iter()
.map(|ts| ts.get_ticker().unwrap())
.collect::<HashSet<String>>();
if found_tickers.len() != 1 {
return Err("All provided timeseries do not belong to the same ticker".into());
}
let ticker = found_tickers.into_iter().next().unwrap();
let metrics = timeseries_list
.iter()
.map(|ts| ts.get_metric().unwrap())
.collect::<Vec<String>>();
let output_df = timeseries_list_to_dataframe(timeseries_list, true)?;
        Ok(JPMaQSIndicator {
            df: output_df,
            ticker,
            metrics,
        })
}
pub fn as_qdf(&self) -> Result<DataFrame, Box<dyn Error>> {
let mut qdf = self.df.clone();
let (cid, xcat) = match self.ticker.split_once('_') {
Some((cid, xcat)) => (cid, xcat),
None => return Err(format!("Invalid ticker format; got '{}'", self.ticker).into()),
};
qdf.with_column(Series::new("cid".into(), vec![cid; qdf.height()]))?;
qdf.with_column(Series::new("xcat".into(), vec![xcat; qdf.height()]))?;
sort_qdf_columns(&mut qdf)?;
Ok(qdf)
}
pub fn save_qdf_to_csv(&self, filename: &str) -> Result<(), Box<dyn Error>> {
save_qdf_to_csv(&mut self.as_qdf()?, filename)
}
}
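// Illustrative sketch of the quantamental DataFrame (QDF) produced by
// `as_qdf`: the ticker splits at the first underscore into `cid` and `xcat`,
// and columns are ordered `real_date, cid, xcat, <metrics>`. Data invented.
#[cfg(test)]
mod qdf_tests {
    use super::*;

    #[test]
    fn qdf_orders_index_columns_first() {
        let ts = DQTimeSeries {
            expression: "DB(JPMAQS,USD_EQXR_NSA,value)".to_string(),
            time_series: vec![("20220103".to_string(), Some(0.5))],
        };
        let indicator = JPMaQSIndicator::new(vec![ts]).unwrap();
        let qdf = indicator.as_qdf().unwrap();
        let cols: Vec<String> = qdf
            .get_column_names()
            .iter()
            .map(|c| c.to_string())
            .collect();
        assert_eq!(cols, vec!["real_date", "cid", "xcat", "value"]);
    }
}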
fn timeseries_list_to_dataframe(
    timeseries_list: Vec<DQTimeSeries>,
    dropna: bool,
) -> Result<DataFrame, Box<dyn Error>> {
    let (first, rest) = timeseries_list
        .split_first()
        .ok_or("No timeseries provided")?;
    // Convert the first timeseries to a DataFrame and rename its value
    // column to the metric name (e.g. `value`, `grading`)
    let mut result_df = first.to_dataframe()?;
    let column_name = result_df.get_column_names()[1].to_string();
    result_df.rename(&column_name, first.get_metric()?.into())?;
    // Join each remaining timeseries on, one metric column at a time
    for ts in rest {
        let mut df = ts.to_dataframe()?;
        let column_name = df.get_column_names()[1].to_string();
        df.rename(&column_name, ts.get_metric()?.into())?;
        // Left join on the shared 'real_date' column
        result_df = result_df.left_join(&df, ["real_date"], ["real_date"])?;
    }
    // Drop rows that contain any missing values
    if dropna {
        result_df = result_df
            .lazy()
            .filter(all_horizontal([all().is_not_null()])?)
            .collect()?;
    }
    Ok(result_df)
}
fn sort_qdf_columns(qdf: &mut DataFrame) -> Result<(), Box<dyn Error>> {
let index_columns = ["real_date", "cid", "xcat"];
let known_metrics = ["value", "grading", "eop_lag", "mop_lag"];
let df_columns = qdf
.get_column_names()
.into_iter()
.map(|s| s.clone().into_string())
.collect::<Vec<String>>();
let mut unknown_metrics: Vec<String> = df_columns
.iter()
.filter(|&m| !known_metrics.contains(&m.as_str()))
.filter(|&m| !index_columns.contains(&m.as_str()))
.cloned()
.collect();
let mut new_columns: Vec<String> = vec![];
new_columns.extend(index_columns.iter().map(|s| s.to_string()));
for &colname in &known_metrics {
if df_columns.contains(&colname.into()) {
new_columns.push(colname.to_string());
}
}
unknown_metrics.sort();
new_columns.extend(unknown_metrics);
    *qdf = qdf.select(new_columns)?;
Ok(())
}
fn save_qdf_to_csv(qdf: &mut DataFrame, filename: &str) -> Result<(), Box<dyn Error>> {
let file = File::create(filename)?;
// Write the DataFrame to a CSV file
let mut csv_writer = CsvWriter::new(file);
csv_writer.finish(qdf)?;
Ok(())
}
#[allow(dead_code)]
fn main() {
let json_data = r#"
{
"instruments": [
{
"attributes": [
{
"expression": "metric, TICKER, metric1()",
"time-series": [
["2022-01-01", 10.0],
["2022-01-02", null]
]
},
{
"expression": "metric, TICKER2, metric2()",
"time-series": [
["2022-01-01", 20.0],
["2022-01-03", 25.0]
]
}
]
},
{
"attributes": [
{
"expression": "metric, TICKER3, metric3()",
"time-series": [
["2022-02-01", 30.0],
["2022-02-02", 35.0]
]
}
]
}
]
}
"#;
let response: DQTimeSeriesResponse = serde_json::from_str(json_data).unwrap();
println!("{:?}", response);
let all_timeseries = response.get_all_timeseries();
for ts in all_timeseries {
println!("{:?}", ts);
match ts.to_dataframe() {
Ok(df) => println!("{:?}", df),
Err(e) => println!("Failed to create DataFrame: {:?}", e),
}
}
}