This commit is contained in:
Palash Tyagi 2024-11-12 23:56:37 +00:00
parent c9971dc2e3
commit c69454fe2f
9 changed files with 123 additions and 81 deletions

View File

@ -1,6 +0,0 @@
[package]
name = "docs"
version = "0.1.0"
edition = "2021"
[dependencies]

View File

@ -1,14 +0,0 @@
pub fn add(left: u64, right: u64) -> u64 {
left + right
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn it_works() {
let result = add(2, 2);
assert_eq!(result, 4);
}
}

View File

@ -8,6 +8,7 @@ use std::collections::HashSet;
use std::error::Error; use std::error::Error;
use std::fs::File; use std::fs::File;
/// Arguments for the DataQuery API request
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct DQTimeseriesRequestArgs { pub struct DQTimeseriesRequestArgs {
pub start_date: String, pub start_date: String,
@ -19,6 +20,7 @@ pub struct DQTimeseriesRequestArgs {
pub expressions: Vec<String>, pub expressions: Vec<String>,
} }
#[allow(dead_code)]
impl DQTimeseriesRequestArgs { impl DQTimeseriesRequestArgs {
pub fn new( pub fn new(
start_date: &str, start_date: &str,
@ -62,6 +64,15 @@ impl DQTimeseriesRequestArgs {
} }
} }
/// Default implementation for DQTimeseriesRequestArgs
/// The default values are set:
/// - start_date: "1990-01-01"
/// - end_date: "TODAY+2D"
/// - calendar: "CAL_ALLDAYS"
/// - frequency: "FREQ_DAY"
/// - conversion: "CONV_LASTBUS_ABS"
/// - nan_treatment: "NA_NOTHING"
/// - expressions: empty Vec
impl Default for DQTimeseriesRequestArgs { impl Default for DQTimeseriesRequestArgs {
fn default() -> Self { fn default() -> Self {
DQTimeseriesRequestArgs { DQTimeseriesRequestArgs {
@ -76,12 +87,14 @@ impl Default for DQTimeseriesRequestArgs {
} }
} }
/// Response from the DataQuery API /// Response from the DataQuery API.
#[derive(Deserialize, Debug)] #[derive(Deserialize, Debug)]
pub struct DQTimeSeriesResponse { pub struct DQTimeSeriesResponse {
instruments: Vec<Instrument>, instruments: Vec<Instrument>,
} }
/// Response from the DataQuery API for a catalogue request.
#[allow(dead_code)]
#[derive(Deserialize, Debug)] #[derive(Deserialize, Debug)]
pub struct DQCatalogueResponse { pub struct DQCatalogueResponse {
pub items: u32, pub items: u32,
@ -89,7 +102,9 @@ pub struct DQCatalogueResponse {
pub all_instruments: Vec<String>, pub all_instruments: Vec<String>,
} }
/// Methods for DQCatalogueResponse
impl DQCatalogueResponse { impl DQCatalogueResponse {
/// Create a new DQCatalogueResponse from a list of DQCatalogueSingleResponse objects.
pub fn new(catalogue_responses: Vec<DQCatalogueSingleResponse>) -> Self { pub fn new(catalogue_responses: Vec<DQCatalogueSingleResponse>) -> Self {
let all_instruments: Vec<String> = catalogue_responses let all_instruments: Vec<String> = catalogue_responses
.iter() .iter()
@ -109,6 +124,8 @@ impl DQCatalogueResponse {
} }
} }
/// Response from the DataQuery API for a single catalogue request (one page).
#[allow(dead_code)]
#[derive(Deserialize, Debug)] #[derive(Deserialize, Debug)]
pub struct DQCatalogueSingleResponse { pub struct DQCatalogueSingleResponse {
pub links: Vec<HashMap<String, Option<String>>>, pub links: Vec<HashMap<String, Option<String>>>,
@ -116,6 +133,8 @@ pub struct DQCatalogueSingleResponse {
pub instruments: Vec<DQCatalogueInstrument>, pub instruments: Vec<DQCatalogueInstrument>,
} }
/// Representation of DQCatalogueSingleResponse.Instrument
#[allow(dead_code)]
#[derive(Deserialize, Debug)] #[derive(Deserialize, Debug)]
pub struct DQCatalogueInstrument { pub struct DQCatalogueInstrument {
#[serde(rename = "instrument-id")] #[serde(rename = "instrument-id")]
@ -178,6 +197,7 @@ pub struct DQTimeSeries {
time_series: Vec<(String, Option<f64>)>, time_series: Vec<(String, Option<f64>)>,
} }
/// Representation of a JPMaQS indicator (1 or more time series for a single JPMaQS ticker)
#[derive(Debug)] #[derive(Debug)]
pub struct JPMaQSIndicator { pub struct JPMaQSIndicator {
pub df: DataFrame, pub df: DataFrame,
@ -185,6 +205,7 @@ pub struct JPMaQSIndicator {
pub metrics: Vec<String>, pub metrics: Vec<String>,
} }
/// Methods for DQTimeSeries
impl DQTimeSeries { impl DQTimeSeries {
/// Get the ticker from the expression /// Get the ticker from the expression
pub fn get_ticker(&self) -> Result<String, Box<dyn Error>> { pub fn get_ticker(&self) -> Result<String, Box<dyn Error>> {
@ -234,6 +255,7 @@ impl DQTimeSeries {
} }
} }
/// Methods for DQTimeSeriesResponse
impl DQTimeSeriesResponse { impl DQTimeSeriesResponse {
/// Return a list of all expressions in the response /// Return a list of all expressions in the response
pub fn list_expressions(&self) -> Vec<String> { pub fn list_expressions(&self) -> Vec<String> {
@ -280,8 +302,10 @@ impl DQTimeSeriesResponse {
timeseries_by_ticker.into_iter().map(|(_, v)| v).collect() timeseries_by_ticker.into_iter().map(|(_, v)| v).collect()
} }
/// Consume the DQTimeSeriesResponse by grouping the time series by ticker.
/// This function can only be called once as it takes ownership of the data. /// Return a list of lists of DQTimeSeries, where each list contains all the timeseries for each ticker.
/// The function consumes the data, leaving an empty vector in its place.
/// This function can only be called once as it transfers ownership of the data.
pub fn consume_to_grouped_by_ticker(mut self) -> Vec<Vec<DQTimeSeries>> { pub fn consume_to_grouped_by_ticker(mut self) -> Vec<Vec<DQTimeSeries>> {
// Take the instruments vector, leaving an empty one in its place. // Take the instruments vector, leaving an empty one in its place.
let instruments = std::mem::take(&mut self.instruments); let instruments = std::mem::take(&mut self.instruments);
@ -307,7 +331,9 @@ impl DQTimeSeriesResponse {
} }
} }
/// Methods for JPMaQSIndicator
impl JPMaQSIndicator { impl JPMaQSIndicator {
/// Create a new JPMaQSIndicator from a list of DQTimeSeries, ensuring they all belong to the same ticker
pub fn new(timeseries_list: Vec<DQTimeSeries>) -> Result<Self, Box<dyn Error>> { pub fn new(timeseries_list: Vec<DQTimeSeries>) -> Result<Self, Box<dyn Error>> {
let found_tickers = timeseries_list let found_tickers = timeseries_list
.iter() .iter()
@ -331,6 +357,7 @@ impl JPMaQSIndicator {
}) })
} }
/// Add a single time series to the JPMaQSIndicator DataFrame
pub fn add_timeseries(&mut self, timeseries: DQTimeSeries) -> Result<(), Box<dyn Error>> { pub fn add_timeseries(&mut self, timeseries: DQTimeSeries) -> Result<(), Box<dyn Error>> {
if self.ticker != timeseries.get_ticker()? { if self.ticker != timeseries.get_ticker()? {
return Err("Timeseries does not belong to the same ticker".into()); return Err("Timeseries does not belong to the same ticker".into());
@ -339,6 +366,7 @@ impl JPMaQSIndicator {
Ok(()) Ok(())
} }
/// Convert the JPMaQSIndicator to a standard JPMaQS Quantamental DataFrame (<https://docs.macrosynergy.com/stable/common_definitions.html#the-quantamental-data-format-qdf>)
pub fn as_qdf(&self) -> Result<DataFrame, Box<dyn Error>> { pub fn as_qdf(&self) -> Result<DataFrame, Box<dyn Error>> {
let mut qdf = self.df.clone(); let mut qdf = self.df.clone();
let (cid, xcat) = match self.ticker.split_once('_') { let (cid, xcat) = match self.ticker.split_once('_') {
@ -353,6 +381,7 @@ impl JPMaQSIndicator {
Ok(qdf) Ok(qdf)
} }
/// Save the JPMaQSIndicator to a CSV file
pub fn save_qdf_to_csv(&self, filename: &str) -> Result<(), Box<dyn Error>> { pub fn save_qdf_to_csv(&self, filename: &str) -> Result<(), Box<dyn Error>> {
save_qdf_to_csv(&mut self.as_qdf()?, filename) save_qdf_to_csv(&mut self.as_qdf()?, filename)
} }

View File

@ -1,8 +1,9 @@
use crate::download::oauth_client::OAuthClient; use crate::download::oauth_client::OAuthClient;
use crate::download::requester::DQRequester; use crate::download::requester::DQRequester;
use crate::download::timeseries::DQTimeSeriesResponse; use crate::download::helpers::DQTimeSeriesResponse;
use crate::download::timeseries::DQTimeseriesRequestArgs; use crate::download::helpers::DQTimeseriesRequestArgs;
use crate::download::timeseries::JPMaQSIndicator; use crate::download::helpers::JPMaQSIndicator;
// use polars::prelude::*;
use std::error::Error; use std::error::Error;
const DEFAULT_JPMAQS_METRICS: [&str; 4] = ["value", "grading", "eop_lag", "mop_lag"]; const DEFAULT_JPMAQS_METRICS: [&str; 4] = ["value", "grading", "eop_lag", "mop_lag"];
@ -59,6 +60,7 @@ impl Default for JPMaQSDownloadGetIndicatorArgs {
} }
} }
/// Struct for downloading data from the JPMaQS data from JPMorgan DataQuery API.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct JPMaQSDownload { pub struct JPMaQSDownload {
requester: DQRequester, requester: DQRequester,
@ -72,21 +74,25 @@ impl Default for JPMaQSDownload {
} }
impl JPMaQSDownload { impl JPMaQSDownload {
/// Create a new JPMaQSDownload instance with the provided client ID and client secret.
pub fn new(client_id: String, client_secret: String) -> Self { pub fn new(client_id: String, client_secret: String) -> Self {
let oauth_client = OAuthClient::new(client_id.clone(), client_secret.clone()); let oauth_client = OAuthClient::new(client_id.clone(), client_secret.clone());
let requester = DQRequester::new(oauth_client); let requester = DQRequester::new(oauth_client);
JPMaQSDownload { requester } JPMaQSDownload { requester }
} }
/// Check the connection to the DataQuery API.
pub fn check_connection(&mut self) -> Result<(), Box<dyn Error>> { pub fn check_connection(&mut self) -> Result<(), Box<dyn Error>> {
self.requester.check_connection() self.requester.check_connection()
} }
/// Get the catalogue of tickers available in the JPMaQS data.
pub fn get_catalogue(&mut self) -> Result<Vec<String>, Box<dyn Error>> { pub fn get_catalogue(&mut self) -> Result<Vec<String>, Box<dyn Error>> {
let dq_catalogue = self.requester.get_catalogue("JPMAQS", 1000)?; let dq_catalogue = self.requester.get_catalogue("JPMAQS", 1000)?;
Ok(dq_catalogue.all_instruments) Ok(dq_catalogue.all_instruments)
} }
/// Get the time series data for the provided expressions.
pub fn get_expressions( pub fn get_expressions(
&mut self, &mut self,
expressions: Vec<String>, expressions: Vec<String>,
@ -102,7 +108,8 @@ impl JPMaQSDownload {
Ok(dqts_vec) Ok(dqts_vec)
} }
pub fn get_indicators( /// Get the indicators for the provided tickers and metrics.
pub fn get_indicators_list(
&mut self, &mut self,
download_args: JPMaQSDownloadGetIndicatorArgs, download_args: JPMaQSDownloadGetIndicatorArgs,
) -> Result<Vec<JPMaQSIndicator>, Box<dyn Error>> { ) -> Result<Vec<JPMaQSIndicator>, Box<dyn Error>> {
@ -129,4 +136,33 @@ impl JPMaQSDownload {
Err(e) => Err(e), Err(e) => Err(e),
} }
} }
pub fn get_indicators_qdf(
&mut self,
download_args: JPMaQSDownloadGetIndicatorArgs,
) -> Result<polars::prelude::DataFrame, Box<dyn Error>> {
let mut indicators: Vec<JPMaQSIndicator> = self.get_indicators_list(download_args)?;
if indicators.is_empty() {
return Err("No indicators retrieved".into());
}
if indicators.len() == 1 {
return indicators.pop().unwrap().as_qdf();
}
assert!(indicators.len() > 1);
let mut df_main = indicators.pop().unwrap().as_qdf().unwrap();
while !indicators.is_empty() {
let df = indicators.pop().unwrap().as_qdf().unwrap();
df_main = df_main.vstack(&df).unwrap();
}
// sort by cid, xcat, real_date in that order
let _ = df_main.sort_in_place(
[
"cid".to_string(),
"xcat".to_string(),
"real_date".to_string(),
],
polars::chunked_array::ops::SortMultipleOptions::default(),
);
Ok(df_main)
}
} }

View File

@ -1,5 +1,5 @@
pub mod jpmaqsdownload; pub mod jpmaqsdownload;
pub mod helpers;
pub mod oauth_client; pub mod oauth_client;
pub mod requester; pub mod requester;
pub mod timeseries;
pub mod parreq; pub mod parreq;

View File

@ -47,7 +47,6 @@ impl OAuthClient {
let json: Value = response.json()?; let json: Value = response.json()?;
if let Some(token) = json["access_token"].as_str() { if let Some(token) = json["access_token"].as_str() {
self.access_token = Some(token.to_string()); self.access_token = Some(token.to_string());
println!("Access token retrieved; token length: {}", token.len());
if let Some(expires_in) = json["expires_in"].as_u64() { if let Some(expires_in) = json["expires_in"].as_u64() {
self.expires_at = Some(SystemTime::now() + Duration::from_secs(expires_in)); self.expires_at = Some(SystemTime::now() + Duration::from_secs(expires_in));
} else { } else {

View File

@ -1,5 +1,5 @@
use crate::download::oauth_client::OAuthClient; use crate::download::oauth_client::OAuthClient;
use crate::download::timeseries::DQTimeseriesRequestArgs; use crate::download::helpers::DQTimeseriesRequestArgs;
use futures::future; use futures::future;
use reqwest::header::{HeaderMap, HeaderName, HeaderValue}; use reqwest::header::{HeaderMap, HeaderName, HeaderValue};
use std::error::Error; use std::error::Error;

View File

@ -1,10 +1,10 @@
use crate::download::helpers::DQCatalogueResponse;
use crate::download::helpers::DQCatalogueSingleResponse;
use crate::download::helpers::DQTimeSeriesResponse;
use crate::download::helpers::DQTimeseriesRequestArgs;
use crate::download::helpers::JPMaQSIndicator;
use crate::download::oauth_client::OAuthClient; use crate::download::oauth_client::OAuthClient;
use crate::download::parreq::ParallelRequester; use crate::download::parreq::ParallelRequester;
use crate::download::timeseries::DQCatalogueResponse;
use crate::download::timeseries::DQCatalogueSingleResponse;
use crate::download::timeseries::DQTimeSeriesResponse;
use crate::download::timeseries::DQTimeseriesRequestArgs;
use crate::download::timeseries::JPMaQSIndicator;
use rayon::iter::{IntoParallelIterator, ParallelIterator}; use rayon::iter::{IntoParallelIterator, ParallelIterator};
use reqwest; use reqwest;
use reqwest::blocking::Client; use reqwest::blocking::Client;
@ -35,6 +35,7 @@ impl Default for DQRequester {
} }
} }
#[allow(dead_code)]
impl DQRequester { impl DQRequester {
pub fn new(oauth_client: OAuthClient) -> Self { pub fn new(oauth_client: OAuthClient) -> Self {
DQRequester { DQRequester {
@ -75,7 +76,6 @@ impl DQRequester {
pub fn check_connection(&mut self) -> Result<(), Box<dyn Error>> { pub fn check_connection(&mut self) -> Result<(), Box<dyn Error>> {
let response = self._request(reqwest::Method::GET, HEARTBEAT_ENDPOINT)?; let response = self._request(reqwest::Method::GET, HEARTBEAT_ENDPOINT)?;
println!("Connection is successful: {}", response.status()); println!("Connection is successful: {}", response.status());
Ok(()) Ok(())
} }
@ -124,7 +124,8 @@ impl DQRequester {
Ok(DQCatalogueResponse::new(responses)) Ok(DQCatalogueResponse::new(responses))
} }
pub fn _fetch_single_timeseries_batch( /// Fetches a single batch of timeseries data from the DataQuery API.
fn _fetch_single_timeseries_batch(
&mut self, &mut self,
args: DQTimeseriesRequestArgs, args: DQTimeseriesRequestArgs,
) -> Result<reqwest::blocking::Response, Box<dyn Error>> { ) -> Result<reqwest::blocking::Response, Box<dyn Error>> {
@ -144,6 +145,8 @@ impl DQRequester {
Ok(response) Ok(response)
} }
/// Makes parallel requests to the DataQuery API to fetch timeseries data.
/// The function returns a vector of DQTimeSeriesResponse objects.
pub fn get_timeseries( pub fn get_timeseries(
&mut self, &mut self,
args: DQTimeseriesRequestArgs, args: DQTimeseriesRequestArgs,
@ -162,43 +165,28 @@ impl DQRequester {
Ok(dqts_vec) Ok(dqts_vec)
} }
/// Makes parallel requests to the DataQuery API to fetch timeseries data.
/// The function returns a vector of JPMaQSIndicator objects.
/// This function is preferred as it consumes less memory than get_timeseries.
pub fn get_timeseries_as_jpmaqs_indicators( pub fn get_timeseries_as_jpmaqs_indicators(
&mut self, &mut self,
args: DQTimeseriesRequestArgs, args: DQTimeseriesRequestArgs,
) -> Result<Vec<JPMaQSIndicator>, Box<dyn Error>> { ) -> Result<Vec<JPMaQSIndicator>, Box<dyn Error>> {
let max_retries = 5; let max_retries = 5;
println!(
"Invoking ParallelRequester for {:?} expressions",
args.expressions.len()
);
let mut pq = ParallelRequester::new(self.oauth_client.clone()); let mut pq = ParallelRequester::new(self.oauth_client.clone());
let start = std::time::Instant::now();
let mut response_texts = match pq.request_expressions(args, max_retries) { let mut response_texts = match pq.request_expressions(args, max_retries) {
Ok(r) => r, Ok(r) => r,
Err(e) => return Err(e), Err(e) => return Err(e),
}; };
println!(
"Time elapsed for pq.request_expressions: {:?}",
start.elapsed()
);
// sleep for 10 seconds
println!("Pausing for 10 seconds");
std::thread::sleep(std::time::Duration::from_secs(10));
println!("Resuming - parsing response texts to JPMaQSIndicators");
let jpmaqs_indicators: Vec<JPMaQSIndicator> = let jpmaqs_indicators: Vec<JPMaQSIndicator> =
parse_response_texts_to_jpmaqs_indicators(&mut response_texts); parse_response_texts_to_jpmaqs_indicators(&mut response_texts);
// Sleep for 10 seconds
println!("Pausing for 10 seconds");
std::thread::sleep(std::time::Duration::from_secs(10));
println!("Resuming");
Ok(jpmaqs_indicators) Ok(jpmaqs_indicators)
} }
} }
/// Parses a vector of response texts into a vector of DQTimeSeriesResponse objects.
fn parse_response_texts(response_texts: Vec<String>) -> Vec<DQTimeSeriesResponse> { fn parse_response_texts(response_texts: Vec<String>) -> Vec<DQTimeSeriesResponse> {
response_texts response_texts
.into_par_iter() .into_par_iter()
@ -215,6 +203,7 @@ fn parse_response_texts(response_texts: Vec<String>) -> Vec<DQTimeSeriesResponse
.collect() .collect()
} }
/// Parses a vector of response texts into a vector of JPMaQSIndicator objects.
fn parse_response_texts_to_jpmaqs_indicators( fn parse_response_texts_to_jpmaqs_indicators(
response_texts: &mut Vec<String>, response_texts: &mut Vec<String>,
) -> Vec<JPMaQSIndicator> { ) -> Vec<JPMaQSIndicator> {
@ -261,4 +250,3 @@ fn parse_response_texts_to_jpmaqs_indicators(
println!("Number of responses left: {}", response_texts.len()); println!("Number of responses left: {}", response_texts.len());
jpmaqs_indicators_map.into_iter().map(|(_, v)| v).collect() jpmaqs_indicators_map.into_iter().map(|(_, v)| v).collect()
} }

View File

@ -17,7 +17,7 @@ fn main() {
start.elapsed() start.elapsed()
); );
let num_ticks = 5000; let num_ticks = 20;
let sel_tickers: Vec<String> = tickers let sel_tickers: Vec<String> = tickers
.iter() .iter()
.take(num_ticks) .take(num_ticks)
@ -27,9 +27,10 @@ fn main() {
println!("Retrieving indicators for {} tickers", sel_tickers.len()); println!("Retrieving indicators for {} tickers", sel_tickers.len());
start = std::time::Instant::now(); start = std::time::Instant::now();
let indicators = jpamqs_download let mut res_df: DataFrame = jpamqs_download
.get_indicators(JPMaQSDownloadGetIndicatorArgs { .get_indicators_qdf(JPMaQSDownloadGetIndicatorArgs {
tickers: sel_tickers.clone(), tickers: sel_tickers.clone(),
start_date: "2024-11-05".to_string(),
..Default::default() ..Default::default()
}) })
.unwrap(); .unwrap();
@ -40,27 +41,36 @@ fn main() {
start.elapsed() start.elapsed()
); );
// sleep for 10 seconds // append _ to every cid
println!("Sleeping for 10 seconds..."); let cid_vec: Vec<String> = res_df
std::thread::sleep(std::time::Duration::from_secs(10)); .column("cid")
println!("concatting to mega DataFrame"); .unwrap()
.str()
.unwrap()
.into_iter()
.map(|s| s.unwrap_or("").to_string())
.collect();
let xcat_vec: Vec<String> = res_df
.column("xcat")
.unwrap()
.str()
.unwrap()
.into_iter()
.map(|s| s.unwrap_or("").to_string())
.collect();
start = std::time::Instant::now(); let mut tickers_set = std::collections::HashSet::new();
// let mut qdf_list = Vec::new(); for (cid, xcat) in cid_vec.iter().zip(xcat_vec.iter()) {
let mega_df = indicators tickers_set.insert(cid.to_string() + "_" + xcat);
.iter() }
.map(|indicator| indicator.as_qdf().unwrap())
.fold(DataFrame::new(vec![]).unwrap(), |acc, df| {
acc.vstack(&df).unwrap()
});
// // save this df to disk
let es = mega_df.estimated_size(); let file_name = "data/jpmaqs_indicators_qdf.csv";
let es_mb = es as f64 / 1_048_576.0; let file = std::fs::File::create(file_name).unwrap();
println!("Estimated size of DataFrame: {:.2} MB", es_mb); let mut csv_writer = CsvWriter::new(file);
println!("Sleeping for 10 seconds..."); csv_writer.finish(&mut res_df).unwrap();
println!(
"Converted indicators to DataFrames in {:?}", // print len of tickers_set
start.elapsed() println!("Unique tickers: {}", tickers_set.len());
); println!("DataFrame shape: {:?}", res_df.shape());
} }