mirror of https://github.com/Magnus167/msyrs.git
synced 2025-08-20 13:00:01 +00:00

commit c69454fe2f (parent: c9971dc2e3)

    working
@@ -1,6 +0,0 @@
[package]
name = "docs"
version = "0.1.0"
edition = "2021"

[dependencies]
@@ -1,14 +0,0 @@
pub fn add(left: u64, right: u64) -> u64 {
    left + right
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn it_works() {
        let result = add(2, 2);
        assert_eq!(result, 4);
    }
}
@@ -8,6 +8,7 @@ use std::collections::HashSet;
use std::error::Error;
use std::fs::File;

/// Arguments for the DataQuery API request
#[derive(Debug, Clone)]
pub struct DQTimeseriesRequestArgs {
    pub start_date: String,
@@ -19,6 +20,7 @@ pub struct DQTimeseriesRequestArgs {
    pub expressions: Vec<String>,
}

#[allow(dead_code)]
impl DQTimeseriesRequestArgs {
    pub fn new(
        start_date: &str,
@@ -62,6 +64,15 @@ impl DQTimeseriesRequestArgs {
    }
}

/// Default implementation for DQTimeseriesRequestArgs
/// The default values are set:
/// - start_date: "1990-01-01"
/// - end_date: "TODAY+2D"
/// - calendar: "CAL_ALLDAYS"
/// - frequency: "FREQ_DAY"
/// - conversion: "CONV_LASTBUS_ABS"
/// - nan_treatment: "NA_NOTHING"
/// - expressions: empty Vec
impl Default for DQTimeseriesRequestArgs {
    fn default() -> Self {
        DQTimeseriesRequestArgs {
@@ -76,12 +87,14 @@ impl Default for DQTimeseriesRequestArgs {
    }
}

/// Response from the DataQuery API
/// Response from the DataQuery API.
#[derive(Deserialize, Debug)]
pub struct DQTimeSeriesResponse {
    instruments: Vec<Instrument>,
}

/// Response from the DataQuery API for a catalogue request.
#[allow(dead_code)]
#[derive(Deserialize, Debug)]
pub struct DQCatalogueResponse {
    pub items: u32,
@@ -89,7 +102,9 @@ pub struct DQCatalogueResponse {
    pub all_instruments: Vec<String>,
}

/// Methods for DQCatalogueResponse
impl DQCatalogueResponse {
    /// Create a new DQCatalogueResponse from a list of DQCatalogueSingleResponse objects.
    pub fn new(catalogue_responses: Vec<DQCatalogueSingleResponse>) -> Self {
        let all_instruments: Vec<String> = catalogue_responses
            .iter()
@@ -109,6 +124,8 @@ impl DQCatalogueResponse {
    }
}

/// Response from the DataQuery API for a single catalogue request (one page).
#[allow(dead_code)]
#[derive(Deserialize, Debug)]
pub struct DQCatalogueSingleResponse {
    pub links: Vec<HashMap<String, Option<String>>>,
@@ -116,6 +133,8 @@ pub struct DQCatalogueSingleResponse {
    pub instruments: Vec<DQCatalogueInstrument>,
}

/// Representation of DQCatalogueSingleResponse.Instrument
#[allow(dead_code)]
#[derive(Deserialize, Debug)]
pub struct DQCatalogueInstrument {
    #[serde(rename = "instrument-id")]
@@ -178,6 +197,7 @@ pub struct DQTimeSeries {
    time_series: Vec<(String, Option<f64>)>,
}

/// Representation of a JPMaQS indicator (1 or more time series for a single JPMaQS ticker)
#[derive(Debug)]
pub struct JPMaQSIndicator {
    pub df: DataFrame,
@@ -185,6 +205,7 @@ pub struct JPMaQSIndicator {
    pub metrics: Vec<String>,
}

/// Methods for DQTimeSeries
impl DQTimeSeries {
    /// Get the ticker from the expression
    pub fn get_ticker(&self) -> Result<String, Box<dyn Error>> {
@@ -234,6 +255,7 @@ impl DQTimeSeries {
    }
}

/// Methods for DQTimeSeriesResponse
impl DQTimeSeriesResponse {
    /// Return a list of all expressions in the response
    pub fn list_expressions(&self) -> Vec<String> {
@@ -280,8 +302,10 @@ impl DQTimeSeriesResponse {

        timeseries_by_ticker.into_iter().map(|(_, v)| v).collect()
    }
    /// Consume the DQTimeSeriesResponse by grouping the time series by ticker.
    /// This function can only be called once as it takes ownership of the data.

    /// Return a list of lists of DQTimeSeries, where each list contains all the timeseries for each ticker.
    /// The function consumes the data, leaving an empty vector in its place.
    /// This function can only be called once as it transfers ownership of the data.
    pub fn consume_to_grouped_by_ticker(mut self) -> Vec<Vec<DQTimeSeries>> {
        // Take the instruments vector, leaving an empty one in its place.
        let instruments = std::mem::take(&mut self.instruments);
@@ -307,7 +331,9 @@ impl DQTimeSeriesResponse {
    }
}

/// Methods for JPMaQSIndicator
impl JPMaQSIndicator {
    /// Create a new JPMaQSIndicator from a list of DQTimeSeries, ensuring they all belong to the same ticker
    pub fn new(timeseries_list: Vec<DQTimeSeries>) -> Result<Self, Box<dyn Error>> {
        let found_tickers = timeseries_list
            .iter()
@@ -331,6 +357,7 @@ impl JPMaQSIndicator {
        })
    }

    /// Add a single time series to the JPMaQSIndicator DataFrame
    pub fn add_timeseries(&mut self, timeseries: DQTimeSeries) -> Result<(), Box<dyn Error>> {
        if self.ticker != timeseries.get_ticker()? {
            return Err("Timeseries does not belong to the same ticker".into());
@@ -339,6 +366,7 @@ impl JPMaQSIndicator {
        Ok(())
    }

    /// Convert the JPMaQSIndicator to a standard JPMaQS Quantamental DataFrame (<https://docs.macrosynergy.com/stable/common_definitions.html#the-quantamental-data-format-qdf>)
    pub fn as_qdf(&self) -> Result<DataFrame, Box<dyn Error>> {
        let mut qdf = self.df.clone();
        let (cid, xcat) = match self.ticker.split_once('_') {
@@ -353,6 +381,7 @@ impl JPMaQSIndicator {
        Ok(qdf)
    }

    /// Save the JPMaQSIndicator to a CSV file
    pub fn save_qdf_to_csv(&self, filename: &str) -> Result<(), Box<dyn Error>> {
        save_qdf_to_csv(&mut self.as_qdf()?, filename)
    }
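Editor's note: a minimal usage sketch of the request-args struct above, assuming only that the Default impl and the pub start_date / expressions fields shown in this diff exist; the expression string is a made-up placeholder, not code from this commit.

    let args = DQTimeseriesRequestArgs {
        start_date: "2020-01-01".to_string(),
        expressions: vec!["DB(JPMAQS,USD_EQXR_NSA,value)".to_string()],
        // remaining fields take the documented defaults
        ..Default::default()
    };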
@@ -1,8 +1,9 @@
use crate::download::oauth_client::OAuthClient;
use crate::download::requester::DQRequester;
use crate::download::timeseries::DQTimeSeriesResponse;
use crate::download::timeseries::DQTimeseriesRequestArgs;
use crate::download::timeseries::JPMaQSIndicator;
use crate::download::helpers::DQTimeSeriesResponse;
use crate::download::helpers::DQTimeseriesRequestArgs;
use crate::download::helpers::JPMaQSIndicator;
// use polars::prelude::*;
use std::error::Error;

const DEFAULT_JPMAQS_METRICS: [&str; 4] = ["value", "grading", "eop_lag", "mop_lag"];
@@ -59,6 +60,7 @@ impl Default for JPMaQSDownloadGetIndicatorArgs {
    }
}

/// Struct for downloading data from the JPMaQS data from JPMorgan DataQuery API.
#[derive(Debug, Clone)]
pub struct JPMaQSDownload {
    requester: DQRequester,
@@ -72,21 +74,25 @@ impl Default for JPMaQSDownload {
}

impl JPMaQSDownload {
    /// Create a new JPMaQSDownload instance with the provided client ID and client secret.
    pub fn new(client_id: String, client_secret: String) -> Self {
        let oauth_client = OAuthClient::new(client_id.clone(), client_secret.clone());
        let requester = DQRequester::new(oauth_client);
        JPMaQSDownload { requester }
    }

    /// Check the connection to the DataQuery API.
    pub fn check_connection(&mut self) -> Result<(), Box<dyn Error>> {
        self.requester.check_connection()
    }

    /// Get the catalogue of tickers available in the JPMaQS data.
    pub fn get_catalogue(&mut self) -> Result<Vec<String>, Box<dyn Error>> {
        let dq_catalogue = self.requester.get_catalogue("JPMAQS", 1000)?;
        Ok(dq_catalogue.all_instruments)
    }

    /// Get the time series data for the provided expressions.
    pub fn get_expressions(
        &mut self,
        expressions: Vec<String>,
@@ -102,7 +108,8 @@ impl JPMaQSDownload {
        Ok(dqts_vec)
    }

    pub fn get_indicators(
    /// Get the indicators for the provided tickers and metrics.
    pub fn get_indicators_list(
        &mut self,
        download_args: JPMaQSDownloadGetIndicatorArgs,
    ) -> Result<Vec<JPMaQSIndicator>, Box<dyn Error>> {
@@ -129,4 +136,33 @@ impl JPMaQSDownload {
            Err(e) => Err(e),
        }
    }

    pub fn get_indicators_qdf(
        &mut self,
        download_args: JPMaQSDownloadGetIndicatorArgs,
    ) -> Result<polars::prelude::DataFrame, Box<dyn Error>> {
        let mut indicators: Vec<JPMaQSIndicator> = self.get_indicators_list(download_args)?;
        if indicators.is_empty() {
            return Err("No indicators retrieved".into());
        }
        if indicators.len() == 1 {
            return indicators.pop().unwrap().as_qdf();
        }
        assert!(indicators.len() > 1);
        let mut df_main = indicators.pop().unwrap().as_qdf().unwrap();
        while !indicators.is_empty() {
            let df = indicators.pop().unwrap().as_qdf().unwrap();
            df_main = df_main.vstack(&df).unwrap();
        }
        // sort by cid, xcat, real_date in that order
        let _ = df_main.sort_in_place(
            [
                "cid".to_string(),
                "xcat".to_string(),
                "real_date".to_string(),
            ],
            polars::chunked_array::ops::SortMultipleOptions::default(),
        );
        Ok(df_main)
    }
}
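Editor's note: a brief sketch of how the new get_indicators_qdf entry point is called, mirroring the pattern in src/main.rs further down; the credential strings and the ticker are placeholders, not values from this repository.

    let mut dl = JPMaQSDownload::new("client-id".to_string(), "client-secret".to_string());
    dl.check_connection().unwrap();
    let qdf = dl
        .get_indicators_qdf(JPMaQSDownloadGetIndicatorArgs {
            tickers: vec!["USD_EQXR_NSA".to_string()],
            start_date: "2024-11-05".to_string(),
            ..Default::default()
        })
        .unwrap();
    println!("QDF shape: {:?}", qdf.shape());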
@@ -1,5 +1,5 @@
pub mod jpmaqsdownload;
pub mod helpers;
pub mod oauth_client;
pub mod requester;
pub mod timeseries;
pub mod parreq;
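Editor's note: with helpers registered here, downstream imports would come from the new path; a sketch assuming the crate is named msyrs (per the repository URL) and that the download module is public, neither of which this diff shows directly.

    use msyrs::download::helpers::DQTimeseriesRequestArgs;
    use msyrs::download::jpmaqsdownload::JPMaQSDownload;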
@@ -47,7 +47,6 @@ impl OAuthClient {
        let json: Value = response.json()?;
        if let Some(token) = json["access_token"].as_str() {
            self.access_token = Some(token.to_string());
            println!("Access token retrieved; token length: {}", token.len());
            if let Some(expires_in) = json["expires_in"].as_u64() {
                self.expires_at = Some(SystemTime::now() + Duration::from_secs(expires_in));
            } else {
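Editor's note: as context for the expires_at bookkeeping above, a minimal sketch of how a stored expiry timestamp is typically checked before reusing a token; this helper is an assumption, not code from this repository.

    use std::time::SystemTime;

    // Returns true when no expiry is recorded or the recorded deadline has passed.
    fn token_expired(expires_at: Option<SystemTime>) -> bool {
        match expires_at {
            Some(deadline) => SystemTime::now() >= deadline,
            None => true,
        }
    }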
@@ -1,5 +1,5 @@
use crate::download::oauth_client::OAuthClient;
use crate::download::timeseries::DQTimeseriesRequestArgs;
use crate::download::helpers::DQTimeseriesRequestArgs;
use futures::future;
use reqwest::header::{HeaderMap, HeaderName, HeaderValue};
use std::error::Error;
@@ -1,10 +1,10 @@
use crate::download::helpers::DQCatalogueResponse;
use crate::download::helpers::DQCatalogueSingleResponse;
use crate::download::helpers::DQTimeSeriesResponse;
use crate::download::helpers::DQTimeseriesRequestArgs;
use crate::download::helpers::JPMaQSIndicator;
use crate::download::oauth_client::OAuthClient;
use crate::download::parreq::ParallelRequester;
use crate::download::timeseries::DQCatalogueResponse;
use crate::download::timeseries::DQCatalogueSingleResponse;
use crate::download::timeseries::DQTimeSeriesResponse;
use crate::download::timeseries::DQTimeseriesRequestArgs;
use crate::download::timeseries::JPMaQSIndicator;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use reqwest;
use reqwest::blocking::Client;
@@ -35,6 +35,7 @@ impl Default for DQRequester {
    }
}

#[allow(dead_code)]
impl DQRequester {
    pub fn new(oauth_client: OAuthClient) -> Self {
        DQRequester {
@@ -75,7 +76,6 @@ impl DQRequester {

    pub fn check_connection(&mut self) -> Result<(), Box<dyn Error>> {
        let response = self._request(reqwest::Method::GET, HEARTBEAT_ENDPOINT)?;

        println!("Connection is successful: {}", response.status());
        Ok(())
    }
@@ -124,7 +124,8 @@ impl DQRequester {
        Ok(DQCatalogueResponse::new(responses))
    }

    pub fn _fetch_single_timeseries_batch(
    /// Fetches a single batch of timeseries data from the DataQuery API.
    fn _fetch_single_timeseries_batch(
        &mut self,
        args: DQTimeseriesRequestArgs,
    ) -> Result<reqwest::blocking::Response, Box<dyn Error>> {
@@ -144,6 +145,8 @@ impl DQRequester {
        Ok(response)
    }

    /// Makes parallel requests to the DataQuery API to fetch timeseries data.
    /// The function returns a vector of DQTimeSeriesResponse objects.
    pub fn get_timeseries(
        &mut self,
        args: DQTimeseriesRequestArgs,
@@ -162,43 +165,28 @@ impl DQRequester {
        Ok(dqts_vec)
    }

    /// Makes parallel requests to the DataQuery API to fetch timeseries data.
    /// The function returns a vector of JPMaQSIndicator objects.
    /// This function is preferred as it consumes less memory than get_timeseries.
    pub fn get_timeseries_as_jpmaqs_indicators(
        &mut self,
        args: DQTimeseriesRequestArgs,
    ) -> Result<Vec<JPMaQSIndicator>, Box<dyn Error>> {
        let max_retries = 5;
        println!(
            "Invoking ParallelRequester for {:?} expressions",
            args.expressions.len()
        );
        let mut pq = ParallelRequester::new(self.oauth_client.clone());
        let start = std::time::Instant::now();

        let mut response_texts = match pq.request_expressions(args, max_retries) {
            Ok(r) => r,
            Err(e) => return Err(e),
        };
        println!(
            "Time elapsed for pq.request_expressions: {:?}",
            start.elapsed()
        );

        // sleep for 10 seconds
        println!("Pausing for 10 seconds");
        std::thread::sleep(std::time::Duration::from_secs(10));
        println!("Resuming - parsing response texts to JPMaQSIndicators");

        let jpmaqs_indicators: Vec<JPMaQSIndicator> =
            parse_response_texts_to_jpmaqs_indicators(&mut response_texts);
        // Sleep for 10 seconds
        println!("Pausing for 10 seconds");
        std::thread::sleep(std::time::Duration::from_secs(10));
        println!("Resuming");

        Ok(jpmaqs_indicators)
    }
}

/// Parses a vector of response texts into a vector of DQTimeSeriesResponse objects.
fn parse_response_texts(response_texts: Vec<String>) -> Vec<DQTimeSeriesResponse> {
    response_texts
        .into_par_iter()
@@ -215,6 +203,7 @@ fn parse_response_texts(response_texts: Vec<String>) -> Vec<DQTimeSeriesResponse
        .collect()
}

/// Parses a vector of response texts into a vector of JPMaQSIndicator objects.
fn parse_response_texts_to_jpmaqs_indicators(
    response_texts: &mut Vec<String>,
) -> Vec<JPMaQSIndicator> {
@@ -261,4 +250,3 @@ fn parse_response_texts_to_jpmaqs_indicators(
    println!("Number of responses left: {}", response_texts.len());
    jpmaqs_indicators_map.into_iter().map(|(_, v)| v).collect()
}
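Editor's note: to illustrate the into_par_iter pattern used by the parsing helpers above, a generic sketch; the helper name and the serde_json payload type are placeholders, not code from this commit.

    use rayon::prelude::*;

    // Deserialize each response body in parallel, dropping bodies that fail to parse.
    fn parse_bodies(bodies: Vec<String>) -> Vec<serde_json::Value> {
        bodies
            .into_par_iter()
            .filter_map(|body| serde_json::from_str::<serde_json::Value>(&body).ok())
            .collect()
    }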
src/main.rs (60 lines changed)
@@ -17,7 +17,7 @@ fn main() {
        start.elapsed()
    );

    let num_ticks = 5000;
    let num_ticks = 20;
    let sel_tickers: Vec<String> = tickers
        .iter()
        .take(num_ticks)
@@ -27,9 +27,10 @@ fn main() {

    println!("Retrieving indicators for {} tickers", sel_tickers.len());
    start = std::time::Instant::now();
    let indicators = jpamqs_download
        .get_indicators(JPMaQSDownloadGetIndicatorArgs {
    let mut res_df: DataFrame = jpamqs_download
        .get_indicators_qdf(JPMaQSDownloadGetIndicatorArgs {
            tickers: sel_tickers.clone(),
            start_date: "2024-11-05".to_string(),
            ..Default::default()
        })
        .unwrap();
@@ -40,27 +41,36 @@ fn main() {
        start.elapsed()
    );

    // sleep for 10 seconds
    println!("Sleeping for 10 seconds...");
    std::thread::sleep(std::time::Duration::from_secs(10));
    println!("concatting to mega DataFrame");
    // append _ to every cid
    let cid_vec: Vec<String> = res_df
        .column("cid")
        .unwrap()
        .str()
        .unwrap()
        .into_iter()
        .map(|s| s.unwrap_or("").to_string())
        .collect();
    let xcat_vec: Vec<String> = res_df
        .column("xcat")
        .unwrap()
        .str()
        .unwrap()
        .into_iter()
        .map(|s| s.unwrap_or("").to_string())
        .collect();

    start = std::time::Instant::now();
    // let mut qdf_list = Vec::new();
    let mega_df = indicators
        .iter()
        .map(|indicator| indicator.as_qdf().unwrap())
        .fold(DataFrame::new(vec![]).unwrap(), |acc, df| {
            acc.vstack(&df).unwrap()
        });

    //
    let es = mega_df.estimated_size();
    let es_mb = es as f64 / 1_048_576.0;
    println!("Estimated size of DataFrame: {:.2} MB", es_mb);
    println!("Sleeping for 10 seconds...");
    println!(
        "Converted indicators to DataFrames in {:?}",
        start.elapsed()
    );
    let mut tickers_set = std::collections::HashSet::new();
    for (cid, xcat) in cid_vec.iter().zip(xcat_vec.iter()) {
        tickers_set.insert(cid.to_string() + "_" + xcat);
    }

    // save this df to disk
    let file_name = "data/jpmaqs_indicators_qdf.csv";
    let file = std::fs::File::create(file_name).unwrap();
    let mut csv_writer = CsvWriter::new(file);
    csv_writer.finish(&mut res_df).unwrap();

    // print len of tickers_set
    println!("Unique tickers: {}", tickers_set.len());
    println!("DataFrame shape: {:?}", res_df.shape());
}