diff --git a/notebooks/funcwise/bdate_range_util.ipynb b/notebooks/funcwise/bdate_range_util.ipynb new file mode 100644 index 0000000..d1a6a0a --- /dev/null +++ b/notebooks/funcwise/bdate_range_util.ipynb @@ -0,0 +1,360 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# ! uv pip install E:\\Work\\ruzt\\msyrs --upgrade" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import Python packages\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import macrosynergy\n", + "import pandas as pd\n", + "import numpy as np\n", + "import polars as pl\n", + "import os\n", + "import time\n", + "\n", + "from macrosynergy.panel import view_timelines\n", + "from macrosynergy.management.types import QuantamentalDataFrame\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import Python bindings - `msyrs`\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import msyrs" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
bdates0
02000-01-032000-01-03
12000-01-102000-01-10
22000-01-172000-01-17
32000-01-242000-01-24
42000-01-312000-01-31
.........
10562020-03-302020-03-30
10572020-04-062020-04-06
10582020-04-132020-04-13
10592020-04-202020-04-20
10602020-04-272020-04-27
\n", + "

1061 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " bdates 0\n", + "0 2000-01-03 2000-01-03\n", + "1 2000-01-10 2000-01-10\n", + "2 2000-01-17 2000-01-17\n", + "3 2000-01-24 2000-01-24\n", + "4 2000-01-31 2000-01-31\n", + "... ... ...\n", + "1056 2020-03-30 2020-03-30\n", + "1057 2020-04-06 2020-04-06\n", + "1058 2020-04-13 2020-04-13\n", + "1059 2020-04-20 2020-04-20\n", + "1060 2020-04-27 2020-04-27\n", + "\n", + "[1061 rows x 2 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = msyrs.utils.get_bdates_series_default_opt(start_date='2000-01-01', end_date='2020-05-01', freq='W').to_pandas()\n", + "y = pd.Series(pd.bdate_range(start='2000-01-01', end='2020-05-01', freq='W-MON'))\n", + "\n", + "pd.concat([x, y], axis=1)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Results for M\t & \tBMS\t are exactly the same\n", + "Results for Q\t & \tBQS\t are exactly the same\n", + "Results for W\t & \tW-MON\t are exactly the same\n", + "Results for WF\t & \tW-FRI\t are exactly the same\n" + ] + } + ], + "source": [ + "for rs_freq, pd_freq in [('M', 'BMS'), ('Q', 'BQS'), ('W', 'W-MON'), ('WF', 'W-FRI')]:\n", + "\n", + "\n", + " x = msyrs.utils.get_bdates_series_default_opt(start_date='2000-01-01', end_date='2020-05-01', freq=rs_freq).to_pandas()\n", + " y = pd.Series(pd.bdate_range(start='2000-01-01', end='2020-05-01', freq=pd_freq))\n", + "\n", + " e = x == y\n", + " res = e.all()\n", + " non_matching_df = pd.concat([x[~e], y[~e]], axis=1)\n", + " assert res, f\"Results for {rs_freq}\\t and \\t{pd_freq}\\t are not the same\\n{non_matching_df}\"\n", + " print(f\"Results for {rs_freq}\\t & \\t{pd_freq}\\t are exactly the same\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "23.5 μs ± 1.02 μs 
per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n", + "67.4 μs ± 979 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n", + "1.97 ms ± 57.3 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n", + "4.65 ms ± 170 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", + "28.3 ms ± 898 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", + "93.8 ms ± 2.02 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + ] + } + ], + "source": [ + "%timeit msyrs.utils.get_bdates_series_default_opt(start_date='2000-01-01', end_date='2020-05-01', freq='D')\n", + "%timeit msyrs.utils.get_bdates_series_default_opt(start_date='1971-01-01', end_date='2040-05-01', freq='D')\n", + "%timeit msyrs.utils.get_bdates_series_default_pl(start_date='2000-01-01', end_date='2020-05-01', freq='D')\n", + "%timeit msyrs.utils.get_bdates_series_default_pl(start_date='1971-01-01', end_date='2040-05-01', freq='D')\n", + "%timeit pd.bdate_range(start='2000-01-01', end='2020-05-01', freq='B')\n", + "%timeit pd.bdate_range(start='1971-01-01', end='2040-05-01', freq='B')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "7.95 μs ± 146 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n", + "17.9 μs ± 108 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n", + "1.73 ms ± 20.8 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n", + "4 ms ± 69.3 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", + "5.69 ms ± 139 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", + "19.1 ms ± 268 μs per loop (mean ± std. dev. 
of 7 runs, 100 loops each)\n" + ] + } + ], + "source": [ + "%timeit msyrs.utils.get_bdates_series_default_opt(start_date='2000-01-01', end_date='2020-05-01', freq='WF')\n", + "%timeit msyrs.utils.get_bdates_series_default_opt(start_date='1971-01-01', end_date='2040-05-01', freq='WF')\n", + "%timeit msyrs.utils.get_bdates_series_default_pl(start_date='2000-01-01', end_date='2020-05-01', freq='WF')\n", + "%timeit msyrs.utils.get_bdates_series_default_pl(start_date='1971-01-01', end_date='2040-05-01', freq='WF')\n", + "%timeit pd.bdate_range(start='2000-01-01', end='2020-05-01', freq='W-FRI')\n", + "%timeit pd.bdate_range(start='1971-01-01', end='2040-05-01', freq='W-FRI')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "6.9 μs ± 126 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n", + "13.1 μs ± 93.3 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n", + "1.73 ms ± 29.3 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n", + "4.2 ms ± 81.5 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", + "931 μs ± 14.2 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n", + "3.05 ms ± 47.5 μs per loop (mean ± std. dev. 
of 7 runs, 100 loops each)\n" + ] + } + ], + "source": [ + "%timeit msyrs.utils.get_bdates_series_default_opt(start_date='2000-01-01', end_date='2020-05-01', freq='ME')\n", + "%timeit msyrs.utils.get_bdates_series_default_opt(start_date='1971-01-01', end_date='2040-05-01', freq='ME')\n", + "%timeit msyrs.utils.get_bdates_series_default_pl(start_date='2000-01-01', end_date='2020-05-01', freq='ME')\n", + "%timeit msyrs.utils.get_bdates_series_default_pl(start_date='1971-01-01', end_date='2040-05-01', freq='ME')\n", + "%timeit pd.bdate_range(start='2000-01-01', end='2020-05-01', freq='BME')\n", + "%timeit pd.bdate_range(start='1971-01-01', end='2040-05-01', freq='BME')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3.65 μs ± 69.1 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n", + "4.78 μs ± 38.7 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n", + "1.73 ms ± 122 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n", + "4.16 ms ± 286 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", + "340 μs ± 11.3 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n", + "1.1 ms ± 11.5 μs per loop (mean ± std. dev. 
of 7 runs, 1,000 loops each)\n" + ] + } + ], + "source": [ + "%timeit msyrs.utils.get_bdates_series_default_opt(start_date='2000-01-01', end_date='2020-05-01', freq='Q')\n", + "%timeit msyrs.utils.get_bdates_series_default_opt(start_date='1971-01-01', end_date='2040-05-01', freq='Q')\n", + "%timeit msyrs.utils.get_bdates_series_default_pl(start_date='2000-01-01', end_date='2020-05-01', freq='Q')\n", + "%timeit msyrs.utils.get_bdates_series_default_pl(start_date='1971-01-01', end_date='2040-05-01', freq='Q')\n", + "%timeit pd.bdate_range(start='2000-01-01', end='2020-05-01', freq='BQS')\n", + "%timeit pd.bdate_range(start='1971-01-01', end='2040-05-01', freq='BQS')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3.21 μs ± 83.4 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n", + "3.66 μs ± 198 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n", + "2.67 ms ± 459 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", + "3.71 ms ± 143 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", + "98.7 μs ± 1.47 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n", + "289 μs ± 15.3 μs per loop (mean ± std. dev. 
of 7 runs, 1,000 loops each)\n" + ] + } + ], + "source": [ + "%timeit msyrs.utils.get_bdates_series_default_opt(start_date='2000-01-01', end_date='2020-05-01', freq='YE')\n", + "%timeit msyrs.utils.get_bdates_series_default_opt(start_date='1971-01-01', end_date='2040-05-01', freq='YE')\n", + "%timeit msyrs.utils.get_bdates_series_default_pl(start_date='2000-01-01', end_date='2020-05-01', freq='YE')\n", + "%timeit msyrs.utils.get_bdates_series_default_pl(start_date='1971-01-01', end_date='2040-05-01', freq='YE')\n", + "%timeit pd.bdate_range(start='2000-01-01', end='2020-05-01', freq='BYE')\n", + "%timeit pd.bdate_range(start='1971-01-01', end='2040-05-01', freq='BYE')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/src/_py/utils.rs b/src/_py/utils.rs index 1799f1a..45c24f2 100644 --- a/src/_py/utils.rs +++ b/src/_py/utils.rs @@ -5,18 +5,31 @@ use pyo3_polars::{PyDataFrame, PySeries}; #[allow(deprecated)] #[pymodule] pub fn utils(_py: Python, m: &PyModule) -> PyResult<()> { - m.add_function(wrap_pyfunction!(get_bdates_series_default, m)?)?; + m.add_function(wrap_pyfunction!(get_bdates_series_default_pl, m)?)?; + m.add_function(wrap_pyfunction!(get_bdates_series_default_opt, m)?)?; Ok(()) } #[pyfunction] -pub fn get_bdates_series_default( +pub fn get_bdates_series_default_pl( start_date: String, end_date: String, freq: Option, ) -> PyResult { Ok(PySeries( - crate::utils::dateutils::get_bdates_series_default(start_date, end_date, freq) + crate::utils::dateutils::get_bdates_series_default_pl(start_date, end_date, freq) + .map_err(|e| PyErr::new::(format!("{}", e)))?, + )) +} + 
+#[pyfunction] +pub fn get_bdates_series_default_opt( + start_date: String, + end_date: String, + freq: Option, +) -> PyResult { + Ok(PySeries( + crate::utils::dateutils::get_bdates_series_default_opt(start_date, end_date, freq) .map_err(|e| PyErr::new::(format!("{}", e)))?, )) } diff --git a/src/msyrs.pyi b/src/msyrs.pyi index df6b596..bdfa7f3 100644 --- a/src/msyrs.pyi +++ b/src/msyrs.pyi @@ -51,6 +51,8 @@ class panel: def linear_composite(*args, **kwargs) -> DataFrame: ... class utils: - __all__ = ["get_bdates_series_default"] + __all__ = ["get_bdates_series_default_pl", "get_bdates_series_default_opt"] @staticmethod - def get_bdates_series_default(*args, **kwargs) -> Series: ... + def get_bdates_series_default_pl(*args, **kwargs) -> Series: ... + @staticmethod + def get_bdates_series_default_opt(*args, **kwargs) -> Series: ... diff --git a/src/utils/bdates.rs b/src/utils/bdates.rs new file mode 100644 index 0000000..0734ed5 --- /dev/null +++ b/src/utils/bdates.rs @@ -0,0 +1,365 @@ +use chrono::{Datelike, Duration, NaiveDate, Weekday}; +use std::error::Error; + +#[derive(Debug, Clone, Copy)] +pub enum BDateFreq { + Daily, + WeeklyMonday, + MonthStart, + QuarterStart, + YearStart, + MonthEnd, + QuarterEnd, + WeeklyFriday, + YearEnd, +} + +#[derive(Debug, Clone, Copy)] +pub enum AggregationType { + Start, // Indicates picking the first date in a group. + End, // Indicates picking the last date in a group. 
+} + +impl BDateFreq { + pub fn from_string(freq: String) -> Result> { + // use from_str to convert the string to a BDateFreq enum + Self::from_str(&freq) + } + pub fn from_str(freq: &str) -> Result> { + match freq { + "D" => Ok(BDateFreq::Daily), + "W" => Ok(BDateFreq::WeeklyMonday), + "M" => Ok(BDateFreq::MonthStart), + "Q" => Ok(BDateFreq::QuarterStart), + "A" => Ok(BDateFreq::YearStart), + "ME" => Ok(BDateFreq::MonthEnd), + "QE" => Ok(BDateFreq::QuarterEnd), + "WF" => Ok(BDateFreq::WeeklyFriday), + "YE" => Ok(BDateFreq::YearEnd), + _ => Err("Invalid frequency specified".into()), + } + } + pub fn agg_type(&self) -> AggregationType { + match self { + BDateFreq::Daily + | BDateFreq::WeeklyMonday + | BDateFreq::MonthStart + | BDateFreq::QuarterStart + | BDateFreq::YearStart => AggregationType::Start, + + BDateFreq::WeeklyFriday + | BDateFreq::MonthEnd + | BDateFreq::QuarterEnd + | BDateFreq::YearEnd => AggregationType::End, + } + } +} + +/// Returns only the business dates (Mon-Fri) between start_date and end_date +/// that match the desired frequency. 
+pub fn get_bdates_list_with_freq( + start_date_str: &str, + end_date_str: &str, + freq: BDateFreq, +) -> Result, Box> { + let start_date = NaiveDate::parse_from_str(start_date_str, "%Y-%m-%d")?; + let end_date = NaiveDate::parse_from_str(end_date_str, "%Y-%m-%d")?; + + if start_date > end_date { + return Ok(Vec::new()); + } + + let mut dates = match freq { + BDateFreq::Daily => collect_daily(start_date, end_date), + + BDateFreq::WeeklyMonday => collect_weekly(start_date, end_date, Weekday::Mon), + BDateFreq::WeeklyFriday => collect_weekly(start_date, end_date, Weekday::Fri), + + BDateFreq::MonthStart => collect_monthly(start_date, end_date, /*start=*/ true), + BDateFreq::MonthEnd => collect_monthly(start_date, end_date, /*start=*/ false), + + BDateFreq::QuarterStart => collect_quarterly(start_date, end_date, /*start=*/ true), + BDateFreq::QuarterEnd => collect_quarterly(start_date, end_date, /*start=*/ false), + + BDateFreq::YearStart => collect_yearly(start_date, end_date, /*start=*/ true), + BDateFreq::YearEnd => collect_yearly(start_date, end_date, /*start=*/ false), + }; + + // Filter out any weekend days that might slip in edge cases (e.g. if the + // computed "start of month" fell on Sat/Sun). + dates.retain(|d| d.weekday() != Weekday::Sat && d.weekday() != Weekday::Sun); + + Ok(dates) +} + +/* ------------------------------ Helpers ------------------------------ */ + +/// Return all business days, day-by-day. +fn collect_daily(start_date: NaiveDate, end_date: NaiveDate) -> Vec { + let mut result = Vec::new(); + let mut current = start_date; + while current <= end_date { + if is_weekday(current) { + result.push(current); + } + current = current.succ_opt().unwrap(); + } + result +} + +/// Return the specified weekday (e.g. Monday, Friday) in each week of the range. 
+fn collect_weekly( + start_date: NaiveDate, + end_date: NaiveDate, + target_weekday: Weekday, +) -> Vec { + let mut result = Vec::new(); + + // Find the first `target_weekday` on or after `start_date`. + // If `start_date` is already e.g. Monday, we can use it as is. + // Otherwise, jump ahead until we get that weekday. + let mut current = move_to_weekday_on_or_after(start_date, target_weekday); + + // Step in 7-day increments (full weeks). + while current <= end_date { + result.push(current); + current = current + Duration::days(7); + } + result +} + +/// Return either first or last business day in each month of the range. +fn collect_monthly( + start_date: NaiveDate, + end_date: NaiveDate, + want_first_day: bool, +) -> Vec { + let mut result = Vec::new(); + + // We'll iterate month by month, from (start_year, start_month) up to + // (end_year, end_month). + let mut year = start_date.year(); + let mut month = start_date.month(); + + // A small helper that updates year/month by +1 month. + let next_month = |(yr, mo): (i32, u32)| -> (i32, u32) { + if mo == 12 { + (yr + 1, 1) + } else { + (yr, mo + 1) + } + }; + + // Move `(year, month)` backward if necessary so that `(year, month)` + // definitely covers the entire period from `start_date` onward. + // Actually, it’s simpler to start from the actual (year, month) of start_date + // and go up. We'll just skip if the computed "day" < start_date. + + // Continue while we haven't passed (end_year, end_month). + while year < end_date.year() || (year == end_date.year() && month <= end_date.month()) { + // Compute the date that represents either first or last business day + // for this (year, month). + let candidate = if want_first_day { + first_business_day_of_month(year, month) + } else { + last_business_day_of_month(year, month) + }; + if candidate >= start_date && candidate <= end_date { + result.push(candidate); + } + + // Move to the next month. 
+ let (ny, nm) = next_month((year, month)); + year = ny; + month = nm; + } + + result +} + +/// Return either the first or last business day in each quarter of the range. +fn collect_quarterly( + start_date: NaiveDate, + end_date: NaiveDate, + want_first_day: bool, +) -> Vec { + let mut result = Vec::new(); + + // We'll figure out which quarter `start_date` is in, then jump quarter-by-quarter. + // Quarters are: Q1 = months 1–3, Q2 = 4–6, Q3 = 7–9, Q4 = 10–12. + // Start by computing the (year, quarter_index) for start_date. + let mut year = start_date.year(); + let mut q = month_to_quarter(start_date.month()); + + while quarter_to_first_date(year, q) > end_date { + // If even the earliest day in that quarter is > end_date, we’re done. + return result; + } + + // Move backward if the quarter’s last day < start_date, etc. + // But simpler: we’ll do a loop that increments quarter by quarter, and + // pick the appropriate date each time. We break when we pass end_date. + + loop { + // For the current year+quarter, compute the date that’s either the first or last + // business day of that quarter: + let candidate = if want_first_day { + first_business_day_of_quarter(year, q) + } else { + last_business_day_of_quarter(year, q) + }; + + if candidate > end_date { + break; + } + if candidate >= start_date { + result.push(candidate); + } + + // Move to next quarter. + if q == 4 { + year += 1; + q = 1; + } else { + q += 1; + } + } + + result +} + +/// Return either the first or last business day in each year of the range. +fn collect_yearly( + start_date: NaiveDate, + end_date: NaiveDate, + want_first_day: bool, +) -> Vec { + let mut result = Vec::new(); + let mut year = start_date.year(); + + // Step year-by-year from `start_date.year()` up to `end_date.year()`. 
+ while year <= end_date.year() { + let candidate = if want_first_day { + first_business_day_of_year(year) + } else { + last_business_day_of_year(year) + }; + if candidate >= start_date && candidate <= end_date { + result.push(candidate); + } + year += 1; + } + result +} + +/* ---------------------- Low-Level Utility Functions ---------------------- */ + +/// Is this a weekday (Mon-Fri)? +fn is_weekday(date: NaiveDate) -> bool { + match date.weekday() { + Weekday::Sat | Weekday::Sun => false, + _ => true, + } +} + +/// Given a date and a `target_weekday`, returns the date that is the first +/// `target_weekday` on or after the given date. +fn move_to_weekday_on_or_after(date: NaiveDate, target: Weekday) -> NaiveDate { + let mut current = date; + while current.weekday() != target { + current = current.succ_opt().unwrap(); + } + current +} + +/// Return the earliest business day of (year, month). +fn first_business_day_of_month(year: i32, month: u32) -> NaiveDate { + // Start with the 1st of the month. + let mut d = NaiveDate::from_ymd_opt(year, month, 1).expect("invalid year-month"); + // If it’s Sat/Sun, move forward until we get a weekday. + while !is_weekday(d) { + d = d.succ_opt().unwrap(); + } + d +} + +/// Return the latest business day of (year, month). +fn last_business_day_of_month(year: i32, month: u32) -> NaiveDate { + let last_dom = days_in_month(year, month); + let mut d = NaiveDate::from_ymd_opt(year, month, last_dom).expect("invalid year-month"); + // If it’s Sat/Sun, move backward until we get a weekday. + while !is_weekday(d) { + d = d.pred_opt().unwrap(); + } + d +} + +/// Number of days in a month (not considering leap years *beyond* chrono's normal handling). +fn days_in_month(year: i32, month: u32) -> u32 { + // Chrono can handle this if we do a little trick: + // Construct the 1st of the next month, then subtract 1 day. 
+ // For example: + // if month == 12 => next = (year+1, 1, 1) + // else => next = (year, month+1, 1) + let (ny, nm) = if month == 12 { + (year + 1, 1) + } else { + (year, month + 1) + }; + let first_of_next = NaiveDate::from_ymd_opt(ny, nm, 1).unwrap(); + let last_of_this = first_of_next.pred_opt().unwrap(); + last_of_this.day() +} + +/// Convert a month (1..12) to a quarter (1..4). +fn month_to_quarter(m: u32) -> u32 { + (m - 1) / 3 + 1 +} + +/// Returns 1st day of a given (year, quarter). +fn quarter_to_first_date(year: i32, quarter: u32) -> NaiveDate { + let month = match quarter { + 1 => 1, + 2 => 4, + 3 => 7, + 4 => 10, + _ => panic!("invalid quarter"), + }; + NaiveDate::from_ymd_opt(year, month, 1).unwrap() +} + +/// Return the earliest business day in (year, quarter). +fn first_business_day_of_quarter(year: i32, quarter: u32) -> NaiveDate { + let mut d = quarter_to_first_date(year, quarter); + while !is_weekday(d) { + d = d.succ_opt().unwrap(); + } + d +} + +/// Return the last business day in (year, quarter). +fn last_business_day_of_quarter(year: i32, quarter: u32) -> NaiveDate { + // The last month in the quarter is quarter_to_first_date(...) + 2 months + // Then we find the last day of that month. + let start = quarter_to_first_date(year, quarter); + let last_month = start.month() + 2; // e.g. Q1 => month=1 => +2=3 => March + last_business_day_of_month(year, last_month) +} + +/// Returns Jan 1st of a given year (adjust if weekend). +fn first_business_day_of_year(year: i32) -> NaiveDate { + let mut d = NaiveDate::from_ymd_opt(year, 1, 1).unwrap(); + while !is_weekday(d) { + d = d.succ_opt().unwrap(); + } + d +} + +/// Returns Dec 31st of a given year (adjust if weekend). 
+fn last_business_day_of_year(year: i32) -> NaiveDate { + let mut d = NaiveDate::from_ymd_opt(year, 12, 31).unwrap(); + while !is_weekday(d) { + d = d.pred_opt().unwrap(); + } + d +} diff --git a/src/utils/dateutils.rs b/src/utils/dateutils.rs index 4f1697f..095fce6 100644 --- a/src/utils/dateutils.rs +++ b/src/utils/dateutils.rs @@ -1,3 +1,5 @@ +use crate::utils::bdates; +use crate::utils::bdates::BDateFreq; use chrono::NaiveDate; use chrono::{Datelike, Weekday}; use polars::prelude::*; @@ -57,57 +59,6 @@ pub fn get_bdates_list( Ok(business_days) } -#[derive(Debug, Clone, Copy)] -pub enum BDateFreq { - Daily, - WeeklyMonday, - MonthStart, - QuarterStart, - YearStart, - MonthEnd, - QuarterEnd, - WeeklyFriday, - YearEnd, -} - -impl BDateFreq { - pub fn from_str(freq: &str) -> Result> { - match freq { - "D" => Ok(BDateFreq::Daily), - "W" => Ok(BDateFreq::WeeklyMonday), - "M" => Ok(BDateFreq::MonthStart), - "Q" => Ok(BDateFreq::QuarterStart), - "A" => Ok(BDateFreq::YearStart), - "ME" => Ok(BDateFreq::MonthEnd), - "QE" => Ok(BDateFreq::QuarterEnd), - "WF" => Ok(BDateFreq::WeeklyFriday), - "YE" => Ok(BDateFreq::YearEnd), - _ => Err("Invalid frequency specified".into()), - } - } - - pub fn agg_type(&self) -> AggregationType { - match self { - BDateFreq::Daily - | BDateFreq::WeeklyMonday - | BDateFreq::MonthStart - | BDateFreq::QuarterStart - | BDateFreq::YearStart => AggregationType::Start, - BDateFreq::WeeklyFriday - | BDateFreq::MonthEnd - | BDateFreq::QuarterEnd - | BDateFreq::YearEnd => AggregationType::End, - } - } -} - -#[derive(Debug, Clone, Copy)] -pub enum AggregationType { - Start, // Indicates picking the first date in a group. - End, // Indicates picking the last date in a group. -} - -// Map a BDateFreq to an AggregationType. fn compute_group_key(d: NaiveDate, freq: BDateFreq) -> String { match freq { // For Daily, each date is its own group. 
@@ -130,19 +81,32 @@ fn compute_group_key(d: NaiveDate, freq: BDateFreq) -> String { BDateFreq::YearStart | BDateFreq::YearEnd => format!("{}", d.year()), } } - -pub fn get_bdates_series_default( +pub fn get_bdates_series_default_opt( start_date: String, end_date: String, freq: Option, ) -> Result> { let freq = freq.unwrap_or_else(|| "D".to_string()); let freq = BDateFreq::from_str(&freq)?; - get_bdates_series(start_date, end_date, freq) + let series = Series::new( + "bdates".into(), + bdates::get_bdates_list_with_freq(&start_date, &end_date, freq)?, + ); + Ok(series) +} + +pub fn get_bdates_series_default_pl( + start_date: String, + end_date: String, + freq: Option, +) -> Result> { + let freq = freq.unwrap_or_else(|| "D".to_string()); + let freq = BDateFreq::from_str(&freq)?; + get_bdates_series_pl(start_date, end_date, freq) } /// Get the business dates between two dates as a Series. -pub fn get_bdates_series( +pub fn get_bdates_series_pl( start_date: String, end_date: String, freq: BDateFreq, @@ -159,8 +123,8 @@ pub fn get_bdates_series( ])?; let gb = df.lazy().group_by(["group"]); let aggx = match freq.agg_type() { - AggregationType::Start => gb.agg([col("bdates").first()]), - AggregationType::End => gb.agg([col("bdates").last()]), + bdates::AggregationType::Start => gb.agg([col("bdates").first()]), + bdates::AggregationType::End => gb.agg([col("bdates").last()]), }; let result = aggx.collect()?; let result = result diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 36e8f7f..45d4295 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -1,3 +1,4 @@ -pub mod qdf; +pub mod bdates; +pub mod dateutils; pub mod misc; -pub mod dateutils; \ No newline at end of file +pub mod qdf;