2025-08-21 10:40:01 +00:00
2 changed files with 138 additions and 292 deletions
--- a/.github/scripts/custom_benchmark_report.py
+++ b/.github/scripts/custom_benchmark_report.py
@ -25,17 +25,7 @@ ESTIMATES_PATH_BASE = Path("base") / "estimates.json"
 REPORT_HTML_RELATIVE_PATH = Path("report") / "index.html"
-def get_default_criterion_report_path() -> Path:
+def load_criterion_reports(criterion_root_dir: Path) -> Dict[str, Dict[str, Dict[str, Any]]]:
    """
    Returns the default path for the Criterion benchmark report.
    This is typically 'target/criterion'.
    """
    return Path("target") / "criterion" / "report" / "index.html"
 def load_criterion_reports(
    criterion_root_dir: Path,
 ) -> Dict[str, Dict[str, Dict[str, Any]]]:
    """
    Loads Criterion benchmark results from a specified directory and finds HTML paths.
@ -60,51 +50,58 @@ def load_criterion_reports(
    print(f"Scanning for benchmark reports in: {criterion_root_dir}")
    for item in criterion_root_dir.iterdir():
        # We are only interested in directories matching the pattern
        if not item.is_dir():
            continue
        match = DIR_PATTERN.match(item.name)
        if not match:
            # print(f"Skipping directory (name doesn't match pattern): {item.name}")
            continue
        test_name = match.group(1).strip()
        size = match.group(2).strip()
-        benchmark_dir_name = item.name
+        benchmark_dir_name = item.name # Store the original directory name
-        benchmark_dir_path = item
+        benchmark_dir_path = item     # The Path object to the benchmark dir
        json_path: Optional[Path] = None
        # Look for the estimates JSON file (prefer 'new', fallback to 'base')
        if (benchmark_dir_path / ESTIMATES_PATH_NEW).is_file():
            json_path = benchmark_dir_path / ESTIMATES_PATH_NEW
        elif (benchmark_dir_path / ESTIMATES_PATH_BASE).is_file():
            json_path = benchmark_dir_path / ESTIMATES_PATH_BASE
        # The HTML report is at a fixed location relative to the benchmark directory
        html_path = benchmark_dir_path / REPORT_HTML_RELATIVE_PATH
        if json_path is None or not json_path.is_file():
            print(
                f"Warning: Could not find estimates JSON in {benchmark_dir_path}. Skipping benchmark size '{test_name} ({size})'.",
                file=sys.stderr,
            )
-            continue
+            continue # Skip if no JSON data
        if not html_path.is_file():
             print(
                f"Warning: Could not find HTML report at expected location {html_path}. Skipping benchmark size '{test_name} ({size})'.",
                file=sys.stderr,
            )
-            continue
+             continue # Skip if no HTML report
        # Try loading the JSON data
        try:
            with json_path.open("r", encoding="utf-8") as f:
                json_data = json.load(f)
            # Store both the JSON data and the relative path to the HTML report
            results[test_name][size] = {
-                "json": json_data,
+                'json': json_data,
-                "html_path_relative_to_criterion_root": str(
+                # The path from the criterion root to the specific benchmark's report/index.html
-                    Path(benchmark_dir_name) / REPORT_HTML_RELATIVE_PATH
+                'html_path_relative_to_criterion_root': str(Path(benchmark_dir_name) / REPORT_HTML_RELATIVE_PATH).replace('\\', '/') # Ensure forward slashes
                ).replace("\\", "/"),
            }
            # print(f"  Loaded: {test_name} ({size}) from {json_path}, html: {html_path}")
        except json.JSONDecodeError:
            print(f"Error: Failed to decode JSON from {json_path}", file=sys.stderr)
        except IOError as e:
@ -115,6 +112,7 @@ def load_criterion_reports(
                file=sys.stderr,
            )
    # Convert defaultdict back to regular dict for cleaner output (optional)
    return dict(results)
@ -132,215 +130,118 @@ def format_nanoseconds(ns: float) -> str:
        return f"{ns / 1_000_000_000:.2f} s"
-def generate_html_table_with_links(
+def generate_html_table_with_links(results: Dict[str, Dict[str, Dict[str, Any]]], html_base_path: str) -> str:
    results: Dict[str, Dict[str, Dict[str, Any]]], html_base_path: str
 ) -> str:
    """
    Generates a full HTML page with a styled table from benchmark results.
    """
    css_styles = """
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol";
            line-height: 1.6;
            margin: 0;
            padding: 20px;
            background-color: #f4f7f6;
            color: #333;
        }
        .container {
            max-width: 1200px;
            margin: 20px auto;
            padding: 20px;
            background-color: #fff;
            box-shadow: 0 0 15px rgba(0,0,0,0.1);
            border-radius: 8px;
        }
        h1 {
            color: #2c3e50;
            text-align: center;
            margin-bottom: 10px;
        }
        p.subtitle {
            text-align: center;
            margin-bottom: 8px;
            color: #555;
            font-size: 0.95em;
        }
        p.note {
            text-align: center;
            margin-bottom: 25px;
            color: #777;
            font-size: 0.85em;
        }
        .benchmark-table {
            width: 100%;
            border-collapse: collapse;
            margin-top: 25px;
            box-shadow: 0 2px 8px rgba(0,0,0,0.05);
        }
        .benchmark-table th, .benchmark-table td {
            border: 1px solid #dfe6e9; /* Lighter border */
            padding: 12px 15px;
        }
        .benchmark-table th {
            background-color: #3498db; /* Primary blue */
            color: #ffffff;
            font-weight: 600; /* Slightly bolder */
            text-transform: uppercase;
            letter-spacing: 0.05em;
            text-align: center; /* Center align headers */
        }
        .benchmark-table td {
            text-align: right; /* Default for data cells (times) */
        }
        .benchmark-table td:first-child { /* Benchmark Name column */
            font-weight: 500;
            color: #2d3436;
            text-align: left; /* Left align benchmark names */
        }
        .benchmark-table tbody tr:nth-child(even) {
            background-color: #f8f9fa; /* Very light grey for even rows */
        }
        .benchmark-table tbody tr:hover {
            background-color: #e9ecef; /* Slightly darker on hover */
        }
        .benchmark-table a {
            color: #2980b9; /* Link blue */
            text-decoration: none;
            font-weight: 500;
        }
        .benchmark-table a:hover {
            text-decoration: underline;
            color: #1c5a81; /* Darker blue on hover */
        }
        .no-results {
            text-align: center;
            font-size: 1.2em;
            color: #7f8c8d;
            margin-top: 30px;
        }
    </style>
    """
    Generates an HTML table from benchmark results, with cells linking to reports.
-    html_doc_start = f"""<!DOCTYPE html>
+    Args:
-<html lang="en">
+        results: The nested dictionary loaded by load_criterion_reports,
-<head>
+                 including 'json' data and 'html_path_relative_to_criterion_root'.
-    <meta charset="UTF-8">
+        html_base_path: The base URL path where the 'target/criterion' directory
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+                        is hosted on the static site, relative to the output HTML file.
-    <title>Criterion Benchmark Results</title>
+                        e.g., '../target/criterion/'
-    {css_styles}
+
-</head>
+    Returns:
-<body>
+        A string containing the full HTML table.
    <div class="container">
        <h1 id="criterion-benchmark-results">Criterion Benchmark Results</h1>
    """
    html_doc_end = """
    </div>
 </body>
 </html>"""
    if not results:
-        return f"""{html_doc_start}
+        return "<p>No benchmark results found or loaded.</p>"
        <p class="no-results">No benchmark results found or loaded.</p>
 {html_doc_end}"""
-    all_sizes = sorted(
+    # Get all unique sizes (columns) and test names (rows)
-        list(set(size for test_data in results.values() for size in test_data.keys()))
+    # Using ordered dictionaries to maintain insertion order from loading, then sorting keys
-    )
+    # Or simply sort the keys after extraction:
    all_sizes = sorted(list(set(size for test_data in results.values() for size in test_data.keys())))
    all_test_names = sorted(list(results.keys()))
-    table_content = """
+    html_string = """
-        <p class="subtitle">Each cell links to the detailed Criterion.rs report for that specific benchmark size.</p>
+    <meta charset="utf-8">
-        <p class="note">Note: Values shown are the midpoint of the mean confidence interval, formatted for readability.</p>
+    <h1 id="criterion-benchmark-results">Criterion Benchmark Results</h1>
-        <p class="note"><a href="report/index.html">[Switch to the standard Criterion.rs report]</a></p>
+    <p>Each cell links to the detailed Criterion report for that specific benchmark size.</p>
-        <table class="benchmark-table">
+    <p>Note: Values shown are the midpoint of the mean confidence interval, formatted for readability.</p>
    <table class="table table-striped" border="1" justify="center">
        <thead>
            <tr>
                <th>Benchmark Name</th>
    """
    # Add size headers
    for size in all_sizes:
-        table_content += f"<th>{html.escape(size)}</th>\n"
+        html_string += f"<th>{html.escape(size)}</th>\n"
-    table_content += """
+    html_string += """
            </tr>
        </thead>
        <tbody>
    """
    # Add data rows
    for test_name in all_test_names:
-        table_content += f"<tr>\n"
+        html_string += f"<tr>\n"
-        table_content += f"    <td>{html.escape(test_name)}</td>\n"
+        html_string += f"    <td>{html.escape(test_name)}</td>\n"
        # Iterate through all possible sizes to ensure columns align
        for size in all_sizes:
            cell_data = results.get(test_name, {}).get(size)
-            mean_value = pd.NA
+            mean_value = pd.NA # Default value
-            full_report_url = "#"
+            full_report_url = "#" # Default link to self or dummy
-            if (
+            if cell_data and 'json' in cell_data and 'html_path_relative_to_criterion_root' in cell_data:
                cell_data
                and "json" in cell_data
                and "html_path_relative_to_criterion_root" in cell_data
            ):
                try:
-                    mean_data = cell_data["json"].get("mean")
+                    # Extract mean from JSON
                    mean_data = cell_data['json'].get("mean")
                    if mean_data and "confidence_interval" in mean_data:
                        ci = mean_data["confidence_interval"]
                        if "lower_bound" in ci and "upper_bound" in ci:
                             lower, upper = ci["lower_bound"], ci["upper_bound"]
-                            if isinstance(lower, (int, float)) and isinstance(
+                             if isinstance(lower, (int, float)) and isinstance(upper, (int, float)):
                                upper, (int, float)
                            ):
                                 mean_value = (lower + upper) / 2.0
                             else:
-                                print(
+                                 print(f"Warning: Non-numeric bounds for {test_name} ({size}).", file=sys.stderr)
                                    f"Warning: Non-numeric bounds for {test_name} ({size}).",
                                    file=sys.stderr,
                                )
                        else:
-                            print(
+                             print(f"Warning: Missing confidence_interval bounds for {test_name} ({size}).", file=sys.stderr)
                                f"Warning: Missing confidence_interval bounds for {test_name} ({size}).",
                                file=sys.stderr,
                            )
                    else:
-                        print(
+                         print(f"Warning: Missing 'mean' data for {test_name} ({size}).", file=sys.stderr)
-                            f"Warning: Missing 'mean' data for {test_name} ({size}).",
+
-                            file=sys.stderr,
+                    # Construct the full relative URL
-                        )
+                    relative_report_path = cell_data['html_path_relative_to_criterion_root']
                    full_report_url = f"{html_base_path}{relative_report_path}"
                    # Ensure forward slashes and resolve potential double slashes if html_base_path ends in /
                    full_report_url = str(Path(full_report_url)).replace('\\', '/')
                    relative_report_path = cell_data[
                        "html_path_relative_to_criterion_root"
                    ]
                    joined_path = Path(html_base_path) / relative_report_path
                    full_report_url = str(joined_path).replace("\\", "/")
                except Exception as e:
-                    print(
+                    print(f"Error processing cell data for {test_name} ({size}): {e}", file=sys.stderr)
-                        f"Error processing cell data for {test_name} ({size}): {e}",
+                    # Keep mean_value as NA and URL as '#'
                        file=sys.stderr,
                    )
            # Format the mean value for display
            formatted_mean = format_nanoseconds(mean_value)
            # Create the link cell
            # Only make it a link if a valid report path was found
            if full_report_url and full_report_url != "#":
-                table_content += f'    <td><a href="{html.escape(full_report_url)}">{html.escape(formatted_mean)}</a></td>\n'
+                 html_string += f'    <td><a href="{html.escape(full_report_url)}">{html.escape(formatted_mean)}</a></td>\n'
            else:
-                table_content += f"    <td>{html.escape(formatted_mean)}</td>\n"
+                 # Display value without a link if no report path
-        table_content += "</tr>\n"
+                 html_string += f'    <td>{html.escape(formatted_mean)}</td>\n'
-    table_content += """
+
        html_string += f"</tr>\n"
    html_string += """
        </tbody>
    </table>
    """
-    return f"{html_doc_start}{table_content}{html_doc_end}"
+
    return html_string
 if __name__ == "__main__":
    DEFAULT_CRITERION_PATH = "target/criterion"
-    DEFAULT_OUTPUT_FILE = "./target/criterion/index.html"
+    # Default relative path from benchmark_results.html to the criterion root on the hosted site
-    DEFAULT_HTML_BASE_PATH = ""
+    # Assumes benchmark_results.html is in .../doc/<branch-slug>/benchmarks/
    # And target/criterion is copied to .../doc/<branch-slug>/target/criterion/
    # So the path from benchmarks/ to target/criterion/ is ../target/criterion/
    DEFAULT_HTML_BASE_PATH = "../target/criterion/"
    parser = argparse.ArgumentParser(
        description="Load Criterion benchmark results from JSON files and generate an HTML table with links to reports."
@ -349,65 +250,51 @@ if __name__ == "__main__":
        "--criterion-dir",
        type=str,
        default=DEFAULT_CRITERION_PATH,
-        help=f"Path to the main 'target/criterion' directory (default: {DEFAULT_CRITERION_PATH}) containing benchmark data.",
+        help=f"Path to the main 'target/criterion' directory (default: {DEFAULT_CRITERION_PATH}) on the runner.",
    )
    parser.add_argument(
        "--html-base-path",
        type=str,
        default=DEFAULT_HTML_BASE_PATH,
-        help=(
+        help=f"Relative URL path from the output HTML file to the hosted 'target/criterion' directory (default: {DEFAULT_HTML_BASE_PATH}).",
            f"Prefix for HTML links to individual benchmark reports. "
            f"This is prepended to each report's relative path (e.g., 'benchmark_name/report/index.html'). "
            f"If the main output HTML (default: '{DEFAULT_OUTPUT_FILE}') is in the 'target/criterion/' directory, "
            f"this should typically be empty (default: '{DEFAULT_HTML_BASE_PATH}'). "
        ),
    )
    parser.add_argument(
        "--output-file",
        type=str,
-        default=DEFAULT_OUTPUT_FILE,
+        default="benchmark_results.html",
-        help=f"Path to save the generated HTML summary report (default: {DEFAULT_OUTPUT_FILE}).",
+        help="Name of the output HTML file (default: benchmark_results.html)."
    )
    args = parser.parse_args()
    criterion_path = Path(args.criterion_dir)
    output_file_path = Path(args.output_file)
    try:
        output_file_path.parent.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        print(
            f"Error: Could not create output directory {output_file_path.parent}: {e}",
            file=sys.stderr,
        )
        sys.exit(1)
    all_results = load_criterion_reports(criterion_path)
    # Generate HTML output regardless of whether results were found (handles "no results" page)
    html_output = generate_html_table_with_links(all_results, args.html_base_path)
    if not all_results:
        print("\nNo benchmark results found or loaded.")
-        # Fallthrough to write the "no results" page generated by generate_html_table_with_links
+        # Still create an empty file or a file with an error message
-    else:
+        try:
            with open(args.output_file, "w", encoding="utf-8") as f:
                f.write("<h1>Criterion Benchmark Results</h1><p>No benchmark results found or loaded.</p>")
            print(f"Created empty/error HTML file: {args.output_file}")
        except IOError as e:
             print(f"Error creating empty/error HTML file {args.output_file}: {e}", file=sys.stderr)
        sys.exit(1) # Indicate failure if no data was loaded successfully
    print("\nSuccessfully loaded benchmark results.")
    # pprint(all_results) # Uncomment for debugging
-    print(
+    print(f"Generating HTML table with links using base path: {args.html_base_path}")
-        f"Generating HTML report with links using HTML base path: '{args.html_base_path}'"
+    html_output = generate_html_table_with_links(all_results, args.html_base_path)
    )
    try:
-        with output_file_path.open("w", encoding="utf-8") as f:
+        with open(args.output_file, "w", encoding="utf-8") as f:
            f.write(html_output)
-        print(f"\nSuccessfully wrote HTML report to {output_file_path}")
+        print(f"\nSuccessfully wrote HTML table to {args.output_file}")
-        if not all_results:
+        sys.exit(0) # Exit successfully
            sys.exit(1)  # Exit with error code if no results, though file is created
        sys.exit(0)
    except IOError as e:
-        print(f"Error writing HTML output to {output_file_path}: {e}", file=sys.stderr)
+        print(f"Error writing HTML output to {args.output_file}: {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
         print(f"An unexpected error occurred while writing HTML: {e}", file=sys.stderr)
--- a/benches/benchmarks.rs
+++ b/benches/benchmarks.rs
@ -4,7 +4,7 @@ use criterion::{criterion_group, criterion_main, Criterion};
 use rustframe::{
    frame::{Frame, RowIndex},
-    matrix::{BoolMatrix, Matrix, SeriesOps},
+    matrix::{BoolMatrix, Matrix},
    utils::{BDateFreq, BDatesList},
 };
 use std::time::Duration;
@ -160,76 +160,35 @@ fn matrix_operations_benchmark(c: &mut Criterion) {
    }
 }
-fn generate_frame(size: usize) -> Frame<f64> {
+fn benchmark_frame_operations(c: &mut Criterion) {
-    let data: Vec<f64> = (0..size * size).map(|x| x as f64).collect();
+    let n_periods = 1000;
    let n_cols = 1000;
    let dates: Vec<NaiveDate> =
-        BDatesList::from_n_periods("2000-01-01".to_string(), BDateFreq::Daily, size)
+        BDatesList::from_n_periods("2024-01-02".to_string(), BDateFreq::Daily, n_periods)
            .unwrap()
            .list()
            .unwrap();
    // let col_names= str(i) for i in range(1, 1000)
-    let col_names: Vec<String> = (1..=size).map(|i| format!("col_{}", i)).collect();
+    let col_names: Vec<String> = (1..=n_cols).map(|i| format!("col_{}", i)).collect();
-    Frame::new(
+    let data1: Vec<f64> = (0..n_periods * n_cols).map(|x| x as f64).collect();
-        Matrix::from_vec(data.clone(), size, size),
+    let data2: Vec<f64> = (0..n_periods * n_cols).map(|x| (x + 1) as f64).collect();
-        col_names,
+    let ma = Matrix::from_vec(data1.clone(), n_periods, n_cols);
-        Some(RowIndex::Date(dates)),
+    let mb = Matrix::from_vec(data2.clone(), n_periods, n_cols);
    )
 }
-fn benchmark_frame_operations(c: &mut Criterion) {
+    let fa = Frame::new(
-    let sizes = BENCH_SIZES;
+        ma.clone(),
        col_names.clone(),
        Some(RowIndex::Date(dates.clone())),
    );
    let fb = Frame::new(mb, col_names, Some(RowIndex::Date(dates)));
-    for &size in &sizes {
+    c.bench_function("frame element-wise multiply (1000x1000)", |b| {
        let fa = generate_frame(size);
        let fb = generate_frame(size);
        c.bench_function(&format!("frame add ({}x{})", size, size), |b| {
            b.iter(|| {
                let _result = &fa + &fb;
            });
        });
        c.bench_function(&format!("frame subtract ({}x{})", size, size), |b| {
            b.iter(|| {
                let _result = &fa - &fb;
            });
        });
        c.bench_function(&format!("frame multiply ({}x{})", size, size), |b| {
        b.iter(|| {
            let _result = &fa * &fb;
        });
    });
        c.bench_function(&format!("frame divide ({}x{})", size, size), |b| {
            b.iter(|| {
                let _result = &fa / &fb;
            });
        });
        c.bench_function(&format!("frame sum_horizontal ({}x{})", size, size), |b| {
            b.iter(|| {
                let _result = fa.sum_horizontal();
            });
        });
        c.bench_function(&format!("frame sum_vertical ({}x{})", size, size), |b| {
            b.iter(|| {
                let _result = fa.sum_vertical();
            });
        });
        c.bench_function(&format!("frame prod_horizontal ({}x{})", size, size), |b| {
            b.iter(|| {
                let _result = fa.prod_horizontal();
            });
        });
        c.bench_function(&format!("frame prod_vertical ({}x{})", size, size), |b| {
            b.iter(|| {
                let _result = fa.prod_vertical();
            });
        });
    }
 }
 // Define the criterion group and pass the custom configuration function