mirror of
https://github.com/Magnus167/rustframe.git
synced 2025-11-19 19:56:11 +00:00
Compare commits
22 Commits
3d2771bec8
...
ci-update
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d023acbaf7 | ||
|
|
1cc087ca48 | ||
|
|
c68c212de8 | ||
|
|
c18ab93f2e | ||
|
|
b9f5051015 | ||
|
|
32471aff3b | ||
|
|
ad9d2a7137 | ||
|
|
ecbc1e0252 | ||
|
|
df292d65f0 | ||
|
|
e45b1dc267 | ||
|
|
7f45b32806 | ||
|
|
0346c59d9a | ||
| c53693fa7b | |||
| 109d39b248 | |||
|
|
18ad6c689a | ||
| 1fead78b69 | |||
|
|
6fb32e743c | ||
| 2cb4e46217 | |||
|
|
a53ba63f30 | ||
|
|
dae60ea1bd | ||
|
|
755dee58e7 | ||
|
|
9e6e22fc37 |
34
.github/.archive/pr-checks.yml
vendored
34
.github/.archive/pr-checks.yml
vendored
@@ -1,34 +0,0 @@
|
||||
# name: pr-checks
|
||||
|
||||
# on:
|
||||
# pull_request:
|
||||
# branches: [pr_checks_disabled_for_now]
|
||||
# types:
|
||||
# - opened
|
||||
# # - synchronize
|
||||
# - reopened
|
||||
# - edited
|
||||
# - ready_for_review
|
||||
|
||||
# concurrency:
|
||||
# group: pr-checks-${{ github.event.number }}
|
||||
|
||||
# permissions:
|
||||
# contents: read
|
||||
# pull-requests: read
|
||||
# checks: write
|
||||
|
||||
# jobs:
|
||||
# pr-checks:
|
||||
# name: pr-checks
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - uses: actions/checkout@v4
|
||||
|
||||
# - name: Run PR checks
|
||||
# shell: bash
|
||||
# env:
|
||||
# GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
# PR_NUMBER: ${{ github.event.number }}
|
||||
# run: |
|
||||
# python .github/scripts/pr_checks.py $PR_NUMBER
|
||||
64
.github/scripts/ci_checks.py
vendored
Normal file
64
.github/scripts/ci_checks.py
vendored
Normal file
@@ -0,0 +1,64 @@
|
||||
import os
|
||||
import sys
|
||||
from typing import Any, Dict, Optional
|
||||
import tomllib
|
||||
import packaging.version
|
||||
import requests
|
||||
|
||||
sys.path.append(os.getcwd())
|
||||
|
||||
ACCESS_TOKEN: Optional[str] = os.getenv("GH_TOKEN", None)
|
||||
|
||||
GITHUB_REQUEST_CONFIG = {
|
||||
"Accept": "application/vnd.github.v3+json",
|
||||
"Authorization": f"token {ACCESS_TOKEN}",
|
||||
"X-GitHub-Api-Version": "2022-11-28",
|
||||
}
|
||||
REPO_OWNER_USERNAME: str = "Magnus167"
|
||||
REPO_NAME: str = "rustframe"
|
||||
REPOSITORY_WEB_LINK: str = f"github.com/{REPO_OWNER_USERNAME}/{REPO_NAME}"
|
||||
|
||||
CARGO_TOML_PATH: str = "Cargo.toml"
|
||||
|
||||
|
||||
def load_cargo_toml() -> Dict[str, Any]:
    """Parse the manifest at ``CARGO_TOML_PATH`` and return its contents as a dict.

    Raises:
        FileNotFoundError: if nothing exists at ``CARGO_TOML_PATH``.
    """
    manifest_present = os.path.exists(CARGO_TOML_PATH)
    if manifest_present:
        # tomllib.load only accepts a binary file object.
        with open(CARGO_TOML_PATH, "rb") as manifest:
            parsed_manifest = tomllib.load(manifest)
        return parsed_manifest

    raise FileNotFoundError(f"{CARGO_TOML_PATH} does not exist.")
|
||||
|
||||
def get_latest_crates_io_version() -> str:
    """Return the highest version of the ``rustframe`` crate published on crates.io.

    The request deliberately does NOT reuse ``GITHUB_REQUEST_CONFIG``: those
    headers carry the GitHub access token, which must never be sent to a
    third-party host such as crates.io.

    Returns:
        The ``max_version`` string reported by the crates.io API.

    Raises:
        RuntimeError: if the request fails or times out, or if the response
            does not contain the expected ``crate.max_version`` field.
    """
    url = "https://crates.io/api/v1/crates/rustframe"
    headers = {
        "Accept": "application/json",
        # crates.io asks API clients to identify themselves.
        "User-Agent": "rustframe-ci-checks (https://github.com/Magnus167/rustframe)",
    }
    try:
        # A timeout keeps a stalled connection from hanging the CI job.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        data = response.json()
        return data["crate"]["max_version"]
    except requests.RequestException as e:
        raise RuntimeError(f"Failed to fetch latest version from crates.io: {e}") from e
    except (KeyError, TypeError) as e:
        raise RuntimeError(f"Unexpected response from crates.io: {e!r}") from e
|
||||
|
||||
|
||||
def get_current_version() -> str:
    """Return the ``version`` declared in the ``[package]`` table of Cargo.toml.

    Raises:
        ValueError: if the manifest has no (or an empty) package version.
    """
    package_table = load_cargo_toml().get("package", {})
    declared_version = package_table.get("version", None)
    if declared_version:
        return declared_version

    raise ValueError("Version not found in Cargo.toml")
|
||||
|
||||
|
||||
def check_version() -> None:
    """Exit with status 1 unless Cargo.toml's version is newer than crates.io's.

    The published version is fetched and parsed first, then the local one.
    When the local version is strictly greater it is printed and the function
    returns normally.
    """
    latest_version_tuple = packaging.version.parse(get_latest_crates_io_version())
    current_version_tuple = packaging.version.parse(get_current_version())

    version_was_bumped = latest_version_tuple < current_version_tuple
    if not version_was_bumped:
        print(f"Current version {current_version_tuple} is less than or equal to latest version {latest_version_tuple} on crates.io.")
        sys.exit(1)

    print(f"Current version: {current_version_tuple}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
check_version()
|
||||
236
.github/scripts/pr_checks.py
vendored
236
.github/scripts/pr_checks.py
vendored
@@ -1,236 +0,0 @@
|
||||
import os
|
||||
import sys
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import json
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
import warnings
|
||||
import urllib.parse
|
||||
|
||||
from time import sleep
|
||||
|
||||
sys.path.append(os.getcwd())
|
||||
|
||||
ACCESS_TOKEN: Optional[str] = os.getenv("GH_TOKEN", None)
|
||||
|
||||
REQUEST_CONFIG = {
|
||||
"Accept": "application/vnd.github.v3+json",
|
||||
"Authorization": f"token {ACCESS_TOKEN}",
|
||||
"X-GitHub-Api-Version": "2022-11-28",
|
||||
}
|
||||
REPO_OWNER_USERNAME: str = "Magnus167"
|
||||
REPO_NAME: str = "rustframe"
|
||||
REPOSITORY_WEB_LINK: str = f"github.com/{REPO_OWNER_USERNAME}/{REPO_NAME}"
|
||||
|
||||
|
||||
def perform_api_call(
    target_url: str,
    call_headers: Optional[dict] = REQUEST_CONFIG,
    query_parameters: Optional[Dict[str, Any]] = None,
    http_method: str = "GET",
    maximum_attempts: int = 5,
) -> Any:
    """Send an HTTP request and return the JSON-decoded response body.

    Args:
        target_url: URL to call, without a query string.
        call_headers: Headers added to the request; ``None`` sends none.
        query_parameters: Optional mapping URL-encoded into the query string.
        http_method: One of GET, DELETE, POST, PATCH, PUT.
        maximum_attempts: How many times to try before giving up.

    Returns:
        The parsed JSON payload of the first successful response.

    Raises:
        Exception: immediately for HTTP 403/404/422, or after
            ``maximum_attempts`` failed attempts.
    """
    assert http_method in ["GET", "DELETE", "POST", "PATCH", "PUT"]

    # Build the final URL once. Doing this inside the retry loop would append
    # the query string again on every attempt ("url?a=1?a=1...").
    request_url = target_url
    if query_parameters:
        encoded_parameters = urllib.parse.urlencode(query_parameters)
        request_url = f"{target_url}?{encoded_parameters}"

    unrecoverable_codes = (403, 404, 422)

    for _ in range(maximum_attempts):
        try:
            http_request_object = urllib.request.Request(request_url, method=http_method)
            for key, value in (call_headers or {}).items():
                http_request_object.add_header(key, value)

            # urlopen raises HTTPError for 4xx/5xx responses, so error
            # statuses are handled in the except clause below.
            with urllib.request.urlopen(http_request_object) as server_response:
                return json.loads(server_response.read().decode())

        except urllib.error.HTTPError as error_details:
            if error_details.code in unrecoverable_codes:
                # Retrying cannot fix these; fail fast.
                raise Exception(f"Request failed: {error_details}") from error_details
            print(f"Request failed: {error_details}")

        except Exception as error_details:
            print(f"Request failed: {error_details}")

        # Brief pause before the next attempt.
        sleep(1)

    raise Exception("Request failed")
|
||||
|
||||
|
||||
valid_title_prefixes: List[str] = [
|
||||
"Feature:",
|
||||
"Bugfix:",
|
||||
"Documentation:",
|
||||
"CI/CD:",
|
||||
"Misc:",
|
||||
"Suggestion:",
|
||||
]
|
||||
|
||||
|
||||
def validate_title_format(
    item_title: str,
    enforce: bool = False,
) -> bool:
    """Check that a PR title starts with one of ``valid_title_prefixes``.

    Title validation is switched off by default: unless ``enforce`` is true the
    function only emits a "skipping" notice and accepts every title. The real
    check used to sit after an unconditional ``return`` and could never run; it
    is now reachable through ``enforce=True``.

    Args:
        item_title: The PR title to check.
        enforce: When true, actually validate the title.

    Returns:
        ``True`` when validation is skipped or the title is acceptable.

    Raises:
        ValueError: if ``enforce`` is true and no accepted prefix matches.
    """
    if not enforce:
        estr = "Skipping PR title validation"
        for _ in range(5):
            warnings.warn(estr)
            print(estr)
        return True

    cleaned_input = item_title.strip()
    if any(cleaned_input.startswith(prefix) for prefix in valid_title_prefixes):
        return True

    issue_message = (
        f"PR title '{item_title}' does not match any "
        f"of the accepted patterns: {valid_title_prefixes}"
    )
    raise ValueError(issue_message)
|
||||
|
||||
|
||||
def _locate_segment_indices(
|
||||
content_string: str,
|
||||
search_pattern: str,
|
||||
expect_numeric_segment: bool = False,
|
||||
) -> Tuple[int, int]:
|
||||
numeric_characters: List[str] = list(map(str, range(10))) + ["."]
|
||||
assert bool(content_string)
|
||||
assert bool(search_pattern)
|
||||
assert search_pattern in content_string
|
||||
start_index: int = content_string.find(search_pattern)
|
||||
end_index: int = content_string.find("-", start_index)
|
||||
if end_index == -1 and not expect_numeric_segment:
|
||||
return (start_index, len(content_string))
|
||||
|
||||
if expect_numeric_segment:
|
||||
start_index = start_index + len(search_pattern)
|
||||
for char_index, current_character in enumerate(content_string[start_index:]):
|
||||
if current_character not in numeric_characters:
|
||||
break
|
||||
end_index = start_index + char_index
|
||||
|
||||
return (start_index, end_index)
|
||||
|
||||
|
||||
def _verify_no_merge_flag(
|
||||
content_string: str,
|
||||
) -> bool:
|
||||
assert bool(content_string)
|
||||
return "DO-NOT-MERGE" not in content_string
|
||||
|
||||
|
||||
def _verify_merge_dependency(
|
||||
content_string: str,
|
||||
) -> bool:
|
||||
assert bool(content_string)
|
||||
dependency_marker: str = "MERGE-AFTER-#"
|
||||
|
||||
if dependency_marker not in content_string:
|
||||
return True
|
||||
|
||||
start_index, end_index = _locate_segment_indices(
|
||||
content_string=content_string, pattern=dependency_marker, numeric=True
|
||||
)
|
||||
dependent_item_id: str = content_string[start_index:end_index].strip()
|
||||
try:
|
||||
dependent_item_id = int(dependent_item_id)
|
||||
except ValueError:
|
||||
issue_message: str = f"PR number '{dependent_item_id}' is not an integer."
|
||||
raise ValueError(issue_message)
|
||||
|
||||
dependent_item_data: Dict[str, Any] = fetch_item_details(
|
||||
item_identifier=dependent_item_id
|
||||
)
|
||||
is_dependent_item_closed: bool = dependent_item_data["state"] == "closed"
|
||||
return is_dependent_item_closed
|
||||
|
||||
|
||||
def evaluate_merge_conditions(
    item_details: Dict[str, Any],
) -> bool:
    """Evaluate the merge restrictions written in a PR body.

    A PR whose body is ``None`` has no restrictions. Otherwise the body is
    stripped, spaces are replaced by dashes, the text is upper-cased and padded
    with one space on each side, and both the no-merge flag check and the
    merge-dependency check are run on the result.

    Returns:
        ``True`` only when both checks pass.
    """
    raw_body = item_details["body"]
    if raw_body is None:
        return True

    normalised_body = " " + raw_body.strip().replace(" ", "-").upper() + " "

    # Both checks are always executed, in this order.
    flag_ok = _verify_no_merge_flag(content_string=normalised_body)
    dependency_ok = _verify_merge_dependency(content_string=normalised_body)

    return all((flag_ok, dependency_ok))
|
||||
|
||||
|
||||
def validate_item_for_merge(
    item_data: Dict[str, Any],
) -> bool:
    """Run the title check and the body merge-condition check for a PR.

    ``item_data`` must provide the keys ``number``, ``title``, ``state`` and
    ``body`` (enforced with an assertion).

    Returns:
        ``True`` when both checks pass.

    Raises:
        ValueError: listing every failed check.
    """
    required_fields = {"number", "title", "state", "body"}
    assert required_fields.issubset(item_data.keys())

    problems = []

    title_ok = validate_title_format(item_title=item_data["title"])
    if not title_ok:
        problems.append(
            f"PR #{item_data['number']} is not mergable due to invalid title.\n"
        )

    conditions_ok = evaluate_merge_conditions(item_details=item_data)
    if not conditions_ok:
        problems.append(
            f"PR #{item_data['number']} is not mergable due to merge restrictions"
            " specified in the PR body."
        )

    report = "".join(problems)
    if report:
        raise ValueError(report.strip())

    return True
|
||||
|
||||
|
||||
def fetch_item_details(
    item_identifier: int,
):
    """Fetch a pull request from the GitHub API and keep the fields used here.

    Returns:
        A dict with the ``number``, ``title``, ``state`` and ``body`` entries
        of the API response.
    """
    endpoint = f"https://api.github.com/repos/{REPO_OWNER_USERNAME}/{REPO_NAME}/pulls/{item_identifier}"
    api_payload = perform_api_call(target_url=endpoint)

    wanted_fields = ("number", "title", "state", "body")
    return {field: api_payload[field] for field in wanted_fields}
|
||||
|
||||
|
||||
def process_item_request(requested_item_id: int):
    """Fetch the given PR, validate it for merging and report the outcome.

    Returns:
        ``True`` after printing a confirmation when the PR passes validation.

    Raises:
        ValueError: if validation reports the PR as not mergeable.
    """
    pr_details = fetch_item_details(item_identifier=requested_item_id)

    if validate_item_for_merge(item_data=pr_details):
        print("PR is mergable.")
        return True

    raise ValueError("PR is not mergable.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
requested_item_id: int = int(sys.argv[1])
|
||||
process_item_request(requested_item_id=requested_item_id)
|
||||
41
.github/workflows/ci-checks.yml
vendored
Normal file
41
.github/workflows/ci-checks.yml
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
name: ci-checks
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
types: [review_requested, ready_for_review, synchronize, opened, reopened]
|
||||
branches:
|
||||
- main
|
||||
- test
|
||||
- develop
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
id-token: write
|
||||
pages: write
|
||||
|
||||
jobs:
|
||||
ci-checks:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install Python
|
||||
uses: actions/setup-python@v5
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v6
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
uv venv
|
||||
uv pip install requests packaging
|
||||
- name: Run CI checks
|
||||
run: |
|
||||
uv run .github/scripts/ci_checks.py
|
||||
9
.github/workflows/run-benchmarks.yml
vendored
9
.github/workflows/run-benchmarks.yml
vendored
@@ -2,9 +2,12 @@ name: run-benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
push:
|
||||
branches:
|
||||
- test
|
||||
|
||||
jobs:
|
||||
pick-runner:
|
||||
@@ -34,9 +37,9 @@ jobs:
|
||||
toolchain: stable
|
||||
|
||||
- name: Install Python
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v5
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v5
|
||||
uses: astral-sh/setup-uv@v6
|
||||
- name: Setup venv
|
||||
run: |
|
||||
uv venv
|
||||
|
||||
2
.github/workflows/run-unit-tests.yml
vendored
2
.github/workflows/run-unit-tests.yml
vendored
@@ -5,6 +5,8 @@ on:
|
||||
types: [review_requested, ready_for_review, synchronize, opened, reopened]
|
||||
branches:
|
||||
- main
|
||||
- test
|
||||
- develop
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
[package]
|
||||
name = "rustframe"
|
||||
authors = ["Palash Tyagi (https://github.com/Magnus167)"]
|
||||
version = "0.0.1-a.20250716"
|
||||
version = "0.0.1-a.20250805"
|
||||
edition = "2021"
|
||||
license = "GPL-3.0-or-later"
|
||||
readme = "README.md"
|
||||
description = "A simple dataframe library"
|
||||
description = "A simple dataframe and math toolkit"
|
||||
documentation = "https://magnus167.github.io/rustframe/"
|
||||
|
||||
[lib]
|
||||
name = "rustframe"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# rustframe
|
||||
|
||||
📚 [Docs](https://magnus167.github.io/rustframe/) | 🐙 [GitHub](https://github.com/Magnus167/rustframe) | 🦀 [Crates.io](https://crates.io/crates/rustframe) | 🔖 [docs.rs](https://docs.rs/rustframe/latest/rustframe/)
|
||||
🐙 [GitHub](https://github.com/Magnus167/rustframe) | 📚 [Docs](https://magnus167.github.io/rustframe/) | 📖 [User Guide](https://magnus167.github.io/rustframe/user-guide/) | 🦀 [Crates.io](https://crates.io/crates/rustframe) | 🔖 [docs.rs](https://docs.rs/rustframe/latest/rustframe/)
|
||||
|
||||
<!-- [](https://github.com/Magnus167/rustframe) -->
|
||||
|
||||
|
||||
@@ -70,6 +70,77 @@ assert!((corr - 1.0).abs() < 1e-8);
|
||||
assert!((cov - 2.5).abs() < 1e-8);
|
||||
```
|
||||
|
||||
## Covariance
|
||||
|
||||
### `covariance`
|
||||
|
||||
Computes the population covariance between two equally sized matrices by flattening
|
||||
their values.
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::stats::covariance;
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
|
||||
let y = Matrix::from_vec(vec![2.0, 4.0, 6.0, 8.0], 2, 2);
|
||||
let cov = covariance(&x, &y);
|
||||
assert!((cov - 2.5).abs() < 1e-8);
|
||||
```
|
||||
|
||||
### `covariance_vertical`
|
||||
|
||||
Evaluates covariance between columns (i.e. across rows) and returns a matrix of
|
||||
column pair covariances.
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::stats::covariance_vertical;
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
let m = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
|
||||
let cov = covariance_vertical(&m);
|
||||
assert_eq!(cov.shape(), (2, 2));
|
||||
assert!(cov.data().iter().all(|&v| (v - 1.0).abs() < 1e-8));
|
||||
```
|
||||
|
||||
### `covariance_horizontal`
|
||||
|
||||
Computes covariance between rows (i.e. across columns) returning a matrix that
|
||||
describes how each pair of rows varies together.
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::stats::covariance_horizontal;
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
let m = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
|
||||
let cov = covariance_horizontal(&m);
|
||||
assert_eq!(cov.shape(), (2, 2));
|
||||
assert!(cov.data().iter().all(|&v| (v - 0.25).abs() < 1e-8));
|
||||
```
|
||||
|
||||
### `covariance_matrix`
|
||||
|
||||
Builds a covariance matrix either between columns (`Axis::Col`) or rows
|
||||
(`Axis::Row`). Each entry represents how two series co-vary.
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::stats::covariance_matrix;
|
||||
use rustframe::matrix::{Axis, Matrix};
|
||||
|
||||
let data = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
|
||||
|
||||
// Covariance between columns
|
||||
let cov_cols = covariance_matrix(&data, Axis::Col);
|
||||
assert!((cov_cols.get(0, 0) - 2.0).abs() < 1e-8);
|
||||
|
||||
// Covariance between rows
|
||||
let cov_rows = covariance_matrix(&data, Axis::Row);
|
||||
assert!((cov_rows.get(0, 1) + 0.5).abs() < 1e-8);
|
||||
```
|
||||
|
||||
## Distributions
|
||||
|
||||
Probability distribution helpers are available for common PDFs and CDFs.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Introduction
|
||||
|
||||
📚 [Docs](https://magnus167.github.io/rustframe/) | 🐙 [GitHub](https://github.com/Magnus167/rustframe) | 🦀 [Crates.io](https://crates.io/crates/rustframe) | 🔖 [docs.rs](https://docs.rs/rustframe/latest/rustframe/)
|
||||
🐙 [GitHub](https://github.com/Magnus167/rustframe) | 📚 [Docs](https://magnus167.github.io/rustframe/) | 📖 [User Guide](https://magnus167.github.io/rustframe/user-guide/) | 🦀 [Crates.io](https://crates.io/crates/rustframe) | 🔖 [docs.rs](https://docs.rs/rustframe/latest/rustframe/)
|
||||
|
||||
Welcome to the **Rustframe User Guide**. Rustframe is a lightweight dataframe
|
||||
and math toolkit for Rust written in 100% safe Rust. It focuses on keeping the
|
||||
|
||||
@@ -41,9 +41,6 @@ let new_point = Matrix::from_vec(vec![0.0, 0.0], 1, 2);
|
||||
let cluster = model.predict(&new_point)[0];
|
||||
```
|
||||
|
||||
For helper functions and upcoming modules, visit the
|
||||
[utilities](./utilities.md) section.
|
||||
|
||||
## Logistic Regression
|
||||
|
||||
```rust
|
||||
@@ -72,7 +69,7 @@ let transformed = pca.transform(&data);
|
||||
assert_eq!(transformed.cols(), 1);
|
||||
```
|
||||
|
||||
### Gaussian Naive Bayes
|
||||
## Gaussian Naive Bayes
|
||||
|
||||
Gaussian Naive Bayes classifier for continuous features:
|
||||
|
||||
@@ -101,7 +98,7 @@ let predictions = model.predict(&x);
|
||||
assert_eq!(predictions.rows(), 4);
|
||||
```
|
||||
|
||||
### Dense Neural Networks
|
||||
## Dense Neural Networks
|
||||
|
||||
Simple fully connected neural network:
|
||||
|
||||
@@ -142,5 +139,144 @@ let predictions = model.predict(&x);
|
||||
assert_eq!(predictions.rows(), 4);
|
||||
```
|
||||
|
||||
## Real-world Examples
|
||||
|
||||
### Housing Price Prediction
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::models::linreg::LinReg;
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
// Features: square feet and bedrooms
|
||||
let features = Matrix::from_rows_vec(vec![
|
||||
2100.0, 3.0,
|
||||
1600.0, 2.0,
|
||||
2400.0, 4.0,
|
||||
1400.0, 2.0,
|
||||
], 4, 2);
|
||||
|
||||
// Sale prices
|
||||
let target = Matrix::from_vec(vec![400_000.0, 330_000.0, 369_000.0, 232_000.0], 4, 1);
|
||||
|
||||
let mut model = LinReg::new(2);
|
||||
model.fit(&features, &target, 1e-8, 10_000);
|
||||
|
||||
// Predict price of a new home
|
||||
let new_home = Matrix::from_vec(vec![2000.0, 3.0], 1, 2);
|
||||
let predicted_price = model.predict(&new_home);
|
||||
println!("Predicted price: ${}", predicted_price.data()[0]);
|
||||
```
|
||||
|
||||
### Spam Detection
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::models::logreg::LogReg;
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
// 20 e-mails × 5 features = 100 numbers (row-major, spam first)
|
||||
let x = Matrix::from_rows_vec(
|
||||
vec![
|
||||
// ─────────── spam examples ───────────
|
||||
2.0, 1.0, 1.0, 1.0, 1.0, // "You win a FREE offer - click for money-back bonus!"
|
||||
1.0, 0.0, 1.0, 1.0, 0.0, // "FREE offer! Click now!"
|
||||
0.0, 2.0, 0.0, 1.0, 1.0, // "Win win win - money inside, click…"
|
||||
1.0, 1.0, 0.0, 0.0, 1.0, // "Limited offer to win easy money…"
|
||||
1.0, 0.0, 1.0, 0.0, 1.0, // ...
|
||||
0.0, 1.0, 1.0, 1.0, 0.0, // ...
|
||||
2.0, 0.0, 0.0, 1.0, 1.0, // ...
|
||||
0.0, 1.0, 1.0, 0.0, 1.0, // ...
|
||||
1.0, 1.0, 1.0, 1.0, 0.0, // ...
|
||||
1.0, 0.0, 0.0, 1.0, 1.0, // ...
|
||||
// ─────────── ham examples ───────────
|
||||
0.0, 0.0, 0.0, 0.0, 0.0, // "See you at the meeting tomorrow."
|
||||
0.0, 0.0, 0.0, 1.0, 0.0, // "Here's the Zoom click-link."
|
||||
0.0, 0.0, 0.0, 0.0, 1.0, // "Expense report: money attached."
|
||||
0.0, 0.0, 0.0, 1.0, 1.0, // ...
|
||||
0.0, 1.0, 0.0, 0.0, 0.0, // "Did we win the bid?"
|
||||
0.0, 0.0, 0.0, 0.0, 0.0, // ...
|
||||
0.0, 0.0, 0.0, 1.0, 0.0, // ...
|
||||
1.0, 0.0, 0.0, 0.0, 0.0, // "Special offer for staff lunch."
|
||||
0.0, 0.0, 0.0, 0.0, 0.0, // ...
|
||||
0.0, 0.0, 0.0, 1.0, 0.0,
|
||||
],
|
||||
20,
|
||||
5,
|
||||
);
|
||||
|
||||
// Labels: 1 = spam, 0 = ham
|
||||
let y = Matrix::from_vec(
|
||||
vec![
|
||||
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, // 10 spam
|
||||
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, // 10 ham
|
||||
],
|
||||
20,
|
||||
1,
|
||||
);
|
||||
|
||||
// Train
|
||||
let mut model = LogReg::new(5);
|
||||
model.fit(&x, &y, 0.01, 5000);
|
||||
|
||||
// Predict
|
||||
// e.g. "free money offer"
|
||||
let email_data = vec![1.0, 0.0, 1.0, 0.0, 1.0];
|
||||
let email = Matrix::from_vec(email_data, 1, 5);
|
||||
let prob_spam = model.predict_proba(&email);
|
||||
println!("Probability of spam: {:.4}", prob_spam.data()[0]);
|
||||
```
|
||||
|
||||
### Iris Flower Classification
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::models::gaussian_nb::GaussianNB;
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
// Features: sepal length and petal length
|
||||
let x = Matrix::from_rows_vec(vec![
|
||||
5.1, 1.4, // setosa
|
||||
4.9, 1.4, // setosa
|
||||
6.2, 4.5, // versicolor
|
||||
5.9, 5.1, // virginica
|
||||
], 4, 2);
|
||||
|
||||
let y = Matrix::from_vec(vec![0.0, 0.0, 1.0, 2.0], 4, 1);
|
||||
let names = vec!["setosa", "versicolor", "virginica"];
|
||||
|
||||
let mut model = GaussianNB::new(1e-9, true);
|
||||
model.fit(&x, &y);
|
||||
|
||||
let sample = Matrix::from_vec(vec![5.0, 1.5], 1, 2);
|
||||
let predicted_class = model.predict(&sample);
|
||||
let class_name = names[predicted_class.data()[0] as usize];
|
||||
println!("Predicted class: {} ({:?})", class_name, predicted_class.data()[0]);
|
||||
```
|
||||
|
||||
### Customer Segmentation
|
||||
|
||||
```rust
|
||||
# extern crate rustframe;
|
||||
use rustframe::compute::models::k_means::KMeans;
|
||||
use rustframe::matrix::Matrix;
|
||||
|
||||
// Each row: [age, annual_income]
|
||||
let customers = Matrix::from_rows_vec(
|
||||
vec![
|
||||
25.0, 40_000.0, 34.0, 52_000.0, 58.0, 95_000.0, 45.0, 70_000.0,
|
||||
],
|
||||
4,
|
||||
2,
|
||||
);
|
||||
|
||||
let (model, labels) = KMeans::fit(&customers, 2, 20, 1e-4);
|
||||
|
||||
let new_customer = Matrix::from_vec(vec![30.0, 50_000.0], 1, 2);
|
||||
let cluster = model.predict(&new_customer)[0];
|
||||
println!("New customer belongs to cluster: {}", cluster);
|
||||
println!("Cluster labels: {:?}", labels);
|
||||
```
|
||||
|
||||
For helper functions and upcoming modules, visit the
|
||||
[utilities](./utilities.md) section.
|
||||
|
||||
Reference in New Issue
Block a user