Compare commits

..

184 Commits

Author SHA1 Message Date
Palash Tyagi
1381c77eaf Revert "Update README to include upcoming features for CSV I/O, Date Utils, and more math functions"
This reverts commit 623303cf726034c0ef09da3d66d2615866d1632b.
2025-08-05 23:25:56 +01:00
c56574f0f3
Merge branch 'main' into csv 2025-08-05 23:20:10 +01:00
c53693fa7b
Merge pull request #72 from Magnus167/release/a20250805
Bump version to 0.0.1-a.20250805 in Cargo.toml
2025-08-05 00:11:57 +01:00
109d39b248
Merge branch 'main' into release/a20250805 2025-08-05 00:08:27 +01:00
Palash Tyagi
18ad6c689a Bump version to 0.0.1-a.20250805 in Cargo.toml 2025-08-05 00:06:49 +01:00
1fead78b69
Merge pull request #71 from Magnus167/prep-release-20250804
Update package version and enhance description in Cargo.toml
2025-08-04 23:27:12 +01:00
Palash Tyagi
6fb32e743c Update package version and enhance description in Cargo.toml 2025-08-04 23:15:24 +01:00
2cb4e46217
Merge pull request #69 from Magnus167/user-guide
Add user guide mdbook
2025-08-04 22:22:55 +01:00
Palash Tyagi
a53ba63f30 Rearrange links in the introduction for improved visibility 2025-08-04 22:20:58 +01:00
Palash Tyagi
dae60ea1bd Rearrange links in the README for improved visibility 2025-08-04 22:15:42 +01:00
Palash Tyagi
755dee58e7 Refactor machine learning user-guide 2025-08-04 22:14:17 +01:00
Palash Tyagi
9e6e22fc37 Add covariance functions and examples to documentation 2025-08-04 20:37:27 +01:00
Palash Tyagi
b687fd4e6b Add advanced matrix operations and Gaussian Naive Bayes examples to documentation 2025-08-04 19:21:36 +01:00
Palash Tyagi
68a01ab528 Enhance documentation with additional compute examples and stats functions 2025-08-04 15:52:57 +01:00
Palash Tyagi
23a01dab07 Update documentation links 2025-08-04 00:29:13 +01:00
Palash Tyagi
f4ebd78234 Comment out the release build command in gen.sh for clarity 2025-08-04 00:06:59 +01:00
Palash Tyagi
1475156855 Fix casing in user guide title for consistency 2025-08-04 00:05:31 +01:00
Palash Tyagi
080680d095 Update book metadata: correct author field and ensure consistent title casing 2025-08-04 00:05:13 +01:00
Palash Tyagi
2845f357b7 Revise introduction for clarity and detail, enhancing the overview of RustFrame's features and capabilities 2025-08-04 00:04:41 +01:00
Palash Tyagi
3d11226d57 Update machine learning documentation for clarity and completeness 2025-08-04 00:04:36 +01:00
Palash Tyagi
039fb1a98e Enhance utilities documentation with additional date and random number examples 2025-08-04 00:04:07 +01:00
Palash Tyagi
31a5ba2460 Improve data manipulation examples 2025-08-04 00:02:46 +01:00
Palash Tyagi
1a9f397702 Add more statistical routines and examples 2025-08-04 00:02:17 +01:00
Palash Tyagi
ecd06eb352 update format in README 2025-08-03 23:28:19 +01:00
Palash Tyagi
ae327b6060 Update user guide build script path in CI workflows 2025-08-03 23:28:03 +01:00
Palash Tyagi
83ac9d4821 Remove local build instructions from the introduction of the user guide 2025-08-03 23:25:17 +01:00
Palash Tyagi
ae27ed9373 Add instructions for building the user guide 2025-08-03 23:25:13 +01:00
Palash Tyagi
c7552f2264 Simplify user guide build steps in CI workflows 2025-08-03 23:24:54 +01:00
Palash Tyagi
3654c7053c Refactor build process 2025-08-03 23:23:10 +01:00
Palash Tyagi
1dcd9727b4 Update output directory structure for user guide and index files 2025-08-03 23:15:54 +01:00
Palash Tyagi
b62152b4f0 Update output directory for user guide and artifact upload in CI workflow 2025-08-03 23:01:54 +01:00
Palash Tyagi
a6a901d6ab Add step to install mdBook for user guide build in CI workflows 2025-08-03 22:16:53 +01:00
Palash Tyagi
676af850ef Add step to test user guide build in CI workflow 2025-08-03 22:13:25 +01:00
Palash Tyagi
ca2ca2a738 Add link to User Guide in the main index page 2025-08-03 22:11:15 +01:00
Palash Tyagi
4876a74e01 Add user guide build and output steps to CI workflow 2025-08-03 22:11:10 +01:00
Palash Tyagi
b78dd75e77 Add build script for RustFrame user guide using mdBook 2025-08-03 22:07:38 +01:00
Palash Tyagi
9db8853d75 Add user guide configuration and update .gitignore 2025-08-03 22:07:32 +01:00
Palash Tyagi
9738154dac Add user guide examples 2025-08-03 22:07:18 +01:00
7d0978e5fb
Merge pull request #68 from Magnus167/update-docs
Enhance documentation with usage examples
2025-08-03 17:45:29 +01:00
Palash Tyagi
ed01c4b8f2 Enhance documentation with usage examples for crate::compute::models 2025-08-03 16:48:37 +01:00
Palash Tyagi
e6964795e3 Enhance documentation with usage examples for statistical routines and utilities 2025-08-03 16:48:02 +01:00
Palash Tyagi
d1dd7ea6d2 Enhance documentation with usage examples for core data-frame structures and operations 2025-08-03 16:46:20 +01:00
Palash Tyagi
676f78bb1e Enhance documentation with usage examples for boolean and series operations 2025-08-03 16:45:30 +01:00
Palash Tyagi
f7325a9558 Enhance documentation with usage examples for date generation utilities 2025-08-03 16:45:15 +01:00
Palash Tyagi
18b9eef063 Enhance documentation with usage examples for random number generation utilities 2025-08-03 16:45:00 +01:00
Palash Tyagi
f99f78d508 Update section headers in README.md for consistency 2025-08-03 16:44:34 +01:00
2926a8a6e8
Merge pull request #66 from Magnus167/update-readme
Update README
2025-08-03 00:30:28 +01:00
d851c500af
Merge pull request #67 from Magnus167/comments-cleanup
Cleanup comments and formatting
2025-08-02 22:03:14 +01:00
Palash Tyagi
d741c7f472 Remove expected output comments from matrix operations examples in README.md 2025-08-02 21:59:42 +01:00
Palash Tyagi
7720312354 Improve comments for clarity in logistic regression, stats overview, PCA, correlation, descriptive statistics, and matrix tests 2025-08-02 21:59:22 +01:00
Palash Tyagi
5509416d5f Remove unused logo comment from README.md 2025-08-02 21:22:01 +01:00
Palash Tyagi
a451ba8cc7 Clean up comments and formatting in Game of Life example 2025-08-02 21:21:09 +01:00
Palash Tyagi
bce1bdd21a Update README 2025-07-31 22:52:29 +01:00
af70f9ffd7
Merge pull request #65 from Magnus167/win-random
Refactor CryptoRng for cross-platform secure random byte generation
2025-07-29 23:29:58 +01:00
Palash Tyagi
7f33223496 Fix type name for BCRYPT_ALG_HANDLE in win_fill function 2025-07-29 23:25:07 +01:00
Palash Tyagi
73dbb25242 Refactor CryptoRng implementation for Windows and Unix, adding support for secure random byte generation on Windows. 2025-07-29 23:23:04 +01:00
4061ebf8ae
Merge pull request #64 from Magnus167/randomx
Implement built-in random number generation utilities
2025-07-29 22:21:29 +01:00
Palash Tyagi
ef322fc6a2 Refactor assertions in tests to simplify error messages for KMeans, CryptoRng, and Prng modules 2025-07-29 22:15:45 +01:00
Palash Tyagi
750adc72e9 Add missing #[cfg(test)] attribute to tests module in activations.rs 2025-07-29 21:42:47 +01:00
Palash Tyagi
3207254564 Add examples for random number generation and statistical tests 2025-07-29 00:36:14 +01:00
Palash Tyagi
2ea83727a1 enhance unittests for all random functionalities 2025-07-29 00:36:05 +01:00
Palash Tyagi
3f56b378b2 Add unit tests for SliceRandom trait and shuffle functionality 2025-07-28 23:12:20 +01:00
Palash Tyagi
afcb29e716 Add extensive tests for Prng functionality, including range checks and distribution properties 2025-07-28 23:11:54 +01:00
Palash Tyagi
113831dc8c Add comprehensive tests for CryptoRng functionality and distribution properties 2025-07-28 23:11:26 +01:00
Palash Tyagi
289c70d9e9 Refactor tests to remove unused random number generator tests and enhance range sample validation 2025-07-28 23:11:17 +01:00
Palash Tyagi
cd13d98110 Remove rand dependency from Cargo.toml 2025-07-28 20:37:37 +01:00
Palash Tyagi
b4520b0d30 Update README to reflect built-in random number generation utilities 2025-07-28 20:37:24 +01:00
Palash Tyagi
5934b163f5 Refactor random number generation to use rustframe's random module 2025-07-28 20:37:08 +01:00
Palash Tyagi
4a1843183a Add documentation for the random module 2025-07-28 20:36:52 +01:00
Palash Tyagi
252c8a3d29 Refactor KMeans module to use inbuilt random 2025-07-28 20:23:59 +01:00
Palash Tyagi
5a5baf9716 Add initial implementation of random module with submodules and prelude exports 2025-07-28 20:19:12 +01:00
Palash Tyagi
28793e5b07 Add CryptoRng for cryptographically secure random number generation 2025-07-28 20:19:01 +01:00
Palash Tyagi
d75bd7a08f Add XorShift64-based pseudo random number generator implementation 2025-07-28 20:17:59 +01:00
Palash Tyagi
6fd796cceb Add SliceRandom trait for shuffling slices using RNG 2025-07-28 20:17:35 +01:00
Palash Tyagi
d0b0f295b1 Implement Rng trait and RangeSample conversion for random number generation 2025-07-28 20:17:21 +01:00
556b08216f
Merge pull request #61 from Magnus167/add-examples
Adding examples for various functionalities
2025-07-26 23:10:16 +01:00
Palash Tyagi
17201b4d29 Add example commands for statistical operations in README 2025-07-26 23:06:47 +01:00
Palash Tyagi
2a99d8930c Add examples for descriptive stats 2025-07-26 23:06:08 +01:00
Palash Tyagi
38213c73c7 Add examples for covariance and correlation 2025-07-26 23:05:56 +01:00
Palash Tyagi
c004bd8334 Add inferential statistics examples 2025-07-26 23:05:41 +01:00
Palash Tyagi
dccbba9d1b Add examples for distribution helpers 2025-07-26 23:05:25 +01:00
Palash Tyagi
ab3509fef4 Added examples/stats_overview 2025-07-26 23:04:34 +01:00
f5c56d02e2
Merge branch 'main' into add-examples 2025-07-26 21:49:14 +01:00
069ef25ef4
Merge pull request #63 from Magnus167/update-runner
Fix package installation in runner Dockerfile
2025-07-26 21:41:08 +01:00
Palash Tyagi
f9a60608df attempting fix 2025-07-26 20:59:28 +01:00
526e22b1b7
Merge pull request #62 from Magnus167/update-cargo-authors
Add authors field to Cargo.toml
2025-07-26 20:54:53 +01:00
Palash Tyagi
845667c60a Add authors field to Cargo.toml 2025-07-26 20:53:47 +01:00
Palash Tyagi
3935e80be6 Fix typo in assertion 2025-07-26 20:35:47 +01:00
Palash Tyagi
0ce970308b Add step to run all examples in debug mode during unit tests 2025-07-26 20:33:28 +01:00
Palash Tyagi
72d02e2336 Add script to run all example programs with debug mode 2025-07-26 20:33:19 +01:00
Palash Tyagi
26213b28d6 Refactor GitHub Actions workflow to streamline unit tests and add example tests 2025-07-26 20:31:08 +01:00
Palash Tyagi
44ff16a0bb Refactor Game of Life example to support debug mode and improve board printing 2025-07-26 20:30:03 +01:00
Palash Tyagi
1192a78955 Add example demos to README.md 2025-07-26 18:38:53 +01:00
Palash Tyagi
d0f9e80dfc add test as examples 2025-07-26 18:38:27 +01:00
Palash Tyagi
b0d8050b11 add test as examples 2025-07-26 13:26:44 +01:00
Palash Tyagi
45ec754d47 add test as examples 2025-07-26 12:21:27 +01:00
Palash Tyagi
733a4da383 Add unit test in pca.rs 2025-07-26 10:51:35 +01:00
Palash Tyagi
ded5f1aa29 Add k-means examples 2025-07-26 04:06:12 +01:00
Palash Tyagi
fe9498963d Add linear regression examples 2025-07-26 04:05:56 +01:00
Palash Tyagi
6b580ec5eb Add logistic regression examples 2025-07-26 04:05:43 +01:00
Palash Tyagi
45f147e651 Add PCA examples 2025-07-26 04:05:27 +01:00
6abf4ec983
Merge pull request #60 from Magnus167/docs-title-link
Add redirect meta tag to documentation index.html
2025-07-20 00:28:10 +01:00
Palash Tyagi
037cfd9113 Empty commit for testing 2025-07-20 00:26:20 +01:00
Palash Tyagi
74fac9d512 Add redirect meta tag to generated index.html for documentation 2025-07-19 23:39:58 +01:00
27e9eab028
Merge pull request #58 from Magnus167/prep
Updating README
2025-07-17 00:16:23 +01:00
Palash Tyagi
c13fcc99f7 Remove commented-out dev-dependencies from Cargo.toml 2025-07-16 19:14:45 -04:00
Palash Tyagi
eb9de0a647 Fix typos and improve clarity in README documentation 2025-07-16 19:14:06 -04:00
Palash Tyagi
044c3284df Enhance README with detailed project scope, features, and compute module overview 2025-07-17 00:09:42 +01:00
Palash Tyagi
ad4cadd8fb Update version in Cargo.toml and enhance README for clarity and project scope 2025-07-16 23:51:42 +01:00
34b09508f3
Merge pull request #57 from Magnus167/compute
Add statistical functions and machine learning models
2025-07-16 01:53:39 +01:00
Palash Tyagi
a8a532f252 Remove Spearman correlation function and unused rank import from stats module 2025-07-16 01:50:28 +01:00
Palash Tyagi
19c3dde169 Add Pearson and Spearman correlation functions to stats module 2025-07-16 01:32:18 +01:00
Palash Tyagi
a335d29347 Simplify t-test assertion in unit test for clarity 2025-07-15 01:05:32 +01:00
Palash Tyagi
b2f6794e05 Add inferential module to stats module exports 2025-07-15 01:02:20 +01:00
Palash Tyagi
5f1f0970da Implement statistical tests: t-test, chi-square test, and ANOVA with corresponding unit tests 2025-07-15 01:02:14 +01:00
Palash Tyagi
7bbfb5394f Add tests for sample variance and standard deviation calculations 2025-07-15 01:01:40 +01:00
Palash Tyagi
285147d52b Refactor variance functions to distinguish between population and sample variance 2025-07-15 01:00:03 +01:00
Palash Tyagi
64722914bd Add test for KMeans empty cluster reinitialization logic 2025-07-13 02:24:29 +01:00
Palash Tyagi
86ea548b4f Remove test for KMeans empty cluster reinitialization 2025-07-13 01:51:43 +01:00
Palash Tyagi
1bdcf1b113 Refactor test for KMeans empty cluster reinitialization to use distinct data points and remove redundant assertion 2025-07-13 01:49:15 +01:00
Palash Tyagi
7c7c8c2a16 Remove redundant assertion message in empty cluster reinitialization test 2025-07-13 01:41:41 +01:00
Palash Tyagi
4d8ed2e908 Add test for KMeans empty cluster reinitialization logic 2025-07-13 01:41:20 +01:00
Palash Tyagi
62d4803075 Simplify assertion for unique labels in KMeans tests when k equals m 2025-07-13 01:35:02 +01:00
Palash Tyagi
19bc09fd5a Refactor KMeans centroid initialization and improve handling of edge cases 2025-07-13 01:29:19 +01:00
Palash Tyagi
bda9b84987 Refactor KMeans centroid initialization to handle k=1 case by setting centroid to mean of data 2025-07-13 00:16:29 +01:00
Palash Tyagi
c24eb4a08c Relax assertion tolerance in KMeans tests to align with algorithm's convergence criteria 2025-07-12 01:47:40 +01:00
Palash Tyagi
12a72317e4 Refactor KMeans fit and predict methods for improved clarity and performance 2025-07-12 01:45:59 +01:00
Palash Tyagi
049dd02c1a Remove unreachable panic 2025-07-12 01:35:51 +01:00
Palash Tyagi
bc87e40481 Add test for variance smoothing with zero smoothing in GaussianNB 2025-07-12 01:34:08 +01:00
Palash Tyagi
eebe772da6 Add test for invalid activation count in DenseNNConfig to ensure proper configuration 2025-07-12 01:11:41 +01:00
Palash Tyagi
7b0d34384a Refactor test assertions to improve readability by removing error messages from assert macros 2025-07-12 01:06:02 +01:00
Palash Tyagi
9182ab9fca Add test for PCA fit with n_components greater than n_features to verify behavior 2025-07-12 01:00:00 +01:00
Palash Tyagi
de18d8e010 applied formatting 2025-07-12 00:56:09 +01:00
Palash Tyagi
9b08eaeb35 applied formatting 2025-07-12 00:55:44 +01:00
Palash Tyagi
a3bb509202 Add test for row_copy_from_slice to check out-of-bounds access 2025-07-12 00:55:27 +01:00
Palash Tyagi
10018f7efe Refactor covariance_matrix to improve mean calculation and add broadcasting for centered data; add tests for vertical and horizontal covariance matrices 2025-07-12 00:50:14 +01:00
Palash Tyagi
b7480b20d4 Add correlation module and update exports in stats module 2025-07-12 00:30:26 +01:00
Palash Tyagi
d5afb4e87a Refactor PCA fit method to use covariance matrix directly and improve mean calculation 2025-07-12 00:30:21 +01:00
Palash Tyagi
493eb96a05 Implement covariance functions for matrices with comprehensive tests 2025-07-12 00:29:50 +01:00
Palash Tyagi
58b0a5f0d9 Add broadcasting functionality for 1-row matrices with tests 2025-07-12 00:22:22 +01:00
Palash Tyagi
37c0d312e5 Add tests for activation functions, initializers, and loss gradient in DenseNN 2025-07-10 23:47:36 +01:00
Palash Tyagi
e7c181f011 Refactor error handling in GaussianNB fit method to use assert instead of panic for empty class labels 2025-07-10 23:27:23 +01:00
Palash Tyagi
2cd2e24f57 Add test for gamma_cdf_func to validate behavior for negative input 2025-07-08 23:21:59 +01:00
Palash Tyagi
61aeedbf76 Simplify assertion in lower_incomplete_gamma test for clarity 2025-07-08 23:18:16 +01:00
Palash Tyagi
8ffa278db8 Add tests for uniform and binomial distributions; enhance gamma function tests 2025-07-08 23:16:19 +01:00
Palash Tyagi
b2a799fc30 Add test for median_horizontal function to validate horizontal median calculation 2025-07-08 21:03:05 +01:00
Palash Tyagi
5779c6b82d Refactor median and percentile functions to handle vertical and horizontal calculations correctly; add corresponding tests for validation 2025-07-08 21:00:19 +01:00
Palash Tyagi
a2fcaf1d52 Add tests for mean, variance, and standard deviation calculations in vertical and horizontal directions 2025-07-07 23:36:43 +01:00
Palash Tyagi
6711cad6e2 Add from_rows_vec method to construct Matrix from a flat Vec in row-major order and include corresponding tests 2025-07-07 23:35:00 +01:00
Palash Tyagi
46cfe43983 Add tests for row access and row_copy_from_slice methods 2025-07-07 21:30:26 +01:00
Palash Tyagi
122a972a33 Add statistical distribution functions for matrices 2025-07-07 21:22:09 +01:00
Palash Tyagi
2a63e6d5ab Enhance Matrix implementation with generic filled method and add NaN support 2025-07-07 21:20:57 +01:00
Palash Tyagi
e48ce7d6d7 Add descriptive statistics functions and module integration 2025-07-07 00:38:09 +01:00
Palash Tyagi
a08fb546a9 fixed typo 2025-07-07 00:02:24 +01:00
Palash Tyagi
e195481691 Refactor row access method to row_copy_from_slice for better clarity and functionality 2025-07-07 00:02:08 +01:00
Palash Tyagi
87d14bbf5f Moved activations module and update imports in dense_nn and logreg 2025-07-06 23:50:32 +01:00
Palash Tyagi
4f8a27298c Add mutable row access method and corresponding tests for Matrix 2025-07-06 22:21:03 +01:00
Palash Tyagi
4648800a09 fixed incorrectly commited file 2025-07-06 21:16:57 +01:00
Palash Tyagi
96f434bf94 Add tests for DenseNN training and MSE loss calculation 2025-07-06 20:48:04 +01:00
Palash Tyagi
46abeb12a7 applied formatting 2025-07-06 20:43:01 +01:00
Palash Tyagi
75d07371b2 Increase training epochs from 5000 to 10000 for improved model performance 2025-07-06 20:16:06 +01:00
Palash Tyagi
70d2a7a2b4 Refactor GaussianNB implementation for improved clarity and stability, including enhanced variance handling and additional unit tests 2025-07-06 20:13:59 +01:00
Palash Tyagi
261d0d7007 Refactor DenseNN implementation to enhance activation function handling and improve training process 2025-07-06 20:03:16 +01:00
Palash Tyagi
005c10e816 Enhance activation function tests with edge cases for sigmoid, relu, and their derivatives 2025-07-06 20:00:54 +01:00
Palash Tyagi
4c626bf09c Add leaky_relu and dleaky_relu functions with corresponding unit tests 2025-07-06 20:00:17 +01:00
Palash Tyagi
ab6d5f9f8f Refactor test module imports in LinReg to improve clarity 2025-07-06 19:17:09 +01:00
Palash Tyagi
1c8fcc0bad Refactor LogReg implementation for improved readability by adjusting formatting and organizing imports 2025-07-06 19:17:03 +01:00
Palash Tyagi
2ca496cfd1 Add repeat_rows method to Matrix and corresponding unit test 2025-07-06 19:16:46 +01:00
Palash Tyagi
85154a3be0 Add shape method to Matrix and corresponding unit test 2025-07-06 18:58:38 +01:00
Palash Tyagi
54a266b630 Add unit tests for logistic regression fit and predict methods 2025-07-06 18:52:49 +01:00
Palash Tyagi
4ddacdfd21 Add unit tests for linear regression fit and predict methods 2025-07-06 18:52:15 +01:00
Palash Tyagi
37b20f2174 Add unit tests for activation functions: sigmoid, relu, dsigmoid, and drelu 2025-07-06 17:51:43 +01:00
Palash Tyagi
b279131503 Add model modules for linear regression, logistic regression, dense neural network, k-means, PCA, and Gaussian Naive Bayes 2025-07-06 17:43:17 +01:00
Palash Tyagi
eb948c1f49 Add Gaussian Naive Bayes implementation with fit and predict methods 2025-07-06 17:43:04 +01:00
Palash Tyagi
d4c0f174b1 Add PCA implementation with fit and transform methods 2025-07-06 17:42:56 +01:00
Palash Tyagi
b6645fcfbd Add Gaussian Naive Bayes implementation with fit and predict methods 2025-07-06 17:42:45 +01:00
Palash Tyagi
b1b7e63fea Add Dense Neural Network implementation with forward and training methods 2025-07-06 17:42:08 +01:00
Palash Tyagi
e2c5e65c18 move rand from dev-deps to deps 2025-07-06 17:41:56 +01:00
Palash Tyagi
be41e9b20e Add logistic regression model implementation 2025-07-06 17:41:14 +01:00
Palash Tyagi
1501ed5b7a Add linear regression model implementation 2025-07-06 17:40:55 +01:00
Palash Tyagi
dbbf5f9617 Add activation functions: sigmoid, dsigmoid, relu, and drelu 2025-07-06 17:40:41 +01:00
Palash Tyagi
6718cf5de7 Add compute module and update lib.rs to include it 2025-07-06 17:40:04 +01:00
Palash Tyagi
f749b2c921 Add method to retrieve a specific row from the matrix and corresponding tests 2025-07-06 17:38:24 +01:00
Palash Tyagi
04637ef4d0 Add methods to create zero, one, and filled matrices for f64 type 2025-07-06 17:05:46 +01:00
59 changed files with 5723 additions and 160 deletions

View File

@ -58,6 +58,14 @@
<h2>A lightweight dataframe & math toolkit for Rust</h2> <h2>A lightweight dataframe & math toolkit for Rust</h2>
<hr style="border: 1px solid #d4d4d4; margin: 20px 0;"> <hr style="border: 1px solid #d4d4d4; margin: 20px 0;">
<p> <p>
🐙 <a href="https://github.com/Magnus167/rustframe">GitHub</a>
<br><br>
📖 <a href="https://magnus167.github.io/rustframe/user-guide">User Guide</a>
<br><br>
📚 <a href="https://magnus167.github.io/rustframe/docs">Docs</a> | 📚 <a href="https://magnus167.github.io/rustframe/docs">Docs</a> |
📊 <a href="https://magnus167.github.io/rustframe/benchmark-report/">Benchmarks</a> 📊 <a href="https://magnus167.github.io/rustframe/benchmark-report/">Benchmarks</a>
@ -65,8 +73,7 @@
🦀 <a href="https://crates.io/crates/rustframe">Crates.io</a> | 🦀 <a href="https://crates.io/crates/rustframe">Crates.io</a> |
🔖 <a href="https://docs.rs/rustframe/latest/rustframe/">docs.rs</a> 🔖 <a href="https://docs.rs/rustframe/latest/rustframe/">docs.rs</a>
<br><br> <br><br>
🐙 <a href="https://github.com/Magnus167/rustframe">GitHub</a> | <!-- 🌐 <a href="https://gitea.nulltech.uk/Magnus167/rustframe">Gitea mirror</a> -->
🌐 <a href="https://gitea.nulltech.uk/Magnus167/rustframe">Gitea mirror</a>
</p> </p>
</main> </main>
</body> </body>

View File

@ -7,7 +7,7 @@ ARG DEBIAN_FRONTEND=noninteractive
RUN apt update -y && apt upgrade -y && useradd -m docker RUN apt update -y && apt upgrade -y && useradd -m docker
RUN apt install -y --no-install-recommends \ RUN apt install -y --no-install-recommends \
curl jq git unzip \ curl jq git zip unzip \
# dev dependencies # dev dependencies
build-essential libssl-dev libffi-dev python3 python3-venv python3-dev python3-pip \ build-essential libssl-dev libffi-dev python3 python3-venv python3-dev python3-pip \
# dot net core dependencies # dot net core dependencies

16
.github/scripts/run_examples.sh vendored Normal file
View File

@ -0,0 +1,16 @@
#!/usr/bin/env bash
# Build every example in release mode, then run each one in turn, passing
# `--debug` through to the example program. Stops at the first example that
# fails and exits with status 1.

# Strict mode: abort on any unhandled command failure, on use of an unset
# variable, and on a failure anywhere inside a pipeline.
set -euo pipefail

# Compile all examples up front; with `set -e` a build failure stops the
# script here instead of surfacing later as a confusing per-example failure.
cargo build --release --examples

for ex in examples/*.rs; do
    # If the glob matches nothing, bash iterates the literal pattern itself;
    # skip it rather than trying to run an example named "*".
    [ -e "$ex" ] || continue

    # Example name is the file name without directory or `.rs` extension.
    name=$(basename "$ex" .rs)

    echo
    echo "🟡 Running example: $name"
    # Everything after the bare `--` is handed to the example, not to cargo.
    if ! cargo run --release --example "$name" -- --debug; then
        echo
        echo "❌ Example '$name' failed. Aborting."
        exit 1
    fi
done

echo
echo "✅ All examples ran successfully."

View File

@ -151,7 +151,8 @@ jobs:
mkdir -p target/doc/docs mkdir -p target/doc/docs
mv target/doc/rustframe/* target/doc/docs/ mv target/doc/rustframe/* target/doc/docs/
mkdir output echo "<meta http-equiv=\"refresh\" content=\"0; url=../docs/index.html\">" > target/doc/rustframe/index.html
cp tarpaulin-report.html target/doc/docs/ cp tarpaulin-report.html target/doc/docs/
cp tarpaulin-report.json target/doc/docs/ cp tarpaulin-report.json target/doc/docs/
cp tarpaulin-badge.json target/doc/docs/ cp tarpaulin-badge.json target/doc/docs/
@ -164,16 +165,30 @@ jobs:
# copy the benchmark report to the output directory # copy the benchmark report to the output directory
cp -r benchmark-report target/doc/ cp -r benchmark-report target/doc/
mkdir output
cp -r target/doc/* output/
- name: Build user guide
run: |
cargo binstall mdbook
bash ./docs/build.sh
- name: Copy user guide to output directory
run: |
mkdir output/user-guide
cp -r docs/book/* output/user-guide/
- name: Add index.html to output directory - name: Add index.html to output directory
run: | run: |
cp .github/htmldocs/index.html target/doc/index.html cp .github/htmldocs/index.html output/index.html
cp .github/rustframe_logo.png target/doc/rustframe_logo.png cp .github/rustframe_logo.png output/rustframe_logo.png
- name: Upload Pages artifact - name: Upload Pages artifact
# if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' # if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
uses: actions/upload-pages-artifact@v3 uses: actions/upload-pages-artifact@v3
with: with:
path: target/doc/ # path: target/doc/
path: output/
- name: Deploy to GitHub Pages - name: Deploy to GitHub Pages
# if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' # if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'

View File

@ -12,14 +12,12 @@ concurrency:
jobs: jobs:
pick-runner: pick-runner:
if: github.event.pull_request.draft == false if: github.event.pull_request.draft == false
runs-on: ubuntu-latest runs-on: ubuntu-latest
outputs: outputs:
runner: ${{ steps.choose.outputs.use-runner }} runner: ${{ steps.choose.outputs.use-runner }}
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- id: choose - id: choose
uses: ./.github/actions/runner-fallback uses: ./.github/actions/runner-fallback
@ -28,7 +26,6 @@ jobs:
fallback-runner: "ubuntu-latest" fallback-runner: "ubuntu-latest"
github-token: ${{ secrets.CUSTOM_GH_TOKEN }} github-token: ${{ secrets.CUSTOM_GH_TOKEN }}
run-unit-tests: run-unit-tests:
needs: pick-runner needs: pick-runner
if: github.event.pull_request.draft == false if: github.event.pull_request.draft == false
@ -56,6 +53,20 @@ jobs:
- name: Test docs generation - name: Test docs generation
run: cargo doc --no-deps --release run: cargo doc --no-deps --release
- name: Test examples
run: cargo test --examples --release
- name: Run all examples
run: |
for example in examples/*.rs; do
name=$(basename "$example" .rs)
echo "Running example: $name"
cargo run --release --example "$name" -- --debug || exit 1
done
- name: Cargo test all targets
run: cargo test --all-targets --release
- name: Upload coverage to Codecov - name: Upload coverage to Codecov
uses: codecov/codecov-action@v3 uses: codecov/codecov-action@v3
with: with:
@ -67,3 +78,8 @@ jobs:
uses: codecov/test-results-action@v1 uses: codecov/test-results-action@v1
with: with:
token: ${{ secrets.CODECOV_TOKEN }} token: ${{ secrets.CODECOV_TOKEN }}
- name: Test build user guide
run: |
cargo binstall mdbook
bash ./docs/build.sh

2
.gitignore vendored
View File

@ -17,3 +17,5 @@ data/
tarpaulin-report.* tarpaulin-report.*
.github/htmldocs/rustframe_logo.png .github/htmldocs/rustframe_logo.png
docs/book/

View File

@ -1,10 +1,12 @@
[package] [package]
name = "rustframe" name = "rustframe"
version = "0.0.1-a.0" authors = ["Palash Tyagi (https://github.com/Magnus167)"]
version = "0.0.1-a.20250805"
edition = "2021" edition = "2021"
license = "GPL-3.0-or-later" license = "GPL-3.0-or-later"
readme = "README.md" readme = "README.md"
description = "A simple dataframe library" description = "A simple dataframe and math toolkit"
documentation = "https://magnus167.github.io/rustframe/"
[lib] [lib]
name = "rustframe" name = "rustframe"
@ -15,15 +17,9 @@ crate-type = ["cdylib", "lib"]
chrono = "^0.4.10" chrono = "^0.4.10"
criterion = { version = "0.5", features = ["html_reports"], optional = true } criterion = { version = "0.5", features = ["html_reports"], optional = true }
[dev-dependencies]
rand = "^0.9.1"
[features] [features]
bench = ["dep:criterion"] bench = ["dep:criterion"]
# [dev-dependencies]
# criterion = { version = "0.5", features = ["html_reports"], optional = true }
[[bench]] [[bench]]
name = "benchmarks" name = "benchmarks"
harness = false harness = false

124
README.md
View File

@ -1,48 +1,64 @@
# rustframe # rustframe
<!-- # <img align="center" alt="Rustframe" src=".github/rustframe_logo.png" height="50px" /> rustframe --> 🐙 [GitHub](https://github.com/Magnus167/rustframe) | 📚 [Docs](https://magnus167.github.io/rustframe/) | 📖 [User Guide](https://magnus167.github.io/rustframe/user-guide/) | 🦀 [Crates.io](https://crates.io/crates/rustframe) | 🔖 [docs.rs](https://docs.rs/rustframe/latest/rustframe/)
<!-- though the centre tag doesn't work as it would normally, it achieves the desired effect -->
📚 [Docs](https://magnus167.github.io/rustframe/) | 🐙 [GitHub](https://github.com/Magnus167/rustframe) | 🌐 [Gitea mirror](https://gitea.nulltech.uk/Magnus167/rustframe) | 🦀 [Crates.io](https://crates.io/crates/rustframe) | 🔖 [docs.rs](https://docs.rs/rustframe/latest/rustframe/)
<!-- [![Last commit](https://img.shields.io/endpoint?url=https://magnus167.github.io/rustframe/rustframe/last-commit-date.json)](https://github.com/Magnus167/rustframe) --> <!-- [![Last commit](https://img.shields.io/endpoint?url=https://magnus167.github.io/rustframe/rustframe/last-commit-date.json)](https://github.com/Magnus167/rustframe) -->
[![codecov](https://codecov.io/gh/Magnus167/rustframe/graph/badge.svg?token=J7ULJEFTVI)](https://codecov.io/gh/Magnus167/rustframe) [![codecov](https://codecov.io/gh/Magnus167/rustframe/graph/badge.svg?token=J7ULJEFTVI)](https://codecov.io/gh/Magnus167/rustframe)
[![Coverage](https://img.shields.io/endpoint?url=https://magnus167.github.io/rustframe/docs/tarpaulin-badge.json)](https://magnus167.github.io/rustframe/docs/tarpaulin-report.html) [![Coverage](https://img.shields.io/endpoint?url=https://magnus167.github.io/rustframe/docs/tarpaulin-badge.json)](https://magnus167.github.io/rustframe/docs/tarpaulin-report.html)
[![gitea-mirror](https://img.shields.io/badge/git_mirror-blue)](https://gitea.nulltech.uk/Magnus167/rustframe)
--- ---
## Rustframe: _A lightweight dataframe & math toolkit for Rust_ ## Rustframe: _A lightweight dataframe & math toolkit for Rust_
Rustframe provides intuitive dataframe, matrix, and series operations small-to-mid scale data analysis and manipulation. Rustframe provides intuitive dataframe, matrix, and series operations for data analysis and manipulation.
Rustframe keeps things simple, safe, and readable. It is handy for quick numeric experiments and small analytical tasks, but it is **not** meant to compete with powerhouse crates like `polars` or `ndarray`. Rustframe keeps things simple, safe, and readable. It is handy for quick numeric experiments and small analytical tasks as well as for educational purposes. It is designed to be easy to use and understand, with a clean API implemented in 100% safe Rust.
Rustframe is an educational project, and is not intended for production use. It is **not** meant to compete with powerhouse crates like `polars` or `ndarray`. It is a work in progress, and the API is subject to change. There are no guarantees of stability or performance, and it is not optimized for large datasets or high-performance computing.
### What it offers ### What it offers
- **Math that reads like math** - element-wise `+`, `-`, `×`, `÷` on entire frames or scalars. - **Matrix operations** - Element-wise arithmetic, boolean logic, transpose, and more.
- **Broadcast & reduce** - sum, product, any/all across rows or columns without boilerplate. - **Math that reads like math** - element-wise `+`, `-`, `×`, `÷` on entire frames or scalars.
- **Boolean masks made simple** - chain comparisons, combine with `&`/`|`, get a tidy `BoolMatrix` back. - **Frames** - Column major data structure for single-type data, with labeled columns and typed row indices.
- **Date-centric row index** - business-day ranges and calendar slicing built in. - **Compute module** - Implements various statistical computations and machine learning models.
- **Pure safe Rust** - 100% safe, zero `unsafe`. - **Random number utils** - Built-in pseudo and cryptographically secure generators for simulations.
- **[Coming Soon]** _DataFrame_ - Multi-type data structure for heterogeneous data, with labeled columns and typed row indices.
### Coming soon #### Matrix and Frame functionality
- **CSV I/O** - read/write CSV files with a simple API. - **Matrix operations** - Element-wise arithmetic, boolean logic, transpose, and more.
- **Date Utils** - date math, calendar slicing, indexing, and more. - **Frame operations** - Column manipulation, sorting, and more.
- **More math** - more math functions and aggregations.
#### Compute Module
The `compute` module provides implementations for various statistical computations and machine learning models.
**Statistics, Data Analysis, and Machine Learning:**
- Correlation analysis
- Descriptive statistics
- Distributions
- Inferential statistics
- Dense Neural Networks
- Gaussian Naive Bayes
- K-Means Clustering
- Linear Regression
- Logistic Regression
- Principal Component Analysis
### Heads up ### Heads up
- **Not memory-efficient (yet)** - footprint needs work. - **Not memory-efficient (yet)** - footprint needs work.
- **Feature set still small** - expect missing pieces. - **The feature set is still limited** - expect missing pieces.
### On the horizon ### Somewhere down the line
- Optional GPU help (Vulkan or similar) for heavier workloads. - Optional GPU acceleration (Vulkan or similar) for heavier workloads.
- Straightforward Python bindings using `pyo3`. - Straightforward Python bindings using `pyo3`.
- Integration with common ML libraries, or introduce simple ML features.
--- ---
@ -58,7 +74,7 @@ use rustframe::{
let n_periods = 4; let n_periods = 4;
// Four business days starting 2024-01-02 // Four business days starting 2024-01-02
let dates: Vec<NaiveDate> = let dates: Vec<NaiveDate> =
BDatesList::from_n_periods("2024-01-02".to_string(), DateFreq::Daily, n_periods) BDatesList::from_n_periods("2024-01-02".to_string(), DateFreq::Daily, n_periods)
.unwrap() .unwrap()
@ -93,13 +109,13 @@ let result: Matrix<f64> = result / 2.0; // divide by scalar
let check: bool = result.eq_elem(ma.clone()).all(); let check: bool = result.eq_elem(ma.clone()).all();
assert!(check); assert!(check);
// The above math can also be written as: // Alternatively:
let check: bool = (&(&(&(&ma + 1.0) - 1.0) * 2.0) / 2.0) let check: bool = (&(&(&(&ma + 1.0) - 1.0) * 2.0) / 2.0)
.eq_elem(ma.clone()) .eq_elem(ma.clone())
.all(); .all();
assert!(check); assert!(check);
// The above math can also be written as: // or even as:
let check: bool = ((((ma.clone() + 1.0) - 1.0) * 2.0) / 2.0) let check: bool = ((((ma.clone() + 1.0) - 1.0) * 2.0) / 2.0)
.eq_elem(ma.clone()) .eq_elem(ma.clone())
.all(); .all();
@ -110,10 +126,6 @@ let mc: Matrix<f64> = Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]);
let md: Matrix<f64> = Matrix::from_cols(vec![vec![5.0, 6.0], vec![7.0, 8.0]]); let md: Matrix<f64> = Matrix::from_cols(vec![vec![5.0, 6.0], vec![7.0, 8.0]]);
let mul_result: Matrix<f64> = mc.matrix_mul(&md); let mul_result: Matrix<f64> = mc.matrix_mul(&md);
// Expected: // Expected:
// 1*5 + 3*6 = 5 + 18 = 23
// 2*5 + 4*6 = 10 + 24 = 34
// 1*7 + 3*8 = 7 + 24 = 31
// 2*7 + 4*8 = 14 + 32 = 46
assert_eq!(mul_result.data(), &[23.0, 34.0, 31.0, 46.0]); assert_eq!(mul_result.data(), &[23.0, 34.0, 31.0, 46.0]);
// Dot product (alias for matrix_mul for FloatMatrix) // Dot product (alias for matrix_mul for FloatMatrix)
@ -122,14 +134,7 @@ assert_eq!(dot_result, mul_result);
// Transpose // Transpose
let original_matrix: Matrix<f64> = Matrix::from_cols(vec![vec![1.0, 2.0, 3.0], vec![4.0, 5.0, 6.0]]); let original_matrix: Matrix<f64> = Matrix::from_cols(vec![vec![1.0, 2.0, 3.0], vec![4.0, 5.0, 6.0]]);
// Original:
// 1 4
// 2 5
// 3 6
let transposed_matrix: Matrix<f64> = original_matrix.transpose(); let transposed_matrix: Matrix<f64> = original_matrix.transpose();
// Transposed:
// 1 2 3
// 4 5 6
assert_eq!(transposed_matrix.rows(), 2); assert_eq!(transposed_matrix.rows(), 2);
assert_eq!(transposed_matrix.cols(), 3); assert_eq!(transposed_matrix.cols(), 3);
assert_eq!(transposed_matrix.data(), &[1.0, 4.0, 2.0, 5.0, 3.0, 6.0]); assert_eq!(transposed_matrix.data(), &[1.0, 4.0, 2.0, 5.0, 3.0, 6.0]);
@ -138,10 +143,6 @@ assert_eq!(transposed_matrix.data(), &[1.0, 4.0, 2.0, 5.0, 3.0, 6.0]);
let matrix = Matrix::from_cols(vec![vec![1.0, 2.0, 3.0], vec![4.0, 5.0, 6.0]]); let matrix = Matrix::from_cols(vec![vec![1.0, 2.0, 3.0], vec![4.0, 5.0, 6.0]]);
// Map function to double each value // Map function to double each value
let mapped_matrix = matrix.map(|x| x * 2.0); let mapped_matrix = matrix.map(|x| x * 2.0);
// Expected data after mapping
// 2 8
// 4 10
// 6 12
assert_eq!(mapped_matrix.data(), &[2.0, 4.0, 6.0, 8.0, 10.0, 12.0]); assert_eq!(mapped_matrix.data(), &[2.0, 4.0, 6.0, 8.0, 10.0, 12.0]);
// Zip // Zip
@ -149,13 +150,10 @@ let a = Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]); // 2x2 matrix
let b = Matrix::from_cols(vec![vec![5.0, 6.0], vec![7.0, 8.0]]); // 2x2 matrix let b = Matrix::from_cols(vec![vec![5.0, 6.0], vec![7.0, 8.0]]); // 2x2 matrix
// Zip function to add corresponding elements // Zip function to add corresponding elements
let zipped_matrix = a.zip(&b, |x, y| x + y); let zipped_matrix = a.zip(&b, |x, y| x + y);
// Expected data after zipping
// 6 10
// 8 12
assert_eq!(zipped_matrix.data(), &[6.0, 8.0, 10.0, 12.0]); assert_eq!(zipped_matrix.data(), &[6.0, 8.0, 10.0, 12.0]);
``` ```
### More examples ## More examples
See the [examples](./examples/) directory for some demonstrations of Rustframe's syntax and functionality. See the [examples](./examples/) directory for some demonstrations of Rustframe's syntax and functionality.
@ -170,3 +168,45 @@ E.g. to run the `game_of_life` example:
```bash ```bash
cargo run --example game_of_life cargo run --example game_of_life
``` ```
More demos:
```bash
cargo run --example linear_regression
cargo run --example logistic_regression
cargo run --example k_means
cargo run --example pca
cargo run --example stats_overview
cargo run --example descriptive_stats
cargo run --example correlation
cargo run --example inferential_stats
cargo run --example distributions
```
To simply list all available examples, you can run:
```bash
# this technically raises an error, but it will list all examples
cargo run --example
```
Each demo runs a couple of mini-scenarios showcasing the APIs.
## Running benchmarks
To run the benchmarks, use:
```bash
cargo bench --features "bench"
```
## Building the user-guide
To build the user guide, use:
```bash
cargo binstall mdbook
bash docs/build.sh
```
This will generate the user guide in the `docs/book` directory.

7
docs/book.toml Normal file
View File

@ -0,0 +1,7 @@
[book]
title = "Rustframe User Guide"
authors = ["Palash Tyagi (https://github.com/Magnus167)"]
description = "Guided journey through Rustframe capabilities."
[build]
build-dir = "book"

7
docs/build.sh Executable file
View File

@ -0,0 +1,7 @@
#!/usr/bin/env sh
# Build and test the Rustframe user guide using mdBook.
#
# Run this from the repository root: the `cd docs` below is resolved against
# the caller's current working directory, not against this script's location.
# All command-line arguments are forwarded unchanged to gen.sh.

# Stop at the first failing command.
set -e
cd docs
# gen.sh is launched through bash explicitly, so its own shebang is not used.
bash gen.sh "$@"
cd ..

14
docs/gen.sh Normal file
View File

@ -0,0 +1,14 @@
#!/usr/bin/env sh
# Test the user guide's code snippets and render the book with mdBook.
#
# The relative `../` paths below expect the working directory to be docs/.
# All command-line arguments are forwarded to both `mdbook test` and
# `mdbook build`.

# Stop at the first failing command.
set -e
# Start from an empty target directory.
# NOTE(review): presumably this keeps ../target/debug/deps down to a single
# build of the crate for the snippets to link against - confirm.
cargo clean
# Debug build of the crate whose manifest lives one directory up.
cargo build --manifest-path ../Cargo.toml
# Compile and run the book's Rust snippets; -L adds the freshly built
# dependency directory to the library search path so the snippets'
# `extern crate rustframe;` can be resolved.
mdbook test -L ../target/debug/deps "$@"
# Render the book.
mdbook build "$@"
# NOTE(review): second debug build; no --manifest-path is given here, and its
# purpose after the book has been built is not evident from this script - confirm.
cargo build
# cargo build --release

7
docs/src/SUMMARY.md Normal file
View File

@ -0,0 +1,7 @@
# Summary
- [Introduction](./introduction.md)
- [Data Manipulation](./data-manipulation.md)
- [Compute Features](./compute.md)
- [Machine Learning](./machine-learning.md)
- [Utilities](./utilities.md)

222
docs/src/compute.md Normal file
View File

@ -0,0 +1,222 @@
# Compute Features
The `compute` module hosts numerical routines for exploratory data analysis.
It covers descriptive statistics, correlations, probability distributions and
some basic inferential tests.
## Basic Statistics
```rust
# extern crate rustframe;
use rustframe::compute::stats::{mean, mean_horizontal, mean_vertical, stddev, median, population_variance, percentile};
use rustframe::matrix::Matrix;
// 2x2 matrix. The assertions below are consistent with the values being laid
// out column by column, i.e. columns [1, 2] and [3, 4].
let m = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
// Whole-matrix statistics over all four values.
assert_eq!(mean(&m), 2.5);
// 1.118... is sqrt(1.25), i.e. the square root of the population variance below.
assert_eq!(stddev(&m), 1.118033988749895);
assert_eq!(median(&m), 2.5);
assert_eq!(population_variance(&m), 1.25);
assert_eq!(percentile(&m, 50.0), 3.0);
// Row averages (one value per row): [(1+3)/2, (2+4)/2]
let row_means = mean_horizontal(&m);
assert_eq!(row_means.data(), &[2.0, 3.0]);
// Column averages (one value per column): [(1+2)/2, (3+4)/2]
let col_means = mean_vertical(&m);
assert_eq!(col_means.data(), & [1.5, 3.5]);
```
### Axis-specific Operations
Operations can be applied along specific axes (rows or columns):
```rust
# extern crate rustframe;
use rustframe::compute::stats::{mean_vertical, mean_horizontal, stddev_vertical, stddev_horizontal};
use rustframe::matrix::Matrix;
// 3x2 matrix
let m = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], 3, 2);
// Mean along columns (vertical) - returns 1 x cols matrix
let col_means = mean_vertical(&m);
assert_eq!(col_means.shape(), (1, 2));
assert_eq!(col_means.data(), &[3.0, 4.0]); // [(1+3+5)/3, (2+4+6)/3]
// Mean along rows (horizontal) - returns rows x 1 matrix
let row_means = mean_horizontal(&m);
assert_eq!(row_means.shape(), (3, 1));
assert_eq!(row_means.data(), &[1.5, 3.5, 5.5]); // [(1+2)/2, (3+4)/2, (5+6)/2]
// Standard deviation along columns
let col_stddev = stddev_vertical(&m);
assert_eq!(col_stddev.shape(), (1, 2));
// Standard deviation along rows
let row_stddev = stddev_horizontal(&m);
assert_eq!(row_stddev.shape(), (3, 1));
```
## Correlation
```rust
# extern crate rustframe;
use rustframe::compute::stats::{pearson, covariance};
use rustframe::matrix::Matrix;
let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
let y = Matrix::from_vec(vec![2.0, 4.0, 6.0, 8.0], 2, 2);
let corr = pearson(&x, &y);
let cov = covariance(&x, &y);
assert!((corr - 1.0).abs() < 1e-8);
assert!((cov - 2.5).abs() < 1e-8);
```
## Covariance
### `covariance`
Computes the population covariance between two equally sized matrices by flattening
their values.
```rust
# extern crate rustframe;
use rustframe::compute::stats::covariance;
use rustframe::matrix::Matrix;
let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
let y = Matrix::from_vec(vec![2.0, 4.0, 6.0, 8.0], 2, 2);
let cov = covariance(&x, &y);
assert!((cov - 2.5).abs() < 1e-8);
```
### `covariance_vertical`
Evaluates covariance between columns (i.e. across rows) and returns a matrix of
column pair covariances.
```rust
# extern crate rustframe;
use rustframe::compute::stats::covariance_vertical;
use rustframe::matrix::Matrix;
let m = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
let cov = covariance_vertical(&m);
assert_eq!(cov.shape(), (2, 2));
assert!(cov.data().iter().all(|&v| (v - 1.0).abs() < 1e-8));
```
### `covariance_horizontal`
Computes covariance between rows (i.e. across columns) returning a matrix that
describes how each pair of rows varies together.
```rust
# extern crate rustframe;
use rustframe::compute::stats::covariance_horizontal;
use rustframe::matrix::Matrix;
let m = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
let cov = covariance_horizontal(&m);
assert_eq!(cov.shape(), (2, 2));
assert!(cov.data().iter().all(|&v| (v - 0.25).abs() < 1e-8));
```
### `covariance_matrix`
Builds a covariance matrix either between columns (`Axis::Col`) or rows
(`Axis::Row`). Each entry represents how two series co-vary.
```rust
# extern crate rustframe;
use rustframe::compute::stats::covariance_matrix;
use rustframe::matrix::{Axis, Matrix};
let data = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
// Covariance between columns
let cov_cols = covariance_matrix(&data, Axis::Col);
assert!((cov_cols.get(0, 0) - 2.0).abs() < 1e-8);
// Covariance between rows
let cov_rows = covariance_matrix(&data, Axis::Row);
assert!((cov_rows.get(0, 1) + 0.5).abs() < 1e-8);
```
## Distributions
Probability distribution helpers are available for common PDFs and CDFs.
```rust
# extern crate rustframe;
use rustframe::compute::stats::distributions::normal_pdf;
use rustframe::matrix::Matrix;
let x = Matrix::from_vec(vec![0.0, 1.0], 1, 2);
let pdf = normal_pdf(x, 0.0, 1.0);
assert_eq!(pdf.data().len(), 2);
```
### Additional Distributions
Rustframe provides several other probability distributions:
```rust
# extern crate rustframe;
use rustframe::compute::stats::distributions::{normal_cdf, binomial_pmf, binomial_cdf, poisson_pmf};
use rustframe::matrix::Matrix;
// Normal distribution CDF
let x = Matrix::from_vec(vec![0.0, 1.0], 1, 2);
let cdf = normal_cdf(x, 0.0, 1.0);
assert_eq!(cdf.data().len(), 2);
// Binomial distribution PMF
// Probability of k successes in n trials with probability p
let k = Matrix::from_vec(vec![0_u64, 1, 2, 3], 1, 4);
let pmf = binomial_pmf(3, k.clone(), 0.5);
assert_eq!(pmf.data().len(), 4);
// Binomial distribution CDF
let cdf = binomial_cdf(3, k, 0.5);
assert_eq!(cdf.data().len(), 4);
// Poisson distribution PMF
// Probability of k events with rate parameter lambda
let k = Matrix::from_vec(vec![0_u64, 1, 2], 1, 3);
let pmf = poisson_pmf(2.0, k);
assert_eq!(pmf.data().len(), 3);
```
### Inferential Statistics
Rustframe provides several inferential statistical tests:
```rust
# extern crate rustframe;
use rustframe::matrix::Matrix;
use rustframe::compute::stats::inferential::{t_test, chi2_test, anova};
// Two-sample t-test
let sample1 = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0], 1, 5);
let sample2 = Matrix::from_vec(vec![6.0, 7.0, 8.0, 9.0, 10.0], 1, 5);
let (t_statistic, p_value) = t_test(&sample1, &sample2);
assert!((t_statistic + 5.0).abs() < 1e-5);
assert!(p_value > 0.0 && p_value < 1.0);
// Chi-square test of independence
let observed = Matrix::from_vec(vec![12.0, 5.0, 8.0, 10.0], 2, 2);
let (chi2_statistic, p_value) = chi2_test(&observed);
assert!(chi2_statistic > 0.0);
assert!(p_value > 0.0 && p_value < 1.0);
// One-way ANOVA
let group1 = Matrix::from_vec(vec![1.0, 2.0, 3.0], 1, 3);
let group2 = Matrix::from_vec(vec![2.0, 3.0, 4.0], 1, 3);
let group3 = Matrix::from_vec(vec![3.0, 4.0, 5.0], 1, 3);
let groups = vec![&group1, &group2, &group3];
let (f_statistic, p_value) = anova(groups);
assert!(f_statistic > 0.0);
assert!(p_value > 0.0 && p_value < 1.0);
```
With the basics covered, explore predictive models in the
[machine learning](./machine-learning.md) chapter.

View File

@ -0,0 +1,157 @@
# Data Manipulation
Rustframe's `Frame` type couples tabular data with
column labels and a typed row index. Frames expose a familiar API for loading
data, selecting rows or columns and performing aggregations.
## Creating a Frame
```rust
# extern crate rustframe;
use rustframe::frame::{Frame, RowIndex};
use rustframe::matrix::Matrix;
let data = Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]);
let frame = Frame::new(data, vec!["A", "B"], None);
assert_eq!(frame["A"], vec![1.0, 2.0]);
```
## Indexing Rows
Row labels can be integers, dates or a default range. Retrieving a row returns a
view that lets you inspect values by column name or position.
```rust
# extern crate rustframe;
# extern crate chrono;
use chrono::NaiveDate;
use rustframe::frame::{Frame, RowIndex};
use rustframe::matrix::Matrix;
let d = |y, m, d| NaiveDate::from_ymd_opt(y, m, d).unwrap();
let data = Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]);
let index = RowIndex::Date(vec![d(2024, 1, 1), d(2024, 1, 2)]);
let mut frame = Frame::new(data, vec!["A", "B"], Some(index));
assert_eq!(frame.get_row_date(d(2024, 1, 2))["B"], 4.0);
// mutate by row key
frame.get_row_date_mut(d(2024, 1, 1)).set_by_index(0, 9.0);
assert_eq!(frame.get_row_date(d(2024, 1, 1))["A"], 9.0);
```
## Column operations
Columns can be inserted, renamed, removed or reordered in place.
```rust
# extern crate rustframe;
use rustframe::frame::{Frame, RowIndex};
use rustframe::matrix::Matrix;
let data = Matrix::from_cols(vec![vec![1, 2], vec![3, 4]]);
let mut frame = Frame::new(data, vec!["X", "Y"], Some(RowIndex::Range(0..2)));
frame.add_column("Z", vec![5, 6]);
frame.rename("Y", "W");
let removed = frame.delete_column("X");
assert_eq!(removed, vec![1, 2]);
frame.sort_columns();
assert_eq!(frame.columns(), &["W", "Z"]);
```
## Aggregations
Any numeric aggregation available on `Matrix` is forwarded to `Frame`.
```rust
# extern crate rustframe;
use rustframe::frame::Frame;
use rustframe::matrix::{Matrix, SeriesOps};
let frame = Frame::new(Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]), vec!["A", "B"], None);
assert_eq!(frame.sum_vertical(), vec![3.0, 7.0]);
assert_eq!(frame.sum_horizontal(), vec![4.0, 6.0]);
```
## Matrix Operations
```rust
# extern crate rustframe;
use rustframe::matrix::Matrix;
let data1 = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
let data2 = Matrix::from_vec(vec![5.0, 6.0, 7.0, 8.0], 2, 2);
let sum = data1.clone() + data2.clone();
assert_eq!(sum.data(), vec![6.0, 8.0, 10.0, 12.0]);
let product = data1.clone() * data2.clone();
assert_eq!(product.data(), vec![5.0, 12.0, 21.0, 32.0]);
let scalar_product = data1.clone() * 2.0;
assert_eq!(scalar_product.data(), vec![2.0, 4.0, 6.0, 8.0]);
let equals = data1 == data1.clone();
assert_eq!(equals, true);
```
### Advanced Matrix Operations
Matrices support a variety of advanced operations:
```rust
# extern crate rustframe;
use rustframe::matrix::{Matrix, SeriesOps};
// Matrix multiplication (dot product)
let a = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
let b = Matrix::from_vec(vec![5.0, 6.0, 7.0, 8.0], 2, 2);
let product = a.matrix_mul(&b);
assert_eq!(product.data(), vec![23.0, 34.0, 31.0, 46.0]);
// Transpose
let m = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
let transposed = m.transpose();
assert_eq!(transposed.data(), vec![1.0, 3.0, 2.0, 4.0]);
// Map function over all elements
let m = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
let squared = m.map(|x| x * x);
assert_eq!(squared.data(), vec![1.0, 4.0, 9.0, 16.0]);
// Zip two matrices with a function
let a = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
let b = Matrix::from_vec(vec![5.0, 6.0, 7.0, 8.0], 2, 2);
let zipped = a.zip(&b, |x, y| x + y);
assert_eq!(zipped.data(), vec![6.0, 8.0, 10.0, 12.0]);
```
### Matrix Reductions
Matrices support various reduction operations:
```rust
# extern crate rustframe;
use rustframe::matrix::{Matrix, SeriesOps};
let m = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], 3, 2);
// Sum along columns (vertical)
let col_sums = m.sum_vertical();
assert_eq!(col_sums, vec![9.0, 12.0]); // [1+3+5, 2+4+6]
// Sum along rows (horizontal)
let row_sums = m.sum_horizontal();
assert_eq!(row_sums, vec![3.0, 7.0, 11.0]); // [1+2, 3+4, 5+6]
// Cumulative sum along columns
let col_cumsum = m.cumsum_vertical();
assert_eq!(col_cumsum.data(), vec![1.0, 4.0, 9.0, 2.0, 6.0, 12.0]);
// Cumulative sum along rows
let row_cumsum = m.cumsum_horizontal();
assert_eq!(row_cumsum.data(), vec![1.0, 3.0, 5.0, 3.0, 7.0, 11.0]);
```
With the basics covered, continue to the [compute features](./compute.md)
chapter for statistics and analytics.

40
docs/src/introduction.md Normal file
View File

@ -0,0 +1,40 @@
# Introduction
🐙 [GitHub](https://github.com/Magnus167/rustframe) | 📚 [Docs](https://magnus167.github.io/rustframe/) | 📖 [User Guide](https://magnus167.github.io/rustframe/user-guide/) | 🦀 [Crates.io](https://crates.io/crates/rustframe) | 🔖 [docs.rs](https://docs.rs/rustframe/latest/rustframe/)
Welcome to the **Rustframe User Guide**. Rustframe is a lightweight dataframe
and math toolkit for Rust written in 100% safe Rust. It focuses on keeping the
API approachable while offering handy features for small analytical or
educational projects.
Rustframe bundles:
- column-labelled frames built on a fast column-major matrix
- familiar element-wise math and aggregation routines
- a growing `compute` module for statistics and machine learning
- utilities for dates and random numbers
```rust
# extern crate rustframe;
use rustframe::{frame::Frame, matrix::{Matrix, SeriesOps}};
let data = Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]);
let frame = Frame::new(data, vec!["A", "B"], None);
// Perform column wise aggregation
assert_eq!(frame.sum_vertical(), vec![3.0, 7.0]);
```
## Resources
- [GitHub repository](https://github.com/Magnus167/rustframe)
- [Crates.io](https://crates.io/crates/rustframe) & [API docs](https://docs.rs/rustframe)
- [Code coverage](https://codecov.io/gh/Magnus167/rustframe)
This guide walks through the main building blocks of the library. Each chapter
contains runnable snippets so you can follow along:
1. [Data manipulation](./data-manipulation.md) for loading and transforming data
2. [Compute features](./compute.md) for statistics and analytics
3. [Machine learning](./machine-learning.md) for predictive models
4. [Utilities](./utilities.md) for supporting helpers and upcoming modules

View File

@ -0,0 +1,282 @@
# Machine Learning
The `compute::models` module bundles several learning algorithms that operate on
`Matrix` structures. These examples highlight the basic training and prediction
APIs. For more end-to-end walkthroughs see the examples directory in the
repository.
Currently implemented models include:
- Linear and logistic regression
- K-means clustering
- Principal component analysis (PCA)
- Gaussian Naive Bayes
- Dense neural networks
## Linear Regression
```rust
# extern crate rustframe;
use rustframe::compute::models::linreg::LinReg;
use rustframe::matrix::Matrix;
let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1);
let y = Matrix::from_vec(vec![2.0, 3.0, 4.0, 5.0], 4, 1);
let mut model = LinReg::new(1);
model.fit(&x, &y, 0.01, 100);
let preds = model.predict(&x);
assert_eq!(preds.rows(), 4);
```
## K-means Walkthrough
```rust
# extern crate rustframe;
use rustframe::compute::models::k_means::KMeans;
use rustframe::matrix::Matrix;
let data = Matrix::from_vec(vec![1.0, 1.0, 5.0, 5.0], 2, 2);
let (model, _labels) = KMeans::fit(&data, 2, 10, 1e-4);
let new_point = Matrix::from_vec(vec![0.0, 0.0], 1, 2);
let cluster = model.predict(&new_point)[0];
```
## Logistic Regression
```rust
# extern crate rustframe;
use rustframe::compute::models::logreg::LogReg;
use rustframe::matrix::Matrix;
let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1);
let y = Matrix::from_vec(vec![0.0, 0.0, 1.0, 1.0], 4, 1);
let mut model = LogReg::new(1);
model.fit(&x, &y, 0.1, 200);
let preds = model.predict_proba(&x);
assert_eq!(preds.rows(), 4);
```
## Principal Component Analysis
```rust
# extern crate rustframe;
use rustframe::compute::models::pca::PCA;
use rustframe::matrix::Matrix;
let data = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
let pca = PCA::fit(&data, 1, 0);
let transformed = pca.transform(&data);
assert_eq!(transformed.cols(), 1);
```
## Gaussian Naive Bayes
Gaussian Naive Bayes classifier for continuous features:
```rust
# extern crate rustframe;
use rustframe::compute::models::gaussian_nb::GaussianNB;
use rustframe::matrix::Matrix;
// Training data with 2 features
let x = Matrix::from_rows_vec(vec![
1.0, 2.0,
2.0, 3.0,
3.0, 4.0,
4.0, 5.0
], 4, 2);
// Class labels (0 or 1)
let y = Matrix::from_vec(vec![0.0, 0.0, 1.0, 1.0], 4, 1);
// Train the model
let mut model = GaussianNB::new(1e-9, true);
model.fit(&x, &y);
// Make predictions
let predictions = model.predict(&x);
assert_eq!(predictions.rows(), 4);
```
## Dense Neural Networks
Simple fully connected neural network:
```rust
# extern crate rustframe;
use rustframe::compute::models::dense_nn::{DenseNN, DenseNNConfig, ActivationKind, InitializerKind, LossKind};
use rustframe::matrix::Matrix;
// Training data with 2 features
let x = Matrix::from_rows_vec(vec![
0.0, 0.0,
0.0, 1.0,
1.0, 0.0,
1.0, 1.0
], 4, 2);
// XOR target outputs
let y = Matrix::from_vec(vec![0.0, 1.0, 1.0, 0.0], 4, 1);
// Create a neural network with 2 hidden layers
let config = DenseNNConfig {
input_size: 2,
hidden_layers: vec![4, 4],
output_size: 1,
activations: vec![ActivationKind::Sigmoid, ActivationKind::Sigmoid, ActivationKind::Sigmoid],
initializer: InitializerKind::Uniform(0.5),
loss: LossKind::MSE,
learning_rate: 0.1,
epochs: 1000,
};
let mut model = DenseNN::new(config);
// Train the model
model.train(&x, &y);
// Make predictions
let predictions = model.predict(&x);
assert_eq!(predictions.rows(), 4);
```
## Real-world Examples
### Housing Price Prediction
```rust
# extern crate rustframe;
use rustframe::compute::models::linreg::LinReg;
use rustframe::matrix::Matrix;
// Features: square feet and bedrooms
let features = Matrix::from_rows_vec(vec![
2100.0, 3.0,
1600.0, 2.0,
2400.0, 4.0,
1400.0, 2.0,
], 4, 2);
// Sale prices
let target = Matrix::from_vec(vec![400_000.0, 330_000.0, 369_000.0, 232_000.0], 4, 1);
let mut model = LinReg::new(2);
model.fit(&features, &target, 1e-8, 10_000);
// Predict price of a new home
let new_home = Matrix::from_vec(vec![2000.0, 3.0], 1, 2);
let predicted_price = model.predict(&new_home);
println!("Predicted price: ${}", predicted_price.data()[0]);
```
### Spam Detection
```rust
# extern crate rustframe;
use rustframe::compute::models::logreg::LogReg;
use rustframe::matrix::Matrix;
// 20 e-mails × 5 features = 100 numbers (row-major, spam first)
let x = Matrix::from_rows_vec(
vec![
// ─────────── spam examples ───────────
2.0, 1.0, 1.0, 1.0, 1.0, // "You win a FREE offer - click for money-back bonus!"
1.0, 0.0, 1.0, 1.0, 0.0, // "FREE offer! Click now!"
0.0, 2.0, 0.0, 1.0, 1.0, // "Win win win - money inside, click…"
1.0, 1.0, 0.0, 0.0, 1.0, // "Limited offer to win easy money…"
1.0, 0.0, 1.0, 0.0, 1.0, // ...
0.0, 1.0, 1.0, 1.0, 0.0, // ...
2.0, 0.0, 0.0, 1.0, 1.0, // ...
0.0, 1.0, 1.0, 0.0, 1.0, // ...
1.0, 1.0, 1.0, 1.0, 0.0, // ...
1.0, 0.0, 0.0, 1.0, 1.0, // ...
// ─────────── ham examples ───────────
0.0, 0.0, 0.0, 0.0, 0.0, // "See you at the meeting tomorrow."
0.0, 0.0, 0.0, 1.0, 0.0, // "Here's the Zoom click-link."
0.0, 0.0, 0.0, 0.0, 1.0, // "Expense report: money attached."
0.0, 0.0, 0.0, 1.0, 1.0, // ...
0.0, 1.0, 0.0, 0.0, 0.0, // "Did we win the bid?"
0.0, 0.0, 0.0, 0.0, 0.0, // ...
0.0, 0.0, 0.0, 1.0, 0.0, // ...
1.0, 0.0, 0.0, 0.0, 0.0, // "Special offer for staff lunch."
0.0, 0.0, 0.0, 0.0, 0.0, // ...
0.0, 0.0, 0.0, 1.0, 0.0,
],
20,
5,
);
// Labels: 1 = spam, 0 = ham
let y = Matrix::from_vec(
vec![
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, // 10 spam
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, // 10 ham
],
20,
1,
);
// Train
let mut model = LogReg::new(5);
model.fit(&x, &y, 0.01, 5000);
// Predict
// e.g. "free money offer"
let email_data = vec![1.0, 0.0, 1.0, 0.0, 1.0];
let email = Matrix::from_vec(email_data, 1, 5);
let prob_spam = model.predict_proba(&email);
println!("Probability of spam: {:.4}", prob_spam.data()[0]);
```
### Iris Flower Classification
```rust
# extern crate rustframe;
use rustframe::compute::models::gaussian_nb::GaussianNB;
use rustframe::matrix::Matrix;
// Features: sepal length and petal length
let x = Matrix::from_rows_vec(vec![
5.1, 1.4, // setosa
4.9, 1.4, // setosa
6.2, 4.5, // versicolor
5.9, 5.1, // virginica
], 4, 2);
let y = Matrix::from_vec(vec![0.0, 0.0, 1.0, 2.0], 4, 1);
let names = vec!["setosa", "versicolor", "virginica"];
let mut model = GaussianNB::new(1e-9, true);
model.fit(&x, &y);
let sample = Matrix::from_vec(vec![5.0, 1.5], 1, 2);
let predicted_class = model.predict(&sample);
let class_name = names[predicted_class.data()[0] as usize];
println!("Predicted class: {} ({:?})", class_name, predicted_class.data()[0]);
```
### Customer Segmentation
```rust
# extern crate rustframe;
use rustframe::compute::models::k_means::KMeans;
use rustframe::matrix::Matrix;
// Each row: [age, annual_income]
let customers = Matrix::from_rows_vec(
vec![
25.0, 40_000.0, 34.0, 52_000.0, 58.0, 95_000.0, 45.0, 70_000.0,
],
4,
2,
);
let (model, labels) = KMeans::fit(&customers, 2, 20, 1e-4);
let new_customer = Matrix::from_vec(vec![30.0, 50_000.0], 1, 2);
let cluster = model.predict(&new_customer)[0];
println!("New customer belongs to cluster: {}", cluster);
println!("Cluster labels: {:?}", labels);
```
For helper functions and upcoming modules, visit the
[utilities](./utilities.md) section.

63
docs/src/utilities.md Normal file
View File

@ -0,0 +1,63 @@
# Utilities
Utilities provide handy helpers around the core library. Existing tools
include:
- Date utilities for generating calendar sequences and business-day sets
- Random number generators for simulations and testing
## Date Helpers
```rust
# extern crate rustframe;
use rustframe::utils::dateutils::{BDateFreq, BDatesList, DateFreq, DatesList};

// Calendar sequence: daily dates from 2024-01-01 to 2024-01-03 (three entries).
let calendar = DatesList::new("2024-01-01".into(), "2024-01-03".into(), DateFreq::Daily);
assert_eq!(calendar.count().unwrap(), 3);

// Three daily business-day periods starting from 2024-01-02.
let business_days = BDatesList::from_n_periods("2024-01-02".into(), BDateFreq::Daily, 3).unwrap();
assert_eq!(business_days.list().unwrap().len(), 3);
```
## Random Numbers
The `random` module offers both a deterministic pseudo-random generator and a cryptographically secure one.
```rust
# extern crate rustframe;
use rustframe::random::{Prng, Rng};

// Build a generator from a fixed value and check that two consecutive draws differ.
let mut generator = Prng::new(42);
let first = generator.next_u64();
let second = generator.next_u64();
assert_ne!(first, second);
```
## Stats Functions
```rust
# extern crate rustframe;
use rustframe::compute::stats::descriptive::{mean, median, stddev};
use rustframe::matrix::Matrix;

// A single row holding the values 1 through 5.
let data = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0], 1, 5);

assert_eq!(mean(&data), 3.0);
assert_eq!(median(&data), 3.0);
// sqrt(2) is the population standard deviation of these five values.
assert_eq!(stddev(&data), 2.0_f64.sqrt());
```
Upcoming utilities will cover:
- Data import/export helpers
- Visualization adapters
- Streaming data interfaces
Contributions to these sections are welcome!

45
examples/correlation.rs Normal file
View File

@ -0,0 +1,45 @@
use rustframe::compute::stats::{covariance, covariance_matrix, pearson};
use rustframe::matrix::{Axis, Matrix};
/// Runs the pairwise demo followed by the covariance-matrix demo,
/// printing a divider between the two.
fn main() {
    let divider = "\n-----\n";
    pairwise_cov();
    println!("{}", divider);
    matrix_cov();
}
/// Prints the covariance and the Pearson correlation coefficient of two 2x2 matrices.
fn pairwise_cov() {
    println!("Covariance & Pearson r\n----------------------");
    let lhs = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
    let rhs = Matrix::from_vec(vec![1.0, 2.0, 3.0, 5.0], 2, 2);

    let cov = covariance(&lhs, &rhs);
    println!("covariance : {:.2}", cov);

    let r = pearson(&lhs, &rhs);
    println!("pearson r : {:.3}", r);
}
/// Prints the covariance matrix of a small 2x2 data set, computed along `Axis::Col`.
fn matrix_cov() {
    println!("Covariance matrix\n-----------------");
    let samples = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
    let cov_matrix = covariance_matrix(&samples, Axis::Col);
    println!("cov matrix : {:?}", cov_matrix.data());
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used for the covariance comparison.
    const EPS: f64 = 1e-8;

    /// Covariance and Pearson r of the demo inputs match their reference values.
    #[test]
    fn test_pairwise_cov() {
        let lhs = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
        let rhs = Matrix::from_vec(vec![1.0, 2.0, 3.0, 5.0], 2, 2);

        let cov_error = (covariance(&lhs, &rhs) - 1.625).abs();
        assert!(cov_error < EPS);

        let r_error = (pearson(&lhs, &rhs) - 0.9827076298239908).abs();
        assert!(r_error < 1e-5);
    }

    /// Every entry of the demo covariance matrix equals 2.0.
    #[test]
    fn test_matrix_cov() {
        let samples = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
        let cov_matrix = covariance_matrix(&samples, Axis::Col);
        assert_eq!(cov_matrix.data(), &[2.0, 2.0, 2.0, 2.0]);
    }
}

View File

@ -0,0 +1,56 @@
use rustframe::compute::stats::{mean, mean_horizontal, mean_vertical, median, percentile, stddev};
use rustframe::matrix::Matrix;
/// Walks through the descriptive statistics helpers.
///
/// First the scalar summaries (mean, stddev, median, percentile) of a vector,
/// then the per-column and per-row means of a small matrix.
fn main() {
    let divider = "\n-----\n";
    simple_stats();
    println!("{}", divider);
    axis_stats();
}
/// Prints mean, standard deviation, median and 90th percentile of the values 1..=5.
fn simple_stats() {
    println!("Basic stats\n-----------");
    let sample = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0], 1, 5);

    let average = mean(&sample);
    println!("mean : {:.2}", average);

    let spread = stddev(&sample);
    println!("stddev : {:.2}", spread);

    let middle = median(&sample);
    println!("median : {:.2}", middle);

    let p90 = percentile(&sample, 90.0);
    println!("90th pct. : {:.2}", p90);
}
/// Prints the vertical (per-column) and horizontal (per-row) means of a 2x3 matrix.
fn axis_stats() {
    println!("Row/column means\n----------------");
    // Two rows of three values each, supplied row by row.
    let grid = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], 2, 3);
    let column_means = mean_vertical(&grid); // 1x3
    let row_means = mean_horizontal(&grid); // 2x1
    println!("vertical means : {:?}", column_means.data());
    println!("horizontal means: {:?}", row_means.data());
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance for floating-point comparisons.
    const EPS: f64 = 1e-8;

    /// Scalar summaries of 1..=5 match their reference values within `EPS`.
    #[test]
    fn test_simple_stats() {
        let sample = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0], 1, 5);
        let close = |got: f64, want: f64| (got - want).abs() < EPS;

        assert!(close(mean(&sample), 3.0));
        assert!(close(stddev(&sample), 1.4142135623730951));
        assert!(close(median(&sample), 3.0));
        assert!(close(percentile(&sample, 90.0), 5.0));
    }

    /// Column means and row means of the 2x3 demo matrix are exact.
    #[test]
    fn test_axis_stats() {
        let grid = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], 2, 3);

        let column_means = mean_vertical(&grid);
        assert_eq!(column_means.data(), &[2.5, 3.5, 4.5]);

        let row_means = mean_horizontal(&grid);
        assert_eq!(row_means.data(), &[2.0, 5.0]);
    }
}

66
examples/distributions.rs Normal file
View File

@ -0,0 +1,66 @@
use rustframe::compute::stats::{binomial_cdf, binomial_pmf, normal_cdf, normal_pdf, poisson_pmf};
use rustframe::matrix::Matrix;
/// Runs the normal, binomial and Poisson demos in that order,
/// printing a divider between consecutive demos.
fn main() {
    let divider = "\n-----\n";
    normal_example();
    println!("{}", divider);
    binomial_example();
    println!("{}", divider);
    poisson_example();
}
/// Prints the normal PDF and CDF evaluated at 0.0 and 1.0 with parameters (0.0, 1.0).
fn normal_example() {
    println!("Normal distribution\n-------------------");
    let points = Matrix::from_vec(vec![0.0, 1.0], 1, 2);
    // Both helpers take the matrix by value, so the first call gets a clone.
    let density = normal_pdf(points.clone(), 0.0, 1.0);
    let cumulative = normal_cdf(points, 0.0, 1.0);
    println!("pdf : {:?}", density.data());
    println!("cdf : {:?}", cumulative.data());
}
/// Prints the binomial PMF and CDF for k = 0, 1, 2 with arguments 4 and 0.5.
fn binomial_example() {
    println!("Binomial distribution\n---------------------");
    let successes = Matrix::from_vec(vec![0_u64, 1, 2], 1, 3);
    // Both helpers take the matrix by value, so the first call gets a clone.
    let mass = binomial_pmf(4, successes.clone(), 0.5);
    let cumulative = binomial_cdf(4, successes, 0.5);
    println!("pmf : {:?}", mass.data());
    println!("cdf : {:?}", cumulative.data());
}
/// Prints the Poisson PMF for k = 0, 1, 2 with the first argument set to 3.0.
fn poisson_example() {
    println!("Poisson distribution\n--------------------");
    let events = Matrix::from_vec(vec![0_u64, 1, 2], 1, 3);
    let mass = poisson_pmf(3.0, events);
    println!("pmf : {:?}", mass.data());
}
#[cfg(test)]
mod tests {
    use super::*;

    /// PDF at 0 and CDF at 1 match the standard normal reference values.
    #[test]
    fn test_normal_example() {
        let points = Matrix::from_vec(vec![0.0, 1.0], 1, 2);
        let density = normal_pdf(points.clone(), 0.0, 1.0);
        let cumulative = normal_cdf(points, 0.0, 1.0);

        let pdf_error = (density.get(0, 0) - 0.39894228).abs();
        assert!(pdf_error < 1e-6);
        let cdf_error = (cumulative.get(0, 1) - 0.8413447).abs();
        assert!(cdf_error < 1e-6);
    }

    /// PMF and CDF at k = 2 match the hand-computed values 0.375 and 0.6875.
    #[test]
    fn test_binomial_example() {
        let successes = Matrix::from_vec(vec![0_u64, 1, 2], 1, 3);
        let mass = binomial_pmf(4, successes.clone(), 0.5);
        let cumulative = binomial_cdf(4, successes, 0.5);

        let pmf_error = (mass.get(0, 2) - 0.375).abs();
        assert!(pmf_error < 1e-6);
        let cdf_error = (cumulative.get(0, 2) - 0.6875).abs();
        assert!(cdf_error < 1e-6);
    }

    /// PMF at k = 1 equals 3 * exp(-3).
    #[test]
    fn test_poisson_example() {
        let events = Matrix::from_vec(vec![0_u64, 1, 2], 1, 3);
        let mass = poisson_pmf(3.0, events);

        let expected = 3.0_f64 * (-3.0_f64).exp();
        assert!((mass.get(0, 1) - expected).abs() < 1e-6);
    }
}

View File

@ -1,13 +1,26 @@
use rand::{self, Rng}; //! Conway's Game of Life Example
//! This example implements Conway's Game of Life using a `BoolMatrix` to represent the game board.
//! It demonstrates matrix operations like shifting, counting neighbors, and applying game rules.
//! The game runs in a loop, updating the board state and printing it to the console.
//! To modify the behaviour of the example, please change the constants at the top of this file.
use rustframe::matrix::{BoolMatrix, BoolOps, IntMatrix, Matrix}; use rustframe::matrix::{BoolMatrix, BoolOps, IntMatrix, Matrix};
use rustframe::random::{rng, Rng};
use std::{thread, time}; use std::{thread, time};
const BOARD_SIZE: usize = 50; // Size of the board (50x50) const BOARD_SIZE: usize = 20; // Size of the board (50x50)
const TICK_DURATION_MS: u64 = 10; // Milliseconds per frame const MAX_FRAMES: u32 = 1000;
const TICK_DURATION_MS: u64 = 0; // Milliseconds per frame
const SKIP_FRAMES: u32 = 1;
const PRINT_BOARD: bool = true; // Set to false to disable printing the board
fn main() { fn main() {
// Initialize the game board. let args = std::env::args().collect::<Vec<String>>();
// This demonstrates `BoolMatrix::from_vec`. let debug_mode = args.contains(&"--debug".to_string());
let print_mode = if debug_mode { false } else { PRINT_BOARD };
let mut current_board = let mut current_board =
BoolMatrix::from_vec(vec![false; BOARD_SIZE * BOARD_SIZE], BOARD_SIZE, BOARD_SIZE); BoolMatrix::from_vec(vec![false; BOARD_SIZE * BOARD_SIZE], BOARD_SIZE, BOARD_SIZE);
@ -16,31 +29,18 @@ fn main() {
add_simulated_activity(&mut current_board, BOARD_SIZE); add_simulated_activity(&mut current_board, BOARD_SIZE);
let mut generation_count: u32 = 0; let mut generation_count: u32 = 0;
// `previous_board_state` will store a clone of the board.
// This demonstrates `Matrix::clone()` and later `PartialEq` for `Matrix`.
let mut previous_board_state: Option<BoolMatrix> = None; let mut previous_board_state: Option<BoolMatrix> = None;
let mut board_hashes = Vec::new(); let mut board_hashes = Vec::new();
// let mut print_board_bool = true;
let mut print_bool_int = 0; let mut print_bool_int = 0;
loop { loop {
// print!("{}[2J", 27 as char); // Clear screen and move cursor to top-left if print_bool_int % SKIP_FRAMES == 0 {
print_board(&current_board, generation_count, print_mode);
// if print_board_bool {
if print_bool_int % 10 == 0 {
print!("{}[2J", 27 as char);
println!("Conway's Game of Life - Generation: {}", generation_count);
print_board(&current_board);
println!("Alive cells: {}", &current_board.count());
// print_board_bool = false;
print_bool_int = 0; print_bool_int = 0;
} else { } else {
// print_board_bool = true;
print_bool_int += 1; print_bool_int += 1;
} }
// `current_board.count()` demonstrates a method from `BoolOps`.
board_hashes.push(hash_board(&current_board, primes.clone())); board_hashes.push(hash_board(&current_board, primes.clone()));
if detect_stable_state(&current_board, &previous_board_state) { if detect_stable_state(&current_board, &previous_board_state) {
println!( println!(
@ -61,20 +61,18 @@ fn main() {
add_simulated_activity(&mut current_board, BOARD_SIZE); add_simulated_activity(&mut current_board, BOARD_SIZE);
} }
// `current_board.clone()` demonstrates `Clone` for `Matrix`.
previous_board_state = Some(current_board.clone()); previous_board_state = Some(current_board.clone());
// This is the core call to your game logic.
let next_board = game_of_life_next_frame(&current_board); let next_board = game_of_life_next_frame(&current_board);
current_board = next_board; current_board = next_board;
generation_count += 1; generation_count += 1;
thread::sleep(time::Duration::from_millis(TICK_DURATION_MS)); thread::sleep(time::Duration::from_millis(TICK_DURATION_MS));
// if generation_count > 500 { // Optional limit if (MAX_FRAMES > 0) && (generation_count > MAX_FRAMES) {
// println!("\nReached generation limit."); println!("\nReached generation limit.");
// break; break;
// } }
} }
} }
@ -82,7 +80,13 @@ fn main() {
/// ///
/// - `board`: A reference to the `BoolMatrix` representing the current game state. /// - `board`: A reference to the `BoolMatrix` representing the current game state.
/// This function demonstrates `board.rows()`, `board.cols()`, and `board[(r, c)]` (Index trait). /// This function demonstrates `board.rows()`, `board.cols()`, and `board[(r, c)]` (Index trait).
fn print_board(board: &BoolMatrix) { fn print_board(board: &BoolMatrix, generation_count: u32, print_mode: bool) {
if !print_mode {
return;
}
print!("{}[2J", 27 as char);
println!("Conway's Game of Life - Generation: {}", generation_count);
let mut print_str = String::new(); let mut print_str = String::new();
print_str.push_str("+"); print_str.push_str("+");
for _ in 0..board.cols() { for _ in 0..board.cols() {
@ -93,7 +97,6 @@ fn print_board(board: &BoolMatrix) {
print_str.push_str("| "); print_str.push_str("| ");
for c in 0..board.cols() { for c in 0..board.cols() {
if board[(r, c)] { if board[(r, c)] {
// Using Index trait for Matrix<bool>
print_str.push_str("██"); print_str.push_str("██");
} else { } else {
print_str.push_str(" "); print_str.push_str(" ");
@ -107,6 +110,8 @@ fn print_board(board: &BoolMatrix) {
} }
print_str.push_str("+\n\n"); print_str.push_str("+\n\n");
print!("{}", print_str); print!("{}", print_str);
println!("Alive cells: {}", board.count());
} }
/// Helper function to create a shifted version of the game board. /// Helper function to create a shifted version of the game board.
@ -173,74 +178,38 @@ pub fn game_of_life_next_frame(current_game: &BoolMatrix) -> BoolMatrix {
if rows == 0 && cols == 0 { if rows == 0 && cols == 0 {
return BoolMatrix::from_vec(vec![], 0, 0); // Return an empty BoolMatrix return BoolMatrix::from_vec(vec![], 0, 0); // Return an empty BoolMatrix
} }
// Assuming valid non-empty dimensions (e.g., 25x25) as per typical GOL.
// Your Matrix::from_vec would panic for other invalid 0-dim cases.
// Define the 8 neighbor offsets (row_delta, col_delta) // Define the 8 neighbor offsets (row_delta, col_delta)
let neighbor_offsets: [(isize, isize); 8] = [ let neighbor_offsets: [(isize, isize); 8] = [
(-1, -1), (-1, -1),
(-1, 0), (-1, 0),
(-1, 1), // Top row (NW, N, NE) (-1, 1),
(0, -1), (0, -1),
(0, 1), // Middle row (W, E) (0, 1),
(1, -1), (1, -1),
(1, 0), (1, 0),
(1, 1), // Bottom row (SW, S, SE) (1, 1),
]; ];
// 1. Initialize `neighbor_counts` with the first shifted layer.
// This demonstrates creating an IntMatrix from a function and using it as a base.
let (first_dr, first_dc) = neighbor_offsets[0]; let (first_dr, first_dc) = neighbor_offsets[0];
let mut neighbor_counts = get_shifted_neighbor_layer(current_game, first_dr, first_dc); let mut neighbor_counts = get_shifted_neighbor_layer(current_game, first_dr, first_dc);
// 2. Add the remaining 7 neighbor layers.
// This demonstrates element-wise addition of matrices (`Matrix + Matrix`).
for i in 1..neighbor_offsets.len() { for i in 1..neighbor_offsets.len() {
let (dr, dc) = neighbor_offsets[i]; let (dr, dc) = neighbor_offsets[i];
let next_neighbor_layer = get_shifted_neighbor_layer(current_game, dr, dc); let next_neighbor_layer = get_shifted_neighbor_layer(current_game, dr, dc);
// `neighbor_counts` (owned IntMatrix) + `next_neighbor_layer` (owned IntMatrix)
// uses `impl Add for Matrix`, consumes both, returns new owned `IntMatrix`.
neighbor_counts = neighbor_counts + next_neighbor_layer; neighbor_counts = neighbor_counts + next_neighbor_layer;
} }
// 3. Apply Game of Life rules using element-wise operations.
// Rule: Survival or Birth based on neighbor counts.
// A cell is alive in the next generation if:
// (it's currently alive AND has 2 or 3 neighbors) OR
// (it's currently dead AND has exactly 3 neighbors)
// `neighbor_counts.eq_elem(scalar)`:
// Demonstrates element-wise comparison of a Matrix with a scalar (broadcast).
// Returns an owned `BoolMatrix`.
let has_2_neighbors = neighbor_counts.eq_elem(2); let has_2_neighbors = neighbor_counts.eq_elem(2);
let has_3_neighbors = neighbor_counts.eq_elem(3); // This will be reused let has_3_neighbors = neighbor_counts.eq_elem(3);
// `has_2_neighbors | has_3_neighbors`: let has_2_or_3_neighbors = has_2_neighbors | has_3_neighbors.clone();
// Demonstrates element-wise OR (`Matrix<bool> | Matrix<bool>`).
// Consumes both operands, returns an owned `BoolMatrix`.
let has_2_or_3_neighbors = has_2_neighbors | has_3_neighbors.clone(); // Clone has_3_neighbors as it's used again
// `current_game & &has_2_or_3_neighbors`:
// `current_game` is `&BoolMatrix`. `has_2_or_3_neighbors` is owned.
// Demonstrates element-wise AND (`&Matrix<bool> & &Matrix<bool>`).
// Borrows both operands, returns an owned `BoolMatrix`.
let survives = current_game & &has_2_or_3_neighbors; let survives = current_game & &has_2_or_3_neighbors;
// `!current_game`:
// Demonstrates element-wise NOT (`!&Matrix<bool>`).
// Borrows operand, returns an owned `BoolMatrix`.
let is_dead = !current_game; let is_dead = !current_game;
// `is_dead & &has_3_neighbors`:
// `is_dead` is owned. `has_3_neighbors` is owned.
// Demonstrates element-wise AND (`Matrix<bool> & &Matrix<bool>`).
// Consumes `is_dead`, borrows `has_3_neighbors`, returns an owned `BoolMatrix`.
let births = is_dead & &has_3_neighbors; let births = is_dead & &has_3_neighbors;
// `survives | births`:
// Demonstrates element-wise OR (`Matrix<bool> | Matrix<bool>`).
// Consumes both operands, returns an owned `BoolMatrix`.
let next_frame_game = survives | births; let next_frame_game = survives | births;
next_frame_game next_frame_game
@ -250,7 +219,7 @@ pub fn generate_glider(board: &mut BoolMatrix, board_size: usize) {
// Initialize with a Glider pattern. // Initialize with a Glider pattern.
// It demonstrates how to set specific cells in the matrix. // It demonstrates how to set specific cells in the matrix.
// This demonstrates `IndexMut` for `current_board[(r, c)] = true;`. // This demonstrates `IndexMut` for `current_board[(r, c)] = true;`.
let mut rng = rand::rng(); let mut rng = rng();
let r_offset = rng.random_range(0..(board_size - 3)); let r_offset = rng.random_range(0..(board_size - 3));
let c_offset = rng.random_range(0..(board_size - 3)); let c_offset = rng.random_range(0..(board_size - 3));
if board.rows() >= r_offset + 3 && board.cols() >= c_offset + 3 { if board.rows() >= r_offset + 3 && board.cols() >= c_offset + 3 {
@ -266,7 +235,7 @@ pub fn generate_pulsar(board: &mut BoolMatrix, board_size: usize) {
// Initialize with a Pulsar pattern. // Initialize with a Pulsar pattern.
// This demonstrates how to set specific cells in the matrix. // This demonstrates how to set specific cells in the matrix.
// This demonstrates `IndexMut` for `current_board[(r, c)] = true;`. // This demonstrates `IndexMut` for `current_board[(r, c)] = true;`.
let mut rng = rand::rng(); let mut rng = rng();
let r_offset = rng.random_range(0..(board_size - 17)); let r_offset = rng.random_range(0..(board_size - 17));
let c_offset = rng.random_range(0..(board_size - 17)); let c_offset = rng.random_range(0..(board_size - 17));
if board.rows() >= r_offset + 17 && board.cols() >= c_offset + 17 { if board.rows() >= r_offset + 17 && board.cols() >= c_offset + 17 {

View File

@ -0,0 +1,66 @@
use rustframe::compute::stats::{anova, chi2_test, t_test};
use rustframe::matrix::Matrix;
/// Runs the t-test, chi-square and ANOVA demos in that order,
/// printing a divider between consecutive demos.
fn main() {
    let divider = "\n-----\n";
    t_test_demo();
    println!("{}", divider);
    chi2_demo();
    println!("{}", divider);
    anova_demo();
}
/// Prints the t statistic and p-value returned by `t_test` for two five-element samples.
fn t_test_demo() {
    println!("Two-sample t-test\n-----------------");
    let first = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0], 1, 5);
    let second = Matrix::from_vec(vec![6.0, 7.0, 8.0, 9.0, 10.0], 1, 5);
    let (statistic, p_value) = t_test(&first, &second);
    println!("t statistic: {:.2}, p-value: {:.4}", statistic, p_value);
}
/// Prints the chi-square statistic and p-value returned by `chi2_test` for a 2x2 table.
fn chi2_demo() {
    println!("Chi-square test\n---------------");
    let table = Matrix::from_vec(vec![12.0, 5.0, 8.0, 10.0], 2, 2);
    let (statistic, p_value) = chi2_test(&table);
    println!("chi^2: {:.2}, p-value: {:.4}", statistic, p_value);
}
/// Prints the F statistic and p-value returned by `anova` for three three-element groups.
fn anova_demo() {
    println!("One-way ANOVA\n-------------");
    let low = Matrix::from_vec(vec![1.0, 2.0, 3.0], 1, 3);
    let mid = Matrix::from_vec(vec![2.0, 3.0, 4.0], 1, 3);
    let high = Matrix::from_vec(vec![3.0, 4.0, 5.0], 1, 3);

    let groups = vec![&low, &mid, &high];
    let (statistic, p_value) = anova(groups);
    println!("F statistic: {:.2}, p-value: {:.4}", statistic, p_value);
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The t statistic for the two demo samples is -5 (within 1e-5).
    #[test]
    fn test_t_test_demo() {
        let first = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0], 1, 5);
        let second = Matrix::from_vec(vec![6.0, 7.0, 8.0, 9.0, 10.0], 1, 5);
        let (statistic, _p_value) = t_test(&first, &second);
        assert!((statistic + 5.0).abs() < 1e-5);
    }

    /// The chi-square statistic is positive and the p-value lies strictly inside (0, 1).
    #[test]
    fn test_chi2_demo() {
        let table = Matrix::from_vec(vec![12.0, 5.0, 8.0, 10.0], 2, 2);
        let (statistic, p_value) = chi2_test(&table);
        assert!(statistic > 0.0);
        assert!(p_value > 0.0);
        assert!(p_value < 1.0);
    }

    /// The F statistic is positive and the p-value lies strictly inside (0, 1).
    #[test]
    fn test_anova_demo() {
        let low = Matrix::from_vec(vec![1.0, 2.0, 3.0], 1, 3);
        let mid = Matrix::from_vec(vec![2.0, 3.0, 4.0], 1, 3);
        let high = Matrix::from_vec(vec![3.0, 4.0, 5.0], 1, 3);
        let (statistic, p_value) = anova(vec![&low, &mid, &high]);
        assert!(statistic > 0.0);
        assert!(p_value > 0.0);
        assert!(p_value < 1.0);
    }
}

65
examples/k_means.rs Normal file
View File

@ -0,0 +1,65 @@
use rustframe::compute::models::k_means::KMeans;
use rustframe::matrix::Matrix;
/// Runs two short K-Means clustering demos.
///
/// The first clusters store locations on a city map; the second clusters
/// customers by how much they spend in two categories.
fn main() {
    let divider = "\n-----\n";
    city_store_example();
    println!("{}", divider);
    customer_spend_example();
}
/// Clusters nine store coordinates into two groups, then assigns two new points.
fn city_store_example() {
    println!("Example 1: store locations");

    // Nine (x, y) store coordinates, flattened row by row.
    let coordinates = vec![
        1.0, 2.0, 1.5, 1.8, 5.0, 8.0, 8.0, 8.0, 1.0, 0.6, 9.0, 11.0, 8.0, 2.0, 10.0, 2.0, 9.0, 3.0,
    ];
    let stores = Matrix::from_rows_vec(coordinates, 9, 2);

    // Split the stores into two areas.
    let (model, labels) = KMeans::fit(&stores, 2, 100, 1e-4);
    println!("Centres: {:?}", model.centroids.data());
    println!("Labels: {:?}", labels);

    // Assign two candidate locations to the fitted areas.
    let candidates = Matrix::from_rows_vec(vec![0.0, 0.0, 8.0, 3.0], 2, 2);
    let assignments = model.predict(&candidates);
    println!("New store assignments: {:?}", assignments);
}
/// Clusters six customers by spending into two groups, then assigns two new customers.
fn customer_spend_example() {
    println!("Example 2: customer spending");

    // Six (grocery spend, electronics spend) pairs in dollars, flattened row by row.
    let spending = vec![
        200.0, 150.0, 220.0, 170.0, 250.0, 160.0, 800.0, 750.0, 820.0, 760.0, 790.0, 770.0,
    ];
    let customers = Matrix::from_rows_vec(spending, 6, 2);

    let (model, labels) = KMeans::fit(&customers, 2, 100, 1e-4);
    println!("Centres: {:?}", model.centroids.data());
    println!("Labels: {:?}", labels);

    let newcomers = Matrix::from_rows_vec(vec![230.0, 155.0, 810.0, 760.0], 2, 2);
    let assignments = model.predict(&newcomers);
    println!("Cluster of new customers: {:?}", assignments);
}
/// Fitting the store data yields one label per store, two centroids,
/// and one prediction per new point.
#[test]
fn k_means_store_locations() {
    let coordinates = vec![
        1.0, 2.0, 1.5, 1.8, 5.0, 8.0, 8.0, 8.0, 1.0, 0.6, 9.0, 11.0, 8.0, 2.0, 10.0, 2.0, 9.0, 3.0,
    ];
    let stores = Matrix::from_rows_vec(coordinates, 9, 2);

    let (model, labels) = KMeans::fit(&stores, 2, 100, 1e-4);
    assert_eq!(labels.len(), 9);
    assert_eq!(model.centroids.rows(), 2);

    let candidates = Matrix::from_rows_vec(vec![0.0, 0.0, 8.0, 3.0], 2, 2);
    let assignments = model.predict(&candidates);
    assert_eq!(assignments.len(), 2);
}

View File

@ -0,0 +1,118 @@
use rustframe::compute::models::linreg::LinReg;
use rustframe::matrix::Matrix;
/// Runs two short linear regression demos.
///
/// The first predicts house price from floor area alone; the second adds
/// the number of bedrooms as a second feature.
fn main() {
    let divider = "\n-----\n";
    example_one_feature();
    println!("{}", divider);
    example_two_features();
}
/// Fits price against floor area, prints fitted vs. actual prices for the
/// training houses, then predicts the price of a 120 m^2 house.
fn example_one_feature() {
    println!("Example 1: predict price from floor area only");

    // Floor area in square metres and sale price in thousands of dollars.
    let sizes = vec![50.0, 60.0, 70.0, 80.0, 90.0, 100.0];
    let prices = vec![150.0, 180.0, 210.0, 240.0, 270.0, 300.0];

    // Column vectors: one sample per row, a single feature/target per sample.
    let x = Matrix::from_vec(sizes.clone(), sizes.len(), 1);
    let y = Matrix::from_vec(prices.clone(), prices.len(), 1);

    // NOTE(review): the features are used unscaled here, whereas the tests in
    // this file divide the areas by 100 before fitting. Confirm that a learning
    // rate of 0.0005 converges on the raw values.
    let mut model = LinReg::new(1);
    model.fit(&x, &y, 0.0005, 20000);
    let fitted = model.predict(&x);

    println!("Size (m^2) -> predicted price (k) vs actual");
    for (row, (size, actual)) in sizes.iter().zip(prices.iter()).enumerate() {
        println!("{:>3} -> {:>6.1} | {:>6.1}", size, fitted[(row, 0)], actual);
    }

    let new_house = Matrix::from_vec(vec![120.0], 1, 1);
    let estimate = model.predict(&new_house);
    println!("Predicted price for 120 m^2: {:.1}k", estimate[(0, 0)]);
}
/// Fits price against floor area and bedroom count, prints fitted vs. actual
/// prices, then predicts the price of a 120 m^2 home with 3 bedrooms.
fn example_two_features() {
    println!("Example 2: price from area and bedrooms");

    // Six (size m^2, bedrooms) pairs, flattened row by row.
    let features = vec![
        50.0, 2.0, 70.0, 2.0, 90.0, 3.0, 110.0, 3.0, 130.0, 4.0, 150.0, 4.0,
    ];
    let prices = vec![160.0, 195.0, 250.0, 285.0, 320.0, 350.0];
    let x = Matrix::from_rows_vec(features, 6, 2);
    let y = Matrix::from_vec(prices.clone(), prices.len(), 1);

    // NOTE(review): the features are used unscaled here, whereas the tests in
    // this file divide the areas by 100 before fitting. Confirm that a learning
    // rate of 0.0001 converges on the raw values.
    let mut model = LinReg::new(2);
    model.fit(&x, &y, 0.0001, 50000);
    let fitted = model.predict(&x);

    println!("size, beds -> predicted | actual (k)");
    for (row, actual) in prices.iter().enumerate() {
        println!(
            "{:>3} m^2, {:>1} -> {:>6.1} | {:>6.1}",
            x[(row, 0)],
            x[(row, 1)],
            fitted[(row, 0)],
            actual
        );
    }

    let new_home = Matrix::from_rows_vec(vec![120.0, 3.0], 1, 2);
    let estimate = model.predict(&new_home);
    println!(
        "Predicted price for 120 m^2 with 3 bedrooms: {:.1}k",
        estimate[(0, 0)]
    );
}
/// With areas scaled by 1/100, the fitted model reproduces every training
/// price to within 1.0.
#[test]
fn test_linear_regression_one_feature() {
    let sizes = vec![50.0, 60.0, 70.0, 80.0, 90.0, 100.0];
    let prices = vec![150.0, 180.0, 210.0, 240.0, 270.0, 300.0];

    let scaled: Vec<f64> = sizes.iter().map(|size| size / 100.0).collect();
    let x = Matrix::from_vec(scaled, sizes.len(), 1);
    let y = Matrix::from_vec(prices.clone(), prices.len(), 1);

    let mut model = LinReg::new(1);
    model.fit(&x, &y, 0.1, 2000);
    let fitted = model.predict(&x);

    for (row, expected) in prices.iter().enumerate() {
        let error = (fitted[(row, 0)] - expected).abs();
        assert!(error < 1.0);
    }
}
/// With areas scaled by 1/100 (bedroom counts untouched), the fitted model
/// reproduces every training price to within 1.0.
#[test]
fn test_linear_regression_two_features() {
    let features = vec![
        50.0, 2.0, 70.0, 2.0, 90.0, 3.0, 110.0, 3.0, 130.0, 4.0, 150.0, 4.0,
    ];
    let prices = vec![170.0, 210.0, 270.0, 310.0, 370.0, 410.0];

    // Even positions hold the area (scaled down); odd positions hold the bedroom count.
    let scaled: Vec<f64> = features
        .iter()
        .enumerate()
        .map(|(idx, value)| if idx % 2 == 0 { value / 100.0 } else { *value })
        .collect();
    let x = Matrix::from_rows_vec(scaled, 6, 2);
    let y = Matrix::from_vec(prices.clone(), prices.len(), 1);

    let mut model = LinReg::new(2);
    model.fit(&x, &y, 0.01, 50000);
    let fitted = model.predict(&x);

    for (row, expected) in prices.iter().enumerate() {
        let error = (fitted[(row, 0)] - expected).abs();
        assert!(error < 1.0);
    }
}

View File

@ -0,0 +1,101 @@
use rustframe::compute::models::logreg::LogReg;
use rustframe::matrix::Matrix;
/// Runs two short logistic regression demos.
///
/// The first predicts exam success from hours studied; the second predicts
/// whether an online shopper makes a purchase.
fn main() {
    let divider = "\n-----\n";
    student_passing_example();
    println!("{}", divider);
    purchase_prediction_example();
}
/// Fits pass/fail against hours studied, prints predicted vs. actual labels,
/// then prints the estimated pass probability for 5.5 hours of study.
fn student_passing_example() {
    println!("Example 1: exam pass prediction");

    // Hours studied per student and the outcome (0 = failure, 1 = success).
    let hours = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
    let passed = vec![0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0];
    let x = Matrix::from_vec(hours.clone(), hours.len(), 1);
    let y = Matrix::from_vec(passed.clone(), passed.len(), 1);

    let mut model = LogReg::new(1);
    model.fit(&x, &y, 0.1, 10000);
    let predicted = model.predict(&x);

    println!("Hours -> pred | actual");
    for (row, (studied, outcome)) in hours.iter().zip(passed.iter()).enumerate() {
        println!(
            "{:>2} -> {} | {}",
            *studied as i32,
            predicted[(row, 0)] as i32,
            *outcome as i32
        );
    }

    // Probability estimate for a student not in the training data.
    let new_student = Matrix::from_vec(vec![5.5], 1, 1);
    let probability = model.predict_proba(&new_student);
    println!(
        "Probability of passing with 5.5h study: {:.2}",
        probability[(0, 0)]
    );
}
/// Fits purchase/no-purchase against time on site and pages viewed, prints
/// predicted vs. actual labels, then prints the purchase probability for a
/// 4-minute, 4-page visit.
fn purchase_prediction_example() {
    println!("Example 2: purchase likelihood");

    // Six (minutes on site, pages viewed) pairs, flattened row by row,
    // and whether each visit ended in a purchase.
    let visits = vec![1.0, 2.0, 3.0, 1.0, 2.0, 4.0, 5.0, 5.0, 3.5, 2.0, 6.0, 6.0];
    let bought = vec![0.0, 0.0, 0.0, 1.0, 0.0, 1.0];
    let x = Matrix::from_rows_vec(visits, 6, 2);
    let y = Matrix::from_vec(bought.clone(), bought.len(), 1);

    let mut model = LogReg::new(2);
    model.fit(&x, &y, 0.05, 20000);
    let predicted = model.predict(&x);

    println!("time, pages -> pred | actual");
    for (row, outcome) in bought.iter().enumerate() {
        println!(
            "{:>4}m, {:>2} -> {} | {}",
            x[(row, 0)],
            x[(row, 1)] as i32,
            predicted[(row, 0)] as i32,
            *outcome as i32
        );
    }

    let new_visit = Matrix::from_rows_vec(vec![4.0, 4.0], 1, 2);
    let probability = model.predict_proba(&new_visit);
    println!(
        "Prob of purchase for 4min/4pages: {:.2}",
        probability[(0, 0)]
    );
}
/// The fitted model classifies every training student correctly.
#[test]
fn test_student_passing_example() {
    let hours = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
    let passed = vec![0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0];
    let x = Matrix::from_vec(hours.clone(), hours.len(), 1);
    let y = Matrix::from_vec(passed.clone(), passed.len(), 1);

    let mut model = LogReg::new(1);
    model.fit(&x, &y, 0.1, 10000);
    let predicted = model.predict(&x);

    for (row, expected) in passed.iter().enumerate() {
        assert_eq!(predicted[(row, 0)], *expected);
    }
}
/// The fitted model classifies every training visit correctly.
#[test]
fn test_purchase_prediction_example() {
    let visits = vec![1.0, 2.0, 3.0, 1.0, 2.0, 4.0, 5.0, 5.0, 3.5, 2.0, 6.0, 6.0];
    let bought = vec![0.0, 0.0, 0.0, 1.0, 0.0, 1.0];
    let x = Matrix::from_rows_vec(visits, 6, 2);
    let y = Matrix::from_vec(bought.clone(), bought.len(), 1);

    let mut model = LogReg::new(2);
    model.fit(&x, &y, 0.05, 20000);
    let predicted = model.predict(&x);

    for (row, expected) in bought.iter().enumerate() {
        assert_eq!(predicted[(row, 0)], *expected);
    }
}

60
examples/pca.rs Normal file
View File

@ -0,0 +1,60 @@
use rustframe::compute::models::pca::PCA;
use rustframe::matrix::Matrix;
/// Runs two short PCA demos.
///
/// The first reduces 3D sensor readings to two components; the second
/// compresses a small four-feature data set.
fn main() {
    let divider = "\n-----\n";
    sensor_demo();
    println!("{}", divider);
    finance_demo();
}
/// Fits a two-component PCA to ten 3D readings and prints the components
/// together with the first projected row.
fn sensor_demo() {
    println!("Example 1: 3D sensor data");

    // Ten 3D accelerometer observations, flattened row by row.
    let readings = vec![
        2.5, 2.4, 0.5, 0.5, 0.7, 1.5, 2.2, 2.9, 0.7, 1.9, 2.2, 1.0, 3.1, 3.0, 0.6, 2.3, 2.7, 0.9,
        2.0, 1.6, 1.1, 1.0, 1.1, 1.9, 1.5, 1.6, 2.2, 1.1, 0.9, 2.1,
    ];
    let observations = Matrix::from_rows_vec(readings, 10, 3);

    let model = PCA::fit(&observations, 2, 0);
    let projected = model.transform(&observations);

    println!("Components: {:?}", model.components.data());
    let first_row = [projected[(0, 0)], projected[(0, 1)]];
    println!("First row -> {:.2?}", first_row);
}
/// Fits a two-component PCA to a 5x4 matrix of returns and prints the shape
/// of the projection together with its first row.
fn finance_demo() {
    println!("Example 2: 4D finance data");

    // Daily percentage returns for four stocks, flattened row by row (5 rows x 4 columns).
    let returns = vec![
        0.2, 0.1, -0.1, 0.0, 0.3, 0.2, -0.2, 0.1, 0.1, 0.0, -0.1, -0.1, 0.4, 0.3, -0.3, 0.2, 0.0,
        -0.1, 0.1, -0.1,
    ];
    let observations = Matrix::from_rows_vec(returns, 5, 4);

    // Keep two principal components.
    let model = PCA::fit(&observations, 2, 0);
    let projected = model.transform(&observations);

    println!("Reduced shape: {:?}", projected.shape());
    let first_row = [projected[(0, 0)], projected[(0, 1)]];
    println!("First row -> {:.2?}", first_row);
}
/// Projecting the ten 3D readings onto two components yields a 10x2 matrix.
#[test]
fn test_sensor_demo() {
    let readings = vec![
        2.5, 2.4, 0.5, 0.5, 0.7, 1.5, 2.2, 2.9, 0.7, 1.9, 2.2, 1.0, 3.1, 3.0, 0.6, 2.3, 2.7, 0.9,
        2.0, 1.6, 1.1, 1.0, 1.1, 1.9, 1.5, 1.6, 2.2, 1.1, 0.9, 2.1,
    ];
    let observations = Matrix::from_rows_vec(readings, 10, 3);

    let model = PCA::fit(&observations, 2, 0);
    let projected = model.transform(&observations);

    assert_eq!(projected.rows(), 10);
    assert_eq!(projected.cols(), 2);
}

67
examples/random_demo.rs Normal file
View File

@ -0,0 +1,67 @@
use rustframe::random::{crypto_rng, rng, Rng, SliceRandom};
/// Tours the random number generators.
///
/// Covers raw values, ranges and booleans, then normal samples, then slice
/// shuffling together with the cryptographically secure generator.
fn main() {
    let divider = "\n-----\n";
    basic_usage();
    println!("{}", divider);
    normal_demo();
    println!("{}", divider);
    shuffle_demo();
}
/// Draws a raw `u64`, a value from the range 10..20 and a boolean, printing each.
fn basic_usage() {
    println!("Basic PRNG usage\n----------------");
    let mut generator = rng();

    let raw = generator.next_u64();
    println!("random u64 : {}", raw);

    let in_range = generator.random_range(10..20);
    println!("range [10,20): {}", in_range);

    let flag = generator.gen_bool();
    println!("bool : {}", flag);
}
/// Prints three samples drawn via `normal(0.0, 1.0)`.
fn normal_demo() {
    println!("Normal distribution\n-------------------");
    let mut generator = rng();
    (0..3).for_each(|_| {
        let sample = generator.normal(0.0, 1.0);
        println!("sample: {:.3}", sample);
    });
}
/// Shuffles a five-element array with the default generator, then draws a
/// value in 0..256 from the crypto generator.
fn shuffle_demo() {
    println!("Slice shuffling\n----------------");
    let mut generator = rng();
    let mut values = [1, 2, 3, 4, 5];
    values.shuffle(&mut generator);
    println!("shuffled: {:?}", values);

    let mut secure_generator = crypto_rng();
    let secure_byte = secure_generator.random_range(0..256usize);
    println!("crypto byte: {}", secure_byte);
}
#[cfg(test)]
mod tests {
    use super::*;
    use rustframe::random::{CryptoRng, Prng};

    /// Fifty draws from 5..10 all stay inside the half-open range.
    #[test]
    fn test_basic_usage_range_bounds() {
        let mut generator = Prng::new(1);
        for _ in 0..50 {
            let draw = generator.random_range(5..10);
            assert!((5..10).contains(&draw));
        }
    }

    /// Fifty draws from 0..256 on the crypto generator all stay below 256.
    #[test]
    fn test_crypto_byte_bounds() {
        let mut generator = CryptoRng::new();
        for _ in 0..50 {
            let draw = generator.random_range(0..256usize);
            assert!(draw < 256);
        }
    }
}

57
examples/random_stats.rs Normal file
View File

@ -0,0 +1,57 @@
use rustframe::random::{crypto_rng, rng, Rng};
/// Runs the chi-square demo followed by the monobit demo,
/// printing a divider between the two.
fn main() {
    let divider = "\n-----\n";
    chi_square_demo();
    println!("{}", divider);
    monobit_demo();
}
/// Draws 10000 values from 0..10, tallies them into ten buckets and prints
/// the tallies plus the chi-square statistic against equal expected counts.
fn chi_square_demo() {
    println!("Chi-square test on PRNG");
    let mut generator = rng();
    let samples = 10000;

    // Tally how often each of the ten possible values is drawn.
    let mut counts = [0usize; 10];
    for _ in 0..samples {
        let bucket = generator.random_range(0..10usize);
        counts[bucket] += 1;
    }

    // Every bucket is expected to receive the same share of the samples.
    let expected = samples as f64 / 10.0;
    let contribution = |observed: usize| {
        let diff = observed as f64 - expected;
        diff * diff / expected
    };
    let chi2: f64 = counts.iter().map(|&observed| contribution(observed)).sum();

    println!("counts: {:?}", counts);
    println!("chi-square: {:.3}", chi2);
}
/// Draws 1000 `u64` values from the crypto generator and prints the fraction
/// of bits that are set.
fn monobit_demo() {
    println!("Monobit test on crypto RNG");
    let mut generator = crypto_rng();
    let samples = 1000;

    // Total number of set bits across all drawn words.
    let ones: usize = (0..samples)
        .map(|_| generator.next_u64().count_ones() as usize)
        .sum();

    // Each sample contributes 64 bits to the denominator.
    let ratio = ones as f64 / (samples as f64 * 64.0);
    println!("ones ratio: {:.4}", ratio);
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: the chi-square demo runs to completion without panicking.
    #[test]
    fn test_chi_square_demo_runs() {
        chi_square_demo();
    }

    /// Smoke test: the monobit demo runs to completion without panicking.
    #[test]
    fn test_monobit_demo_runs() {
        monobit_demo();
    }
}

View File

@ -0,0 +1,93 @@
use rustframe::compute::stats::{
chi2_test, covariance, covariance_matrix, mean, median, pearson, percentile, stddev, t_test,
};
use rustframe::matrix::{Axis, Matrix};
/// Demonstrates some of the statistics utilities in Rustframe.
///
/// The example is split into three parts, run in this order and separated by
/// a `-----` line on standard output:
/// - Basic descriptive statistics on a small data set
/// - Covariance and correlation calculations
/// - Simple inferential tests (t-test and chi-square)
fn main() {
descriptive_demo();
println!("\n-----\n");
correlation_demo();
println!("\n-----\n");
inferential_demo();
}
/// Prints the mean, standard deviation, median and 25th percentile of the
/// values 1.0 through 5.0 stored in a 1x5 matrix.
fn descriptive_demo() {
    println!("Descriptive statistics\n----------------------");
    let sample = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0], 1, 5);

    let average = mean(&sample);
    println!("mean : {:.2}", average);

    let spread = stddev(&sample);
    println!("std dev : {:.2}", spread);

    let middle = median(&sample);
    println!("median : {:.2}", middle);

    let lower_quartile = percentile(&sample, 25.0);
    println!("25th percentile: {:.2}", lower_quartile);
}
/// Builds two 2x2 matrices and prints their covariance, the column-wise
/// covariance matrix of the first one, and their Pearson correlation.
fn correlation_demo() {
    println!("Covariance and Correlation\n--------------------------");
    let square = |values: Vec<f64>| Matrix::from_vec(values, 2, 2);
    let first = square(vec![1.0, 2.0, 3.0, 4.0]);
    let second = square(vec![1.0, 2.0, 3.0, 5.0]);

    // All three statistics are computed before anything is printed.
    let joint_cov = covariance(&first, &second);
    let first_cov_matrix = covariance_matrix(&first, Axis::Col);
    let correlation = pearson(&first, &second);

    println!("covariance : {:.2}", joint_cov);
    println!("cov matrix : {:?}", first_cov_matrix.data());
    println!("pearson r : {:.2}", correlation);
}
/// Runs a t-test on two five-element samples and a chi-square test on a 2x2
/// table of observed counts, printing each statistic with its p-value.
fn inferential_demo() {
    println!("Inferential statistics\n----------------------");

    let row_of_five = |values: Vec<f64>| Matrix::from_vec(values, 1, 5);
    let low_sample = row_of_five(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
    let high_sample = row_of_five(vec![6.0, 7.0, 8.0, 9.0, 10.0]);
    let (t_stat, t_p) = t_test(&low_sample, &high_sample);
    println!("t statistic : {:.2}, p-value: {:.4}", t_stat, t_p);

    let observed_counts = Matrix::from_vec(vec![12.0, 5.0, 8.0, 10.0], 2, 2);
    let (chi2, chi_p) = chi2_test(&observed_counts);
    println!("chi^2 : {:.2}, p-value: {:.4}", chi2, chi_p);
}
#[cfg(test)]
mod tests {
    use super::*;

    const EPS: f64 = 1e-8;

    /// Returns `true` when `delta` lies strictly within `tol` of zero.
    fn near_zero(delta: f64, tol: f64) -> bool {
        delta.abs() < tol
    }

    /// Descriptive statistics of 1..=5 match their known values.
    #[test]
    fn test_descriptive_demo() {
        let sample = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0], 1, 5);
        assert!(near_zero(mean(&sample) - 3.0, EPS));
        assert!(near_zero(stddev(&sample) - 1.4142135623730951, EPS));
        assert!(near_zero(median(&sample) - 3.0, EPS));
        assert!(near_zero(percentile(&sample, 25.0) - 2.0, EPS));
    }

    /// Covariance, covariance-matrix diagonal and Pearson correlation of two
    /// 2x2 matrices built row-wise match the reference values.
    #[test]
    fn test_correlation_demo() {
        let first = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
        let second = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 5.0], 2, 2);

        let joint_cov = covariance(&first, &second);
        assert!(near_zero(joint_cov - 1.625, EPS));

        let first_cov_matrix = covariance_matrix(&first, Axis::Col);
        assert!(near_zero(first_cov_matrix.get(0, 0) - 2.0, EPS));
        assert!(near_zero(first_cov_matrix.get(1, 1) - 2.0, EPS));

        let correlation = pearson(&first, &second);
        assert!(near_zero(correlation - 0.9827076298239908, 1e-6));
    }

    /// The t statistic is -5 for the two shifted samples and both p-values lie
    /// strictly between 0 and 1.
    #[test]
    fn test_inferential_demo() {
        let low_sample = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0], 1, 5);
        let high_sample = Matrix::from_rows_vec(vec![6.0, 7.0, 8.0, 9.0, 10.0], 1, 5);
        let (t_stat, p_value) = t_test(&low_sample, &high_sample);
        assert!(near_zero(t_stat + 5.0, 1e-5));
        assert!(p_value > 0.0);
        assert!(p_value < 1.0);

        let observed_counts = Matrix::from_rows_vec(vec![12.0, 5.0, 8.0, 10.0], 2, 2);
        let (chi2, p) = chi2_test(&observed_counts);
        assert!(chi2 > 0.0);
        assert!(p > 0.0);
        assert!(p < 1.0);
    }
}

16
src/compute/mod.rs Normal file
View File

@ -0,0 +1,16 @@
//! Algorithms and statistical utilities built on top of the core matrices.
//!
//! This module groups together machine-learning models and statistical helper
//! functions. For quick access to basic statistics see [`stats`](crate::compute::stats), while
//! [`models`](crate::compute::models) contains small learning algorithms.
//!
//! ```
//! use rustframe::compute::stats;
//! use rustframe::matrix::Matrix;
//!
//! let m = Matrix::from_vec(vec![1.0, 2.0, 3.0], 3, 1);
//! assert_eq!(stats::mean(&m), 2.0);
//! ```
pub mod models;
pub mod stats;

View File

@ -0,0 +1,148 @@
//! Common activation functions used in neural networks.
//!
//! Functions operate element-wise on [`Matrix`] values.
//!
//! ```
//! use rustframe::compute::models::activations::sigmoid;
//! use rustframe::matrix::Matrix;
//!
//! let x = Matrix::from_vec(vec![0.0], 1, 1);
//! let y = sigmoid(&x);
//! assert!((y.get(0,0) - 0.5).abs() < 1e-6);
//! ```
use crate::matrix::{Matrix, SeriesOps};
/// Applies the logistic function `1 / (1 + e^(-v))` to every element of `x`.
pub fn sigmoid(x: &Matrix<f64>) -> Matrix<f64> {
    x.map(|value| {
        let denominator = 1.0 + (-value).exp();
        1.0 / denominator
    })
}
/// Computes `v * (1 - v)` for every element of `y`.
///
/// This is the sigmoid derivative with respect to the pre-activation when
/// `y` already holds `sigmoid(x)`; pass the activation output, not `x`.
pub fn dsigmoid(y: &Matrix<f64>) -> Matrix<f64> {
    y.map(|activated| {
        let complement = 1.0 - activated;
        activated * complement
    })
}
/// Rectified linear unit: keeps strictly positive elements of `x` and maps
/// every other element to `0.0`.
pub fn relu(x: &Matrix<f64>) -> Matrix<f64> {
    x.map(|value| {
        let is_positive = value > 0.0;
        if is_positive {
            value
        } else {
            0.0
        }
    })
}
/// Element-wise ReLU derivative: `1.0` where the element of `x` is strictly
/// positive, `0.0` everywhere else (including at exactly zero).
pub fn drelu(x: &Matrix<f64>) -> Matrix<f64> {
    x.map(|value| {
        let is_positive = value > 0.0;
        if is_positive {
            1.0
        } else {
            0.0
        }
    })
}
/// Leaky ReLU with a fixed slope of `0.01`: strictly positive elements pass
/// through unchanged, all others are multiplied by `0.01`.
pub fn leaky_relu(x: &Matrix<f64>) -> Matrix<f64> {
    x.map(|value| {
        let is_positive = value > 0.0;
        if is_positive {
            value
        } else {
            0.01 * value
        }
    })
}
/// Element-wise leaky-ReLU derivative: `1.0` where the element of `x` is
/// strictly positive, `0.01` everywhere else (including at exactly zero).
pub fn dleaky_relu(x: &Matrix<f64>) -> Matrix<f64> {
    x.map(|value| {
        let is_positive = value > 0.0;
        if is_positive {
            1.0
        } else {
            0.01
        }
    })
}
// Unit tests for the activation functions and their derivatives. Each function
// gets a regular case on [-1, 0, 1] and an edge case with extreme magnitudes.
#[cfg(test)]
mod tests {
use super::*;
// Helper function to round all elements in a matrix to n decimal places
// so that floating-point results can be compared with `assert_eq!`.
fn _round_matrix(mat: &Matrix<f64>, decimals: u32) -> Matrix<f64> {
let factor = 10f64.powi(decimals as i32);
let rounded: Vec<f64> = mat
.to_vec()
.iter()
.map(|v| (v * factor).round() / factor)
.collect();
Matrix::from_vec(rounded, mat.rows(), mat.cols())
}
// sigmoid on [-1, 0, 1], compared after rounding to 6 decimals.
#[test]
fn test_sigmoid() {
let x = Matrix::from_vec(vec![-1.0, 0.0, 1.0], 3, 1);
let expected = Matrix::from_vec(vec![0.26894142, 0.5, 0.73105858], 3, 1);
let result = sigmoid(&x);
assert_eq!(_round_matrix(&result, 6), _round_matrix(&expected, 6));
}
// sigmoid must saturate to 0 and 1 for very large magnitudes.
#[test]
fn test_sigmoid_edge_case() {
let x = Matrix::from_vec(vec![-1000.0, 0.0, 1000.0], 3, 1);
let expected = Matrix::from_vec(vec![0.0, 0.5, 1.0], 3, 1);
let result = sigmoid(&x);
for (r, e) in result.data().iter().zip(expected.data().iter()) {
assert!((r - e).abs() < 1e-6);
}
}
// relu clamps negatives and zero to 0.
#[test]
fn test_relu() {
let x = Matrix::from_vec(vec![-1.0, 0.0, 1.0], 3, 1);
let expected = Matrix::from_vec(vec![0.0, 0.0, 1.0], 3, 1);
assert_eq!(relu(&x), expected);
}
// relu with a tiny negative and a huge positive input.
#[test]
fn test_relu_edge_case() {
let x = Matrix::from_vec(vec![-1e-10, 0.0, 1e10], 3, 1);
let expected = Matrix::from_vec(vec![0.0, 0.0, 1e10], 3, 1);
assert_eq!(relu(&x), expected);
}
// dsigmoid takes sigmoid outputs as its input, not pre-activations.
#[test]
fn test_dsigmoid() {
let y = Matrix::from_vec(vec![0.26894142, 0.5, 0.73105858], 3, 1);
let expected = Matrix::from_vec(vec![0.19661193, 0.25, 0.19661193], 3, 1);
let result = dsigmoid(&y);
assert_eq!(_round_matrix(&result, 6), _round_matrix(&expected, 6));
}
// dsigmoid vanishes at the saturated outputs 0 and 1.
#[test]
fn test_dsigmoid_edge_case() {
let y = Matrix::from_vec(vec![0.0, 0.5, 1.0], 3, 1); // Assume these are outputs from sigmoid(x)
let expected = Matrix::from_vec(vec![0.0, 0.25, 0.0], 3, 1);
let result = dsigmoid(&y);
for (r, e) in result.data().iter().zip(expected.data().iter()) {
assert!((r - e).abs() < 1e-6);
}
}
// drelu is 0 at exactly zero.
#[test]
fn test_drelu() {
let x = Matrix::from_vec(vec![-1.0, 0.0, 1.0], 3, 1);
let expected = Matrix::from_vec(vec![0.0, 0.0, 1.0], 3, 1);
assert_eq!(drelu(&x), expected);
}
// drelu with a tiny negative and a huge positive input.
#[test]
fn test_drelu_edge_case() {
let x = Matrix::from_vec(vec![-1e-10, 0.0, 1e10], 3, 1);
let expected = Matrix::from_vec(vec![0.0, 0.0, 1.0], 3, 1);
assert_eq!(drelu(&x), expected);
}
// leaky_relu scales non-positive inputs by 0.01.
#[test]
fn test_leaky_relu() {
let x = Matrix::from_vec(vec![-1.0, 0.0, 1.0], 3, 1);
let expected = Matrix::from_vec(vec![-0.01, 0.0, 1.0], 3, 1);
assert_eq!(leaky_relu(&x), expected);
}
// leaky_relu with a tiny negative and a huge positive input.
#[test]
fn test_leaky_relu_edge_case() {
let x = Matrix::from_vec(vec![-1e-10, 0.0, 1e10], 3, 1);
let expected = Matrix::from_vec(vec![-1e-12, 0.0, 1e10], 3, 1);
assert_eq!(leaky_relu(&x), expected);
}
// dleaky_relu is 0.01 at exactly zero.
#[test]
fn test_dleaky_relu() {
let x = Matrix::from_vec(vec![-1.0, 0.0, 1.0], 3, 1);
let expected = Matrix::from_vec(vec![0.01, 0.01, 1.0], 3, 1);
assert_eq!(dleaky_relu(&x), expected);
}
// dleaky_relu with a tiny negative and a huge positive input.
#[test]
fn test_dleaky_relu_edge_case() {
let x = Matrix::from_vec(vec![-1e-10, 0.0, 1e10], 3, 1);
let expected = Matrix::from_vec(vec![0.01, 0.01, 1.0], 3, 1);
assert_eq!(dleaky_relu(&x), expected);
}
}

View File

@ -0,0 +1,551 @@
//! A minimal dense neural network implementation for educational purposes.
//!
//! Layers operate on [`Matrix`] values and support ReLU and Sigmoid
//! activations. This is not meant to be a performant deep-learning framework
//! but rather a small example of how the surrounding matrix utilities can be
//! composed.
//!
//! ```
//! use rustframe::compute::models::dense_nn::{ActivationKind, DenseNN, DenseNNConfig, InitializerKind, LossKind};
//! use rustframe::matrix::Matrix;
//!
//! // Tiny network with one input and one output neuron.
//! let config = DenseNNConfig {
//! input_size: 1,
//! hidden_layers: vec![],
//! output_size: 1,
//! activations: vec![ActivationKind::Relu],
//! initializer: InitializerKind::Uniform(0.5),
//! loss: LossKind::MSE,
//! learning_rate: 0.1,
//! epochs: 1,
//! };
//! let mut nn = DenseNN::new(config);
//! let x = Matrix::from_vec(vec![1.0, 2.0], 2, 1);
//! let y = Matrix::from_vec(vec![2.0, 3.0], 2, 1);
//! nn.train(&x, &y);
//! ```
use crate::compute::models::activations::{drelu, relu, sigmoid};
use crate::matrix::{Matrix, SeriesOps};
use crate::random::prelude::*;
/// Supported activation functions.
///
/// The enum carries no data, so it is cheap to copy and can be compared and
/// printed for diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ActivationKind {
    /// Rectified linear unit.
    Relu,
    /// Logistic sigmoid.
    Sigmoid,
    /// Hyperbolic tangent.
    Tanh,
}
impl ActivationKind {
    /// Evaluates the selected activation on every element of `z`.
    pub fn forward(&self, z: &Matrix<f64>) -> Matrix<f64> {
        match self {
            Self::Relu => relu(z),
            Self::Sigmoid => sigmoid(z),
            Self::Tanh => z.map(|pre| pre.tanh()),
        }
    }

    /// Evaluates the derivative of the selected activation element-wise.
    ///
    /// `z` must hold pre-activation values; the sigmoid and tanh branches
    /// re-evaluate the activation internally.
    pub fn derivative(&self, z: &Matrix<f64>) -> Matrix<f64> {
        match self {
            Self::Relu => drelu(z),
            Self::Sigmoid => {
                // sigma'(z) = sigma(z) * (1 - sigma(z))
                let activated = sigmoid(z);
                activated.map(|s| s * (1.0 - s))
            }
            Self::Tanh => z.map(|pre| {
                // tanh'(z) = 1 - tanh(z)^2
                let t = pre.tanh();
                1.0 - t.powi(2)
            }),
        }
    }
}
/// Weight initialization schemes.
///
/// Every scheme samples uniformly from a symmetric interval; the variants
/// differ only in how the interval bound is chosen.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum InitializerKind {
    /// Uniform(-limit .. limit) with an explicitly supplied bound.
    Uniform(f64),
    /// Xavier/Glorot uniform: bound `sqrt(6 / (fan_in + fan_out))`.
    Xavier,
    /// He (Kaiming) style uniform. The accompanying `initialize`
    /// implementation uses the bound `sqrt(2 / fan_in)`.
    He,
}
impl InitializerKind {
    /// Creates a `rows x cols` weight matrix whose entries are drawn from
    /// `-bound..bound`.
    ///
    /// `rows` is treated as the fan-in and `cols` as the fan-out. The bound is
    /// the supplied limit for `Uniform`, `sqrt(6 / (fan_in + fan_out))` for
    /// `Xavier`, and `sqrt(2 / fan_in)` for `He`.
    pub fn initialize(&self, rows: usize, cols: usize) -> Matrix<f64> {
        let mut generator = rng();
        let (fan_in, fan_out) = (rows, cols);
        let bound = match self {
            Self::Uniform(limit) => *limit,
            Self::Xavier => (6.0 / (fan_in + fan_out) as f64).sqrt(),
            Self::He => (2.0 / fan_in as f64).sqrt(),
        };

        let total = rows * cols;
        let mut values = Vec::with_capacity(total);
        for _ in 0..total {
            values.push(generator.random_range(-bound..bound));
        }
        Matrix::from_vec(values, rows, cols)
    }
}
/// Supported losses.
///
/// The enum carries no data, so it is cheap to copy and can be compared and
/// printed for diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LossKind {
    /// Mean Squared Error: L = 1/m * sum((y_hat - y)^2)
    MSE,
    /// Binary Cross-Entropy: L = -1/m * sum(y*log(y_hat) + (1-y)*log(1-y_hat))
    BCE,
}
impl LossKind {
/// Compute gradient dL/dy_hat (before applying activation derivative)
pub fn gradient(&self, y_hat: &Matrix<f64>, y: &Matrix<f64>) -> Matrix<f64> {
let m = y.rows() as f64;
match self {
LossKind::MSE => (y_hat - y) * (2.0 / m),
LossKind::BCE => (y_hat - y) * (1.0 / m),
}
}
}
/// Configuration for a dense neural network
///
/// The layer widths are `input_size`, followed by each entry of
/// `hidden_layers`, followed by `output_size`.
pub struct DenseNNConfig {
/// Width of the input layer.
pub input_size: usize,
/// Widths of the hidden layers, in order; may be empty.
pub hidden_layers: Vec<usize>,
/// Must have length = hidden_layers.len() + 1
pub activations: Vec<ActivationKind>,
/// Width of the output layer.
pub output_size: usize,
/// Scheme used to initialise the weight matrices.
pub initializer: InitializerKind,
/// Loss whose gradient drives training.
pub loss: LossKind,
/// Step size applied to each gradient update.
pub learning_rate: f64,
/// Number of passes over the training data performed by `train`.
pub epochs: usize,
}
/// A multi-layer perceptron with full configurability
///
/// Holds one weight matrix, one bias row and one activation per layer.
pub struct DenseNN {
// One weight matrix per layer.
weights: Vec<Matrix<f64>>,
// One bias matrix per layer.
biases: Vec<Matrix<f64>>,
// One activation per layer.
activations: Vec<ActivationKind>,
// Loss used to seed backpropagation.
loss: LossKind,
// Learning rate.
lr: f64,
// Number of training epochs.
epochs: usize,
}
impl DenseNN {
    /// Build a new DenseNN from the given configuration.
    ///
    /// Layer widths are `input_size`, each hidden width, then `output_size`.
    /// Every layer gets a weight matrix from `config.initializer` and a
    /// zero-filled `1 x width` bias row.
    ///
    /// # Panics
    /// Panics if `config.activations.len()` differs from the number of layers
    /// (`hidden_layers.len() + 1`).
    pub fn new(config: DenseNNConfig) -> Self {
        let mut sizes = vec![config.input_size];
        sizes.extend(&config.hidden_layers);
        sizes.push(config.output_size);
        let n_layers = sizes.len() - 1;

        assert_eq!(
            config.activations.len(),
            n_layers,
            "Number of activation functions must match number of layers"
        );

        let mut weights = Vec::with_capacity(n_layers);
        let mut biases = Vec::with_capacity(n_layers);
        // Each adjacent pair of widths describes one layer: (fan_in, fan_out).
        for pair in sizes.windows(2) {
            let (fan_in, fan_out) = (pair[0], pair[1]);
            weights.push(config.initializer.initialize(fan_in, fan_out));
            biases.push(Matrix::zeros(1, fan_out));
        }

        DenseNN {
            weights,
            biases,
            activations: config.activations,
            loss: config.loss,
            lr: config.learning_rate,
            epochs: config.epochs,
        }
    }

    /// Perform a full forward pass, returning pre-activations (z) and activations (a).
    ///
    /// `activs[0]` is the input `x` and `activs[l + 1]` is the output of layer
    /// `l`, so `activs` has one more entry than `zs`.
    fn forward_full(&self, x: &Matrix<f64>) -> (Vec<Matrix<f64>>, Vec<Matrix<f64>>) {
        let mut zs = Vec::with_capacity(self.weights.len());
        let mut activs = Vec::with_capacity(self.weights.len() + 1);
        activs.push(x.clone());

        for (i, (w, b)) in self.weights.iter().zip(self.biases.iter()).enumerate() {
            // The previous layer's output is always the most recent entry, so
            // no separate running copy of it is needed.
            let a_prev = activs
                .last()
                .expect("activs always contains at least the input");
            let z = &a_prev.dot(w) + &Matrix::repeat_rows(b, a_prev.rows());
            let a_next = self.activations[i].forward(&z);
            zs.push(z);
            activs.push(a_next);
        }
        (zs, activs)
    }

    /// Train the network on inputs X and targets Y with full-batch gradient
    /// descent for the configured number of epochs.
    ///
    /// For `LossKind::BCE` the loss gradient is used directly as the output
    /// delta; for `LossKind::MSE` it is multiplied by the derivative of the
    /// output activation.
    pub fn train(&mut self, x: &Matrix<f64>, y: &Matrix<f64>) {
        let m = x.rows() as f64;
        for _ in 0..self.epochs {
            let (zs, activs) = self.forward_full(x);
            let y_hat = activs
                .last()
                .expect("activs always contains at least the input");

            // Initial delta (dL/dz) on output
            let mut delta = match self.loss {
                LossKind::BCE => self.loss.gradient(y_hat, y),
                LossKind::MSE => {
                    let grad = self.loss.gradient(y_hat, y);
                    let dz = self
                        .activations
                        .last()
                        .expect("a network has at least one layer")
                        .derivative(zs.last().expect("a network has at least one layer"));
                    grad.zip(&dz, |g, da| g * da)
                }
            };

            // Backpropagate through layers
            for l in (0..self.weights.len()).rev() {
                let a_prev = &activs[l];
                // NOTE(review): `LossKind::gradient` already divides by the
                // batch size, so these gradients are scaled by 1/m twice.
                // Kept as-is because it only rescales the effective learning rate.
                let dw = a_prev.transpose().dot(&delta) / m;
                let db = Matrix::from_vec(delta.sum_vertical(), 1, delta.cols()) / m;

                // Propagate delta to the previous layer BEFORE touching this
                // layer's weights: every gradient of one step must be taken
                // with respect to the weights used in the forward pass.
                let upstream_delta = if l > 0 {
                    let w_t = self.weights[l].transpose();
                    let da = self.activations[l - 1].derivative(&zs[l - 1]);
                    Some(delta.dot(&w_t).zip(&da, |d, a| d * a))
                } else {
                    None
                };

                // Update weights & biases
                self.weights[l] = &self.weights[l] - &(dw * self.lr);
                self.biases[l] = &self.biases[l] - &(db * self.lr);

                if let Some(next) = upstream_delta {
                    delta = next;
                }
            }
        }
    }

    /// Run a forward pass and return the network's output (one row per row of `x`).
    pub fn predict(&self, x: &Matrix<f64>) -> Matrix<f64> {
        let mut a = x.clone();
        for (i, (w, b)) in self.weights.iter().zip(self.biases.iter()).enumerate() {
            let z = &a.dot(w) + &Matrix::repeat_rows(b, a.rows());
            a = self.activations[i].forward(&z);
        }
        a
    }
}
// Unit tests for the dense network: construction, training behaviour,
// activation/initializer/loss helpers, and delta propagation across layers.
#[cfg(test)]
mod tests {
use super::*;
use crate::matrix::Matrix;
/// Compute MSE = 1/m * Σ (ŷ - y)²
fn mse_loss(y_hat: &Matrix<f64>, y: &Matrix<f64>) -> f64 {
let m = y.rows() as f64;
y_hat
.zip(y, |yh, yv| (yh - yv).powi(2))
.data()
.iter()
.sum::<f64>()
/ m
}
// predict returns one output row per input row.
#[test]
fn test_predict_shape() {
let config = DenseNNConfig {
input_size: 1,
hidden_layers: vec![2],
activations: vec![ActivationKind::Relu, ActivationKind::Sigmoid],
output_size: 1,
initializer: InitializerKind::Uniform(0.1),
loss: LossKind::MSE,
learning_rate: 0.01,
epochs: 0,
};
let model = DenseNN::new(config);
let x = Matrix::from_vec(vec![1.0, 2.0, 3.0], 3, 1);
let preds = model.predict(&x);
assert_eq!(preds.rows(), 3);
assert_eq!(preds.cols(), 1);
}
// Construction must reject a config whose activation count differs from the layer count.
#[test]
#[should_panic(expected = "Number of activation functions must match number of layers")]
fn test_invalid_activation_count() {
let config = DenseNNConfig {
input_size: 2,
hidden_layers: vec![3],
activations: vec![ActivationKind::Relu], // Only one activation for two layers
output_size: 1,
initializer: InitializerKind::Uniform(0.1),
loss: LossKind::MSE,
learning_rate: 0.01,
epochs: 0,
};
let _model = DenseNN::new(config);
}
// With zero epochs, train must leave predictions untouched.
#[test]
fn test_train_no_epochs_does_nothing() {
let config = DenseNNConfig {
input_size: 1,
hidden_layers: vec![2],
activations: vec![ActivationKind::Relu, ActivationKind::Sigmoid],
output_size: 1,
initializer: InitializerKind::Uniform(0.1),
loss: LossKind::MSE,
learning_rate: 0.01,
epochs: 0,
};
let mut model = DenseNN::new(config);
let x = Matrix::from_vec(vec![0.0, 1.0], 2, 1);
let y = Matrix::from_vec(vec![0.0, 1.0], 2, 1);
let before = model.predict(&x);
model.train(&x, &y);
let after = model.predict(&x);
for i in 0..before.rows() {
for j in 0..before.cols() {
// "prediction changed despite 0 epochs"
assert!((before[(i, j)] - after[(i, j)]).abs() < 1e-12);
}
}
}
// A single epoch must move at least one prediction.
#[test]
fn test_train_one_epoch_changes_predictions() {
// Single-layer sigmoid regression so gradients flow.
let config = DenseNNConfig {
input_size: 1,
hidden_layers: vec![],
activations: vec![ActivationKind::Sigmoid],
output_size: 1,
initializer: InitializerKind::Uniform(0.1),
loss: LossKind::MSE,
learning_rate: 1.0,
epochs: 1,
};
let mut model = DenseNN::new(config);
let x = Matrix::from_vec(vec![0.0, 1.0], 2, 1);
let y = Matrix::from_vec(vec![0.0, 1.0], 2, 1);
let before = model.predict(&x);
model.train(&x, &y);
let after = model.predict(&x);
// At least one of the two outputs must move by >ϵ
let mut moved = false;
for i in 0..before.rows() {
if (before[(i, 0)] - after[(i, 0)]).abs() > 1e-8 {
moved = true;
}
}
assert!(moved, "predictions did not change after 1 epoch");
}
// Ten epochs of training must lower the MSE on the training data.
#[test]
fn test_training_reduces_mse_loss() {
// Same single-layer sigmoid setup; check loss goes down.
let config = DenseNNConfig {
input_size: 1,
hidden_layers: vec![],
activations: vec![ActivationKind::Sigmoid],
output_size: 1,
initializer: InitializerKind::Uniform(0.1),
loss: LossKind::MSE,
learning_rate: 1.0,
epochs: 10,
};
let mut model = DenseNN::new(config);
let x = Matrix::from_vec(vec![0.0, 1.0, 0.5], 3, 1);
let y = Matrix::from_vec(vec![0.0, 1.0, 0.5], 3, 1);
let before_preds = model.predict(&x);
let before_loss = mse_loss(&before_preds, &y);
model.train(&x, &y);
let after_preds = model.predict(&x);
let after_loss = mse_loss(&after_preds, &y);
// MSE did not decrease (before: {}, after: {})
assert!(after_loss < before_loss);
}
// Tanh forward values on [-1, 0, 1].
#[test]
fn test_activation_kind_forward_tanh() {
let input = Matrix::from_vec(vec![-1.0, 0.0, 1.0], 3, 1);
let expected = Matrix::from_vec(vec![-0.76159415595, 0.0, 0.76159415595], 3, 1);
let output = ActivationKind::Tanh.forward(&input);
for i in 0..input.rows() {
for j in 0..input.cols() {
// Tanh forward output mismatch at ({}, {})
assert!((output[(i, j)] - expected[(i, j)]).abs() < 1e-9);
}
}
}
// ReLU derivative on [-1, 0, 1].
#[test]
fn test_activation_kind_derivative_relu() {
let input = Matrix::from_vec(vec![-1.0, 0.0, 1.0], 3, 1);
let expected = Matrix::from_vec(vec![0.0, 0.0, 1.0], 3, 1);
let output = ActivationKind::Relu.derivative(&input);
for i in 0..input.rows() {
for j in 0..input.cols() {
// "ReLU derivative output mismatch at ({}, {})"
assert!((output[(i, j)] - expected[(i, j)]).abs() < 1e-9);
}
}
}
// Tanh derivative on [-1, 0, 1].
#[test]
fn test_activation_kind_derivative_tanh() {
let input = Matrix::from_vec(vec![-1.0, 0.0, 1.0], 3, 1);
let expected = Matrix::from_vec(vec![0.41997434161, 1.0, 0.41997434161], 3, 1); // 1 - tanh(x)^2
let output = ActivationKind::Tanh.derivative(&input);
for i in 0..input.rows() {
for j in 0..input.cols() {
// "Tanh derivative output mismatch at ({}, {})"
assert!((output[(i, j)] - expected[(i, j)]).abs() < 1e-9);
}
}
}
// Xavier-initialised values stay within ±sqrt(6 / (rows + cols)).
#[test]
fn test_initializer_kind_xavier() {
let rows = 10;
let cols = 20;
let initializer = InitializerKind::Xavier;
let matrix = initializer.initialize(rows, cols);
let limit = (6.0 / (rows + cols) as f64).sqrt();
assert_eq!(matrix.rows(), rows);
assert_eq!(matrix.cols(), cols);
for val in matrix.data() {
// Xavier initialized value out of range
assert!(*val >= -limit && *val <= limit);
}
}
// He-initialised values stay within ±sqrt(2 / rows).
#[test]
fn test_initializer_kind_he() {
let rows = 10;
let cols = 20;
let initializer = InitializerKind::He;
let matrix = initializer.initialize(rows, cols);
let limit = (2.0 / rows as f64).sqrt();
assert_eq!(matrix.rows(), rows);
assert_eq!(matrix.cols(), cols);
for val in matrix.data() {
// He initialized value out of range
assert!(*val >= -limit && *val <= limit);
}
}
// The BCE gradient is the residual divided by the number of rows.
#[test]
fn test_loss_kind_bce_gradient() {
let y_hat = Matrix::from_vec(vec![0.1, 0.9, 0.4], 3, 1);
let y = Matrix::from_vec(vec![0.0, 1.0, 0.5], 3, 1);
let expected_gradient = Matrix::from_vec(vec![0.1 / 3.0, -0.1 / 3.0, -0.1 / 3.0], 3, 1); // (y_hat - y) * (1.0 / m)
let output_gradient = LossKind::BCE.gradient(&y_hat, &y);
assert_eq!(output_gradient.rows(), expected_gradient.rows());
assert_eq!(output_gradient.cols(), expected_gradient.cols());
for i in 0..output_gradient.rows() {
for j in 0..output_gradient.cols() {
// BCE gradient output mismatch at ({}, {})
assert!((output_gradient[(i, j)] - expected_gradient[(i, j)]).abs() < 1e-9);
}
}
}
// Ten epochs of training must lower the BCE on the training data.
#[test]
fn test_training_reduces_bce_loss() {
// Single-layer sigmoid setup; check BCE loss goes down.
let config = DenseNNConfig {
input_size: 1,
hidden_layers: vec![],
activations: vec![ActivationKind::Sigmoid],
output_size: 1,
initializer: InitializerKind::Uniform(0.1),
loss: LossKind::BCE,
learning_rate: 1.0,
epochs: 10,
};
let mut model = DenseNN::new(config);
let x = Matrix::from_vec(vec![0.0, 1.0, 0.5], 3, 1);
let y = Matrix::from_vec(vec![0.0, 1.0, 0.5], 3, 1);
let before_preds = model.predict(&x);
// BCE loss calculation for testing
let before_loss = -1.0 / (y.rows() as f64)
* before_preds
.zip(&y, |yh, yv| yv * yh.ln() + (1.0 - yv) * (1.0 - yh).ln())
.data()
.iter()
.sum::<f64>();
model.train(&x, &y);
let after_preds = model.predict(&x);
let after_loss = -1.0 / (y.rows() as f64)
* after_preds
.zip(&y, |yh, yv| yv * yh.ln() + (1.0 - yv) * (1.0 - yh).ln())
.data()
.iter()
.sum::<f64>();
// BCE did not decrease (before: {}, after: {})
assert!(after_loss < before_loss,);
}
// One epoch on a two-layer network must change the parameters of both layers.
#[test]
fn test_train_backprop_delta_propagation() {
// Network with two layers to test delta propagation to previous layer (l > 0)
let config = DenseNNConfig {
input_size: 2,
hidden_layers: vec![3],
activations: vec![ActivationKind::Sigmoid, ActivationKind::Sigmoid],
output_size: 1,
initializer: InitializerKind::Uniform(0.1),
loss: LossKind::MSE,
learning_rate: 0.1,
epochs: 1,
};
let mut model = DenseNN::new(config);
// Store initial weights and biases to compare after training
let initial_weights_l0 = model.weights[0].clone();
let initial_biases_l0 = model.biases[0].clone();
let initial_weights_l1 = model.weights[1].clone();
let initial_biases_l1 = model.biases[1].clone();
let x = Matrix::from_vec(vec![0.1, 0.2, 0.3, 0.4], 2, 2);
let y = Matrix::from_vec(vec![0.5, 0.6], 2, 1);
model.train(&x, &y);
// Verify that weights and biases of both layers have changed,
// implying delta propagation occurred for l > 0
// Weights of first layer did not change, delta propagation might not have occurred
assert!(model.weights[0] != initial_weights_l0);
// Biases of first layer did not change, delta propagation might not have occurred
assert!(model.biases[0] != initial_biases_l0);
// Weights of second layer did not change
assert!(model.weights[1] != initial_weights_l1);
// Biases of second layer did not change
assert!(model.biases[1] != initial_biases_l1);
}
}

View File

@ -0,0 +1,243 @@
//! Gaussian Naive Bayes classifier for dense matrices.
//!
//! ```
//! use rustframe::compute::models::gaussian_nb::GaussianNB;
//! use rustframe::matrix::Matrix;
//!
//! let x = Matrix::from_vec(vec![1.0, 2.0, 1.0, 2.0], 2, 2); // two samples
//! let y = Matrix::from_vec(vec![0.0, 1.0], 2, 1);
//! let mut model = GaussianNB::new(1e-9, false);
//! model.fit(&x, &y);
//! let preds = model.predict(&x);
//! assert_eq!(preds.rows(), 2);
//! ```
use crate::matrix::Matrix;
use std::collections::HashMap;
/// A Gaussian Naive Bayes classifier.
///
/// # Parameters
/// - `var_smoothing`: Portion of the largest variance of all features to add to variances for stability.
/// - `use_unbiased_variance`: If `true`, uses Bessel's correction (dividing by (n-1)); otherwise divides by n.
///
/// The per-class vectors below are parallel: index `i` in each of them refers
/// to the class stored at `classes[i]`.
pub struct GaussianNB {
// Distinct class labels, sorted ascending after `fit`
classes: Vec<f64>,
// Prior probabilities P(class)
priors: Vec<f64>,
// Feature means per class (one 1 x n_features matrix per class)
means: Vec<Matrix<f64>>,
// Feature variances per class (one 1 x n_features matrix per class)
variances: Vec<Matrix<f64>>,
// var_smoothing factor supplied to `new`
eps: f64,
// flag for unbiased variance
use_unbiased: bool,
}
impl GaussianNB {
    /// Create a new, unfitted GaussianNB.
    ///
    /// # Arguments
    /// * `var_smoothing` - fraction of the largest per-feature variance of the
    ///   training data that is added to every class variance for numerical
    ///   stability.
    /// * `use_unbiased_variance` - whether to apply Bessel's correction (divide by n-1).
    pub fn new(var_smoothing: f64, use_unbiased_variance: bool) -> Self {
        Self {
            classes: Vec::new(),
            priors: Vec::new(),
            means: Vec::new(),
            variances: Vec::new(),
            eps: var_smoothing,
            use_unbiased: use_unbiased_variance,
        }
    }

    /// Fit the model according to the training data `x` and labels `y`.
    ///
    /// Any previously fitted state is replaced. Labels are grouped by value;
    /// `-0.0` and `0.0` are treated as the same class.
    ///
    /// # Panics
    /// Panics if `x` or `y` is empty, if their dimensions disagree, or if any
    /// label is NaN.
    pub fn fit(&mut self, x: &Matrix<f64>, y: &Matrix<f64>) {
        let m = x.rows();
        let n = x.cols();
        assert_eq!(y.rows(), m, "Row count of X and Y must match");
        assert_eq!(y.cols(), 1, "Y must be a column vector");
        if m == 0 || n == 0 {
            panic!("Input matrix x or y is empty");
        }

        // Group sample indices by label. Labels are keyed by bit pattern, so
        // NaN is rejected up front and negative zero is folded into positive
        // zero; otherwise equal labels could end up in different classes.
        let mut groups: HashMap<u64, Vec<usize>> = HashMap::new();
        for i in 0..m {
            let raw = y[(i, 0)];
            assert!(!raw.is_nan(), "Class labels must not be NaN");
            let label = if raw == 0.0 { 0.0 } else { raw };
            groups.entry(label.to_bits()).or_default().push(i);
        }

        // Extract and sort class labels. `total_cmp` cannot fail, and with
        // NaN and -0.0 excluded it agrees with the usual numeric order.
        self.classes = groups.keys().copied().map(f64::from_bits).collect();
        self.classes.sort_by(|a, b| a.total_cmp(b));
        self.priors.clear();
        self.means.clear();
        self.variances.clear();

        // Largest population variance over all features; scales the smoothing term.
        let mut max_var_feature = 0.0;
        for j in 0..n {
            let mean_all = (0..m).map(|i| x[(i, j)]).sum::<f64>() / m as f64;
            let var_all = (0..m)
                .map(|i| (x[(i, j)] - mean_all).powi(2))
                .sum::<f64>()
                / m as f64;
            if var_all > max_var_feature {
                max_var_feature = var_all;
            }
        }
        let smoothing = self.eps * max_var_feature;

        // Compute per-class statistics
        for &c in &self.classes {
            let idx = &groups[&c.to_bits()];
            let count = idx.len();

            // Prior
            self.priors.push(count as f64 / m as f64);

            let mut mean = Matrix::zeros(1, n);
            let mut var = Matrix::zeros(1, n);

            // Mean
            for &i in idx {
                for j in 0..n {
                    mean[(0, j)] += x[(i, j)];
                }
            }
            for j in 0..n {
                mean[(0, j)] /= count as f64;
            }

            // Variance
            for &i in idx {
                for j in 0..n {
                    let d = x[(i, j)] - mean[(0, j)];
                    var[(0, j)] += d * d;
                }
            }
            // With Bessel's correction a single-sample class would divide by
            // zero, so the denominator is clamped to at least 1.
            let denom = if self.use_unbiased {
                (count as f64 - 1.0).max(1.0)
            } else {
                count as f64
            };
            for j in 0..n {
                var[(0, j)] = var[(0, j)] / denom + smoothing;
                if var[(0, j)] <= 0.0 {
                    var[(0, j)] = smoothing;
                }
            }

            self.means.push(mean);
            self.variances.push(var);
        }
    }

    /// Perform classification on an array of test vectors `x`.
    ///
    /// Returns an `m x 1` matrix holding, for each row of `x`, the class label
    /// with the highest Gaussian log-posterior.
    ///
    /// # Panics
    /// Panics if `x` has at least one row and the model has not been fitted,
    /// or if the number of columns of `x` differs from the number of features
    /// seen during `fit`.
    pub fn predict(&self, x: &Matrix<f64>) -> Matrix<f64> {
        let m = x.rows();
        let n = x.cols();
        let k = self.classes.len();
        let mut preds = Matrix::zeros(m, 1);
        if m == 0 {
            return preds;
        }
        assert!(k > 0, "GaussianNB::predict called before fit");
        assert_eq!(
            n,
            self.means[0].cols(),
            "Feature count of X must match the training data"
        );

        let ln_2pi = (2.0 * std::f64::consts::PI).ln();
        for i in 0..m {
            let mut best = (0, f64::NEG_INFINITY);
            for c_idx in 0..k {
                // log P(class) + sum over features of log N(x_j | mean_j, var_j)
                let mut log_prob = self.priors[c_idx].ln();
                for j in 0..n {
                    let diff = x[(i, j)] - self.means[c_idx][(0, j)];
                    let var = self.variances[c_idx][(0, j)];
                    log_prob += -0.5 * (diff * diff / var + var.ln() + ln_2pi);
                }
                if log_prob > best.1 {
                    best = (c_idx, log_prob);
                }
            }
            preds[(i, 0)] = self.classes[best.0];
        }
        preds
    }
}
// Unit tests for GaussianNB: classification on separable data, input
// validation, and the zero-smoothing variance edge case.
#[cfg(test)]
mod tests {
use super::*;
use crate::matrix::Matrix;
// Two well-separated classes on one feature, population variance.
#[test]
fn test_simple_two_class() {
// Simple dataset: one feature, two classes 0 and 1
// Class 0: values [1.0, 1.2, 0.8]
// Class 1: values [3.0, 3.2, 2.8]
let x = Matrix::from_vec(vec![1.0, 1.2, 0.8, 3.0, 3.2, 2.8], 6, 1);
let y = Matrix::from_vec(vec![0.0, 0.0, 0.0, 1.0, 1.0, 1.0], 6, 1);
let mut clf = GaussianNB::new(1e-9, false);
clf.fit(&x, &y);
let test = Matrix::from_vec(vec![1.1, 3.1], 2, 1);
let preds = clf.predict(&test);
assert_eq!(preds[(0, 0)], 0.0);
assert_eq!(preds[(1, 0)], 1.0);
}
// Same shape of data, but with Bessel's correction enabled.
#[test]
fn test_unbiased_variance() {
// Same as above but with unbiased variance
let x = Matrix::from_vec(vec![2.0, 2.2, 1.8, 4.0, 4.2, 3.8], 6, 1);
let y = Matrix::from_vec(vec![0.0, 0.0, 0.0, 1.0, 1.0, 1.0], 6, 1);
let mut clf = GaussianNB::new(1e-9, true);
clf.fit(&x, &y);
let test = Matrix::from_vec(vec![2.1, 4.1], 2, 1);
let preds = clf.predict(&test);
assert_eq!(preds[(0, 0)], 0.0);
assert_eq!(preds[(1, 0)], 1.0);
}
// Fitting on empty input must panic.
#[test]
#[should_panic]
fn test_empty_input() {
let x = Matrix::zeros(0, 0);
let y = Matrix::zeros(0, 1);
let mut clf = GaussianNB::new(1e-9, false);
clf.fit(&x, &y);
}
// Fitting with a different number of rows in x and y must panic.
#[test]
#[should_panic = "Row count of X and Y must match"]
fn test_mismatched_rows() {
let x = Matrix::from_vec(vec![1.0, 2.0], 2, 1);
let y = Matrix::from_vec(vec![0.0], 1, 1);
let mut clf = GaussianNB::new(1e-9, false);
clf.fit(&x, &y);
}
// With zero smoothing, a zero-variance feature keeps a stored variance of exactly 0.0.
#[test]
fn test_variance_smoothing_override_with_zero_smoothing() {
// Scenario: var_smoothing is 0, and a feature has zero variance within a class.
// This should trigger the `if var[(0, j)] <= 0.0 { var[(0, j)] = smoothing; }` line.
let x = Matrix::from_vec(vec![1.0, 1.0, 2.0], 3, 1); // Class 0: [1.0, 1.0], Class 1: [2.0]
let y = Matrix::from_vec(vec![0.0, 0.0, 1.0], 3, 1);
let mut clf = GaussianNB::new(0.0, false); // var_smoothing = 0.0
clf.fit(&x, &y);
// For class 0 (index 0 in clf.classes), the feature (index 0) had values [1.0, 1.0], so variance was 0.
// Since var_smoothing was 0, smoothing is 0.
// The line `var[(0, j)] = smoothing;` should have set the variance to 0.0.
let class_0_idx = clf.classes.iter().position(|&c| c == 0.0).unwrap();
assert_eq!(clf.variances[class_0_idx][(0, 0)], 0.0);
// For class 1 (index 1 in clf.classes), the feature (index 0) had value [2.0].
// Variance calculation for a single point results in 0.
// The if condition will be true, and var[(0, j)] will be set to smoothing (0.0).
let class_1_idx = clf.classes.iter().position(|&c| c == 1.0).unwrap();
assert_eq!(clf.variances[class_1_idx][(0, 0)], 0.0);
}
}

View File

@ -0,0 +1,374 @@
//! Simple k-means clustering working on [`Matrix`] data.
//!
//! ```
//! use rustframe::compute::models::k_means::KMeans;
//! use rustframe::matrix::Matrix;
//!
//! let data = Matrix::from_vec(vec![1.0, 1.0, 5.0, 5.0], 2, 2);
//! let (model, labels) = KMeans::fit(&data, 2, 10, 1e-4);
//! assert_eq!(model.centroids.rows(), 2);
//! assert_eq!(labels.len(), 2);
//! ```
use crate::compute::stats::mean_vertical;
use crate::matrix::Matrix;
use crate::random::prelude::*;
/// K-means model holding the cluster centres produced by `KMeans::fit`.
pub struct KMeans {
/// Cluster centroids, one row per cluster.
pub centroids: Matrix<f64>, // (k, n_features)
}
impl KMeans {
/// Fit with k clusters.
pub fn fit(x: &Matrix<f64>, k: usize, max_iter: usize, tol: f64) -> (Self, Vec<usize>) {
    // Overview: seed `k` centroids, then alternate between assigning every row
    // of `x` to its nearest centroid (squared Euclidean distance) and moving
    // each centroid to the mean of its members. Stops after `max_iter` rounds,
    // when no label changes, or (if `tol > 0.0`) when the Euclidean norm of the
    // centroid shift drops below `tol`. Returns the model and one label per row.
    let n_samples = x.rows();
    let n_features = x.cols();
    assert!(k <= n_samples, "k must be ≤ number of samples");

    // ----- seed centroids -----
    let mut centroids = Matrix::zeros(k, n_features);
    if k == 1 {
        // A single cluster starts at the column means. The assertion above
        // guarantees at least one row in this branch.
        let mean = mean_vertical(x);
        centroids.row_copy_from_slice(0, &mean.data());
    } else if k > 1 {
        // Several clusters: shuffle the row indices and take the first `k`,
        // which yields `k` distinct rows.
        let mut rng = rng();
        let mut order: Vec<usize> = (0..n_samples).collect();
        order.shuffle(&mut rng);
        for (c, &sample_idx) in order.iter().take(k).enumerate() {
            centroids.row_copy_from_slice(c, &x.row(sample_idx));
        }
    }

    let mut labels = vec![0usize; n_samples];
    // Squared distance from each row to its assigned centroid; reused below to
    // find replacement points for empty clusters.
    let mut distances = vec![0.0f64; n_samples];

    for _ in 0..max_iter {
        let mut changed = false;

        // ----- assignment step -----
        for (i, label) in labels.iter_mut().enumerate() {
            let sample = x.row(i);
            let mut nearest = 0usize;
            let mut nearest_dist_sq = f64::MAX;
            for c in 0..k {
                let centroid = centroids.row(c);
                let dist_sq: f64 = sample
                    .iter()
                    .zip(centroid.iter())
                    .map(|(a, b)| (a - b).powi(2))
                    .sum();
                // Strict comparison: ties keep the lowest cluster index.
                if dist_sq < nearest_dist_sq {
                    nearest_dist_sq = dist_sq;
                    nearest = c;
                }
            }
            distances[i] = nearest_dist_sq;
            if *label != nearest {
                *label = nearest;
                changed = true;
            }
        }

        // ----- update step -----
        let mut new_centroids = Matrix::zeros(k, n_features);
        let mut counts = vec![0usize; k];
        for (i, &c) in labels.iter().enumerate() {
            counts[c] += 1;
            for j in 0..n_features {
                new_centroids[(c, j)] += x[(i, j)];
            }
        }
        for c in 0..k {
            if counts[c] > 0 {
                // Turn the accumulated sum into a mean.
                let members = counts[c] as f64;
                for j in 0..n_features {
                    new_centroids[(c, j)] /= members;
                }
                continue;
            }
            // Empty cluster: move its centroid onto the row that is furthest
            // from its own centroid so the cluster does not die. Ties (and the
            // all-zero case) resolve to the lowest row index.
            let (furthest, _) = distances.iter().enumerate().fold(
                (0usize, 0.0f64),
                |(best_i, best_d), (i, &d)| {
                    if d > best_d {
                        (i, d)
                    } else {
                        (best_i, best_d)
                    }
                },
            );
            for j in 0..n_features {
                new_centroids[(c, j)] = x[(furthest, j)];
            }
            // Zero the distance so another empty cluster in this same
            // iteration does not pick the same row again.
            distances[furthest] = 0.0;
        }

        // ----- convergence test -----
        if !changed {
            // Assignments are stable; keep the freshly computed centroids.
            centroids = new_centroids;
            break;
        }
        let diff = &new_centroids - &centroids;
        centroids = new_centroids;
        // The shift is only evaluated when a positive tolerance was requested.
        let converged = tol > 0.0 && (&diff * &diff).data().iter().sum::<f64>().sqrt() < tol;
        if converged {
            break;
        }
    }

    (Self { centroids }, labels)
}
/// Label every row of `x` with the index of its nearest centroid.
///
/// Distance is squared Euclidean; ties go to the lowest centroid index.
/// An input with zero rows yields an empty vector.
pub fn predict(&self, x: &Matrix<f64>) -> Vec<usize> {
    let n_clusters = self.centroids.rows();
    (0..x.rows())
        .map(|i| {
            let sample = x.row(i);
            let mut nearest = 0usize;
            let mut nearest_dist_sq = f64::MAX;
            for c in 0..n_clusters {
                let centroid = self.centroids.row(c);
                let dist_sq: f64 = sample
                    .iter()
                    .zip(centroid.iter())
                    .map(|(a, b)| (a - b).powi(2))
                    .sum();
                if dist_sq < nearest_dist_sq {
                    nearest_dist_sq = dist_sq;
                    nearest = c;
                }
            }
            nearest
        })
        .collect()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::matrix::FloatMatrix;

    /// Two well-separated groups: (1,1), (1.5,1.5) and (5,8), (8,8), (6,7).
    fn create_test_data() -> (FloatMatrix, usize) {
        let data = vec![
            1.0, 1.0, // Sample 0
            1.5, 1.5, // Sample 1
            5.0, 8.0, // Sample 2
            8.0, 8.0, // Sample 3
            6.0, 7.0, // Sample 4
        ];
        let x = FloatMatrix::from_rows_vec(data, 5, 2);
        let k = 2;
        (x, k)
    }

    /// Points (1,1), (2,2), (3,3); used by the single-cluster test where the mean is exact.
    fn create_simple_integer_data() -> FloatMatrix {
        FloatMatrix::from_rows_vec(vec![1.0, 1.0, 2.0, 2.0, 3.0, 3.0], 3, 2)
    }

    #[test]
    fn test_k_means_empty_cluster_reinit_centroid() {
        // Initialisation is random, so try several times to increase the chance
        // of hitting the empty-cluster case.
        for _ in 0..20 {
            let data = vec![0.0, 0.0, 0.0, 0.0, 10.0, 10.0];
            let x = FloatMatrix::from_rows_vec(data, 3, 2);
            let k = 2;
            let max_iter = 10;
            let tol = 1e-6;
            let (kmeans_model, labels) = KMeans::fit(&x, k, max_iter, tol);

            // Count members per cluster to detect an empty one.
            let mut counts = vec![0; k];
            for &label in &labels {
                counts[label] += 1;
            }
            if counts.iter().any(|&c| c == 0) {
                // Only empty clusters carry the property under test: their
                // centroid must coincide with one of the data points.
                let centroids = kmeans_model.centroids;
                for c in 0..k {
                    if counts[c] == 0 {
                        let mut matches_data_point = false;
                        for i in 0..3 {
                            let dx = centroids[(c, 0)] - x[(i, 0)];
                            let dy = centroids[(c, 1)] - x[(i, 1)];
                            if dx.abs() < 1e-9 && dy.abs() < 1e-9 {
                                matches_data_point = true;
                                break;
                            }
                        }
                        assert!(
                            matches_data_point,
                            "Centroid {} (empty cluster) does not match any data point",
                            c
                        );
                    }
                }
                break;
            }
        }
        // If we never saw an empty cluster, that's fine; the test passes as long as no panic occurred.
    }

    #[test]
    fn test_k_means_fit_predict_basic() {
        let (x, k) = create_test_data();
        let max_iter = 100;
        let tol = 1e-6;
        let (kmeans_model, labels) = KMeans::fit(&x, k, max_iter, tol);

        // Assertions for fit
        assert_eq!(kmeans_model.centroids.rows(), k);
        assert_eq!(kmeans_model.centroids.cols(), x.cols());
        assert_eq!(labels.len(), x.rows());

        // Labels must lie in 0..k
        for &label in &labels {
            assert!(label < k);
        }

        // Predict with the same data
        let predicted_labels = kmeans_model.predict(&x);

        // The exact labels might vary due to random initialization,
        // but the clustering should be consistent: samples 0,1 in one
        // cluster and samples 2,3,4 in the other.
        let cluster_0_members = vec![labels[0], labels[1]];
        let cluster_1_members = vec![labels[2], labels[3], labels[4]];

        // All members of cluster 0 should have the same label
        assert_eq!(cluster_0_members[0], cluster_0_members[1]);
        // All members of cluster 1 should have the same label
        assert_eq!(cluster_1_members[0], cluster_1_members[1]);
        assert_eq!(cluster_1_members[0], cluster_1_members[2]);
        // The two clusters should have different labels
        assert_ne!(cluster_0_members[0], cluster_1_members[0]);

        // Predicted labels must agree with the fitted labels
        assert_eq!(labels, predicted_labels);

        // A new sample close to cluster 0
        let new_sample_data = vec![1.2, 1.3];
        let new_sample = FloatMatrix::from_rows_vec(new_sample_data, 1, 2);
        let new_sample_label = kmeans_model.predict(&new_sample)[0];
        assert_eq!(new_sample_label, cluster_0_members[0]);

        // A new sample close to cluster 1
        let new_sample_data_2 = vec![7.0, 7.5];
        let new_sample_2 = FloatMatrix::from_rows_vec(new_sample_data_2, 1, 2);
        let new_sample_label_2 = kmeans_model.predict(&new_sample_2)[0];
        assert_eq!(new_sample_label_2, cluster_1_members[0]);
    }

    #[test]
    fn test_k_means_fit_k_equals_m() {
        // k (number of clusters) equals m (number of samples)
        let (x, _) = create_test_data(); // 5 samples
        let k = 5; // 5 clusters
        let max_iter = 10;
        let tol = 1e-6;
        let (kmeans_model, labels) = KMeans::fit(&x, k, max_iter, tol);
        assert_eq!(kmeans_model.centroids.rows(), k);
        assert_eq!(labels.len(), x.rows());

        // Each sample should be its own cluster. Due to random init, labels
        // might not be [0,1,2,3,4] but will be a permutation of it.
        let mut sorted_labels = labels.clone();
        sorted_labels.sort_unstable();
        sorted_labels.dedup();
        // Labels should all be unique when k == m
        assert_eq!(sorted_labels.len(), k);
    }

    #[test]
    #[should_panic(expected = "k must be ≤ number of samples")]
    fn test_k_means_fit_k_greater_than_m() {
        let (x, _) = create_test_data(); // 5 samples
        let k = 6; // k > m
        let max_iter = 10;
        let tol = 1e-6;
        let (_kmeans_model, _labels) = KMeans::fit(&x, k, max_iter, tol);
    }

    #[test]
    fn test_k_means_fit_single_cluster() {
        // k = 1 on integer-valued data
        let x = create_simple_integer_data();
        let k = 1;
        let max_iter = 100;
        let tol = 1e-6;
        let (kmeans_model, labels) = KMeans::fit(&x, k, max_iter, tol);
        assert_eq!(kmeans_model.centroids.rows(), 1);
        assert_eq!(labels.len(), x.rows());

        // All labels should be 0
        assert!(labels.iter().all(|&l| l == 0));

        // Centroid should be the mean of all data points
        let expected_centroid_x = x.column(0).iter().sum::<f64>() / x.rows() as f64;
        let expected_centroid_y = x.column(1).iter().sum::<f64>() / x.rows() as f64;
        assert!((kmeans_model.centroids[(0, 0)] - expected_centroid_x).abs() < 1e-9);
        assert!((kmeans_model.centroids[(0, 1)] - expected_centroid_y).abs() < 1e-9);
    }

    #[test]
    fn test_k_means_predict_empty_matrix() {
        let (x, k) = create_test_data();
        let max_iter = 10;
        let tol = 1e-6;
        let (kmeans_model, _labels) = KMeans::fit(&x, k, max_iter, tol);

        // The `Matrix` type does not support 0xN or Nx0 matrices;
        // a 0x0 matrix is still a valid edge case to test.
        let empty_x = FloatMatrix::from_rows_vec(vec![], 0, 0);
        let predicted_labels = kmeans_model.predict(&empty_x);
        assert!(predicted_labels.is_empty());
    }

    #[test]
    fn test_k_means_predict_single_sample() {
        let (x, k) = create_test_data();
        let max_iter = 10;
        let tol = 1e-6;
        let (kmeans_model, _labels) = KMeans::fit(&x, k, max_iter, tol);
        let single_sample = FloatMatrix::from_rows_vec(vec![1.1, 1.2], 1, 2);
        let predicted_label = kmeans_model.predict(&single_sample);
        assert_eq!(predicted_label.len(), 1);
        assert!(predicted_label[0] < k);
    }
}

View File

@ -0,0 +1,67 @@
//! Ordinary least squares linear regression.
//!
//! ```
//! use rustframe::compute::models::linreg::LinReg;
//! use rustframe::matrix::Matrix;
//!
//! let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1);
//! let y = Matrix::from_vec(vec![2.0, 3.0, 4.0, 5.0], 4, 1);
//! let mut model = LinReg::new(1);
//! model.fit(&x, &y, 0.01, 100);
//! let preds = model.predict(&x);
//! assert_eq!(preds.rows(), 4);
//! ```
use crate::matrix::{Matrix, SeriesOps};
/// Linear model `y ≈ X·w + b` fitted by batch gradient descent on the
/// mean squared error.
pub struct LinReg {
    /// Weight column vector, shape `(n_features, 1)`.
    w: Matrix<f64>,
    /// Intercept term.
    b: f64,
}

impl LinReg {
    /// Create a model for `n_features` inputs with all parameters set to zero.
    pub fn new(n_features: usize) -> Self {
        let zero_weights = vec![0.0; n_features];
        Self {
            w: Matrix::from_vec(zero_weights, n_features, 1),
            b: 0.0,
        }
    }

    /// Evaluate `X·w + b` for every row of `x`.
    pub fn predict(&self, x: &Matrix<f64>) -> Matrix<f64> {
        let weighted = x.dot(&self.w);
        weighted + self.b
    }

    /// Run `epochs` full-batch gradient-descent steps with learning rate `lr`.
    ///
    /// `y` is expected to have the same shape as the output of `predict(x)`.
    pub fn fit(&mut self, x: &Matrix<f64>, y: &Matrix<f64>, lr: f64, epochs: usize) {
        let m = x.rows() as f64;
        for _ in 0..epochs {
            // Residuals of the current model, shape (m, 1).
            let err = &self.predict(x) - y;

            // Gradients of the mean squared error.
            let grad_w = x.transpose().dot(&err) * (2.0 / m); // (n, 1)
            let err_total = err.sum_vertical().iter().sum::<f64>();
            let grad_b = (2.0 / m) * err_total;

            // Step against the gradient.
            let step_w = grad_w * lr;
            self.w = &self.w - &step_w;
            self.b -= lr * grad_b;
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Fits y = x + 1 on four points and checks the predictions to within 1e-2.
    #[test]
    fn test_linreg_fit_predict() {
        let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1);
        let y = Matrix::from_vec(vec![2.0, 3.0, 4.0, 5.0], 4, 1);
        let mut model = LinReg::new(1);
        model.fit(&x, &y, 0.01, 10000);
        let preds = model.predict(&x);

        let targets: [f64; 4] = [2.0, 3.0, 4.0, 5.0];
        for (row, &target) in targets.iter().enumerate() {
            assert!((preds[(row, 0)] - target).abs() < 1e-2);
        }
    }
}

View File

@ -0,0 +1,68 @@
//! Binary logistic regression classifier.
//!
//! ```
//! use rustframe::compute::models::logreg::LogReg;
//! use rustframe::matrix::Matrix;
//!
//! let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1);
//! let y = Matrix::from_vec(vec![0.0, 0.0, 1.0, 1.0], 4, 1);
//! let mut model = LogReg::new(1);
//! model.fit(&x, &y, 0.1, 100);
//! let preds = model.predict(&x);
//! assert_eq!(preds[(0,0)], 0.0);
//! ```
use crate::compute::models::activations::sigmoid;
use crate::matrix::{Matrix, SeriesOps};
/// Binary classifier `p = σ(X·w + b)` fitted by batch gradient descent.
pub struct LogReg {
    /// Weight column vector, one entry per feature.
    w: Matrix<f64>,
    /// Intercept term.
    b: f64,
}

impl LogReg {
    /// Create a model for `n_features` inputs with all parameters set to zero.
    pub fn new(n_features: usize) -> Self {
        let w = Matrix::zeros(n_features, 1);
        Self { w, b: 0.0 }
    }

    /// Probability of the positive class for every row of `x`: σ(Xw + b).
    pub fn predict_proba(&self, x: &Matrix<f64>) -> Matrix<f64> {
        let logits = x.dot(&self.w) + self.b;
        sigmoid(&logits)
    }

    /// Run `epochs` full-batch gradient-descent steps with learning rate `lr`.
    ///
    /// `y` is expected to hold 0.0 / 1.0 targets shaped like `predict_proba(x)`.
    pub fn fit(&mut self, x: &Matrix<f64>, y: &Matrix<f64>, lr: f64, epochs: usize) {
        let m = x.rows() as f64;
        for _ in 0..epochs {
            // p - y is the derivative of the binary cross-entropy with respect
            // to the pre-sigmoid activation; shape (m, 1).
            let err = &self.predict_proba(x) - y;

            let grad_w = x.transpose().dot(&err) / m;
            let err_total = err.sum_vertical().iter().sum::<f64>();
            let grad_b = err_total / m;

            let step_w = grad_w * lr;
            self.w = &self.w - &step_w;
            self.b -= lr * grad_b;
        }
    }

    /// Hard class labels: 1.0 where the predicted probability is at least 0.5, else 0.0.
    pub fn predict(&self, x: &Matrix<f64>) -> Matrix<f64> {
        let probabilities = self.predict_proba(x);
        probabilities.map(|p| if p >= 0.5 { 1.0 } else { 0.0 })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Separable 1-D data: the two smallest inputs are class 0, the two largest class 1.
    #[test]
    fn test_logreg_fit_predict() {
        let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1);
        let y = Matrix::from_vec(vec![0.0, 0.0, 1.0, 1.0], 4, 1);
        let mut model = LogReg::new(1);
        model.fit(&x, &y, 0.01, 10000);
        let preds = model.predict(&x);

        let expected_classes: [f64; 4] = [0.0, 0.0, 1.0, 1.0];
        for (row, &class) in expected_classes.iter().enumerate() {
            assert_eq!(preds[(row, 0)], class);
        }
    }
}

23
src/compute/models/mod.rs Normal file
View File

@ -0,0 +1,23 @@
//! Lightweight machine-learning models built on matrices.
//!
//! Models are intentionally minimal and operate on the [`Matrix`](crate::matrix::Matrix) type for
//! inputs and parameters.
//!
//! ```
//! use rustframe::compute::models::linreg::LinReg;
//! use rustframe::matrix::Matrix;
//!
//! let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 4, 1);
//! let y = Matrix::from_vec(vec![2.0, 3.0, 4.0, 5.0], 4, 1);
//! let mut model = LinReg::new(1);
//! model.fit(&x, &y, 0.01, 1000);
//! let preds = model.predict(&x);
//! assert_eq!(preds.rows(), 4);
//! ```
pub mod activations;
pub mod dense_nn;
pub mod gaussian_nb;
pub mod k_means;
pub mod linreg;
pub mod logreg;
pub mod pca;

113
src/compute/models/pca.rs Normal file
View File

@ -0,0 +1,113 @@
//! Principal Component Analysis using covariance matrices.
//!
//! ```
//! use rustframe::compute::models::pca::PCA;
//! use rustframe::matrix::Matrix;
//!
//! let data = Matrix::from_rows_vec(vec![1.0, 1.0, 2.0, 2.0], 2, 2);
//! let pca = PCA::fit(&data, 1, 0);
//! let projected = pca.transform(&data);
//! assert_eq!(projected.cols(), 1);
//! ```
use crate::compute::stats::correlation::covariance_matrix;
use crate::compute::stats::descriptive::mean_vertical;
use crate::matrix::{Axis, Matrix, SeriesOps};
/// Projection learned from a data matrix: a set of axes plus the per-feature mean.
///
/// NOTE(review): `fit` fills `components` with the leading rows of the feature
/// covariance matrix. No eigen-decomposition is performed, so these are not
/// eigenvectors.
pub struct PCA {
    /// Projection axes, one per row; shape `(n_components, n_features)`.
    pub components: Matrix<f64>,
    /// Column means of the training data; shape `(1, n_features)`.
    pub mean: Matrix<f64>,
}

impl PCA {
    /// Learn the mean and projection axes from `x` (rows are samples).
    ///
    /// The first `min(n_components, n_features)` rows of the covariance matrix
    /// become the components; any further component rows stay zero.
    /// `_iters` is accepted but not used.
    pub fn fit(x: &Matrix<f64>, n_components: usize, _iters: usize) -> Self {
        // Per-feature mean, then centre the data around it.
        let mean = mean_vertical(x);
        let mean_tiled = mean.broadcast_row_to_target_shape(x.rows(), x.cols());
        let centered = x.zip(&mean_tiled, |value, mu| value - mu);

        // Covariance between features.
        let cov = covariance_matrix(&centered, Axis::Col);

        let mut components = Matrix::zeros(n_components, x.cols());
        let available = n_components.min(cov.rows());
        for i in 0..available {
            components.row_copy_from_slice(i, &cov.row(i));
        }
        PCA { components, mean }
    }

    /// Centre `x` with the stored mean and project it onto the stored components.
    pub fn transform(&self, x: &Matrix<f64>) -> Matrix<f64> {
        let mean_tiled = self.mean.broadcast_row_to_target_shape(x.rows(), x.cols());
        let centered = x.zip(&mean_tiled, |value, mu| value - mu);
        let axes_as_columns = self.components.transpose();
        centered.matrix_mul(&axes_as_columns)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::matrix::Matrix;

    const EPSILON: f64 = 1e-8;

    #[test]
    fn test_pca_basic() {
        // Three points on the line y = x.
        let data = Matrix::from_rows_vec(vec![1.0, 1.0, 2.0, 2.0, 3.0, 3.0], 3, 2);
        let (_n_samples, _n_features) = data.shape();
        let pca = PCA::fit(&data, 1, 0); // one component; the iteration count is unused
        println!("Data shape: {:?}", data.shape());
        println!("PCA mean shape: {:?}", pca.mean.shape());
        println!("PCA components shape: {:?}", pca.components.shape());

        // Mean of both features is 2.0.
        for col in 0..2 {
            assert!((pca.mean.get(0, col) - 2.0).abs() < EPSILON);
        }

        // The covariance matrix of this data is [[1, 1], [1, 1]]. A true
        // principal axis would be ±(0.707, 0.707), but `fit` copies the first
        // covariance row, so the stored component is (1.0, 1.0).
        for col in 0..2 {
            assert!((pca.components.get(0, col) - 1.0).abs() < EPSILON);
        }

        // Centred data projected on (1, 1) gives [-2.0, 0.0, 2.0].
        let transformed_data = pca.transform(&data);
        assert_eq!(transformed_data.rows(), 3);
        assert_eq!(transformed_data.cols(), 1);
        let expected_projection: [f64; 3] = [-2.0, 0.0, 2.0];
        for (row, &expected) in expected_projection.iter().enumerate() {
            assert!((transformed_data.get(row, 0) - expected).abs() < EPSILON);
        }
    }

    #[test]
    fn test_pca_fit_break_branch() {
        // Two features, but one more component requested than features exist.
        let data = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], 3, 2);
        let (_n_samples, n_features) = data.shape();
        let n_components_large = n_features + 1;
        let pca = PCA::fit(&data, n_components_large, 0);

        // The component matrix keeps the requested number of rows ...
        assert_eq!(pca.components.rows(), n_components_large);
        assert_eq!(pca.components.cols(), n_features);

        // ... but only the first `n_features` rows are copied from the
        // covariance matrix; the surplus rows must remain zero.
        for i in n_features..n_components_large {
            for j in 0..n_features {
                assert!((pca.components.get(i, j) - 0.0).abs() < EPSILON);
            }
        }
    }
}

View File

@ -0,0 +1,242 @@
//! Covariance and correlation helpers.
//!
//! This module provides routines for measuring the relationship between
//! columns or rows of matrices.
//!
//! ```
//! use rustframe::compute::stats::correlation;
//! use rustframe::matrix::Matrix;
//!
//! let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
//! let cov = correlation::covariance(&x, &x);
//! assert!((cov - 1.25).abs() < 1e-8);
//! ```
use crate::compute::stats::{mean, mean_horizontal, mean_vertical, stddev};
use crate::matrix::{Axis, Matrix, SeriesOps};
/// Population covariance of two matrices of equal shape, each treated as a
/// flat sequence of values.
///
/// # Panics
///
/// Panics if the row or column counts of `x` and `y` differ.
pub fn covariance(x: &Matrix<f64>, y: &Matrix<f64>) -> f64 {
    assert_eq!(x.rows(), y.rows());
    assert_eq!(x.cols(), y.cols());

    let count = (x.rows() * x.cols()) as f64;
    let (mean_x, mean_y) = (mean(x), mean(y));

    // Sum of products of deviations, element by element.
    let cross_sum = x
        .data()
        .iter()
        .zip(y.data().iter())
        .map(|(&a, &b)| (a - mean_x) * (b - mean_y))
        .sum::<f64>();
    cross_sum / count
}
/// Population covariance matrix along one axis.
///
/// * `Axis::Row`: covariance between every pair of columns; the result is
///   `cols x cols` and sums are divided by the number of rows.
/// * `Axis::Col`: covariance between every pair of rows; the result is
///   `rows x rows` and sums are divided by the number of columns.
fn _covariance_axis(x: &Matrix<f64>, axis: Axis) -> Matrix<f64> {
    match axis {
        Axis::Row => {
            let size = x.cols();
            let divisor = x.rows() as f64;
            let col_means = mean_vertical(x); // 1 x cols
            let mut cells = vec![0.0; size * size];
            for i in 0..size {
                let mu_i = col_means.get(0, i);
                for j in 0..size {
                    let mu_j = col_means.get(0, j);
                    // Accumulate products of deviations over all rows.
                    let cross_sum = (0..x.rows()).fold(0.0_f64, |acc, r| {
                        acc + (x.get(r, i) - mu_i) * (x.get(r, j) - mu_j)
                    });
                    cells[i * size + j] = cross_sum / divisor;
                }
            }
            Matrix::from_vec(cells, size, size)
        }
        Axis::Col => {
            let size = x.rows();
            let divisor = x.cols() as f64;
            let row_means = mean_horizontal(x); // rows x 1
            let mut cells = vec![0.0; size * size];
            for i in 0..size {
                let mu_i = row_means.get(i, 0);
                for j in 0..size {
                    let mu_j = row_means.get(j, 0);
                    // Accumulate products of deviations over all columns.
                    let cross_sum = (0..x.cols()).fold(0.0_f64, |acc, c| {
                        acc + (x.get(i, c) - mu_i) * (x.get(j, c) - mu_j)
                    });
                    cells[i * size + j] = cross_sum / divisor;
                }
            }
            Matrix::from_vec(cells, size, size)
        }
    }
}
/// Population covariance between every pair of columns of `x` (computed across rows).
///
/// Delegates to `_covariance_axis` with `Axis::Row`; the result is `cols x cols`.
pub fn covariance_vertical(x: &Matrix<f64>) -> Matrix<f64> {
_covariance_axis(x, Axis::Row)
}
/// Population covariance between every pair of rows of `x` (computed across columns).
///
/// Delegates to `_covariance_axis` with `Axis::Col`; the result is `rows x rows`.
pub fn covariance_horizontal(x: &Matrix<f64>) -> Matrix<f64> {
_covariance_axis(x, Axis::Col)
}
/// Sample covariance matrix of `x`, which is assumed to be `(n_samples, n_features)`.
///
/// The data is centred according to `axis` and the result is
/// `centeredᵀ · centered / (n_samples - 1)`, an `(n_features, n_features)` matrix.
///
/// * `Axis::Col`: every column has its mean subtracted (the usual
///   feature covariance).
/// * `Axis::Row`: every row has its own mean subtracted instead.
///
/// NOTE(review): for `Axis::Row` the product and divisor are the same as for
/// `Axis::Col`, so the result is not a row-by-row covariance. Confirm whether
/// `centered · centeredᵀ / (n_features - 1)` was intended before relying on it.
pub fn covariance_matrix(x: &Matrix<f64>, axis: Axis) -> Matrix<f64> {
    let (n_samples, n_features) = x.shape();

    // Build a matrix of the same shape as `x` holding the mean to subtract
    // from each element.
    let offsets = match axis {
        Axis::Col => {
            // 1 x n_features row of column means, repeated for every sample.
            mean_vertical(x).broadcast_row_to_target_shape(n_samples, n_features)
        }
        Axis::Row => {
            // n_samples x 1 column of row means, repeated across the features by hand.
            let row_means = mean_horizontal(x);
            let mut tiled = Matrix::zeros(n_samples, n_features);
            for r in 0..n_samples {
                let mean_val = row_means.get(r, 0);
                for c in 0..n_features {
                    *tiled.get_mut(r, c) = *mean_val;
                }
            }
            tiled
        }
    };
    let centered = x.zip(&offsets, |val, m| val - m);

    // (n_features, n_samples) · (n_samples, n_features) -> (n_features, n_features)
    let denominator = n_samples as f64 - 1.0;
    centered.transpose().matrix_mul(&centered) / denominator
}
/// Pearson correlation coefficient of two matrices of equal shape, each
/// treated as a flat sequence of values.
///
/// Returns 0.0 when either input has zero standard deviation, which avoids
/// dividing by zero.
///
/// # Panics
///
/// Panics if the row or column counts of `x` and `y` differ.
pub fn pearson(x: &Matrix<f64>, y: &Matrix<f64>) -> f64 {
    assert_eq!(x.rows(), y.rows());
    assert_eq!(x.cols(), y.cols());

    let cov = covariance(x, y);
    let (std_x, std_y) = (stddev(x), stddev(y));
    if std_x != 0.0 && std_y != 0.0 {
        cov / (std_x * std_y)
    } else {
        0.0
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::matrix::Matrix;

    const EPS: f64 = 1e-8;

    #[test]
    fn test_covariance_scalar_same_matrix() {
        // Values 1, 2, 3, 4 have mean 2.5, so cov(M, M) is their population
        // variance, 1.25.
        let data = vec![1.0, 2.0, 3.0, 4.0];
        let m = Matrix::from_vec(data.clone(), 2, 2);
        let cov = covariance(&m, &m);
        assert!((cov - 1.25).abs() < EPS);
    }

    #[test]
    fn test_covariance_scalar_diff_matrix() {
        // y is two times x: mean_x = 2.5, mean_y = 5.0,
        // cov = sum((xi - 2.5) * (yi - 5.0)) / 4 = 2.5
        let x = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
        let y = Matrix::from_vec(vec![2.0, 4.0, 6.0, 8.0], 2, 2);
        let cov_xy = covariance(&x, &y);
        assert!((cov_xy - 2.5).abs() < EPS);
    }

    #[test]
    fn test_covariance_vertical() {
        // Rows [1, 2] and [3, 4]: columns are [1, 3] and [2, 4], each with
        // variance 1 and mutual covariance 1, so every entry is 1.0.
        let m = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
        let cov_mat = covariance_vertical(&m);
        for cell in 0..4 {
            let (i, j) = (cell / 2, cell % 2);
            assert!((cov_mat.get(i, j) - 1.0).abs() < EPS);
        }
    }

    #[test]
    fn test_covariance_horizontal() {
        // Rows [1, 2] and [3, 4]: each has variance 0.25 and their covariance
        // is 0.25, so every entry is 0.25.
        let m = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
        let cov_mat = covariance_horizontal(&m);
        for cell in 0..4 {
            let (i, j) = (cell / 2, cell % 2);
            assert!((cov_mat.get(i, j) - 0.25).abs() < EPS);
        }
    }

    #[test]
    fn test_covariance_matrix_vertical() {
        // Rows [1, 2] and [3, 4], covariance between columns with divisor n - 1:
        //   Col1 = [1, 3], mean 2;  Col2 = [2, 4], mean 3
        //   Cov(Col1, Col1) = ((1-2)^2 + (3-2)^2) / 1 = 2
        //   Cov(Col2, Col2) = ((2-3)^2 + (4-3)^2) / 1 = 2
        //   Cov(Col1, Col2) = ((1-2)*(2-3) + (3-2)*(4-3)) / 1 = 2
        // so the expected matrix is filled with 2.
        let m = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
        let cov_mat = covariance_matrix(&m, Axis::Col);
        for cell in 0..4 {
            let (i, j) = (cell / 2, cell % 2);
            assert!((cov_mat.get(i, j) - 2.0).abs() < EPS);
        }
    }

    #[test]
    fn test_covariance_matrix_horizontal() {
        // Rows [1, 2] and [3, 4] with Axis::Row: each row is centred by its own
        // mean (1.5 and 3.5), giving [[-0.5, 0.5], [-0.5, 0.5]].
        // The function then returns centeredᵀ · centered / (n_samples - 1):
        //   [[0.5, -0.5], [-0.5, 0.5]]
        // This pins the current behaviour; it is not the row-by-row covariance
        // (which would be 0.5 in every cell).
        let m = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
        let cov_mat = covariance_matrix(&m, Axis::Row);
        for cell in 0..4 {
            let (i, j) = (cell / 2, cell % 2);
            let expected: f64 = if i == j { 0.5 } else { -0.5 };
            assert!((cov_mat.get(i, j) - expected).abs() < EPS);
        }
    }
}

View File

@ -0,0 +1,398 @@
//! Descriptive statistics for matrices.
//!
//! Provides means, variances, medians and other aggregations computed either
//! across the whole matrix or along a specific axis.
//!
//! ```
//! use rustframe::compute::stats::descriptive;
//! use rustframe::matrix::Matrix;
//!
//! let m = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
//! assert_eq!(descriptive::mean(&m), 2.5);
//! ```
use crate::matrix::{Axis, Matrix, SeriesOps};
/// Arithmetic mean of every element of `x`.
pub fn mean(x: &Matrix<f64>) -> f64 {
    let total = x.data().iter().sum::<f64>();
    let count = (x.rows() * x.cols()) as f64;
    total / count
}
/// Mean of each column of `x`, returned as a `1 x cols` matrix.
pub fn mean_vertical(x: &Matrix<f64>) -> Matrix<f64> {
    let n_rows = x.rows() as f64;
    let column_sums = Matrix::from_vec(x.sum_vertical(), 1, x.cols());
    column_sums / n_rows
}
/// Mean of each row of `x`, returned as a `rows x 1` matrix.
pub fn mean_horizontal(x: &Matrix<f64>) -> Matrix<f64> {
    let n_cols = x.cols() as f64;
    let row_sums = Matrix::from_vec(x.sum_horizontal(), x.rows(), 1);
    row_sums / n_cols
}
/// Variance of every element of `x`.
///
/// Divides the sum of squared deviations by the element count when
/// `population` is true, and by the element count minus one otherwise.
fn population_or_sample_variance(x: &Matrix<f64>, population: bool) -> f64 {
    let count = (x.rows() * x.cols()) as f64;
    let center = mean(x);
    let squared_deviations = x
        .data()
        .iter()
        .map(|&v| (v - center).powi(2))
        .sum::<f64>();
    let denominator = if population { count } else { count - 1.0 };
    squared_deviations / denominator
}
/// Population variance of every element of `x` (divides by the element count).
pub fn population_variance(x: &Matrix<f64>) -> f64 {
population_or_sample_variance(x, true)
}
/// Sample variance of every element of `x` (divides by the element count minus one).
pub fn sample_variance(x: &Matrix<f64>) -> f64 {
population_or_sample_variance(x, false)
}
/// Variance along one axis, either population or sample (`n - 1` divisor).
///
/// * `Axis::Row`: one variance per column, returned as `1 x cols`.
/// * `Axis::Col`: one variance per row, returned as `rows x 1`.
fn _population_or_sample_variance_axis(
    x: &Matrix<f64>,
    axis: Axis,
    population: bool,
) -> Matrix<f64> {
    match axis {
        Axis::Row => {
            // Vertical variance: walk down each column.
            let n_rows = x.rows() as f64;
            let denominator = if population { n_rows } else { n_rows - 1.0 };
            let col_means = mean_vertical(x); // 1 x cols
            let variances: Vec<f64> = (0..x.cols())
                .map(|c| {
                    let mu = col_means.get(0, c);
                    let sum_sq = (0..x.rows()).fold(0.0_f64, |acc, r| {
                        let diff = x.get(r, c) - mu;
                        acc + diff * diff
                    });
                    sum_sq / denominator
                })
                .collect();
            Matrix::from_vec(variances, 1, x.cols())
        }
        Axis::Col => {
            // Horizontal variance: walk along each row.
            let n_cols = x.cols() as f64;
            let denominator = if population { n_cols } else { n_cols - 1.0 };
            let row_means = mean_horizontal(x); // rows x 1
            let variances: Vec<f64> = (0..x.rows())
                .map(|r| {
                    let mu = row_means.get(r, 0);
                    let sum_sq = (0..x.cols()).fold(0.0_f64, |acc, c| {
                        let diff = x.get(r, c) - mu;
                        acc + diff * diff
                    });
                    sum_sq / denominator
                })
                .collect();
            Matrix::from_vec(variances, x.rows(), 1)
        }
    }
}
/// Population variance of each column of `x`, returned as a `1 x cols` matrix.
pub fn population_variance_vertical(x: &Matrix<f64>) -> Matrix<f64> {
_population_or_sample_variance_axis(x, Axis::Row, true)
}
/// Population variance of each row of `x`, returned as a `rows x 1` matrix.
pub fn population_variance_horizontal(x: &Matrix<f64>) -> Matrix<f64> {
_population_or_sample_variance_axis(x, Axis::Col, true)
}
/// Sample variance (divisor `rows - 1`) of each column of `x`, returned as a `1 x cols` matrix.
pub fn sample_variance_vertical(x: &Matrix<f64>) -> Matrix<f64> {
_population_or_sample_variance_axis(x, Axis::Row, false)
}
/// Sample variance (divisor `cols - 1`) of each row of `x`, returned as a `rows x 1` matrix.
pub fn sample_variance_horizontal(x: &Matrix<f64>) -> Matrix<f64> {
_population_or_sample_variance_axis(x, Axis::Col, false)
}
/// Population standard deviation of every element of `x`
/// (square root of `population_variance`).
pub fn stddev(x: &Matrix<f64>) -> f64 {
population_variance(x).sqrt()
}
/// Population standard deviation of each column of `x`, returned as a `1 x cols` matrix.
pub fn stddev_vertical(x: &Matrix<f64>) -> Matrix<f64> {
population_variance_vertical(x).map(|v| v.sqrt())
}
/// Population standard deviation of each row of `x`, returned as a `rows x 1` matrix.
pub fn stddev_horizontal(x: &Matrix<f64>) -> Matrix<f64> {
population_variance_horizontal(x).map(|v| v.sqrt())
}
/// Median of every element of `x`.
///
/// For an even number of elements the two middle values are averaged.
///
/// # Panics
///
/// Panics if two values cannot be ordered (for example NaN) or if `x` holds
/// no elements.
pub fn median(x: &Matrix<f64>) -> f64 {
    let mut sorted = x.data().to_vec();
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());

    let len = sorted.len();
    let upper = len / 2;
    if len % 2 != 0 {
        sorted[upper]
    } else {
        (sorted[upper - 1] + sorted[upper]) / 2.0
    }
}
/// Median along one axis.
///
/// * `Axis::Col`: median of each column, returned as `1 x cols`.
/// * `Axis::Row`: median of each row, returned as `rows x 1`.
///
/// Note that this axis convention is the opposite of the one used by
/// `_population_or_sample_variance_axis`.
///
/// # Panics
///
/// Panics if two values in a line cannot be ordered (for example NaN).
fn _median_axis(x: &Matrix<f64>, axis: Axis) -> Matrix<f64> {
    // Always work column-wise. The column case borrows `x` directly; only the
    // row case needs a transposed copy, so no full clone is made.
    let transposed;
    let mx: &Matrix<f64> = match axis {
        Axis::Col => x,
        Axis::Row => {
            transposed = x.transpose();
            &transposed
        }
    };

    let mut result = Vec::with_capacity(mx.cols());
    for c in 0..mx.cols() {
        let mut col = mx.column(c).to_vec();
        col.sort_by(|a, b| a.partial_cmp(b).unwrap());
        let mid = col.len() / 2;
        if col.len() % 2 == 0 {
            // Even length: average the two middle values.
            result.push((col[mid - 1] + col[mid]) / 2.0);
        } else {
            result.push(col[mid]);
        }
    }

    // `mx.cols()` is the number of lines processed in either orientation.
    let (r, c) = match axis {
        Axis::Col => (1, mx.cols()),
        Axis::Row => (mx.cols(), 1),
    };
    Matrix::from_vec(result, r, c)
}
/// Median of each column of `x`, returned as a `1 x cols` matrix.
pub fn median_vertical(x: &Matrix<f64>) -> Matrix<f64> {
_median_axis(x, Axis::Col)
}
/// Median of each row of `x`, returned as a `rows x 1` matrix.
pub fn median_horizontal(x: &Matrix<f64>) -> Matrix<f64> {
_median_axis(x, Axis::Row)
}
/// `p`-th percentile of every element of `x`, using the nearest rank.
///
/// The values are sorted and the element at index
/// `round(p / 100 * (len - 1))` is returned; no interpolation is performed.
///
/// # Panics
///
/// Panics with "Percentile must be between 0 and 100" if `p` lies outside
/// `0.0..=100.0`. This includes NaN, which previously slipped through the
/// range check and silently selected the smallest element. Also panics if two
/// values cannot be ordered (for example NaN data).
pub fn percentile(x: &Matrix<f64>, p: f64) -> f64 {
    // `contains` is false for NaN, so NaN is rejected along with out-of-range values.
    if !(0.0..=100.0).contains(&p) {
        panic!("Percentile must be between 0 and 100");
    }
    let mut data = x.data().to_vec();
    data.sort_by(|a, b| a.partial_cmp(b).unwrap());
    let index = ((p / 100.0) * (data.len() as f64 - 1.0)).round() as usize;
    data[index]
}
/// `p`-th percentile (nearest rank, no interpolation) along one axis.
///
/// * `Axis::Col`: percentile of each column, returned as `1 x cols`.
/// * `Axis::Row`: percentile of each row, returned as `rows x 1`.
///
/// # Panics
///
/// Panics with "Percentile must be between 0 and 100" if `p` lies outside
/// `0.0..=100.0`. This includes NaN, which previously slipped through the
/// range check and silently selected the smallest element of each line. Also
/// panics if two values in a line cannot be ordered (for example NaN data).
fn _percentile_axis(x: &Matrix<f64>, p: f64, axis: Axis) -> Matrix<f64> {
    // `contains` is false for NaN, so NaN is rejected along with out-of-range values.
    if !(0.0..=100.0).contains(&p) {
        panic!("Percentile must be between 0 and 100");
    }

    // Always work column-wise. The column case borrows `x` directly; only the
    // row case needs a transposed copy, so no full clone is made.
    let transposed;
    let mx: &Matrix<f64> = match axis {
        Axis::Col => x,
        Axis::Row => {
            transposed = x.transpose();
            &transposed
        }
    };

    let mut result = Vec::with_capacity(mx.cols());
    for c in 0..mx.cols() {
        let mut col = mx.column(c).to_vec();
        col.sort_by(|a, b| a.partial_cmp(b).unwrap());
        let index = ((p / 100.0) * (col.len() as f64 - 1.0)).round() as usize;
        result.push(col[index]);
    }

    // `mx.cols()` is the number of lines processed in either orientation.
    let (r, c) = match axis {
        Axis::Col => (1, mx.cols()),
        Axis::Row => (mx.cols(), 1),
    };
    Matrix::from_vec(result, r, c)
}
/// `p`-th percentile of each column of `x`, returned as a `1 x cols` matrix.
pub fn percentile_vertical(x: &Matrix<f64>, p: f64) -> Matrix<f64> {
_percentile_axis(x, p, Axis::Col)
}
/// `p`-th percentile of each row of `x`, returned as a `rows x 1` matrix.
pub fn percentile_horizontal(x: &Matrix<f64>, p: f64) -> Matrix<f64> {
_percentile_axis(x, p, Axis::Row)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::matrix::Matrix;
const EPSILON: f64 = 1e-8;
#[test]
fn test_descriptive_stats_regular_values() {
let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let x = Matrix::from_vec(data, 1, 5);
// Mean
assert!((mean(&x) - 3.0).abs() < EPSILON);
// Variance
assert!((population_variance(&x) - 2.0).abs() < EPSILON);
// Standard Deviation
assert!((stddev(&x) - 1.4142135623730951).abs() < EPSILON);
// Median
assert!((median(&x) - 3.0).abs() < EPSILON);
// Percentile
assert!((percentile(&x, 0.0) - 1.0).abs() < EPSILON);
assert!((percentile(&x, 25.0) - 2.0).abs() < EPSILON);
assert!((percentile(&x, 50.0) - 3.0).abs() < EPSILON);
assert!((percentile(&x, 75.0) - 4.0).abs() < EPSILON);
assert!((percentile(&x, 100.0) - 5.0).abs() < EPSILON);
let data_even = vec![1.0, 2.0, 3.0, 4.0];
let x_even = Matrix::from_vec(data_even, 1, 4);
assert!((median(&x_even) - 2.5).abs() < EPSILON);
}
#[test]
fn test_descriptive_stats_outlier() {
let data = vec![1.0, 2.0, 3.0, 4.0, 100.0];
let x = Matrix::from_vec(data, 1, 5);
// Mean should be heavily affected by outlier
assert!((mean(&x) - 22.0).abs() < EPSILON);
// Variance should be heavily affected by outlier
assert!((population_variance(&x) - 1522.0).abs() < EPSILON);
// Standard Deviation should be heavily affected by outlier
assert!((stddev(&x) - 39.0128183970461).abs() < EPSILON);
// Median should be robust to outlier
assert!((median(&x) - 3.0).abs() < EPSILON);
}
#[test]
#[should_panic(expected = "Percentile must be between 0 and 100")]
fn test_percentile_panic_low() {
let data = vec![1.0, 2.0, 3.0];
let x = Matrix::from_vec(data, 1, 3);
percentile(&x, -1.0);
}
#[test]
#[should_panic(expected = "Percentile must be between 0 and 100")]
fn test_percentile_panic_high() {
let data = vec![1.0, 2.0, 3.0];
let x = Matrix::from_vec(data, 1, 3);
percentile(&x, 101.0);
}
#[test]
fn test_mean_vertical_horizontal() {
// 2x3 matrix:
let data = vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0];
let x = Matrix::from_vec(data, 2, 3);
// Vertical means (per column): [(1+4)/2, (2+5)/2, (3+6)/2]
let mv = mean_vertical(&x);
assert!((mv.get(0, 0) - 2.5).abs() < EPSILON);
assert!((mv.get(0, 1) - 3.5).abs() < EPSILON);
assert!((mv.get(0, 2) - 4.5).abs() < EPSILON);
// Horizontal means (per row): [(1+2+3)/3, (4+5+6)/3]
let mh = mean_horizontal(&x);
assert!((mh.get(0, 0) - 2.0).abs() < EPSILON);
assert!((mh.get(1, 0) - 5.0).abs() < EPSILON);
}
#[test]
fn test_variance_vertical_horizontal() {
let data = vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0];
let x = Matrix::from_vec(data, 2, 3);
// cols: {1,4}, {2,5}, {3,6} all give 2.25
let vv = population_variance_vertical(&x);
for c in 0..3 {
assert!((vv.get(0, c) - 2.25).abs() < EPSILON);
}
let vh = population_variance_horizontal(&x);
assert!((vh.get(0, 0) - (2.0 / 3.0)).abs() < EPSILON);
assert!((vh.get(1, 0) - (2.0 / 3.0)).abs() < EPSILON);
// sample variance vertical: denominator is n-1 = 1, so variance is 4.5
let svv = sample_variance_vertical(&x);
for c in 0..3 {
assert!((svv.get(0, c) - 4.5).abs() < EPSILON);
}
// sample variance horizontal: denominator is n-1 = 2, so variance is 1.0
let svh = sample_variance_horizontal(&x);
assert!((svh.get(0, 0) - 1.0).abs() < EPSILON);
assert!((svh.get(1, 0) - 1.0).abs() < EPSILON);
}
#[test]
fn test_stddev_vertical_horizontal() {
let data = vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0];
let x = Matrix::from_vec(data, 2, 3);
// Stddev is sqrt of variance
let sv = stddev_vertical(&x);
for c in 0..3 {
assert!((sv.get(0, c) - 1.5).abs() < EPSILON);
}
let sh = stddev_horizontal(&x);
// sqrt(2/3) ≈ 0.816497
let expected = (2.0 / 3.0 as f64).sqrt();
assert!((sh.get(0, 0) - expected).abs() < EPSILON);
assert!((sh.get(1, 0) - expected).abs() < EPSILON);
// sample stddev vertical: sqrt(4.5) ≈ 2.12132034
let ssv = sample_variance_vertical(&x).map(|v| v.sqrt());
for c in 0..3 {
assert!((ssv.get(0, c) - 2.1213203435596424).abs() < EPSILON);
}
// sample stddev horizontal: sqrt(1.0) = 1.0
let ssh = sample_variance_horizontal(&x).map(|v| v.sqrt());
assert!((ssh.get(0, 0) - 1.0).abs() < EPSILON);
assert!((ssh.get(1, 0) - 1.0).abs() < EPSILON);
}
#[test]
fn test_median_vertical_horizontal() {
    // Column-major 2x3 input: columns {1,4}, {2,5}, {3,6}; rows {1,2,3}, {4,5,6}.
    let data = vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0];
    let x = Matrix::from_vec(data, 2, 3);

    // Median of each two-element column is the midpoint of its values.
    let mv = median_vertical(&x).row(0);
    let expected_v = vec![2.5, 3.5, 4.5];
    // The message previously printed the expected and actual values in swapped positions.
    assert_eq!(mv, expected_v, "expected {:?}, got {:?}", expected_v, mv);

    // Median of each three-element row is its middle value.
    let mh = median_horizontal(&x).column(0).to_vec();
    let expected_h = vec![2.0, 5.0];
    assert_eq!(mh, expected_h, "expected {:?}, got {:?}", expected_h, mh);
}
#[test]
fn test_percentile_vertical_horizontal() {
    // Values 1..=24 stored column-major in a 4x6 matrix: column j holds
    // 4j+1..=4j+4, so each row steps by four.
    let values: Vec<f64> = (1..=24).map(|v| v as f64).collect();
    let x = Matrix::from_vec(values, 4, 6);

    // Per-column percentiles (one result per column).
    let vertical_cases = [
        (0.0, vec![1., 5., 9., 13., 17., 21.]),
        (50.0, vec![3., 7., 11., 15., 19., 23.]),
        (100.0, vec![4., 8., 12., 16., 20., 24.]),
    ];
    for (pct, expected) in vertical_cases {
        assert_eq!(percentile_vertical(&x, pct).data(), expected);
    }

    // Per-row percentiles (one result per row).
    let horizontal_cases = [
        (0.0, vec![1., 2., 3., 4.]),
        (50.0, vec![13., 14., 15., 16.]),
        (100.0, vec![21., 22., 23., 24.]),
    ];
    for (pct, expected) in horizontal_cases {
        assert_eq!(percentile_horizontal(&x, pct).data(), expected);
    }
}
#[test]
#[should_panic(expected = "Percentile must be between 0 and 100")]
fn test_percentile_out_of_bounds() {
    // A negative percentile is rejected with a panic.
    let x = Matrix::from_vec(vec![1.0, 2.0, 3.0], 1, 3);
    percentile(&x, -10.0);
}
#[test]
#[should_panic(expected = "Percentile must be between 0 and 100")]
fn test_percentile_vertical_out_of_bounds() {
    // Even a slightly negative percentile must trigger the range check.
    let matrix = Matrix::from_vec(vec![1.0, 2.0, 3.0], 1, 3);
    let _ = percentile_vertical(&matrix, -0.1);
}
}

View File

@ -0,0 +1,395 @@
//! Probability distribution functions applied element-wise to matrices.
//!
//! Includes the normal, uniform, gamma, binomial and Poisson distributions as
//! well as approximations of the error function and the gamma function.
//!
//! ```
//! use rustframe::compute::stats::distributions;
//! use rustframe::matrix::Matrix;
//!
//! let x = Matrix::from_vec(vec![0.0], 1, 1);
//! let pdf = distributions::normal_pdf(x.clone(), 0.0, 1.0);
//! assert!((pdf.get(0,0) - 0.3989).abs() < 1e-3);
//! ```
use crate::matrix::{Matrix, SeriesOps};
use std::f64::consts::PI;
/// Approximates the error function with the rational formula from
/// Abramowitz & Stegun 7.1.26.
///
/// The polynomial is evaluated for `|x|` and the sign of `x` is restored at
/// the end.
fn erf_func(x: f64) -> f64 {
    // Coefficients a5, a4, a3, a2, a1, ordered for Horner evaluation.
    const HORNER: [f64; 5] = [
        1.061405429,
        -1.453152027,
        1.421413741,
        -0.284496736,
        0.254829592,
    ];
    const P: f64 = 0.3275911;

    let magnitude = x.abs();
    let t = 1.0 / (1.0 + P * magnitude);
    let poly = HORNER[1..]
        .iter()
        .fold(HORNER[0], |acc, &coef| acc * t + coef);
    let y = 1.0 - poly * t * (-magnitude * magnitude).exp();
    if x < 0.0 {
        -y
    } else {
        y
    }
}
/// Approximation of the error function for matrices
pub fn erf(x: Matrix<f64>) -> Matrix<f64> {
x.map(|v| erf_func(v))
}
/// Density of the Normal(`mean`, `sd`) distribution at `x`.
fn normal_pdf_func(x: f64, mean: f64, sd: f64) -> f64 {
    let standardized = (x - mean) / sd;
    // Normalising constant 1 / (sd * sqrt(2π)).
    let scale = 1.0 / (sd * (2.0 * PI).sqrt());
    scale * (-0.5 * standardized * standardized).exp()
}
/// Evaluates the Normal(`mean`, `sd`) density at every element of `x`.
pub fn normal_pdf(x: Matrix<f64>, mean: f64, sd: f64) -> Matrix<f64> {
    let density = |value| normal_pdf_func(value, mean, sd);
    x.map(density)
}
/// Cumulative probability of the Normal(`mean`, `sd`) distribution at `x`,
/// computed from the error-function approximation.
fn normal_cdf_func(x: f64, mean: f64, sd: f64) -> f64 {
    // erf argument: (x - mean) / (sd * sqrt(2)).
    let erf_arg = (x - mean) / (sd * 2.0_f64.sqrt());
    let shifted = 1.0 + erf_func(erf_arg);
    0.5 * shifted
}
/// Evaluates the Normal(`mean`, `sd`) CDF at every element of `x`.
pub fn normal_cdf(x: Matrix<f64>, mean: f64, sd: f64) -> Matrix<f64> {
    let cumulative = |value| normal_cdf_func(value, mean, sd);
    x.map(cumulative)
}
/// Density of the Uniform distribution on [a, b] at `x`.
///
/// Returns 0 outside the interval and `1 / (b - a)` otherwise.
fn uniform_pdf_func(x: f64, a: f64, b: f64) -> f64 {
    let outside = x < a || x > b;
    if outside {
        return 0.0;
    }
    1.0 / (b - a)
}
/// Evaluates the Uniform[a, b] density at every element of `x`.
pub fn uniform_pdf(x: Matrix<f64>, a: f64, b: f64) -> Matrix<f64> {
    let density = |value| uniform_pdf_func(value, a, b);
    x.map(density)
}
/// Cumulative probability of the Uniform distribution on [a, b] at `x`.
///
/// Returns 0 below `a`, the linear ramp `(x - a) / (b - a)` up to and including
/// `b`, and 1 otherwise.
fn uniform_cdf_func(x: f64, a: f64, b: f64) -> f64 {
    if x < a {
        return 0.0;
    }
    if x <= b {
        return (x - a) / (b - a);
    }
    1.0
}
/// Evaluates the Uniform[a, b] CDF at every element of `x`.
pub fn uniform_cdf(x: Matrix<f64>, a: f64, b: f64) -> Matrix<f64> {
    let cumulative = |value| uniform_cdf_func(value, a, b);
    x.map(cumulative)
}
/// Gamma function via the Lanczos approximation.
///
/// Arguments below 0.5 are handled with the reflection formula
/// `Γ(z) = π / (sin(πz) · Γ(1 − z))`; everything else goes through the
/// Lanczos series directly.
fn gamma_func(z: f64) -> f64 {
    // Lanczos series coefficients (the constant term is the fold seed below).
    const LANCZOS: [f64; 8] = [
        676.5203681218851,
        -1259.1392167224028,
        771.32342877765313,
        -176.61502916214059,
        12.507343278686905,
        -0.13857109526572012,
        9.9843695780195716e-6,
        1.5056327351493116e-7,
    ];

    if z < 0.5 {
        return PI / ((PI * z).sin() * gamma_func(1.0 - z));
    }

    let shifted = z - 1.0;
    let series = LANCZOS
        .iter()
        .enumerate()
        .fold(0.99999999999980993, |acc, (i, &coef)| {
            acc + coef / (shifted + (i as f64) + 1.0)
        });
    let t = shifted + LANCZOS.len() as f64 - 0.5;
    (2.0 * PI).sqrt() * t.powf(shifted + 0.5) * (-t).exp() * series
}
pub fn gamma(z: Matrix<f64>) -> Matrix<f64> {
z.map(|v| gamma_func(v))
}
/// Lower incomplete gamma function γ(s, x) via its power-series expansion.
///
/// Exactly 100 series terms are summed (the first is `1/s`), which is intended
/// for `x < s + 1`.
// NOTE(review): no convergence check is made, so accuracy for x much larger
// than s + 1 is not guaranteed — confirm the ranges callers rely on.
fn lower_incomplete_gamma_func(s: f64, x: f64) -> f64 {
    let first = 1.0 / s;
    // Carry (running sum, latest term); each term is the previous one times x / (s + n).
    let (series, _) = (1..100).fold((first, first), |(sum, term), n| {
        let next = term * (x / (s + n as f64));
        (sum + next, next)
    });
    series * x.powf(s) * (-x).exp()
}
/// Element-wise lower incomplete gamma: pairs each shape in `s` with the
/// corresponding upper limit in `x`.
pub fn lower_incomplete_gamma(s: Matrix<f64>, x: Matrix<f64>) -> Matrix<f64> {
    s.zip(&x, |shape, upper| lower_incomplete_gamma_func(shape, upper))
}
/// Density of the Gamma distribution with shape `k` and scale `theta` at `x`.
///
/// Returns 0 for negative `x`.
fn gamma_pdf_func(x: f64, k: f64, theta: f64) -> f64 {
    if x < 0.0 {
        0.0
    } else {
        // Normalising constant 1 / (Γ(k) · θ^k).
        let normaliser = 1.0 / (gamma_func(k) * theta.powf(k));
        normaliser * x.powf(k - 1.0) * (-(x / theta)).exp()
    }
}
/// Evaluates the Gamma(`k`, `theta`) density at every element of `x`.
pub fn gamma_pdf(x: Matrix<f64>, k: f64, theta: f64) -> Matrix<f64> {
    let density = |value| gamma_pdf_func(value, k, theta);
    x.map(density)
}
/// Cumulative probability of the Gamma distribution (shape `k`, scale `theta`)
/// at `x`: the lower incomplete gamma γ(k, x/θ) divided by Γ(k).
///
/// Returns 0 for negative `x`.
fn gamma_cdf_func(x: f64, k: f64, theta: f64) -> f64 {
    if x < 0.0 {
        0.0
    } else {
        let scaled = x / theta;
        lower_incomplete_gamma_func(k, scaled) / gamma_func(k)
    }
}
/// Evaluates the Gamma(`k`, `theta`) CDF at every element of `x`.
pub fn gamma_cdf(x: Matrix<f64>, k: f64, theta: f64) -> Matrix<f64> {
    let cumulative = |value| gamma_cdf_func(value, k, theta);
    x.map(cumulative)
}
// ---- Factorials and combinations ----

/// Computes `n!` as an `f64` (works up to ~170 reliably).
///
/// `factorial(0)` is 1, the empty product.
fn factorial(n: u64) -> f64 {
    (1..=n).fold(1.0, |acc, i| acc * i as f64)
}
/// Computes "n choose k" as an `f64`.
///
/// Returns 0 when `k > n` (there is no way to choose more items than exist);
/// previously that case underflowed the unsigned subtraction `n - k`.
///
/// The product is built one factor at a time, multiplying by `(n - i)` and
/// dividing by `(i + 1)` at each step, so the running value after step `i` is
/// the binomial coefficient C(n, i + 1). Accumulating the numerator and
/// denominator separately overflowed to `inf / inf = NaN` for large `n`.
fn binomial_coeff(n: u64, k: u64) -> f64 {
    if k > n {
        return 0.0;
    }
    // Use the symmetry C(n, k) = C(n, n - k) to keep the loop short.
    let k = k.min(n - k);
    (0..k).fold(1.0, |acc, i| acc * (n - i) as f64 / (i + 1) as f64)
}
/// Probability of exactly `k` successes in `n` trials with success
/// probability `p`. Returns 0 when `k > n`.
fn binomial_pmf_func(n: u64, k: u64, p: f64) -> f64 {
    if k <= n {
        let success_part = p.powf(k as f64);
        let failure_part = (1.0 - p).powf((n - k) as f64);
        binomial_coeff(n, k) * success_part * failure_part
    } else {
        0.0
    }
}
/// Evaluates the Binomial(`n`, `p`) PMF for every success count in `k`,
/// returning a matrix of the same shape.
pub fn binomial_pmf(n: u64, k: Matrix<u64>, p: f64) -> Matrix<f64> {
    let probabilities: Vec<f64> = k
        .data()
        .iter()
        .map(|&successes| binomial_pmf_func(n, successes, p))
        .collect();
    Matrix::from_vec(probabilities, k.rows(), k.cols())
}
/// Cumulative probability P(X ≤ k) for X ~ Binomial(`n`, `p`), obtained by
/// summing the PMF.
///
/// The summation stops at `min(k, n)`: every PMF term beyond `n` is zero, so
/// the result is unchanged while the loop stays bounded by `n` even for very
/// large `k`.
fn binomial_cdf_func(n: u64, k: u64, p: f64) -> f64 {
    (0..=k.min(n)).map(|i| binomial_pmf_func(n, i, p)).sum()
}
/// Evaluates the Binomial(`n`, `p`) CDF for every success count in `k`,
/// returning a matrix of the same shape.
pub fn binomial_cdf(n: u64, k: Matrix<u64>, p: f64) -> Matrix<f64> {
    let cumulative: Vec<f64> = k
        .data()
        .iter()
        .map(|&successes| binomial_cdf_func(n, successes, p))
        .collect();
    Matrix::from_vec(cumulative, k.rows(), k.cols())
}
/// PMF of the Poisson(λ) distribution: `λ^k · e^{-λ} / k!`.
///
/// The direct formula is used while both `λ^k` and `k!` are finite. Once
/// either overflows `f64` (for example `k` above ~170, or a large `λ`), the
/// direct formula yields 0, `inf` or `NaN`, so the value is evaluated in log
/// space instead: `exp(k·ln λ − λ − ln k!)`.
fn poisson_pmf_func(lambda: f64, k: u64) -> f64 {
    let power = lambda.powf(k as f64);
    let k_factorial = factorial(k);
    if power.is_finite() && k_factorial.is_finite() {
        return power * (-lambda).exp() / k_factorial;
    }
    // ln k! as a sum of logarithms, which cannot overflow for any iterable k.
    let ln_k_factorial: f64 = (1..=k).map(|i| (i as f64).ln()).sum();
    (k as f64 * lambda.ln() - lambda - ln_k_factorial).exp()
}
/// Evaluates the Poisson(λ) PMF for every count in `k`, returning a matrix of
/// the same shape.
pub fn poisson_pmf(lambda: f64, k: Matrix<u64>) -> Matrix<f64> {
    let probabilities: Vec<f64> = k
        .data()
        .iter()
        .map(|&count| poisson_pmf_func(lambda, count))
        .collect();
    Matrix::from_vec(probabilities, k.rows(), k.cols())
}
/// Cumulative probability P(X ≤ k) for X ~ Poisson(λ), obtained by summing
/// the PMF over 0..=k.
fn poisson_cdf_func(lambda: f64, k: u64) -> f64 {
    let mut total = 0.0;
    for count in 0..=k {
        total += poisson_pmf_func(lambda, count);
    }
    total
}
/// Evaluates the Poisson(λ) CDF for every count in `k`, returning a matrix of
/// the same shape.
pub fn poisson_cdf(lambda: f64, k: Matrix<u64>) -> Matrix<f64> {
    let cumulative: Vec<f64> = k
        .data()
        .iter()
        .map(|&count| poisson_cdf_func(lambda, count))
        .collect();
    Matrix::from_vec(cumulative, k.rows(), k.cols())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance shared by every approximate comparison below.
    const TOL: f64 = 1e-7;

    /// Returns true when `actual` is within `TOL` of `expected`.
    fn close(actual: f64, expected: f64) -> bool {
        (actual - expected).abs() < TOL
    }

    #[test]
    fn test_math_funcs() {
        // Error function: zero at the origin and odd-symmetric.
        assert!(close(erf_func(0.0), 0.0));
        assert!(close(erf_func(1.0), 0.8427007));
        assert!(close(erf_func(-1.0), -0.8427007));

        // Gamma function at positive integers: Γ(n) = (n - 1)!.
        for (z, expected) in [(1.0, 1.0), (2.0, 1.0), (3.0, 2.0), (4.0, 6.0), (5.0, 24.0)] {
            assert!(close(gamma_func(z), expected));
        }

        // Reflection branch (z < 0.5), checked against the known value of Γ(0.3)
        // rather than against the same formula the implementation uses.
        assert!(close(gamma_func(0.3), 2.991_568_987_687_59));
    }

    #[test]
    fn test_math_matrix() {
        let ones = Matrix::filled(5, 5, 1.0);
        let erf_result = erf(ones.clone());
        assert!(close(erf_result.data()[0], 0.8427007));
        let gamma_result = gamma(ones);
        assert!(close(gamma_result.data()[0], 1.0));
    }

    #[test]
    fn test_normal_funcs() {
        assert!(close(normal_pdf_func(0.0, 0.0, 1.0), 0.39894228));
        assert!(close(normal_cdf_func(1.0, 0.0, 1.0), 0.8413447));
    }

    #[test]
    fn test_normal_matrix() {
        let zeros = Matrix::filled(5, 5, 0.0);
        let pdf = normal_pdf(zeros.clone(), 0.0, 1.0);
        let cdf = normal_cdf(zeros, 0.0, 1.0);
        assert!(close(pdf.data()[0], 0.39894228));
        assert!(close(cdf.data()[0], 0.5));
    }

    #[test]
    fn test_uniform_funcs() {
        assert_eq!(uniform_pdf_func(0.5, 0.0, 1.0), 1.0);
        assert_eq!(uniform_cdf_func(-1.0, 0.0, 1.0), 0.0);
        assert_eq!(uniform_cdf_func(0.5, 0.0, 1.0), 0.5);

        // The density is 0 outside [a, b], on either side.
        assert_eq!(uniform_pdf_func(-0.5, 0.0, 1.0), 0.0);
        assert_eq!(uniform_pdf_func(1.5, 0.0, 1.0), 0.0);

        // The CDF saturates at 1 above b.
        assert_eq!(uniform_cdf_func(1.5, 0.0, 1.0), 1.0);
        assert_eq!(uniform_cdf_func(2.0, 0.0, 1.0), 1.0);
    }

    #[test]
    fn test_uniform_matrix() {
        let halves = Matrix::filled(5, 5, 0.5);
        let pdf = uniform_pdf(halves.clone(), 0.0, 1.0);
        let cdf = uniform_cdf(halves, 0.0, 1.0);
        assert_eq!(pdf.data()[0], 1.0);
        assert_eq!(cdf.data()[0], 0.5);
    }

    #[test]
    fn test_binomial_funcs() {
        assert!(close(binomial_pmf_func(5, 2, 0.5), 0.3125));
        // CDF at k = 2 is pmf(0) + pmf(1) + pmf(2).
        assert!(close(binomial_cdf_func(5, 2, 0.5), 0.03125 + 0.15625 + 0.3125));
        let pmf_zero = binomial_pmf_func(5, 6, 0.5);
        assert!(pmf_zero == 0.0, "PMF should be 0 for k > n");
    }

    #[test]
    fn test_binomial_matrix() {
        let k = Matrix::filled(5, 5, 2_u64);
        let pmf = binomial_pmf(5, k.clone(), 0.5);
        let cdf = binomial_cdf(5, k, 0.5);
        assert!(close(pmf.data()[0], 0.3125));
        assert!(close(cdf.data()[0], 0.03125 + 0.15625 + 0.3125));
    }

    #[test]
    fn test_poisson_funcs() {
        let pmf: f64 = poisson_pmf_func(3.0, 2);
        // λ = 3, k = 2: 3^2 · e^-3 / 2!.
        assert!(close(pmf, 3.0_f64.powf(2.0) * (-3.0_f64).exp() / 2.0));
        let cdf: f64 = poisson_cdf_func(3.0, 2);
        assert!(close(
            cdf,
            pmf + poisson_pmf_func(3.0, 0) + poisson_pmf_func(3.0, 1)
        ));
    }

    #[test]
    fn test_poisson_matrix() {
        let k = Matrix::filled(5, 5, 2_u64);
        let pmf = poisson_pmf(3.0, k.clone());
        let cdf = poisson_cdf(3.0, k);
        assert!(close(
            pmf.data()[0],
            3.0_f64.powf(2.0) * (-3.0_f64).exp() / 2.0
        ));
        assert!(close(
            cdf.data()[0],
            pmf.data()[0] + poisson_pmf_func(3.0, 0) + poisson_pmf_func(3.0, 1)
        ));
    }

    #[test]
    fn test_gamma_funcs() {
        // For k = 1, θ = 1 the Gamma distribution is Exp(1): pdf(x) = e^-x, cdf(x) = 1 - e^-x.
        assert!(close(gamma_pdf_func(2.0, 1.0, 1.0), (-2.0_f64).exp()));
        assert!(close(gamma_cdf_func(2.0, 1.0, 1.0), 1.0 - (-2.0_f64).exp()));

        // Negative x is outside the support.
        assert_eq!(gamma_pdf_func(-1.0, 1.0, 1.0), 0.0);
        assert_eq!(gamma_cdf_func(-1.0, 1.0, 1.0), 0.0);
    }

    #[test]
    fn test_gamma_matrix() {
        let twos = Matrix::filled(5, 5, 2.0);
        let pdf = gamma_pdf(twos.clone(), 1.0, 1.0);
        let cdf = gamma_cdf(twos, 1.0, 1.0);
        assert!(close(pdf.data()[0], (-2.0_f64).exp()));
        assert!(close(cdf.data()[0], 1.0 - (-2.0_f64).exp()));
    }

    #[test]
    fn test_lower_incomplete_gamma() {
        // The matrix wrapper must agree with the scalar routine element-wise.
        let s = Matrix::filled(5, 5, 2.0);
        let x = Matrix::filled(5, 5, 1.0);
        let expected = lower_incomplete_gamma_func(2.0, 1.0);
        let result = lower_incomplete_gamma(s, x);
        assert!(close(result.data()[0], expected));
    }
}

View File

@ -0,0 +1,142 @@
//! Basic inferential statistics such as t-tests, chi-square tests and one-way ANOVA.
//!
//! ```
//! use rustframe::compute::stats::inferential;
//! use rustframe::matrix::Matrix;
//!
//! let a = Matrix::from_vec(vec![1.0, 2.0], 2, 1);
//! let b = Matrix::from_vec(vec![1.1, 1.9], 2, 1);
//! let (t, _p) = inferential::t_test(&a, &b);
//! assert!(t.abs() < 1.0);
//! ```
use crate::matrix::{Matrix, SeriesOps};
use crate::compute::stats::{gamma_cdf, mean, sample_variance};
/// Two-sample t-test returning `(t_statistic, p_value)`.
///
/// Uses the unpooled (Welch) form: the statistic is the difference of the
/// sample means divided by `sqrt(var1/n1 + var2/n2)`, and the degrees of
/// freedom come from the Welch–Satterthwaite equation. The p-value is
/// two-tailed and taken from the Student t distribution; previously it was a
/// hard-coded placeholder of 0.5.
///
/// Every element of each matrix is treated as one observation. If the
/// statistic or the degrees of freedom is not a number (for example both
/// samples have zero variance), the p-value is `NaN`.
pub fn t_test(sample1: &Matrix<f64>, sample2: &Matrix<f64>) -> (f64, f64) {
    let mean1 = mean(sample1);
    let mean2 = mean(sample2);
    let var1 = sample_variance(sample1);
    let var2 = sample_variance(sample2);
    let n1 = (sample1.rows() * sample1.cols()) as f64;
    let n2 = (sample2.rows() * sample2.cols()) as f64;

    let t_statistic = (mean1 - mean2) / ((var1 / n1 + var2 / n2).sqrt());

    // Degrees of freedom from the Welch–Satterthwaite equation.
    let df = (var1 / n1 + var2 / n2).powi(2)
        / ((var1 / n1).powi(2) / (n1 - 1.0) + (var2 / n2).powi(2) / (n2 - 1.0));

    let p_value = student_t_two_tailed_p(t_statistic, df);
    (t_statistic, p_value)
}

/// Two-tailed p-value `P(|T| >= |t|)` for a Student t variable with `df`
/// degrees of freedom, via the identity `p = I_x(df/2, 1/2)` with
/// `x = df / (df + t²)`.
fn student_t_two_tailed_p(t: f64, df: f64) -> f64 {
    if t.is_nan() || df.is_nan() || df <= 0.0 {
        return f64::NAN;
    }
    if t.is_infinite() {
        return 0.0;
    }
    regularized_incomplete_beta(0.5 * df, 0.5, df / (df + t * t))
}

/// Regularized incomplete beta function `I_x(a, b)` for `a, b > 0`.
fn regularized_incomplete_beta(a: f64, b: f64, x: f64) -> f64 {
    if x <= 0.0 {
        return 0.0;
    }
    if x >= 1.0 {
        return 1.0;
    }
    // Prefactor x^a (1-x)^b / B(a, b), evaluated in log space.
    let ln_front = ln_gamma(a + b) - ln_gamma(a) - ln_gamma(b) + a * x.ln() + b * (1.0 - x).ln();
    let front = ln_front.exp();
    // The continued fraction converges quickly only below this threshold;
    // above it, use the symmetry I_x(a, b) = 1 - I_{1-x}(b, a).
    if x < (a + 1.0) / (a + b + 2.0) {
        front * incomplete_beta_continued_fraction(a, b, x) / a
    } else {
        1.0 - front * incomplete_beta_continued_fraction(b, a, 1.0 - x) / b
    }
}

/// Continued-fraction part of the incomplete beta function, evaluated with the
/// modified Lentz method.
fn incomplete_beta_continued_fraction(a: f64, b: f64, x: f64) -> f64 {
    const MAX_ITERATIONS: u32 = 300;
    const EPSILON: f64 = 3.0e-16;
    // Floor that keeps the Lentz recurrences away from division by zero.
    const TINY: f64 = 1.0e-300;
    let guard = |v: f64| if v.abs() < TINY { TINY } else { v };

    let qab = a + b;
    let qap = a + 1.0;
    let qam = a - 1.0;
    let mut c = 1.0;
    let mut d = 1.0 / guard(1.0 - qab * x / qap);
    let mut h = d;
    for m in 1..=MAX_ITERATIONS {
        let m = f64::from(m);
        let m2 = 2.0 * m;
        // Even step of the recurrence.
        let even = m * (b - m) * x / ((qam + m2) * (a + m2));
        d = 1.0 / guard(1.0 + even * d);
        c = guard(1.0 + even / c);
        h *= d * c;
        // Odd step of the recurrence.
        let odd = -(a + m) * (qab + m) * x / ((a + m2) * (qap + m2));
        d = 1.0 / guard(1.0 + odd * d);
        c = guard(1.0 + odd / c);
        let delta = d * c;
        h *= delta;
        if (delta - 1.0).abs() < EPSILON {
            break;
        }
    }
    h
}

/// Natural logarithm of the gamma function for `x > 0` (Lanczos approximation).
fn ln_gamma(x: f64) -> f64 {
    const COEFFICIENTS: [f64; 6] = [
        76.18009172947146,
        -86.50532032941677,
        24.01409824083091,
        -1.231739572450155,
        0.1208650973866179e-2,
        -0.5395239384953e-5,
    ];
    let shifted = x + 5.5;
    let log_term = shifted - (x + 0.5) * shifted.ln();
    let mut denominator = x;
    let mut series = 1.000000000190015;
    for coef in COEFFICIENTS {
        denominator += 1.0;
        series += coef / denominator;
    }
    // 2.5066282746310005 is sqrt(2π).
    -log_term + (2.5066282746310005 * series / x).ln()
}
/// Chi-square test of independence on a contingency table of observed counts,
/// returning `(chi2_statistic, p_value)`.
///
/// Expected counts are `row_sum * col_sum / grand_total`, and the statistic is
/// the sum of `(observed - expected)² / expected` over all cells. The p-value
/// is the upper tail of the chi-square distribution with
/// `(rows - 1) * (cols - 1)` degrees of freedom.
pub fn chi2_test(observed: &Matrix<f64>) -> (f64, f64) {
    let (rows, cols) = observed.shape();
    let row_sums: Vec<f64> = observed.sum_horizontal();
    let col_sums: Vec<f64> = observed.sum_vertical();
    let grand_total: f64 = observed.data().iter().sum();

    let mut chi2_statistic: f64 = 0.0;
    for i in 0..rows {
        for j in 0..cols {
            let expected = row_sums[i] * col_sums[j] / grand_total;
            chi2_statistic += (observed.get(i, j) - expected).powi(2) / expected;
        }
    }

    let degrees_of_freedom = (rows - 1) * (cols - 1);

    // A chi-square distribution with `df` degrees of freedom is a Gamma
    // distribution with shape df/2 and scale 2. The scale was previously
    // passed as 1.0, which evaluated the CDF at twice the intended point.
    let p_value = 1.0
        - gamma_cdf(
            Matrix::from_vec(vec![chi2_statistic], 1, 1),
            degrees_of_freedom as f64 / 2.0,
            2.0,
        )
        .get(0, 0);

    (chi2_statistic, p_value)
}
/// One-way ANOVA returning `(f_statistic, p_value)`.
///
/// Every element of each group matrix is treated as one observation.
///
/// * The grand mean is the size-weighted mean of the group means (the mean of
///   all observations); it was previously the unweighted mean of the group
///   means, which is wrong for groups of unequal size.
/// * The within-group sum of squares is `Σ (n_i − 1) · s_i²`; it was
///   previously `Σ n_i · s_i²`.
/// * The p-value uses the large-sample limit in which `dfb · F` follows a
///   chi-square distribution with `dfb` degrees of freedom, i.e. a Gamma
///   distribution with shape `dfb / 2` and scale 2. This ignores the
///   within-group degrees of freedom, so treat it as an approximation for
///   small samples.
pub fn anova(groups: Vec<&Matrix<f64>>) -> (f64, f64) {
    let k = groups.len(); // Number of groups

    let sizes: Vec<usize> = groups.iter().map(|&g| g.rows() * g.cols()).collect();
    let group_means: Vec<f64> = groups.iter().map(|&g| mean(g)).collect();
    let group_variances: Vec<f64> = groups.iter().map(|&g| sample_variance(g)).collect();
    let n: usize = sizes.iter().sum(); // Total number of observations

    // Mean of all observations: group means weighted by group size.
    let grand_mean: f64 = sizes
        .iter()
        .zip(&group_means)
        .map(|(&size, &group_mean)| size as f64 * group_mean)
        .sum::<f64>()
        / n as f64;

    // Sum of Squares Between Groups (SSB).
    let ssb: f64 = sizes
        .iter()
        .zip(&group_means)
        .map(|(&size, &group_mean)| (group_mean - grand_mean).powi(2) * size as f64)
        .sum();

    // Sum of Squares Within Groups (SSW).
    // NOTE(review): assumes `sample_variance` divides by n - 1, matching the
    // sample-variance helpers exercised in the descriptive tests — confirm.
    let ssw: f64 = sizes
        .iter()
        .zip(&group_variances)
        .map(|(&size, &variance)| variance * (size as f64 - 1.0))
        .sum();

    let dfb = (k - 1) as f64;
    let dfw = (n - k) as f64;

    let msb = ssb / dfb;
    let msw = ssw / dfw;
    let f_statistic = msb / msw;

    // Approximate p-value: upper tail of chi-square(dfb) evaluated at dfb * F.
    let p_value = 1.0
        - gamma_cdf(
            Matrix::from_vec(vec![dfb * f_statistic], 1, 1),
            dfb / 2.0,
            2.0,
        )
        .get(0, 0);

    (f_statistic, p_value)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::matrix::Matrix;

    const EPS: f64 = 1e-5;

    /// True when `p` lies strictly between 0 and 1.
    fn strictly_inside_unit_interval(p: f64) -> bool {
        p > 0.0 && p < 1.0
    }

    /// Builds a 1x5 matrix holding five consecutive values starting at `start`.
    fn consecutive_five(start: f64) -> Matrix<f64> {
        Matrix::from_vec((0..5).map(|i| start + i as f64).collect(), 1, 5)
    }

    #[test]
    fn test_t_test() {
        // Samples 1..=5 and 6..=10 give a t statistic of -5.
        let sample1 = consecutive_five(1.0);
        let sample2 = consecutive_five(6.0);
        let (t_statistic, p_value) = t_test(&sample1, &sample2);
        assert!((t_statistic + 5.0).abs() < EPS);
        assert!(strictly_inside_unit_interval(p_value));
    }

    #[test]
    fn test_chi2_test() {
        let observed = Matrix::from_vec(vec![12.0, 5.0, 8.0, 10.0], 2, 2);
        let (chi2_statistic, p_value) = chi2_test(&observed);
        assert!(chi2_statistic > 0.0);
        assert!(strictly_inside_unit_interval(p_value));
    }

    #[test]
    fn test_anova() {
        // Three equally sized groups, each shifted by one from the previous.
        let group1 = consecutive_five(1.0);
        let group2 = consecutive_five(2.0);
        let group3 = consecutive_five(3.0);
        let (f_statistic, p_value) = anova(vec![&group1, &group2, &group3]);
        assert!(f_statistic > 0.0);
        assert!(strictly_inside_unit_interval(p_value));
    }
}

22
src/compute/stats/mod.rs Normal file
View File

@ -0,0 +1,22 @@
//! Statistical routines for matrices.
//!
//! Functions are grouped into submodules for descriptive statistics,
//! correlations, probability distributions and basic inferential tests.
//!
//! ```
//! use rustframe::compute::stats;
//! use rustframe::matrix::Matrix;
//!
//! let m = Matrix::from_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
//! let cov = stats::covariance(&m, &m);
//! assert!((cov - 1.25).abs() < 1e-8);
//! ```
pub mod correlation;
pub mod descriptive;
pub mod distributions;
pub mod inferential;
pub use correlation::*;
pub use descriptive::*;
pub use distributions::*;
pub use inferential::*;

View File

@ -1,3 +1,19 @@
//! Core data-frame structures such as [`Frame`] and [`RowIndex`].
//!
//! The [`Frame`] type stores column-labelled data with an optional row index
//! and builds upon the [`crate::matrix::Matrix`] type.
//!
//! # Examples
//!
//! ```
//! use rustframe::frame::{Frame, RowIndex};
//! use rustframe::matrix::Matrix;
//!
//! let data = Matrix::from_cols(vec![vec![1, 2], vec![3, 4]]);
//! let frame = Frame::new(data, vec!["L", "R"], Some(RowIndex::Int(vec![10, 20])));
//! assert_eq!(frame.columns(), &["L", "R"]);
//! assert_eq!(frame.index(), &RowIndex::Int(vec![10, 20]));
//! ```
use crate::matrix::Matrix; use crate::matrix::Matrix;
use chrono::NaiveDate; use chrono::NaiveDate;
use std::collections::HashMap; use std::collections::HashMap;

View File

@ -1,3 +1,21 @@
//! High-level interface for working with columnar data and row indices.
//!
//! The [`Frame`](crate::frame::Frame) type combines a matrix with column labels and a typed row
//! index, similar to data frames in other data-analysis libraries.
//!
//! # Examples
//!
//! ```
//! use rustframe::frame::{Frame, RowIndex};
//! use rustframe::matrix::Matrix;
//!
//! // Build a frame from two columns labelled "A" and "B".
//! let data = Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]);
//! let frame = Frame::new(data, vec!["A", "B"], None);
//!
//! assert_eq!(frame["A"], vec![1.0, 2.0]);
//! assert_eq!(frame.index(), &RowIndex::Range(0..2));
//! ```
pub mod base; pub mod base;
pub mod ops; pub mod ops;

View File

@ -1,3 +1,16 @@
//! Trait implementations that allow [`Frame`] to reuse matrix operations.
//!
//! These modules forward numeric and boolean aggregation methods from the
//! underlying [`Matrix`](crate::matrix::Matrix) type so that they can be called
//! directly on a [`Frame`].
//!
//! ```
//! use rustframe::frame::Frame;
//! use rustframe::matrix::{Matrix, SeriesOps};
//!
//! let frame = Frame::new(Matrix::from_cols(vec![vec![1.0, 2.0]]), vec!["A"], None);
//! assert_eq!(frame.sum_vertical(), vec![3.0]);
//! ```
use crate::frame::Frame; use crate::frame::Frame;
use crate::matrix::{Axis, BoolMatrix, BoolOps, FloatMatrix, SeriesOps}; use crate::matrix::{Axis, BoolMatrix, BoolOps, FloatMatrix, SeriesOps};

View File

@ -8,3 +8,9 @@ pub mod frame;
/// Documentation for the [`crate::utils`] module. /// Documentation for the [`crate::utils`] module.
pub mod utils; pub mod utils;
/// Documentation for the [`crate::compute`] module.
pub mod compute;
/// Documentation for the [`crate::random`] module.
pub mod random;

View File

@ -1,3 +1,14 @@
//! Logical reductions for boolean matrices.
//!
//! The [`BoolOps`] trait mirrors common boolean aggregations such as `any` and
//! `all` over rows or columns of a [`BoolMatrix`].
//!
//! ```
//! use rustframe::matrix::{BoolMatrix, BoolOps};
//!
//! let m = BoolMatrix::from_vec(vec![true, false], 2, 1);
//! assert!(m.any());
//! ```
use crate::matrix::{Axis, BoolMatrix}; use crate::matrix::{Axis, BoolMatrix};
/// Boolean operations on `Matrix<bool>` /// Boolean operations on `Matrix<bool>`

View File

@ -63,6 +63,19 @@ impl<T: Clone> Matrix<T> {
Matrix { rows, cols, data } Matrix { rows, cols, data }
} }
/// Build from a flat Vec given in row-major order.
///
/// The elements are rearranged into the column-major layout expected by
/// [`Matrix::from_vec`].
///
/// # Panics
/// Panics if `data.len()` is not `rows * cols`. Previously a short input
/// failed with a bare index-out-of-bounds panic and a long input was
/// silently truncated.
pub fn from_rows_vec(data: Vec<T>, rows: usize, cols: usize) -> Self {
    assert_eq!(
        data.len(),
        rows * cols,
        "from_rows_vec: data length {} does not match {} rows x {} columns",
        data.len(),
        rows,
        cols
    );
    let mut new_vec = Vec::with_capacity(rows * cols);
    // Walk the target in column-major order, reading the row-major source.
    for c in 0..cols {
        for r in 0..rows {
            new_vec.push(data[r * cols + c].clone());
        }
    }
    Matrix::from_vec(new_vec, rows, cols)
}
pub fn data(&self) -> &[T] { pub fn data(&self) -> &[T] {
&self.data &self.data
} }
@ -89,6 +102,10 @@ impl<T: Clone> Matrix<T> {
self.cols self.cols
} }
pub fn shape(&self) -> (usize, usize) {
(self.rows, self.cols)
}
/// Get element reference (immutable). Panics on out-of-bounds. /// Get element reference (immutable). Panics on out-of-bounds.
pub fn get(&self, r: usize, c: usize) -> &T { pub fn get(&self, r: usize, c: usize) -> &T {
&self[(r, c)] &self[(r, c)]
@ -179,6 +196,40 @@ impl<T: Clone> Matrix<T> {
self.cols -= 1; self.cols -= 1;
} }
/// Returns a copy of row `r` as a `Vec`, cloning each element.
///
/// # Panics
/// Panics if `r` is not a valid row index.
#[inline]
pub fn row(&self, r: usize) -> Vec<T> {
    assert!(
        r < self.rows,
        "row index {} out of bounds for {} rows",
        r,
        self.rows
    );
    (0..self.cols).map(|c| self[(r, c)].clone()).collect()
}
/// Overwrites row `r` with clones of the elements of `values`.
///
/// # Panics
/// Panics if `r` is not a valid row index or if `values.len()` differs from
/// the number of columns.
pub fn row_copy_from_slice(&mut self, r: usize, values: &[T]) {
    assert!(
        r < self.rows,
        "row index {} out of bounds for {} rows",
        r,
        self.rows
    );
    assert!(
        values.len() == self.cols,
        "input slice length {} does not match number of columns {}",
        values.len(),
        self.cols
    );
    // In column-major storage the elements of row `r` sit at r, r + rows,
    // r + 2*rows, ... The stride is at least 1 because `r < self.rows` holds.
    let stride = self.rows;
    let row_slots = self.data.iter_mut().skip(r).step_by(stride);
    for (slot, value) in row_slots.zip(values) {
        *slot = value.clone();
    }
}
/// Deletes a row from the matrix. Panics on out-of-bounds. /// Deletes a row from the matrix. Panics on out-of-bounds.
/// This is O(N) where N is the number of elements, as it rebuilds the data vec. /// This is O(N) where N is the number of elements, as it rebuilds the data vec.
pub fn delete_row(&mut self, row: usize) { pub fn delete_row(&mut self, row: usize) {
@ -308,6 +359,82 @@ impl<T: Clone> Matrix<T> {
self.data = new_data; self.data = new_data;
self.rows = new_rows; self.rows = new_rows;
} }
/// Return a new `n`-row matrix in which every row is a copy of row 0 of `self`.
pub fn repeat_rows(&self, n: usize) -> Matrix<T>
where
    T: Clone,
{
    let mut data = Vec::with_capacity(n * self.cols());
    // Column-major output: each value of row 0 fills one whole column.
    for value in self.row(0) {
        data.extend((0..n).map(|_| value.clone()));
    }
    Matrix::from_vec(data, n, self.cols)
}
/// Creates a `rows` x `cols` matrix in which every element is a clone of `value`.
pub fn filled(rows: usize, cols: usize, value: T) -> Self {
    let data = vec![value; rows * cols];
    Matrix { rows, cols, data }
}
/// Creates a new matrix by broadcasting a 1-row matrix to a target shape.
///
/// # Panics
/// Panics if `self` is not a 1-row matrix or if `self.cols()` does not match
/// `target_cols`.
pub fn broadcast_row_to_target_shape(
    &self,
    target_rows: usize,
    target_cols: usize,
) -> Matrix<T> {
    assert_eq!(
        self.rows(),
        1,
        "broadcast_row_to_target_shape can only be called on a 1-row matrix."
    );
    assert_eq!(
        self.cols(),
        target_cols,
        "Column count mismatch for broadcasting: source has {} columns, target has {} columns.",
        self.cols(),
        target_cols
    );

    let source_row = self.row(0);
    let mut data = Vec::with_capacity(target_rows * target_cols);
    // Emit the result directly in column-major order: each source value
    // fills one column of `target_rows` entries.
    for value in &source_row {
        for _ in 0..target_rows {
            data.push(value.clone());
        }
    }
    Matrix::from_vec(data, target_rows, target_cols)
}
}
impl Matrix<f64> {
/// Creates a new matrix filled with zeros of the specified size.
pub fn zeros(rows: usize, cols: usize) -> Self {
Matrix::filled(rows, cols, 0.0)
}
/// Creates a new matrix filled with ones of the specified size.
pub fn ones(rows: usize, cols: usize) -> Self {
Matrix::filled(rows, cols, 1.0)
}
/// Creates a new matrix filled with NaN values of the specified size.
pub fn nan(rows: usize, cols: usize) -> Matrix<f64> {
Matrix::filled(rows, cols, f64::NAN)
}
} }
impl<T> Index<(usize, usize)> for Matrix<T> { impl<T> Index<(usize, usize)> for Matrix<T> {
@ -899,21 +1026,28 @@ mod tests {
assert_eq!(m.to_vec(), vec![1.0, 3.0, 2.0, 4.0]); assert_eq!(m.to_vec(), vec![1.0, 3.0, 2.0, 4.0]);
} }
#[test]
fn test_from_rows_vec() {
    // Row-major input describing the rows [1, 2, 3] and [4, 5, 6].
    let built = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], 2, 3);

    // The same matrix written out in column-major order.
    let expected = Matrix::from_vec(vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0], 2, 3);

    assert_eq!(built, expected);
}
// Helper function to create a basic Matrix for testing // Helper function to create a basic Matrix for testing
fn static_test_matrix() -> Matrix<i32> { fn static_test_matrix() -> Matrix<i32> {
// Column-major data: // Column-major data representing a 3x3 matrix of sequential integers
// 1 4 7
// 2 5 8
// 3 6 9
let data = vec![1, 2, 3, 4, 5, 6, 7, 8, 9]; let data = vec![1, 2, 3, 4, 5, 6, 7, 8, 9];
Matrix::from_vec(data, 3, 3) Matrix::from_vec(data, 3, 3)
} }
// Another helper for a different size // Another helper for a different size
fn static_test_matrix_2x4() -> Matrix<i32> { fn static_test_matrix_2x4() -> Matrix<i32> {
// Column-major data: // Column-major data representing a 2x4 matrix of sequential integers
// 1 3 5 7
// 2 4 6 8
let data = vec![1, 2, 3, 4, 5, 6, 7, 8]; let data = vec![1, 2, 3, 4, 5, 6, 7, 8];
Matrix::from_vec(data, 2, 4) Matrix::from_vec(data, 2, 4)
} }
@ -991,10 +1125,7 @@ mod tests {
#[test] #[test]
fn test_from_cols_basic() { fn test_from_cols_basic() {
// Representing: // Matrix with columns forming a 3x3 sequence
// 1 4 7
// 2 5 8
// 3 6 9
let cols_data = vec![vec![1, 2, 3], vec![4, 5, 6], vec![7, 8, 9]]; let cols_data = vec![vec![1, 2, 3], vec![4, 5, 6], vec![7, 8, 9]];
let matrix = Matrix::from_cols(cols_data); let matrix = Matrix::from_cols(cols_data);
@ -1110,6 +1241,70 @@ mod tests {
matrix[(0, 3)] = 99; matrix[(0, 3)] = 99;
} }
#[test]
fn test_row() {
    // Each row of the 3x3 fixture steps by three across the columns.
    let ma = static_test_matrix();
    let expected_rows = [vec![1, 4, 7], vec![2, 5, 8], vec![3, 6, 9]];
    for (r, expected) in expected_rows.into_iter().enumerate() {
        assert_eq!(ma.row(r), expected);
    }
}
#[test]
fn test_row_copy_from_slice() {
    // Overwriting row 1 must be visible through `row`.
    let mut ma = static_test_matrix();
    let replacement = vec![10, 20, 30];
    ma.row_copy_from_slice(1, &replacement);
    assert_eq!(ma.row(1), replacement);
}
#[test]
#[should_panic(expected = "row index 4 out of bounds for 3 rows")]
fn test_row_copy_from_slice_out_of_bounds() {
    // Row 4 does not exist in a 3-row matrix.
    let mut ma = static_test_matrix();
    ma.row_copy_from_slice(4, &[10, 20, 30]);
}
#[test]
#[should_panic(expected = "row index 3 out of bounds for 3 rows")]
fn test_row_out_of_bounds_index() {
    // Valid row indices are 0..3, so index 3 is one past the end.
    let matrix = static_test_matrix();
    let _ = matrix.row(3);
}
#[test]
#[should_panic(expected = "input slice length 2 does not match number of columns 3")]
fn test_row_copy_from_slice_wrong_length() {
    let mut ma = static_test_matrix();
    // Two values cannot fill a three-column row.
    ma.row_copy_from_slice(1, &[10, 20]);
}
#[test]
fn test_shape() {
    // `shape` must agree with the individual `rows` and `cols` accessors.
    let ma = static_test_matrix_2x4();
    let (rows, cols) = ma.shape();
    assert_eq!((rows, cols), (2, 4));
    assert_eq!(ma.rows(), 2);
    assert_eq!(ma.cols(), 4);
}
#[test]
fn test_repeat_rows() {
    let ma = static_test_matrix();
    let first_row = ma.row(0);

    // `repeat_rows(3)` yields a matrix whose rows are all copies of row 0.
    let repeated = ma.repeat_rows(3);
    for r in 0..repeated.rows() {
        assert_eq!(repeated.row(r), first_row);
    }
}
#[test]
#[should_panic(expected = "row index 3 out of bounds for 3 rows")]
fn test_row_out_of_bounds() {
    // NOTE(review): exercises the same case as `test_row_out_of_bounds_index`.
    let matrix = static_test_matrix();
    let _ = matrix.row(3);
}
#[test] #[test]
fn test_column() { fn test_column() {
let matrix = static_test_matrix_2x4(); let matrix = static_test_matrix_2x4();
@ -1307,8 +1502,7 @@ mod tests {
// Delete the first row // Delete the first row
matrix.delete_row(0); matrix.delete_row(0);
// Should be: // Resulting data should be [3, 6, 9]
// 3 6 9
assert_eq!(matrix.rows(), 1); assert_eq!(matrix.rows(), 1);
assert_eq!(matrix.cols(), 3); assert_eq!(matrix.cols(), 3);
assert_eq!(matrix.data(), &[3, 6, 9]); assert_eq!(matrix.data(), &[3, 6, 9]);
@ -1794,4 +1988,86 @@ mod tests {
} }
} }
} }
#[test]
fn test_matrix_zeros_ones_filled() {
    // zeros
    let zeros = Matrix::<f64>::zeros(2, 3);
    assert_eq!((zeros.rows(), zeros.cols()), (2, 3));
    assert_eq!(zeros.data(), &[0.0; 6]);

    // ones
    let ones = Matrix::<f64>::ones(3, 2);
    assert_eq!((ones.rows(), ones.cols()), (3, 2));
    assert_eq!(ones.data(), &[1.0; 6]);

    // filled with an arbitrary float
    let floats = Matrix::<f64>::filled(2, 2, 42.5);
    assert_eq!((floats.rows(), floats.cols()), (2, 2));
    assert_eq!(floats.data(), &[42.5; 4]);

    // filled with an integer element type
    let ints = Matrix::<i32>::filled(2, 3, 7);
    assert_eq!((ints.rows(), ints.cols()), (2, 3));
    assert_eq!(ints.data(), &[7; 6]);

    // NaN never compares equal to itself, so inspect each element instead.
    let nans = Matrix::nan(3, 3);
    assert_eq!((nans.rows(), nans.cols()), (3, 3));
    for &value in nans.data() {
        assert!(value.is_nan(), "Expected NaN, got {}", value);
    }
}
#[test]
fn test_broadcast_row_to_target_shape_basic() {
    let (target_rows, target_cols) = (5, 3);
    let source = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0], 1, 3);

    let broadcasted = source.broadcast_row_to_target_shape(target_rows, target_cols);

    assert_eq!(broadcasted.rows(), target_rows);
    assert_eq!(broadcasted.cols(), target_cols);
    // Every row of the result is a copy of the single source row.
    let expected_row = vec![1.0, 2.0, 3.0];
    for r in 0..target_rows {
        assert_eq!(broadcasted.row(r), expected_row);
    }
}
#[test]
fn test_broadcast_row_to_target_shape_single_row() {
    // Broadcasting to a single target row reproduces the source row.
    let (target_rows, target_cols) = (1, 2);
    let source = Matrix::from_rows_vec(vec![10.0, 20.0], 1, 2);

    let broadcasted = source.broadcast_row_to_target_shape(target_rows, target_cols);

    assert_eq!(broadcasted.rows(), target_rows);
    assert_eq!(broadcasted.cols(), target_cols);
    assert_eq!(broadcasted.row(0), vec![10.0, 20.0]);
}
#[test]
#[should_panic(
    expected = "broadcast_row_to_target_shape can only be called on a 1-row matrix."
)]
fn test_broadcast_row_to_target_shape_panic_not_1_row() {
    // A 2x2 source has more than one row, so broadcasting must be rejected.
    let two_by_two = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0, 4.0], 2, 2);
    let _ = two_by_two.broadcast_row_to_target_shape(3, 2);
}
#[test]
#[should_panic(
    expected = "Column count mismatch for broadcasting: source has 3 columns, target has 4 columns."
)]
fn test_broadcast_row_to_target_shape_panic_col_mismatch() {
    // A 3-column source cannot be broadcast to a 4-column target.
    let three_wide = Matrix::from_rows_vec(vec![1.0, 2.0, 3.0], 1, 3);
    let _ = three_wide.broadcast_row_to_target_shape(5, 4);
}
} }

View File

@ -1,7 +1,22 @@
//! Core matrix types and operations.
//!
//! The [`Matrix`](crate::matrix::Matrix) struct provides a simple column-major 2D array with a
//! suite of numeric helpers. Additional traits like [`SeriesOps`](crate::matrix::SeriesOps) and
//! [`BoolOps`](crate::matrix::BoolOps) extend functionality for common statistics and logical reductions.
//!
//! # Examples
//!
//! ```
//! use rustframe::matrix::Matrix;
//!
//! let m = Matrix::from_cols(vec![vec![1, 2], vec![3, 4]]);
//! assert_eq!(m.shape(), (2, 2));
//! assert_eq!(m[(0,1)], 3);
//! ```
pub mod boolops;
pub mod mat; pub mod mat;
pub mod seriesops; pub mod seriesops;
pub mod boolops;
pub use boolops::*;
pub use mat::*; pub use mat::*;
pub use seriesops::*; pub use seriesops::*;
pub use boolops::*;

View File

@ -1,3 +1,14 @@
//! Numeric reductions and transformations over matrix axes.
//!
//! [`SeriesOps`] provides methods like [`SeriesOps::sum_vertical`] or
//! [`SeriesOps::map`] that operate on [`FloatMatrix`] values.
//!
//! ```
//! use rustframe::matrix::{Matrix, SeriesOps};
//!
//! let m = Matrix::from_cols(vec![vec![1.0, 2.0], vec![3.0, 4.0]]);
//! assert_eq!(m.sum_horizontal(), vec![4.0, 6.0]);
//! ```
use crate::matrix::{Axis, BoolMatrix, FloatMatrix}; use crate::matrix::{Axis, BoolMatrix, FloatMatrix};
/// "Series-like" helpers that work along a single axis. /// "Series-like" helpers that work along a single axis.
@ -215,20 +226,13 @@ mod tests {
// Helper function to create a FloatMatrix for SeriesOps testing // Helper function to create a FloatMatrix for SeriesOps testing
fn create_float_test_matrix() -> FloatMatrix { fn create_float_test_matrix() -> FloatMatrix {
// 3x3 matrix (column-major) with some NaNs // 3x3 column-major matrix containing a few NaN values
// 1.0 4.0 7.0
// 2.0 NaN 8.0
// 3.0 6.0 NaN
let data = vec![1.0, 2.0, 3.0, 4.0, f64::NAN, 6.0, 7.0, 8.0, f64::NAN]; let data = vec![1.0, 2.0, 3.0, 4.0, f64::NAN, 6.0, 7.0, 8.0, f64::NAN];
FloatMatrix::from_vec(data, 3, 3) FloatMatrix::from_vec(data, 3, 3)
} }
fn create_float_test_matrix_4x4() -> FloatMatrix { fn create_float_test_matrix_4x4() -> FloatMatrix {
// 4x4 matrix (column-major) with some NaNs // 4x4 column-major matrix with NaNs inserted at positions where index % 5 == 0
// 1.0 5.0 9.0 13.0
// 2.0 NaN 10.0 NaN
// 3.0 6.0 NaN 14.0
// NaN 7.0 11.0 NaN
// first make array with 16 elements // first make array with 16 elements
FloatMatrix::from_vec( FloatMatrix::from_vec(
(0..16) (0..16)

237
src/random/crypto.rs Normal file
View File

@ -0,0 +1,237 @@
//! Cryptographically secure random number generator.
//!
//! On Unix systems this reads from `/dev/urandom`; on Windows it uses the
//! system's preferred CNG provider.
//!
//! ```
//! use rustframe::random::{crypto_rng, Rng};
//! let mut rng = crypto_rng();
//! let _v = rng.next_u64();
//! ```
#[cfg(unix)]
use std::{fs::File, io::Read};
use crate::random::Rng;
/// Random number generator that draws its bytes from `/dev/urandom`.
#[cfg(unix)]
pub struct CryptoRng {
    /// Handle to `/dev/urandom`, opened once in [`CryptoRng::new`] and reused
    /// for every subsequent read.
    file: File,
}

#[cfg(unix)]
impl CryptoRng {
    /// Open `/dev/urandom` and wrap the handle in a generator.
    ///
    /// # Panics
    ///
    /// Panics with "failed to open /dev/urandom" if the device cannot be opened.
    pub fn new() -> Self {
        Self {
            file: File::open("/dev/urandom").expect("failed to open /dev/urandom"),
        }
    }
}
#[cfg(unix)]
impl Rng for CryptoRng {
    /// Read eight bytes from the open `/dev/urandom` handle and reinterpret
    /// them as a `u64` in native byte order.
    ///
    /// # Panics
    ///
    /// Panics with "failed reading from /dev/urandom" if the full eight bytes
    /// cannot be read.
    fn next_u64(&mut self) -> u64 {
        let mut bytes = [0u8; 8];
        let outcome = self.file.read_exact(&mut bytes);
        outcome.expect("failed reading from /dev/urandom");
        u64::from_ne_bytes(bytes)
    }
}
/// Random number generator for Windows targets.
///
/// The type carries no state: it is a unit struct, and constructing it does
/// not acquire any resource.
#[cfg(windows)]
pub struct CryptoRng;
#[cfg(windows)]
impl CryptoRng {
/// No handle is needed on Windows.
///
/// Returns the unit value; this constructor cannot fail.
pub fn new() -> Self {
Self
}
}
#[cfg(windows)]
impl Rng for CryptoRng {
/// Fill an eight-byte buffer through `win_fill` and reinterpret it as a
/// `u64` in native byte order.
///
/// # Panics
///
/// Panics with "BCryptGenRandom failed" if `win_fill` returns `Err`.
fn next_u64(&mut self) -> u64 {
let mut buf = [0u8; 8];
win_fill(&mut buf).expect("BCryptGenRandom failed");
u64::from_ne_bytes(buf)
}
}
/// Fill `buf` with cryptographically secure random bytes using CNG.
///
/// * `BCryptGenRandom(NULL, buf, len, BCRYPT_USE_SYSTEM_PREFERRED_RNG)`
/// asks the OS for its system-preferred DRBG (CTR_DRBG on modern
/// Windows).
///
/// # Errors
///
/// Returns `Err(())` when the returned status is negative, i.e. when the
/// `NT_SUCCESS` condition (`status >= 0`) does not hold.
#[cfg(windows)]
fn win_fill(buf: &mut [u8]) -> Result<(), ()> {
use core::ffi::c_void;
// Local aliases mirroring the Windows type names used in the declaration below.
type BcryptAlgHandle = *mut c_void;
type NTSTATUS = i32;
const BCRYPT_USE_SYSTEM_PREFERRED_RNG: u32 = 0x0000_0002;
#[link(name = "bcrypt")]
extern "system" {
fn BCryptGenRandom(
hAlgorithm: BcryptAlgHandle,
pbBuffer: *mut u8,
cbBuffer: u32,
dwFlags: u32,
) -> NTSTATUS;
}
// NT_SUCCESS(status) == status >= 0
// SAFETY: the pointer and the length both come from the same `&mut [u8]`,
// so the callee is handed a writable region of exactly `buf.len()` bytes
// that stays borrowed for the duration of the call. The algorithm handle is
// null, matching the system-preferred-RNG call form described above.
// NOTE(review): `buf.len() as u32` truncates for buffers longer than
// `u32::MAX` bytes; the caller visible here passes 8 bytes.
let status = unsafe {
BCryptGenRandom(
core::ptr::null_mut(),
buf.as_mut_ptr(),
buf.len() as u32,
BCRYPT_USE_SYSTEM_PREFERRED_RNG,
)
};
if status >= 0 {
Ok(())
} else {
Err(())
}
}
/// Convenience constructor for [`CryptoRng`].
///
/// Equivalent to calling [`CryptoRng::new`]; any panic raised by that
/// constructor propagates unchanged.
pub fn crypto_rng() -> CryptoRng {
CryptoRng::new()
}
// Tests for `CryptoRng`. The generator is not seedable, so every check below
// is either structural or statistical with a tolerance; none of them compares
// against fixed expected outputs.
#[cfg(test)]
mod tests {
use super::*;
use crate::random::Rng;
use std::collections::HashSet;
// Six consecutive draws must not all be equal. (Despite the name, the test
// checks variation between draws, not that values are non-zero.)
#[test]
fn test_crypto_rng_nonzero() {
let mut rng = CryptoRng::new();
let mut all_same = true;
let mut prev = rng.next_u64();
for _ in 0..5 {
let val = rng.next_u64();
if val != prev {
all_same = false;
}
prev = val;
}
assert!(!all_same, "CryptoRng produced identical values");
}
// Out of 100 draws, more than 90 must be distinct.
#[test]
fn test_crypto_rng_variation_large() {
let mut rng = CryptoRng::new();
let mut values = HashSet::new();
for _ in 0..100 {
values.insert(rng.next_u64());
}
assert!(values.len() > 90, "CryptoRng output not varied enough");
}
// 1000 draws over 10 buckets: the expected count per bucket is 100 and each
// bucket must land strictly within 50 of that.
#[test]
fn test_crypto_rng_random_range_uniform() {
let mut rng = CryptoRng::new();
let mut counts = [0usize; 10];
for _ in 0..1000 {
let v = rng.random_range(0..10usize);
counts[v] += 1;
}
for &c in &counts {
// "Crypto RNG counts far from uniform: {c}"
assert!((c as isize - 100).abs() < 50);
}
}
// Sample mean and variance of 2000 standard-normal draws must be close to
// 0 and 1. The variance is computed as E[x^2] - (E[x])^2.
#[test]
fn test_crypto_normal_distribution() {
let mut rng = CryptoRng::new();
let mean = 0.0;
let sd = 1.0;
let n = 2000;
let mut sum = 0.0;
let mut sum_sq = 0.0;
for _ in 0..n {
let val = rng.normal(mean, sd);
sum += val;
sum_sq += val * val;
}
let sample_mean = sum / n as f64;
let sample_var = sum_sq / n as f64 - sample_mean * sample_mean;
assert!(sample_mean.abs() < 0.1);
assert!((sample_var - 1.0).abs() < 0.2);
}
// Two independently constructed generators must not return the same first
// value.
#[test]
fn test_two_instances_different_values() {
let mut a = CryptoRng::new();
let mut b = CryptoRng::new();
let va = a.next_u64();
let vb = b.next_u64();
assert_ne!(va, vb);
}
// Smoke test: the `crypto_rng` helper returns a usable generator.
#[test]
fn test_crypto_rng_helper_function() {
let mut rng = crypto_rng();
let _ = rng.next_u64();
}
// With a standard deviation of zero every sample must equal the mean.
#[test]
fn test_crypto_normal_zero_sd() {
let mut rng = CryptoRng::new();
for _ in 0..5 {
let v = rng.normal(10.0, 0.0);
assert_eq!(v, 10.0);
}
}
// Shuffling an empty array must be accepted and leave it empty.
#[test]
fn test_crypto_shuffle_empty_slice() {
use crate::random::SliceRandom;
let mut rng = CryptoRng::new();
let mut arr: [u8; 0] = [];
arr.shuffle(&mut rng);
assert!(arr.is_empty());
}
// Pearson chi-square statistic over 10 buckets and 10000 draws (expected
// count 1000 per bucket) must stay below 40.
#[test]
fn test_crypto_chi_square_uniform() {
let mut rng = CryptoRng::new();
let mut counts = [0usize; 10];
let samples = 10000;
for _ in 0..samples {
let v = rng.random_range(0..10usize);
counts[v] += 1;
}
let expected = samples as f64 / 10.0;
let chi2: f64 = counts
.iter()
.map(|&c| {
let diff = c as f64 - expected;
diff * diff / expected
})
.sum();
assert!(chi2 < 40.0, "chi-square statistic too high: {chi2}");
}
// Monobit check: across 1000 draws (64000 bits) the fraction of set bits
// must be within 0.02 of one half.
#[test]
fn test_crypto_monobit() {
let mut rng = CryptoRng::new();
let mut ones = 0usize;
let samples = 1000;
for _ in 0..samples {
ones += rng.next_u64().count_ones() as usize;
}
let total_bits = samples * 64;
let ratio = ones as f64 / total_bits as f64;
// "bit ratio far from 0.5: {ratio}"
assert!((ratio - 0.5).abs() < 0.02);
}
}

29
src/random/mod.rs Normal file
View File

@ -0,0 +1,29 @@
//! Random number generation utilities.
//!
//! Provides both a simple pseudo-random generator [`Prng`](crate::random::Prng) and a
//! cryptographically secure alternative [`CryptoRng`](crate::random::CryptoRng). The
//! [`SliceRandom`](crate::random::SliceRandom) trait offers shuffling of slices using any RNG
//! implementing [`Rng`](crate::random::Rng).
//!
//! ```
//! use rustframe::random::{rng, SliceRandom};
//!
//! let mut rng = rng();
//! let mut data = [1, 2, 3, 4];
//! data.shuffle(&mut rng);
//! assert_eq!(data.len(), 4);
//! ```
pub mod crypto;
pub mod prng;
pub mod random_core;
pub mod seq;
pub use crypto::{crypto_rng, CryptoRng};
pub use prng::{rng, Prng};
pub use random_core::{RangeSample, Rng};
pub use seq::SliceRandom;
/// Re-exports the `SliceRandom`, `Rng` and `RangeSample` traits, both
/// generator types and their convenience constructors under a single path.
pub mod prelude {
pub use super::seq::SliceRandom;
pub use super::{crypto_rng, rng, CryptoRng, Prng, RangeSample, Rng};
}

235
src/random/prng.rs Normal file
View File

@ -0,0 +1,235 @@
//! A tiny XorShift64-based pseudo random number generator.
//!
//! ```
//! use rustframe::random::{rng, Rng};
//! let mut rng = rng();
//! let x = rng.next_u64();
//! assert!(x >= 0);
//! ```
use std::time::{SystemTime, UNIX_EPOCH};
use crate::random::Rng;
/// Simple XorShift64-based pseudo random number generator.
///
/// The generator is fully determined by its seed: two instances created with
/// the same seed hold the same state.
#[derive(Clone)]
pub struct Prng {
    /// Current generator state. Both constructors guarantee it is non-zero.
    state: u64,
}

impl Prng {
    /// Create a new generator from the given seed.
    ///
    /// Zero is a fixed point of the XorShift step (shifting and XOR-ing zero
    /// yields zero), so a generator seeded with `0` would return `0` forever.
    /// A seed of `0` is therefore replaced by a fixed non-zero constant; every
    /// other seed is used unchanged.
    pub fn new(seed: u64) -> Self {
        /// Substitute state used when the caller passes a zero seed.
        const ZERO_SEED_REPLACEMENT: u64 = 0x9E37_79B9_7F4A_7C15;

        let state = if seed == 0 {
            ZERO_SEED_REPLACEMENT
        } else {
            seed
        };
        Self { state }
    }

    /// Create a generator seeded from the current time.
    ///
    /// The seed is the number of nanoseconds between the system clock and the
    /// Unix epoch, truncated to 64 bits. If the clock is set before the epoch
    /// the distance to the epoch is used instead of panicking.
    pub fn from_entropy() -> Self {
        let since_epoch = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            // A clock before 1970 yields an error carrying the (positive)
            // distance to the epoch, which is just as good a seed.
            .unwrap_or_else(|err| err.duration());
        // `as_nanos` returns `u128`; keeping only the low 64 bits is intended.
        let nanos = since_epoch.as_nanos() as u64;
        Self::new(nanos)
    }
}
impl Rng for Prng {
    /// Advance the state by one XorShift step (shifts of 13, 7 and 17) and
    /// return the new state.
    ///
    /// A state of zero maps to zero, so the output stays zero once the state
    /// is zero.
    fn next_u64(&mut self) -> u64 {
        let s0 = self.state;
        let s1 = s0 ^ (s0 << 13);
        let s2 = s1 ^ (s1 >> 7);
        let s3 = s2 ^ (s2 << 17);
        self.state = s3;
        s3
    }
}
/// Convenience constructor returning [`Prng::from_entropy`].
///
/// The resulting generator is seeded from the current system time, not from
/// an operating-system entropy source.
pub fn rng() -> Prng {
Prng::from_entropy()
}
// Tests for `Prng`. Most of them use a fixed seed, so their outcome is
// deterministic and depends on the exact sequence of draws made.
#[cfg(test)]
mod tests {
use super::*;
use crate::random::Rng;
// Two generators built from the same seed must produce the same sequence.
#[test]
fn test_prng_determinism() {
let mut a = Prng::new(42);
let mut b = Prng::new(42);
for _ in 0..5 {
assert_eq!(a.next_u64(), b.next_u64());
}
}
// Floating point draws must fall inside the half-open range.
#[test]
fn test_random_range_f64() {
let mut rng = Prng::new(1);
for _ in 0..10 {
let v = rng.random_range(-1.0..1.0);
assert!(v >= -1.0 && v < 1.0);
}
}
// Integer draws must fall inside the half-open range.
#[test]
fn test_random_range_usize() {
let mut rng = Prng::new(9);
for _ in 0..100 {
let v = rng.random_range(10..20);
assert!(v >= 10 && v < 20);
}
}
// Out of 1000 booleans, the fraction of `true` must lie strictly between
// 0.4 and 0.6.
#[test]
fn test_gen_bool_balance() {
let mut rng = Prng::new(123);
let mut trues = 0;
for _ in 0..1000 {
if rng.gen_bool() {
trues += 1;
}
}
let ratio = trues as f64 / 1000.0;
assert!(ratio > 0.4 && ratio < 0.6);
}
// Sample mean and variance of 5000 draws from N(5, 2^2). The variance is
// computed as E[x^2] - (E[x])^2 and may deviate by up to 20% of sd^2.
#[test]
fn test_normal_distribution() {
let mut rng = Prng::new(7);
let mut sum = 0.0;
let mut sum_sq = 0.0;
let mean = 5.0;
let sd = 2.0;
let n = 5000;
for _ in 0..n {
let val = rng.normal(mean, sd);
sum += val;
sum_sq += val * val;
}
let sample_mean = sum / n as f64;
let sample_var = sum_sq / n as f64 - sample_mean * sample_mean;
assert!((sample_mean - mean).abs() < 0.1);
assert!((sample_var - sd * sd).abs() < 0.2 * sd * sd);
}
// Five time-seeded generators, created 1 microsecond apart, must not all
// return the same first value. The sleep gives the clock-based seed a
// chance to advance between constructions.
#[test]
fn test_prng_from_entropy_unique() {
use std::{collections::HashSet, thread, time::Duration};
let mut seen = HashSet::new();
for _ in 0..5 {
let mut rng = Prng::from_entropy();
seen.insert(rng.next_u64());
thread::sleep(Duration::from_micros(1));
}
assert!(seen.len() > 1, "Entropy seeds produced identical outputs");
}
// 10000 draws over 10 buckets: the expected count per bucket is 1000 and
// each bucket must land strictly within 150 of that.
#[test]
fn test_prng_uniform_distribution() {
let mut rng = Prng::new(12345);
let mut counts = [0usize; 10];
for _ in 0..10000 {
let v = rng.random_range(0..10usize);
counts[v] += 1;
}
for &c in &counts {
// "PRNG counts far from uniform: {c}"
assert!((c as isize - 1000).abs() < 150);
}
}
// Different seeds must give different first outputs.
#[test]
fn test_prng_different_seeds_different_output() {
let mut a = Prng::new(1);
let mut b = Prng::new(2);
let va = a.next_u64();
let vb = b.next_u64();
assert_ne!(va, vb);
}
// Within 100 draws both boolean values must occur at least once.
#[test]
fn test_prng_gen_bool_varies() {
let mut rng = Prng::new(99);
let mut seen_true = false;
let mut seen_false = false;
for _ in 0..100 {
if rng.gen_bool() {
seen_true = true;
} else {
seen_false = true;
}
}
assert!(seen_true && seen_false);
}
// A range containing a single integer must always return that integer.
#[test]
fn test_random_range_single_usize() {
let mut rng = Prng::new(42);
for _ in 0..10 {
let v = rng.random_range(5..6);
assert_eq!(v, 5);
}
}
// A very narrow floating point range must still be respected.
#[test]
fn test_random_range_single_f64() {
let mut rng = Prng::new(42);
for _ in 0..10 {
let v = rng.random_range(1.234..1.235);
assert!(v >= 1.234 && v < 1.235);
}
}
// With a standard deviation of zero every sample must equal the mean.
#[test]
fn test_prng_normal_zero_sd() {
let mut rng = Prng::new(7);
for _ in 0..5 {
let v = rng.normal(3.0, 0.0);
assert_eq!(v, 3.0);
}
}
// The widest half-open usize range must be handled and never return its
// excluded end.
#[test]
fn test_random_range_extreme_usize() {
let mut rng = Prng::new(5);
for _ in 0..10 {
let v = rng.random_range(0..usize::MAX);
assert!(v < usize::MAX);
}
}
// Pearson chi-square statistic over 10 buckets and 10000 draws (expected
// count 1000 per bucket) must stay below 20.
#[test]
fn test_prng_chi_square_uniform() {
let mut rng = Prng::new(12345);
let mut counts = [0usize; 10];
let samples = 10000;
for _ in 0..samples {
let v = rng.random_range(0..10usize);
counts[v] += 1;
}
let expected = samples as f64 / 10.0;
let chi2: f64 = counts
.iter()
.map(|&c| {
let diff = c as f64 - expected;
diff * diff / expected
})
.sum();
// "chi-square statistic too high: {chi2}"
assert!(chi2 < 20.0);
}
// Monobit check: across 1000 draws (64000 bits) the fraction of set bits
// must be within 0.01 of one half.
#[test]
fn test_prng_monobit() {
let mut rng = Prng::new(42);
let mut ones = 0usize;
let samples = 1000;
for _ in 0..samples {
ones += rng.next_u64().count_ones() as usize;
}
let total_bits = samples * 64;
let ratio = ones as f64 / total_bits as f64;
// "bit ratio far from 0.5: {ratio}"
assert!((ratio - 0.5).abs() < 0.01);
}
}

106
src/random/random_core.rs Normal file
View File

@ -0,0 +1,106 @@
//! Core traits for random number generators and sampling ranges.
//!
//! ```
//! use rustframe::random::{rng, Rng};
//! let mut r = rng();
//! let value: f64 = r.random_range(0.0..1.0);
//! assert!(value >= 0.0 && value < 1.0);
//! ```
use std::f64::consts::PI;
use std::ops::Range;
/// Trait implemented by random number generators.
///
/// Implementors only supply [`Rng::next_u64`]; every other method is derived
/// from it.
pub trait Rng {
    /// Generate the next random `u64` value.
    fn next_u64(&mut self) -> u64;

    /// Generate a value in the half-open `range`, consuming exactly one
    /// `u64` from the generator.
    ///
    /// The mapping from the raw value to `T` is defined by the
    /// [`RangeSample`] implementation for `T`.
    fn random_range<T>(&mut self, range: Range<T>) -> T
    where
        T: RangeSample,
    {
        T::from_u64(self.next_u64(), &range)
    }

    /// Generate a boolean with probability 0.5 of being `true`.
    fn gen_bool(&mut self) -> bool {
        self.random_range(0..2usize) == 1
    }

    /// Sample from a normal distribution using the Box-Muller transform.
    ///
    /// Two uniform values are drawn, in this order: the radius term `u1` and
    /// the angle term `u2`. The result is always finite for finite `mean` and
    /// `sd`; in particular `sd == 0.0` always returns `mean`.
    fn normal(&mut self, mean: f64, sd: f64) -> f64 {
        // Smallest non-zero value the uniform mapping produces (raw value 1).
        let smallest_uniform = 1.0 / u64::MAX as f64;

        let u1: f64 = self.random_range(0.0..1.0);
        let u2: f64 = self.random_range(0.0..1.0);
        // `ln(0)` is negative infinity, which would turn the sample into an
        // infinity (or NaN when `sd` is zero). Treat an exact zero like the
        // next representable uniform instead.
        let u1 = u1.max(smallest_uniform);
        mean + sd * (-2.0 * u1.ln()).sqrt() * (2.0 * PI * u2).cos()
    }
}

/// Conversion from a raw `u64` into a type within a range.
pub trait RangeSample: Sized {
    /// Map `value` onto the half-open `range` (`start` included, `end`
    /// excluded).
    fn from_u64(value: u64, range: &Range<Self>) -> Self;
}

impl RangeSample for usize {
    /// Reduce `value` modulo the width of the range and offset it by
    /// `range.start`.
    ///
    /// The reduction is a plain remainder, so widths that do not divide
    /// `2^64` carry a small modulo bias. On targets where `usize` is narrower
    /// than 64 bits only the low bits of `value` are used.
    ///
    /// # Panics
    ///
    /// Panics if the range is empty (`start >= end`).
    fn from_u64(value: u64, range: &Range<Self>) -> Self {
        assert!(
            range.start < range.end,
            "cannot sample from an empty range: {}..{}",
            range.start,
            range.end
        );
        let span = range.end - range.start;
        (value as usize % span) + range.start
    }
}

impl RangeSample for f64 {
    /// Scale `value` linearly onto the range.
    ///
    /// For a non-empty range the result is always strictly below
    /// `range.end`: when rounding would land on the end, the largest `f64`
    /// below it is returned instead. Empty or NaN-bounded ranges are not
    /// adjusted and simply yield whatever the linear formula produces.
    fn from_u64(value: u64, range: &Range<Self>) -> Self {
        /// Largest `f64` strictly below a finite or `+inf`, non-NaN `x`.
        /// Hand-rolled via the bit pattern so the code does not depend on
        /// newer standard-library additions.
        fn prev_float(x: f64) -> f64 {
            if x == 0.0 {
                // Covers +0.0 and -0.0: step to the smallest negative subnormal.
                return -f64::from_bits(1);
            }
            let bits = x.to_bits();
            // For positive values a smaller bit pattern is a smaller number;
            // for negative values a larger magnitude is further down.
            f64::from_bits(if x > 0.0 { bits - 1 } else { bits + 1 })
        }

        let span = range.end - range.start;
        let sample = range.start + (value as f64 / u64::MAX as f64) * span;
        if range.start < range.end && sample >= range.end {
            prev_float(range.end)
        } else {
            sample
        }
    }
}
// Tests for the `RangeSample` mappings, driven with hand-picked raw values
// instead of a generator.
#[cfg(test)]
mod tests {
use super::*;
// A one-element integer range maps both extreme raw values to its only member.
#[test]
fn test_range_sample_usize_boundary() {
assert_eq!(<usize as RangeSample>::from_u64(0, &(0..1)), 0);
assert_eq!(<usize as RangeSample>::from_u64(u64::MAX, &(0..1)), 0);
}
// The smallest raw value maps inside the range; the largest maps very close
// to the upper end. The upper check is inclusive (`<= 1.0`), so this test
// accepts a result equal to the range end.
#[test]
fn test_range_sample_f64_boundary() {
let v0 = <f64 as RangeSample>::from_u64(0, &(0.0..1.0));
let vmax = <f64 as RangeSample>::from_u64(u64::MAX, &(0.0..1.0));
assert!(v0 >= 0.0 && v0 < 1.0);
assert!(vmax > 0.999999999999 && vmax <= 1.0);
}
// Small raw values stay inside an offset integer range.
#[test]
fn test_range_sample_usize_varied() {
for i in 0..5 {
let v = <usize as RangeSample>::from_u64(i, &(10..15));
assert!(v >= 10 && v < 15);
}
}
// Low, middle and high raw values stay within an offset float range
// (upper bound checked inclusively).
#[test]
fn test_range_sample_f64_span() {
for val in [0, u64::MAX / 2, u64::MAX] {
let f = <f64 as RangeSample>::from_u64(val, &(2.0..4.0));
assert!(f >= 2.0 && f <= 4.0);
}
}
// A one-element integer range always yields its only member.
#[test]
fn test_range_sample_usize_single_value() {
for val in [0, 1, u64::MAX] {
let n = <usize as RangeSample>::from_u64(val, &(5..6));
assert_eq!(n, 5);
}
}
// A float range spanning zero is respected (upper bound checked inclusively).
#[test]
fn test_range_sample_f64_negative_range() {
for val in [0, u64::MAX / 3, u64::MAX] {
let f = <f64 as RangeSample>::from_u64(val, &(-2.0..2.0));
assert!(f >= -2.0 && f <= 2.0);
}
}
}

113
src/random/seq.rs Normal file
View File

@ -0,0 +1,113 @@
//! Extensions for shuffling slices with a random number generator.
//!
//! ```
//! use rustframe::random::{rng, SliceRandom};
//! let mut data = [1, 2, 3];
//! data.shuffle(&mut rng());
//! assert_eq!(data.len(), 3);
//! ```
use crate::random::Rng;
/// Trait for randomizing slices.
pub trait SliceRandom {
/// Shuffle the slice in place using the provided RNG.
///
/// `rng` is borrowed mutably because drawing random values advances the
/// generator.
fn shuffle<R: Rng>(&mut self, rng: &mut R);
}
impl<T> SliceRandom for [T] {
    /// Fisher-Yates shuffle, walking from the back of the slice to the front.
    ///
    /// At each step the last element of the still-unshuffled prefix is swapped
    /// with a randomly chosen element of that prefix (possibly itself). Slices
    /// with fewer than two elements are left untouched and draw nothing from
    /// `rng`.
    fn shuffle<R: Rng>(&mut self, rng: &mut R) {
        // Length of the prefix that has not been finalised yet.
        let mut remaining = self.len();
        while remaining > 1 {
            let last = remaining - 1;
            let pick = rng.random_range(0..remaining);
            self.swap(last, pick);
            remaining = last;
        }
    }
}
// Tests for `SliceRandom::shuffle`, using the seeded `Prng` where a
// reproducible outcome is needed and `CryptoRng` where it is not.
#[cfg(test)]
mod tests {
use super::*;
use crate::random::{CryptoRng, Prng};
// A shuffle must be a permutation: same length and, once sorted, the same
// elements as the input.
#[test]
fn test_shuffle_slice() {
let mut rng = Prng::new(3);
let mut arr = [1, 2, 3, 4, 5];
let orig = arr.clone();
arr.shuffle(&mut rng);
assert_eq!(arr.len(), orig.len());
let mut sorted = arr.to_vec();
sorted.sort();
assert_eq!(sorted, orig.to_vec());
}
// Two generators with the same seed must shuffle identical inputs identically.
#[test]
fn test_slice_shuffle_deterministic_with_prng() {
let mut rng1 = Prng::new(11);
let mut rng2 = Prng::new(11);
let mut a = [1u8, 2, 3, 4, 5, 6, 7, 8, 9];
let mut b = a.clone();
a.shuffle(&mut rng1);
b.shuffle(&mut rng2);
assert_eq!(a, b);
}
// Two independent `CryptoRng` shuffles of nine elements are expected to
// differ from each other, and at least one must differ from the input.
// NOTE(review): this is probabilistic; two shuffles can legitimately
// produce the same order, just very rarely for nine elements.
#[test]
fn test_slice_shuffle_crypto_random_changes() {
let mut rng1 = CryptoRng::new();
let mut rng2 = CryptoRng::new();
let orig = [1u8, 2, 3, 4, 5, 6, 7, 8, 9];
let mut a = orig.clone();
let mut b = orig.clone();
a.shuffle(&mut rng1);
b.shuffle(&mut rng2);
assert!(a != orig || b != orig, "Shuffles did not change order");
assert_ne!(a, b, "Two Crypto RNG shuffles produced same order");
}
// A single-element slice is unchanged by shuffling.
#[test]
fn test_shuffle_single_element_no_change() {
let mut rng = Prng::new(1);
let mut arr = [42];
arr.shuffle(&mut rng);
assert_eq!(arr, [42]);
}
// Consecutive shuffles from one generator give different orders. The
// outcome is fixed by seed 5 and the order of draws.
#[test]
fn test_multiple_shuffles_different_results() {
let mut rng = Prng::new(5);
let mut arr1 = [1, 2, 3, 4];
let mut arr2 = [1, 2, 3, 4];
arr1.shuffle(&mut rng);
arr2.shuffle(&mut rng);
assert_ne!(arr1, arr2);
}
// Shuffling an empty array must be accepted and leave it empty.
#[test]
fn test_shuffle_empty_slice() {
let mut rng = Prng::new(1);
let mut arr: [i32; 0] = [];
arr.shuffle(&mut rng);
assert!(arr.is_empty());
}
// Chi-square check over the permutations of three elements: 6000 shuffles
// with an expected count of 1000 per permutation.
// NOTE(review): the statistic sums over `counts.values()`, so a permutation
// that never occurs has no entry and contributes nothing to the sum.
#[test]
fn test_shuffle_three_uniform() {
use std::collections::HashMap;
let mut rng = Prng::new(123);
let mut counts: HashMap<[u8; 3], usize> = HashMap::new();
for _ in 0..6000 {
let mut arr = [1u8, 2, 3];
arr.shuffle(&mut rng);
*counts.entry(arr).or_insert(0) += 1;
}
let expected = 1000.0;
let chi2: f64 = counts
.values()
.map(|&c| {
let diff = c as f64 - expected;
diff * diff / expected
})
.sum();
assert!(chi2 < 30.0, "shuffle chi-square too high: {chi2}");
}
}

View File

@ -1,3 +1,10 @@
//! Generation and manipulation of calendar date sequences.
//!
//! ```
//! use rustframe::utils::dateutils::dates::{DateFreq, DatesList};
//! let list = DatesList::new("2024-01-01".into(), "2024-01-03".into(), DateFreq::Daily);
//! assert_eq!(list.count().unwrap(), 3);
//! ```
use chrono::{Datelike, Duration, NaiveDate, Weekday}; use chrono::{Datelike, Duration, NaiveDate, Weekday};
use std::collections::HashMap; use std::collections::HashMap;
use std::error::Error; use std::error::Error;

View File

@ -1,3 +1,13 @@
//! Generators for sequences of calendar and business dates.
//!
//! See [`dates`] for all-day calendars and [`bdates`] for business-day aware
//! variants.
//!
//! ```
//! use rustframe::utils::dateutils::{DatesList, DateFreq};
//! let list = DatesList::new("2024-01-01".into(), "2024-01-02".into(), DateFreq::Daily);
//! assert_eq!(list.count().unwrap(), 2);
//! ```
pub mod bdates; pub mod bdates;
pub mod dates; pub mod dates;

View File

@ -1,3 +1,14 @@
//! Assorted helper utilities.
//!
//! Currently this module exposes date generation utilities in [`dateutils`](crate::utils::dateutils),
//! including calendar and business date sequences.
//!
//! ```
//! use rustframe::utils::DatesList;
//! use rustframe::utils::DateFreq;
//! let dates = DatesList::new("2024-01-01".into(), "2024-01-03".into(), DateFreq::Daily);
//! assert_eq!(dates.count().unwrap(), 3);
//! ```
pub mod dateutils; pub mod dateutils;
pub use dateutils::{BDateFreq, BDatesGenerator, BDatesList}; pub use dateutils::{BDateFreq, BDatesGenerator, BDatesList};