mirror of
https://github.com/Magnus167/rustframe.git
synced 2025-08-20 04:00:01 +00:00
Add test for KMeans empty cluster reinitialization logic
This commit is contained in:
parent
62d4803075
commit
4d8ed2e908
@ -321,4 +321,55 @@ mod tests {
|
||||
assert_eq!(predicted_label.len(), 1);
|
||||
assert!(predicted_label[0] < k);
|
||||
}
|
||||
|
||||
#[test]
fn test_k_means_fit_empty_cluster_reinitialization() {
    // Two tight groups of three 2-D points each, separated by a large gap,
    // while three clusters are requested. The intent of this test is to
    // exercise the empty-cluster re-initialization path of `KMeans::fit`:
    // with only two natural groups, one of the three clusters is expected to
    // end up empty at some point and must be re-seeded.
    // NOTE(review): whether that path is actually hit depends on how `fit`
    // picks its initial centroids — confirm against the implementation.
    let data = vec![
        1.0, 1.0,
        1.1, 1.1,
        1.2, 1.2,
        // Large gap to ensure distinct clusters
        100.0, 100.0,
        100.1, 100.1,
        100.2, 100.2,
    ];
    let x = FloatMatrix::from_rows_vec(data, 6, 2);
    let k = 3; // Request 3 clusters for 2 natural groups
    let max_iter = 100;
    let tol = 1e-6;

    let (kmeans_model, labels) = KMeans::fit(&x, k, max_iter, tol);

    // One centroid per requested cluster, one label per input row.
    assert_eq!(kmeans_model.centroids.rows(), k);
    assert_eq!(labels.len(), x.rows());

    // Validate every label and tally the cluster sizes in a single pass.
    // The bounds assertion runs before the index so an out-of-range label
    // fails with the assertion rather than an indexing panic.
    let mut counts = vec![0; k];
    for &label in &labels {
        assert!(label < k);
        counts[label] += 1;
    }

    // The crucial assertion: no cluster may remain empty once fitting is done.
    assert!(counts.iter().all(|&c| c > 0), "All clusters should have at least one point after re-initialization.");
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user