Coverage for nilearn/regions/tests/test_hierarchical_kmeans_clustering.py: 0%
109 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-20 10:58 +0200
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-20 10:58 +0200
1import numpy as np
2import pytest
3from numpy.testing import assert_array_almost_equal
4from sklearn.utils.estimator_checks import parametrize_with_checks
6from nilearn._utils.data_gen import generate_fake_fmri
7from nilearn._utils.estimator_checks import (
8 check_estimator,
9 nilearn_check_estimator,
10 return_expected_failed_checks,
11)
12from nilearn._utils.tags import SKLEARN_LT_1_6
13from nilearn.maskers import NiftiMasker, SurfaceMasker
14from nilearn.regions.hierarchical_kmeans_clustering import (
15 HierarchicalKMeans,
16 _adjust_small_clusters,
17 hierarchical_k_means,
18)
19from nilearn.surface import SurfaceImage
20from nilearn.surface.tests.test_surface import flat_mesh
# IMPORTANT
# n_clusters is deliberately kept low (< 3) here:
# it makes the sklearn checks easier to run.
# This list is what the estimator-check tests below are parametrized with.
ESTIMATORS_TO_CHECK = [HierarchicalKMeans(n_clusters=2)]
if not SKLEARN_LT_1_6:
    # This branch relies on sklearn's own ``parametrize_with_checks``,
    # which is told up front which checks are expected to fail.

    @parametrize_with_checks(
        estimators=ESTIMATORS_TO_CHECK,
        expected_failed_checks=return_expected_failed_checks,
    )
    def test_check_estimator_sklearn(estimator, check):
        """Run each sklearn estimator check on the estimator."""
        check(estimator)

else:
    # Here the checks are split by nilearn's ``check_estimator`` helper
    # into those requested as valid and those requested with valid=False.

    @pytest.mark.parametrize(
        "estimator, check, name",
        check_estimator(estimators=ESTIMATORS_TO_CHECK),
    )
    def test_check_estimator_sklearn_valid(estimator, check, name):  # noqa: ARG001
        """Run the sklearn checks that the estimator is expected to pass."""
        check(estimator)

    @pytest.mark.xfail(reason="invalid checks should fail")
    @pytest.mark.parametrize(
        "estimator, check, name",
        check_estimator(estimators=ESTIMATORS_TO_CHECK, valid=False),
    )
    def test_check_estimator_sklearn_invalid(estimator, check, name):  # noqa: ARG001
        """Run the sklearn checks requested with valid=False.

        The test is marked xfail: each of these checks should fail.
        """
        check(estimator)
@pytest.mark.parametrize(
    "estimator, check, name",
    nilearn_check_estimator(estimators=ESTIMATORS_TO_CHECK),
)
def test_check_estimator_nilearn(estimator, check, name):  # noqa: ARG001
    """Run each nilearn-specific estimator check on the estimator.

    ``name`` is part of the parametrization but is not used in the body.
    """
    check(estimator)
@pytest.mark.parametrize(
    "test_list, n_clusters",
    [
        ([2.4, 2.6], 5),
        ([2.7, 3.0, 3.3], 9),
        ([10 / 3, 10 / 3, 10 / 3], 10),
        ([1 / 3, 11 / 3, 11 / 3, 10 / 3], 11),
    ],
)
def test_adjust_small_clusters(test_list, n_clusters):
    """Check the output of ``_adjust_small_clusters`` on fractional sizes.

    Parameters
    ----------
    test_list : list of float
        Fractional cluster sizes whose sum is ``n_clusters``.
    n_clusters : int
        Total number of clusters the adjusted sizes must add up to.

    The adjusted sizes must contain no zero, must sum to ``n_clusters``
    and must all be integers (Python or NumPy).
    """
    test_list = np.asarray(test_list)

    # Sanity check of the parametrization itself. The inputs are floats
    # (some are thirds), so compare with a tolerance rather than ``==``:
    # an exact comparison depends on floating-point summation order.
    assert np.isclose(np.sum(test_list), n_clusters)

    list_round = _adjust_small_clusters(test_list, n_clusters)

    assert np.all(list_round != 0)
    # The adjusted sizes are integers, so exact equality is fine here.
    assert np.sum(list_round) == n_clusters
    assert all(isinstance(a, (int, np.integer)) for a in list_round)
def test_hierarchical_k_means():
    """Check that identical rows receive identical cluster labels.

    The input is three distinct rows repeated five times, so the labels
    returned for 3 clusters must repeat with a period of three.
    """
    X = [[10, -10, 30], [12, -8, 24]]
    # Tiling the 2 x 3 list five times and transposing gives a 15 x 2
    # array: the same three rows, repeated five times in order.
    X = np.tile(X, 5).T
    test_labels = hierarchical_k_means(X, 3)
    # The expected pattern is built from the labels of the first three
    # rows, so the test does not depend on which ids were assigned.
    truth_labels = np.tile(test_labels[:3], 5)
    assert_array_almost_equal(test_labels, truth_labels)
def test_hierarchical_k_means_clustering_transform():
    """Check that ``transform`` returns one column per cluster."""
    n_samples, n_clusters = 15, 8
    data_img, mask_img = generate_fake_fmri(
        shape=(10, 11, 12), length=n_samples
    )
    # Masked data: this is the array the estimator is fitted on.
    X = NiftiMasker(mask_img=mask_img).fit().transform(data_img)

    fitted = HierarchicalKMeans(n_clusters=n_clusters).fit(X)
    X_red = fitted.transform(X)

    expected_shape = (n_samples, n_clusters)
    assert X_red.shape == expected_shape
def test_hierarchical_k_means_clustering_inverse_transform():
    """Check that ``inverse_transform`` restores the input shape."""
    n_samples, n_clusters = 15, 8
    data_img, mask_img = generate_fake_fmri(
        shape=(10, 11, 12), length=n_samples
    )
    X = NiftiMasker(mask_img=mask_img).fit().transform(data_img)

    fitted = HierarchicalKMeans(n_clusters=n_clusters).fit(X)
    # Round trip: reduce to cluster space, then project back.
    X_inv = fitted.inverse_transform(fitted.transform(X))

    assert X_inv.shape == X.shape
@pytest.mark.parametrize("n_clusters", [None, -2, 0, "2"])
def test_hierarchical_k_means_clustering_error_n_clusters(n_clusters):
    """Check that fitting with an invalid ``n_clusters`` raises ValueError."""
    data_img, mask_img = generate_fake_fmri(shape=(10, 11, 12), length=15)
    X = NiftiMasker(mask_img=mask_img).fit().transform(data_img)

    # Pattern handed to ``pytest.raises(match=...)``; it embeds the
    # offending value so each parametrized case checks its own message.
    expected_message = (
        "n_clusters should be an integer greater than 0."
        f" {n_clusters} was provided."
    )
    with pytest.raises(ValueError, match=expected_message):
        HierarchicalKMeans(n_clusters=n_clusters).fit(X)
def test_hierarchical_k_means_clustering_scaling():
    """Compare the estimator with and without ``scaling=True``.

    The scaled reduced data must equal the unscaled reduced data with
    each cluster column multiplied by the square root of its size, and
    both estimators must give the same inverse-transformed data.
    """
    n_samples, n_clusters = 15, 8
    data_img, mask_img = generate_fake_fmri(
        shape=(10, 11, 12), length=n_samples
    )
    X = NiftiMasker(mask_img=mask_img).fit().transform(data_img)

    # Reference run, without scaling.
    plain = HierarchicalKMeans(n_clusters=n_clusters)
    X_red = plain.fit_transform(X)
    X_compress = plain.inverse_transform(X_red)

    # Same pipeline with scaling switched on.
    scaled = HierarchicalKMeans(n_clusters=n_clusters, scaling=True)
    X_red_scaled = scaled.fit_transform(X)
    sizes = scaled.sizes_
    X_compress_scaled = scaled.inverse_transform(X_red_scaled)

    # Rescale every cluster column of the unscaled result by sqrt(size).
    rescaled_columns = [
        np.sqrt(size) * column for size, column in zip(sizes, X_red.T)
    ]
    assert_array_almost_equal(np.asarray(rescaled_columns).T, X_red_scaled)
    assert_array_almost_equal(X_compress, X_compress_scaled)
@pytest.mark.parametrize("surf_mask_dim", [1, 2])
@pytest.mark.parametrize("n_clusters", [2, 4, 5])
def test_hierarchical_k_means_clustering_surface(
    surf_img_2d, surf_mask_dim, surf_mask_1d, surf_mask_2d, n_clusters
):
    """Test hierarchical k-means clustering on surface."""
    n_samples = 100

    # Pick the mask fixture matching the requested dimensionality;
    # the 2D fixture is called to obtain the mask.
    if surf_mask_dim == 1:
        surf_mask = surf_mask_1d
    else:
        surf_mask = surf_mask_2d()

    # Mask a surface image built with n_samples samples.
    X = SurfaceMasker(surf_mask).fit().transform(surf_img_2d(n_samples))

    # Reduce the data, then project it back.
    hkmeans = HierarchicalKMeans(n_clusters=n_clusters)
    X_transformed = hkmeans.fit_transform(X)
    X_inverse = hkmeans.inverse_transform(X_transformed)

    # The transformed data has one column per cluster.
    assert X_transformed.shape == (n_samples, n_clusters)
    assert hkmeans.n_clusters == n_clusters

    # The inverse-transformed data has the shape of the original data.
    assert X_inverse.shape == X.shape
@pytest.mark.parametrize("img_type", ["surface", "volume"])
def test_hierarchical_k_means_n_clusters_warning(img_type, rng):
    """Check that requesting a very high ``n_clusters`` emits a warning."""
    n_samples = 15

    if img_type != "surface":
        img, _ = generate_fake_fmri(shape=(10, 11, 12), length=n_samples)
        X = NiftiMasker().fit_transform(img)
    else:
        mesh = {"left": flat_mesh(10, 8), "right": flat_mesh(9, 7)}
        # Random data per hemisphere: one row per mesh coordinate and
        # one column per sample (left is drawn first, then right).
        data = {
            hemi: rng.standard_normal(
                size=(part.coordinates.shape[0], n_samples)
            )
            for hemi, part in mesh.items()
        }
        X = SurfaceMasker().fit_transform(SurfaceImage(mesh=mesh, data=data))

    with pytest.warns(
        match="n_clusters should be at most the number of features.",
    ):
        # very high number of clusters
        HierarchicalKMeans(n_clusters=1000).fit_transform(X)