Coverage for nilearn/regions/tests/test_hierarchical_kmeans_clustering.py: 0%

109 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-20 10:58 +0200

1import numpy as np 

2import pytest 

3from numpy.testing import assert_array_almost_equal 

4from sklearn.utils.estimator_checks import parametrize_with_checks 

5 

6from nilearn._utils.data_gen import generate_fake_fmri 

7from nilearn._utils.estimator_checks import ( 

8 check_estimator, 

9 nilearn_check_estimator, 

10 return_expected_failed_checks, 

11) 

12from nilearn._utils.tags import SKLEARN_LT_1_6 

13from nilearn.maskers import NiftiMasker, SurfaceMasker 

14from nilearn.regions.hierarchical_kmeans_clustering import ( 

15 HierarchicalKMeans, 

16 _adjust_small_clusters, 

17 hierarchical_k_means, 

18) 

19from nilearn.surface import SurfaceImage 

20from nilearn.surface.tests.test_surface import flat_mesh 

21 

# NOTE: n_clusters is deliberately kept small (< 3)
# so that the generic sklearn estimator checks stay easy to run.
ESTIMATORS_TO_CHECK = [HierarchicalKMeans(n_clusters=2)]

if SKLEARN_LT_1_6:

    @pytest.mark.parametrize(
        ("estimator", "check", "name"),
        check_estimator(estimators=ESTIMATORS_TO_CHECK),
    )
    def test_check_estimator_sklearn_valid(estimator, check, name):  # noqa: ARG001
        """Run the sklearn checks listed as valid for the estimator."""
        check(estimator)

    @pytest.mark.xfail(reason="invalid checks should fail")
    @pytest.mark.parametrize(
        ("estimator", "check", "name"),
        check_estimator(estimators=ESTIMATORS_TO_CHECK, valid=False),
    )
    def test_check_estimator_sklearn_invalid(estimator, check, name):  # noqa: ARG001
        """Run the sklearn checks listed as invalid; each should fail."""
        check(estimator)

else:

    @parametrize_with_checks(
        estimators=ESTIMATORS_TO_CHECK,
        expected_failed_checks=return_expected_failed_checks,
    )
    def test_check_estimator_sklearn(estimator, check):
        """Run the sklearn checks generated by parametrize_with_checks."""
        check(estimator)

55 

56 

@pytest.mark.parametrize(
    ("estimator", "check", "name"),
    nilearn_check_estimator(estimators=ESTIMATORS_TO_CHECK),
)
def test_check_estimator_nilearn(estimator, check, name):  # noqa: ARG001
    """Run each nilearn-specific estimator check on the estimator."""
    check(estimator)

64 

65 

@pytest.mark.parametrize(
    "test_list, n_clusters",
    [
        ([2.4, 2.6], 5),
        ([2.7, 3.0, 3.3], 9),
        ([10 / 3, 10 / 3, 10 / 3], 10),
        ([1 / 3, 11 / 3, 11 / 3, 10 / 3], 11),
    ],
)
def test_adjust_small_clusters(test_list, n_clusters):
    """Check the output of _adjust_small_clusters on fractional counts.

    The returned values must all be non-zero integers
    whose sum is exactly ``n_clusters``.
    """
    test_list = np.asarray(test_list)

    # Sanity check on the parametrized input itself.
    # The inputs are floats (thirds, decimals), so compare with a tolerance
    # rather than relying on exact floating-point equality.
    assert np.isclose(np.sum(test_list), n_clusters)

    list_round = _adjust_small_clusters(test_list, n_clusters)

    assert np.all(list_round != 0)
    # The adjusted values are integers, so exact equality is appropriate here.
    assert np.sum(list_round) == n_clusters
    assert all(isinstance(a, (int, np.integer)) for a in list_round)

86 

87 

def test_hierarchical_k_means():
    """Check that repeated samples share a label and distinct ones do not."""
    # Three distinct 2D points, repeated five times in the same order:
    # X has one row per sample (15 rows, 2 columns).
    X = np.tile([[10, -10, 30], [12, -8, 24]], 5).T

    test_labels = hierarchical_k_means(X, 3)

    # The label values themselves are arbitrary, so the expected pattern
    # is built from the labels given to the first occurrence of each point.
    first_labels = [test_labels[0], test_labels[1], test_labels[2]]

    # Without this check the comparison below would also pass
    # if every sample ended up in the same cluster.
    assert np.unique(first_labels).size == 3

    truth_labels = np.tile(first_labels, 5)
    assert_array_almost_equal(test_labels, truth_labels)

95 

96 

def test_hierarchical_k_means_clustering_transform():
    """Check that transform yields one column per cluster."""
    n_samples, n_clusters = 15, 8
    expected_shape = (n_samples, n_clusters)

    data_img, mask_img = generate_fake_fmri(
        shape=(10, 11, 12), length=n_samples
    )
    fitted_masker = NiftiMasker(mask_img=mask_img).fit()
    X = fitted_masker.transform(data_img)

    fitted_hkmeans = HierarchicalKMeans(n_clusters=n_clusters).fit(X)
    X_red = fitted_hkmeans.transform(X)

    assert X_red.shape == expected_shape

109 

110 

def test_hierarchical_k_means_clustering_inverse_transform():
    """Check that inverse_transform restores the shape of the input."""
    n_samples, n_clusters = 15, 8

    data_img, mask_img = generate_fake_fmri(
        shape=(10, 11, 12), length=n_samples
    )
    fitted_masker = NiftiMasker(mask_img=mask_img).fit()
    X = fitted_masker.transform(data_img)

    fitted_hkmeans = HierarchicalKMeans(n_clusters=n_clusters).fit(X)
    # Round trip: reduce to cluster space, then map back to feature space.
    X_inv = fitted_hkmeans.inverse_transform(fitted_hkmeans.transform(X))

    assert X_inv.shape == X.shape

124 

125 

@pytest.mark.parametrize("n_clusters", [None, -2, 0, "2"])
def test_hierarchical_k_means_clustering_error_n_clusters(n_clusters):
    """Check that fit raises ValueError for each invalid n_clusters value."""
    import re

    n_samples = 15
    data_img, mask_img = generate_fake_fmri(
        shape=(10, 11, 12), length=n_samples
    )
    masker = NiftiMasker(mask_img=mask_img).fit()
    X = masker.transform(data_img)

    expected_message = (
        "n_clusters should be an integer greater than 0."
        f" {n_clusters} was provided."
    )
    # pytest treats `match` as a regular expression (re.search):
    # escape the message so that '.' only matches a literal dot.
    with pytest.raises(ValueError, match=re.escape(expected_message)):
        HierarchicalKMeans(n_clusters=n_clusters).fit(X)

141 

142 

def test_hierarchical_k_means_clustering_scaling():
    """Compare the outputs obtained with and without ``scaling=True``.

    Each scaled reduced column must equal the unscaled one
    multiplied by the square root of the matching entry of ``sizes_``,
    and both estimators must give the same inverse-transformed data.
    """
    n_samples, n_clusters = 15, 8
    data_img, mask_img = generate_fake_fmri(
        shape=(10, 11, 12), length=n_samples
    )
    X = NiftiMasker(mask_img=mask_img).fit().transform(data_img)

    # Reference: default estimator (no scaling requested).
    plain = HierarchicalKMeans(n_clusters=n_clusters)
    X_red = plain.fit_transform(X)
    X_compress = plain.inverse_transform(X_red)

    # Same estimator with scaling enabled.
    scaled = HierarchicalKMeans(n_clusters=n_clusters, scaling=True)
    X_red_scaled = scaled.fit_transform(X)
    sizes = scaled.sizes_
    X_compress_scaled = scaled.inverse_transform(X_red_scaled)

    # Rescale the unscaled reduced data column by column.
    expected_scaled = np.column_stack(
        [np.sqrt(size) * column for size, column in zip(sizes, X_red.T)]
    )

    assert_array_almost_equal(expected_scaled, X_red_scaled)
    assert_array_almost_equal(X_compress, X_compress_scaled)

166 

167 

@pytest.mark.parametrize("surf_mask_dim", [1, 2])
@pytest.mark.parametrize("n_clusters", [2, 4, 5])
def test_hierarchical_k_means_clustering_surface(
    surf_img_2d, surf_mask_dim, surf_mask_1d, surf_mask_2d, n_clusters
):
    """Test hierarchical k-means clustering on surface data."""
    n_samples = 100

    # surf_mask_1d is used as is, surf_mask_2d has to be called.
    if surf_mask_dim == 1:
        surf_mask = surf_mask_1d
    else:
        surf_mask = surf_mask_2d()

    # mask a surface image holding n_samples samples
    masker = SurfaceMasker(surf_mask).fit()
    X = masker.transform(surf_img_2d(n_samples))

    # reduce the data, then map it back to the original feature space
    hkmeans = HierarchicalKMeans(n_clusters=n_clusters)
    X_transformed = hkmeans.fit_transform(X)
    X_inverse = hkmeans.inverse_transform(X_transformed)

    # the transformed data must have exactly n_clusters features
    assert X_transformed.shape == (n_samples, n_clusters)
    assert hkmeans.n_clusters == n_clusters

    # the inverse-transformed data must have the shape of the input
    assert X_inverse.shape == X.shape

193 

194 

@pytest.mark.parametrize("img_type", ["surface", "volume"])
def test_hierarchical_k_means_n_clusters_warning(img_type, rng):
    """Check the warning emitted when a very high n_clusters is requested."""
    import re

    n_samples = 15
    if img_type == "surface":
        mesh = {
            "left": flat_mesh(10, 8),
            "right": flat_mesh(9, 7),
        }
        # One row per mesh coordinate, one column per sample.
        data = {
            "left": rng.standard_normal(
                size=(mesh["left"].coordinates.shape[0], n_samples)
            ),
            "right": rng.standard_normal(
                size=(mesh["right"].coordinates.shape[0], n_samples)
            ),
        }
        img = SurfaceImage(mesh=mesh, data=data)
        X = SurfaceMasker().fit_transform(img)
    else:
        img, _ = generate_fake_fmri(shape=(10, 11, 12), length=n_samples)
        X = NiftiMasker().fit_transform(img)

    # pytest treats `match` as a regular expression (re.search):
    # escape the message so that the final '.' only matches a literal dot.
    expected_message = "n_clusters should be at most the number of features."
    with pytest.warns(match=re.escape(expected_message)):
        # very high number of clusters
        HierarchicalKMeans(n_clusters=1000).fit_transform(X)