Coverage for nilearn/regions/hierarchical_kmeans_clustering.py: 19% (109 statements), coverage.py v7.9.1, created at 2025-06-20 10:58 +0200
1"""Hierarchical k-means clustering."""
3import warnings
5import numpy as np
6from sklearn.base import BaseEstimator, ClusterMixin, TransformerMixin
7from sklearn.cluster import MiniBatchKMeans
8from sklearn.utils import check_array
9from sklearn.utils.validation import check_is_fitted
11from nilearn._utils import fill_doc
12from nilearn._utils.logger import find_stack_level
13from nilearn._utils.tags import SKLEARN_LT_1_6

def _remove_empty_labels(labels):
    """Remove empty label values from labels list.

    Returns labels mapped to np.arange(n_unique),
    where n_unique is the number of unique values in labels.
    """
    vals = np.unique(labels)
    inverse_vals = -np.ones(labels.max() + 1, dtype=int)
    inverse_vals[vals] = np.arange(len(vals))
    return inverse_vals[labels]
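
# Illustration only, not part of the module: _remove_empty_labels maps the
# unique values in ``labels`` (here the hypothetical set {0, 4, 7}) onto
# np.arange(n_unique), closing the gaps left by emptied labels:
#
#     _remove_empty_labels(np.array([0, 4, 4, 7, 0]))
#     # -> array([0, 1, 1, 2, 0])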

def _adjust_small_clusters(array, n_clusters):
    """Take an ndarray of floats summing to n_clusters \
    and try to round it while enforcing that the rounded array \
    still sums to n_clusters and that every element is at least 1.
    """
    array_round = np.rint(array).astype(int)
    array_round = np.maximum(array_round, 1)

    if np.sum(array_round) < n_clusters:
        while np.sum(array_round) != n_clusters:
            idx = np.argmax(array - array_round)
            array_round[idx] += 1
    elif np.sum(array_round) == n_clusters:
        pass
    elif np.sum(array_round) > n_clusters:
        parent_idx_ = np.arange(array_round.shape[0])
        while np.sum(array_round) != n_clusters:
            # prevent elements rounded to 1 from being decreased
            # in edge cases
            mask = array_round != 1
            idx = np.argmin(array[mask] - array_round[mask])
            parent_idx = parent_idx_[mask][idx]
            array_round[parent_idx] -= 1
    return array_round
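
# A worked example with hypothetical numbers: for array = [3.6, 0.2, 0.2]
# and n_clusters = 4, plain rounding gives [4, 0, 0]; the floor at 1 raises
# this to [4, 1, 1] (sum 6), and the correction loop then decrements the
# only entry above 1 twice, so every element stays >= 1 and the sum is 4:
#
#     _adjust_small_clusters(np.array([3.6, 0.2, 0.2]), 4)
#     # -> array([2, 1, 1])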

@fill_doc
def hierarchical_k_means(
    X,
    n_clusters,
    init="k-means++",
    batch_size=1000,
    n_init=10,
    max_no_improvement=10,
    verbose=0,
    random_state=0,
):
    """Use a recursive k-means to cluster X.

    First cluster the data into sqrt(n_clusters) parcels,
    then run k-means a second time on each parcel.

    Parameters
    ----------
    X : ndarray (n_samples, n_features)
        Data to cluster.

    n_clusters : :obj:`int`
        The number of clusters to find.

    init : {'k-means++', 'random' or an ndarray}, default='k-means++'
        Method for initialization.
        'k-means++' : selects initial cluster centers for k-means
        clustering in a smart way to speed up convergence. See section
        Notes in k_init for more details.
        'random': choose k observations (rows) at random from data for
        the initial centroids.
        If an ndarray is passed, it should be of shape (n_clusters, n_features)
        and gives the initial centers.

    batch_size : :obj:`int`, default=1000
        Size of the mini batches. (KMeans is performed through
        MiniBatchKMeans.)

    n_init : :obj:`int`, default=10
        Number of random initializations that are tried.
        In contrast to KMeans, the algorithm is only run once, using the
        best of the ``n_init`` initializations as measured by inertia.

    max_no_improvement : :obj:`int`, default=10
        Control early stopping based on the consecutive number of mini
        batches that do not yield an improvement on the smoothed inertia.
        To disable convergence detection based on inertia, set
        max_no_improvement to None.

    random_state : :obj:`int`, RandomState instance or None, default=0
        Determines random number generation for centroid initialization and
        random reassignment. Use an int to make the randomness deterministic.

    %(verbose0)s

    Returns
    -------
    labels : list of ints (len n_features)
        Parcellation of features in clusters.
    """
    # First pass: coarse clustering into ~sqrt(n_clusters) parcels.
    n_big_clusters = int(np.sqrt(n_clusters))
    mbk = MiniBatchKMeans(
        init=init,
        n_clusters=n_big_clusters,
        batch_size=batch_size,
        n_init=n_init,
        max_no_improvement=max_no_improvement,
        verbose=verbose,
        random_state=random_state,
    ).fit(X)
    coarse_labels = mbk.labels_
    fine_labels = np.zeros_like(coarse_labels)
    q = 0
    counts = np.bincount(coarse_labels)
    # Share the n_clusters budget across the coarse parcels,
    # proportionally to their sizes.
    exact_clusters = np.asarray(
        [
            n_clusters * counts[i] * 1.0 / X.shape[0]
            for i in range(n_big_clusters)
        ]
    )

    adjusted_clusters = _adjust_small_clusters(exact_clusters, n_clusters)
    # Second pass: run k-means again inside each coarse parcel.
    for i, n_small_clusters in enumerate(adjusted_clusters):
        mbk = MiniBatchKMeans(
            init=init,
            n_clusters=n_small_clusters,
            batch_size=batch_size,
            random_state=random_state,
            max_no_improvement=max_no_improvement,
            verbose=verbose,
            n_init=n_init,
        ).fit(X[coarse_labels == i])
        fine_labels[coarse_labels == i] = q + mbk.labels_
        q += n_small_clusters

    return _remove_empty_labels(fine_labels)
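
# A minimal usage sketch (illustration only; shapes and values are made up).
# Rows of X are the items being clustered, so voxels go on the rows here:
#
#     import numpy as np
#     rng = np.random.default_rng(0)
#     voxels = rng.standard_normal((500, 40))   # 500 voxels, 40 time points
#     labels = hierarchical_k_means(voxels, n_clusters=10)
#     labels.shape   # -> (500,): one parcel label per voxel, with values
#                    #    in 0..9 (fewer if some clusters came back empty)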

@fill_doc
class HierarchicalKMeans(ClusterMixin, TransformerMixin, BaseEstimator):
    """Hierarchical KMeans.

    First cluster the samples into big clusters, then cluster the samples
    inside these big clusters into smaller ones.

    Parameters
    ----------
    n_clusters : :obj:`int`
        The number of clusters to find.

    init : {'k-means++', 'random' or an ndarray}, default='k-means++'
        Method for initialization.

        * 'k-means++' : selects initial cluster centers for k-means
          clustering in a smart way to speed up convergence. See section
          Notes in k_init for more details.

        * 'random': choose k observations (rows) at random from data for
          the initial centroids.

        * If an ndarray is passed, it should be of shape (n_clusters,
          n_features) and gives the initial centers.

    batch_size : :obj:`int`, default=1000
        Size of the mini batches. (KMeans is performed through
        MiniBatchKMeans.)

    n_init : :obj:`int`, default=10
        Number of random initializations that are tried.
        In contrast to KMeans, the algorithm is only run once, using the
        best of the ``n_init`` initializations as measured by inertia.

    max_no_improvement : :obj:`int`, default=10
        Control early stopping based on the consecutive number of mini
        batches that do not yield an improvement on the smoothed inertia.
        To disable convergence detection based on inertia, set
        max_no_improvement to None.

    random_state : :obj:`int`, RandomState instance or None, default=0
        Determines random number generation for centroid initialization and
        random reassignment. Use an int to make the randomness deterministic.

    scaling : :obj:`bool`, default=False
        If scaling is True, each cluster is scaled by the square root of its
        size during transform(), preserving the l2-norm of the image.
        inverse_transform() will apply the inverse scaling to yield an image
        with the same l2-norm as the input.

    %(verbose0)s

    Attributes
    ----------
    labels_ : ndarray, shape = [n_features]
        Cluster labels for each feature.

    sizes_ : ndarray, shape = [n_clusters]
        It contains the size of each cluster.

    """
    def __init__(
        self,
        n_clusters=None,
        init="k-means++",
        batch_size=1000,
        n_init=10,
        max_no_improvement=10,
        verbose=0,
        random_state=0,
        scaling=False,
    ):
        self.n_clusters = n_clusters
        self.init = init
        self.batch_size = batch_size
        self.n_init = n_init
        self.max_no_improvement = max_no_improvement
        self.verbose = verbose
        self.random_state = random_state
        self.scaling = scaling

    def _more_tags(self):
        """Return estimator tags.

        TODO remove when bumping sklearn_version > 1.5
        """
        return self.__sklearn_tags__()

    def __sklearn_tags__(self):
        """Return estimator tags.

        See the sklearn documentation for more details on tags
        https://scikit-learn.org/1.6/developers/develop.html#estimator-tags
        """
        # TODO: get rid of this if block when bumping sklearn_version > 1.5
        if SKLEARN_LT_1_6:
            from nilearn._utils.tags import tags

            return tags()

        from nilearn._utils.tags import InputTags

        tags = super().__sklearn_tags__()
        tags.input_tags = InputTags(niimg_like=False)
        return tags

    @fill_doc
    def fit(self, X, y=None):
        """Compute clustering of the data.

        Parameters
        ----------
        X : ndarray, shape = [n_samples, n_features]
            Training data.

        %(y_dummy)s

        Returns
        -------
        self
        """
        del y
        X = check_array(
            X, ensure_min_features=2, ensure_min_samples=2, estimator=self
        )
        # Transpose the data so that we can cluster features (voxels)
        # and input them as samples to sklearn's clustering algorithm,
        # because sklearn's clustering algorithms operate on samples,
        # not on features.
        X = X.T
        # n_features for sklearn's clustering algorithm is then the
        # number of samples in the input data.
        n_features = X.shape[1]

        if not isinstance(self.n_clusters, int) or self.n_clusters <= 0:
            raise ValueError(
                "n_clusters should be an integer greater than 0."
                f" {self.n_clusters} was provided."
            )

        if self.n_clusters > n_features:
            self.n_clusters = n_features
            warnings.warn(
                "n_clusters should be at most the number of "
                f"features. Taking n_clusters = {n_features} instead.",
                stacklevel=find_stack_level(),
            )
        self.labels_ = hierarchical_k_means(
            X,
            self.n_clusters,
            self.init,
            self.batch_size,
            self.n_init,
            self.max_no_improvement,
            self.verbose,
            self.random_state,
        )
        sizes = np.bincount(self.labels_)

        self.sizes_ = sizes
        self.n_clusters = len(sizes)
        return self
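
    # A minimal fit() sketch (illustration only; the data is random):
    #
    #     import numpy as np
    #     rng = np.random.default_rng(42)
    #     X = rng.standard_normal((20, 300))   # 20 samples, 300 voxels
    #     hkm = HierarchicalKMeans(n_clusters=9).fit(X)
    #     hkm.labels_.shape   # -> (300,): one label per voxel
    #     hkm.sizes_.sum()    # -> 300: cluster sizes sum to n_features
    #     hkm.n_clusters      # may end up < 9 if some clusters came back
    #                         # empty and were removed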

    def __sklearn_is_fitted__(self):
        return hasattr(self, "labels_")

    @fill_doc
    def transform(
        self,
        X,
        y=None,  # noqa: ARG002
    ):
        """Apply clustering, reduce the dimensionality of the data.

        Parameters
        ----------
        X : ndarray, shape = [n_samples, n_features]
            Data to transform with the fitted clustering.

        %(y_dummy)s

        Returns
        -------
        X_red : ndarray, shape = [n_samples, n_clusters]
            Data reduced with agglomerated signal for each cluster.
        """
        check_is_fitted(self)

        # Transpose the data so that we can cluster features (voxels)
        # and input them as samples to sklearn's clustering algorithm.
        X = X.T
        unique_labels = np.arange(self.n_clusters)

        mean_cluster = np.empty(
            (len(unique_labels), X.shape[1]), dtype=X.dtype
        )
        for label in unique_labels:
            mean_cluster[label] = np.mean(X[self.labels_ == label], axis=0)

        X_red = np.array(mean_cluster)

        if self.scaling:
            X_red = X_red * np.sqrt(self.sizes_[:, np.newaxis])

        # Transpose the data back to the original shape, i.e.
        # (n_samples, n_clusters).
        X_red = X_red.T
        return X_red
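
    # A worked check on scaling (not library output): with scaling=True a
    # cluster of size s contributes mean * sqrt(s), so a cluster of s voxels
    # all equal to v reduces to the single value v * sqrt(s), whose l2-norm
    # matches that of the original s voxels: sqrt(s * v**2) = |v| * sqrt(s).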

    def inverse_transform(self, X_red):
        """Send the reduced 2D data matrix back to the original feature \
        space (voxels).

        Parameters
        ----------
        X_red : ndarray, shape = [n_samples, n_clusters]
            Data reduced with agglomerated signal for each cluster.

        Returns
        -------
        X_inv : ndarray, shape = [n_samples, n_features]
            Reduced data expanded back to the original feature space.
        """
        check_is_fitted(self)

        X_red = X_red.T
        inverse = self.labels_
        if self.scaling:
            X_red = X_red / np.sqrt(self.sizes_[:, np.newaxis])
        X_inv = X_red[inverse, ...]
        X_inv = X_inv.T
        return X_inv
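
    # A round-trip sketch (illustration only, assuming ``hkm`` and ``X``
    # from the fit() example above):
    #
    #     X_red = hkm.transform(X)                 # (20, hkm.n_clusters)
    #     X_back = hkm.inverse_transform(X_red)    # (20, 300): each voxel
    #                                              # carries its cluster mean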

    def set_output(self, *, transform=None):
        """Set the output container when ``"transform"`` is called.

        .. warning::

            This has not been implemented yet.
        """
        raise NotImplementedError()