Coverage for nilearn/reporting/get_clusters_table.py: 12%
128 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-18 13:00 +0200
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-18 13:00 +0200
1"""Implement plotting functions useful to report analysis results."""
3import warnings
4from collections import OrderedDict
5from decimal import Decimal
6from string import ascii_lowercase
8import numpy as np
9import pandas as pd
10from nibabel import affines
11from scipy.ndimage import (
12 center_of_mass,
13 generate_binary_structure,
14 label,
15 maximum_filter,
16 minimum_filter,
17)
19from nilearn._utils import check_niimg_3d
20from nilearn._utils.logger import find_stack_level
21from nilearn._utils.niimg import safe_get_data
22from nilearn.image import new_img_like, threshold_img
23from nilearn.image.resampling import coord_transform
def _local_max(data, affine, min_distance):
    """Return the peak and subpeaks of one cluster, spaced by min_distance.

    The candidate maxima are identified, ordered by decreasing value and then
    thinned according to ``min_distance``.

    Adapted from https://stackoverflow.com/a/22631583/2589328

    Parameters
    ----------
    data : array_like
        3D array with masked values for the cluster.

    affine : np.ndarray
        Square matrix specifying the position of the image array data
        in a reference space.

    min_distance : float
        Minimum distance between local maxima in ``data``, in millimeters.

    Returns
    -------
    ijk : `numpy.ndarray`
        (n_foci, 3) array of local maxima indices for the cluster.

    vals : `numpy.ndarray`
        (n_foci,) array of values from ``data`` at ``ijk``.

    """
    peak_ijk, peak_vals = _identify_subpeaks(data)
    peak_xyz, peak_ijk, peak_vals = _sort_subpeaks(
        peak_ijk, peak_vals, affine
    )
    peak_ijk, peak_vals = _pare_subpeaks(
        peak_xyz, peak_ijk, peak_vals, min_distance
    )
    return peak_ijk, peak_vals
def _identify_subpeaks(data):
    """Find the candidate peak and subpeak voxels of one cluster.

    Parameters
    ----------
    data : `numpy.ndarray`
        3D array with masked values for the cluster. Voxels equal to zero
        are never reported as maxima.

    Returns
    -------
    ijk : `numpy.ndarray`
        (n_foci, 3) array of local maximum indices for the cluster.
    vals : `numpy.ndarray`
        (n_foci,) array of values from ``data`` at ``ijk``.

    Notes
    -----
    When a local maximum is made of several contiguous voxels sharing the
    same value (as in a binary cluster), the center of mass of those voxels
    is reported. If that center of mass falls outside the group of voxels it
    was computed from, a warning is emitted and the nearest voxel of the
    group is reported instead.
    """
    outside = data == 0
    inside_vals = data[~outside]

    # A voxel is a candidate when it equals the largest value of its
    # size-3 neighborhood and is not a zero (masked-out) voxel.
    neighborhood_max = maximum_filter(data, 3)
    candidates = (data == neighborhood_max) & ~outside

    # Flat patches only count as maxima when the whole cluster is constant
    # (as in a binary cluster); otherwise require some variation within
    # the neighborhood.
    if not np.isclose(inside_vals.max(), inside_vals.min()):
        neighborhood_min = minimum_filter(data, 3)
        candidates &= (neighborhood_max - neighborhood_min) > 0

    components, n_components = label(candidates)
    component_ids = np.arange(1, n_components + 1)
    centers = center_of_mass(data, components, component_ids)
    ijk = np.round(np.array(centers)).astype(int)

    # A center of mass may land outside its own group of voxels, e.g. for a
    # binary cluster shaped like a donut.
    misplaced = components[ijk[:, 0], ijk[:, 1], ijk[:, 2]] != component_ids
    if np.any(misplaced):
        warnings.warn(
            (
                "Attention: At least one of the (sub)peaks "
                "falls outside of the cluster body. "
                "Identifying the nearest in-cluster voxel."
            ),
            stacklevel=find_stack_level(),
        )
        # Snap each misplaced center to the closest voxel carrying the same
        # label, i.e. constrain the center of mass to the group itself.
        ijk[misplaced] = _cluster_nearest_neighbor(
            ijk[misplaced],
            component_ids[misplaced],
            components,
        )

    vals = data[ijk[:, 0], ijk[:, 1], ijk[:, 2]]
    return ijk, vals
def _cluster_nearest_neighbor(ijk, labels_index, labeled):
    """Snap each query point to the closest voxel of its own cluster.

    Parameters
    ----------
    ijk : :obj:`numpy.ndarray`
        (n_pts, 3) array of query points.
    labels_index : :obj:`numpy.ndarray`
        (n_pts,) array of corresponding cluster indices.
    labeled : :obj:`numpy.ndarray`
        3D array with voxels labeled according to cluster index.

    Returns
    -------
    nbrs : :obj:`numpy.ndarray`
        (n_pts, 3) nearest neighbor points.
    """
    # Coordinates and labels of every labeled voxel, listed in the same
    # order so that one can be used to filter the other.
    voxel_coords = np.argwhere(labeled)
    voxel_labels = labeled[labeled > 0]

    nearest = np.zeros_like(ijk)
    for row, (cluster_label, query) in enumerate(zip(labels_index, ijk)):
        members = voxel_coords[voxel_labels == cluster_label]
        distances = np.linalg.norm(members - query, axis=1)
        # On ties, the first member in array order wins.
        nearest[row] = members[np.argmin(distances)]
    return nearest
def _sort_subpeaks(ijk, vals, affine):
    """Order the subpeaks of a cluster by decreasing statistical value.

    Parameters
    ----------
    ijk : 2D numpy.ndarray
        The matrix indices of subpeaks to sort.
    vals : 1D numpy.ndarray
        The statistical value associated with each subpeak in ``ijk``.
    affine : (4x4) numpy.ndarray
        The affine of the img from which the subpeaks were extracted.
        Used to convert IJK indices to XYZ coordinates.

    Returns
    -------
    xyz : 2D numpy.ndarray
        The sorted coordinates of the subpeaks.
    ijk : 2D numpy.ndarray
        The sorted matrix indices of subpeaks.
    vals : 1D numpy.ndarray
        The sorted statistical value associated with each subpeak in ``ijk``.
    """
    # Sorting the negated values ascending yields a descending order.
    descending = np.argsort(-vals)
    sorted_vals = vals[descending]
    sorted_ijk = ijk[descending, :]
    # Convert the reordered indices to xyz coordinates in mm.
    sorted_xyz = affines.apply_affine(affine, sorted_ijk)
    return sorted_xyz, sorted_ijk, sorted_vals
def _pare_subpeaks(xyz, ijk, vals, min_distance):
    """Reduce list of subpeaks based on distance.

    Peaks are visited in the order given (expected: strongest first). Each
    retained peak discards every later peak lying within ``min_distance`` of
    it. A peak that has been discarded stays discarded and does not itself
    suppress other peaks.

    Parameters
    ----------
    xyz : 2D numpy.ndarray
        Subpeak coordinates to reduce. Rows correspond to peaks, columns
        correspond to x, y, and z dimensions.
    ijk : 2D numpy.ndarray
        The subpeak coordinates in ``xyz``, but converted to matrix indices.
    vals : 1D numpy.ndarray
        The statistical value associated with each subpeak in ``xyz``/``ijk``.
    min_distance : float
        The minimum distance between subpeaks, in millimeters.

    Returns
    -------
    ijk : 2D numpy.ndarray
        The reduced index of subpeaks.
    vals : 1D numpy.ndarray
        The statistical values associated with the reduced set of subpeaks.
    """
    n_peaks = xyz.shape[0]
    keep_idx = np.ones(n_peaks, dtype=bool)
    for i in range(n_peaks):
        if not keep_idx[i]:
            # Discarded peaks do not get to suppress their neighbors.
            continue
        dists = np.linalg.norm(xyz[i + 1 :] - xyz[i], axis=1)
        # Only ever clear flags: assigning the comparison result directly
        # would re-enable a peak already rejected by a stronger peak as soon
        # as it happens to be far from the current one.
        keep_idx[i + 1 :] &= dists > min_distance
    ijk = ijk[keep_idx, :]
    vals = vals[keep_idx]
    return ijk, vals
def get_clusters_table(
    stat_img,
    stat_threshold,
    cluster_threshold=None,
    two_sided=False,
    min_distance=8.0,
    return_label_maps=False,
):
    """Create pandas dataframe with img cluster statistics.

    This function should work on any statistical maps where more extreme
    values indicate greater statistical significance.
    For example, z-statistic or -log10(p) maps are valid inputs, but a
    p-value map is not.

    .. important::

        For binary clusters (clusters comprised of only one value),
        the table reports the center of mass of the cluster,
        rather than any peaks/subpeaks.

        This center of mass may, in some cases, appear outside of the
        cluster.

        .. versionchanged:: 0.9.2
            In this case, the cluster voxel nearest to the center of mass is
            reported.

    .. seealso::

        This function does not report any named anatomical location
        for the clusters.
        To get the names of the location of the clusters
        according to one or several atlases,
        we recommend using
        the `atlasreader package <https://github.com/miykael/atlasreader>`_.

    Parameters
    ----------
    stat_img : Niimg-like object
        Statistical image to threshold and summarize.

    stat_threshold : :obj:`float`
        Cluster forming threshold. This value must be in the same scale as
        ``stat_img``.

    cluster_threshold : :obj:`int` or None, default=None
        Cluster size threshold, in :term:`voxels<voxel>`.
        If None, then no cluster size threshold will be applied.

    two_sided : :obj:`bool`, default=False
        Whether to employ two-sided thresholding or to evaluate positive
        values only.

    min_distance : :obj:`float`, default=8.0
        Minimum distance between subpeaks, in millimeters.

        .. note::
            If two different clusters are closer than ``min_distance``, it
            can result in peaks closer than ``min_distance``.

    return_label_maps : :obj:`bool`, default=False
        Whether or not to additionally output cluster label map images.

        .. versionadded:: 0.10.1

    Returns
    -------
    result_table : :obj:`pandas.DataFrame`
        Table with peaks and subpeaks from thresholded ``stat_img``.
        Within each tail, clusters are listed by decreasing peak value, and
        each cluster contributes its peak plus at most three subpeaks.
        The columns in this table include:

        ================== ====================================================
        Cluster ID         The cluster number. Subpeaks have letters after the
                           number.
        X/Y/Z              The coordinate for the peak, in millimeters.
        Peak Stat          The statistical value associated with the peak.
                           The statistic type is dependent on the type of the
                           statistical image.
        Cluster Size (mm3) The size of the cluster, in millimeters cubed.
                           Rows corresponding to subpeaks will not have a value
                           in this column.
        ================== ====================================================

    label_maps : :obj:`list`
        Returned if return_label_maps=True
        List of Niimg-like objects of cluster label maps.
        If two_sided==True, first and second maps correspond
        to positive and negative tails.
        A tail without any voxel beyond the threshold adds no map to the
        list.

        .. versionadded:: 0.10.1

    """
    column_names = [
        "Cluster ID",
        "X",
        "Y",
        "Z",
        "Peak Stat",
        "Cluster Size (mm3)",
    ]
    # No cluster size threshold is expressed as a threshold of 0.
    if cluster_threshold is None:
        cluster_threshold = 0

    # Make sure we have a 3D Niimg-like object and remember its geometry
    # before the image is replaced by its thresholded version.
    stat_img = check_niimg_3d(stat_img)
    affine = stat_img.affine
    shape = stat_img.shape

    stat_img = threshold_img(
        img=stat_img,
        threshold=stat_threshold,
        cluster_threshold=cluster_threshold,
        two_sided=two_sided,
        mask_img=None,
        copy=True,
        copy_header=True,
    )

    # With a cluster threshold the data may end up being modified,
    # so a copy is requested in that case.
    stat_map = safe_get_data(
        stat_img,
        ensure_finite=True,
        copy_data=(cluster_threshold != 0),
    )

    # 6-connectivity (aka NN1 or "faces") is used to form clusters.
    connectivity = generate_binary_structure(rank=3, connectivity=1)

    voxel_size = np.prod(stat_img.header.get_zooms())

    tails = [1, -1] if two_sided else [1]
    found_clusters = False
    rows = []
    label_maps = []
    for sign in tails:
        # Flip the map so that the tail of interest is the positive one.
        signed_map = stat_map * sign

        # Voxels beyond the cluster-defining threshold.
        suprathreshold = (signed_map > stat_threshold).astype(int)

        # Nothing survives in this tail: warn and move on to the next one.
        if np.sum(suprathreshold) == 0:
            warnings.warn(
                "Attention: No clusters "
                f"with stat {'higher' if sign == 1 else 'lower'} "
                f"than {stat_threshold * sign}",
                category=UserWarning,
                stacklevel=find_stack_level(),
            )
            continue

        # Label the clusters and order them by decreasing maximum value.
        label_map = label(suprathreshold, connectivity)[0]
        cluster_labels = sorted(np.unique(label_map)[1:])
        cluster_maxima = np.array(
            [
                np.max(signed_map * (label_map == cluster_label))
                for cluster_label in cluster_labels
            ]
        )
        cluster_labels = [
            cluster_labels[position]
            for position in (-cluster_maxima).argsort()
        ]

        if return_label_maps:
            # Renumber the labels so that they follow the sorted order,
            # then store the result as an image.
            relabel_idx = (
                np.insert(cluster_labels, 0, 0).argsort().astype(np.int32)
            )
            relabel_map = relabel_idx[label_map.flatten()].reshape(shape)
            label_maps.append(
                new_img_like(stat_img, relabel_map, affine=affine)
            )

        for cluster_rank, cluster_label in enumerate(cluster_labels):
            cluster_number = cluster_rank + 1
            cluster_mask = label_map == cluster_label
            masked_data = signed_map * cluster_mask

            cluster_size_mm = int(np.sum(cluster_mask) * voxel_size)

            # Peak, subpeaks and their statistics for this cluster.
            subpeak_ijk, subpeak_vals = _local_max(
                masked_data,
                stat_img.affine,
                min_distance=min_distance,
            )
            subpeak_vals *= sign  # restore the original sign
            subpeak_xyz = np.asarray(
                coord_transform(
                    subpeak_ijk[:, 0],
                    subpeak_ijk[:, 1],
                    subpeak_ijk[:, 2],
                    stat_img.affine,
                )
            ).tolist()
            subpeak_xyz = np.array(subpeak_xyz).T

            # Report the peak and, at most, the top 3 subpeaks.
            for rank in range(min(len(subpeak_vals), 4)):
                if rank == 0:
                    row_id = cluster_number
                    row_size = cluster_size_mm
                else:
                    # Subpeaks are named cluster number + letter:
                    # 1a, 1b, etc. They carry no cluster size.
                    row_id = f"{cluster_number}{ascii_lowercase[rank - 1]}"
                    row_size = ""
                rows.append(
                    [
                        row_id,
                        subpeak_xyz[rank, 0],
                        subpeak_xyz[rank, 1],
                        subpeak_xyz[rank, 2],
                        subpeak_vals[rank],
                        row_size,
                    ]
                )

        # Reaching this point means this tail had clusters.
        found_clusters = True

    if found_clusters:
        result_table = pd.DataFrame(columns=column_names, data=rows)
    else:
        result_table = pd.DataFrame(columns=column_names)

    if return_label_maps:
        return result_table, label_maps
    return result_table
def clustering_params_to_dataframe(
    threshold,
    cluster_threshold,
    min_distance,
    height_control,
    alpha,
    is_volume_glm,
):
    """Collect the clustering parameters into a one-column DataFrame.

    For use as part of the Cluster Table.

    Parameters
    ----------
    threshold : float
        Cluster forming threshold in same scale as `stat_img` (either a
        p-value or z-scale value). It is rounded to 3 decimals for display.

    cluster_threshold : int or None
        Cluster size threshold, in voxels.

    min_distance : float
        For display purposes only.
        Minimum distance between subpeaks in mm.

    height_control : string or None
        False positive control meaning of cluster forming
        threshold: 'fpr' or 'fdr' or 'bonferroni' or None

    alpha : float
        Number controlling the thresholding (either a p-value or q-value).
        Its actual meaning depends on the height_control parameter.
        Only read when ``height_control`` is set.

    is_volume_glm : bool
        True if we are dealing with volume data.

    Returns
    -------
    table_details : Pandas.DataFrame
        Dataframe with clustering parameters, one row per parameter.

    """
    details = OrderedDict()
    rounded_threshold = np.around(threshold, 3)

    if not height_control:
        details["Height control"] = "None"
        details["Threshold Z"] = rounded_threshold
    else:
        details["Height control"] = height_control
        # Very small alpha values are displayed in scientific notation.
        if alpha < 0.001:
            alpha = f"{Decimal(alpha):.2E}"
        # The row label is the greek letter alpha, written as an escape.
        details["\u03b1"] = alpha
        details["Threshold (computed)"] = rounded_threshold

    if is_volume_glm:
        details["Cluster size threshold (voxels)"] = cluster_threshold
        # NOTE(review): this row is assumed to belong to the volume-only
        # branch together with the cluster size threshold - confirm.
        details["Minimum distance (mm)"] = min_distance

    return pd.DataFrame.from_dict(details, orient="index")