1"""Utility functions for the permuted least squares method."""
3from warnings import warn
5import numpy as np
6from scipy import linalg
7from scipy.ndimage import label
9from nilearn._utils.logger import find_stack_level


def calculate_tfce(
    arr4d,
    bin_struct,
    E=0.5,
    H=2,
    dh="auto",
    two_sided_test=True,
):
20 """Calculate threshold-free cluster enhancement values for scores maps.
22 The :term:`TFCE` calculation is mostly implemented as described in [1]_,
23 with minor modifications to produce similar results to fslmaths, as well
24 as to support two-sided testing.
26 Parameters
27 ----------
28 arr4d : :obj:`numpy.ndarray` of shape (X, Y, Z, R)
29 Unthresholded 4D array of 3D t-statistic maps.
30 R = regressor.
31 bin_struct : :obj:`numpy.ndarray` of shape (3, 3, 3)
32 Connectivity matrix for defining clusters.
33 E : :obj:`float`, default=0.5
34 Extent weight.
35 H : :obj:`float`, default=2
36 Height weight.
37 dh : 'auto' or :obj:`float`, default='auto'
38 Step size for TFCE calculation.
39 If set to 'auto', use 100 steps, as is done in fslmaths.
40 A good alternative is 0.1 for z and t maps, as in [1]_.
    two_sided_test : :obj:`bool`, default=True
        Whether to assess both positive and negative clusters (True) or just
        positive ones (False).

    Returns
    -------
    tfce_arr : :obj:`numpy.ndarray` of shape (X, Y, Z, R)
        :term:`TFCE` values.

    Notes
    -----
    In [1]_, each threshold's partial TFCE score is multiplied by dh,
    which makes TFCE values directly comparable across different step sizes.
    However, in fslmaths, this is not done.
    In the interest of maximizing similarity between nilearn and established
    tools, we chose to follow fslmaths' approach.

    Additionally, we have modified the method to support two-sided testing.
    In fslmaths, only positive clusters are considered.

    References
    ----------
    .. [1] Smith, S. M., & Nichols, T. E. (2009).
        Threshold-free cluster enhancement: addressing problems of smoothing,
        threshold dependence and localization in cluster inference.
        Neuroimage, 44(1), 83-98.
    """
    tfce_4d = np.zeros_like(arr4d)

    # For each passed t map
    for i_regressor in range(arr4d.shape[3]):
        arr3d = arr4d[..., i_regressor]

        signs = [-1, 1] if two_sided_test else [1]
        score_threshs = _return_score_threshs(arr3d, dh, two_sided_test)

        # Apply each sign in turn, so that negative clusters can be
        # enhanced with the same logic as positive ones
        # (the sign is restored when computing cluster_tfces below).
        for sign in signs:
            # Initialize a temporary copy of arr3d with the current sign
            # applied. It is reused across thresholds by incrementally
            # setting more voxels to background, taking advantage of the
            # fact that each score_thresh is larger than the previous one.
            temp_arr3d = arr3d * sign

            # Prep step: at each threshold, zero out the voxels that fall
            # below it (thresholds are ascending, so this is cumulative).
            for score_thresh in score_threshs:
                temp_arr3d[temp_arr3d < score_thresh] = 0

                # Label into clusters - importantly (for the next step)
                # this returns clusters labeled ordinally
                # from 1 to n_clusters,
                # which allows us to use bincount to count
                # frequencies directly.
                labeled_arr3d, _ = label(temp_arr3d, bin_struct)

                # Next, we want to replace each label with its cluster
                # extent, that is, the size of the cluster it is part of.
                # To do this, we will first compute a flattened version of
                # only the non-zero cluster labels.
                labeled_arr3d_flat = labeled_arr3d.flatten()
                non_zero_inds = np.where(labeled_arr3d_flat != 0)[0]
                labeled_non_zero = labeled_arr3d_flat[non_zero_inds]

                # Count the size of each unique cluster, via its label.
                # We pass only the non-zero labels to bincount so that
                # background voxels (label 0) are not counted as a cluster;
                # the background should have a TFCE value of 0.
                cluster_counts = np.bincount(labeled_non_zero)

                # Next, we convert each unique cluster count to its TFCE
                # value, which is based on both the cluster extent and the
                # height (via the current score_thresh).
                # NOTE: We do not multiply by dh, based on fslmaths'
                # implementation. This differs from the original paper.
                cluster_tfces = sign * (cluster_counts**E) * (score_thresh**H)

                # Before we can add these values to tfce_4d, we need to
                # map cluster-wise tfce values back to a voxel-wise array,
                # including any zero / background voxels.
                tfce_step_values = np.zeros(labeled_arr3d_flat.shape)
                tfce_step_values[non_zero_inds] = cluster_tfces[
                    labeled_non_zero
                ]

                # Now, we just need to reshape these values back to 3D
                # and they can be added to tfce_4d.
                tfce_4d[..., i_regressor] += tfce_step_values.reshape(
                    temp_arr3d.shape
                )

    return tfce_4d
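
# A minimal usage sketch for calculate_tfce (illustrative only, not part of
# the module): the random t-map and the 26-connectivity structure below are
# assumptions chosen for the example.
#
#   import numpy as np
#   from scipy.ndimage import generate_binary_structure
#
#   rng = np.random.default_rng(0)
#   t_maps = rng.standard_normal((5, 5, 5, 2))    # (X, Y, Z, R)
#   bin_struct = generate_binary_structure(3, 3)  # 26-connectivity
#   tfce = calculate_tfce(t_maps, bin_struct, dh="auto", two_sided_test=True)
#   assert tfce.shape == t_maps.shape             # one TFCE map per regressor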


def _return_score_threshs(arr3d, dh, two_sided_test):
    """Compute list of score thresholds to use for TFCE."""
    max_score = (
        np.nanmax(np.abs(arr3d)) if two_sided_test else np.nanmax(arr3d)
    )

    number_steps = 100 if dh == "auto" else round(max_score / dh)
    if number_steps < 10:
        warn(
            f"Not enough steps for TFCE. Got: {number_steps=}. "
            "Setting it to 10.",
            stacklevel=find_stack_level(),
        )
        number_steps = 10
    if number_steps > 1000:
        warn(
            f"Too many steps for TFCE. Got: {number_steps=}. "
            "Setting it to 1000.",
            stacklevel=find_stack_level(),
        )
        number_steps = 1000

    return np.linspace(0, max_score, number_steps + 1)[1:]
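
# Worked example for _return_score_threshs (a sketch on a toy array that is
# not part of the module): with dh="auto" the function returns 100 evenly
# spaced thresholds between 0 (excluded) and the maximum score.
#
#   import numpy as np
#
#   arr3d = np.zeros((4, 4, 4))
#   arr3d[1, 1, 1] = 5.0
#   threshs = _return_score_threshs(arr3d, dh="auto", two_sided_test=False)
#   # threshs == array([0.05, 0.10, ..., 5.0]); len(threshs) == 100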


def null_to_p(test_values, null_array, alternative="two-sided"):
    """Return p-value for test value(s) against null array.

    Parameters
    ----------
    test_values : :obj:`int`, :obj:`float`, or array_like of shape (n_samples,)
        Value(s) for which to determine p-value.
    null_array : array_like of shape (n_iters,)
        Null distribution against which test_values is compared.
    alternative : {'two-sided', 'larger', 'smaller'}, default='two-sided'
        Whether to compare value against null distribution in a two-sided
        or one-sided ('larger' or 'smaller') manner. If 'larger', then higher
        values for the test_values are more significant. If 'smaller', then
        lower values for the test_values are more significant.

    Returns
    -------
    p_values : :obj:`float` or array_like of shape (n_samples,)
        P-value(s) associated with the test value when compared against the
        null distribution. Return type matches input type (i.e., a float if
        test_values is a single float, and an array if test_values is an
        array).

    Notes
    -----
    P-values are clipped based on the number of elements in the null array.
    Therefore no p-values of 0 or 1 should be produced.

    This function assumes that the null distribution for two-sided tests is
    symmetric around zero.
    """
    if alternative not in {"two-sided", "larger", "smaller"}:
        raise ValueError(
            'Argument "alternative" must be one of '
            '["two-sided", "larger", "smaller"]'
        )

    return_first = isinstance(test_values, (float, int))
    test_values = np.atleast_1d(test_values)
    null_array = np.array(null_array)

    # For efficiency's sake, if there are more than 1000 values, compute
    # p-values only for the unique values, and then reconstruct.
    if len(test_values) > 1000:
        reconstruct = True
        test_values, uniq_idx = np.unique(test_values, return_inverse=True)
    else:
        reconstruct = False

    def compute_p(t, null):
        # Right-tailed empirical p-value: the fraction of null values
        # that are greater than or equal to t.
        null = np.sort(null)
        idx = np.searchsorted(null, t, side="left").astype(float)
        return 1 - idx / len(null)

    if alternative == "two-sided":
        # Assumes null distribution is symmetric around 0
        p = compute_p(np.abs(test_values), np.abs(null_array))
    elif alternative == "smaller":
        p = compute_p(test_values * -1, null_array * -1)
    else:
        p = compute_p(test_values, null_array)

    # ensure p_value in the following range:
    # smallest_value <= p_value <= (1.0 - smallest_value)
    smallest_value = np.maximum(np.finfo(float).eps, 1.0 / len(null_array))
    result = np.maximum(smallest_value, np.minimum(p, 1.0 - smallest_value))

    if reconstruct:
        result = result[uniq_idx]

    return result[0] if return_first else result
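
# A small usage sketch for null_to_p (the data below are illustrative
# assumptions, not from the module): with a null distribution of 10,000
# draws, a test value in the far right tail gets a small p-value, clipped
# at 1 / len(null_array).
#
#   import numpy as np
#
#   rng = np.random.default_rng(0)
#   null = rng.standard_normal(10000)
#   p_two = null_to_p(3.0, null, alternative="two-sided")
#   p_one = null_to_p(3.0, null, alternative="larger")
#   # both are scalar values; p_one is roughly half of p_two for a
#   # symmetric null, and neither can be smaller than 1 / 10000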


def calculate_cluster_measures(
    arr4d,
    threshold,
    bin_struct,
    two_sided_test=False,
):
    """Calculate maximum cluster mass and size for an array.

    Parameters
    ----------
    arr4d : :obj:`numpy.ndarray` of shape (X, Y, Z, R)
        Unthresholded 4D array of 3D t-statistic maps.
        R = regressor.
    threshold : :obj:`float`
        Uncorrected t-statistic threshold for defining clusters.
    bin_struct : :obj:`numpy.ndarray` of shape (3, 3, 3)
        Connectivity matrix for defining clusters.
    two_sided_test : :obj:`bool`, default=False
        Whether to assess both positive and negative clusters (True) or just
        positive ones (False).

    Returns
    -------
    max_size, max_mass : :obj:`numpy.ndarray` of shape (n_regressors,)
        Maximum cluster size and mass from the matrix, for each regressor.
    """
    n_regressors = arr4d.shape[3]

    max_sizes = np.zeros(n_regressors, int)
    max_masses = np.zeros(n_regressors, float)

    for i_regressor in range(n_regressors):
        arr3d = arr4d[..., i_regressor].copy()

        if two_sided_test:
            arr3d[np.abs(arr3d) <= threshold] = 0
        else:
            arr3d[arr3d <= threshold] = 0

        labeled_arr3d, _ = label(arr3d > 0, bin_struct)

        if two_sided_test:
            # Label positive and negative clusters separately
            n_positive_clusters = np.max(labeled_arr3d)
            temp_labeled_arr3d, _ = label(
                arr3d < 0,
                bin_struct,
            )
            temp_labeled_arr3d[temp_labeled_arr3d > 0] += n_positive_clusters
            labeled_arr3d = labeled_arr3d + temp_labeled_arr3d
            del temp_labeled_arr3d

        clust_vals, clust_sizes = np.unique(labeled_arr3d, return_counts=True)
        assert clust_vals[0] == 0

        clust_vals = clust_vals[1:]  # First cluster is zeros in matrix
        clust_sizes = clust_sizes[1:]

        # Cluster mass-based inference
        max_mass = 0
        for unique_val in clust_vals:
            ss_vals = np.abs(arr3d[labeled_arr3d == unique_val]) - threshold
            max_mass = np.maximum(max_mass, np.sum(ss_vals))

        # Cluster size-based inference
        max_size = 0
        if clust_sizes.size:
            max_size = np.max(clust_sizes)

        max_sizes[i_regressor], max_masses[i_regressor] = max_size, max_mass

    return max_sizes, max_masses
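
# A minimal usage sketch for calculate_cluster_measures (the toy data and
# 6-connectivity structure are assumptions chosen for illustration):
#
#   import numpy as np
#   from scipy.ndimage import generate_binary_structure
#
#   arr4d = np.zeros((4, 4, 4, 1))
#   arr4d[1:3, 1:3, 1, 0] = 4.0          # a 4-voxel supra-threshold cluster
#   bin_struct = generate_binary_structure(3, 1)  # 6-connectivity
#   sizes, masses = calculate_cluster_measures(
#       arr4d, threshold=2.0, bin_struct=bin_struct
#   )
#   # sizes == array([4]); masses == array([8.]), since each of the 4 voxels
#   # contributes 4.0 - 2.0 to the cluster mass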


def normalize_matrix_on_axis(m, axis=0):
    """Normalize a 2D matrix on an axis.

    Parameters
    ----------
    m : numpy 2D array,
        The matrix to normalize.

    axis : :obj:`int` in {0, 1}, default=0
        A valid axis to normalize across.

    Returns
    -------
    ret : numpy array, shape = m.shape
        The normalized matrix.

    Examples
    --------
    >>> import numpy as np
    >>> from nilearn.mass_univariate.permuted_least_squares import (
    ...     normalize_matrix_on_axis,
    ... )
    >>> X = np.array([[0, 4], [1, 0]])
    >>> normalize_matrix_on_axis(X)
    array([[0., 1.],
           [1., 0.]])
    >>> normalize_matrix_on_axis(X, axis=1)
    array([[0., 1.],
           [1., 0.]])

    """
    if m.ndim > 2:
        raise ValueError(
            "This function only accepts 2D arrays. "
            f"An array of shape {m.shape!r} was passed."
        )

    if axis == 0:
        # array transposition preserves the contiguity flag of that array
        ret = (m.T / np.sqrt(np.sum(m**2, axis=0))[:, np.newaxis]).T
    elif axis == 1:
        ret = normalize_matrix_on_axis(m.T).T
    else:
        raise ValueError(f"axis(={int(axis)}) out of bounds")
    return ret


def orthonormalize_matrix(m, tol=1.0e-12):
    """Orthonormalize a matrix.

    Uses a Singular Value Decomposition.
    If the input matrix is rank-deficient, then its shape is cropped.

    Parameters
    ----------
    m : numpy array,
        The matrix to orthonormalize.

    tol : float, default=1e-12
        Tolerance parameter for nullity.

    Returns
    -------
    ret : numpy array, shape = m.shape
        The orthonormalized matrix.

    Examples
    --------
    >>> import numpy as np
    >>> from nilearn.mass_univariate.permuted_least_squares import (
    ...     orthonormalize_matrix,
    ... )
    >>> X = np.array([[1, 2], [0, 1], [1, 1]])
    >>> orthonormalize_matrix(X)
    array([[-0.81049889, -0.0987837 ],
           [-0.31970025, -0.75130448],
           [-0.49079864,  0.65252078]])
    >>> X = np.array([[0, 1], [4, 0]])
    >>> orthonormalize_matrix(X)
    array([[ 0., -1.],
           [-1.,  0.]])

    """
    U, s, _ = linalg.svd(m, full_matrices=False)
    n_eig = np.count_nonzero(s > tol)
    return np.ascontiguousarray(U[:, :n_eig])


def t_score_with_covars_and_normalized_design(
    tested_vars, target_vars, covars_orthonormalized=None
):
    """t-score in the regression of tested variates against target variates.

    Covariates are taken into account (if not None).
    The normalized_design case corresponds to the following assumptions:
    - tested_vars and target_vars are normalized
    - covars_orthonormalized are orthonormalized
    - tested_vars and covars_orthonormalized are orthogonal
      (np.dot(tested_vars.T, covars) == 0)

    Parameters
    ----------
    tested_vars : array-like, shape=(n_samples, n_tested_vars)
        Explanatory variates.

    target_vars : array-like, shape=(n_samples, n_target_vars)
        Target variates. F-ordered is better for efficient computation.

    covars_orthonormalized : array-like, shape=(n_samples, n_covars) or None, \
        optional
        Confounding variates.

    Returns
    -------
    score : numpy.ndarray, shape=(n_target_vars, n_tested_vars)
        t-scores associated with the tests of each explanatory variate against
        each target variate (in the presence of covars).

    """
    if covars_orthonormalized is None:
        lost_dof = 0
    else:
        lost_dof = covars_orthonormalized.shape[1]
    # Tested variates are fitted independently,
    # so lost_dof is unrelated to n_tested_vars.
    dof = target_vars.shape[0] - lost_dof
    beta_targetvars_testedvars = np.dot(target_vars.T, tested_vars)
    if covars_orthonormalized is None:
        rss = 1 - beta_targetvars_testedvars**2
    else:
        beta_targetvars_covars = np.dot(target_vars.T, covars_orthonormalized)
        a2 = np.sum(beta_targetvars_covars**2, 1)
        rss = 1 - a2[:, np.newaxis] - beta_targetvars_testedvars**2
    return beta_targetvars_testedvars * np.sqrt((dof - 1.0) / rss)
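
# A sketch of the normalized-design setup expected by
# t_score_with_covars_and_normalized_design (the variable names and data are
# assumptions for illustration; here "normalized" is taken to mean unit-norm
# columns, as produced by normalize_matrix_on_axis above):
#
#   import numpy as np
#
#   rng = np.random.default_rng(0)
#   n_samples = 20
#   tested_vars = normalize_matrix_on_axis(
#       rng.standard_normal((n_samples, 1))
#   )
#   target_vars = np.asfortranarray(
#       normalize_matrix_on_axis(rng.standard_normal((n_samples, 50)))
#   )
#   t_scores = t_score_with_covars_and_normalized_design(
#       tested_vars, target_vars
#   )
#   # t_scores has shape (n_target_vars, n_tested_vars) == (50, 1)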