Coverage for nilearn/mass_univariate/_utils.py: 8%



1"""Utility functions for the permuted least squares method.""" 

2 

3from warnings import warn 

4 

5import numpy as np 

6from scipy import linalg 

7from scipy.ndimage import label 

8 

9from nilearn._utils.logger import find_stack_level 

10 

11 

def calculate_tfce(
    arr4d,
    bin_struct,
    E=0.5,
    H=2,
    dh="auto",
    two_sided_test=True,
):
    """Calculate threshold-free cluster enhancement values for score maps.

    The :term:`TFCE` calculation is mostly implemented as described in [1]_,
    with minor modifications to produce results similar to fslmaths, as well
    as to support two-sided testing.

    Parameters
    ----------
    arr4d : :obj:`numpy.ndarray` of shape (X, Y, Z, R)
        Unthresholded 4D array of 3D t-statistic maps.
        R = regressor.
    bin_struct : :obj:`numpy.ndarray` of shape (3, 3, 3)
        Connectivity matrix for defining clusters.
    E : :obj:`float`, default=0.5
        Extent weight.
    H : :obj:`float`, default=2
        Height weight.
    dh : 'auto' or :obj:`float`, default='auto'
        Step size for TFCE calculation.
        If set to 'auto', use 100 steps, as is done in fslmaths.
        A good alternative is 0.1 for z and t maps, as in [1]_.
    two_sided_test : :obj:`bool`, default=True
        Whether to assess both positive and negative clusters (True) or just
        positive ones (False).

    Returns
    -------
    tfce_arr : :obj:`numpy.ndarray` of shape (X, Y, Z, R)
        :term:`TFCE` values, with the same shape as ``arr4d``.

    Notes
    -----
    In [1]_, each threshold's partial TFCE score is multiplied by dh,
    which makes TFCE values directly comparable across different step sizes.
    However, fslmaths does not do this.
    In the interest of maximizing similarity between nilearn and established
    tools, we chose to follow fslmaths' approach.

    Additionally, we have modified the method to support two-sided testing.
    In fslmaths, only positive clusters are considered.

    References
    ----------
    .. [1] Smith, S. M., & Nichols, T. E. (2009).
       Threshold-free cluster enhancement: addressing problems of smoothing,
       threshold dependence and localization in cluster inference.
       Neuroimage, 44(1), 83-98.
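
    Examples
    --------
    A minimal illustrative sketch on random data, assuming a 6-connectivity
    structure from :func:`scipy.ndimage.generate_binary_structure`; the TFCE
    values depend on the data, so only the output shape is checked here:

    >>> import numpy as np
    >>> from scipy.ndimage import generate_binary_structure
    >>> from nilearn.mass_univariate._utils import calculate_tfce
    >>> arr4d = np.random.RandomState(42).standard_normal((4, 4, 4, 2))
    >>> tfce = calculate_tfce(arr4d, generate_binary_structure(3, 1))
    >>> tfce.shape
    (4, 4, 4, 2)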

68 """ 

69 tfce_4d = np.zeros_like(arr4d) 

70 

71 # For each passed t map 

72 for i_regressor in range(arr4d.shape[3]): 

73 arr3d = arr4d[..., i_regressor] 

74 

75 signs = [-1, 1] if two_sided_test else [1] 

76 score_threshs = _return_score_threshs(arr3d, dh, two_sided_test) 

77 

78 # If we apply the sign first... 

79 for sign in signs: 

80 # Init a temp copy of arr3d with the current sign applied, 

81 # which can then be reused by incrementally setting more 

82 # voxel's to background, by taking advantage that each score_thresh 

83 # is incrementally larger 

84 temp_arr3d = arr3d * sign 

85 

86 # Prep step 

87 for score_thresh in score_threshs: 

88 temp_arr3d[temp_arr3d < score_thresh] = 0 

89 

90 # Label into clusters - importantly (for the next step) 

91 # this returns clusters labeled ordinally 

92 # from 1 to n_clusters+1, 

93 # which allows us to use bincount to count 

94 # frequencies directly. 

95 labeled_arr3d, _ = label(temp_arr3d, bin_struct) 

96 

97 # Next, we want to replace each label with its cluster 

98 # extent, that is, the size of the cluster it is part of 

99 # To do this, we will first compute a flattened version of 

100 # only the non-zero cluster labels. 

101 labeled_arr3d_flat = labeled_arr3d.flatten() 

102 non_zero_inds = np.where(labeled_arr3d_flat != 0)[0] 

103 labeled_non_zero = labeled_arr3d_flat[non_zero_inds] 

104 

105 # Count the size of each unique cluster, via its label. 

106 # The reason why we pass only the non-zero labels to bincount 

107 # is because it includes a bin for zeros, and in our labels 

108 # zero represents the background, 

109 # which we want to have a TFCE value of 0. 

110 cluster_counts = np.bincount(labeled_non_zero) 

111 

112 # Next, we convert each unique cluster count to its TFCE value. 

113 # Where each cluster's tfce value is based 

114 # on both its cluster extent and z-value 

115 # (via the current score_thresh) 

116 # NOTE: We do not multiply by dh, based on fslmaths' 

117 # implementation. This differs from the original paper. 

118 cluster_tfces = sign * (cluster_counts**E) * (score_thresh**H) 

119 

120 # Before we can add these values to tfce_4d, we need to 

121 # map cluster-wise tfce values back to a voxel-wise array, 

122 # including any zero / background voxels. 

123 tfce_step_values = np.zeros(labeled_arr3d_flat.shape) 

124 tfce_step_values[non_zero_inds] = cluster_tfces[ 

125 labeled_non_zero 

126 ] 

127 

128 # Now, we just need to reshape these values back to 3D 

129 # and they can be incremented to tfce_4d. 

130 tfce_4d[..., i_regressor] += tfce_step_values.reshape( 

131 temp_arr3d.shape 

132 ) 

133 

134 return tfce_4d 

135 

136 

def _return_score_threshs(arr3d, dh, two_sided_test):
    """Compute the list of score thresholds to use for TFCE."""
    max_score = (
        np.nanmax(np.abs(arr3d)) if two_sided_test else np.nanmax(arr3d)
    )

    number_steps = 100 if dh == "auto" else round(max_score / dh)
    if number_steps < 10:
        warn(
            f"Not enough steps for TFCE. Got: {number_steps=}. "
            "Setting it to 10.",
            stacklevel=find_stack_level(),
        )
        number_steps = 10
    if number_steps > 1000:
        warn(
            f"Too many steps for TFCE. Got: {number_steps=}. "
            "Setting it to 1000.",
            stacklevel=find_stack_level(),
        )
        number_steps = 1000

    return np.linspace(0, max_score, number_steps + 1)[1:]


def null_to_p(test_values, null_array, alternative="two-sided"):
    """Return p-value for test value(s) against null array.

    Parameters
    ----------
    test_values : :obj:`int`, :obj:`float`, or array_like of shape (n_samples,)
        Value(s) for which to determine p-value.
    null_array : array_like of shape (n_iters,)
        Null distribution against which test_values is compared.
    alternative : {'two-sided', 'larger', 'smaller'}, default='two-sided'
        Whether to compare value against null distribution in a two-sided
        or one-sided ('larger' or 'smaller') manner. If 'larger', then higher
        values for the test_values are more significant. If 'smaller', then
        lower values for the test_values are more significant.

    Returns
    -------
    p_values : :obj:`float` or array_like of shape (n_samples,)
        P-value(s) associated with the test value when compared against the
        null distribution. Return type matches input type (i.e., a float if
        test_values is a single float, and an array if test_values is an
        array).

    Notes
    -----
    P-values are clipped based on the number of elements in the null array.
    Therefore no p-values of 0 or 1 should be produced.

    This function assumes that the null distribution for two-sided tests is
    symmetric around zero.
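
    Examples
    --------
    A small illustrative sketch with a made-up null distribution,
    chosen so that the p-values are exact:

    >>> from nilearn.mass_univariate._utils import null_to_p
    >>> null = [-4, -2, -1, 0, 0, 1, 2, 10]
    >>> float(null_to_p(3, null, alternative="two-sided"))
    0.25
    >>> # P-values are clipped to 1 / len(null_array) at the extremes.
    >>> float(null_to_p(100, null, alternative="two-sided"))
    0.125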

192 """ 

193 if alternative not in {"two-sided", "larger", "smaller"}: 

194 raise ValueError( 

195 'Argument "alternative" must be one of ' 

196 '["two-sided", "larger", "smaller"]' 

197 ) 

198 

199 return_first = isinstance(test_values, (float, int)) 

200 test_values = np.atleast_1d(test_values) 

201 null_array = np.array(null_array) 

202 

203 # For efficiency's sake, if there are more than 1000 values, pass only the 

204 # unique values through percentileofscore(), and then reconstruct. 

205 if len(test_values) > 1000: 

206 reconstruct = True 

207 test_values, uniq_idx = np.unique(test_values, return_inverse=True) 

208 else: 

209 reconstruct = False 

210 

211 def compute_p(t, null): 

212 null = np.sort(null) 

213 idx = np.searchsorted(null, t, side="left").astype(float) 

214 return 1 - idx / len(null) 

215 

216 if alternative == "two-sided": 

217 # Assumes null distribution is symmetric around 0 

218 p = compute_p(np.abs(test_values), np.abs(null_array)) 

219 elif alternative == "smaller": 

220 p = compute_p(test_values * -1, null_array * -1) 

221 else: 

222 p = compute_p(test_values, null_array) 

223 

224 # ensure p_value in the following range: 

225 # smallest_value <= p_value <= (1.0 - smallest_value) 

226 smallest_value = np.maximum(np.finfo(float).eps, 1.0 / len(null_array)) 

227 result = np.maximum(smallest_value, np.minimum(p, 1.0 - smallest_value)) 

228 

229 if reconstruct: 

230 result = result[uniq_idx] 

231 

232 return result[0] if return_first else result 

233 

234 

def calculate_cluster_measures(
    arr4d,
    threshold,
    bin_struct,
    two_sided_test=False,
):
    """Calculate maximum cluster mass and size for an array.

    Parameters
    ----------
    arr4d : :obj:`numpy.ndarray` of shape (X, Y, Z, R)
        Unthresholded 4D array of 3D t-statistic maps.
        R = regressor.
    threshold : :obj:`float`
        Uncorrected t-statistic threshold for defining clusters.
    bin_struct : :obj:`numpy.ndarray` of shape (3, 3, 3)
        Connectivity matrix for defining clusters.
    two_sided_test : :obj:`bool`, default=False
        Whether to assess both positive and negative clusters (True) or just
        positive ones (False).

    Returns
    -------
    max_size, max_mass : :obj:`numpy.ndarray` of shape (n_regressors,)
        Maximum cluster size and mass from the matrix, for each regressor.
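
    Examples
    --------
    A minimal illustrative sketch with a single hand-built cluster of four
    voxels at t = 3 and a threshold of 1, so the maximum cluster size is 4
    and the maximum cluster mass is 4 * (3 - 1) = 8:

    >>> import numpy as np
    >>> from scipy.ndimage import generate_binary_structure
    >>> from nilearn.mass_univariate._utils import calculate_cluster_measures
    >>> arr4d = np.zeros((5, 5, 5, 1))
    >>> arr4d[1:3, 1:3, 1, 0] = 3.0
    >>> sizes, masses = calculate_cluster_measures(
    ...     arr4d, threshold=1.0, bin_struct=generate_binary_structure(3, 1)
    ... )
    >>> int(sizes[0]), float(masses[0])
    (4, 8.0)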

260 """ 

261 n_regressors = arr4d.shape[3] 

262 

263 max_sizes = np.zeros(n_regressors, int) 

264 max_masses = np.zeros(n_regressors, float) 

265 

266 for i_regressor in range(n_regressors): 

267 arr3d = arr4d[..., i_regressor].copy() 

268 

269 if two_sided_test: 

270 arr3d[np.abs(arr3d) <= threshold] = 0 

271 else: 

272 arr3d[arr3d <= threshold] = 0 

273 

274 labeled_arr3d, _ = label(arr3d > 0, bin_struct) 

275 

276 if two_sided_test: 

277 # Label positive and negative clusters separately 

278 n_positive_clusters = np.max(labeled_arr3d) 

279 temp_labeled_arr3d, _ = label( 

280 arr3d < 0, 

281 bin_struct, 

282 ) 

283 temp_labeled_arr3d[temp_labeled_arr3d > 0] += n_positive_clusters 

284 labeled_arr3d = labeled_arr3d + temp_labeled_arr3d 

285 del temp_labeled_arr3d 

286 

287 clust_vals, clust_sizes = np.unique(labeled_arr3d, return_counts=True) 

288 assert clust_vals[0] == 0 

289 

290 clust_vals = clust_vals[1:] # First cluster is zeros in matrix 

291 clust_sizes = clust_sizes[1:] 

292 

293 # Cluster mass-based inference 

294 max_mass = 0 

295 for unique_val in clust_vals: 

296 ss_vals = np.abs(arr3d[labeled_arr3d == unique_val]) - threshold 

297 max_mass = np.maximum(max_mass, np.sum(ss_vals)) 

298 

299 # Cluster size-based inference 

300 max_size = 0 

301 if clust_sizes.size: 

302 max_size = np.max(clust_sizes) 

303 

304 max_sizes[i_regressor], max_masses[i_regressor] = max_size, max_mass 

305 

306 return max_sizes, max_masses 

307 

308 

def normalize_matrix_on_axis(m, axis=0):
    """Normalize a 2D matrix on an axis.

    Parameters
    ----------
    m : numpy 2D array
        The matrix to normalize.

    axis : :obj:`int` in {0, 1}, default=0
        A valid axis to normalize across.

    Returns
    -------
    ret : numpy array, shape = m.shape
        The normalized matrix.

    Examples
    --------
    >>> import numpy as np
    >>> from nilearn.mass_univariate._utils import (
    ...     normalize_matrix_on_axis,
    ... )
    >>> X = np.array([[0, 4], [1, 0]])
    >>> normalize_matrix_on_axis(X)
    array([[0., 1.],
           [1., 0.]])
    >>> normalize_matrix_on_axis(X, axis=1)
    array([[0., 1.],
           [1., 0.]])

    """
    if m.ndim > 2:
        raise ValueError(
            "This function only accepts 2D arrays. "
            f"An array of shape {m.shape!r} was passed."
        )

    if axis == 0:
        # array transposition preserves the contiguity flag of that array
        ret = (m.T / np.sqrt(np.sum(m**2, axis=0))[:, np.newaxis]).T
    elif axis == 1:
        ret = normalize_matrix_on_axis(m.T).T
    else:
        raise ValueError(f"axis(={int(axis)}) out of bounds")
    return ret


def orthonormalize_matrix(m, tol=1.0e-12):
    """Orthonormalize a matrix.

    Uses a Singular Value Decomposition.
    If the input matrix is rank-deficient, then its shape is cropped.

    Parameters
    ----------
    m : numpy array
        The matrix to orthonormalize.

    tol : float, default=1e-12
        Tolerance parameter for nullity.

    Returns
    -------
    ret : numpy array, shape = m.shape
        The orthonormalized matrix.

    Examples
    --------
    >>> import numpy as np
    >>> from nilearn.mass_univariate._utils import (
    ...     orthonormalize_matrix,
    ... )
    >>> X = np.array([[1, 2], [0, 1], [1, 1]])
    >>> orthonormalize_matrix(X)
    array([[-0.81049889, -0.0987837 ],
           [-0.31970025, -0.75130448],
           [-0.49079864,  0.65252078]])
    >>> X = np.array([[0, 1], [4, 0]])
    >>> orthonormalize_matrix(X)
    array([[ 0., -1.],
           [-1.,  0.]])

    """
    U, s, _ = linalg.svd(m, full_matrices=False)
    n_eig = np.count_nonzero(s > tol)
    return np.ascontiguousarray(U[:, :n_eig])


def t_score_with_covars_and_normalized_design(
    tested_vars, target_vars, covars_orthonormalized=None
):
    """t-score in the regression of tested variates against target variates.

    Covariates are taken into account (if not None).
    The normalized_design case corresponds to the following assumptions:

    - tested_vars and target_vars are normalized
    - covars_orthonormalized are orthonormalized
    - tested_vars and covars_orthonormalized are orthogonal
      (np.dot(tested_vars.T, covars) == 0)

    Parameters
    ----------
    tested_vars : array-like, shape=(n_samples, n_tested_vars)
        Explanatory variates.

    target_vars : array-like, shape=(n_samples, n_target_vars)
        Target variates. F-ordered is better for efficient computation.

    covars_orthonormalized : array-like, shape=(n_samples, n_covars) or None, \
            optional
        Confounding variates.

    Returns
    -------
    score : numpy.ndarray, shape=(n_target_vars, n_tested_vars)
        t-scores associated with the tests of each explanatory variate against
        each target variate (in the presence of covars).
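
    Examples
    --------
    A minimal illustrative sketch under the stated assumptions (unit-norm
    columns, no covariates); with a dot product of 0.8 over 10 samples, the
    t-score is 0.8 * sqrt(9 / 0.36) = 4:

    >>> import numpy as np
    >>> from nilearn.mass_univariate._utils import (
    ...     t_score_with_covars_and_normalized_design,
    ... )
    >>> tested = np.zeros((10, 1))
    >>> tested[0, 0] = 1.0
    >>> target = np.zeros((10, 1))
    >>> target[0, 0], target[1, 0] = 0.8, 0.6
    >>> t = t_score_with_covars_and_normalized_design(tested, target)
    >>> t.shape
    (1, 1)
    >>> round(float(t[0, 0]), 6)
    4.0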

427 """ 

428 if covars_orthonormalized is None: 

429 lost_dof = 0 

430 else: 

431 lost_dof = covars_orthonormalized.shape[1] 

432 # Tested variates are fitted independently, 

433 # so lost_dof is unrelated to n_tested_vars. 

434 dof = target_vars.shape[0] - lost_dof 

435 beta_targetvars_testedvars = np.dot(target_vars.T, tested_vars) 

436 if covars_orthonormalized is None: 

437 rss = 1 - beta_targetvars_testedvars**2 

438 else: 

439 beta_targetvars_covars = np.dot(target_vars.T, covars_orthonormalized) 

440 a2 = np.sum(beta_targetvars_covars**2, 1) 

441 rss = 1 - a2[:, np.newaxis] - beta_targetvars_testedvars**2 

442 return beta_targetvars_testedvars * np.sqrt((dof - 1.0) / rss)