Coverage for nilearn/mass_univariate/permuted_least_squares.py: 7%

225 statements  

coverage.py v7.9.1, created at 2025-06-20 10:58 +0200

"""Massively Univariate Linear Model estimated \
with OLS and permutation test.
"""

import time
import warnings

import joblib
import numpy as np
from nibabel import Nifti1Image
from scipy import stats
from scipy.ndimage import generate_binary_structure, label
from sklearn.utils import check_random_state

from nilearn import image
from nilearn._utils import fill_doc, logger
from nilearn._utils.logger import find_stack_level
from nilearn._utils.param_validation import check_params
from nilearn.masking import apply_mask
from nilearn.mass_univariate._utils import (
    calculate_cluster_measures,
    calculate_tfce,
    normalize_matrix_on_axis,
    null_to_p,
    orthonormalize_matrix,
    t_score_with_covars_and_normalized_design,
)


def _permuted_ols_on_chunk(
    scores_original_data,
    tested_vars,
    target_vars,
    thread_id,
    threshold=None,
    confounding_vars=None,
    masker=None,
    n_perm=10000,
    n_perm_chunk=10000,
    intercept_test=True,
    two_sided_test=True,
    tfce=False,
    tfce_original_data=None,
    random_state=None,
    verbose=0,
):
    """Perform massively univariate analysis with permuted OLS on a data chunk.

    To be used in a parallel computing context.

    Parameters
    ----------
    scores_original_data : array-like, shape=(n_descriptors, n_regressors)
        t-scores obtained for the original (non-permuted) data.

    tested_vars : array-like, shape=(n_samples, n_regressors)
        Explanatory variates.

    target_vars : array-like, shape=(n_samples, n_targets)
        fMRI data. F-ordered for efficient computations.

    thread_id : int
        Process id, used for display.

    threshold : :obj:`float`
        Cluster-forming threshold in t-scale.
        This is only used for cluster-level inference.
        If ``threshold`` is not None, but ``masker`` is, an exception will
        be raised.

        .. versionadded:: 0.9.2

    confounding_vars : array-like, shape=(n_samples, n_covars), optional
        Clinical data (covariates).

    masker : None or :class:`~nilearn.maskers.NiftiMasker` or \
            :class:`~nilearn.maskers.MultiNiftiMasker`, optional
        A mask to be used on the data.
        This is used for cluster-level inference and :term:`TFCE`-based
        inference, if either is enabled.
        If ``threshold`` is not None, but ``masker`` is, an exception will
        be raised.

        .. versionadded:: 0.9.2

    n_perm : int, default=10000
        Total number of permutations to perform, only used for
        display in this function.

    n_perm_chunk : int, default=10000
        Number of permutations to be performed.

    intercept_test : boolean, default=True
        Change the permutation scheme (swap signs for intercept,
        switch labels otherwise). See :footcite:t:`Fisher1935`.

    two_sided_test : boolean, default=True
        If True, performs an unsigned t-test. Both positive and negative
        effects are considered; the null hypothesis is that the effect is
        zero. If False, only positive effects are considered as relevant.
        The null hypothesis is that the effect is zero or negative.

    tfce : :obj:`bool`, default=False
        Whether to perform :term:`TFCE`-based multiple comparisons
        correction or not.
        Calculating TFCE values in each permutation can be time-consuming,
        so this option is disabled by default.
        The TFCE calculation is implemented as described in
        :footcite:t:`Smith2009a`.

        .. versionadded:: 0.9.2

    tfce_original_data : None or array-like, \
            shape=(n_descriptors, n_regressors), optional
        TFCE values obtained for the original (non-permuted) data.

        .. versionadded:: 0.9.2

    %(random_state)s

    %(verbose0)s

    Returns
    -------
    scores_as_ranks_part : array-like, shape=(n_regressors, n_descriptors)
        The ranks of the original scores in ``h0_fmax_part``.
        When ``n_descriptors`` or ``n_perm`` are large, it can take a long
        time to find the rank of the original scores in the whole H0
        distribution. Here, this step is performed in parallel by the
        workers involved in the permutation computation.

    h0_fmax_part : array-like, shape=(n_perm_chunk, n_regressors)
        Distribution of the (max) t-statistic under the null hypothesis
        (limited to this permutation chunk).

    h0_csfwe_part, h0_cmfwe_part : array-like, \
            shape=(n_perm_chunk, n_regressors)
        Distribution of max cluster sizes/masses under the null hypothesis.
        Only calculated if ``masker`` is not None.
        Otherwise, these will both be None.

        .. versionadded:: 0.9.2

    tfce_scores_as_ranks_part : array-like, \
            shape=(n_regressors, n_descriptors)
        The ranks of the original TFCE values in ``h0_tfce_part``.
        When ``n_descriptors`` or ``n_perm`` are large, it can take a long
        time to find the rank of the original scores in the whole H0
        distribution. Here, this step is performed in parallel by the
        workers involved in the permutation computation.

        .. versionadded:: 0.9.2

    h0_tfce_part : array-like, shape=(n_perm_chunk, n_regressors)
        Distribution of the (max) TFCE value under the null hypothesis
        (limited to this permutation chunk).

        .. versionadded:: 0.9.2

    References
    ----------
    .. footbibliography::

    """
    # initialize the seed of the random generator
    rng = check_random_state(random_state)

    n_samples, n_regressors = tested_vars.shape
    n_descriptors = target_vars.shape[1]

    # run the permutations
    t0 = time.time()
    h0_fmax_part = np.empty((n_regressors, n_perm_chunk))
    scores_as_ranks_part = np.zeros((n_regressors, n_descriptors))

    # Preallocate null arrays for optional outputs
    # Any unselected outputs will just return a None
    h0_tfce_part, tfce_scores_as_ranks_part = None, None
    if tfce:
        h0_tfce_part = np.empty((n_regressors, n_perm_chunk))
        tfce_scores_as_ranks_part = np.zeros((n_regressors, n_descriptors))

    h0_csfwe_part, h0_cmfwe_part = None, None
    if threshold is not None:
        h0_csfwe_part = np.empty((n_regressors, n_perm_chunk))
        h0_cmfwe_part = np.empty((n_regressors, n_perm_chunk))

    for i_perm in range(n_perm_chunk):
        if intercept_test:
            # sign swap (random multiplication by 1 or -1)
            target_vars = target_vars * (
                rng.randint(2, size=(n_samples, 1)) * 2 - 1
            )
        else:
            # shuffle data
            # Regarding computation costs, we choose to shuffle testvars
            # and covars rather than fmri_signal.
            # Also, it is important to shuffle tested_vars and covars
            # jointly to simplify t-scores computation (null dot product).
            shuffle_idx = rng.permutation(n_samples)
            tested_vars = tested_vars[shuffle_idx]
            if confounding_vars is not None:
                confounding_vars = confounding_vars[shuffle_idx]

        # OLS regression on randomized data
        perm_scores = np.asfortranarray(
            t_score_with_covars_and_normalized_design(
                tested_vars, target_vars, confounding_vars
            )
        )

        # find the rank of the original scores in h0_fmax_part
        # (when n_descriptors or n_perm are large, it can take a long time
        # to find the rank of the original scores in the whole H0
        # distribution. Here, this step is performed in parallel by the
        # workers involved in the permutation computation)
        # NOTE: This is not done for the cluster-level methods.
        if two_sided_test:
            # Get maximum absolute value for voxel-level FWE
            h0_fmax_part[:, i_perm] = np.nanmax(np.fabs(perm_scores), axis=0)
            scores_as_ranks_part += (
                h0_fmax_part[:, i_perm].reshape((-1, 1))
                < np.fabs(scores_original_data).T
            )
        else:
            # Get maximum value for voxel-level FWE
            h0_fmax_part[:, i_perm] = np.nanmax(perm_scores, axis=0)
            scores_as_ranks_part += (
                h0_fmax_part[:, i_perm].reshape((-1, 1))
                < scores_original_data.T
            )

        # Prepare data for cluster thresholding
        if tfce or (threshold is not None):
            arr4d = masker.inverse_transform(perm_scores.T).get_fdata()
            bin_struct = generate_binary_structure(3, 1)

        if tfce:
            # The TFCE map will contain positive and negative values if
            # two_sided_test is True, or positive only if it's False.
            # In either case, the maximum absolute value is the one we want.
            h0_tfce_part[:, i_perm] = np.nanmax(
                np.fabs(
                    calculate_tfce(
                        arr4d,
                        bin_struct=bin_struct,
                        two_sided_test=two_sided_test,
                    )
                ),
                axis=(0, 1, 2),
            )
            tfce_scores_as_ranks_part += h0_tfce_part[:, i_perm].reshape(
                (-1, 1)
            ) < np.fabs(tfce_original_data.T)

        if threshold is not None:
            (
                h0_csfwe_part[:, i_perm],
                h0_cmfwe_part[:, i_perm],
            ) = calculate_cluster_measures(
                arr4d,
                threshold,
                bin_struct,
                two_sided_test=two_sided_test,
            )

        if verbose > 0:
            step = 11 - min(verbose, 10)
            if i_perm % step == 0:
                # If there is only one job, progress information is fixed
                crlf = "\n"
                if n_perm == n_perm_chunk:
                    crlf = "\r"

                percent = float(i_perm) / n_perm_chunk
                percent = round(percent * 100, 2)
                dt = time.time() - t0
                remaining = (100.0 - percent) / max(0.01, percent) * dt

                logger.log(
                    f"Job #{thread_id}, processed {i_perm}/{n_perm_chunk} "
                    f"permutations ({percent:0.2f}%, {remaining:0.2f} seconds "
                    f"remaining){crlf}",
                )

    return (
        scores_as_ranks_part,
        h0_fmax_part,
        h0_csfwe_part,
        h0_cmfwe_part,
        tfce_scores_as_ranks_part,
        h0_tfce_part,
    )

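
# A minimal, hypothetical sketch (not part of nilearn's API) of how the
# per-chunk rank counts returned by _permuted_ols_on_chunk are combined:
# permuted_ols (below) sums the chunks' ``scores_as_ranks_part`` arrays and
# converts the summed ranks into family-wise corrected p-values.
def _demo_ranks_to_pvals(scores_as_ranks_parts, n_perm):
    """Combine per-chunk rank counts into FWE-corrected p-values (sketch)."""
    # element-wise sum over the (n_regressors, n_descriptors) chunk arrays
    scores_as_ranks = np.sum(scores_as_ranks_parts, axis=0)
    # the "+ 1" counts the original statistic as one draw from the null
    return (n_perm + 1 - scores_as_ranks) / float(n_perm + 1)
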

@fill_doc
def permuted_ols(
    tested_vars,
    target_vars,
    confounding_vars=None,
    model_intercept=True,
    n_perm=10000,
    two_sided_test=True,
    random_state=None,
    n_jobs=1,
    verbose=0,
    masker=None,
    tfce=False,
    threshold=None,
    output_type="legacy",
):
    """Massively univariate group analysis with permuted OLS.

    Tested variates are independently fitted to target variates (descriptors,
    e.g. brain imaging signal) according to a linear model solved with an
    Ordinary Least Squares criterion.
    Confounding variates may be included in the model.
    Permutation testing is used to assess the significance of the
    relationship between the tested variates and the target variates
    :footcite:p:`Anderson2001`, :footcite:p:`Winkler2014`.
    A max-type procedure is used to obtain family-wise corrected p-values
    based on t-statistics (voxel-level FWE), cluster sizes, cluster masses,
    and :term:`TFCE` values.

    The specific permutation scheme implemented here is the one of
    :footcite:t:`Freedman1983`.
    It has been demonstrated in :footcite:t:`Anderson2001` that
    this scheme conveys more sensitivity than alternative schemes. This
    holds for neuroimaging applications, as discussed in detail in
    :footcite:t:`Winkler2014`.

    Permutations are performed on parallel computing units.
    Each of them performs a fraction of permutations on the whole dataset.
    Thus, the max t-score amongst data descriptors can be computed directly,
    which avoids storing all the computed t-scores.

    The variates should be given as C-contiguous arrays.
    ``target_vars`` is automatically converted to Fortran order to speed up
    computations.

    Parameters
    ----------
    tested_vars : array-like, shape=(n_samples, n_regressors)
        Explanatory variates, fitted and tested independently from each
        other.

    target_vars : array-like, shape=(n_samples, n_descriptors)
        :term:`fMRI` data to analyze according
        to the explanatory and confounding variates.

        In a group-level analysis, the samples will typically be images
        (such as run-wise z-statistic maps), while the descriptors will
        generally be voxels (for volumetric data) or
        :term:`vertices<vertex>` (for surface data).

    confounding_vars : array-like, shape=(n_samples, n_covars), default=None
        Confounding variates (covariates), fitted but not tested.
        If None, no confounding variate is added to the model
        (except maybe a constant column according to the value of
        ``model_intercept``).

    model_intercept : :obj:`bool`, default=True
        If True, a constant column is added to the confounding variates
        unless the tested variate is already the intercept or when
        confounding variates already contain an intercept.

    %(n_perm)s
        If ``n_perm`` is set to 0, then no p-values will be estimated.

    %(two_sided_test)s

    %(random_state)s

    n_jobs : :obj:`int`, default=1
        Number of parallel workers.
        If -1 is provided, all CPUs are used.
        A negative number indicates that all the CPUs except (abs(n_jobs) - 1)
        ones will be used.

    %(verbose0)s

    masker : None or :class:`~nilearn.maskers.NiftiMasker` or \
            :class:`~nilearn.maskers.MultiNiftiMasker`, default=None
        A mask to be used on the data.
        This is required for cluster-level inference, so it must be provided
        if ``threshold`` is not None.

        .. versionadded:: 0.9.2

    threshold : None or :obj:`float`, default=None
        Cluster-forming threshold in p-scale.
        This is only used for cluster-level inference.
        If None, cluster-level inference will not be performed.

        .. warning::

            Performing cluster-level inference will increase the computation
            time of the permutation procedure.

        .. versionadded:: 0.9.2

    %(tfce)s

        .. versionadded:: 0.9.2

    output_type : {'legacy', 'dict'}, default="legacy"
        Determines how outputs should be returned.
        The two options are:

        - 'legacy': return pvals, score_orig_data, and h0_fmax.
          This option is the default, but it is deprecated; the default
          will change to 'dict' in 0.13, and the option will be removed
          in 0.15.
        - 'dict': return a dictionary containing output arrays.
          This option will be made the default in 0.13.
          Additionally, if ``tfce`` is True or ``threshold`` is not None,
          ``output_type`` will automatically be set to 'dict'.

        .. deprecated:: 0.9.2

            The default value for this parameter will change from 'legacy'
            to 'dict' in 0.13, and the parameter will be removed completely
            in 0.15.

        .. versionadded:: 0.9.2

    Returns
    -------
    pvals : array-like, shape=(n_regressors, n_descriptors)
        Negative log10 p-values associated with the significance test of the
        n_regressors explanatory variates against the n_descriptors target
        variates. Family-wise corrected p-values.

        .. note::

            This is returned if ``output_type`` == 'legacy'.

        .. deprecated:: 0.9.2

            The 'legacy' option for ``output_type`` is deprecated.
            The default value will change to 'dict' in 0.13,
            and the ``output_type`` parameter will be removed in 0.15.

    score_orig_data : numpy.ndarray, shape=(n_regressors, n_descriptors)
        t-statistic associated with the significance test of the n_regressors
        explanatory variates against the n_descriptors target variates.
        The ranks of the scores into the h0 distribution correspond to the
        p-values.

        .. note::

            This is returned if ``output_type`` == 'legacy'.

        .. deprecated:: 0.9.2

            The 'legacy' option for ``output_type`` is deprecated.
            The default value will change to 'dict' in 0.13,
            and the ``output_type`` parameter will be removed in 0.15.

    h0_fmax : array-like, shape=(n_regressors, n_perm)
        Distribution of the (max) t-statistic under the null hypothesis
        (obtained from the permutations). Array is sorted.

        .. note::

            This is returned if ``output_type`` == 'legacy'.

        .. deprecated:: 0.9.2

            The 'legacy' option for ``output_type`` is deprecated.
            The default value will change to 'dict' in 0.13,
            and the ``output_type`` parameter will be removed in 0.15.

        .. versionchanged:: 0.9.2

            Return H0 for all regressors, instead of only the first one.

    outputs : :obj:`dict`
        Output arrays, organized in a dictionary.

        .. note::

            This is returned if ``output_type`` == 'dict'.
            This will be the default output starting in version 0.13.

        .. versionadded:: 0.9.2

        Here are the keys:

        ============= ============== ==========================================
        key           shape          description
        ============= ============== ==========================================
        t             (n_regressors, t-statistic associated with the
                      n_descriptors) significance test of the n_regressors
                                     explanatory variates against the
                                     n_descriptors target variates.
                                     The ranks of the scores into the h0
                                     distribution correspond to the p-values.
        logp_max_t    (n_regressors, Negative log10 p-values associated with
                      n_descriptors) the significance test of the
                                     n_regressors explanatory variates
                                     against the n_descriptors target
                                     variates.
                                     Family-wise corrected p-values, based on
                                     ``h0_max_t``.
        h0_max_t      (n_regressors, Distribution of the max t-statistic
                      n_perm)        under the null hypothesis (obtained from
                                     the permutations). Array is sorted.
        tfce          (n_regressors, TFCE values associated with the
                      n_descriptors) significance test of the n_regressors
                                     explanatory variates against the
                                     n_descriptors target variates.
                                     The ranks of the scores into the h0
                                     distribution correspond to the TFCE
                                     p-values.
        logp_max_tfce (n_regressors, Negative log10 p-values associated with
                      n_descriptors) the significance test of the
                                     n_regressors explanatory variates
                                     against the n_descriptors target
                                     variates.
                                     Family-wise corrected p-values, based on
                                     ``h0_max_tfce``.

                                     Returned only if ``tfce`` is True.
        h0_max_tfce   (n_regressors, Distribution of the max TFCE value
                      n_perm)        under the null hypothesis (obtained from
                                     the permutations). Array is sorted.

                                     Returned only if ``tfce`` is True.
        size          (n_regressors, Cluster size values associated with the
                      n_descriptors) significance test of the n_regressors
                                     explanatory variates against the
                                     n_descriptors target variates.
                                     The ranks of the scores into the h0
                                     distribution correspond to the size
                                     p-values.

                                     Returned only if ``threshold`` is not
                                     None.
        logp_max_size (n_regressors, Negative log10 p-values associated with
                      n_descriptors) the cluster-level significance test of
                                     the n_regressors explanatory variates
                                     against the n_descriptors target
                                     variates.
                                     Family-wise corrected, cluster-level
                                     p-values, based on ``h0_max_size``.

                                     Returned only if ``threshold`` is not
                                     None.
        h0_max_size   (n_regressors, Distribution of the max cluster size
                      n_perm)        value under the null hypothesis
                                     (obtained from the permutations).
                                     Array is sorted.

                                     Returned only if ``threshold`` is not
                                     None.
        mass          (n_regressors, Cluster mass values associated with the
                      n_descriptors) significance test of the n_regressors
                                     explanatory variates against the
                                     n_descriptors target variates.
                                     The ranks of the scores into the h0
                                     distribution correspond to the mass
                                     p-values.

                                     Returned only if ``threshold`` is not
                                     None.
        logp_max_mass (n_regressors, Negative log10 p-values associated with
                      n_descriptors) the cluster-level significance test of
                                     the n_regressors explanatory variates
                                     against the n_descriptors target
                                     variates.
                                     Family-wise corrected, cluster-level
                                     p-values, based on ``h0_max_mass``.

                                     Returned only if ``threshold`` is not
                                     None.
        h0_max_mass   (n_regressors, Distribution of the max cluster mass
                      n_perm)        value under the null hypothesis
                                     (obtained from the permutations).
                                     Array is sorted.

                                     Returned only if ``threshold`` is not
                                     None.
        ============= ============== ==========================================

    References
    ----------
    .. footbibliography::

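    Examples
    --------
    A minimal sketch on random data, kept to very few permutations for
    speed; the sizes and variable names below are illustrative only:

    >>> import numpy as np
    >>> rng = np.random.RandomState(42)
    >>> tested_vars = rng.standard_normal((20, 1))
    >>> target_vars = rng.standard_normal((20, 50))
    >>> out = permuted_ols(
    ...     tested_vars, target_vars, n_perm=10, output_type="dict"
    ... )
    >>> sorted(out.keys())
    ['h0_max_t', 'logp_max_t', 't']
    >>> out["t"].shape
    (1, 50)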

    """
    check_params(locals())
    _check_inputs_permuted_ols(n_jobs, tfce, masker, threshold, target_vars)

    n_jobs, output_type, target_vars, tested_vars = (
        _sanitize_inputs_permuted_ols(
            n_jobs, output_type, tfce, threshold, target_vars, tested_vars
        )
    )

    # initialize the seed of the random generator
    rng = check_random_state(random_state)

    n_descriptors = target_vars.shape[1]

    n_samples, n_regressors = tested_vars.shape

    intercept_test = n_regressors == np.unique(tested_vars).size == 1

    # check if confounding vars contains an intercept
    if confounding_vars is not None:
        # Search for all constant columns
        constants = [
            x
            for x in range(confounding_vars.shape[1])
            if np.unique(confounding_vars[:, x]).size == 1
        ]

        # check if multiple intercepts are defined across all variates
        if (intercept_test and len(constants) == 1) or len(constants) > 1:
            # remove all constant columns
            confounding_vars = np.delete(confounding_vars, constants, axis=1)
            # warn user if multiple intercepts are found
            warnings.warn(
                category=UserWarning,
                message=(
                    'Multiple columns across "confounding_vars" and/or '
                    '"target_vars" are constant. Only one will be used '
                    "as intercept."
                ),
                stacklevel=find_stack_level(),
            )
            model_intercept = True

            # remove confounding vars variable if it is empty
            if confounding_vars.size == 0:
                confounding_vars = None

        # intercept is only defined in confounding vars
        if not intercept_test and len(constants) == 1:
            intercept_test = True

    # optionally add intercept
    if model_intercept and not intercept_test:
        if confounding_vars is not None:
            confounding_vars = np.hstack(
                (confounding_vars, np.ones((n_samples, 1)))
            )
        else:
            confounding_vars = np.ones((n_samples, 1))

    # OLS regression on original data
    covars_orthonormalized = None
    if confounding_vars is not None:
        # step 1: extract effect of covars from target vars
        covars_orthonormalized = orthonormalize_matrix(confounding_vars)
        if not covars_orthonormalized.flags["C_CONTIGUOUS"]:
            # useful to developer
            warnings.warn(
                "Confounding variates not C_CONTIGUOUS.",
                stacklevel=find_stack_level(),
            )
            covars_orthonormalized = np.ascontiguousarray(
                covars_orthonormalized
            )

        targetvars_normalized = normalize_matrix_on_axis(
            target_vars
        ).T  # faster with F-ordered target_vars_chunk
        if not targetvars_normalized.flags["C_CONTIGUOUS"]:
            # useful to developer
            warnings.warn(
                "Target variates not C_CONTIGUOUS.",
                stacklevel=find_stack_level(),
            )
            targetvars_normalized = np.ascontiguousarray(targetvars_normalized)

        beta_targetvars_covars = np.dot(
            targetvars_normalized, covars_orthonormalized
        )
        targetvars_resid_covars = targetvars_normalized - np.dot(
            beta_targetvars_covars, covars_orthonormalized.T
        )
        targetvars_resid_covars = normalize_matrix_on_axis(
            targetvars_resid_covars, axis=1
        )

        # step 2: extract effect of covars from tested vars
        testedvars_normalized = normalize_matrix_on_axis(tested_vars.T, axis=1)
        beta_testedvars_covars = np.dot(
            testedvars_normalized, covars_orthonormalized
        )
        testedvars_resid_covars = testedvars_normalized - np.dot(
            beta_testedvars_covars, covars_orthonormalized.T
        )
        testedvars_resid_covars = normalize_matrix_on_axis(
            testedvars_resid_covars, axis=1
        ).T.copy()

    else:
        targetvars_resid_covars = normalize_matrix_on_axis(target_vars).T
        testedvars_resid_covars = normalize_matrix_on_axis(tested_vars).copy()

    # check arrays contiguousity for the sake of code efficiency
    targetvars_resid_covars = _make_array_contiguous(targetvars_resid_covars)
    testedvars_resid_covars = _make_array_contiguous(testedvars_resid_covars)

    # step 3: original regression (= regression on residuals + adjust t-score)
    # compute t score map of each tested var for original data
    # scores_original_data is in descriptors-by-regressors shape
    scores_original_data = t_score_with_covars_and_normalized_design(
        testedvars_resid_covars,
        targetvars_resid_covars.T,
        covars_orthonormalized,
    )

    # Define connectivity for TFCE and/or cluster measures
    bin_struct = generate_binary_structure(3, 1)

    tfce_original_data = None
    if tfce:
        scores_4d = masker.inverse_transform(
            scores_original_data.T
        ).get_fdata()
        tfce_original_data = calculate_tfce(
            scores_4d,
            bin_struct=bin_struct,
            two_sided_test=two_sided_test,
        )
        tfce_original_data = apply_mask(
            Nifti1Image(
                tfce_original_data,
                masker.mask_img_.affine,
                masker.mask_img_.header,
            ),
            masker.mask_img_,
        ).T

    # 0 or negative number of permutations => original data scores only
    if n_perm <= 0:
        if output_type == "legacy":
            return np.asarray([]), scores_original_data.T, np.asarray([])

        out = {"t": scores_original_data.T}
        if tfce:
            out["tfce"] = tfce_original_data.T
        return out

    # Permutations
    # parallel computing units perform a reduced number of permutations each
    if n_perm > n_jobs:
        n_perm_chunks = np.asarray([n_perm / n_jobs] * n_jobs, dtype=int)
        n_perm_chunks[-1] += n_perm % n_jobs
    elif n_perm > 0:
        warnings.warn(
            f"The specified number of permutations is {n_perm} "
            "and the number of jobs to be performed in parallel "
            f"has been set to {n_jobs}. "
            f"This is incompatible, so only {n_perm} jobs will be run. "
            "You may want to perform more permutations "
            "in order to make the most of the available computing resources.",
            UserWarning,
            stacklevel=find_stack_level(),
        )
        n_perm_chunks = np.ones(n_perm, dtype=int)

    threshold_t = _compute_t_stat_threshold(
        threshold, two_sided_test, tested_vars, confounding_vars
    )

    # actual permutations, seeded from a random integer between 0 and maximum
    # value represented by np.int32 (to have a large entropy).
    ret = joblib.Parallel(n_jobs=n_jobs, verbose=verbose)(
        joblib.delayed(_permuted_ols_on_chunk)(
            scores_original_data,
            testedvars_resid_covars,
            targetvars_resid_covars.T,
            thread_id=thread_id + 1,
            threshold=threshold_t,
            confounding_vars=covars_orthonormalized,
            masker=masker,
            n_perm=n_perm,
            n_perm_chunk=n_perm_chunk,
            intercept_test=intercept_test,
            two_sided_test=two_sided_test,
            tfce=tfce,
            tfce_original_data=tfce_original_data,
            random_state=rng.randint(1, np.iinfo(np.int32).max - 1),
            verbose=verbose,
        )
        for thread_id, n_perm_chunk in enumerate(n_perm_chunks)
    )

    # reduce results
    (
        vfwe_scores_as_ranks_parts,
        h0_vfwe_parts,
        csfwe_h0_parts,
        cmfwe_h0_parts,
        tfce_scores_as_ranks_parts,
        h0_tfce_parts,
    ) = zip(*ret)

    # Voxel-level FWE
    vfwe_h0 = np.hstack(h0_vfwe_parts)
    vfwe_scores_as_ranks = np.zeros((n_regressors, n_descriptors))
    for scores_as_ranks_part in vfwe_scores_as_ranks_parts:
        vfwe_scores_as_ranks += scores_as_ranks_part

    vfwe_pvals = (n_perm + 1 - vfwe_scores_as_ranks) / float(1 + n_perm)

    if output_type == "legacy":
        return (-np.log10(vfwe_pvals), scores_original_data.T, vfwe_h0)

    outputs = {
        "t": scores_original_data.T,
        "logp_max_t": -np.log10(vfwe_pvals),
        "h0_max_t": vfwe_h0,
    }

    if not tfce and threshold is None:
        return outputs

    outputs = _update_outputs_for_tfce(
        outputs,
        tfce,
        tfce_original_data,
        h0_tfce_parts,
        n_regressors,
        n_descriptors,
        tfce_scores_as_ranks_parts,
        n_perm,
    )

    return _prepare_output_permuted_ols(
        outputs,
        vfwe_pvals,
        scores_original_data,
        n_regressors,
        threshold,
        csfwe_h0_parts,
        cmfwe_h0_parts,
        masker,
        threshold_t,
        bin_struct,
        two_sided_test,
    )


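# Hypothetical helper (illustration only, not part of nilearn) mirroring how
# permuted_ols splits ``n_perm`` permutations across ``n_jobs`` workers:
# equal integer shares, with the remainder folded into the last chunk.
def _demo_permutation_chunking(n_perm, n_jobs):
    """Return per-worker permutation counts: (10000, 3) -> [3333, 3333, 3334]."""
    n_perm_chunks = np.asarray([n_perm / n_jobs] * n_jobs, dtype=int)
    n_perm_chunks[-1] += n_perm % n_jobs
    return n_perm_chunks
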

def _make_array_contiguous(array):
    """Make arrays contiguous for code efficiency."""
    if not array.flags["C_CONTIGUOUS"]:
        # useful to developer
        warnings.warn(
            "Input array is not C_CONTIGUOUS.",
            stacklevel=find_stack_level(),
        )
        array = np.ascontiguousarray(array)
    return array


def _compute_t_stat_threshold(
    threshold, two_sided_test, tested_vars, confounding_vars
):
    """Compute t-stat threshold if needed based on degrees of freedom."""
    if threshold is None:
        return None
    n_samples, n_regressors = tested_vars.shape
    n_covars = 0 if confounding_vars is None else confounding_vars.shape[1]
    # determine t-statistic threshold
    degrees_of_freedom = n_samples - (n_regressors + n_covars)
    return (
        stats.t.isf(threshold / 2, df=degrees_of_freedom)
        if two_sided_test
        else stats.t.isf(threshold, df=degrees_of_freedom)
    )


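# Hypothetical illustration (values assumed, not from nilearn) of the p-scale
# to t-scale conversion performed by _compute_t_stat_threshold: with 28
# samples, one regressor, and no covariates, a two-sided cluster-forming
# threshold of p < 0.001 corresponds to |t| above roughly 3.69.
def _demo_threshold_conversion():
    """Return the t-scale threshold for an assumed 28-sample design."""
    degrees_of_freedom = 28 - (1 + 0)  # n_samples - (n_regressors + n_covars)
    return stats.t.isf(0.001 / 2, df=degrees_of_freedom)  # ~3.69
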

def _check_inputs_permuted_ols(n_jobs, tfce, masker, threshold, target_vars):
    # invalid according to joblib's conventions
    if n_jobs == 0:
        raise ValueError(
            "'n_jobs == 0' is not a valid choice. "
            "Please provide a positive number of CPUs, "
            "or -1 for all CPUs, "
            "or a negative number (-i) for 'all but (i-1)' CPUs "
            "(joblib conventions)."
        )
    # check that masker is provided if it is needed
    if tfce and not masker:
        raise ValueError("A masker must be provided if tfce is True.")

    if (threshold is not None) and (masker is None):
        raise ValueError(
            'If "threshold" is not None, masker must be defined as well.'
        )

    # target_vars must be a 2D array
    if target_vars.ndim != 2:
        raise ValueError(
            "'target_vars' should be a 2D array. "
            f"An array with {target_vars.ndim} dimension(s) was passed."
        )


def _sanitize_inputs_permuted_ols(
    n_jobs, output_type, tfce, threshold, target_vars, tested_vars
):
    # check n_jobs (number of CPUs)
    if n_jobs < 0:
        # joblib convention: -1 means all CPUs, -i means all but (i - 1)
        n_jobs = max(1, joblib.cpu_count() + int(n_jobs) + 1)
    else:
        n_jobs = min(n_jobs, joblib.cpu_count())

    # Resolve the output_type as well
    if tfce and output_type == "legacy":
        warnings.warn(
            'If "tfce" is set to True, "output_type" must be set to "dict". '
            "Overriding.",
            stacklevel=find_stack_level(),
        )
        output_type = "dict"

    if (threshold is not None) and (output_type == "legacy"):
        warnings.warn(
            'If "threshold" is not None, "output_type" must be set to '
            '"dict". Overriding.',
            stacklevel=find_stack_level(),
        )
        output_type = "dict"

    if output_type == "legacy":
        warnings.warn(
            category=DeprecationWarning,
            message=(
                'The "legacy" output structure for "permuted_ols" is '
                "deprecated. "
                'The default output structure will be changed to "dict" '
                "in version 0.13."
            ),
            stacklevel=find_stack_level(),
        )

    target_vars = np.asfortranarray(target_vars)  # efficient for chunking

    if np.any(np.all(target_vars == 0, axis=0)):
        warnings.warn(
            "Some descriptors in 'target_vars' have zeros across all "
            "samples. These descriptors will be ignored during null "
            "distribution generation.",
            stacklevel=find_stack_level(),
        )

    # check explanatory variates' dimensions
    if tested_vars.ndim == 1:
        tested_vars = np.atleast_2d(tested_vars).T

    return n_jobs, output_type, target_vars, tested_vars


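# Hypothetical illustration (not part of nilearn) of the joblib-style n_jobs
# convention resolved above, on an assumed 8-CPU machine: -1 means all CPUs
# (8), -2 means all but one (7), and positive values are capped at the CPU
# count.
def _demo_n_jobs_resolution(n_jobs, n_cpus=8):
    """Resolve ``n_jobs`` against an assumed ``n_cpus``-core machine."""
    if n_jobs < 0:
        return max(1, n_cpus + int(n_jobs) + 1)
    return min(n_jobs, n_cpus)
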

def _prepare_output_permuted_ols(
    outputs,
    vfwe_pvals,
    scores_original_data,
    n_regressors,
    threshold,
    csfwe_h0_parts,
    cmfwe_h0_parts,
    masker,
    threshold_t,
    bin_struct,
    two_sided_test,
):
    if threshold is None:
        return outputs

    # Cluster-size and cluster-mass FWE
    # a dictionary to collect mass/size measures
    cluster_dict = {
        "size_h0": np.hstack(csfwe_h0_parts),
        "mass_h0": np.hstack(cmfwe_h0_parts),
        "size": np.zeros_like(vfwe_pvals).astype(int),
        "mass": np.zeros_like(vfwe_pvals),
        "size_pvals": np.zeros_like(vfwe_pvals),
        "mass_pvals": np.zeros_like(vfwe_pvals),
    }

    scores_original_data_4d = masker.inverse_transform(
        scores_original_data.T
    ).get_fdata()

    for i_regressor in range(n_regressors):
        scores_original_data_3d = scores_original_data_4d[..., i_regressor]

        # Label the clusters for both cluster mass and size inference
        labeled_arr3d, _ = label(
            scores_original_data_3d > threshold_t,
            bin_struct,
        )

        if two_sided_test:
            # Add negative cluster labels
            temp_labeled_arr3d, _ = label(
                scores_original_data_3d < -threshold_t,
                bin_struct,
            )
            n_negative_clusters = np.max(temp_labeled_arr3d)
            labeled_arr3d[labeled_arr3d > 0] += n_negative_clusters
            labeled_arr3d = labeled_arr3d + temp_labeled_arr3d
            del temp_labeled_arr3d

        cluster_labels, idx, cluster_dict["size_regressor"] = np.unique(
            labeled_arr3d,
            return_inverse=True,
            return_counts=True,
        )
        assert cluster_labels[0] == 0  # the background

        # Replace background's "cluster size" with zeros
        cluster_dict["size_regressor"][0] = 0

        # Calculate mass for each cluster
        cluster_dict["mass_regressor"] = np.zeros(cluster_labels.shape)
        for j_val in cluster_labels[1:]:  # skip background
            cluster_mass = np.sum(
                np.fabs(scores_original_data_3d[labeled_arr3d == j_val])
                - threshold_t
            )
            cluster_dict["mass_regressor"][j_val] = cluster_mass

        # Calculate p-values from size/mass values and associated h0s
        for metric in ["mass", "size"]:
            p_vals = null_to_p(
                cluster_dict[f"{metric}_regressor"],
                cluster_dict[f"{metric}_h0"][i_regressor, :],
                "larger",
            )
            p_map = p_vals[np.reshape(idx, labeled_arr3d.shape)]
            metric_map = cluster_dict[f"{metric}_regressor"][
                np.reshape(idx, labeled_arr3d.shape)
            ]

            # Convert 3D to image, then to 1D
            # There is a problem if the masker performs preprocessing,
            # so we use apply_mask here.
            cluster_dict[f"{metric}_pvals"][i_regressor, :] = np.squeeze(
                apply_mask(
                    image.new_img_like(masker.mask_img_, p_map),
                    masker.mask_img_,
                )
            )
            cluster_dict[metric][i_regressor, :] = np.squeeze(
                apply_mask(
                    image.new_img_like(masker.mask_img_, metric_map),
                    masker.mask_img_,
                )
            )

    outputs["size"] = cluster_dict["size"]
    outputs["logp_max_size"] = -np.log10(cluster_dict["size_pvals"])
    outputs["h0_max_size"] = cluster_dict["size_h0"]
    outputs["mass"] = cluster_dict["mass"]
    outputs["logp_max_mass"] = -np.log10(cluster_dict["mass_pvals"])
    outputs["h0_max_mass"] = cluster_dict["mass_h0"]

    return outputs


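# Hypothetical illustration (values assumed) of the cluster-mass measure
# computed in _prepare_output_permuted_ols: a cluster's mass is the summed
# suprathreshold excess of the absolute t-scores of its voxels.
def _demo_cluster_mass():
    """Return the mass of an assumed 3-voxel cluster above t = 3.0."""
    cluster_scores = np.array([3.5, 4.0, 4.2])  # |t| values in the cluster
    threshold_t = 3.0
    return np.sum(np.fabs(cluster_scores) - threshold_t)  # 0.5 + 1.0 + 1.2
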

def _update_outputs_for_tfce(
    outputs,
    tfce,
    tfce_original_data,
    h0_tfce_parts,
    n_regressors,
    n_descriptors,
    tfce_scores_as_ranks_parts,
    n_perm,
):
    if not tfce:
        return outputs

    outputs["tfce"] = tfce_original_data.T

    # We can use the same approach for TFCE that we use for vFWE
    outputs["h0_max_tfce"] = np.hstack(h0_tfce_parts)

    tfce_scores_as_ranks = np.zeros((n_regressors, n_descriptors))
    for tfce_scores_as_ranks_part in tfce_scores_as_ranks_parts:
        tfce_scores_as_ranks += tfce_scores_as_ranks_part

    tfce_pvals = (n_perm + 1 - tfce_scores_as_ranks) / float(1 + n_perm)
    outputs["logp_max_tfce"] = -np.log10(tfce_pvals)

    return outputs