# nilearn/mass_univariate/permuted_least_squares.py
1"""Massively Univariate Linear Model estimated \
2with OLS and permutation test.
3"""
5import time
6import warnings
8import joblib
9import numpy as np
10from nibabel import Nifti1Image
11from scipy import stats
12from scipy.ndimage import generate_binary_structure, label
13from sklearn.utils import check_random_state
15from nilearn import image
16from nilearn._utils import fill_doc, logger
17from nilearn._utils.logger import find_stack_level
18from nilearn._utils.param_validation import check_params
19from nilearn.masking import apply_mask
20from nilearn.mass_univariate._utils import (
21 calculate_cluster_measures,
22 calculate_tfce,
23 normalize_matrix_on_axis,
24 null_to_p,
25 orthonormalize_matrix,
26 t_score_with_covars_and_normalized_design,
27)


def _permuted_ols_on_chunk(
    scores_original_data,
    tested_vars,
    target_vars,
    thread_id,
    threshold=None,
    confounding_vars=None,
    masker=None,
    n_perm=10000,
    n_perm_chunk=10000,
    intercept_test=True,
    two_sided_test=True,
    tfce=False,
    tfce_original_data=None,
    random_state=None,
    verbose=0,
):
47 """Perform massively univariate analysis with permuted OLS on a data chunk.
49 To be used in a parallel computing context.
51 Parameters
52 ----------
53 scores_original_data : array-like, shape=(n_descriptors, n_regressors)
54 t-scores obtained for the original (non-permuted) data.
56 tested_vars : array-like, shape=(n_samples, n_regressors)
57 Explanatory variates.
59 target_vars : array-like, shape=(n_samples, n_targets)
60 fMRI data. F-ordered for efficient computations.
62 thread_id : int
63 process id, used for display.
65 threshold : :obj:`float`
66 Cluster-forming threshold in t-scale.
67 This is only used for cluster-level inference.
68 If ``threshold`` is not None, but ``masker`` is, an exception will be
69 raised.
71 .. versionadded:: 0.9.2
73 confounding_vars : array-like, shape=(n_samples, n_covars), optional
74 Clinical data (covariates).
76 masker : None or :class:`~nilearn.maskers.NiftiMasker` or \
77 :class:`~nilearn.maskers.MultiNiftiMasker`, optional
78 A mask to be used on the data.
79 This is used for cluster-level inference and :term:`TFCE`-based
80 inference, if either is enabled.
81 If ``threshold`` is not None, but ``masker`` is, an exception will be
82 raised.
84 .. versionadded:: 0.9.2
86 n_perm : int, default=10000
87 Total number of permutations to perform, only used for
88 display in this function.
90 n_perm_chunk : int, default=10000
91 Number of permutations to be performed.
93 intercept_test : boolean, default=True
94 Change the permutation scheme (swap signs for intercept,
95 switch labels otherwise). See :footcite:t:`Fisher1935`.
97 two_sided_test : boolean, default=True
98 If True, performs an unsigned t-test. Both positive and negative
99 effects are considered; the null hypothesis is that the effect is zero.
100 If False, only positive effects are considered as relevant. The null
101 hypothesis is that the effect is zero or negative.
103 tfce : :obj:`bool`, default=False
104 Whether to perform :term:`TFCE`-based multiple comparisons correction
105 or not.
106 Calculating TFCE values in each permutation can be time-consuming, so
107 this option is disabled by default.
108 The TFCE calculation is implemented as described in
109 :footcite:t:`Smith2009a`.
111 .. versionadded:: 0.9.2
113 tfce_original_data : None or array-like, \
114 shape=(n_descriptors, n_regressors), optional
115 TFCE values obtained for the original (non-permuted) data.
117 .. versionadded:: 0.9.2
119 %(random_state)s
121 %(verbose0)s
123 Returns
124 -------
125 scores_as_ranks_part : array-like, shape=(n_regressors, n_descriptors)
126 The ranks of the original scores in ``h0_fmax_part``.
127 When ``n_descriptors`` or ``n_perm`` are large, it can be quite long to
128 find the rank of the original scores into the whole H0 distribution.
129 Here, it is performed in parallel by the workers involved in the
130 permutation computation.
132 h0_fmax_part : array-like, shape=(n_perm_chunk, n_regressors)
133 Distribution of the (max) t-statistic under the null hypothesis
134 (limited to this permutation chunk).
136 h0_csfwe_part, h0_cmfwe_part : array-like, \
137 shape=(n_perm_chunk, n_regressors)
138 Distribution of max cluster sizes/masses under the null hypothesis.
139 Only calculated if ``masker`` is not None.
140 Otherwise, these will both be None.
142 .. versionadded:: 0.9.2
144 tfce_scores_as_ranks_part : array-like, shape=(n_regressors, n_descriptors)
145 The ranks of the original TFCE values in ``h0_tfce_part``.
146 When ``n_descriptors`` or ``n_perm`` are large, it can be quite long to
147 find the rank of the original scores into the whole H0 distribution.
148 Here, it is performed in parallel by the workers involved in the
149 permutation computation.
151 .. versionadded:: 0.9.2
153 h0_tfce_part : array-like, shape=(n_perm_chunk, n_regressors)
154 Distribution of the (max) TFCE value under the null hypothesis
155 (limited to this permutation chunk).
157 .. versionadded:: 0.9.2
159 References
160 ----------
161 .. footbibliography::
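
    Examples
    --------
    A minimal sketch of the two permutation schemes used here, on a toy
    array: sign flipping for an intercept test, joint row shuffling
    otherwise.

    .. code-block:: python

        import numpy as np
        from sklearn.utils import check_random_state

        rng = check_random_state(0)
        target_vars = np.arange(12.0).reshape(6, 2)  # 6 samples, 2 targets

        # intercept test: randomly flip the sign of each sample
        flipped = target_vars * (rng.randint(2, size=(6, 1)) * 2 - 1)

        # otherwise: permute the rows of the tested (and confounding)
        # variates instead of the fMRI signal
        tested_vars = np.arange(6.0).reshape(6, 1)
        shuffled = tested_vars[rng.permutation(6)]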
163 """
    # initialize the seed of the random generator
    rng = check_random_state(random_state)

    n_samples, n_regressors = tested_vars.shape
    n_descriptors = target_vars.shape[1]

    # run the permutations
    t0 = time.time()
    h0_fmax_part = np.empty((n_regressors, n_perm_chunk))
    scores_as_ranks_part = np.zeros((n_regressors, n_descriptors))

    # Preallocate null arrays for optional outputs
    # Any unselected outputs will just return a None
    h0_tfce_part, tfce_scores_as_ranks_part = None, None
    if tfce:
        h0_tfce_part = np.empty((n_regressors, n_perm_chunk))
        tfce_scores_as_ranks_part = np.zeros((n_regressors, n_descriptors))

    h0_csfwe_part, h0_cmfwe_part = None, None
    if threshold is not None:
        h0_csfwe_part = np.empty((n_regressors, n_perm_chunk))
        h0_cmfwe_part = np.empty((n_regressors, n_perm_chunk))

    for i_perm in range(n_perm_chunk):
        if intercept_test:
            # sign swap (random multiplication by 1 or -1)
            target_vars = target_vars * (
                rng.randint(2, size=(n_samples, 1)) * 2 - 1
            )
        else:
            # shuffle data
            # Regarding computation costs, we choose to shuffle testvars
            # and covars rather than fmri_signal.
            # Also, it is important to shuffle tested_vars and covars
            # jointly to simplify t-scores computation (null dot product).
            shuffle_idx = rng.permutation(n_samples)
            tested_vars = tested_vars[shuffle_idx]
            if confounding_vars is not None:
                confounding_vars = confounding_vars[shuffle_idx]

        # OLS regression on randomized data
        perm_scores = np.asfortranarray(
            t_score_with_covars_and_normalized_design(
                tested_vars, target_vars, confounding_vars
            )
        )
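        # perm_scores has shape (n_descriptors, n_regressors), like
        # scores_original_data; axis 0 is reduced below to take the max
        # statistic across descriptors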

        # find the rank of the original scores in h0_fmax_part
        # (when n_descriptors or n_perm are large, it can take a long time
        # to find the rank of the original scores in the whole H0
        # distribution. Here, it is performed in parallel by the workers
        # involved in the permutation computation.)
        # NOTE: This is not done for the cluster-level methods.
        if two_sided_test:
            # Get maximum absolute value for voxel-level FWE
            h0_fmax_part[:, i_perm] = np.nanmax(np.fabs(perm_scores), axis=0)
            scores_as_ranks_part += (
                h0_fmax_part[:, i_perm].reshape((-1, 1))
                < np.fabs(scores_original_data).T
            )
        else:
            # Get maximum value for voxel-level FWE
            h0_fmax_part[:, i_perm] = np.nanmax(perm_scores, axis=0)
            scores_as_ranks_part += (
                h0_fmax_part[:, i_perm].reshape((-1, 1))
                < scores_original_data.T
            )

        # Prepare data for cluster thresholding
        if tfce or (threshold is not None):
            arr4d = masker.inverse_transform(perm_scores.T).get_fdata()
            bin_struct = generate_binary_structure(3, 1)

        if tfce:
            # The TFCE map will contain positive and negative values if
            # two_sided_test is True, or positive only if it's False.
            # In either case, the maximum absolute value is the one we want.
            h0_tfce_part[:, i_perm] = np.nanmax(
                np.fabs(
                    calculate_tfce(
                        arr4d,
                        bin_struct=bin_struct,
                        two_sided_test=two_sided_test,
                    )
                ),
                axis=(0, 1, 2),
            )
            tfce_scores_as_ranks_part += h0_tfce_part[:, i_perm].reshape(
                (-1, 1)
            ) < np.fabs(tfce_original_data.T)
        if threshold is not None:
            (
                h0_csfwe_part[:, i_perm],
                h0_cmfwe_part[:, i_perm],
            ) = calculate_cluster_measures(
                arr4d,
                threshold,
                bin_struct,
                two_sided_test=two_sided_test,
            )
266 if verbose > 0:
267 step = 11 - min(verbose, 10)
268 if i_perm % step == 0:
269 # If there is only one job, progress information is fixed
270 crlf = "\n"
271 if n_perm == n_perm_chunk:
272 crlf = "\r"
274 percent = float(i_perm) / n_perm_chunk
275 percent = round(percent * 100, 2)
276 dt = time.time() - t0
277 remaining = (100.0 - percent) / max(0.01, percent) * dt
279 logger.log(
280 f"Job #{thread_id}, processed {i_perm}/{n_perm_chunk} "
281 f"permutations ({percent:0.2f}%, {remaining:0.2f} seconds "
282 f"remaining){crlf}",
283 )
285 return (
286 scores_as_ranks_part,
287 h0_fmax_part,
288 h0_csfwe_part,
289 h0_cmfwe_part,
290 tfce_scores_as_ranks_part,
291 h0_tfce_part,
292 )


@fill_doc
def permuted_ols(
    tested_vars,
    target_vars,
    confounding_vars=None,
    model_intercept=True,
    n_perm=10000,
    two_sided_test=True,
    random_state=None,
    n_jobs=1,
    verbose=0,
    masker=None,
    tfce=False,
    threshold=None,
    output_type="legacy",
):
311 """Massively univariate group analysis with permuted OLS.
313 Tested variates are independently fitted to target variates descriptors
314 (e.g. brain imaging signal) according to a linear model solved with an
315 Ordinary Least Squares criterion.
316 Confounding variates may be included in the model.
317 Permutation testing is used to assess the significance of the relationship
318 between the tested variates and the target variates
319 :footcite:p:`Anderson2001`, :footcite:p:`Winkler2014`.
320 A max-type procedure is used to obtain family-wise corrected p-values
321 based on t-statistics (voxel-level FWE), cluster sizes, cluster masses,
322 and :term:`TFCE` values.
324 The specific permutation scheme implemented here is the one of
325 :footcite:t:`Freedman1983`.
326 Its has been demonstrated in :footcite:t:`Anderson2001` that
327 this scheme conveys more sensitivity than alternative schemes. This holds
328 for neuroimaging applications, as discussed in details in
329 :footcite:t:`Winkler2014`.
331 Permutations are performed on parallel computing units.
332 Each of them performs a fraction of permutations on the whole dataset.
333 Thus, the max t-score amongst data descriptors can be computed directly,
334 which avoids storing all the computed t-scores.
336 The variates should be given C-contiguous.
337 ``target_vars`` are fortran-ordered automatically to speed-up computations.

    Parameters
    ----------
    tested_vars : array-like, shape=(n_samples, n_regressors)
        Explanatory variates, fitted and tested independently from each
        other.

    target_vars : array-like, shape=(n_samples, n_descriptors)
        :term:`fMRI` data to analyze according
        to the explanatory and confounding variates.

        In a group-level analysis, the samples will typically be voxels
        (for volumetric data) or :term:`vertices<vertex>` (for surface
        data), while the descriptors will generally be images,
        such as run-wise z-statistic maps.

    confounding_vars : array-like, shape=(n_samples, n_covars), default=None
        Confounding variates (covariates), fitted but not tested.
        If None, no confounding variate is added to the model
        (except maybe a constant column according to the value of
        ``model_intercept``).

    model_intercept : :obj:`bool`, default=True
        If True, a constant column is added to the confounding variates
        unless the tested variate is already the intercept or the
        confounding variates already contain an intercept.

    %(n_perm)s
        If ``n_perm`` is set to 0, then no p-values will be estimated.

    %(two_sided_test)s

    %(random_state)s

    n_jobs : :obj:`int`, default=1
        Number of parallel workers.
        If -1 is provided, all CPUs are used.
        A negative number indicates that all the CPUs except
        (abs(n_jobs) - 1) ones will be used.

    %(verbose0)s

    masker : None or :class:`~nilearn.maskers.NiftiMasker` or \
            :class:`~nilearn.maskers.MultiNiftiMasker`, default=None
        A mask to be used on the data.
        This is required for cluster-level inference, so it must be provided
        if ``threshold`` is not None.

        .. versionadded:: 0.9.2

    threshold : None or :obj:`float`, default=None
        Cluster-forming threshold in p-scale.
        This is only used for cluster-level inference.
        If None, cluster-level inference will not be performed.

        .. warning::

            Performing cluster-level inference will increase the computation
            time of the permutation procedure.

        .. versionadded:: 0.9.2

    %(tfce)s

        .. versionadded:: 0.9.2

    output_type : {'legacy', 'dict'}, default="legacy"
        Determines how outputs should be returned.
        The two options are:

        - 'legacy': return pvals, score_orig_data, and h0_fmax.
          This option is the default, but it is deprecated:
          the default will change to 'dict' in 0.13,
          and the 'legacy' option will be removed in 0.15.
        - 'dict': return a dictionary containing output arrays.
          This option will be made the default in 0.13.
          Additionally, if ``tfce`` is True or ``threshold`` is not None,
          ``output_type`` will automatically be set to 'dict'.

        .. deprecated:: 0.9.2

            The default value for this parameter will change from 'legacy'
            to 'dict' in 0.13, and the parameter will be removed completely
            in 0.15.

        .. versionadded:: 0.9.2

    Returns
    -------
    pvals : array-like, shape=(n_regressors, n_descriptors)
        Negative log10 p-values associated with the significance test of the
        n_regressors explanatory variates against the n_descriptors target
        variates. Family-wise corrected p-values.

        .. note::

            This is returned if ``output_type`` == 'legacy'.

        .. deprecated:: 0.9.2

            The 'legacy' option for ``output_type`` is deprecated.
            The default value will change to 'dict' in 0.13,
            and the ``output_type`` parameter will be removed in 0.15.

    score_orig_data : numpy.ndarray, shape=(n_regressors, n_descriptors)
        t-statistic associated with the significance test of the n_regressors
        explanatory variates against the n_descriptors target variates.
        The ranks of the scores into the h0 distribution correspond to the
        p-values.

        .. note::

            This is returned if ``output_type`` == 'legacy'.

        .. deprecated:: 0.9.2

            The 'legacy' option for ``output_type`` is deprecated.
            The default value will change to 'dict' in 0.13,
            and the ``output_type`` parameter will be removed in 0.15.

    h0_fmax : array-like, shape=(n_regressors, n_perm)
        Distribution of the (max) t-statistic under the null hypothesis
        (obtained from the permutations). Array is sorted.

        .. note::

            This is returned if ``output_type`` == 'legacy'.

        .. deprecated:: 0.9.2

            The 'legacy' option for ``output_type`` is deprecated.
            The default value will change to 'dict' in 0.13,
            and the ``output_type`` parameter will be removed in 0.15.

        .. versionchanged:: 0.9.2

            Return H0 for all regressors, instead of only the first one.

    outputs : :obj:`dict`
        Output arrays, organized in a dictionary.

        .. note::

            This is returned if ``output_type`` == 'dict'.
            This will be the default output starting in version 0.13.

        .. versionadded:: 0.9.2

        Here are the keys:

        ============= ============== ==========================================
        key           shape          description
        ============= ============== ==========================================
        t             (n_regressors, t-statistic associated with the
                      n_descriptors) significance test of the n_regressors
                                     explanatory variates against the
                                     n_descriptors target variates.
                                     The ranks of the scores into the h0
                                     distribution correspond to the p-values.
        logp_max_t    (n_regressors, Negative log10 p-values associated with
                      n_descriptors) the significance test of the n_regressors
                                     explanatory variates against the
                                     n_descriptors target variates.
                                     Family-wise corrected p-values, based on
                                     ``h0_max_t``.
        h0_max_t      (n_regressors, Distribution of the max t-statistic under
                      n_perm)        the null hypothesis (obtained from the
                                     permutations). Array is sorted.
        tfce          (n_regressors, TFCE values associated with the
                      n_descriptors) significance test of the n_regressors
                                     explanatory variates against the
                                     n_descriptors target variates.
                                     The ranks of the scores into the h0
                                     distribution correspond to the TFCE
                                     p-values.
        logp_max_tfce (n_regressors, Negative log10 p-values associated with
                      n_descriptors) the significance test of the n_regressors
                                     explanatory variates against the
                                     n_descriptors target variates.
                                     Family-wise corrected p-values, based on
                                     ``h0_max_tfce``.

                                     Returned only if ``tfce`` is True.
        h0_max_tfce   (n_regressors, Distribution of the max TFCE value under
                      n_perm)        the null hypothesis (obtained from the
                                     permutations). Array is sorted.

                                     Returned only if ``tfce`` is True.
        size          (n_regressors, Cluster size values associated with the
                      n_descriptors) significance test of the n_regressors
                                     explanatory variates against the
                                     n_descriptors target variates.
                                     The ranks of the scores into the h0
                                     distribution correspond to the size
                                     p-values.

                                     Returned only if ``threshold`` is not
                                     None.
        logp_max_size (n_regressors, Negative log10 p-values associated with
                      n_descriptors) the cluster-level significance test of
                                     the n_regressors explanatory variates
                                     against the n_descriptors target
                                     variates.
                                     Family-wise corrected, cluster-level
                                     p-values, based on ``h0_max_size``.

                                     Returned only if ``threshold`` is not
                                     None.
        h0_max_size   (n_regressors, Distribution of the max cluster size
                      n_perm)        value under the null hypothesis (obtained
                                     from the permutations). Array is sorted.

                                     Returned only if ``threshold`` is not
                                     None.
        mass          (n_regressors, Cluster mass values associated with the
                      n_descriptors) significance test of the n_regressors
                                     explanatory variates against the
                                     n_descriptors target variates.
                                     The ranks of the scores into the h0
                                     distribution correspond to the mass
                                     p-values.

                                     Returned only if ``threshold`` is not
                                     None.
        logp_max_mass (n_regressors, Negative log10 p-values associated with
                      n_descriptors) the cluster-level significance test of
                                     the n_regressors explanatory variates
                                     against the n_descriptors target
                                     variates.
                                     Family-wise corrected, cluster-level
                                     p-values, based on ``h0_max_mass``.

                                     Returned only if ``threshold`` is not
                                     None.
        h0_max_mass   (n_regressors, Distribution of the max cluster mass
                      n_perm)        value under the null hypothesis (obtained
                                     from the permutations). Array is sorted.

                                     Returned only if ``threshold`` is not
                                     None.
        ============= ============== ==========================================

    References
    ----------
    .. footbibliography::
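
    Examples
    --------
    A minimal sketch of a one-sample (intercept) test on synthetic data;
    ``n_perm`` is kept small only to make the example fast.

    .. code-block:: python

        import numpy as np
        from nilearn.mass_univariate import permuted_ols

        rng = np.random.default_rng(0)
        target_vars = rng.standard_normal((20, 50))  # 20 samples, 50 voxels
        tested_vars = np.ones((20, 1))  # intercept-only design

        outputs = permuted_ols(
            tested_vars,
            target_vars,
            n_perm=100,
            output_type="dict",
        )
        neg_log_pvals = outputs["logp_max_t"]  # shape (1, 50)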
583 """
    check_params(locals())
    _check_inputs_permuted_ols(n_jobs, tfce, masker, threshold, target_vars)

    n_jobs, output_type, target_vars, tested_vars = (
        _sanitize_inputs_permuted_ols(
            n_jobs, output_type, tfce, threshold, target_vars, tested_vars
        )
    )

    # initialize the seed of the random generator
    rng = check_random_state(random_state)

    n_descriptors = target_vars.shape[1]

    n_samples, n_regressors = tested_vars.shape
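
    # the design is an intercept test if there is a single tested variate
    # that is constant across all samples (e.g. a column of ones)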
    intercept_test = n_regressors == np.unique(tested_vars).size == 1

    # check if confounding vars contains an intercept
    if confounding_vars is not None:
        # Search for all constant columns
        constants = [
            x
            for x in range(confounding_vars.shape[1])
            if np.unique(confounding_vars[:, x]).size == 1
        ]

        # check if multiple intercepts are defined across all variates
        if (intercept_test and len(constants) == 1) or len(constants) > 1:
            # remove all constant columns
            confounding_vars = np.delete(confounding_vars, constants, axis=1)
            # warn user if multiple intercepts are found
            warnings.warn(
                category=UserWarning,
                message=(
                    'Multiple columns across "confounding_vars" and/or '
                    '"target_vars" are constant. Only one will be used '
                    "as intercept."
                ),
                stacklevel=find_stack_level(),
            )
            model_intercept = True

            # remove confounding vars variable if it is empty
            if confounding_vars.size == 0:
                confounding_vars = None

        # intercept is only defined in confounding vars
        if not intercept_test and len(constants) == 1:
            intercept_test = True

    # optionally add intercept
    if model_intercept and not intercept_test:
        if confounding_vars is not None:
            confounding_vars = np.hstack(
                (confounding_vars, np.ones((n_samples, 1)))
            )
        else:
            confounding_vars = np.ones((n_samples, 1))

    # OLS regression on original data
    covars_orthonormalized = None
    if confounding_vars is not None:
        # step 1: extract effect of covars from target vars
        covars_orthonormalized = orthonormalize_matrix(confounding_vars)
        if not covars_orthonormalized.flags["C_CONTIGUOUS"]:
            # useful to developer
            warnings.warn(
                "Confounding variates not C_CONTIGUOUS.",
                stacklevel=find_stack_level(),
            )
            covars_orthonormalized = np.ascontiguousarray(
                covars_orthonormalized
            )

        targetvars_normalized = normalize_matrix_on_axis(
            target_vars
        ).T  # faster with F-ordered target_vars_chunk
        if not targetvars_normalized.flags["C_CONTIGUOUS"]:
            # useful to developer
            warnings.warn(
                "Target variates not C_CONTIGUOUS.",
                stacklevel=find_stack_level(),
            )
            targetvars_normalized = np.ascontiguousarray(targetvars_normalized)

        beta_targetvars_covars = np.dot(
            targetvars_normalized, covars_orthonormalized
        )
        targetvars_resid_covars = targetvars_normalized - np.dot(
            beta_targetvars_covars, covars_orthonormalized.T
        )
        targetvars_resid_covars = normalize_matrix_on_axis(
            targetvars_resid_covars, axis=1
        )

        # step 2: extract effect of covars from tested vars
        testedvars_normalized = normalize_matrix_on_axis(tested_vars.T, axis=1)
        beta_testedvars_covars = np.dot(
            testedvars_normalized, covars_orthonormalized
        )
        testedvars_resid_covars = testedvars_normalized - np.dot(
            beta_testedvars_covars, covars_orthonormalized.T
        )
        testedvars_resid_covars = normalize_matrix_on_axis(
            testedvars_resid_covars, axis=1
        ).T.copy()

    else:
        targetvars_resid_covars = normalize_matrix_on_axis(target_vars).T
        testedvars_resid_covars = normalize_matrix_on_axis(tested_vars).copy()

    # check array contiguity for the sake of code efficiency
    targetvars_resid_covars = _make_array_contiguous(targetvars_resid_covars)
    testedvars_resid_covars = _make_array_contiguous(testedvars_resid_covars)

    # step 3: original regression (= regression on residuals + adjust t-score)
    # compute t score map of each tested var for original data
    # scores_original_data is in descriptors-by-regressors shape
    scores_original_data = t_score_with_covars_and_normalized_design(
        testedvars_resid_covars,
        targetvars_resid_covars.T,
        covars_orthonormalized,
    )

    # Define connectivity for TFCE and/or cluster measures
    bin_struct = generate_binary_structure(3, 1)

    tfce_original_data = None
    if tfce:
        scores_4d = masker.inverse_transform(
            scores_original_data.T
        ).get_fdata()
        tfce_original_data = calculate_tfce(
            scores_4d,
            bin_struct=bin_struct,
            two_sided_test=two_sided_test,
        )
        tfce_original_data = apply_mask(
            Nifti1Image(
                tfce_original_data,
                masker.mask_img_.affine,
                masker.mask_img_.header,
            ),
            masker.mask_img_,
        ).T

    # 0 or negative number of permutations => original data scores only
    if n_perm <= 0:
        if output_type == "legacy":
            return np.asarray([]), scores_original_data.T, np.asarray([])

        out = {"t": scores_original_data.T}
        if tfce:
            out["tfce"] = tfce_original_data.T
        return out

    # Permutations
    # parallel computing units perform a reduced number of permutations each
    if n_perm > n_jobs:
        n_perm_chunks = np.asarray([n_perm / n_jobs] * n_jobs, dtype=int)
        n_perm_chunks[-1] += n_perm % n_jobs
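        # e.g. n_perm=10000 with n_jobs=3 gives chunks [3333, 3333, 3334]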
746 elif n_perm > 0:
747 warnings.warn(
748 f"The specified number of permutations is {n_perm} "
749 "and the number of jobs to be performed in parallel "
750 f"has set to {n_jobs}. "
751 f"This is incompatible so only {n_perm} jobs will be running. "
752 "You may want to perform more permutations "
753 "in order to take the most of the available computing resources.",
754 UserWarning,
755 stacklevel=find_stack_level(),
756 )
757 n_perm_chunks = np.ones(n_perm, dtype=int)

    threshold_t = _compute_t_stat_threshold(
        threshold, two_sided_test, tested_vars, confounding_vars
    )

    # actual permutations, seeded from a random integer between 0 and maximum
    # value represented by np.int32 (to have a large entropy).
    ret = joblib.Parallel(n_jobs=n_jobs, verbose=verbose)(
        joblib.delayed(_permuted_ols_on_chunk)(
            scores_original_data,
            testedvars_resid_covars,
            targetvars_resid_covars.T,
            thread_id=thread_id + 1,
            threshold=threshold_t,
            confounding_vars=covars_orthonormalized,
            masker=masker,
            n_perm=n_perm,
            n_perm_chunk=n_perm_chunk,
            intercept_test=intercept_test,
            two_sided_test=two_sided_test,
            tfce=tfce,
            tfce_original_data=tfce_original_data,
            random_state=rng.randint(1, np.iinfo(np.int32).max - 1),
            verbose=verbose,
        )
        for thread_id, n_perm_chunk in enumerate(n_perm_chunks)
    )

    # reduce results
    (
        vfwe_scores_as_ranks_parts,
        h0_vfwe_parts,
        csfwe_h0_parts,
        cmfwe_h0_parts,
        tfce_scores_as_ranks_parts,
        h0_tfce_parts,
    ) = zip(*ret)

    # Voxel-level FWE
    vfwe_h0 = np.hstack(h0_vfwe_parts)
    vfwe_scores_as_ranks = np.zeros((n_regressors, n_descriptors))
    for scores_as_ranks_part in vfwe_scores_as_ranks_parts:
        vfwe_scores_as_ranks += scores_as_ranks_part
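
    # vfwe_scores_as_ranks counts the permutations whose null max statistic
    # fell below the original score, so this is the standard permutation
    # p-value: (1 + #{null >= original}) / (1 + n_perm)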
    vfwe_pvals = (n_perm + 1 - vfwe_scores_as_ranks) / float(1 + n_perm)

    if output_type == "legacy":
        return (-np.log10(vfwe_pvals), scores_original_data.T, vfwe_h0)

    outputs = {
        "t": scores_original_data.T,
        "logp_max_t": -np.log10(vfwe_pvals),
        "h0_max_t": vfwe_h0,
    }

    if not tfce and threshold is None:
        return outputs

    outputs = _update_outputs_for_tfce(
        outputs,
        tfce,
        tfce_original_data,
        h0_tfce_parts,
        n_regressors,
        n_descriptors,
        tfce_scores_as_ranks_parts,
        n_perm,
    )

    return _prepare_output_permuted_ols(
        outputs,
        vfwe_pvals,
        scores_original_data,
        n_regressors,
        threshold,
        csfwe_h0_parts,
        cmfwe_h0_parts,
        masker,
        threshold_t,
        bin_struct,
        two_sided_test,
    )


def _make_array_contiguous(array):
    """Make arrays contiguous for code efficiency."""
    if not array.flags["C_CONTIGUOUS"]:
        # useful to developer
        warnings.warn(
            "Target variates not C_CONTIGUOUS.", stacklevel=find_stack_level()
        )
        array = np.ascontiguousarray(array)
    return array


def _compute_t_stat_threshold(
    threshold, two_sided_test, tested_vars, confounding_vars
):
    """Compute t-stat threshold if needed based on degrees of freedom."""
    if threshold is None:
        return None
    n_samples, n_regressors = tested_vars.shape
    n_covars = 0 if confounding_vars is None else confounding_vars.shape[1]
    # determine t-statistic threshold
    degrees_of_freedom = n_samples - (n_regressors + n_covars)
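    # e.g. threshold=0.001 with two_sided_test=True and df=29 gives
    # stats.t.isf(0.0005, df=29), roughly 3.66, as the cluster-forming
    # t threshold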
    return (
        stats.t.isf(threshold / 2, df=degrees_of_freedom)
        if two_sided_test
        else stats.t.isf(threshold, df=degrees_of_freedom)
    )


def _check_inputs_permuted_ols(n_jobs, tfce, masker, threshold, target_vars):
    # invalid according to joblib's conventions
    if n_jobs == 0:
        raise ValueError(
            "'n_jobs == 0' is not a valid choice. "
            "Please provide a positive number of CPUs, "
            "or -1 for all CPUs, "
            "or a negative number (-i) for 'all but (i-1)' CPUs "
            "(joblib conventions)."
        )
    # check that masker is provided if it is needed
    if tfce and not masker:
        raise ValueError("A masker must be provided if tfce is True.")

    if (threshold is not None) and (masker is None):
        raise ValueError(
            'If "threshold" is not None, masker must be defined as well.'
        )

    # check that target_vars is a 2D array
    if target_vars.ndim != 2:
        raise ValueError(
            "'target_vars' should be a 2D array. "
            f"An array with {target_vars.ndim} dimension(s) was passed."
        )


def _sanitize_inputs_permuted_ols(
    n_jobs, output_type, tfce, threshold, target_vars, tested_vars
):
    # check n_jobs (number of CPUs)
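    # joblib convention for negative values: -1 means all CPUs,
    # -2 all but one, and so on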
901 if n_jobs < 0:
902 n_jobs = max(1, joblib.cpu_count() - int(n_jobs) + 1)
903 else:
904 n_jobs = min(n_jobs, joblib.cpu_count())

    # Resolve the output_type as well
    if tfce and output_type == "legacy":
        warnings.warn(
            'If "tfce" is set to True, "output_type" must be set to "dict". '
            "Overriding.",
            stacklevel=find_stack_level(),
        )
        output_type = "dict"

    if (threshold is not None) and (output_type == "legacy"):
        warnings.warn(
            'If "threshold" is not None, "output_type" must be set to "dict". '
            "Overriding.",
            stacklevel=find_stack_level(),
        )
        output_type = "dict"

    if output_type == "legacy":
        warnings.warn(
            category=DeprecationWarning,
            message=(
                'The "legacy" output structure for "permuted_ols" is '
                "deprecated. "
                'The default output structure will be changed to "dict" '
                "in version 0.13."
            ),
            stacklevel=find_stack_level(),
        )

    target_vars = np.asfortranarray(target_vars)  # efficient for chunking

    if np.any(np.all(target_vars == 0, axis=0)):
        warnings.warn(
            "Some descriptors in 'target_vars' have zeros across all samples. "
            "These descriptors will be ignored "
            "during null distribution generation.",
            stacklevel=find_stack_level(),
        )

    # check explanatory variates' dimensions
    if tested_vars.ndim == 1:
        tested_vars = np.atleast_2d(tested_vars).T

    return n_jobs, output_type, target_vars, tested_vars


def _prepare_output_permuted_ols(
    outputs,
    vfwe_pvals,
    scores_original_data,
    n_regressors,
    threshold,
    csfwe_h0_parts,
    cmfwe_h0_parts,
    masker,
    threshold_t,
    bin_struct,
    two_sided_test,
):
    if threshold is None:
        return outputs

    # Cluster-size and cluster-mass FWE
    # a dictionary to collect mass/size measures
    cluster_dict = {
        "size_h0": np.hstack(csfwe_h0_parts),
        "mass_h0": np.hstack(cmfwe_h0_parts),
        "size": np.zeros_like(vfwe_pvals).astype(int),
        "mass": np.zeros_like(vfwe_pvals),
        "size_pvals": np.zeros_like(vfwe_pvals),
        "mass_pvals": np.zeros_like(vfwe_pvals),
    }

    scores_original_data_4d = masker.inverse_transform(
        scores_original_data.T
    ).get_fdata()

    for i_regressor in range(n_regressors):
        scores_original_data_3d = scores_original_data_4d[..., i_regressor]

        # Label the clusters for both cluster mass and size inference
        labeled_arr3d, _ = label(
            scores_original_data_3d > threshold_t,
            bin_struct,
        )

        if two_sided_test:
            # Add negative cluster labels
            temp_labeled_arr3d, _ = label(
                scores_original_data_3d < -threshold_t,
                bin_struct,
            )
            n_negative_clusters = np.max(temp_labeled_arr3d)
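            # offset the positive labels so they remain distinct from the
            # negative ones (e.g. with 2 negative clusters, positive labels
            # 1..k become 3..k+2)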
999 labeled_arr3d[labeled_arr3d > 0] += n_negative_clusters
1000 labeled_arr3d = labeled_arr3d + temp_labeled_arr3d
1001 del temp_labeled_arr3d
1003 cluster_labels, idx, cluster_dict["size_regressor"] = np.unique(
1004 labeled_arr3d,
1005 return_inverse=True,
1006 return_counts=True,
1007 )
1008 assert cluster_labels[0] == 0 # the background
1010 # Replace background's "cluster size" w zeros
1011 cluster_dict["size_regressor"][0] = 0
1013 # Calculate mass for each cluster
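        # (mass = the sum of |t| - threshold_t over a cluster's voxels, i.e.
        # its total statistic excess above the cluster-forming threshold)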
1014 cluster_dict["mass_regressor"] = np.zeros(cluster_labels.shape)
1015 for j_val in cluster_labels[1:]: # skip background
1016 cluster_mass = np.sum(
1017 np.fabs(scores_original_data_3d[labeled_arr3d == j_val])
1018 - threshold_t
1019 )
1020 cluster_dict["mass_regressor"][j_val] = cluster_mass
1022 # Calculate p-values from size/mass values and associated h0s
1023 for metric in ["mass", "size"]:
1024 p_vals = null_to_p(
1025 cluster_dict[f"{metric}_regressor"],
1026 cluster_dict[f"{metric}_h0"][i_regressor, :],
1027 "larger",
1028 )
1029 p_map = p_vals[np.reshape(idx, labeled_arr3d.shape)]
1030 metric_map = cluster_dict[f"{metric}_regressor"][
1031 np.reshape(idx, labeled_arr3d.shape)
1032 ]
1034 # Convert 3D to image, then to 1D
1035 # There is a problem if the masker performs preprocessing,
1036 # so we use apply_mask here.
1037 cluster_dict[f"{metric}_pvals"][i_regressor, :] = np.squeeze(
1038 apply_mask(
1039 image.new_img_like(masker.mask_img_, p_map),
1040 masker.mask_img_,
1041 )
1042 )
1043 cluster_dict[metric][i_regressor, :] = np.squeeze(
1044 apply_mask(
1045 image.new_img_like(masker.mask_img_, metric_map),
1046 masker.mask_img_,
1047 )
1048 )
1050 outputs["size"] = cluster_dict["size"]
1051 outputs["logp_max_size"] = -np.log10(cluster_dict["size_pvals"])
1052 outputs["h0_max_size"] = cluster_dict["size_h0"]
1053 outputs["mass"] = cluster_dict["mass"]
1054 outputs["logp_max_mass"] = -np.log10(cluster_dict["mass_pvals"])
1055 outputs["h0_max_mass"] = cluster_dict["mass_h0"]
1057 return outputs


def _update_outputs_for_tfce(
    outputs,
    tfce,
    tfce_original_data,
    h0_tfce_parts,
    n_regressors,
    n_descriptors,
    tfce_scores_as_ranks_parts,
    n_perm,
):
    if not tfce:
        return outputs

    outputs["tfce"] = tfce_original_data.T

    # We can use the same approach for TFCE that we use for vFWE
    outputs["h0_max_tfce"] = np.hstack(h0_tfce_parts)

    tfce_scores_as_ranks = np.zeros((n_regressors, n_descriptors))
    for tfce_scores_as_ranks_part in tfce_scores_as_ranks_parts:
        tfce_scores_as_ranks += tfce_scores_as_ranks_part

    tfce_pvals = (n_perm + 1 - tfce_scores_as_ranks) / float(1 + n_perm)
    outputs["logp_max_tfce"] = -np.log10(tfce_pvals)

    return outputs