1"""Base class for decomposition estimators.
3Utilities for masking and dimension reduction of group data
4"""
6import glob
7import inspect
8import itertools
9import warnings
10from math import ceil
11from pathlib import Path
12from string import Template
14import numpy as np
15from joblib import Memory, Parallel, delayed
16from scipy import linalg
17from sklearn.base import BaseEstimator, TransformerMixin
18from sklearn.linear_model import LinearRegression
19from sklearn.utils import check_random_state
20from sklearn.utils.estimator_checks import check_is_fitted
21from sklearn.utils.extmath import randomized_svd, svd_flip
23import nilearn
24from nilearn._utils import fill_doc, logger
25from nilearn._utils.cache_mixin import CacheMixin, cache
26from nilearn._utils.logger import find_stack_level
27from nilearn._utils.masker_validation import check_embedded_masker
28from nilearn._utils.niimg import safe_get_data
29from nilearn._utils.param_validation import check_params
30from nilearn._utils.path_finding import resolve_globbing
31from nilearn._utils.tags import SKLEARN_LT_1_6
32from nilearn.maskers import NiftiMapsMasker, SurfaceMapsMasker, SurfaceMasker
33from nilearn.signal import row_sum_of_squares
34from nilearn.surface import SurfaceImage


def _warn_ignored_surface_masker_params(estimator):
    """Warn about parameters that are ignored by SurfaceMasker.

    Only raise a warning if a parameter differs
    from its default value in the estimator __init__ signature.

    Parameters
    ----------
    estimator : _BaseDecomposition
        The estimator to check for ignored parameters.
    """
    params_to_ignore = ["mask_strategy", "target_affine", "target_shape"]

    tmp = dict(**inspect.signature(estimator.__init__).parameters)

    ignored_params = []
    for param in params_to_ignore:
        if param in tmp:
            if (
                tmp[param].default is None
                and getattr(estimator, param) is not None
            ):
                # this should catch when a user passes a numpy array
                ignored_params.append(param)
            elif getattr(estimator, param) != tmp[param].default:
                ignored_params.append(param)

    if ignored_params:
        warnings.warn(
            Template(
                "The following parameters are not relevant when the input "
                "images and mask are SurfaceImages: "
                "${params}. They will be ignored."
            ).substitute(params=", ".join(ignored_params)),
            UserWarning,
            stacklevel=find_stack_level(),
        )
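

# Illustrative sketch (not executed): the warning above fires when a
# volume-only parameter is set on an estimator that is then fit on surface
# data. CanICA is one concrete subclass of _BaseDecomposition; the call
# below is a hypothetical usage, not part of this module.
#
#     from nilearn.decomposition import CanICA
#     est = CanICA(target_affine=np.eye(4))
#     _warn_ignored_surface_masker_params(est)  # warns: target_affine ignored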


def _fast_svd(X, n_components, random_state=None):
    """Automatically switch between randomized and lapack SVD (heuristic \
    of scikit-learn).

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        The data to decompose.

    n_components : integer
        The order of the dimensionality of the truncated SVD.

    %(random_state)s
        default=0

    Returns
    -------
    U : array, shape (n_samples, n_components)
        The left singular vectors of the truncated SVD.

    S : array, shape (n_components)
        The singular values of the truncated SVD.

    V : array, shape (n_components, n_features)
        The right singular vectors of the truncated SVD.

    """
    random_state = check_random_state(random_state)
    # Small problem: just call full PCA
    if max(X.shape) <= 500:
        svd_solver = "full"
    elif 1 <= n_components < 0.8 * min(X.shape):
        svd_solver = "randomized"
    # This is also the case of n_components in (0, 1)
    else:
        svd_solver = "full"

    # Call different fits for either full or truncated SVD
    if svd_solver == "full":
        U, S, V = linalg.svd(X, full_matrices=False)
        # flip eigenvectors' sign to enforce deterministic output
        U, V = svd_flip(U, V)
        # The "copy" calls are there to free the reference on the
        # non-reduced data, and hence clear memory early
        U = U[:, :n_components].copy()
        S = S[:n_components]
        V = V[:n_components].copy()
    else:
        n_iter = "auto"

        U, S, V = randomized_svd(
            X,
            n_components=n_components,
            n_iter=n_iter,
            flip_sign=True,
            random_state=random_state,
        )
    return U, S, V
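

# Illustrative sketch (not executed): for a 1000 x 50 matrix and 10
# components the heuristic above selects the randomized solver
# (1 <= 10 < 0.8 * 50), and the shapes follow the docstring. The data here
# are hypothetical.
#
#     rng = np.random.RandomState(0)
#     X = rng.standard_normal(size=(1000, 50))
#     U, S, V = _fast_svd(X, n_components=10, random_state=0)
#     # U.shape == (1000, 10); S.shape == (10,); V.shape == (10, 50)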


def _mask_and_reduce(
    masker,
    imgs,
    confounds=None,
    reduction_ratio="auto",
    n_components=None,
    random_state=None,
    memory_level=0,
    memory=None,
    n_jobs=1,
):
    """Mask and reduce provided 4D images with given masker.

    Uses a PCA (randomized for small reduction ratio) or a range finding
    matrix on time series to reduce data size in the time direction. For
    multiple images, the concatenation of data is returned, either as an
    ndarray or a memorymap (useful for big datasets that do not fit in
    memory).

    Parameters
    ----------
    masker : :obj:`~nilearn.maskers.NiftiMasker` or \
        :obj:`~nilearn.maskers.MultiNiftiMasker` or \
        :obj:`~nilearn.maskers.SurfaceMasker`
        Instance used to mask provided data.

    imgs : list of 4D Niimg-like objects or list of \
        :obj:`~nilearn.surface.SurfaceImage`
        See :ref:`extracting_data`.
        List of subject data to mask, reduce and stack.

    confounds : CSV file path or numpy ndarray, or pandas DataFrame, optional
        This parameter is passed to signal.clean. Please see the
        corresponding documentation for details.

    reduction_ratio : 'auto' or float between 0. and 1., default='auto'
        - Between 0. and 1.: controls data reduction in the temporal
          domain. 1. means no reduction; < 1. calls for an SVD-based
          reduction.
        - If set to 'auto', the estimator will set the number of
          components per reduced session to n_components.

    n_components : integer, optional
        Number of components per subject to be extracted by dimension
        reduction.

    %(random_state)s
        default=0

    memory_level : integer, default=0
        Integer indicating the level of memorization. The higher, the
        more function calls are cached.

    memory : joblib.Memory, default=None
        Used to cache the function calls.
        If ``None`` is passed will default to ``Memory(location=None)``.

    n_jobs : integer, default=1
        The number of CPUs to use to do the computation. -1 means
        'all CPUs', -2 'all CPUs but one', and so on.

    Returns
    -------
    data : ndarray or memorymap
        Concatenation of reduced data.

    """
    if memory is None:
        memory = Memory(location=None)
    if not hasattr(imgs, "__iter__"):
        imgs = [imgs]

    if reduction_ratio == "auto":
        if n_components is None:
            # Reduction ratio is 1 if
            # neither n_components nor ratio is provided
            reduction_ratio = 1
    else:
        reduction_ratio = (
            1 if reduction_ratio is None else float(reduction_ratio)
        )
        if not 0 <= reduction_ratio <= 1:
            raise ValueError(
                "Reduction ratio should be between 0.0 and 1.0, "
                f"got {reduction_ratio:.2f}"
            )

    if confounds is None:
        confounds = itertools.repeat(confounds)

    if reduction_ratio == "auto":
        n_samples = n_components
        reduction_ratio = None
    else:
        # We'll let _mask_and_reduce_single decide on the number of
        # samples based on the reduction_ratio
        n_samples = None

    data_list = Parallel(n_jobs=n_jobs)(
        delayed(_mask_and_reduce_single)(
            masker,
            img,
            confound,
            reduction_ratio=reduction_ratio,
            n_samples=n_samples,
            memory=memory,
            memory_level=memory_level,
            random_state=random_state,
        )
        for img, confound in zip(imgs, confounds)
    )

    subject_n_samples = [subject_data.shape[0] for subject_data in data_list]

    n_samples = np.sum(subject_n_samples)
    # n_features is the number of True vertices in the mask
    # if it is a surface
    if isinstance(masker, SurfaceMasker):
        n_features = masker.n_elements_
    # n_features is the number of True voxels in the mask if it is a volume
    else:
        n_features = int(np.sum(safe_get_data(masker.mask_img_)))
    dtype = np.float64 if data_list[0].dtype.type is np.float64 else np.float32
    data = np.empty((n_samples, n_features), order="F", dtype=dtype)

    current_position = 0
    for i, next_position in enumerate(np.cumsum(subject_n_samples)):
        data[current_position:next_position] = data_list[i]
        current_position = next_position
        # Clear memory as fast as possible: remove the reference on
        # the corresponding block of data
        data_list[i] = None
    return data
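

# Illustrative sketch (not executed): reducing two hypothetical runs to
# 20 components each with a fitted NiftiMasker. The file names are
# placeholders.
#
#     from nilearn.maskers import NiftiMasker
#     runs = ["func_run1.nii.gz", "func_run2.nii.gz"]
#     masker = NiftiMasker().fit(runs)
#     data = _mask_and_reduce(masker, runs, n_components=20)
#     # the 'auto' ratio keeps 20 samples per run:
#     # data.shape == (40, n_voxels_in_mask)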


def _mask_and_reduce_single(
    masker,
    img,
    confound,
    reduction_ratio=None,
    n_samples=None,
    memory=None,
    memory_level=0,
    random_state=None,
):
    """Implement multiprocessing from MaskReducer."""
    if confound is not None and not isinstance(confound, list):
        confound = [confound]
    this_data = masker.transform(img, confound)
    this_data = np.atleast_2d(this_data)
    # Now get rid of the img as fast as possible, to free a
    # reference count on it, and possibly free the corresponding
    # data
    del img
    random_state = check_random_state(random_state)

    data_n_samples = this_data.shape[0]
    if reduction_ratio is None:
        assert n_samples is not None
        n_samples = min(n_samples, data_n_samples)
    else:
        n_samples = ceil(data_n_samples * reduction_ratio)

    U, S, V = cache(
        _fast_svd, memory, memory_level=memory_level, func_memory_level=3
    )(this_data.T, n_samples, random_state=random_state)
    U = U.T.copy()
    U = U * S[:, np.newaxis]
    return U
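

# Shape note for the reduction above: with this_data of shape
# (n_timepoints, n_voxels) and this_data.T ~= U @ diag(S) @ V, the value
# returned is diag(S) @ U.T, an (n_samples, n_voxels) array whose rows are
# the dominant spatial modes of the run weighted by their singular values.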


@fill_doc
class _BaseDecomposition(CacheMixin, TransformerMixin, BaseEstimator):
    """Base class for matrix factorization based decomposition estimators.

    Handles mask logic and provides transform and inverse_transform
    methods.

    .. versionadded:: 0.2

    Parameters
    ----------
    n_components : int, default=20
        Number of components to extract,
        for each 4D-Niimage or each 2D surface image.

    %(random_state)s

    mask : Niimg-like object, :obj:`~nilearn.maskers.MultiNiftiMasker`, \
        :obj:`~nilearn.surface.SurfaceImage` or \
        :obj:`~nilearn.maskers.SurfaceMasker` object, optional
        Mask to be used on data. If an instance of masker is passed,
        then its mask will be used. If no mask is given, for Nifti images
        it will be computed automatically by a MultiNiftiMasker with
        default parameters; for surface images, all the vertices will be
        used.

    %(smoothing_fwhm)s

    standardize : boolean, default=True
        If standardize is True, the time-series are centered and normed:
        their mean is put to 0 and their variance to 1 in the time
        dimension.

    standardize_confounds : boolean, default=True
        If standardize_confounds is True, the confounds are z-scored:
        their mean is put to 0 and their variance to 1 in the time
        dimension.

    detrend : boolean, default=True
        This parameter is passed to signal.clean. Please see the related
        documentation for details.

    %(low_pass)s

        .. note::
            This parameter is passed to :func:`nilearn.signal.clean`.

    %(high_pass)s

        .. note::
            This parameter is passed to :func:`nilearn.signal.clean`.

    %(t_r)s

        .. note::
            This parameter is passed to :func:`nilearn.signal.clean`.

    %(target_affine)s

        .. note::
            This parameter is passed to :func:`nilearn.image.resample_img`.

    %(target_shape)s

        .. note::
            This parameter is passed to :func:`nilearn.image.resample_img`.

    %(mask_strategy)s

        Default='epi'.

        .. note::
            These strategies are only relevant for Nifti images and the
            parameter is ignored for SurfaceImage objects.

    mask_args : dict, optional
        If mask is None, these are additional parameters passed to
        :func:`nilearn.masking.compute_background_mask`,
        or :func:`nilearn.masking.compute_epi_mask`
        to fine-tune mask computation.
        Please see the related documentation for details.

    memory : instance of joblib.Memory or str, default=None
        Used to cache the masking process.
        By default, no caching is done.
        If a string is given, it is the path to the caching directory.
        If ``None`` is passed will default to ``Memory(location=None)``.

    memory_level : integer, default=0
        Rough estimator of the amount of memory used by caching. Higher
        value means more memory for caching.

    n_jobs : integer, default=1
        The number of CPUs to use to do the computation. -1 means
        'all CPUs', -2 'all CPUs but one', and so on.

    %(verbose0)s

    %(base_decomposition_attributes)s
    """

    def __init__(
        self,
        n_components=20,
        random_state=None,
        mask=None,
        smoothing_fwhm=None,
        standardize=True,
        standardize_confounds=True,
        detrend=True,
        low_pass=None,
        high_pass=None,
        t_r=None,
        target_affine=None,
        target_shape=None,
        mask_strategy="epi",
        mask_args=None,
        memory=None,
        memory_level=0,
        n_jobs=1,
        verbose=0,
    ):
        self.n_components = n_components
        self.random_state = random_state
        self.mask = mask

        self.smoothing_fwhm = smoothing_fwhm
        self.standardize = standardize
        self.standardize_confounds = standardize_confounds
        self.detrend = detrend
        self.low_pass = low_pass
        self.high_pass = high_pass
        self.t_r = t_r
        self.target_affine = target_affine
        self.target_shape = target_shape
        self.mask_strategy = mask_strategy
        self.mask_args = mask_args
        self.memory = memory
        self.memory_level = memory_level
        self.n_jobs = n_jobs
        self.verbose = verbose

    def _more_tags(self):
        """Return estimator tags.

        TODO remove when bumping sklearn_version > 1.5
        """
        return self.__sklearn_tags__()

    def __sklearn_tags__(self):
        """Return estimator tags.

        See the sklearn documentation for more details on tags
        https://scikit-learn.org/1.6/developers/develop.html#estimator-tags
        """
        # TODO
        # get rid of if block
        if SKLEARN_LT_1_6:
            from nilearn._utils.tags import tags

            return tags(surf_img=True, niimg_like=True)

        from nilearn._utils.tags import InputTags

        tags = super().__sklearn_tags__()
        tags.input_tags = InputTags(surf_img=True, niimg_like=True)
        return tags

    @fill_doc
    def fit(self, imgs, y=None, confounds=None):
        """Compute the mask and the components across subjects.

        Parameters
        ----------
        imgs : list of Niimg-like objects or \
            list of :obj:`~nilearn.surface.SurfaceImage`
            See :ref:`extracting_data`.
            Data on which the mask is calculated. If this is a list,
            the affine (for Niimg-like objects) and mesh (for
            SurfaceImages) are considered the same for all of them.

        %(y_dummy)s

        confounds : list of CSV file paths, numpy.ndarrays
            or pandas DataFrames, optional
            This parameter is passed to nilearn.signal.clean.
            Please see the related documentation for details.
            Should match the list of imgs given.

        Returns
        -------
        self : object
            Returns the instance itself. Contains attributes listed
            at the object level.

        """
        del y
        # Base fit for decomposition estimators: compute the embedded masker
        check_params(self.__dict__)

        if (
            isinstance(imgs, str)
            and nilearn.EXPAND_PATH_WILDCARDS
            and glob.has_magic(imgs)
        ):
            imgs = resolve_globbing(imgs)

        if isinstance(imgs, (str, Path)) or not hasattr(imgs, "__iter__"):
            # these classes are meant for lists of 4D images
            # (multi-subject), but we want them to work on a single
            # subject too, so we wrap it in a list.
            imgs = [imgs]

        if len(imgs) == 0:
            # Common error that arises from a null glob. Capture
            # it early and raise a helpful message
            raise ValueError(
                "Need one or more Niimg-like or SurfaceImage "
                "objects as input, "
                "an empty list was given."
            )

        masker_type = "multi_nii"
        if isinstance(self.mask, (SurfaceMasker, SurfaceImage)) or any(
            isinstance(x, SurfaceImage) for x in imgs
        ):
            masker_type = "surface"
            _warn_ignored_surface_masker_params(self)
        self.masker_ = check_embedded_masker(self, masker_type=masker_type)

        # Avoid warning with imgs != None
        # if masker_ has been provided a mask_img
        if self.masker_.mask_img is None:
            self.masker_.fit(imgs)
        else:
            self.masker_.fit()
        self.mask_img_ = self.masker_.mask_img_

        # _mask_and_reduce step for decomposition estimators, i.e.
        # MultiPCA, CanICA and dictionary learning
        logger.log("Loading data", self.verbose)
        data = _mask_and_reduce(
            self.masker_,
            imgs,
            confounds=confounds,
            n_components=self.n_components,
            random_state=self.random_state,
            memory=self.memory,
            memory_level=max(0, self.memory_level + 1),
            n_jobs=self.n_jobs,
        )
        self._raw_fit(data)

        # Create and fit the appropriate MapsMasker for transform
        # and inverse_transform
        if isinstance(self.masker_, SurfaceMasker):
            self.maps_masker_ = SurfaceMapsMasker(
                self.components_img_, self.masker_.mask_img_
            )
        else:
            self.maps_masker_ = NiftiMapsMasker(
                self.components_img_,
                self.masker_.mask_img_,
                resampling_target="maps",
            )
        self.maps_masker_.fit()

        return self
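
    # Illustrative sketch (not executed): _BaseDecomposition is abstract
    # (subclasses must provide _raw_fit), so fitting goes through a concrete
    # estimator such as nilearn.decomposition.CanICA. `func_imgs` is a
    # hypothetical list of 4D images.
    #
    #     from nilearn.decomposition import CanICA
    #     canica = CanICA(n_components=20, random_state=0).fit(func_imgs)
    #     loadings = canica.transform(func_imgs)  # one 2D array per subject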

    @property
    def nifti_maps_masker_(self):
        # TODO: remove in 0.13
        warnings.warn(
            message="The 'nifti_maps_masker_' attribute is deprecated "
            "and will be removed in Nilearn 0.13.0.\n"
            "Please use 'maps_masker_' instead.",
            category=FutureWarning,
            stacklevel=find_stack_level(),
        )
        return self.maps_masker_

    def __sklearn_is_fitted__(self):
        return hasattr(self, "components_")

    def transform(self, imgs, confounds=None):
        """Project the data into a reduced representation.

        Parameters
        ----------
        imgs : iterable of Niimg-like objects or \
            :obj:`list` of :obj:`~nilearn.surface.SurfaceImage`
            See :ref:`extracting_data`.
            Data to be projected.

        confounds : CSV file path or numpy.ndarray
            or pandas DataFrame, optional
            This parameter is passed to nilearn.signal.clean. Please see
            the related documentation for details.

        Returns
        -------
        loadings : list of 2D ndarray
            For each subject, the loadings of each sample on each
            decomposition component;
            shape: number of subjects * (number of scans, number of
            regions).

        """
        check_is_fitted(self)

        # XXX: dealing properly with 4D / list of 4D data?
        if confounds is None:
            confounds = [None] * len(imgs)
        return [
            self.maps_masker_.transform(img, confounds=confound)
            for img, confound in zip(imgs, confounds)
        ]

    def inverse_transform(self, loadings):
        """Use provided loadings to compute corresponding linear component \
        combination in whole-brain voxel space.

        Parameters
        ----------
        loadings : list of numpy array (n_samples x n_components)
            Component signals to transform back into voxel signals.

        Returns
        -------
        reconstructed_imgs : list of nibabel.Nifti1Image or \
            :class:`~nilearn.surface.SurfaceImage`
            For each loading, the reconstructed Nifti1Image or
            SurfaceImage.

        """
        check_is_fitted(self)

        # XXX: dealing properly with 2D / list of 2D data?
        return [
            self.maps_masker_.inverse_transform(loading)
            for loading in loadings
        ]
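
    # Illustrative round trip (not executed), continuing the CanICA sketch
    # above: transform then inverse_transform yields one reconstructed
    # image per subject from the component loadings.
    #
    #     loadings = canica.transform(func_imgs)
    #     reconstructed = canica.inverse_transform(loadings)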

    def _sort_by_score(self, data):
        """Sort components by the explained variance of estimator \
        components_ over data.
        """
        components_score = self._raw_score(data, per_component=True)
        order = np.argsort(components_score)[::-1]
        self.components_ = self.components_[order]

    def _raw_score(self, data, per_component=True):
        """Return explained variance over data of estimator components_."""
        return self._cache(_explained_variance)(
            data, self.components_, per_component=per_component
        )

    def score(self, imgs, confounds=None, per_component=False):
        """Score function based on explained variance on imgs.

        Should only be used by DecompositionEstimator derived classes.

        Parameters
        ----------
        imgs : iterable of Niimg-like objects or \
            :obj:`list` of :obj:`~nilearn.surface.SurfaceImage`
            See :ref:`extracting_data`.
            Data to be scored.

        confounds : CSV file path or numpy.ndarray
            or pandas DataFrame, optional
            This parameter is passed to nilearn.signal.clean. Please see
            the related documentation for details.

        per_component : bool, default=False
            Specify whether the explained variance ratio is desired for
            each map or for the global set of components.

        Returns
        -------
        score : float
            Holds the score for each subject. The score is two-dimensional
            if per_component is True. The first dimension is squeezed if
            the number of subjects is one.

        """
        check_is_fitted(self)

        data = _mask_and_reduce(
            self.masker_,
            imgs,
            confounds,
            reduction_ratio=1.0,
            random_state=self.random_state,
        )
        return self._raw_score(data, per_component=per_component)

    def set_output(self, *, transform=None):
        """Set the output container when ``"transform"`` is called.

        .. warning::

            This has not been implemented yet.
        """
        raise NotImplementedError()


def _explained_variance(X, components, per_component=True):
    """Score function based on explained variance.

    Parameters
    ----------
    X : ndarray
        Holds single subject data to be tested against components.

    components : array-like
        Represents the components estimated by the decomposition
        algorithm.

    per_component : bool, default=True
        Specify whether the explained variance ratio is desired for each
        map or for the global set of components_.

    Returns
    -------
    score : ndarray
        Holds the score for each subject. The score is two-dimensional if
        per_component is True.

    """
    full_var = np.var(X)
    n_components = components.shape[0]
    S = np.sqrt(np.sum(components**2, axis=1))
    S[S == 0] = 1
    components = components / S[:, np.newaxis]
    projected_data = components.dot(X.T)
    if per_component:
        res_var = np.zeros(n_components)
        for i in range(n_components):
            res = X - np.outer(projected_data[i], components[i])
            res_var[i] = np.var(res)
            # Free some memory
            del res
        return np.maximum(0.0, 1.0 - res_var / full_var)
    else:
        lr = LinearRegression(fit_intercept=True)
        lr.fit(components.T, X.T)
        res = X - lr.coef_.dot(components)
        res_var = row_sum_of_squares(res).sum()
        return np.maximum(0.0, 1.0 - res_var / row_sum_of_squares(X).sum())
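

# Worked note (a sketch of the formula above): each component c_i is
# L2-normalized, the data are projected onto it (projected_data[i] = X @ c_i),
# and the rank-1 reconstruction np.outer(X @ c_i, c_i) is subtracted; the
# per-component score is max(0, 1 - Var(residual) / Var(X)). If X consists of
# a single component repeated over time, the residual is zero and the score
# is 1.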