"""
Preprocessing functions for time series.

All functions in this module should take X matrices with samples x
features.
"""

import warnings
from pathlib import Path

import numpy as np
import pandas as pd
from scipy import linalg
from scipy import signal as sp_signal
from scipy.interpolate import CubicSpline
from sklearn.utils import as_float_array, gen_even_slices

from nilearn._utils import fill_doc, stringify_path
from nilearn._utils.exceptions import AllVolumesRemovedError
from nilearn._utils.logger import find_stack_level
from nilearn._utils.numpy_conversions import as_ndarray, csv_to_array
from nilearn._utils.param_validation import (
    check_params,
    check_run_sample_masks,
)

__all__ = [
    "butterworth",
    "clean",
    "high_variance_confounds",
]

available_filters = ("butterworth", "cosine")

def standardize_signal(
    signals,
    detrend=False,
    standardize="zscore",
):
    """Center and standardize a given signal (time is along first axis).

    Parameters
    ----------
    signals : :class:`numpy.ndarray`
        Timeseries to standardize.

    detrend : :obj:`bool`, default=False
        If detrending of timeseries is requested.

    standardize : {'zscore_sample', 'zscore', 'psc', True, False}, \
                  default='zscore'
        Strategy to standardize the signal:

        - 'zscore_sample': The signal is z-scored. Timeseries are shifted
          to zero mean and scaled to unit variance. Uses sample std.
        - 'zscore': The signal is z-scored. Timeseries are shifted
          to zero mean and scaled to unit variance. Uses population std
          by calling :obj:`numpy.std` with ``ddof=0`` (denominator N).
        - 'psc': Timeseries are shifted to zero mean value and scaled
          to percent signal change (as compared to original mean signal).
        - True: The signal is z-scored (same as option `zscore`).
          Timeseries are shifted to zero mean and scaled to unit variance.
        - False: Do not standardize the data.

    Returns
    -------
    std_signals : :class:`numpy.ndarray`
        Copy of signals, standardized.
    """
    if standardize not in [True, False, "psc", "zscore", "zscore_sample"]:
        raise ValueError(
            f"{standardize} is not a valid standardize strategy."
        )

    signals = _detrend(signals, inplace=False) if detrend else signals.copy()

    if standardize:
        if signals.shape[0] == 1:
            warnings.warn(
                "Standardization of 3D signal has been requested but "
                "would lead to zero values. Skipping.",
                stacklevel=find_stack_level(),
            )
            return signals

        elif standardize == "zscore_sample":
            if not detrend:
                # remove mean if not already detrended
                signals = signals - signals.mean(axis=0)

            std = signals.std(axis=0, ddof=1)
            # avoid numerical problems
            std[std < np.finfo(np.float64).eps] = 1.0
            signals /= std

        elif (standardize == "zscore") or (standardize is True):
            std_strategy_default = (
                "The default strategy for standardize is currently 'zscore' "
                "which incorrectly uses population std to calculate sample "
                "zscores. The new strategy 'zscore_sample' corrects this "
                "behavior by using the sample std. In release 0.13, the "
                "default strategy will be replaced by the new strategy and "
                "the 'zscore' option will be removed. Please use "
                "'zscore_sample' instead."
            )
            warnings.warn(
                category=DeprecationWarning,
                message=std_strategy_default,
                stacklevel=find_stack_level(),
            )

            if not detrend:
                # remove mean if not already detrended
                signals = signals - signals.mean(axis=0)

            std = signals.std(axis=0)
            # avoid numerical problems
            std[std < np.finfo(np.float64).eps] = 1.0
            signals /= std

        elif standardize == "psc":
            mean_signals = signals.mean(axis=0)
            invalid_ix = np.absolute(mean_signals) < np.finfo(np.float64).eps
            signals = (signals - mean_signals) / np.absolute(mean_signals)
            signals *= 100

            if np.any(invalid_ix):
                warnings.warn(
                    "psc standardization strategy is meaningless "
                    "for features that have a mean of 0. "
                    "These time series are set to 0.",
                    stacklevel=find_stack_level(),
                )
                signals[:, invalid_ix] = 0

    return signals
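
# A minimal usage sketch (editor's addition, illustrative only; not part of
# the nilearn API). It shows 'zscore_sample' standardization on synthetic
# data; the ``_demo_``-prefixed helper is hypothetical.
def _demo_standardize_signal():
    rng = np.random.default_rng(0)
    # 100 samples (time points) x 3 features, with non-zero mean and scale
    signals = rng.standard_normal((100, 3)) * 5 + 10
    z = standardize_signal(signals, standardize="zscore_sample")
    # Each column now has zero mean and unit sample variance.
    assert np.allclose(z.mean(axis=0), 0.0)
    assert np.allclose(z.std(axis=0, ddof=1), 1.0)
    return z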

def _mean_of_squares(signals, n_batches=20):
    """Compute mean of squares for each signal.

    This function is equivalent to:

    .. code-block:: python

        var = np.copy(signals)
        var **= 2
        var = var.mean(axis=0)

    but uses a lot less memory.

    Parameters
    ----------
    signals : :class:`numpy.ndarray`, shape (n_samples, n_features)
        Signal whose mean of squares must be computed.

    n_batches : :obj:`int`, default=20
        Number of batches to use in the computation.

        .. note::
            Tweaking this value can lead to variation of memory usage
            and computation time. The higher the value, the lower the
            memory consumption.

    Returns
    -------
    var : :class:`numpy.ndarray`
        1D array holding the mean of squares.
    """
    # No batching for small arrays
    if signals.shape[1] < 500:
        n_batches = 1

    # Fastest for C order
    var = np.empty(signals.shape[1])
    for batch in gen_even_slices(signals.shape[1], n_batches):
        tvar = np.copy(signals[:, batch])
        tvar **= 2
        var[batch] = tvar.mean(axis=0)

    return var
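
# Sketch (editor's addition): a quick check that the batched computation
# matches the direct NumPy expression from the docstring. The helper below
# is hypothetical and exists only for illustration.
def _demo_mean_of_squares():
    rng = np.random.default_rng(1)
    signals = rng.standard_normal((50, 600))  # wide enough to use batching
    direct = (signals**2).mean(axis=0)
    assert np.allclose(_mean_of_squares(signals, n_batches=20), direct)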

def row_sum_of_squares(signals, n_batches=20):
    """Compute sum of squares for each signal.

    This function is equivalent to:

    .. code-block:: python

        signals **= 2
        signals = signals.sum(axis=0)

    but uses a lot less memory.

    Parameters
    ----------
    signals : :class:`numpy.ndarray`, shape (n_samples, n_features)
        Signal whose sum of squares must be computed.

    n_batches : :obj:`int`, default=20
        Number of batches to use in the computation.

        .. note::
            Tweaking this value can lead to variation of memory usage
            and computation time. The higher the value, the lower the
            memory consumption.

    Returns
    -------
    var : :class:`numpy.ndarray`
        1D array holding the sum of squares.
    """
    # No batching for small arrays
    if signals.shape[1] < 500:
        n_batches = 1

    # Fastest for C order
    var = np.empty(signals.shape[1])
    for batch in gen_even_slices(signals.shape[1], n_batches):
        var[batch] = np.sum(signals[:, batch] ** 2, 0)

    return var
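
# Sketch (editor's addition): ``row_sum_of_squares`` gives the same result
# as ``(signals**2).sum(axis=0)`` while keeping peak memory low through
# batching. The helper below is hypothetical, for illustration only.
def _demo_row_sum_of_squares():
    rng = np.random.default_rng(2)
    signals = rng.standard_normal((50, 600))
    assert np.allclose(row_sum_of_squares(signals), (signals**2).sum(axis=0))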

def _detrend(signals, inplace=False, type="linear", n_batches=10):
    """Detrend columns of input array.

    Signals are supposed to be columns of `signals`.
    This function is significantly faster than :func:`scipy.signal.detrend`
    in this case and uses a lot less memory.

    Parameters
    ----------
    signals : :class:`numpy.ndarray`
        This parameter must be two-dimensional.
        Signals to detrend. A signal is a column.

    inplace : :obj:`bool`, default=False
        Tells if the computation must be made inplace or not.

    type : {"linear", "constant"}, default="linear"
        Detrending type, either "linear" or "constant".
        See also :func:`scipy.signal.detrend`.

    n_batches : :obj:`int`, default=10
        Number of batches to use in the computation.

        .. note::
            Tweaking this value can lead to variation of memory usage
            and computation time. The higher the value, the lower the
            memory consumption.

    Returns
    -------
    detrended_signals : :class:`numpy.ndarray`
        Detrended signals. The shape is that of ``signals``.

    Notes
    -----
    If a signal of length 1 is given, it is returned unchanged.
    """
    signals = as_float_array(signals, copy=not inplace)
    if signals.shape[0] == 1:
        warnings.warn(
            "Detrending of 3D signal has been requested but "
            "would lead to zero values. Skipping.",
            stacklevel=find_stack_level(),
        )
        return signals

    signals -= np.mean(signals, axis=0)
    if type == "linear":
        # Keeping "signals" dtype avoids some type conversion further down,
        # and can save a lot of memory if dtype is single-precision.
        regressor = np.arange(signals.shape[0], dtype=signals.dtype)
        regressor -= regressor.mean()
        std = np.sqrt((regressor**2).sum())
        # avoid numerical problems
        if not std < np.finfo(np.float64).eps:
            regressor /= std
        regressor = regressor[:, np.newaxis]

        # No batching for small arrays
        if signals.shape[1] < 500:
            n_batches = 1

        # This is fastest for C order.
        for batch in gen_even_slices(signals.shape[1], n_batches):
            signals[:, batch] -= (
                np.dot(regressor[:, 0], signals[:, batch]) * regressor
            )
    return signals
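
# Sketch (editor's addition): on a 2D array, ``_detrend`` agrees with
# :func:`scipy.signal.detrend` applied column-wise, up to floating point
# precision. The helper below is hypothetical, for illustration only.
def _demo_detrend():
    rng = np.random.default_rng(3)
    t = np.arange(100, dtype=np.float64)[:, np.newaxis]
    signals = rng.standard_normal((100, 4)) + 0.5 * t  # add a linear trend
    ours = _detrend(signals, inplace=False, type="linear")
    reference = sp_signal.detrend(signals, axis=0, type="linear")
    assert np.allclose(ours, reference)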

def _check_wn(btype, freq, nyq):
    """Ensure that the critical frequency works with the Nyquist frequency.

    The critical frequency must be (1) >= 0 and (2) < Nyquist.
    When critical frequencies are exactly at the Nyquist frequency,
    results are unstable.

    See the SciPy issue https://github.com/scipy/scipy/issues/6265.
    Due to the unstable results pointed out in the issue above,
    we force the critical frequencies to be slightly less than the Nyquist
    frequency, and slightly more than zero.
    """
    EPS = np.finfo(np.float32).eps
    if freq >= nyq:
        freq = nyq - (nyq * 10 * EPS)
        warnings.warn(
            f"The frequency specified for the {btype} pass filter is "
            "too high to be handled by a digital filter "
            "(above the Nyquist frequency). "
            f"It has been lowered to {freq} (just below the Nyquist "
            "frequency).",
            stacklevel=find_stack_level(),
        )

    elif freq < 0.0:  # equal to 0.0 is okay
        freq = nyq * EPS
        warnings.warn(
            f"The frequency specified for the {btype} pass filter is too "
            "low to be handled by a digital filter (must be non-negative). "
            f"It has been set to eps: {freq}.",
            stacklevel=find_stack_level(),
        )

    return freq
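
# Sketch (editor's addition): a cutoff at or above Nyquist is clamped to
# just below Nyquist (with a warning, silenced here). Hypothetical helper.
def _demo_check_wn():
    nyq = 0.25  # Nyquist frequency for a 0.5 Hz sampling rate
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        clamped = _check_wn("low", 0.3, nyq)  # 0.3 Hz exceeds Nyquist
    assert 0.0 < clamped < nyq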

@fill_doc
def butterworth(
    signals,
    sampling_rate,
    low_pass=None,
    high_pass=None,
    order=5,
    padtype="odd",
    padlen=None,
    copy=False,
):
    """Apply a low-pass, high-pass or band-pass \
    `Butterworth filter <https://en.wikipedia.org/wiki/Butterworth_filter>`_.

    Apply a filter to remove signal components below the ``high_pass``
    cutoff frequency and above the ``low_pass`` cutoff frequency.

    Parameters
    ----------
    signals : :class:`numpy.ndarray` (1D sequence or n_samples x n_sources)
        Signals to be filtered. A signal is assumed to be a column
        of `signals`.

    sampling_rate : :obj:`float`
        Number of samples per second (sample frequency, in Hertz).
    %(low_pass)s
    %(high_pass)s
    order : :obj:`int`, default=5
        Order of the `Butterworth filter
        <https://en.wikipedia.org/wiki/Butterworth_filter>`_.
        When filtering signals, the filter has a decay to avoid ringing.
        Increasing the order sharpens this decay. Be aware that very high
        orders can lead to numerical instability.

    padtype : {"odd", "even", "constant", None}, default="odd"
        Type of padding to use for the Butterworth filter.
        For more information about this, see :func:`scipy.signal.filtfilt`.

    padlen : :obj:`int` or None, default=None
        The size of the padding to add to the beginning and end of
        ``signals``. If None, the default value from
        :func:`scipy.signal.filtfilt` will be used.

    copy : :obj:`bool`, default=False
        If False, `signals` is modified inplace, and memory consumption is
        lower than for ``copy=True``, though computation time is higher.

    Returns
    -------
    filtered_signals : :class:`numpy.ndarray`
        Signals filtered according to the given parameters.
    """
    check_params(locals())
    if low_pass is None and high_pass is None:
        return signals.copy() if copy else signals

    if (
        low_pass is not None
        and high_pass is not None
        and high_pass >= low_pass
    ):
        raise ValueError(
            f"High pass cutoff frequency ({high_pass}) is greater than or "
            f"equal to low pass filter frequency ({low_pass}). "
            "This case is not handled by this function."
        )

    nyq = sampling_rate * 0.5

    critical_freq = []
    if high_pass is not None:
        btype = "high"
        critical_freq.append(_check_wn(btype, high_pass, nyq))

    if low_pass is not None:
        btype = "low"
        critical_freq.append(_check_wn(btype, low_pass, nyq))

    if len(critical_freq) == 2:
        btype = "band"
        # Inappropriate parameter input might lead to coercion of both
        # elements of critical_freq to a value just below Nyquist.
        # A SciPy fix now enforces that critical frequencies cannot be
        # equal. See https://github.com/scipy/scipy/pull/15886.
        # If this is the case, we return the signals unfiltered.
        if critical_freq[0] == critical_freq[1]:
            warnings.warn(
                "Signals are returned unfiltered because band-pass critical "
                "frequencies are equal. Please check that inputs for "
                "sampling_rate, low_pass, and high_pass are valid.",
                stacklevel=find_stack_level(),
            )
            return signals.copy() if copy else signals
    else:
        critical_freq = critical_freq[0]

    sos = sp_signal.butter(
        N=order,
        Wn=critical_freq,
        btype=btype,
        output="sos",
        fs=sampling_rate,
    )
    if signals.ndim == 1:
        # 1D case
        output = sp_signal.sosfiltfilt(
            sos,
            x=signals,
            padtype=padtype,
            padlen=padlen,
        )
        if copy:  # filtfilt does a copy in all cases.
            signals = output
        else:
            signals[...] = output
    elif copy:
        # No way to save memory when a copy has been requested,
        # because filtfilt does out-of-place processing.
        signals = sp_signal.sosfiltfilt(
            sos,
            x=signals,
            axis=0,
            padtype=padtype,
            padlen=padlen,
        )
    else:
        # Lesser memory consumption, slower.
        for timeseries in signals.T:
            timeseries[:] = sp_signal.sosfiltfilt(
                sos,
                x=timeseries,
                padtype=padtype,
                padlen=padlen,
            )
        # results returned in-place

    return signals
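
# Sketch (editor's addition): band-pass filtering synthetic signals sampled
# at TR = 2 s, i.e. a 0.5 Hz sampling rate. Cutoff values are arbitrary
# examples; the helper itself is hypothetical.
def _demo_butterworth():
    rng = np.random.default_rng(4)
    signals = rng.standard_normal((200, 3))
    filtered = butterworth(
        signals,
        sampling_rate=0.5,
        low_pass=0.1,  # remove frequencies above 0.1 Hz
        high_pass=0.01,  # remove frequencies below 0.01 Hz
        copy=True,
    )
    assert filtered.shape == signals.shape
    return filtered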

@fill_doc
def high_variance_confounds(
    series, n_confounds=5, percentile=2.0, detrend=True
) -> np.ndarray:
    """Return confounds time series extracted from series \
    with highest variance.

    Parameters
    ----------
    series : :class:`numpy.ndarray`
        Timeseries. A timeseries is a column in the "series" array.
        Shape: (sample number, feature number).

    n_confounds : :obj:`int`, default=5
        Number of confounds to return.

    percentile : :obj:`float`, default=2.0
        Highest-variance series percentile to keep before computing the
        singular value decomposition, 0. <= `percentile` <= 100.
        ``series.shape[1] * percentile / 100`` must be greater
        than ``n_confounds``.
    %(detrend)s
        Default=True.

    Returns
    -------
    v : :class:`numpy.ndarray`
        Highest variance confounds. Shape: (samples, n_confounds).

    Notes
    -----
    This method is related to what has been published in the literature
    as 'CompCor' :footcite:p:`Behzadi2007`.

    The implemented algorithm does the following:

    - compute sum of squares for each time series (no mean removal)
    - keep a given percentile of series with highest variances (percentile)
    - compute an SVD of the extracted series
    - return a given number (n_confounds) of series from the SVD with
      highest singular values

    References
    ----------
    .. footbibliography::

    See Also
    --------
    nilearn.image.high_variance_confounds
    """
    check_params(locals())
    if detrend:
        series = _detrend(series)  # copy

    # Retrieve the voxels|features with highest variance

    # Compute variance without mean removal.
    var = _mean_of_squares(series)
    var_thr = np.nanpercentile(var, 100.0 - percentile)
    series = series[:, var > var_thr]  # extract columns (i.e. features)
    # Return the singular vectors with largest singular values
    # We solve the symmetric eigenvalue problem here, increasing stability
    s, u = linalg.eigh(series.dot(series.T) / series.shape[0])
    ix_ = np.argsort(s)[::-1]
    u = u[:, ix_[:n_confounds]].copy()
    return u
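
# Sketch (editor's addition): CompCor-style confound extraction on synthetic
# data where a handful of columns carry most of the variance. Hypothetical
# demo helper; shapes and values are arbitrary.
def _demo_high_variance_confounds():
    rng = np.random.default_rng(5)
    series = rng.standard_normal((100, 1000))
    series[:, :10] *= 50  # make ten features dominate the variance
    confounds = high_variance_confounds(series, n_confounds=5, percentile=2.0)
    assert confounds.shape == (100, 5)
    return confounds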

def _ensure_float(data):
    """Make sure that data is a float type."""
    if data.dtype.kind != "f":
        if data.dtype.itemsize == 8:
            data = data.astype(np.float64)
        else:
            data = data.astype(np.float32)
    return data

@fill_doc
def clean(
    signals,
    runs=None,
    detrend=True,
    standardize="zscore",
    sample_mask=None,
    confounds=None,
    standardize_confounds=True,
    filter="butterworth",
    low_pass=None,
    high_pass=None,
    t_r=2.5,
    ensure_finite=False,
    extrapolate=True,
    **kwargs,
):
    """Improve :term:`SNR` on masked :term:`fMRI` signals.

    This function can do several things on the input signals. With the
    default options, the procedures are performed in the following order:

    - detrend
    - low- and high-pass Butterworth filter
    - remove confounds
    - standardize

    Low-pass filtering improves specificity.

    High-pass filtering should be kept small, to keep some sensitivity.

    Butterworth filtering is only meaningful on evenly-sampled signals.

    When performing scrubbing (censoring high-motion volumes) with
    Butterworth filtering, the signal is processed in the following order,
    based on the second recommendation in :footcite:t:`Lindquist2018`:

    - interpolate high motion volumes with cubic spline interpolation
    - detrend
    - low- and high-pass Butterworth filter
    - censor high motion volumes
    - remove confounds
    - standardize

    According to :footcite:t:`Lindquist2018`, removal of confounds will be
    done orthogonally to temporal filters (low- and/or high-pass filters),
    if both are specified. The censored volumes should be removed in both
    signals and confounds before the nuisance regression.

    When performing scrubbing with cosine drift term filtering, the signal
    is processed in the following order, based on the first recommendation
    in :footcite:t:`Lindquist2018`:

    - generate cosine drift term
    - censor high motion volumes in both signal and confounds
    - detrend
    - remove confounds
    - standardize

    Parameters
    ----------
    signals : :class:`numpy.ndarray`
        Timeseries. Must have shape (instant number, features number).
        This array is not modified.

    runs : :class:`numpy.ndarray`, default=None
        Add a run level to the cleaning process. Each run will be
        cleaned independently. Must be a 1D array of n_samples elements.

    confounds : :class:`numpy.ndarray`, :obj:`str`, :class:`pathlib.Path`, \
                :class:`pandas.DataFrame` \
                or :obj:`list` of confounds timeseries, default=None
        Shape must be (instant number, confound number), or just
        (instant number,).
        The number of time instants in ``signals`` and ``confounds`` must be
        identical (i.e. ``signals.shape[0] == confounds.shape[0]``).
        If a string is provided, it is assumed to be the name of a csv file
        containing signals as columns, with an optional one-line header.
        If a list is provided, all confounds are removed from the input
        signal, as if all were in the same array.

    sample_mask : None, any type compatible with numpy-array indexing, \
                  or :obj:`list` of \
                  shape: (number of scans - number of volumes removed, ) \
                  for explicit index, or (number of scans, ) for binary \
                  mask, default=None
        Masks the signals along the time/fourth dimension to perform
        scrubbing (remove volumes with high motion) and/or remove
        non-steady-state volumes.
        When passing a binary mask with boolean values, ``True`` refers to
        volumes kept, and ``False`` to volumes removed.
        This masking step is applied before signal cleaning. When supplying
        run information, sample_mask must be a list containing sets of
        indexes for each run.

        .. versionadded:: 0.8.0

    %(t_r)s
        Default=2.5.
    filter : {'butterworth', 'cosine', False}, default='butterworth'
        Filtering methods:

        - 'butterworth': perform Butterworth filtering.
        - 'cosine': generate discrete cosine transformation drift terms.
        - False: Do not perform filtering.

    %(low_pass)s

        .. note::
            `low_pass` is not implemented for filter='cosine'.

    %(high_pass)s
    %(detrend)s
    standardize : {'zscore_sample', 'zscore', 'psc', True, False}, \
                  default="zscore"
        Strategy to standardize the signal:

        - 'zscore_sample':
          The signal is z-scored.
          Timeseries are shifted to zero mean and scaled to unit variance.
          Uses sample std.
        - 'zscore':
          The signal is z-scored.
          Timeseries are shifted to zero mean and scaled to unit variance.
          Uses population std by calling :obj:`numpy.std` with ``ddof=0``.
        - 'psc':
          Timeseries are shifted to zero mean value and scaled
          to percent signal change (as compared to original mean signal).
        - True:
          The signal is z-scored (same as option `zscore`).
          Timeseries are shifted to zero mean and scaled to unit variance.
        - False: Do not standardize the data.

    %(standardize_confounds)s

    ensure_finite : :obj:`bool`, default=False
        If `True`, the non-finite values (NaNs and infs) found in the data
        will be replaced by zeros.

    extrapolate : :obj:`bool`, default=True
        If `True` and filter='butterworth', censored volumes at both ends of
        the signal data will be interpolated before filtering. Otherwise,
        they will be discarded from the band-pass filtering process.

    kwargs : :obj:`dict`
        Keyword arguments to be passed to functions called within ``clean``.
        Kwargs prefixed with ``'butterworth__'`` will be passed to
        :func:`~nilearn.signal.butterworth`.

    Returns
    -------
    cleaned_signals : :class:`numpy.ndarray`
        Input signals, cleaned. Same shape as `signals` unless `sample_mask`
        is applied.

    Notes
    -----
    Confounds removal is based on a projection on the orthogonal
    of the signal space. See :footcite:t:`Friston1994`.

    Orthogonalization between temporal filters and confound removal is
    based on suggestions in :footcite:t:`Lindquist2018`.

    References
    ----------
    .. footbibliography::

    See Also
    --------
    nilearn.image.clean_img
    """
    check_params(locals())
    # Raise warning for some parameter combinations when confounds present
    confounds = stringify_path(confounds)
    if confounds is not None:
        _check_signal_parameters(detrend, standardize_confounds)
    # check if filter parameters are satisfied and return correct filter
    filter_type = _check_filter_parameters(filter, low_pass, high_pass, t_r)

    # Read confounds and signals
    signals, runs, confounds, sample_mask = _sanitize_inputs(
        signals, runs, confounds, sample_mask, ensure_finite
    )

    # Process each run independently
    if runs is not None:
        return _process_runs(
            signals,
            runs,
            detrend,
            standardize,
            confounds,
            sample_mask,
            filter_type,
            low_pass,
            high_pass,
            t_r,
        )

    # For the following steps, sample_mask should be either None or
    # index-like

    # Generate cosine drift terms using the full length of the signals
    if filter_type == "cosine":
        confounds = _create_cosine_drift_terms(
            signals, confounds, high_pass, t_r
        )

    # Interpolation / censoring
    signals, confounds, sample_mask = _handle_scrubbed_volumes(
        signals, confounds, sample_mask, filter_type, t_r, extrapolate
    )

    # Detrend
    # Detrending and filtering should also apply to confounds, if present;
    # keep filters orthogonal (according to Lindquist et al. (2018)).
    # Restrict the signal to the orthogonal of the confounds.
    original_mean_signals = signals.mean(axis=0)
    if detrend:
        signals = standardize_signal(
            signals, standardize=False, detrend=detrend
        )
        if confounds is not None:
            confounds = standardize_signal(
                confounds, standardize=False, detrend=detrend
            )

    # Butterworth filtering
    if filter_type == "butterworth":
        butterworth_kwargs = {
            k.replace("butterworth__", ""): v
            for k, v in kwargs.items()
            if k.startswith("butterworth__")
        }
        signals = butterworth(
            signals,
            sampling_rate=1.0 / t_r,
            low_pass=low_pass,
            high_pass=high_pass,
            **butterworth_kwargs,
        )
        if confounds is not None:
            # Apply low- and high-pass filters to keep filters orthogonal
            # (according to Lindquist et al. (2018))
            confounds = butterworth(
                confounds,
                sampling_rate=1.0 / t_r,
                low_pass=low_pass,
                high_pass=high_pass,
                **butterworth_kwargs,
            )

        # apply sample_mask to remove censored volumes after signal filtering
        if sample_mask is not None:
            signals, confounds = _censor_signals(
                signals, confounds, sample_mask
            )

    # Remove confounds
    if confounds is not None:
        confounds = standardize_signal(
            confounds, standardize=standardize_confounds, detrend=False
        )
        if not standardize_confounds:
            # Improve numerical stability by controlling the range of
            # confounds. We don't rely on standardize_signal as it removes
            # any constant contribution to confounds.
            confound_max = np.max(np.abs(confounds), axis=0)
            confound_max[confound_max == 0] = 1
            confounds /= confound_max

        # Pivoting in qr decomposition was added in scipy 0.10
        Q, R, _ = linalg.qr(confounds, mode="economic", pivoting=True)
        Q = Q[:, np.abs(np.diag(R)) > np.finfo(np.float64).eps * 100.0]
        signals -= Q.dot(Q.T).dot(signals)

    # Standardize
    if not standardize:
        return signals

    # Detect if the mean is close to zero; this can obscure the scale of
    # the signal with percent signal change standardization. It should
    # happen when the data was 1. detrended or 2. high-pass filtered.
    filtered_mean_check = (
        np.abs(signals.mean(0)).mean() / np.abs(original_mean_signals).mean()
        < 1e-1
    )
    if standardize == "psc" and filtered_mean_check:
        # If the signal is detrended, the mean signal will be zero or close
        # to zero. If the signal is high-pass filtered with Butterworth, the
        # constant (mean) will be removed. This is detected by checking the
        # scale difference between the original mean and the filtered mean
        # signal. When the mean is too small, we have to know the original
        # mean signal to calculate the psc and avoid weird scaling.
        signals = standardize_signal(
            signals + original_mean_signals,
            standardize=standardize,
            detrend=False,
        )
    else:
        signals = standardize_signal(
            signals,
            standardize=standardize,
            detrend=False,
        )
    return signals
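
# Sketch (editor's addition): a typical ``clean`` call on synthetic
# BOLD-like data with six nuisance regressors, band-pass Butterworth
# filtering at TR = 2 s, and sample-std z-scoring. All names and values
# here are illustrative.
def _demo_clean():
    rng = np.random.default_rng(6)
    signals = rng.standard_normal((120, 10))
    confounds = rng.standard_normal((120, 6))  # e.g. six motion parameters
    cleaned = clean(
        signals,
        confounds=confounds,
        t_r=2.0,
        filter="butterworth",
        low_pass=0.1,
        high_pass=0.01,
        standardize="zscore_sample",
    )
    assert cleaned.shape == signals.shape
    return cleaned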

def _handle_scrubbed_volumes(
    signals, confounds, sample_mask, filter_type, t_r, extrapolate
):
    """Interpolate or censor scrubbed volumes."""
    if sample_mask is None:
        return signals, confounds, sample_mask
    elif sample_mask.size == 0:
        raise AllVolumesRemovedError()

    if filter_type == "butterworth":
        signals = _interpolate_volumes(signals, sample_mask, t_r, extrapolate)
        # discard non-interpolated out-of-bounds volumes
        signals = signals[~np.isnan(signals).all(axis=1), :]
        if confounds is not None:
            confounds = _interpolate_volumes(
                confounds, sample_mask, t_r, extrapolate
            )
            # discard non-interpolated out-of-bounds volumes
            confounds = confounds[~np.isnan(confounds).all(axis=1), :]
        if sample_mask is not None and not extrapolate:
            # reset the indexing of the sample_mask excluding
            # non-interpolated volumes at the head of the data
            sample_mask -= sample_mask[0]
    else:  # Or censor when no filtering, or cosine filter
        signals, confounds = _censor_signals(signals, confounds, sample_mask)
    return signals, confounds, sample_mask

def _censor_signals(signals, confounds, sample_mask):
    """Apply sample masks to data."""
    signals = signals[sample_mask, :]
    if confounds is not None:
        confounds = confounds[sample_mask, :]
    return signals, confounds

def _interpolate_volumes(volumes, sample_mask, t_r, extrapolate):
    """Interpolate censored volumes in signals/confounds."""
    if extrapolate:
        extrapolate_default = (
            "By default the cubic spline interpolator extrapolates "
            "the out-of-bounds censored volumes in the data run. This "
            "can lead to undesired filtered signal results. Starting in "
            "version 0.13, the default strategy will be not to extrapolate "
            "but to discard those volumes at filtering."
        )
        warnings.warn(
            category=FutureWarning,
            message=extrapolate_default,
            stacklevel=find_stack_level(),
        )
    frame_times = np.arange(volumes.shape[0]) * t_r
    remained_vol = frame_times[sample_mask]
    remained_x = volumes[sample_mask, :]
    cubic_spline_fitter = CubicSpline(
        remained_vol, remained_x, extrapolate=extrapolate
    )
    volumes_interpolated = cubic_spline_fitter(frame_times)
    volumes[~sample_mask, :] = volumes_interpolated[~sample_mask, :]
    return volumes
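
# Sketch (editor's addition): calling the interpolation helper directly
# with a boolean sample mask; the three censored rows are filled with
# cubic-spline estimates. Hypothetical demo helper; the FutureWarning about
# extrapolation is silenced for brevity.
def _demo_interpolate_volumes():
    volumes = np.sin(np.arange(40) * 0.3)[:, np.newaxis]
    sample_mask = np.ones(40, dtype=bool)
    sample_mask[[10, 11, 25]] = False  # censor three volumes
    volumes[~sample_mask, :] = np.nan
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        filled = _interpolate_volumes(
            volumes, sample_mask, t_r=2.0, extrapolate=True
        )
    assert np.all(np.isfinite(filled))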

def _create_cosine_drift_terms(signals, confounds, high_pass, t_r):
    """Create cosine drift terms, append to confounds regressors."""
    from nilearn.glm.first_level.design_matrix import create_cosine_drift

    frame_times = np.arange(signals.shape[0]) * t_r
    # remove constant, as the signal is mean centered
    cosine_drift = create_cosine_drift(high_pass, frame_times)[:, :-1]
    confounds = _check_cosine_by_user(confounds, cosine_drift)
    return confounds

def _check_cosine_by_user(confounds, cosine_drift):
    """Check if a cosine term exists, based on correlation > 0.9."""
    # stack cosine drift terms if there's no cosine drift term in the data
    n_cosines = cosine_drift.shape[1]

    if n_cosines == 0:
        warnings.warn(
            "Cosine filter was not created. The time series might be too "
            "short or the high pass filter is not suitable for the data.",
            stacklevel=find_stack_level(),
        )
        return confounds

    if confounds is None:
        return cosine_drift.copy()

    # check if cosine drift terms are supplied by the user;
    # given the threshold and timeseries length, there can be no cosine
    # drift term
    corr_cosine = np.corrcoef(cosine_drift.T, confounds.T)
    np.fill_diagonal(corr_cosine, 0)
    cosine_exists = sum(corr_cosine[:n_cosines, :].flatten() > 0.9) > 0

    if cosine_exists:
        warnings.warn(
            "Cosine filter(s) exist in user supplied confounds. "
            "Using user supplied regressors only.",
            stacklevel=find_stack_level(),
        )
        return confounds

    return np.hstack((confounds, cosine_drift))

def _process_runs(
    signals,
    runs,
    detrend,
    standardize,
    confounds,
    sample_mask,
    filter,
    low_pass,
    high_pass,
    t_r,
):
    """Process each run independently."""
    if len(runs) != len(signals):
        raise ValueError(
            f"The length of the run vector ({len(runs)}) "
            f"does not match the length of the signals ({len(signals)})"
        )
    cleaned_signals = []
    for i, run in enumerate(np.unique(runs)):
        run_confounds = None
        run_sample_mask = None
        if confounds is not None:
            run_confounds = confounds[runs == run]
        if sample_mask is not None:
            run_sample_mask = sample_mask[i]
        run_signals = clean(
            signals[runs == run],
            detrend=detrend,
            standardize=standardize,
            confounds=run_confounds,
            sample_mask=run_sample_mask,
            filter=filter,
            low_pass=low_pass,
            high_pass=high_pass,
            t_r=t_r,
        )
        cleaned_signals.append(run_signals)
    return np.vstack(cleaned_signals)
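
# Sketch (editor's addition): cleaning two concatenated runs independently
# through the ``runs`` argument; each run is detrended and standardized on
# its own before the results are stacked back together. Hypothetical helper.
def _demo_clean_two_runs():
    rng = np.random.default_rng(7)
    signals = rng.standard_normal((100, 5))
    runs = np.repeat([0, 1], 50)  # first 50 samples are run 0, last 50 run 1
    cleaned = clean(
        signals, runs=runs, t_r=2.0, filter=False, standardize="zscore_sample"
    )
    assert cleaned.shape == signals.shape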

def _sanitize_inputs(signals, runs, confounds, sample_mask, ensure_finite):
    """Clean up signals and confounds before processing."""
    n_time = len(signals)  # original length of the signal
    n_runs, runs = _sanitize_runs(n_time, runs)
    confounds = sanitize_confounds(n_time, confounds)
    sample_mask = _sanitize_sample_mask(n_time, n_runs, runs, sample_mask)
    signals = _sanitize_signals(signals, ensure_finite)
    return signals, runs, confounds, sample_mask

def sanitize_confounds(n_time, confounds):
    """Check that confounds are the correct type.

    When passing multiple runs, ensure the
    number of runs matches the sets of confound regressors.
    """
    if confounds is None:
        return confounds

    if not isinstance(
        confounds, (list, tuple, str, np.ndarray, pd.DataFrame)
    ):
        raise TypeError(
            f"confounds keyword has an unhandled type: {confounds.__class__}"
        )

    if not isinstance(confounds, (list, tuple)):
        confounds = (confounds,)

    all_confounds = []
    for confound in confounds:
        confound = _sanitize_confound_dtype(n_time, confound)
        all_confounds.append(confound)
    confounds = np.hstack(all_confounds)
    return _ensure_float(confounds)
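
# Sketch (editor's addition): heterogeneous confounds (a 1D array plus a
# DataFrame) are stacked into a single float matrix. Column names here are
# arbitrary examples; the helper is hypothetical.
def _demo_sanitize_confounds():
    n_time = 30
    drift = np.arange(n_time, dtype=float)  # a single 1D confound
    motion = pd.DataFrame(
        np.random.default_rng(8).standard_normal((n_time, 3)),
        columns=["trans_x", "trans_y", "trans_z"],
    )
    stacked = sanitize_confounds(n_time, [drift, motion])
    assert stacked.shape == (n_time, 4)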

def _sanitize_sample_mask(n_time, n_runs, runs, sample_mask):
    """Check sample_mask is the right data type and matches the run index."""
    if sample_mask is None:
        return sample_mask

    sample_mask = check_run_sample_masks(n_runs, sample_mask)

    if runs is None:
        runs = np.zeros(n_time)

    # check sample mask of each run
    for i, current_mask in enumerate(sample_mask):
        _check_sample_mask_index(i, n_runs, runs, current_mask)

    return sample_mask[0] if sum(runs) == 0 else sample_mask

def _check_sample_mask_index(i, n_runs, runs, current_mask):
    """Ensure the index in the sample mask is valid."""
    len_run = sum(i == runs)
    len_current_mask = len(current_mask)
    # sample_mask longer than signal
    if len_current_mask > len_run:
        raise IndexError(
            f"sample_mask {i + 1} of {n_runs} has more timepoints than the "
            f"current run; sample_mask contains {len_current_mask} indexes "
            f"but the run has {len_run} timepoints."
        )
    # sample_mask index exceeds signal timepoints
    invalid_index = current_mask[current_mask > len_run]
    if invalid_index.size > 0:
        raise IndexError(
            f"sample_mask {i + 1} of {n_runs} contains "
            f"invalid index {invalid_index}. "
            f"The signal contains {len_run} time points."
        )

def _sanitize_runs(n_time, runs):
    """Check runs are supplied in the correct format \
    and detect the number of unique runs.
    """
    if runs is not None and len(runs) != n_time:
        raise ValueError(
            f"The length of the run vector ({len(runs)}) "
            f"does not match the length of the signals ({n_time})"
        )
    n_runs = 1 if runs is None else len(np.unique(runs))
    return n_runs, runs

def _sanitize_confound_dtype(n_signal, confound):
    """Check confound is the correct datatype."""
    if isinstance(confound, pd.DataFrame):
        confound = confound.to_numpy()
    if isinstance(confound, (str, Path)):
        filename = confound
        confound = csv_to_array(filename)
        if np.isnan(confound.flat[0]):
            # There may be a header
            confound = csv_to_array(filename, skip_header=1)
        if confound.shape[0] != n_signal:
            raise ValueError(
                "Confound signal has an incorrect length.\n"
                f"Signal length: {n_signal}; "
                f"confound length: {confound.shape[0]}"
            )
    elif isinstance(confound, np.ndarray):
        if confound.ndim == 1:
            confound = np.atleast_2d(confound).T
        elif confound.ndim != 2:
            raise ValueError(
                "confound array has an incorrect number "
                f"of dimensions: {confound.ndim}"
            )
        if confound.shape[0] != n_signal:
            raise ValueError(
                "Confound signal has an incorrect length. "
                f"Signal length: {n_signal}; "
                f"confound length: {confound.shape[0]}."
            )
    else:
        raise TypeError(
            f"confound has an unhandled type: {confound.__class__}"
        )
    return confound

def _check_filter_parameters(filter, low_pass, high_pass, t_r):
    """Check all filter related parameters are set correctly."""
    if not filter:
        if any(
            isinstance(item, (float, int)) for item in [low_pass, high_pass]
        ):
            warnings.warn(
                "No filter type selected but cutoff frequency provided. "
                "Will not perform filtering.",
                stacklevel=find_stack_level(),
            )
        return False
    elif filter in available_filters:
        if filter == "cosine" and not all(
            isinstance(item, (float, int)) for item in [t_r, high_pass]
        ):
            raise ValueError(
                "Repetition time (t_r) and low cutoff frequency (high_pass) "
                "must be specified for cosine filtering. "
                f"t_r='{t_r}', high_pass='{high_pass}'"
            )
        if filter == "butterworth":
            if all(item is None for item in [low_pass, high_pass]):
                # Butterworth was switched off by passing
                # None to both low_pass and high_pass
                return False
            if t_r is None:
                raise ValueError(
                    "Repetition time (t_r) must be specified for "
                    "butterworth filtering."
                )
            if any(isinstance(item, bool) for item in [low_pass, high_pass]):
                raise TypeError(
                    "high/low pass must be float or None but you provided "
                    f"high_pass='{high_pass}', low_pass='{low_pass}'"
                )
        return filter
    else:
        raise ValueError(f"Filter method {filter} not implemented.")

def _sanitize_signals(signals, ensure_finite):
    """Ensure signals are in the correct state."""
    if not isinstance(ensure_finite, bool):
        raise ValueError(
            "'ensure_finite' must be boolean type True or False "
            f"but you provided ensure_finite={ensure_finite}"
        )
    signals = signals.copy()
    if not isinstance(signals, np.ndarray):
        signals = as_ndarray(signals)
    if ensure_finite:
        mask = np.logical_not(np.isfinite(signals))
        if mask.any():
            signals[mask] = 0
    return _ensure_float(signals)

def _check_signal_parameters(detrend, standardize_confounds):
    """Raise a warning if the combination is illogical."""
    if not detrend and not standardize_confounds:
        warnings.warn(
            "When confounds are provided, one must perform detrending "
            "and/or standardize the confounds. "
            f"You provided detrend={detrend}, "
            f"standardize_confounds={standardize_confounds}. "
            "If confounds were not standardized or demeaned before being "
            "passed to signal.clean, the signal will not be correctly "
            "cleaned.",
            stacklevel=find_stack_level(),
        )