Coverage for nilearn/signal.py: 8%

337 statements  


1""" 

2Preprocessing functions for time series. 

3 

4All functions in this module should take X matrices with samples x 

5features 

6""" 

7 

8import warnings 

9from pathlib import Path 

10 

11import numpy as np 

12import pandas as pd 

13from scipy import linalg 

14from scipy import signal as sp_signal 

15from scipy.interpolate import CubicSpline 

16from sklearn.utils import as_float_array, gen_even_slices 

17 

18from nilearn._utils import fill_doc, stringify_path 

19from nilearn._utils.exceptions import AllVolumesRemovedError 

20from nilearn._utils.logger import find_stack_level 

21from nilearn._utils.numpy_conversions import as_ndarray, csv_to_array 

22from nilearn._utils.param_validation import ( 

23 check_params, 

24 check_run_sample_masks, 

25) 

26 

27__all__ = [ 

28 "butterworth", 

29 "clean", 

30 "high_variance_confounds", 

31] 

32 

33available_filters = ("butterworth", "cosine") 

34 

35 


def standardize_signal(
    signals,
    detrend=False,
    standardize="zscore",
):
    """Center and standardize a given signal (time is along first axis).

    Parameters
    ----------
    signals : :class:`numpy.ndarray`
        Timeseries to standardize.

    detrend : :obj:`bool`, default=False
        Whether to detrend the timeseries.

    standardize : {'zscore_sample', 'zscore', 'psc', True, False}, \
        default='zscore'
        Strategy to standardize the signal:

        - 'zscore_sample': The signal is z-scored. Timeseries are shifted
          to zero mean and scaled to unit variance. Uses the sample std
          (``ddof=1``).
        - 'zscore': The signal is z-scored. Timeseries are shifted
          to zero mean and scaled to unit variance. Uses the population std,
          by calling :obj:`numpy.std` with ``ddof=0``.
        - 'psc': Timeseries are shifted to zero mean value and scaled
          to percent signal change (as compared to the original mean signal).
        - True: The signal is z-scored (same as option `zscore`).
          Timeseries are shifted to zero mean and scaled to unit variance.
        - False: Do not standardize the data.

    Returns
    -------
    std_signals : :class:`numpy.ndarray`
        Copy of signals, standardized.
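
    Examples
    --------
    A minimal sketch on synthetic data (illustrative only):

    >>> import numpy as np
    >>> from nilearn.signal import standardize_signal
    >>> rng = np.random.default_rng(0)
    >>> signals = rng.standard_normal((100, 3))
    >>> std_signals = standardize_signal(signals, standardize="zscore_sample")
    >>> std_signals.shape
    (100, 3)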

71 """ 

72 if standardize not in [True, False, "psc", "zscore", "zscore_sample"]: 

73 raise ValueError(f"{standardize} is no valid standardize strategy.") 

74 

75 signals = _detrend(signals, inplace=False) if detrend else signals.copy() 

76 

77 if standardize: 

78 if signals.shape[0] == 1: 

79 warnings.warn( 

80 "Standardization of 3D signal has been requested but " 

81 "would lead to zero values. Skipping.", 

82 stacklevel=find_stack_level(), 

83 ) 

84 return signals 

85 

86 elif standardize == "zscore_sample": 

87 if not detrend: 

88 # remove mean if not already detrended 

89 signals = signals - signals.mean(axis=0) 

90 

91 std = signals.std(axis=0, ddof=1) 

92 # avoid numerical problems 

93 std[std < np.finfo(np.float64).eps] = 1.0 

94 signals /= std 

95 

96 elif (standardize == "zscore") or (standardize is True): 

97 std_strategy_default = ( 

98 "The default strategy for standardize is currently 'zscore' " 

99 "which incorrectly uses population std to calculate sample " 

100 "zscores. The new strategy 'zscore_sample' corrects this " 

101 "behavior by using the sample std. In release 0.13, the " 

102 "default strategy will be replaced by the new strategy and " 

103 "the 'zscore' option will be removed. Please use " 

104 "'zscore_sample' instead." 

105 ) 

106 warnings.warn( 

107 category=DeprecationWarning, 

108 message=std_strategy_default, 

109 stacklevel=find_stack_level(), 

110 ) 

111 

112 if not detrend: 

113 # remove mean if not already detrended 

114 signals = signals - signals.mean(axis=0) 

115 

116 std = signals.std(axis=0) 

117 # avoid numerical problems 

118 std[std < np.finfo(np.float64).eps] = 1.0 

119 

120 signals /= std 

121 

122 elif standardize == "psc": 

123 mean_signals = signals.mean(axis=0) 

124 invalid_ix = np.absolute(mean_signals) < np.finfo(np.float64).eps 

125 signals = (signals - mean_signals) / np.absolute(mean_signals) 

126 signals *= 100 

127 

128 if np.any(invalid_ix): 

129 warnings.warn( 

130 "psc standardization strategy is meaningless " 

131 "for features that have a mean of 0. " 

132 "These time series are set to 0.", 

133 stacklevel=find_stack_level(), 

134 ) 

135 signals[:, invalid_ix] = 0 

136 

137 return signals 

138 

139 


def _mean_of_squares(signals, n_batches=20):
    """Compute mean of squares for each signal.

    This function is equivalent to:

    .. code-block:: python

        var = np.copy(signals)
        var **= 2
        var = var.mean(axis=0)

    but uses a lot less memory.

    Parameters
    ----------
    signals : :class:`numpy.ndarray`, shape (n_samples, n_features)
        Signal whose mean of squares must be computed.

    n_batches : :obj:`int`, default=20
        Number of batches to use in the computation.

        .. note::
            Tweaking this value can lead to variation of memory usage
            and computation time. The higher the value, the lower the
            memory consumption.

    Returns
    -------
    var : :class:`numpy.ndarray`
        1D array holding the mean of squares.
    """
    # No batching for small arrays
    if signals.shape[1] < 500:
        n_batches = 1

    # Fastest for C order
    var = np.empty(signals.shape[1])
    for batch in gen_even_slices(signals.shape[1], n_batches):
        tvar = np.copy(signals[:, batch])
        tvar **= 2
        var[batch] = tvar.mean(axis=0)

    return var


def row_sum_of_squares(signals, n_batches=20):
    """Compute sum of squares for each signal.

    This function is equivalent to:

    .. code-block:: python

        signals **= 2
        signals = signals.sum(axis=0)

    but uses a lot less memory.

    Parameters
    ----------
    signals : :class:`numpy.ndarray`, shape (n_samples, n_features)
        Signal whose sum of squares must be computed.

    n_batches : :obj:`int`, default=20
        Number of batches to use in the computation.

        .. note::
            Tweaking this value can lead to variation of memory usage
            and computation time. The higher the value, the lower the
            memory consumption.

    Returns
    -------
    var : :class:`numpy.ndarray`
        1D array holding the sum of squares.
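
    Examples
    --------
    A minimal sketch on synthetic data (illustrative only):

    >>> import numpy as np
    >>> from nilearn.signal import row_sum_of_squares
    >>> signals = np.ones((10, 600))
    >>> sums = row_sum_of_squares(signals)
    >>> float(sums[0])  # each column sums 10 squared ones
    10.0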

216 """ 

217 # No batching for small arrays 

218 if signals.shape[1] < 500: 

219 n_batches = 1 

220 

221 # Fastest for C order 

222 var = np.empty(signals.shape[1]) 

223 for batch in gen_even_slices(signals.shape[1], n_batches): 

224 var[batch] = np.sum(signals[:, batch] ** 2, 0) 

225 

226 return var 

227 

228 


def _detrend(signals, inplace=False, type="linear", n_batches=10):
    """Detrend columns of input array.

    Signals are supposed to be columns of `signals`.
    This function is significantly faster than :func:`scipy.signal.detrend`
    in this case and uses a lot less memory.

    Parameters
    ----------
    signals : :class:`numpy.ndarray`
        This parameter must be two-dimensional.
        Signals to detrend. A signal is a column.

    inplace : :obj:`bool`, default=False
        Whether to perform the computation in place.

    type : {"linear", "constant"}, default="linear"
        Detrending type, either "linear" or "constant".
        See also :func:`scipy.signal.detrend`.

    n_batches : :obj:`int`, default=10
        Number of batches to use in the computation.

        .. note::
            Tweaking this value can lead to variation of memory usage
            and computation time. The higher the value, the lower the
            memory consumption.

    Returns
    -------
    detrended_signals : :class:`numpy.ndarray`
        Detrended signals. The shape is that of ``signals``.

    Notes
    -----
    If a signal of length 1 is given, it is returned unchanged.
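
    Examples
    --------
    A minimal sketch of this private helper, checking agreement with
    :func:`scipy.signal.detrend` on synthetic data (illustrative only):

    >>> import numpy as np
    >>> from scipy import signal as sp_signal
    >>> from nilearn.signal import _detrend
    >>> x = np.arange(20, dtype=np.float64).reshape(10, 2)
    >>> bool(np.allclose(_detrend(x), sp_signal.detrend(x, axis=0)))
    True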

265 """ 

266 signals = as_float_array(signals, copy=not inplace) 

267 if signals.shape[0] == 1: 

268 warnings.warn( 

269 "Detrending of 3D signal has been requested but " 

270 "would lead to zero values. Skipping.", 

271 stacklevel=find_stack_level(), 

272 ) 

273 return signals 

274 

275 signals -= np.mean(signals, axis=0) 

276 if type == "linear": 

277 # Keeping "signals" dtype avoids some type conversion further down, 

278 # and can save a lot of memory if dtype is single-precision. 

279 regressor = np.arange(signals.shape[0], dtype=signals.dtype) 

280 regressor -= regressor.mean() 

281 std = np.sqrt((regressor**2).sum()) 

282 # avoid numerical problems 

283 if not std < np.finfo(np.float64).eps: 

284 regressor /= std 

285 regressor = regressor[:, np.newaxis] 

286 

287 # No batching for small arrays 

288 if signals.shape[1] < 500: 

289 n_batches = 1 

290 

291 # This is fastest for C order. 

292 for batch in gen_even_slices(signals.shape[1], n_batches): 

293 signals[:, batch] -= ( 

294 np.dot(regressor[:, 0], signals[:, batch]) * regressor 

295 ) 

296 return signals 

297 

298 


def _check_wn(btype, freq, nyq):
    """Ensure that the critical frequency works with the Nyquist frequency.

    The critical frequency must be (1) >= 0 and (2) < Nyquist.
    When critical frequencies are exactly at the Nyquist frequency,
    results are unstable.

    See the SciPy issue: https://github.com/scipy/scipy/issues/6265.
    Due to the unstable results reported in the issue above,
    we force the critical frequencies to be slightly less than the Nyquist
    frequency, and slightly more than zero.
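
    A minimal sketch of this private helper (illustrative only): a cutoff
    at the Nyquist frequency is nudged just below it.

    >>> import warnings
    >>> from nilearn.signal import _check_wn
    >>> with warnings.catch_warnings():
    ...     warnings.simplefilter("ignore")
    ...     freq = _check_wn("low", 0.5, nyq=0.5)
    >>> bool(freq < 0.5)
    True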

310 """ 

311 EPS = np.finfo(np.float32).eps 

312 if freq >= nyq: 

313 freq = nyq - (nyq * 10 * EPS) 

314 warnings.warn( 

315 f"The frequency specified for the {btype} pass filter is " 

316 "too high to be handled by a digital filter " 

317 "(superior to Nyquist frequency). " 

318 f"It has been lowered to {freq} (Nyquist frequency).", 

319 stacklevel=find_stack_level(), 

320 ) 

321 

322 elif freq < 0.0: # equal to 0.0 is okay 

323 freq = nyq * EPS 

324 warnings.warn( 

325 f"The frequency specified for the {btype} pass filter is too " 

326 "low to be handled by a digital filter (must be non-negative). " 

327 f"It has been set to eps: {freq}.", 

328 stacklevel=find_stack_level(), 

329 ) 

330 

331 return freq 

332 

333 


@fill_doc
def butterworth(
    signals,
    sampling_rate,
    low_pass=None,
    high_pass=None,
    order=5,
    padtype="odd",
    padlen=None,
    copy=False,
):
    """Apply a low-pass, high-pass or band-pass \
    `Butterworth filter <https://en.wikipedia.org/wiki/Butterworth_filter>`_.

    Apply a filter to remove signal below the ``high_pass`` cutoff frequency
    and above the ``low_pass`` cutoff frequency.

    Parameters
    ----------
    signals : :class:`numpy.ndarray` (1D sequence or n_samples x n_sources)
        Signals to be filtered. A signal is assumed to be a column
        of `signals`.

    sampling_rate : :obj:`float`
        Number of samples per second (sample frequency, in Hertz).
    %(low_pass)s
    %(high_pass)s
    order : :obj:`int`, default=5
        Order of the `Butterworth filter
        <https://en.wikipedia.org/wiki/Butterworth_filter>`_.
        When filtering signals, the filter has a decay to avoid ringing.
        Increasing the order sharpens this decay. Be aware that very high
        orders can lead to numerical instability.

    padtype : {"odd", "even", "constant", None}, default="odd"
        Type of padding to use for the Butterworth filter.
        For more information about this, see :func:`scipy.signal.filtfilt`.

    padlen : :obj:`int` or None, default=None
        The size of the padding to add to the beginning and end of
        ``signals``. If None, the default value from
        :func:`scipy.signal.filtfilt` will be used.

    copy : :obj:`bool`, default=False
        If False, `signals` is modified inplace, and memory consumption is
        lower than for ``copy=True``, though computation time is higher.

    Returns
    -------
    filtered_signals : :class:`numpy.ndarray`
        Signals filtered according to the given parameters.
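
    Examples
    --------
    A minimal sketch: band-pass filtering a synthetic sine wave sampled
    at 10 Hz (illustrative only):

    >>> import numpy as np
    >>> from nilearn.signal import butterworth
    >>> t = np.arange(200) / 10.0  # 20 seconds sampled at 10 Hz
    >>> sig = np.sin(2 * np.pi * 0.5 * t)[:, np.newaxis]
    >>> filtered = butterworth(
    ...     sig, sampling_rate=10.0, low_pass=1.0, high_pass=0.1, copy=True
    ... )
    >>> filtered.shape
    (200, 1)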

385 """ 

386 check_params(locals()) 

387 if low_pass is None and high_pass is None: 

388 return signals.copy() if copy else signals 

389 

390 if ( 

391 low_pass is not None 

392 and high_pass is not None 

393 and high_pass >= low_pass 

394 ): 

395 raise ValueError( 

396 f"High pass cutoff frequency ({high_pass}) is greater than or " 

397 f"equal to low pass filter frequency ({low_pass}). " 

398 "This case is not handled by this function." 

399 ) 

400 

401 nyq = sampling_rate * 0.5 

402 

403 critical_freq = [] 

404 if high_pass is not None: 

405 btype = "high" 

406 critical_freq.append(_check_wn(btype, high_pass, nyq)) 

407 

408 if low_pass is not None: 

409 btype = "low" 

410 critical_freq.append(_check_wn(btype, low_pass, nyq)) 

411 

412 if len(critical_freq) == 2: 

413 btype = "band" 

414 # Inappropriate parameter input might lead to coercion of both 

415 # elements of critical_freq to a value just below Nyquist. 

416 # Scipy fix now enforces that critical frequencies cannot be equal. 

417 # See https://github.com/scipy/scipy/pull/15886. 

418 # If this is the case, we return the signals unfiltered. 

419 if critical_freq[0] == critical_freq[1]: 

420 warnings.warn( 

421 "Signals are returned unfiltered because band-pass critical " 

422 "frequencies are equal. Please check that inputs for " 

423 "sampling_rate, low_pass, and high_pass are valid.", 

424 stacklevel=find_stack_level(), 

425 ) 

426 return signals.copy() if copy else signals 

427 else: 

428 critical_freq = critical_freq[0] 

429 

430 sos = sp_signal.butter( 

431 N=order, 

432 Wn=critical_freq, 

433 btype=btype, 

434 output="sos", 

435 fs=sampling_rate, 

436 ) 

437 if signals.ndim == 1: 

438 # 1D case 

439 output = sp_signal.sosfiltfilt( 

440 sos, 

441 x=signals, 

442 padtype=padtype, 

443 padlen=padlen, 

444 ) 

445 if copy: # filtfilt does a copy in all cases. 

446 signals = output 

447 else: 

448 signals[...] = output 

449 elif copy: 

450 # No way to save memory when a copy has been requested, 

451 # because filtfilt does out-of-place processing 

452 signals = sp_signal.sosfiltfilt( 

453 sos, 

454 x=signals, 

455 axis=0, 

456 padtype=padtype, 

457 padlen=padlen, 

458 ) 

459 else: 

460 # Lesser memory consumption, slower. 

461 for timeseries in signals.T: 

462 timeseries[:] = sp_signal.sosfiltfilt( 

463 sos, 

464 x=timeseries, 

465 padtype=padtype, 

466 padlen=padlen, 

467 ) 

468 

469 # results returned in-place 

470 

471 return signals 

472 

473 


@fill_doc
def high_variance_confounds(
    series, n_confounds=5, percentile=2.0, detrend=True
) -> np.ndarray:
    """Return confounds time series extracted from series \
    with highest variance.

    Parameters
    ----------
    series : :class:`numpy.ndarray`, shape (n_samples, n_features)
        Timeseries. A timeseries is a column in the ``series`` array.

    n_confounds : :obj:`int`, default=5
        Number of confounds to return.

    percentile : :obj:`float`, default=2.0
        Highest-variance series percentile to keep before computing the
        singular value decomposition, 0. <= `percentile` <= 100.
        ``series.shape[1] * percentile / 100`` must be greater
        than ``n_confounds``.
    %(detrend)s
        Default=True.

    Returns
    -------
    v : :class:`numpy.ndarray`
        Highest variance confounds. Shape: (n_samples, n_confounds).

    Notes
    -----
    This method is related to what has been published in the literature
    as 'CompCor' :footcite:p:`Behzadi2007`.

    The implemented algorithm does the following:

    - compute the sum of squares for each time series (no mean removal)
    - keep a given percentile of series with highest variances (percentile)
    - compute an SVD of the extracted series
    - return a given number (n_confounds) of series from the SVD with
      highest singular values

    References
    ----------
    .. footbibliography::

    See Also
    --------
    nilearn.image.high_variance_confounds
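
    Examples
    --------
    A minimal sketch on random data (illustrative only):

    >>> import numpy as np
    >>> from nilearn.signal import high_variance_confounds
    >>> rng = np.random.default_rng(0)
    >>> series = rng.standard_normal((50, 1000))
    >>> confounds = high_variance_confounds(series, n_confounds=5)
    >>> confounds.shape
    (50, 5)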

523 """ 

524 check_params(locals()) 

525 if detrend: 

526 series = _detrend(series) # copy 

527 

528 # Retrieve the voxels|features with highest variance 

529 

530 # Compute variance without mean removal. 

531 var = _mean_of_squares(series) 

532 var_thr = np.nanpercentile(var, 100.0 - percentile) 

533 series = series[:, var > var_thr] # extract columns (i.e. features) 

534 # Return the singular vectors with largest singular values 

535 # We solve the symmetric eigenvalue problem here, increasing stability 

536 s, u = linalg.eigh(series.dot(series.T) / series.shape[0]) 

537 ix_ = np.argsort(s)[::-1] 

538 u = u[:, ix_[:n_confounds]].copy() 

539 return u 

540 

541 


def _ensure_float(data):
    """Make sure that data is a float type."""
    if data.dtype.kind != "f":
        if data.dtype.itemsize == 8:
            data = data.astype(np.float64)
        else:
            data = data.astype(np.float32)
    return data


@fill_doc
def clean(
    signals,
    runs=None,
    detrend=True,
    standardize="zscore",
    sample_mask=None,
    confounds=None,
    standardize_confounds=True,
    filter="butterworth",
    low_pass=None,
    high_pass=None,
    t_r=2.5,
    ensure_finite=False,
    extrapolate=True,
    **kwargs,
):
    """Improve :term:`SNR` on masked :term:`fMRI` signals.

    This function can do several things on the input signals. With the default
    options, the procedures are performed in the following order:

    - detrend
    - low- and high-pass Butterworth filter
    - remove confounds
    - standardize

    Low-pass filtering improves specificity.

    High-pass filtering should be kept small, to keep some sensitivity.

    Butterworth filtering is only meaningful on evenly-sampled signals.

    When performing scrubbing (censoring high-motion volumes) with Butterworth
    filtering, the signal is processed in the following order, based on the
    second recommendation in :footcite:t:`Lindquist2018`:

    - interpolate high-motion volumes with cubic spline interpolation
    - detrend
    - low- and high-pass Butterworth filter
    - censor high-motion volumes
    - remove confounds
    - standardize

    According to :footcite:t:`Lindquist2018`, removal of confounds will be
    done orthogonally to temporal filters (low- and/or high-pass filters),
    if both are specified. The censored volumes should be removed in both
    signals and confounds before the nuisance regression.

    When performing scrubbing with cosine drift term filtering, the signal is
    processed in the following order, based on the first recommendation in
    :footcite:t:`Lindquist2018`:

    - generate cosine drift terms
    - censor high-motion volumes in both signal and confounds
    - detrend
    - remove confounds
    - standardize

    Parameters
    ----------
    signals : :class:`numpy.ndarray`
        Timeseries. Must have shape (number of time points, number of
        features). This array is not modified.

    runs : :class:`numpy.ndarray`, default=None
        Add a run level to the cleaning process. Each run will be
        cleaned independently. Must be a 1D array of n_samples elements.

    confounds : :class:`numpy.ndarray`, :obj:`str`, :class:`pathlib.Path`, \
        :class:`pandas.DataFrame` \
        or :obj:`list` of confounds timeseries, default=None
        Shape must be (number of time points, number of confounds), or just
        (number of time points,).
        The number of time points in ``signals`` and ``confounds`` must be
        identical (i.e. ``signals.shape[0] == confounds.shape[0]``).
        If a string is provided, it is assumed to be the name of a CSV file
        containing confound timeseries as columns, with an optional one-line
        header.
        If a list is provided, all confounds are removed from the input
        signal, as if all were in the same array.

    sample_mask : None, Any type compatible with numpy-array indexing, \
        or :obj:`list` of \
        shape: (number of scans - number of volumes removed, ) \
        for explicit index, or (number of scans, ) for binary mask, \
        default=None
        Masks the niimgs along time/fourth dimension to perform scrubbing
        (remove volumes with high motion) and/or non-steady-state volumes.
        When passing a binary mask with boolean values, ``True`` refers to
        volumes kept, and ``False`` to volumes removed.
        This masking step is applied before signal cleaning. When supplying
        run information, sample_mask must be a list containing sets of
        indexes for each run.

        .. versionadded:: 0.8.0

    %(t_r)s
        Default=2.5.
    filter : {'butterworth', 'cosine', False}, default='butterworth'
        Filtering methods:

        - 'butterworth': perform Butterworth filtering.
        - 'cosine': generate discrete cosine transformation drift terms.
        - False: do not perform filtering.

    %(low_pass)s

        .. note::
            `low_pass` is not implemented for filter='cosine'.

    %(high_pass)s
    %(detrend)s
    standardize : {'zscore_sample', 'zscore', 'psc', True, False}, \
        default="zscore"
        Strategy to standardize the signal:

        - 'zscore_sample':
          The signal is z-scored.
          Timeseries are shifted to zero mean and scaled to unit variance.
          Uses the sample std (``ddof=1``).
        - 'zscore':
          The signal is z-scored.
          Timeseries are shifted to zero mean and scaled to unit variance.
          Uses the population std, by calling :obj:`numpy.std` with
          ``ddof=0``.
        - 'psc':
          Timeseries are shifted to zero mean value and scaled
          to percent signal change (as compared to the original mean signal).
        - True:
          The signal is z-scored (same as option `zscore`).
          Timeseries are shifted to zero mean and scaled to unit variance.
        - False: do not standardize the data.

    %(standardize_confounds)s

    ensure_finite : :obj:`bool`, default=False
        If `True`, the non-finite values (NaNs and infs) found in the data
        will be replaced by zeros.

    extrapolate : :obj:`bool`, default=True
        If `True` and filter='butterworth', censored volumes at both ends of
        the signal data will be interpolated before filtering. Otherwise,
        they will be discarded from the band-pass filtering process.

    kwargs : :obj:`dict`
        Keyword arguments to be passed to functions called within ``clean``.
        Kwargs prefixed with ``'butterworth__'`` will be passed to
        :func:`~nilearn.signal.butterworth`.

    Returns
    -------
    cleaned_signals : :class:`numpy.ndarray`
        Input signals, cleaned. Same shape as `signals` unless `sample_mask`
        is applied.

    Notes
    -----
    Confounds removal is based on a projection on the orthogonal
    of the signal space. See :footcite:t:`Friston1994`.

    Orthogonalization between temporal filters and confound removal is based
    on suggestions in :footcite:t:`Lindquist2018`.

    References
    ----------
    .. footbibliography::

    See Also
    --------
    nilearn.image.clean_img
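
    Examples
    --------
    A minimal sketch cleaning synthetic signals with two confound
    regressors (illustrative only):

    >>> import numpy as np
    >>> from nilearn.signal import clean
    >>> rng = np.random.default_rng(0)
    >>> signals = rng.standard_normal((100, 10))
    >>> confounds = rng.standard_normal((100, 2))
    >>> cleaned = clean(
    ...     signals,
    ...     confounds=confounds,
    ...     standardize="zscore_sample",
    ...     filter="butterworth",
    ...     high_pass=0.01,
    ...     t_r=2.5,
    ... )
    >>> cleaned.shape
    (100, 10)

    Scrubbing with a binary ``sample_mask`` drops the censored volumes
    from the output:

    >>> sample_mask = np.ones(100, dtype=bool)
    >>> sample_mask[[10, 11]] = False  # censor two volumes
    >>> clean(
    ...     signals, sample_mask=sample_mask, filter=False, standardize=False
    ... ).shape
    (98, 10)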

722 """ 

723 check_params(locals()) 

724 # Raise warning for some parameter combinations when confounds present 

725 confounds = stringify_path(confounds) 

726 if confounds is not None: 

727 _check_signal_parameters(detrend, standardize_confounds) 

728 # check if filter parameters are satisfied and return correct filter 

729 filter_type = _check_filter_parameters(filter, low_pass, high_pass, t_r) 

730 

731 # Read confounds and signals 

732 signals, runs, confounds, sample_mask = _sanitize_inputs( 

733 signals, runs, confounds, sample_mask, ensure_finite 

734 ) 

735 

736 # Process each run independently 

737 if runs is not None: 

738 return _process_runs( 

739 signals, 

740 runs, 

741 detrend, 

742 standardize, 

743 confounds, 

744 sample_mask, 

745 filter_type, 

746 low_pass, 

747 high_pass, 

748 t_r, 

749 ) 

750 

751 # For the following steps, sample_mask should be either None or index-like 

752 

753 # Generate cosine drift terms using the full length of the signals 

754 if filter_type == "cosine": 

755 confounds = _create_cosine_drift_terms( 

756 signals, confounds, high_pass, t_r 

757 ) 

758 

759 # Interpolation / censoring 

760 signals, confounds, sample_mask = _handle_scrubbed_volumes( 

761 signals, confounds, sample_mask, filter_type, t_r, extrapolate 

762 ) 

763 # Detrend 

764 # Detrend and filtering should apply to confounds, if confound presents 

765 # keep filters orthogonal (according to Lindquist et al. (2018)) 

766 # Restrict the signal to the orthogonal of the confounds 

767 original_mean_signals = signals.mean(axis=0) 

768 if detrend: 

769 signals = standardize_signal( 

770 signals, standardize=False, detrend=detrend 

771 ) 

772 if confounds is not None: 

773 confounds = standardize_signal( 

774 confounds, standardize=False, detrend=detrend 

775 ) 

776 

777 # Butterworth filtering 

778 if filter_type == "butterworth": 

779 butterworth_kwargs = { 

780 k.replace("butterworth__", ""): v 

781 for k, v in kwargs.items() 

782 if k.startswith("butterworth__") 

783 } 

784 signals = butterworth( 

785 signals, 

786 sampling_rate=1.0 / t_r, 

787 low_pass=low_pass, 

788 high_pass=high_pass, 

789 **butterworth_kwargs, 

790 ) 

791 if confounds is not None: 

792 # Apply low- and high-pass filters to keep filters orthogonal 

793 # (according to Lindquist et al. (2018)) 

794 confounds = butterworth( 

795 confounds, 

796 sampling_rate=1.0 / t_r, 

797 low_pass=low_pass, 

798 high_pass=high_pass, 

799 **butterworth_kwargs, 

800 ) 

801 

802 # apply sample_mask to remove censored volumes after signal filtering 

803 if sample_mask is not None: 

804 signals, confounds = _censor_signals( 

805 signals, confounds, sample_mask 

806 ) 

807 

808 # Remove confounds 

809 if confounds is not None: 

810 confounds = standardize_signal( 

811 confounds, standardize=standardize_confounds, detrend=False 

812 ) 

813 if not standardize_confounds: 

814 # Improve numerical stability by controlling the range of 

815 # confounds. We don't rely on standardize_signal as it removes any 

816 # constant contribution to confounds. 

817 confound_max = np.max(np.abs(confounds), axis=0) 

818 confound_max[confound_max == 0] = 1 

819 confounds /= confound_max 

820 

821 # Pivoting in qr decomposition was added in scipy 0.10 

822 Q, R, _ = linalg.qr(confounds, mode="economic", pivoting=True) 

823 Q = Q[:, np.abs(np.diag(R)) > np.finfo(np.float64).eps * 100.0] 

824 signals -= Q.dot(Q.T).dot(signals) 

825 

826 # Standardize 

827 if not standardize: 

828 return signals 

829 

830 # detect if mean is close to zero; This can obscure the scale of the signal 

831 # with percent signal change standardization. This should happen when the 

832 # data was 1. detrended 2. high pass filtered. 

833 filtered_mean_check = ( 

834 np.abs(signals.mean(0)).mean() / np.abs(original_mean_signals).mean() 

835 < 1e-1 

836 ) 

837 if standardize == "psc" and filtered_mean_check: 

838 # If the signal is detrended, the mean signal will be zero or close to 

839 # zero. If signal is high pass filtered with butterworth, the constant 

840 # (mean) will be removed. This is detected through checking the scale 

841 # difference of the original mean and filtered mean signal. When the 

842 # mean is too small, we have to know the original mean signal to 

843 # calculate the psc to avoid weird scaling. 

844 signals = standardize_signal( 

845 signals + original_mean_signals, 

846 standardize=standardize, 

847 detrend=False, 

848 ) 

849 else: 

850 signals = standardize_signal( 

851 signals, 

852 standardize=standardize, 

853 detrend=False, 

854 ) 

855 return signals 

856 

857 


def _handle_scrubbed_volumes(
    signals, confounds, sample_mask, filter_type, t_r, extrapolate
):
    """Interpolate or censor scrubbed volumes."""
    if sample_mask is None:
        return signals, confounds, sample_mask
    elif sample_mask.size == 0:
        raise AllVolumesRemovedError()

    if filter_type == "butterworth":
        signals = _interpolate_volumes(signals, sample_mask, t_r, extrapolate)
        # discard non-interpolated out-of-bounds volumes
        signals = signals[~np.isnan(signals).all(axis=1), :]
        if confounds is not None:
            confounds = _interpolate_volumes(
                confounds, sample_mask, t_r, extrapolate
            )
            # discard non-interpolated out-of-bounds volumes
            confounds = confounds[~np.isnan(confounds).all(axis=1), :]
        if sample_mask is not None and not extrapolate:
            # reset the indexing of the sample_mask excluding non-interpolated
            # volumes at the head of the data
            sample_mask -= sample_mask[0]
    else:  # Or censor when no filtering, or cosine filter
        signals, confounds = _censor_signals(signals, confounds, sample_mask)
    return signals, confounds, sample_mask


def _censor_signals(signals, confounds, sample_mask):
    """Apply sample masks to data."""
    signals = signals[sample_mask, :]
    if confounds is not None:
        confounds = confounds[sample_mask, :]
    return signals, confounds


def _interpolate_volumes(volumes, sample_mask, t_r, extrapolate):
    """Interpolate censored volumes in signals/confounds.

    if extrapolate:
        extrapolate_default = (
            "By default the cubic spline interpolator extrapolates "
            "the out-of-bounds censored volumes in the data run. This "
            "can lead to undesired filtered signal results. Starting in "
            "version 0.13, the default strategy will be not to extrapolate "
            "but to discard those volumes during filtering."
        )
        warnings.warn(
            category=FutureWarning,
            message=extrapolate_default,
            stacklevel=find_stack_level(),
        )
    frame_times = np.arange(volumes.shape[0]) * t_r
    remained_vol = frame_times[sample_mask]
    remained_x = volumes[sample_mask, :]
    cubic_spline_fitter = CubicSpline(
        remained_vol, remained_x, extrapolate=extrapolate
    )
    volumes_interpolated = cubic_spline_fitter(frame_times)
    volumes[~sample_mask, :] = volumes_interpolated[~sample_mask, :]
    return volumes


def _create_cosine_drift_terms(signals, confounds, high_pass, t_r):
    """Create cosine drift terms and append them to confound regressors.

    from nilearn.glm.first_level.design_matrix import create_cosine_drift

    frame_times = np.arange(signals.shape[0]) * t_r
    # remove the constant term, as the signal is mean centered
    cosine_drift = create_cosine_drift(high_pass, frame_times)[:, :-1]
    confounds = _check_cosine_by_user(confounds, cosine_drift)
    return confounds


def _check_cosine_by_user(confounds, cosine_drift):
    """Check if cosine terms exist, based on correlation > 0.9."""
    # stack cosine drift terms if there is no cosine drift term in the data
    n_cosines = cosine_drift.shape[1]

    if n_cosines == 0:
        warnings.warn(
            "Cosine filter was not created. The time series might be too "
            "short or the high pass filter is not suitable for the data.",
            stacklevel=find_stack_level(),
        )
        return confounds

    if confounds is None:
        return cosine_drift.copy()

    # check if cosine drift terms are supplied by the user;
    # given the threshold and timeseries length, there can be no cosine drift
    # term
    corr_cosine = np.corrcoef(cosine_drift.T, confounds.T)
    np.fill_diagonal(corr_cosine, 0)
    cosine_exists = sum(corr_cosine[:n_cosines, :].flatten() > 0.9) > 0

    if cosine_exists:
        warnings.warn(
            "Cosine filter(s) exist in user supplied confounds. "
            "Using user supplied regressors only.",
            stacklevel=find_stack_level(),
        )
        return confounds

    return np.hstack((confounds, cosine_drift))


def _process_runs(
    signals,
    runs,
    detrend,
    standardize,
    confounds,
    sample_mask,
    filter,
    low_pass,
    high_pass,
    t_r,
):
    """Process each run independently."""
    if len(runs) != len(signals):
        raise ValueError(
            f"The length of the run vector ({len(runs)}) "
            f"does not match the length of the signals ({len(signals)})"
        )
    cleaned_signals = []
    for i, run in enumerate(np.unique(runs)):
        run_confounds = None
        run_sample_mask = None
        if confounds is not None:
            run_confounds = confounds[runs == run]
        if sample_mask is not None:
            run_sample_mask = sample_mask[i]
        run_signals = clean(
            signals[runs == run],
            detrend=detrend,
            standardize=standardize,
            confounds=run_confounds,
            sample_mask=run_sample_mask,
            filter=filter,
            low_pass=low_pass,
            high_pass=high_pass,
            t_r=t_r,
        )
        cleaned_signals.append(run_signals)
    return np.vstack(cleaned_signals)


def _sanitize_inputs(signals, runs, confounds, sample_mask, ensure_finite):
    """Clean up signals and confounds before processing."""
    n_time = len(signals)  # original length of the signal
    n_runs, runs = _sanitize_runs(n_time, runs)
    confounds = sanitize_confounds(n_time, confounds)
    sample_mask = _sanitize_sample_mask(n_time, n_runs, runs, sample_mask)
    signals = _sanitize_signals(signals, ensure_finite)
    return signals, runs, confounds, sample_mask


def sanitize_confounds(n_time, confounds):
    """Check that confounds are of the correct type.

    When passing multiple runs, ensure the
    number of runs matches the sets of confound regressors.
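
    A minimal sketch (illustrative only): a 1D confound is reshaped into
    a 2D (n_time, 1) float array.

    >>> import numpy as np
    >>> from nilearn.signal import sanitize_confounds
    >>> conf = sanitize_confounds(10, np.ones(10))
    >>> conf.shape
    (10, 1)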

1021 """ 

1022 if confounds is None: 

1023 return confounds 

1024 

1025 if not isinstance(confounds, (list, tuple, str, np.ndarray, pd.DataFrame)): 

1026 raise TypeError( 

1027 f"confounds keyword has an unhandled type: {confounds.__class__}" 

1028 ) 

1029 

1030 if not isinstance(confounds, (list, tuple)): 

1031 confounds = (confounds,) 

1032 

1033 all_confounds = [] 

1034 for confound in confounds: 

1035 confound = _sanitize_confound_dtype(n_time, confound) 

1036 all_confounds.append(confound) 

1037 confounds = np.hstack(all_confounds) 

1038 return _ensure_float(confounds) 

1039 

1040 


def _sanitize_sample_mask(n_time, n_runs, runs, sample_mask):
    """Check sample_mask is the right data type and matches the run index."""
    if sample_mask is None:
        return sample_mask

    sample_mask = check_run_sample_masks(n_runs, sample_mask)

    if runs is None:
        runs = np.zeros(n_time)

    # check the sample mask of each run
    for i, current_mask in enumerate(sample_mask):
        _check_sample_mask_index(i, n_runs, runs, current_mask)

    return sample_mask[0] if sum(runs) == 0 else sample_mask


def _check_sample_mask_index(i, n_runs, runs, current_mask):
    """Ensure the indexes in the sample mask are valid."""
    len_run = sum(i == runs)
    len_current_mask = len(current_mask)
    # sample_mask longer than signal
    if len_current_mask > len_run:
        raise IndexError(
            f"sample_mask {i + 1} of {n_runs} has more timepoints "
            f"than the current run; sample_mask contains "
            f"{len_current_mask} indexes but the run has {len_run} "
            "timepoints."
        )
    # sample_mask indexes exceed the signal timepoints
    invalid_index = current_mask[current_mask > len_run]
    if invalid_index.size > 0:
        raise IndexError(
            f"sample_mask {i + 1} of {n_runs} contains "
            f"invalid index {invalid_index}. "
            f"The signal contains {len_run} time points."
        )


def _sanitize_runs(n_time, runs):
    """Check runs are supplied in the correct format \
    and detect the number of unique runs.
    """
    if runs is not None and len(runs) != n_time:
        raise ValueError(
            f"The length of the run vector ({len(runs)}) "
            f"does not match the length of the signals ({n_time})"
        )
    n_runs = 1 if runs is None else len(np.unique(runs))
    return n_runs, runs


def _sanitize_confound_dtype(n_signal, confound):
    """Check that the confound is of the correct datatype."""
    if isinstance(confound, pd.DataFrame):
        confound = confound.to_numpy()
    if isinstance(confound, (str, Path)):
        filename = confound
        confound = csv_to_array(filename)
        if np.isnan(confound.flat[0]):
            # There may be a header
            confound = csv_to_array(filename, skip_header=1)
        if confound.shape[0] != n_signal:
            raise ValueError(
                "Confound signal has an incorrect length.\n"
                f"Signal length: {n_signal}; "
                f"confound length: {confound.shape[0]}"
            )
    elif isinstance(confound, np.ndarray):
        if confound.ndim == 1:
            confound = np.atleast_2d(confound).T
        elif confound.ndim != 2:
            raise ValueError(
                "confound array has an incorrect number "
                f"of dimensions: {confound.ndim}"
            )
        if confound.shape[0] != n_signal:
            raise ValueError(
                "Confound signal has an incorrect length. "
                f"Signal length: {n_signal}; "
                f"confound length: {confound.shape[0]}."
            )

    else:
        raise TypeError(
            f"confound has an unhandled type: {confound.__class__}"
        )
    return confound


def _check_filter_parameters(filter, low_pass, high_pass, t_r):
    """Check that all filter-related parameters are set correctly."""
    if not filter:
        if any(
            isinstance(item, (float, int)) for item in [low_pass, high_pass]
        ):
            warnings.warn(
                "No filter type selected but a cutoff frequency was "
                "provided. Will not perform filtering.",
                stacklevel=find_stack_level(),
            )
        return False
    elif filter in available_filters:
        if filter == "cosine" and not all(
            isinstance(item, (float, int)) for item in [t_r, high_pass]
        ):
            raise ValueError(
                "Repetition time (t_r) and low cutoff frequency (high_pass) "
                "must be specified for cosine filtering. "
                f"t_r='{t_r}', high_pass='{high_pass}'"
            )
        if filter == "butterworth":
            if all(item is None for item in [low_pass, high_pass]):
                # Butterworth was switched off by passing
                # None to both low_pass and high_pass
                return False
            if t_r is None:
                raise ValueError(
                    "Repetition time (t_r) must be specified for "
                    "butterworth filtering."
                )
            if any(isinstance(item, bool) for item in [low_pass, high_pass]):
                raise TypeError(
                    "high/low pass must be float or None but you provided "
                    f"high_pass='{high_pass}', low_pass='{low_pass}'"
                )
        return filter
    else:
        raise ValueError(f"Filter method {filter} not implemented.")


def _sanitize_signals(signals, ensure_finite):
    """Ensure signals are in the correct state."""
    if not isinstance(ensure_finite, bool):
        raise ValueError(
            "'ensure_finite' must be boolean type True or False "
            f"but you provided ensure_finite={ensure_finite}"
        )
    signals = signals.copy()
    if not isinstance(signals, np.ndarray):
        signals = as_ndarray(signals)
    if ensure_finite:
        mask = np.logical_not(np.isfinite(signals))
        if mask.any():
            signals[mask] = 0
    return _ensure_float(signals)


def _check_signal_parameters(detrend, standardize_confounds):
    """Raise a warning if the combination of parameters is illogical."""
    if not detrend and not standardize_confounds:
        warnings.warn(
            "When confounds are provided, one must perform detrending "
            "and/or standardize the confounds. "
            f"You provided detrend={detrend}, "
            f"standardize_confounds={standardize_confounds}. "
            "If the confounds were not standardized or demeaned before "
            "passing them to signal.clean, the signal will not be "
            "correctly cleaned.",
            stacklevel=find_stack_level(),
        )