Coverage for nilearn/glm/first_level/design_matrix.py: 10%

134 statements  

"""Implement fMRI Design Matrix creation.

Design matrices are represented by Pandas DataFrames.
Computations of the different parts of the design matrix are confined
to the make_first_level_design_matrix function, which creates a DataFrame.
All the others are ancillary functions.

Design matrices contain three different types of regressors:

1. Task-related regressors, that result from the convolution
   of the experimental paradigm regressors with hemodynamic models.
   A hemodynamic model is one of:

   - 'spm' : linear filter used in the SPM software
   - 'glover' : linear filter estimated by G. Glover
   - 'spm + derivative', 'glover + derivative': the same linear models,
     plus their time derivative (2 regressors per condition)
   - 'spm + derivative + dispersion', 'glover + derivative + dispersion':
     idem plus the derivative with respect to the dispersion parameter
     of the hrf (3 regressors per condition)
   - 'fir' : finite impulse response model, generic linear filter

2. User-specified regressors, that represent information available on
   the data, e.g. motion parameters, physiological data resampled at
   the acquisition rate, or sinusoidal regressors that model the
   signal at a frequency of interest.

3. Drift regressors, that represent low-frequency phenomena of no
   interest in the data; they need to be included to reduce the
   variance of the estimates.
"""

from warnings import warn

import numpy as np
import pandas as pd

from nilearn._utils import fill_doc
from nilearn._utils.glm import check_and_load_tables
from nilearn._utils.logger import find_stack_level
from nilearn._utils.param_validation import check_params
from nilearn.glm._utils import full_rank
from nilearn.glm.first_level.experimental_paradigm import (
    check_events,
    handle_modulation_of_duplicate_events,
)
from nilearn.glm.first_level.hemodynamic_models import (
    compute_regressor,
    orthogonalize,
)
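# Illustrative sketch (not part of nilearn): the helper below is a
# hypothetical demo showing how the hrf_model choices listed in the module
# docstring change the number of task-related regressors per condition.
# The event timings and scan grid are made up for demonstration purposes.
def _demo_hrf_model_regressor_counts():
    """Return the design-matrix column names for a few hrf_model choices."""
    frame_times = np.arange(100) * 2.0  # 100 scans, TR = 2 s (arbitrary)
    events = pd.DataFrame(
        {
            "onset": [10.0, 40.0],
            "duration": [5.0, 5.0],
            "trial_type": ["task", "task"],
        }
    )
    columns = {}
    for hrf_model in ("glover", "glover + derivative", "fir"):
        design = make_first_level_design_matrix(
            frame_times,
            events=events,
            hrf_model=hrf_model,
            fir_delays=[0, 1, 2] if hrf_model == "fir" else None,
        )
        # 'glover' -> 1 task column, 'glover + derivative' -> 2 task columns,
        # 'fir' -> one task column per delay; drift and constant columns
        # are appended in every case.
        columns[hrf_model] = list(design.columns)
    return columns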

######################################################################
# Ancillary functions
######################################################################


def _poly_drift(order, frame_times):
    """Create a polynomial drift matrix.

    Parameters
    ----------
    order : :obj:`int`,
        Number of polynomials in the drift model.

    frame_times : array of shape (n_scans,),
        Time stamps used to sample polynomials.

    Returns
    -------
    pol : ndarray, shape (n_scans, order + 1)
        Estimated polynomial drifts plus a constant regressor.

    """
    order = int(order)
    pol = np.zeros((np.size(frame_times), order + 1))
    tmax = float(frame_times.max())
    for k in range(order + 1):
        pol[:, k] = (frame_times / tmax) ** k
    pol = orthogonalize(pol)
    pol = np.hstack((pol[:, 1:], pol[:, :1]))
    return pol
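# Illustrative sketch (not part of nilearn): a hypothetical helper showing
# what _poly_drift returns for a made-up acquisition grid.
def _demo_poly_drift():
    """Build a quadratic polynomial drift basis for 128 scans (TR = 2.5 s)."""
    frame_times = np.arange(128) * 2.5  # arbitrary example timing
    pol = _poly_drift(order=2, frame_times=frame_times)
    # pol has shape (128, 3): two orthogonalized polynomial drifts followed
    # by the constant regressor, which _poly_drift moves to the last column.
    return pol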

def create_cosine_drift(high_pass, frame_times):
    """Create a cosine drift matrix \
    with frequencies lower than or equal to high_pass.

    Parameters
    ----------
    high_pass : :obj:`float`
        Cutoff frequency of the high-pass filter in Hz.

    frame_times : array of shape (n_scans,)
        The sampling times in seconds.

    Returns
    -------
    cosine_drift : array of shape (n_scans, n_drifts)
        Cosine drifts plus a constant regressor at cosine_drift[:, -1].

    References
    ----------
    http://en.wikipedia.org/wiki/Discrete_cosine_transform DCT-II

    """
    n_frames = len(frame_times)
    n_times = np.arange(n_frames)
    dt = (frame_times[-1] - frame_times[0]) / (n_frames - 1)
    if high_pass * dt >= 0.5:
        warn(
            "High-pass filter will span all accessible frequencies "
            "and saturate the design matrix. "
            "You may want to reduce the high_pass value. "
            f"The provided value is {high_pass} Hz.",
            stacklevel=find_stack_level(),
        )
    order = np.minimum(
        n_frames - 1, int(np.floor(2 * n_frames * high_pass * dt))
    )
    cosine_drift = np.zeros((n_frames, order + 1))
    normalizer = np.sqrt(2.0 / n_frames)

    for k in range(1, order + 1):
        cosine_drift[:, k - 1] = normalizer * np.cos(
            (np.pi / n_frames) * (n_times + 0.5) * k
        )

    cosine_drift[:, -1] = 1.0
    return cosine_drift
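# Illustrative sketch (not part of nilearn): a hypothetical helper showing
# how the number of cosine drift regressors follows from high_pass and the
# sampling grid (the values below are made up).
def _demo_cosine_drift():
    """Build a cosine drift basis for 128 scans (TR = 2 s, cutoff 0.01 Hz)."""
    frame_times = np.arange(128) * 2.0
    drift = create_cosine_drift(high_pass=0.01, frame_times=frame_times)
    # order = floor(2 * 128 * 0.01 * 2.0) = 5, so drift has 5 cosine columns
    # plus a constant column at drift[:, -1]; its shape is (128, 6).
    return drift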

def _none_drift(frame_times):
    """Create an intercept vector.

    Returns
    -------
    np.ones_like(frame_times)

    """
    return np.reshape(np.ones_like(frame_times), (np.size(frame_times), 1))


def _make_drift(drift_model, frame_times, order, high_pass):
    """Create the drift matrix.

    Parameters
    ----------
    drift_model : {'polynomial', 'cosine', None},
        String that specifies the desired drift model.

    frame_times : array of shape (n_scans,),
        The acquisition times of the scans (in seconds).

    order : :obj:`int`, optional,
        Order of the drift model (in case it is polynomial).

    high_pass : :obj:`float`, optional,
        High-pass frequency in case of a cosine model (in Hz).

    Returns
    -------
    drift : array of shape (n_scans, n_drifts),
        The drift matrix.

    names : :obj:`list` of length n_drifts,
        The associated names.

    """
    if isinstance(drift_model, str):
        drift_model = drift_model.lower()  # for robust comparisons
    if drift_model == "polynomial":
        drift = _poly_drift(order, frame_times)
    elif drift_model == "cosine":
        drift = create_cosine_drift(high_pass, frame_times)
    elif drift_model is None:
        drift = _none_drift(frame_times)
    else:
        raise NotImplementedError(f"Unknown drift model {drift_model!r}")
    names = [f"drift_{int(k)}" for k in range(1, drift.shape[1])]
    names.append("constant")
    return drift, names
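# Illustrative sketch (not part of nilearn): a hypothetical helper showing
# that _make_drift always appends a 'constant' regressor, whatever the model.
def _demo_make_drift():
    """Compare drift bases for the three supported drift models."""
    frame_times = np.arange(100) * 2.0  # arbitrary example timing
    for drift_model in ("cosine", "polynomial", None):
        drift, names = _make_drift(
            drift_model, frame_times, order=1, high_pass=0.01
        )
        # The last column and the last name are always the constant term;
        # with drift_model=None the basis reduces to that single column.
        assert names[-1] == "constant"
        assert drift.shape == (100, len(names))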

def _convolve_regressors(
    events,
    hrf_model,
    frame_times,
    fir_delays=None,
    min_onset=-24,
    oversampling=50,
):
    """Create a matrix that comprises \
    the convolution of the condition onsets with a given hrf model.

    Parameters
    ----------
    events : DataFrame instance,
        Events data describing the experimental paradigm;
        see nilearn.glm.first_level.experimental_paradigm to check the
        specification for these to be valid paradigm descriptors.

    %(hrf_model)s

    frame_times : array of shape (n_scans,)
        The targeted timing for the design matrix.

    fir_delays : array-like of shape (n_delays,), default=None
        In case of FIR design, yields the array of delays
        used in the FIR model (in scans).
        Will default to ``[0]`` if ``None`` is passed.

    min_onset : :obj:`float`, default=-24
        Minimal onset relative to frame_times[0] (in seconds); events
        that start before frame_times[0] + min_onset are not considered.

    oversampling : :obj:`int`, default=50
        Oversampling factor used in temporal convolutions.

    Returns
    -------
    regressor_matrix : array of shape (n_scans, n_regressors),
        Contains the convolved regressors associated with the
        experimental conditions.

    regressor_names : :obj:`list` of strings,
        The regressor names, that depend on the hrf model used:
        if 'glover' or 'spm', this is identical to the input names;
        if 'glover + derivative' or 'spm + derivative', a second name is
        output, i.e. '#name_derivative';
        if 'spm + derivative + dispersion' or
        'glover + derivative + dispersion',
        a third name is used, i.e. '#name_dispersion';
        if 'fir', the regressors are numbered according to '#name_#delay'.

    """
    check_params(locals())
    if fir_delays is None:
        fir_delays = [0]
    regressor_names = []
    regressor_matrix = None

    events_copy = check_events(events)
    cleaned_events = handle_modulation_of_duplicate_events(events_copy)

    trial_type = cleaned_events["trial_type"].to_numpy()
    onset = cleaned_events["onset"].to_numpy()
    duration = cleaned_events["duration"].to_numpy()
    modulation = cleaned_events["modulation"].to_numpy()

    for condition in np.unique(trial_type):
        condition_mask = trial_type == condition
        exp_condition = (
            onset[condition_mask],
            duration[condition_mask],
            modulation[condition_mask],
        )
        reg, names = compute_regressor(
            exp_condition,
            hrf_model,
            frame_times,
            con_id=condition,
            fir_delays=fir_delays,
            oversampling=oversampling,
            min_onset=min_onset,
        )

        regressor_names += names
        if regressor_matrix is None:
            regressor_matrix = reg
        else:
            regressor_matrix = np.hstack((regressor_matrix, reg))
    return regressor_matrix, regressor_names
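# Illustrative sketch (not part of nilearn): a hypothetical helper showing
# the inputs _convolve_regressors expects and what it returns, using a
# made-up two-condition paradigm.
def _demo_convolve_regressors():
    """Convolve a toy paradigm with the Glover hrf."""
    frame_times = np.arange(60) * 1.0  # 60 scans, TR = 1 s (arbitrary)
    events = pd.DataFrame(
        {
            "onset": [0.0, 10.0, 30.0],
            "duration": [1.0, 1.0, 1.0],
            "trial_type": ["audio", "video", "audio"],
        }
    )
    matrix, names = _convolve_regressors(events, "glover", frame_times)
    # One convolved column per condition (sorted by np.unique), so
    # names == ['audio', 'video'] and matrix.shape == (60, 2).
    return matrix, names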

######################################################################
# Design matrix creation
######################################################################


@fill_doc
def make_first_level_design_matrix(
    frame_times,
    events=None,
    hrf_model="glover",
    drift_model="cosine",
    high_pass=0.01,
    drift_order=1,
    fir_delays=None,
    add_regs=None,
    add_reg_names=None,
    min_onset=-24,
    oversampling=50,
):
    """Generate a design matrix from the input parameters.

    Parameters
    ----------
    frame_times : array of shape (n_frames,)
        The timing of acquisition of the scans in seconds.

    events : :obj:`pandas.DataFrame` instance, \
             or :obj:`str` or :obj:`pathlib.Path` to a CSV or TSV file, \
             or None, default=None
        Events data that describes the experimental paradigm.
        The resulting DataFrame instance must (or may) have these keys:

        - ``'onset'``: REQUIRED
          Column to specify the start time of each event in seconds.
          An error is raised if this key is missing.

        - ``'duration'``: REQUIRED
          Column to specify the duration of each event in seconds.

          .. warning::

              Events with a duration of 0 seconds will be modeled
              using a 'delta function'.

        - ``'trial_type'``: OPTIONAL
          Column to specify per-event experimental condition identifiers.
          If missing, each event is labeled 'dummy'
          and all events are considered to form a single condition.

        - ``'modulation'``: OPTIONAL
          Column to specify the amplitude of each event.
          If missing, the default is set to ones(n_events).

        An experimental paradigm is valid if it has an ``'onset'`` key
        and a ``'duration'`` key.
        If these keys are missing an error will be raised.
        For the other keys a warning will be displayed.
        Particular attention should be given to the ``'trial_type'`` key
        which defines the different conditions in the experimental paradigm.

    %(hrf_model)s

    drift_model : {'cosine', 'polynomial', None}, default='cosine'
        Specifies the desired drift model.

    high_pass : :obj:`float`, default=0.01
        High-pass frequency in case of a cosine model (in Hz).

    drift_order : :obj:`int`, default=1
        Order of the drift model (in case it is polynomial).

    fir_delays : array of shape (n_delays,), :obj:`list` or None, \
            default=None
        Will be set to ``[0]`` if ``None`` is passed.
        In case of :term:`FIR` design,
        yields the array of delays used in the :term:`FIR`
        model (in scans).

    add_regs : array of shape (n_frames, n_add_reg) or \
            pandas DataFrame or None, default=None
        Additional user-supplied regressors, e.g. data-driven noise
        regressors or seed-based regressors.

    add_reg_names : :obj:`list` of (n_add_reg,) :obj:`str`, or \
            None, default=None
        If None while add_regs was provided, these will be named
        'reg0', 'reg1', ..., 'reg<n_add_reg - 1>'.
        If add_regs is a DataFrame, the corresponding column names are used
        and add_reg_names is ignored.

    min_onset : :obj:`float`, default=-24
        Minimal onset relative to frame_times[0] (in seconds);
        events that start before frame_times[0] + min_onset
        are not considered.

    oversampling : :obj:`int`, default=50
        Oversampling factor used in temporal convolutions.

    Returns
    -------
    design_matrix : DataFrame instance,
        Holds the computed design matrix, the index being the frame_times
        and each column a regressor.

    """
    check_params(locals())
    if fir_delays is None:
        fir_delays = [0]
    # check arguments
    # check that additional regressor specification is correct
    n_add_regs = 0
    if add_regs is not None:
        if isinstance(add_regs, pd.DataFrame):
            add_regs_ = add_regs.to_numpy()
            add_reg_names = add_regs.columns.tolist()
        else:
            add_regs_ = np.atleast_2d(add_regs)
        n_add_regs = add_regs_.shape[1]
        assert add_regs_.shape[0] == np.size(frame_times), (
            "Incorrect specification of additional regressors: "
            f"length of regressors provided: {add_regs_.shape[0]}, number of "
            f"time-frames: {np.size(frame_times)}."
        )

    # check that additional regressor names are well specified
    if add_reg_names is None:
        add_reg_names = [f"reg{int(k)}" for k in range(n_add_regs)]
    elif len(add_reg_names) != n_add_regs:
        raise ValueError(
            "Incorrect number of additional regressor names was provided "
            f"({len(add_reg_names)} provided, {n_add_regs} expected)."
        )

    # computation of the matrix
    names = []
    matrix = None

    # step 1: events-related regressors
    if events is not None:
        events = check_and_load_tables(events, "events")[0]
        # create the condition-related regressors
        if isinstance(hrf_model, str):
            hrf_model = hrf_model.lower()
        matrix, names = _convolve_regressors(
            events, hrf_model, frame_times, fir_delays, min_onset, oversampling
        )

    # step 2: additional regressors
    if add_regs is not None:
        # add user-supplied regressors and corresponding names
        matrix = (
            np.hstack((matrix, add_regs)) if matrix is not None else add_regs
        )
        names += add_reg_names

    # step 3: drifts
    drift, dnames = _make_drift(
        drift_model, frame_times, drift_order, high_pass
    )

    matrix = np.hstack((matrix, drift)) if matrix is not None else drift

    names += dnames
    # check column names are all unique
    if len(np.unique(names)) != len(names):
        raise ValueError("Design matrix columns do not have unique names")

    # step 4: Force the design matrix to be full rank at working precision
    matrix, _ = full_rank(matrix)

    design_matrix = pd.DataFrame(matrix, columns=names, index=frame_times)
    return design_matrix
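# Illustrative sketch (not part of nilearn): a hypothetical helper building a
# complete first-level design matrix with the three regressor types described
# in the module docstring (task, user-supplied, drift). All values are made up.
def _demo_first_level_design_matrix():
    """Build a design matrix for a toy paradigm with two motion regressors."""
    tr = 2.0
    frame_times = np.arange(120) * tr  # 120 scans, TR = 2 s (arbitrary)
    events = pd.DataFrame(
        {
            "onset": [10.0, 50.0, 90.0, 130.0],
            "duration": [5.0, 5.0, 5.0, 5.0],
            "trial_type": ["faces", "houses", "faces", "houses"],
        }
    )
    rng = np.random.default_rng(42)
    motion = pd.DataFrame(
        rng.standard_normal((120, 2)), columns=["trans_x", "trans_y"]
    )
    design = make_first_level_design_matrix(
        frame_times,
        events=events,
        hrf_model="glover",
        drift_model="cosine",
        high_pass=0.01,
        add_regs=motion,
    )
    # Columns: one convolved regressor per condition ('faces', 'houses'),
    # the two motion regressors, the cosine drifts, and a final 'constant'.
    return design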

def check_design_matrix(design_matrix):
    """Check that the provided DataFrame is indeed a valid design matrix \
    descriptor, and return a triplet of fields.

    Parameters
    ----------
    design_matrix : :obj:`pandas.DataFrame`
        Describes a design matrix.

    Returns
    -------
    frame_times : array of shape (n_frames,),
        Sampling times of the design matrix in seconds.

    matrix : array of shape (n_frames, n_regressors), dtype='f'
        Numerical values for the design matrix.

    names : :obj:`list` of :obj:`str` of length n_regressors,
        The names of the design matrix columns.

    """
    if len(design_matrix.columns) == 0:
        raise ValueError("The design_matrix dataframe cannot be empty.")
    names = list(design_matrix.keys())
    frame_times = design_matrix.index
    matrix = design_matrix.to_numpy()
    return frame_times, matrix, names
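# Illustrative sketch (not part of nilearn): a hypothetical helper showing how
# check_design_matrix unpacks a design matrix DataFrame into its three fields.
def _demo_check_design_matrix():
    """Split a small drift-only design matrix into times, values, and names."""
    frame_times = np.arange(10) * 2.0  # arbitrary example timing
    design = make_first_level_design_matrix(
        frame_times, drift_model="polynomial", drift_order=2
    )
    times, values, names = check_design_matrix(design)
    # times is the frame_times index, values is a (10, 3) array, and
    # names == ['drift_1', 'drift_2', 'constant'].
    return times, values, names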

def make_second_level_design_matrix(subjects_label, confounds=None):
    """Set up a second level design.

    Construct a design matrix with an intercept and subject-specific
    confounds.

    Parameters
    ----------
    subjects_label : :obj:`list` of :obj:`str`
        Contains subject labels used to extract the confounds in the right
        order, corresponding with the images, to create the design matrix.

    confounds : :class:`pandas.DataFrame` or ``None``, default=None
        If given, contains at least two columns, ``subject_label`` and one
        confound. The subjects list determines the rows to extract from
        confounds thanks to its ``subject_label`` column. All subjects must
        have confounds specified. There should be only one row per subject.

    Returns
    -------
    design_matrix : :class:`pandas.DataFrame`
        The second level design matrix.

    """
    confounds_name = []
    if confounds is not None:
        confounds_name = confounds.columns.tolist()
        confounds_name.remove("subject_label")

    design_columns = [*confounds_name, "intercept"]
    # check column names are unique
    if len(np.unique(design_columns)) != len(design_columns):
        raise ValueError("Design matrix columns do not have unique names")

    # float dtype necessary for linalg
    design_matrix = pd.DataFrame(columns=design_columns, dtype="float64")
    for ridx, subject_label in enumerate(subjects_label):
        design_matrix.loc[ridx] = [0.0] * len(design_columns)
        design_matrix.loc[ridx, "intercept"] = 1.0
        if confounds is not None:
            conrow = confounds["subject_label"] == subject_label
            if np.sum(conrow) > 1:
                raise ValueError(
                    "confounds contain more than one row "
                    f"for subject {subject_label}"
                )
            elif np.sum(conrow) == 0:
                raise ValueError(
                    f"confounds not specified for subject {subject_label}"
                )
            for conf_name in confounds_name:
                confounds_value = confounds[conrow][conf_name].to_numpy()[0]
                design_matrix.loc[ridx, conf_name] = confounds_value

    # check design matrix is not singular
    if np.linalg.cond(design_matrix.values) > design_matrix.size:
        warn(
            "Attention: Design matrix is singular. Aberrant estimates "
            "are expected.",
            stacklevel=find_stack_level(),
        )
    return design_matrix
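# Illustrative sketch (not part of nilearn): a hypothetical helper showing the
# expected shape of the confounds table and of the resulting second level
# design matrix. Subject labels and ages are made up.
def _demo_second_level_design_matrix():
    """Build a second level design with one confound for three subjects."""
    subjects_label = ["sub-01", "sub-02", "sub-03"]
    confounds = pd.DataFrame(
        {"subject_label": subjects_label, "age": [23.0, 31.0, 27.0]}
    )
    design = make_second_level_design_matrix(subjects_label, confounds)
    # One row per subject, columns ['age', 'intercept'], intercept set to 1.
    return design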