Coverage for nilearn/glm/first_level/design_matrix.py: 10% (134 statements)
1"""Implement fMRI Design Matrix creation.
3Design matrices are represented by Pandas DataFrames
4Computations of the different parts of the design matrix are confined
5to the make_first_level_design_matrix function, that create a DataFrame
6All the others are ancillary functions.
8Design matrices contain three different types of regressors:
101. Task-related regressors, that result from the convolution
11 of the experimental paradigm regressors with hemodynamic models
12 A hemodynamic model is one of:
14 - 'spm' : linear filter used in the SPM software
15 - 'glover' : linear filter estimated by G.Glover
16 - 'spm + derivative', 'glover + derivative': the same linear models,
17 plus their time derivative (2 regressors per condition)
18 - 'spm + derivative + dispersion', 'glover + derivative + dispersion':
19 idem plus the derivative wrt the dispersion parameter of the hrf
20 (3 regressors per condition)
21 - 'fir' : finite impulse response model, generic linear filter
232. User-specified regressors, that represent information available on
24 the data, e.g. motion parameters, physiological data resampled at
25 the acquisition rate, or sinusoidal regressors that model the
26 signal at a frequency of interest.
283. Drift regressors, that represent low_frequency phenomena of no
29 interest in the data; they need to be included to reduce variance
30 estimates.
31"""

from warnings import warn

import numpy as np
import pandas as pd

from nilearn._utils import fill_doc
from nilearn._utils.glm import check_and_load_tables
from nilearn._utils.logger import find_stack_level
from nilearn._utils.param_validation import check_params
from nilearn.glm._utils import full_rank
from nilearn.glm.first_level.experimental_paradigm import (
    check_events,
    handle_modulation_of_duplicate_events,
)
from nilearn.glm.first_level.hemodynamic_models import (
    compute_regressor,
    orthogonalize,
)

######################################################################
# Ancillary functions
######################################################################


def _poly_drift(order, frame_times):
    """Create a polynomial drift matrix.

    Parameters
    ----------
    order : :obj:`int`
        Number of polynomials in the drift model.

    frame_times : array of shape (n_scans,)
        Time stamps used to sample polynomials.

    Returns
    -------
    pol : ndarray, shape (n_scans, order + 1)
        Estimated polynomial drifts plus a constant regressor.

    """
    order = int(order)
    pol = np.zeros((np.size(frame_times), order + 1))
    tmax = float(frame_times.max())
    for k in range(order + 1):
        pol[:, k] = (frame_times / tmax) ** k
    pol = orthogonalize(pol)
    pol = np.hstack((pol[:, 1:], pol[:, :1]))
    return pol
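

# Illustrative sketch (added for this listing, not part of the nilearn
# module): how _poly_drift could be exercised. The frame times and order
# below are assumptions chosen for the example.
def _example_poly_drift():
    """Build an order-3 polynomial drift basis for 100 scans with a 2 s TR."""
    frame_times = np.arange(100) * 2.0  # hypothetical sampling times (s)
    drifts = _poly_drift(order=3, frame_times=frame_times)
    # 3 polynomial regressors plus a constant -> shape (100, 4),
    # with the constant regressor moved to the last column.
    return drifts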


def create_cosine_drift(high_pass, frame_times):
    """Create a cosine drift matrix with frequencies lower than or equal \
    to high_pass.

    Parameters
    ----------
    high_pass : :obj:`float`
        Cut-off frequency of the high-pass filter in Hz.

    frame_times : array of shape (n_scans,)
        The sampling times in seconds.

    Returns
    -------
    cosine_drift : array of shape (n_scans, n_drifts)
        Cosine drifts plus a constant regressor at cosine_drift[:, -1].

    References
    ----------
    http://en.wikipedia.org/wiki/Discrete_cosine_transform DCT-II

    """
    n_frames = len(frame_times)
    n_times = np.arange(n_frames)
    dt = (frame_times[-1] - frame_times[0]) / (n_frames - 1)
    if high_pass * dt >= 0.5:
        warn(
            "High-pass filter will span all accessible frequencies "
            "and saturate the design matrix. "
            "You may want to reduce the high_pass value. "
            f"The provided value is {high_pass} Hz.",
            stacklevel=find_stack_level(),
        )
    order = np.minimum(
        n_frames - 1, int(np.floor(2 * n_frames * high_pass * dt))
    )
    cosine_drift = np.zeros((n_frames, order + 1))
    normalizer = np.sqrt(2.0 / n_frames)

    for k in range(1, order + 1):
        cosine_drift[:, k - 1] = normalizer * np.cos(
            (np.pi / n_frames) * (n_times + 0.5) * k
        )

    cosine_drift[:, -1] = 1.0
    return cosine_drift
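

# Illustrative sketch (added for this listing, not part of the nilearn
# module): a DCT-II drift basis for a hypothetical run of 128 scans with
# a 2 s TR and the default 0.01 Hz cut-off. With these values the basis
# has 5 cosine regressors plus the constant column, i.e. shape (128, 6).
def _example_cosine_drift():
    """Build a cosine drift basis; the frame times are assumed for the example."""
    frame_times = np.arange(128) * 2.0  # hypothetical sampling times (s)
    drifts = create_cosine_drift(high_pass=0.01, frame_times=frame_times)
    return drifts  # drifts[:, -1] is the constant regressor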


def _none_drift(frame_times):
    """Create an intercept vector.

    Returns
    -------
    An array of ones of shape (np.size(frame_times), 1).

    """
    return np.reshape(np.ones_like(frame_times), (np.size(frame_times), 1))


def _make_drift(drift_model, frame_times, order, high_pass):
    """Create the drift matrix.

    Parameters
    ----------
    drift_model : {'polynomial', 'cosine', None}
        String that specifies the desired drift model.

    frame_times : array of shape (n_scans,)
        Sampling times of the scans in seconds.

    order : :obj:`int`, optional
        Order of the drift model (in case it is polynomial).

    high_pass : :obj:`float`, optional
        High-pass frequency in case of a cosine model (in Hz).

    Returns
    -------
    drift : array of shape (n_scans, n_drifts)
        The drift matrix.

    names : :obj:`list` of length n_drifts
        The associated names.

    """
    if isinstance(drift_model, str):
        drift_model = drift_model.lower()  # for robust comparisons
    if drift_model == "polynomial":
        drift = _poly_drift(order, frame_times)
    elif drift_model == "cosine":
        drift = create_cosine_drift(high_pass, frame_times)
    elif drift_model is None:
        drift = _none_drift(frame_times)
    else:
        raise NotImplementedError(f"Unknown drift model {drift_model!r}")
    names = [f"drift_{int(k)}" for k in range(1, drift.shape[1])]
    names.append("constant")
    return drift, names
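

# Illustrative sketch (added for this listing, not part of the nilearn
# module): _make_drift wraps the drift builders above and also returns the
# column names. The frame times below are assumptions for the example.
def _example_make_drift():
    """Get a cosine drift matrix together with its regressor names."""
    frame_times = np.arange(128) * 2.0  # hypothetical sampling times (s)
    drift, names = _make_drift(
        drift_model="cosine", frame_times=frame_times, order=1, high_pass=0.01
    )
    # for these inputs, names reads ['drift_1', ..., 'drift_5', 'constant']
    return drift, names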


def _convolve_regressors(
    events,
    hrf_model,
    frame_times,
    fir_delays=None,
    min_onset=-24,
    oversampling=50,
):
    """Create a matrix that comprises the convolution \
    of the condition onsets with a given hrf model.

    Parameters
    ----------
    events : DataFrame instance
        Events data describing the experimental paradigm;
        see nilearn.glm.first_level.experimental_paradigm to check the
        specification for these to be valid paradigm descriptors.

    %(hrf_model)s

    frame_times : array of shape (n_scans,)
        The targeted timing for the design matrix.

    fir_delays : array-like of shape (n_onsets,), default=None
        In case of FIR design, yields the array of delays
        used in the FIR model (in scans).
        Will default to ``[0]`` if ``None`` is passed.

    min_onset : :obj:`float`, default=-24
        Minimal onset relative to frame_times[0] (in seconds);
        events that start before frame_times[0] + min_onset are not
        considered.

    oversampling : :obj:`int`, default=50
        Oversampling factor used in temporal convolutions.

    Returns
    -------
    regressor_matrix : array of shape (n_scans, n_regressors)
        Contains the convolved regressors associated with the
        experimental conditions.

    regressor_names : :obj:`list` of strings
        The regressor names, which depend on the hrf model used:
        if 'glover' or 'spm', this is identical to the input names;
        if 'glover + derivative' or 'spm + derivative', a second name is
        output, i.e. '#name_derivative';
        if 'spm + derivative + dispersion' or
        'glover + derivative + dispersion',
        a third name is used, i.e. '#name_dispersion';
        if 'fir', the regressors are numbered according to '#name_#delay'.

    """
    check_params(locals())
    if fir_delays is None:
        fir_delays = [0]
    regressor_names = []
    regressor_matrix = None

    events_copy = check_events(events)
    cleaned_events = handle_modulation_of_duplicate_events(events_copy)

    trial_type = cleaned_events["trial_type"].to_numpy()
    onset = cleaned_events["onset"].to_numpy()
    duration = cleaned_events["duration"].to_numpy()
    modulation = cleaned_events["modulation"].to_numpy()

    for condition in np.unique(trial_type):
        condition_mask = trial_type == condition
        exp_condition = (
            onset[condition_mask],
            duration[condition_mask],
            modulation[condition_mask],
        )
        reg, names = compute_regressor(
            exp_condition,
            hrf_model,
            frame_times,
            con_id=condition,
            fir_delays=fir_delays,
            oversampling=oversampling,
            min_onset=min_onset,
        )

        regressor_names += names
        if regressor_matrix is None:
            regressor_matrix = reg
        else:
            regressor_matrix = np.hstack((regressor_matrix, reg))
    return regressor_matrix, regressor_names
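

# Illustrative sketch (added for this listing, not part of the nilearn
# module): convolving a tiny, made-up paradigm with the 'glover' HRF.
# The events table and frame times below are assumptions for the example.
def _example_convolve_regressors():
    """Convolve two conditions ('audio', 'video') with the Glover HRF."""
    frame_times = np.arange(128) * 2.0  # hypothetical sampling times (s)
    events = pd.DataFrame(
        {
            "onset": [10.0, 40.0, 70.0],
            "duration": [5.0, 5.0, 5.0],
            "trial_type": ["audio", "video", "audio"],
        }
    )
    matrix, names = _convolve_regressors(events, "glover", frame_times)
    # one regressor per condition -> matrix of shape (128, 2),
    # names == ['audio', 'video']
    return matrix, names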


######################################################################
# Design matrix creation
######################################################################


@fill_doc
def make_first_level_design_matrix(
    frame_times,
    events=None,
    hrf_model="glover",
    drift_model="cosine",
    high_pass=0.01,
    drift_order=1,
    fir_delays=None,
    add_regs=None,
    add_reg_names=None,
    min_onset=-24,
    oversampling=50,
):
    """Generate a design matrix from the input parameters.

    Parameters
    ----------
    frame_times : array of shape (n_frames,)
        The timing of acquisition of the scans in seconds.

    events : :obj:`pandas.DataFrame` instance, \
        or :obj:`str` or :obj:`pathlib.Path` to a CSV or TSV file, \
        or None, default=None
        Events data that describes the experimental paradigm.
        The resulting DataFrame instance must/may have these keys:

        - ``'onset'``: REQUIRED
          Column to specify the start time of each event in seconds.
          An error is raised if this key is missing.

        - ``'duration'``: REQUIRED
          Column to specify the duration of each event in seconds.

          .. warning::

              Events with a duration of 0 seconds will be modeled
              using a 'delta function'.

        - ``'trial_type'``: OPTIONAL
          Column to specify per-event experimental condition identifiers.
          If missing, each event is labeled 'dummy'
          and considered to form a unique condition.

        - ``'modulation'``: OPTIONAL
          Column to specify the amplitude of each event.
          If missing, the default is set to ones(n_events).

        An experimental paradigm is valid if it has an ``'onset'`` key
        and a ``'duration'`` key.
        If these keys are missing an error will be raised.
        For the other keys a warning will be displayed.
        Particular attention should be given to the ``'trial_type'`` key
        which defines the different conditions in the experimental paradigm.

    %(hrf_model)s

    drift_model : {'cosine', 'polynomial', None}, default='cosine'
        Specifies the desired drift model.

    high_pass : :obj:`float`, default=0.01
        High-pass frequency in case of a cosine model (in Hz).

    drift_order : :obj:`int`, default=1
        Order of the drift model (in case it is polynomial).

    fir_delays : array of shape (n_onsets,), :obj:`list` or None, default=None
        Will be set to ``[0]`` if ``None`` is passed.
        In case of :term:`FIR` design,
        yields the array of delays used in the :term:`FIR`
        model (in scans).

    add_regs : array of shape (n_frames, n_add_reg) or \
        pandas DataFrame or None, default=None
        Additional user-supplied regressors, e.g. data-driven noise
        regressors or seed-based regressors.

    add_reg_names : :obj:`list` of (n_add_reg,) :obj:`str`, or \
        None, default=None
        If None, while add_regs was provided, these will be termed
        'reg_i', i = 0..n_add_reg - 1.
        If add_regs is a DataFrame, the corresponding column names are used
        and add_reg_names is ignored.

    min_onset : :obj:`float`, default=-24
        Minimal onset relative to frame_times[0] (in seconds);
        events that start before frame_times[0] + min_onset are not
        considered.

    oversampling : :obj:`int`, default=50
        Oversampling factor used in temporal convolutions.

    Returns
    -------
    design_matrix : DataFrame instance
        Holds the computed design matrix, the index being the frame_times
        and each column a regressor.

    """
    check_params(locals())
    if fir_delays is None:
        fir_delays = [0]
    # check arguments
    # check that additional regressor specification is correct
    n_add_regs = 0
    if add_regs is not None:
        if isinstance(add_regs, pd.DataFrame):
            add_regs_ = add_regs.to_numpy()
            add_reg_names = add_regs.columns.tolist()
        else:
            add_regs_ = np.atleast_2d(add_regs)
        n_add_regs = add_regs_.shape[1]
        assert add_regs_.shape[0] == np.size(frame_times), (
            "Incorrect specification of additional regressors: "
            f"length of regressors provided: {add_regs_.shape[0]}, number of "
            f"time-frames: {np.size(frame_times)}."
        )

    # check that additional regressor names are well specified
    if add_reg_names is None:
        add_reg_names = [f"reg{int(k)}" for k in range(n_add_regs)]
    elif len(add_reg_names) != n_add_regs:
        raise ValueError(
            "Incorrect number of additional regressor names was provided "
            f"({len(add_reg_names)} provided, {n_add_regs} expected)."
        )

    # computation of the matrix
    names = []
    matrix = None

    # step 1: events-related regressors
    if events is not None:
        events = check_and_load_tables(events, "events")[0]
        # create the condition-related regressors
        if isinstance(hrf_model, str):
            hrf_model = hrf_model.lower()
        matrix, names = _convolve_regressors(
            events, hrf_model, frame_times, fir_delays, min_onset, oversampling
        )

    # step 2: additional regressors
    if add_regs is not None:
        # add user-supplied regressors and corresponding names
        matrix = (
            np.hstack((matrix, add_regs)) if matrix is not None else add_regs
        )
        names += add_reg_names

    # step 3: drifts
    drift, dnames = _make_drift(
        drift_model, frame_times, drift_order, high_pass
    )

    matrix = np.hstack((matrix, drift)) if matrix is not None else drift

    names += dnames
    # check column names are all unique
    if len(np.unique(names)) != len(names):
        raise ValueError("Design matrix columns do not have unique names")

    # step 4: force the design matrix to be full rank at working precision
    matrix, _ = full_rank(matrix)

    design_matrix = pd.DataFrame(matrix, columns=names, index=frame_times)
    return design_matrix
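

# Illustrative sketch (added for this listing, not part of the nilearn
# module): a full first-level design matrix for a hypothetical run of
# 128 scans (TR = 2 s) with two conditions, the Glover HRF and the
# default cosine drift model. All values below are assumptions.
def _example_first_level_design_matrix():
    """Build a design matrix with task, drift, and constant regressors."""
    frame_times = np.arange(128) * 2.0  # hypothetical sampling times (s)
    events = pd.DataFrame(
        {
            "onset": [10.0, 40.0, 70.0, 100.0],
            "duration": [5.0, 5.0, 5.0, 5.0],
            "trial_type": ["audio", "video", "audio", "video"],
        }
    )
    design = make_first_level_design_matrix(
        frame_times,
        events=events,
        hrf_model="glover",
        drift_model="cosine",
        high_pass=0.01,
    )
    # columns: 'audio', 'video', the cosine drifts, and 'constant';
    # the index holds frame_times
    return design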


def check_design_matrix(design_matrix):
    """Check that the provided DataFrame is indeed a valid design matrix \
    descriptor, and return a triplet of fields.

    Parameters
    ----------
    design_matrix : :obj:`pandas.DataFrame`
        Describes a design matrix.

    Returns
    -------
    frame_times : array of shape (n_frames,)
        Sampling times of the design matrix in seconds.

    matrix : array of shape (n_frames, n_regressors), dtype='f'
        Numerical values of the design matrix.

    names : :obj:`list` of length n_regressors
        Names of the design matrix columns.

    """
    if len(design_matrix.columns) == 0:
        raise ValueError("The design_matrix dataframe cannot be empty.")
    names = list(design_matrix.keys())
    frame_times = design_matrix.index
    matrix = design_matrix.to_numpy()
    return frame_times, matrix, names
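

# Illustrative sketch (added for this listing, not part of the nilearn
# module): unpacking a design matrix DataFrame into its three fields,
# reusing the hypothetical design built in the sketch above.
def _example_check_design_matrix():
    """Split a design matrix into frame times, values, and column names."""
    design = _example_first_level_design_matrix()  # sketch defined above
    frame_times, matrix, names = check_design_matrix(design)
    # frame_times is the DataFrame index, matrix its values as an array,
    # names the list of regressor names
    return frame_times, matrix, names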


def make_second_level_design_matrix(subjects_label, confounds=None):
    """Set up a second level design.

    Construct a design matrix with an intercept and subject-specific
    confounds.

    Parameters
    ----------
    subjects_label : :obj:`list` of :obj:`str`
        Contains subject labels to extract confounders in the right order,
        corresponding with the images, to create the design matrix.

    confounds : :class:`pandas.DataFrame` or ``None``, default=None
        If given, contains at least two columns, ``subject_label`` and one
        confound. The subjects list determines the rows to extract from
        confounds thanks to its ``subject_label`` column. All subjects must
        have confounds specified. There should be only one row per subject.

    Returns
    -------
    design_matrix : :class:`pandas.DataFrame`
        The second level design matrix.

    """
    confounds_name = []
    if confounds is not None:
        confounds_name = confounds.columns.tolist()
        confounds_name.remove("subject_label")

    design_columns = [*confounds_name, "intercept"]
    # check column names are unique
    if len(np.unique(design_columns)) != len(design_columns):
        raise ValueError("Design matrix columns do not have unique names")

    # float dtype necessary for linalg
    design_matrix = pd.DataFrame(columns=design_columns, dtype="float64")
    for ridx, subject_label in enumerate(subjects_label):
        design_matrix.loc[ridx] = [0.0] * len(design_columns)
        design_matrix.loc[ridx, "intercept"] = 1.0
        if confounds is not None:
            conrow = confounds["subject_label"] == subject_label
            if np.sum(conrow) > 1:
                raise ValueError(
                    "confounds contain more than one row "
                    f"for subject {subject_label}"
                )
            elif np.sum(conrow) == 0:
                raise ValueError(
                    f"confounds not specified for subject {subject_label}"
                )
            for conf_name in confounds_name:
                confounds_value = confounds[conrow][conf_name].to_numpy()[0]
                design_matrix.loc[ridx, conf_name] = confounds_value

    # check design matrix is not singular
    if np.linalg.cond(design_matrix.values) > design_matrix.size:
        warn(
            "Attention: Design matrix is singular. Aberrant estimates "
            "are expected.",
            stacklevel=find_stack_level(),
        )
    return design_matrix
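

# Illustrative sketch (added for this listing, not part of the nilearn
# module): a second-level design with one confound for three hypothetical
# subjects. Labels and values are assumptions; the confound is kept
# mean-centered so the conditioning check above does not trigger.
def _example_second_level_design_matrix():
    """Build a group-level design with one confound and an intercept."""
    subjects_label = ["sub-01", "sub-02", "sub-03"]
    confounds = pd.DataFrame(
        {
            "subject_label": ["sub-01", "sub-02", "sub-03"],
            "age_zscore": [-1.0, 0.0, 1.0],  # hypothetical z-scored ages
        }
    )
    design = make_second_level_design_matrix(subjects_label, confounds)
    # one row per subject, columns ['age_zscore', 'intercept']
    return design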