Coverage for nilearn/interfaces/fmriprep/load_confounds_utils.py: 16%
132 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-16 12:32 +0200
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-16 12:32 +0200
1"""Helper functions for the manipulation of fmriprep output confounds."""
3import itertools
4import json
5import re
6from pathlib import Path
8import numpy as np
9import pandas as pd
10from sklearn.preprocessing import scale
12from nilearn._utils.fmriprep_confounds import flag_single_gifti, is_camel_case
13from nilearn.interfaces.bids import parse_bids_filename
15from .load_confounds_scrub import extract_outlier_regressors
# Regular expressions (applied with ``re.search``) that the name of a
# functional image must match, keyed by the kind of input being validated.
# Dots are escaped so they only match a literal "." of the file extension.
img_file_patterns = {
    "aroma": "_desc-smoothAROMAnonaggr_bold",
    "nii.gz": r"(_space-.*)?_desc-preproc_bold\.nii\.gz",
    "dtseries.nii": r"(_space-.*)?_bold\.dtseries\.nii",
    "func.gii": r"_hemi-[LR](_space-.*)?_bold\.func\.gii",
}
# Error message associated with each key of ``img_file_patterns``; it is the
# message handed back by ``_ext_validator`` and raised by ``_check_images``
# when an image name does not match the corresponding pattern.
img_file_error = {
    "aroma": (
        "Input must be desc-smoothAROMAnonaggr_bold for full ICA-AROMA"
        " strategy."
    ),
    "nii.gz": "Invalid file type for the selected method.",
    "dtseries.nii": "Invalid file type for the selected method.",
    "func.gii": "need fMRIprep output with extension func.gii",
}
def check_params_confounds(confounds_raw, params):
    """Report which of the requested parameters exist in the confounds.

    Used for motion, wm_csf, global_signal, and compcor regressors.

    Parameters
    ----------
    confounds_raw : pandas.DataFrame
        Raw confounds loaded from the confounds file.

    params : :obj:`list` of :obj:`str`
        Column names built from the user's choices.

    Returns
    -------
    bool or :obj:`list` of :obj:`str`
        True when every parameter is a column of the confounds.
        False when none of them is (this includes an empty ``params``).
        Otherwise, the list of parameters that are missing.
    """
    available = confounds_raw.columns
    missing = [name for name in params if name not in available]
    # Nothing was found at all (or nothing was requested).
    if len(missing) == len(params):
        return False
    # A non-empty list signals a partial match; an empty one a full match.
    return missing or True
def find_confounds(confounds_raw, keywords):
    """Collect the confound columns whose name contains a keyword.

    Used for cosine regressors and ICA-AROMA regressors.

    Parameters
    ----------
    confounds_raw : pandas.DataFrame
        Raw confounds loaded from the confounds file.

    keywords : :obj:`list` of :obj:`str`
        Substrings to look for in the column names.

    Returns
    -------
    list of :obj:`str`
        Matching column names, grouped by keyword in the order the
        keywords were given. A column matching several keywords is
        listed once per keyword.
    """
    return [
        column
        for keyword in keywords
        for column in confounds_raw.columns
        if keyword in column
    ]
def sanitize_confounds(img_files):
    """Normalise the image input to a list and flag single inputs.

    Parameters
    ----------
    img_files : :obj:`str` or :obj:`list` of :obj:`str`
        Path to the functional image file(s).

    Returns
    -------
    img_files : :obj:`list` of :obj:`str`
        List of functional image file(s).
    flag_single : bool
        True if the input is a single file, False if it is a :obj:`list` of
        files.
    """
    # A one-element input is passed through as is and reported as single.
    if len(img_files) == 1:
        return img_files, True

    # A list of exactly two entries may be a gifti pair; the dedicated
    # helper decides. Anything else is single only if it is a plain string.
    may_be_gifti_pair = isinstance(img_files, list) and len(img_files) == 2
    flag_single = (
        flag_single_gifti(img_files)
        if may_be_gifti_pair
        else isinstance(img_files, str)
    )
    # Single inputs are wrapped so that callers always receive a list.
    wrapped = [img_files] if flag_single else img_files
    return wrapped, flag_single
def add_suffix(params, model):
    """Add derivative suffixes to a list of parameters.

    Used from motion, wm_csf, global_signal.

    Parameters
    ----------
    params : :obj:`list` of :obj:`str`
        List of parameters to add suffixes to.
    model : :obj:`str`
        Model to use. Options are "basic", "derivatives", "power2", or
        "full".

    Returns
    -------
    params_full : :obj:`list` of :obj:`str`
        The original parameters followed by every suffixed variant.
        For each parameter, suffixes are appended in a fixed order
        (derivative1, power2, derivative1_power2).

    Raises
    ------
    KeyError
        If ``model`` is not one of the supported options.
    """
    # Tuples rather than sets: iterating a set of strings depends on hash
    # randomization, which made the order of the output vary between runs.
    suffixes_by_model = {
        "basic": (),
        "derivatives": ("derivative1",),
        "power2": ("power2",),
        "full": ("derivative1", "power2", "derivative1_power2"),
    }
    suffixes = suffixes_by_model[model]
    params_full = list(params)
    params_full.extend(
        f"{par}_{suff}" for par in params for suff in suffixes
    )
    return params_full
def _generate_confounds_file_candidates(nii_file):
    """Generate confounds file candidates.

    Build a list of potential confounds filenames using all combinations of
    the entities in the image file. The "desc" entity is forced to
    "confounds" and is part of every candidate.

    Parameters
    ----------
    nii_file : str
        Path to the functional image file.

    Returns
    -------
    filenames : list of str
        List of potential confounds filenames (without suffix or
        extension), ordered by increasing number of entities.
    """
    parsed_file = parse_bids_filename(nii_file, legacy=False)
    entities = parsed_file["entities"]
    entities["desc"] = "confounds"

    # Combinations of distinct dictionary keys are already unique, so no
    # de-duplication is needed; going through a set would only make the
    # order of the result depend on hash randomization.
    keys = list(entities)
    return [
        "_".join("-".join([key, entities[key]]) for key in subset)
        for n_entities in range(1, len(keys) + 1)
        for subset in itertools.combinations(keys, n_entities)
        if "desc" in subset  # Require "desc"
    ]
def _get_file_name(nii_file):
    """Locate the confounds file that goes with a functional image.

    Parameters
    ----------
    nii_file : str
        Path to the functional image file. When a list is given
        (gifti input), only its first entry is used.

    Returns
    -------
    confound_file : str
        Path to the associated confounds file.

    Raises
    ------
    ValueError
        If no candidate file, or more than one, exists next to the image.
    """
    if isinstance(nii_file, list):  # catch gifti
        nii_file = nii_file[0]

    parent_dir = Path(nii_file).parent
    stems = _generate_confounds_file_candidates(nii_file)

    # fmriprep has changed the file suffix between v20.1.1 and v20.2.0 with
    # respect to BEP 012.
    # cf. https://neurostars.org/t/naming-change-confounds-regressors-to-confounds-timeseries/17637 # noqa: E501
    # Both the new and the legacy naming schemes are tried,
    # for backward compatibility.
    candidate_names = [
        stem + suffix
        for suffix in ("_timeseries.tsv", "_regressors.tsv")
        for stem in stems
    ]

    # Longest names first, so earlier entries reflect more retained entities.
    candidate_names.sort(key=len)
    candidate_names.reverse()

    candidate_paths = (parent_dir / name for name in candidate_names)
    found_files = [str(path) for path in candidate_paths if path.is_file()]

    if len(found_files) == 1:
        return found_files[0]
    if found_files:
        found_str = "\n\t".join(found_files)
        raise ValueError(f"Found more than one confound file:\n\t{found_str}")
    raise ValueError(
        "Could not find associated confound file. "
        "The functional derivatives should exist under the same parent "
        "directory."
    )
def get_confounds_file(image_file, flag_full_aroma):
    """Validate a functional image and return its confounds file.

    Parameters
    ----------
    image_file : :obj:`str`
        Path to the functional image file.

    flag_full_aroma : :obj:`bool`
        True if the input is a full ICA-AROMA output, False otherwise.

    Returns
    -------
    confounds_raw_path : :obj:`str`
        Path to the associated confounds file.
    """
    # Validation comes first: it raises before any file lookup is attempted.
    _check_images(image_file, flag_full_aroma)
    return _get_file_name(image_file)
def get_json(confounds_raw_path):
    """Return the path of the json companion of a confounds tsv file.

    Parameters
    ----------
    confounds_raw_path : :obj:`str` or path-like
        Path to the confounds tsv file.

    Returns
    -------
    :obj:`str`
        The same path with its last occurrence of "tsv" replaced by
        "json". The path is returned unchanged if it contains no "tsv".
    """
    path = str(confounds_raw_path)
    # Only the last "tsv" (the extension) is swapped, so that a "tsv"
    # appearing in a directory or file stem is left untouched.
    head, extension, tail = path.rpartition("tsv")
    if not extension:
        return path
    return f"{head}json{tail}"
def load_confounds_json(confounds_json, flag_acompcor):
    """Load json data companion to the confounds tsv file.

    Parameters
    ----------
    confounds_json : :obj:`str`
        Path to the json file.

    flag_acompcor : :obj:`bool`
        True if user selected anatomical compcor for denoising strategy,
        False otherwise.

    Returns
    -------
    confounds_json : dict or :obj:`str`
        Dictionary of confounds meta data from the confounds.json file.
        If the file cannot be opened and ``flag_acompcor`` is False,
        the input path is returned unchanged.

    Raises
    ------
    ValueError
        If the json file cannot be opened while ``flag_acompcor`` is True.
        This should not be the case for fMRIprep >= 1.4.0.
    """
    try:
        with Path(confounds_json).open("rb") as f:
            confounds_json = json.load(f)
    except OSError as err:
        # The metadata is only mandatory for anatomical CompCor; for other
        # strategies a missing file is tolerated.
        if flag_acompcor:
            raise ValueError(
                f"Could not find associated json file {confounds_json}. "
                "This is necessary for anatomical CompCor. "
                "The CompCor component is only supported for fMRIprep "
                "version >= 1.4.0."
            ) from err
    return confounds_json
def load_confounds_file_as_dataframe(confounds_raw_path):
    """Load raw confounds as a pandas DataFrame.

    Meanwhile detect if the fMRIPrep version is supported.

    Parameters
    ----------
    confounds_raw_path : :obj:`str`
        Path to the confounds file.

    Returns
    -------
    confounds_raw : pandas.DataFrame
        Raw confounds loaded from the confounds file.

    Raises
    ------
    ValueError
        If a column name is in camel case, or if the first column name
        parses as a number (the file has no header row).
    """
    confounds_raw = pd.read_csv(
        confounds_raw_path, delimiter="\t", encoding="utf-8"
    )

    # check if the version of fMRIprep (>=1.2.0) is supported based on
    # header format. 1.0.x and 1.1.x series uses camel case
    if any(is_camel_case(col_name) for col_name in confounds_raw.columns):
        raise ValueError(
            "The confound file contains header in camel case. "
            "This is likely the output from 1.0.x and 1.1.x series. "
            "We only support fmriprep outputs >= 1.2.0. "
            f"{confounds_raw.columns}"
        )

    # even old version with no header will have the first row as header.
    # Only the success of the conversion matters: relying on the truthiness
    # of the converted value would miss a first value equal to 0.
    try:
        float(confounds_raw.columns[0])
    except ValueError:
        too_old = False
    else:
        too_old = True

    if too_old:
        bad_file = pd.read_csv(
            confounds_raw_path, delimiter="\t", encoding="utf-8", header=None
        )
        raise ValueError(
            "The confound file contains no header. "
            "Is this an old version fMRIprep output?\n"
            f"{bad_file.head()}"
        )
    return confounds_raw
def _ext_validator(image_file, ext):
    """Check that every image name matches the pattern of an input type.

    Parameters
    ----------
    image_file : list of str
        Path(s) to the functional image file(s); each entry is tested.

    ext : str
        Key of ``img_file_patterns`` / ``img_file_error`` to check against.

    Returns
    -------
    valid_img : bool
        True if all the images are valid, False otherwise
        (including when ``ext`` is not a known key).

    error_message : str
        Error message to raise if the image is invalid.
    """
    # Unknown input types are rejected up front.
    if ext not in img_file_patterns or ext not in img_file_error:
        return False, "Unsupported input."

    pattern = img_file_patterns[ext]
    valid_img = True
    for img in image_file:
        if not re.search(pattern, img):
            valid_img = False
            break
    return valid_img, img_file_error[ext]
def _check_images(image_file, flag_full_aroma):
    """Raise if the input image name does not fit the selected strategy.

    Parameters
    ----------
    image_file : str
        Path to the functional image file. An input of length 2 is
        validated as a pair of gifti files.

    flag_full_aroma : bool
        True if the input is a full ICA-AROMA output, False otherwise.

    Raises
    ------
    ValueError
        If the image is not valid.
    """
    if len(image_file) == 2:  # must be gifti
        candidates, ext = image_file, "func.gii"
    elif flag_full_aroma:
        candidates, ext = [image_file], "aroma"
    else:
        # The last two dot-separated parts of the name select the pattern,
        # e.g. "nii.gz" or "dtseries.nii".
        ext = ".".join(image_file.split(".")[-2:])
        candidates = [image_file]

    valid_img, error_message = _ext_validator(candidates, ext)
    if valid_img:
        return
    raise ValueError(error_message)
def prepare_output(confounds, demean):
    """Extract the sample mask and optionally demean the confounds.

    Parameters
    ----------
    confounds : pandas.DataFrame
        Confound regressors loaded based on user's choice.

    demean : :obj:`bool`
        True if the confounds should be demeaned, False otherwise.

    Returns
    -------
    sample_mask : None or numpy.ndarray
        When no volume removal is required, the value is None.
        Otherwise, the shape is \
            (number of scans - number of volumes removed, )
        The index of the niimgs along time/fourth dimension for valid
        volumes for subsequent analysis.

    confounds : pandas.DataFrame
        Confounds ready for subsequent analysis
        (demeaned when ``demean`` is True).
    """
    sample_mask, confounds, _ = extract_outlier_regressors(confounds)

    # ica_aroma = "full" generate empty output: nothing left to process.
    if confounds.size == 0:
        return sample_mask, confounds

    # Derivatives have NaN on the first row.
    # Replace them by estimates at second time point,
    # otherwise nilearn will crash.
    first_row_is_nan = np.isnan(confounds.to_numpy()[0, :])
    confounds.iloc[0, first_row_is_nan] = confounds.iloc[1, first_row_is_nan]

    if demean:
        confounds = _demean_confounds(confounds, sample_mask)
    return sample_mask, confounds
450def _demean_confounds(confounds, sample_mask):
451 """Demean the confounds.
453 The mean is calculated on non-outlier values.
455 Parameters
456 ----------
457 confounds : pandas.DataFrame
458 Confound regressors loaded based on user's choice.
460 sample_mask : None or numpy.ndarray
461 When no volume removal is required, the value is None.
462 Otherwise, the shape is \
463 (number of scans - number of volumes removed, )
464 The index of the niimgs along time/fourth dimension for valid
465 volumes for subsequent analysis.
467 Returns
468 -------
469 confounds : pandas.DataFrame
470 Demeaned confounds.
471 """
472 confound_cols = confounds.columns
473 if sample_mask is None:
474 confounds = scale(confounds, axis=0, with_std=False)
475 else: # calculate the mean without outliers.
476 confounds_mean = confounds.iloc[sample_mask, :].mean(axis=0)
477 confounds -= confounds_mean
478 return pd.DataFrame(confounds, columns=confound_cols)
class MissingConfoundError(Exception):
    """
    Exception raised when failing to find params in the confounds.

    Parameters
    ----------
    params : :obj:`list` of missing params, default=[]

    keywords : :obj:`list` of missing keywords, default=[]
    """

    def __init__(self, params=None, keywords=None):
        """Set missing parameters and keywords."""
        self.params = params or []
        self.keywords = keywords or []
        # Initialise the base class so that ``args`` is populated instead
        # of being left empty.
        super().__init__(self.params, self.keywords)

    def __str__(self):
        """Return a readable summary of what is missing."""
        return (
            f"Missing confound parameters: {self.params}; "
            f"missing keywords: {self.keywords}"
        )