Coverage for nilearn/interfaces/fmriprep/load_confounds_utils.py: 16%

132 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-16 12:32 +0200

1"""Helper functions for the manipulation of fmriprep output confounds.""" 

2 

3import itertools 

4import json 

5import re 

6from pathlib import Path 

7 

8import numpy as np 

9import pandas as pd 

10from sklearn.preprocessing import scale 

11 

12from nilearn._utils.fmriprep_confounds import flag_single_gifti, is_camel_case 

13from nilearn.interfaces.bids import parse_bids_filename 

14 

15from .load_confounds_scrub import extract_outlier_regressors 

16 

# Regular expressions identifying the supported fMRIPrep functional
# derivatives, keyed by the file-type label used by the image validators.
# Raw strings with escaped dots are used so that "." only matches the literal
# extension separator instead of any character.
img_file_patterns = {
    "aroma": r"_desc-smoothAROMAnonaggr_bold",
    "nii.gz": r"(_space-.*)?_desc-preproc_bold\.nii\.gz",
    "dtseries.nii": r"(_space-.*)?_bold\.dtseries\.nii",
    "func.gii": r"_hemi-[LR](_space-.*)?_bold\.func\.gii",
}

# Error message reported when an image does not match the pattern stored
# under the same key in ``img_file_patterns``.
img_file_error = {
    "aroma": (
        "Input must be desc-smoothAROMAnonaggr_bold for full ICA-AROMA"
        " strategy."
    ),
    "nii.gz": "Invalid file type for the selected method.",
    "dtseries.nii": "Invalid file type for the selected method.",
    "func.gii": "need fMRIprep output with extension func.gii",
}

33 

34 

def check_params_confounds(confounds_raw, params):
    """Report which of the requested parameters exist in the confounds.

    Used for motion, wm_csf, global_signal, and compcor regressors.

    Parameters
    ----------
    confounds_raw : pandas.DataFrame
        Raw confounds loaded from the confounds file.

    params : :obj:`list` of :obj:`str`
        Column names requested according to the user's choices.

    Returns
    -------
    bool or :obj:`list` of :obj:`str`
        True when every parameter is a column of ``confounds_raw``.
        False when no parameter is a column of ``confounds_raw``
        (this includes an empty ``params``).
        Otherwise the list of parameters that are missing.
    """
    available = confounds_raw.columns
    missing = [name for name in params if name not in available]
    # Everything requested is missing (or nothing was requested at all).
    if len(missing) == len(params):
        return False
    # A non-empty list means a partial match; an empty one a full match.
    return missing or True

65 

66 

def find_confounds(confounds_raw, keywords):
    """Collect the confound columns whose name contains a keyword.

    Used for cosine regressors and ICA-AROMA regressors.

    Parameters
    ----------
    confounds_raw : pandas.DataFrame
        Raw confounds loaded from the confounds file.

    keywords : :obj:`list` of :obj:`str`
        Substrings to look for in the column names.

    Returns
    -------
    list of :obj:`str`
        Matching column names, grouped by keyword in the order the keywords
        were given; a column matching several keywords appears once per
        matching keyword.
    """
    return [
        column
        for keyword in keywords
        for column in confounds_raw.columns
        if keyword in column
    ]

91 

92 

def sanitize_confounds(img_files):
    """Normalise the image input to a list and flag single inputs.

    Parameters
    ----------
    img_files : :obj:`str` or :obj:`list` of :obj:`str`
        Path to the functional image file(s).

    Returns
    -------
    img_files : :obj:`list` of :obj:`str`
        List of functional image file(s).
    flag_single : bool
        True if the input is a single file, False if it is a :obj:`list` of
        files.
    """
    # A one-element input is passed through untouched and treated as a
    # single set of confounds.
    if len(img_files) == 1:
        return img_files, True

    is_pair = isinstance(img_files, list) and len(img_files) == 2
    if is_pair:
        # Two files may be the two hemispheres of one gifti image;
        # flag_single_gifti decides whether the pair counts as one input.
        flag_single = flag_single_gifti(img_files)
    else:
        # Anything else counts as single only when it is a plain string.
        flag_single = isinstance(img_files, str)

    wrapped = [img_files] if flag_single else img_files
    return wrapped, flag_single

121 

122 

def add_suffix(params, model):
    """Add derivative suffixes to a list of parameters.

    Used from motion, wm_csf, global_signal.

    Parameters
    ----------
    params : :obj:`list` of :obj:`str`
        List of parameters to add suffixes to. The input is not modified.
    model : :obj:`str`
        Model to use. Options are "basic", "derivatives", "power2", or
        "full".

    Returns
    -------
    params_full : :obj:`list` of :obj:`str`
        The original parameters followed, for each parameter in turn, by its
        suffixed variants in a fixed order.

    Raises
    ------
    KeyError
        If ``model`` is not one of the supported options and ``params`` is
        not empty.
    """
    # Tuples rather than sets: iterating a set of strings depends on hash
    # randomisation, which made the order of the generated names vary from
    # one interpreter run to the next.
    suffixes_by_model = {
        "basic": (),
        "derivatives": ("derivative1",),
        "power2": ("power2",),
        "full": ("derivative1", "power2", "derivative1_power2"),
    }
    params_full = list(params)
    params_full.extend(
        f"{par}_{suff}" for par in params for suff in suffixes_by_model[model]
    )
    return params_full

152 

153 

def _generate_confounds_file_candidates(nii_file):
    """Generate confounds file candidates.

    Build a list of potential confounds filenames using all combinations of
    the entities in the image file. Every candidate contains the
    ``desc-confounds`` entity; the file suffix and extension are not
    included.

    Parameters
    ----------
    nii_file : str
        Path to the functional image file.

    Returns
    -------
    filenames : list of str
        List of potential confounds filenames, ordered by increasing number
        of entities and, within one size, by the order of the entities in
        the image filename.
    """
    parsed_file = parse_bids_filename(nii_file, legacy=False)
    entities = parsed_file["entities"]
    # The confounds file always carries desc-confounds, whatever "desc" the
    # image itself had.
    entities["desc"] = "confounds"

    keys = list(entities)
    # itertools.combinations over distinct keys already yields unique
    # subsets, so no de-duplication is needed; skipping the set round-trip
    # also keeps the candidate order deterministic.
    return [
        "_".join("-".join((key, entities[key])) for key in subset)
        for n_entities in range(1, len(keys) + 1)
        for subset in itertools.combinations(keys, n_entities)
        if "desc" in subset
    ]

192 

193 

def _get_file_name(nii_file):
    """Locate the confounds file that belongs to a functional image.

    Parameters
    ----------
    nii_file : str
        Path to the functional image file. When a list is given (gifti
        input) its first element is used.

    Returns
    -------
    confound_file : str
        Path to the associated confounds file.

    Raises
    ------
    ValueError
        If no candidate confounds file exists next to the image, or if more
        than one does.
    """
    if isinstance(nii_file, list):  # catch gifti
        nii_file = nii_file[0]

    parent_dir = Path(nii_file).parent
    stems = _generate_confounds_file_candidates(nii_file)

    # fmriprep has changed the file suffix between v20.1.1 and v20.2.0 with
    # respect to BEP 012, so both the new and the legacy suffix are tried.
    # cf. https://neurostars.org/t/naming-change-confounds-regressors-to-confounds-timeseries/17637  # noqa: E501
    candidate_names = [
        stem + suffix
        for suffix in ("_timeseries.tsv", "_regressors.tsv")
        for stem in stems
    ]

    # Longest names first, so earlier entries reflect more retained entities.
    candidate_names.sort(key=len)
    candidate_names.reverse()

    existing = [
        str(path)
        for path in (parent_dir / name for name in candidate_names)
        if path.is_file()
    ]

    if not existing:
        raise ValueError(
            "Could not find associated confound file. "
            "The functional derivatives should exist under the same parent "
            "directory."
        )
    if len(existing) > 1:
        found_str = "\n\t".join(existing)
        raise ValueError(f"Found more than one confound file:\n\t{found_str}")
    return existing[0]

245 

246 

def get_confounds_file(image_file, flag_full_aroma):
    """Validate a functional image and return its confounds file.

    Parameters
    ----------
    image_file : :obj:`str`
        Path to the functional image file.

    flag_full_aroma : :obj:`bool`
        True if the input is a full ICA-AROMA output, False otherwise.

    Returns
    -------
    confounds_raw_path : :obj:`str`
        Path to the associated confounds file.
    """
    # Validation comes first: it raises before any file lookup is attempted.
    _check_images(image_file, flag_full_aroma)
    return _get_file_name(image_file)

266 

267 

def get_json(confounds_raw_path):
    """Return the path of the json companion of a confounds tsv file.

    Only the trailing ``.tsv`` extension is swapped for ``.json``; any other
    occurrence of "tsv" in the path (for example in a directory name) is left
    untouched.

    Parameters
    ----------
    confounds_raw_path : :obj:`str` or path-like
        Path to the confounds tsv file.

    Returns
    -------
    :obj:`str`
        Path to the json companion file. A path that does not end in
        ``.tsv`` is returned unchanged (as a string).
    """
    tsv_path = str(confounds_raw_path)
    stem, dot, extension = tsv_path.rpartition(".")
    if dot and extension == "tsv":
        return f"{stem}.json"
    return tsv_path

272 

273 

def load_confounds_json(confounds_json, flag_acompcor):
    """Load json data companion to the confounds tsv file.

    Parameters
    ----------
    confounds_json : :obj:`str`
        Path to the json file.

    flag_acompcor : :obj:`bool`
        True if user selected anatomical compcor for denoising strategy,
        False otherwise.

    Returns
    -------
    confounds_json : dict or :obj:`str`
        Dictionary of confounds meta data from the confounds.json file.
        When the file cannot be opened and ``flag_acompcor`` is False, the
        input path is returned unchanged.

    Raises
    ------
    ValueError
        If the json file cannot be opened while ``flag_acompcor`` is True.
        This should not be the case for fMRIprep >= 1.4.0.
    """
    try:
        with Path(confounds_json).open("rb") as f:
            return json.load(f)
    except OSError as error:
        # The metadata is only mandatory for anatomical CompCor; for any
        # other strategy a missing file is tolerated.
        if flag_acompcor:
            raise ValueError(
                f"Could not find associated json file {confounds_json}. "
                "This is necessary for anatomical CompCor. "
                "The CompCor component is only supported for fMRIprep "
                "version >= 1.4.0."
            ) from error
    return confounds_json

309 

310 

def load_confounds_file_as_dataframe(confounds_raw_path):
    """Load raw confounds as a pandas DataFrame.

    Meanwhile detect if the fMRIPrep version is supported.

    Parameters
    ----------
    confounds_raw_path : :obj:`str`
        Path to the confounds file.

    Returns
    -------
    confounds_raw : pandas.DataFrame
        Raw confounds loaded from the confounds file.

    Raises
    ------
    ValueError
        If a column name is in camel case, or if the first column name can
        be parsed as a number (the file has no header row).
    """
    confounds_raw = pd.read_csv(
        confounds_raw_path, delimiter="\t", encoding="utf-8"
    )

    # check if the version of fMRIprep (>=1.2.0) is supported based on
    # header format. 1.0.x and 1.1.x series uses camel case
    if any(is_camel_case(col_name) for col_name in confounds_raw.columns):
        raise ValueError(
            "The confound file contains header in camel case. "
            "This is likely the output from 1.0.x and 1.1.x series. "
            "We only support fmriprep outputs >= 1.2.0. "
            f"{confounds_raw.columns}"
        )

    # even old version with no header will have the first row as header;
    # a first column name that parses as a number therefore means the file
    # has no header. The parsed value itself must not be used as the flag:
    # a leading 0.0 is falsy and would go undetected.
    try:
        float(confounds_raw.columns[0])
    except ValueError:
        too_old = False
    else:
        too_old = True

    if too_old:
        bad_file = pd.read_csv(
            confounds_raw_path, delimiter="\t", encoding="utf-8", header=None
        )
        raise ValueError(
            "The confound file contains no header. "
            "Is this an old version fMRIprep output?\n"
            f"{bad_file.head()}"
        )
    return confounds_raw

356 

357 

def _ext_validator(image_file, ext):
    """Check that every image matches the filename pattern of a file type.

    Parameters
    ----------
    image_file : list of str
        Paths of the functional image file(s) to check; each element is
        matched against the pattern.

    ext : str
        Key of the file type to check against (a key of
        ``img_file_patterns``).

    Returns
    -------
    valid_img : bool
        True if all images match the pattern, False otherwise. False for an
        unknown ``ext``.

    error_message : str
        Error message to raise if the image is invalid.
    """
    try:
        pattern = img_file_patterns[ext]
        error_message = img_file_error[ext]
    except KeyError:
        # No pattern registered for this file type.
        return False, "Unsupported input."

    matches = (re.search(pattern, img) is not None for img in image_file)
    return all(matches), error_message

386 

387 

def _check_images(image_file, flag_full_aroma):
    """Validate the input image name against the expected file type.

    Parameters
    ----------
    image_file : str
        Path to the functional image file. An input of length two is treated
        as a pair of gifti files.

    flag_full_aroma : bool
        True if the input is a full ICA-AROMA output, False otherwise.

    Raises
    ------
    ValueError
        If the image is not valid.
    """
    if len(image_file) == 2:  # must be gifti
        candidates, ext = image_file, "func.gii"
    elif flag_full_aroma:
        candidates, ext = [image_file], "aroma"
    else:
        # The file type is taken from the last two dot-separated parts of
        # the name, e.g. "nii.gz" or "dtseries.nii".
        candidates, ext = [image_file], ".".join(image_file.split(".")[-2:])

    valid_img, error_message = _ext_validator(candidates, ext)
    if not valid_img:
        raise ValueError(error_message)

413 

414 

def prepare_output(confounds, demean):
    """Build the sample mask and finalise the selected confounds.

    Parameters
    ----------
    confounds : pandas.DataFrame
        Confound regressors loaded based on user's choice.

    demean : :obj:`bool`
        True if the confounds should be demeaned, False otherwise.

    Returns
    -------
    sample_mask : None or numpy.ndarray
        When no volume removal is required, the value is None.
        Otherwise, the shape is
        (number of scans - number of volumes removed, )
        The index of the niimgs along time/fourth dimension for valid
        volumes for subsequent analysis.

    confounds : pandas.DataFrame
        Confounds ready for subsequent analysis, demeaned when ``demean``
        is True.
    """
    sample_mask, confounds, _ = extract_outlier_regressors(confounds)

    # ica_aroma = "full" generate empty output: nothing left to fix up.
    if confounds.size == 0:
        return sample_mask, confounds

    # Derivatives have NaN on the first row.
    # Replace them by estimates at second time point,
    # otherwise nilearn will crash.
    first_row_is_nan = np.isnan(confounds.to_numpy()[0, :])
    confounds.iloc[0, first_row_is_nan] = confounds.iloc[1, first_row_is_nan]

    if demean:
        confounds = _demean_confounds(confounds, sample_mask)
    return sample_mask, confounds

448 

449 

450def _demean_confounds(confounds, sample_mask): 

451 """Demean the confounds. 

452 

453 The mean is calculated on non-outlier values. 

454 

455 Parameters 

456 ---------- 

457 confounds : pandas.DataFrame 

458 Confound regressors loaded based on user's choice. 

459 

460 sample_mask : None or numpy.ndarray 

461 When no volume removal is required, the value is None. 

462 Otherwise, the shape is \ 

463 (number of scans - number of volumes removed, ) 

464 The index of the niimgs along time/fourth dimension for valid 

465 volumes for subsequent analysis. 

466 

467 Returns 

468 ------- 

469 confounds : pandas.DataFrame 

470 Demeaned confounds. 

471 """ 

472 confound_cols = confounds.columns 

473 if sample_mask is None: 

474 confounds = scale(confounds, axis=0, with_std=False) 

475 else: # calculate the mean without outliers. 

476 confounds_mean = confounds.iloc[sample_mask, :].mean(axis=0) 

477 confounds -= confounds_mean 

478 return pd.DataFrame(confounds, columns=confound_cols) 

479 

480 

class MissingConfoundError(Exception):
    """
    Exception raised when failing to find params in the confounds.

    Parameters
    ----------
    params : :obj:`list` of missing params, default=[]

    keywords : :obj:`list` of missing keywords, default=[]
    """

    def __init__(self, params=None, keywords=None):
        """Set missing parameters and keywords."""
        self.params = params or []
        self.keywords = keywords or []
        # Initialise the base class so that ``args`` always holds the
        # normalised values, however the exception was constructed.
        super().__init__(self.params, self.keywords)

    def __str__(self):
        """Return a readable summary of what could not be found."""
        return (
            f"Missing confound parameters: {self.params}; "
            f"missing keywords: {self.keywords}"
        )