Coverage for nilearn/datasets/func.py: 10%

770 statements  


1"""Downloading NeuroImaging datasets: \ 

2functional datasets (task + resting-state). 

3""" 

4 

5import fnmatch 

6import functools 

7import itertools 

8import json 

9import numbers 

10import os 

11import re 

12import warnings 

13from io import BytesIO 

14from pathlib import Path 

15 

16import numpy as np 

17import pandas as pd 

18from nibabel import Nifti1Image, four_to_three 

19from scipy.io import loadmat 

20from scipy.io.matlab import MatReadError 

21from sklearn.utils import Bunch 

22 

23from nilearn._utils import check_niimg, fill_doc, logger, remove_parameters 

24from nilearn._utils.logger import find_stack_level 

25from nilearn._utils.param_validation import check_params 

26from nilearn.datasets._utils import ( 

27 ALLOWED_MESH_TYPES, 

28 PACKAGE_DIRECTORY, 

29 fetch_files, 

30 fetch_single_file, 

31 filter_columns, 

32 get_dataset_descr, 

33 get_dataset_dir, 

34 read_md5_sum_file, 

35 tree, 

36 uncompress_file, 

37) 

38from nilearn.datasets.struct import load_fsaverage 

39from nilearn.image import get_data 

40from nilearn.interfaces.bids import get_bids_files 

41from nilearn.surface import SurfaceImage 

42 

43from .._utils.numpy_conversions import csv_to_array 

44 

45 

@fill_doc
def fetch_haxby(
    data_dir=None,
    subjects=(2,),
    fetch_stimuli=False,
    url=None,
    resume=True,
    verbose=1,
):
    """Download and load the complete Haxby dataset.

    See :footcite:t:`Haxby2001`.

    Parameters
    ----------
    %(data_dir)s
    subjects : :obj:`list` or :obj:`tuple` or :obj:`int`, default=(2,)
        Either a list of subjects or the number of subjects to load,
        from 1 to 6.
        By default, only the second subject is loaded.
        An empty list returns no subject data.

    fetch_stimuli : :obj:`bool`, default=False
        Whether the stimuli images should be downloaded.
        They will be presented as a dictionary of categories.
    %(url)s
    %(resume)s
    %(verbose)s

    Returns
    -------
    data : :obj:`sklearn.utils.Bunch`
        Dictionary-like object, the interest attributes are :

        - 'anat': :obj:`list` of :obj:`str`. Paths to anatomic images.
        - 'func': :obj:`list` of :obj:`str`.
          Paths to nifti file with :term:`BOLD` data.
        - 'session_target': :obj:`list` of :obj:`str`.
          Paths to text file containing run and target data.
        - 'mask': :obj:`str`. Path to full-brain mask file.
        - 'mask_vt': :obj:`list` of :obj:`str`.
          Paths to nifti ventral temporal mask file.
        - 'mask_face': :obj:`list` of :obj:`str`.
          Paths to nifti with face-responsive brain regions.
        - 'mask_face_little': :obj:`list` of :obj:`str`.
          Spatially more constrained version of the above.
        - 'mask_house': :obj:`list` of :obj:`str`.
          Paths to nifti with house-responsive brain regions.
        - 'mask_house_little': :obj:`list` of :obj:`str`.
          Spatially more constrained version of the above.

    References
    ----------
    .. footbibliography::

    Notes
    -----
    PyMVPA provides a tutorial making use of this dataset:
    http://www.pymvpa.org/tutorial.html

    More information about its structure:
    http://dev.pymvpa.org/datadb/haxby2001.html

    See `additional information
    <https://www.science.org/doi/10.1126/science.1063736>`_

    Run 8 in subject 5 does not contain any task labels.
    The anatomical image for subject 6 is unavailable.

    """
    check_params(locals())

    if isinstance(subjects, numbers.Number) and subjects > 6:
        subjects = 6

    if subjects is not None and isinstance(subjects, (list, tuple)):
        for sub_id in subjects:
            if sub_id not in [1, 2, 3, 4, 5, 6]:
                raise ValueError(
                    f"You provided invalid subject id {sub_id} in a "
                    "list. Subjects must be selected in "
                    "[1, 2, 3, 4, 5, 6]"
                )

    dataset_name = "haxby2001"
    data_dir = get_dataset_dir(
        dataset_name, data_dir=data_dir, verbose=verbose
    )

    # Get the mask
    url_mask = "https://www.nitrc.org/frs/download.php/7868/mask.nii.gz"
    mask = fetch_files(
        data_dir, [("mask.nii.gz", url_mask, {})], verbose=verbose
    )[0]

    # Dataset files
    if url is None:
        url = "http://data.pymvpa.org/datasets/haxby2001/"
    md5sums = fetch_files(
        data_dir, [("MD5SUMS", url + "MD5SUMS", {})], verbose=verbose
    )[0]
    md5sums = read_md5_sum_file(md5sums)

    # definition of dataset files
    sub_files = [
        "bold.nii.gz",
        "labels.txt",
        "mask4_vt.nii.gz",
        "mask8b_face_vt.nii.gz",
        "mask8b_house_vt.nii.gz",
        "mask8_face_vt.nii.gz",
        "mask8_house_vt.nii.gz",
        "anat.nii.gz",
    ]
    n_files = len(sub_files)

    if subjects is None:
        subjects = []

    if isinstance(subjects, numbers.Number):
        subject_mask = np.arange(1, subjects + 1)
    else:
        subject_mask = np.array(subjects)

    files = [
        (
            Path(f"subj{int(i)}") / sub_file,
            url + f"subj{int(i)}-2010.01.14.tar.gz",
            {
                "uncompress": True,
                "md5sum": md5sums.get(f"subj{int(i)}-2010.01.14.tar.gz"),
            },
        )
        for i in subject_mask
        for sub_file in sub_files
        if sub_file != "anat.nii.gz" or i != 6
    ]

    files = fetch_files(data_dir, files, resume=resume, verbose=verbose)

    if (isinstance(subjects, numbers.Number) and subjects == 6) or np.any(
        subject_mask == 6
    ):
        files.append(None)  # None value because subject 6 has no anat

    kwargs = {}
    if fetch_stimuli:
        stimuli_files = [
            (
                Path("stimuli") / "README",
                url + "stimuli-2010.01.14.tar.gz",
                {"uncompress": True},
            )
        ]
        readme = fetch_files(
            data_dir, stimuli_files, resume=resume, verbose=verbose
        )[0]
        kwargs["stimuli"] = tree(
            Path(readme).parent, pattern="*.jpg", dictionary=True
        )

    fdescr = get_dataset_descr(dataset_name)

    # return the data
    return Bunch(
        anat=files[7::n_files],
        func=files[0::n_files],
        session_target=files[1::n_files],
        mask_vt=files[2::n_files],
        mask_face=files[3::n_files],
        mask_house=files[4::n_files],
        mask_face_little=files[5::n_files],
        mask_house_little=files[6::n_files],
        mask=mask,
        description=fdescr,
        **kwargs,
    )
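
# --- Usage sketch (editor's addition, not part of nilearn's API) ----------
# A minimal example of fetch_haxby, assuming network access on the first
# call (files are cached afterwards): load the default subject and peek at
# the run/target labels that accompany the BOLD image. The `_example_*`
# helper name is hypothetical.
def _example_fetch_haxby():
    haxby = fetch_haxby(subjects=[2])
    print(haxby.func[0])  # path to the 4D BOLD image
    labels = pd.read_csv(haxby.session_target[0], sep=" ")
    print(labels["labels"].unique())  # e.g. 'rest', 'face', 'house', ...
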

def adhd_ids():
    """Return subject ids for the ADHD dataset."""
    return [
        "0010042",
        "0010064",
        "0010128",
        "0021019",
        "0023008",
        "0023012",
        "0027011",
        "0027018",
        "0027034",
        "0027037",
        "1019436",
        "1206380",
        "1418396",
        "1517058",
        "1552181",
        "1562298",
        "1679142",
        "2014113",
        "2497695",
        "2950754",
        "3007585",
        "3154996",
        "3205761",
        "3520880",
        "3624598",
        "3699991",
        "3884955",
        "3902469",
        "3994098",
        "4016887",
        "4046678",
        "4134561",
        "4164316",
        "4275075",
        "6115230",
        "7774305",
        "8409791",
        "8697774",
        "9744150",
        "9750701",
    ]

@fill_doc
def fetch_adhd(n_subjects=30, data_dir=None, url=None, resume=True, verbose=1):
    """Download and load the ADHD :term:`resting-state` dataset.

    See :footcite:t:`ADHDdataset`.

    Parameters
    ----------
    n_subjects : :obj:`int`, default=30
        The number of subjects to load, from a maximum of 40 subjects.
        By default, 30 subjects will be loaded. If None is given,
        all 40 subjects will be loaded.
    %(data_dir)s
    %(url)s
    %(resume)s
    %(verbose)s

    Returns
    -------
    data : :obj:`sklearn.utils.Bunch`
        Dictionary-like object, the interest attributes are :

        - 'func': Paths to functional :term:`resting-state` images
        - 'phenotypic': :obj:`pandas.DataFrame` with explanations of
          preprocessing steps
        - 'confounds': CSV files containing the nuisance variables

    References
    ----------
    .. footbibliography::

    """
    check_params(locals())

    if url is None:
        url = "https://www.nitrc.org/frs/download.php/"

    # Preliminary checks and declarations
    dataset_name = "adhd"
    data_dir = get_dataset_dir(
        dataset_name, data_dir=data_dir, verbose=verbose
    )
    ids = adhd_ids()
    nitrc_ids = range(7782, 7822)
    max_subjects = len(ids)
    if n_subjects is None:
        n_subjects = max_subjects
    if n_subjects > max_subjects:
        warnings.warn(
            f"Warning: there are only {max_subjects} subjects.",
            stacklevel=find_stack_level(),
        )
        n_subjects = max_subjects
    ids = ids[:n_subjects]
    nitrc_ids = nitrc_ids[:n_subjects]

    opts = {"uncompress": True}

    # Dataset description
    fdescr = get_dataset_descr(dataset_name)

    # First, get the metadata
    phenotypic = (
        "ADHD200_40subs_motion_parameters_and_phenotypics.csv",
        url + "7781/adhd40_metadata.tgz",
        opts,
    )

    phenotypic = fetch_files(
        data_dir, [phenotypic], resume=resume, verbose=verbose
    )[0]

    # Load the csv file
    phenotypic = pd.read_table(phenotypic, delimiter=",")

    # Keep phenotypic information for selected subjects
    mask = phenotypic["Subject"].apply(lambda x: str(x) in ids)
    phenotypic = phenotypic[mask]

    # Download dataset files
    archives = [
        url + f"{int(ni)}/adhd40_{ii}.tgz" for ni, ii in zip(nitrc_ids, ids)
    ]
    functionals = [
        f"data/{i}/{i}_rest_tshift_RPI_voreg_mni.nii.gz" for i in ids
    ]
    confounds = [f"data/{i}/{i}_regressors.csv" for i in ids]

    functionals = fetch_files(
        data_dir,
        zip(functionals, archives, (opts,) * n_subjects),
        resume=resume,
        verbose=verbose,
    )

    confounds = fetch_files(
        data_dir,
        zip(confounds, archives, (opts,) * n_subjects),
        resume=resume,
        verbose=verbose,
    )

    return Bunch(
        func=functionals,
        confounds=confounds,
        phenotypic=phenotypic,
        description=fdescr,
    )
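
# --- Usage sketch (editor's addition) --------------------------------------
# Fetch two ADHD subjects and pair each functional scan with its confounds
# file, as one would before running a masker with confound regression.
# `_example_fetch_adhd` is a hypothetical helper, not part of nilearn.
def _example_fetch_adhd():
    adhd = fetch_adhd(n_subjects=2)
    for func_file, confound_file in zip(adhd.func, adhd.confounds):
        print(func_file, "->", confound_file)
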

def miyawaki2008_file_mask():
    """Return file listing for the miyawaki 2008 dataset."""
    return [
        "mask.nii.gz",
        "LHlag0to1.nii.gz",
        "LHlag10to11.nii.gz",
        "LHlag1to2.nii.gz",
        "LHlag2to3.nii.gz",
        "LHlag3to4.nii.gz",
        "LHlag4to5.nii.gz",
        "LHlag5to6.nii.gz",
        "LHlag6to7.nii.gz",
        "LHlag7to8.nii.gz",
        "LHlag8to9.nii.gz",
        "LHlag9to10.nii.gz",
        "LHV1d.nii.gz",
        "LHV1v.nii.gz",
        "LHV2d.nii.gz",
        "LHV2v.nii.gz",
        "LHV3A.nii.gz",
        "LHV3.nii.gz",
        "LHV4v.nii.gz",
        "LHVP.nii.gz",
        "RHlag0to1.nii.gz",
        "RHlag10to11.nii.gz",
        "RHlag1to2.nii.gz",
        "RHlag2to3.nii.gz",
        "RHlag3to4.nii.gz",
        "RHlag4to5.nii.gz",
        "RHlag5to6.nii.gz",
        "RHlag6to7.nii.gz",
        "RHlag7to8.nii.gz",
        "RHlag8to9.nii.gz",
        "RHlag9to10.nii.gz",
        "RHV1d.nii.gz",
        "RHV1v.nii.gz",
        "RHV2d.nii.gz",
        "RHV2v.nii.gz",
        "RHV3A.nii.gz",
        "RHV3.nii.gz",
        "RHV4v.nii.gz",
        "RHVP.nii.gz",
    ]

@fill_doc
def fetch_miyawaki2008(data_dir=None, url=None, resume=True, verbose=1):
    """Download and load the Miyawaki et al. 2008 dataset (153MB).

    See :footcite:t:`Miyawaki2008`.

    Parameters
    ----------
    %(data_dir)s
    %(url)s
    %(resume)s
    %(verbose)s

    Returns
    -------
    data : Bunch
        Dictionary-like object, the interest attributes are :

        - 'func': :obj:`list` of :obj:`str`
          Paths to nifti file with :term:`BOLD` data
        - 'label': :obj:`list` of :obj:`str`
          Paths to text file containing run and target data
        - 'mask': :obj:`str`
          Path to nifti mask file to define target volume in visual
          cortex
        - 'background': :obj:`str`
          Path to nifti file containing a background image usable as a
          background image for miyawaki images.

    References
    ----------
    .. footbibliography::

    Notes
    -----
    This dataset is available on the `brainliner website
    <http://brainliner.jp/restrictedProject.atr>`_

    See `additional information
    <https://bicr.atr.jp//dni/en/downloads/\
fmri-data-set-for-visual-image-reconstruction/>`_

    """
    check_params(locals())

    url = (
        "https://www.nitrc.org/frs/download.php"
        "/8486/miyawaki2008.tgz?i_agree=1&download_now=1"
    )
    opts = {"uncompress": True}

    # Dataset files

    # Functional MRI:
    # * 20 random scans (usually used for training)
    # * 12 figure scans (usually used for testing)

    func_figure = [
        (Path("func", f"data_figure_run{int(i):02}.nii.gz"), url, opts)
        for i in range(1, 13)
    ]

    func_random = [
        (Path("func", f"data_random_run{int(i):02}.nii.gz"), url, opts)
        for i in range(1, 21)
    ]

    # Labels, 10x10 patches, stimuli shown to the subject:
    # * 20 random labels
    # * 12 figure labels (letters and shapes)

    label_filename = "data_%s_run%02d_label.csv"
    label_figure = [
        (Path("label", label_filename % ("figure", i)), url, opts)
        for i in range(1, 13)
    ]

    label_random = [
        (Path("label", label_filename % ("random", i)), url, opts)
        for i in range(1, 21)
    ]

    # Masks
    file_mask = [
        (Path("mask", m), url, opts) for m in miyawaki2008_file_mask()
    ]

    file_names = (
        func_figure + func_random + label_figure + label_random + file_mask
    )

    dataset_name = "miyawaki2008"
    data_dir = get_dataset_dir(
        dataset_name, data_dir=data_dir, verbose=verbose
    )
    files = fetch_files(data_dir, file_names, resume=resume, verbose=verbose)

    # Fetch the background image
    bg_img = fetch_files(
        data_dir, [("bg.nii.gz", url, opts)], resume=resume, verbose=verbose
    )[0]

    fdescr = get_dataset_descr(dataset_name)

    # Return the data
    return Bunch(
        func=files[:32],
        label=files[32:64],
        mask=files[64],
        mask_roi=files[65:],
        background=bg_img,
        description=fdescr,
    )
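
# --- Usage sketch (editor's addition) --------------------------------------
# Load the Miyawaki data and check the split used by the decoding examples:
# 'func' holds the 12 "figure" runs followed by the 20 "random" runs, with
# matching label CSV files in 'label'. Hypothetical helper name.
def _example_fetch_miyawaki2008():
    miyawaki = fetch_miyawaki2008()
    print(len(miyawaki.func), len(miyawaki.label))  # 32 and 32
    print(miyawaki.mask)  # mask defining the target volume in visual cortex
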

# we allow the user to use alternatives to Brainomics contrast names
CONTRAST_NAME_WRAPPER = {
    # Checkerboard
    "checkerboard": "checkerboard",
    "horizontal checkerboard": "horizontal checkerboard",
    "vertical checkerboard": "vertical checkerboard",
    "horizontal vs vertical checkerboard": "horizontal vs vertical checkerboard",  # noqa: E501
    "vertical vs horizontal checkerboard": "vertical vs horizontal checkerboard",  # noqa: E501
    # Sentences
    "sentence listening": "auditory sentences",
    "sentence reading": "visual sentences",
    "sentence listening and reading": "auditory&visual sentences",
    "sentence reading vs checkerboard": "visual sentences vs checkerboard",
    # Calculation
    "calculation (auditory cue)": "auditory calculation",
    "calculation (visual cue)": "visual calculation",
    "calculation (auditory and visual cue)": "auditory&visual calculation",
    "calculation (auditory cue) vs sentence listening": "auditory calculation vs auditory sentences",  # noqa: E501
    "calculation (visual cue) vs sentence reading": "visual calculation vs sentences",  # noqa: E501
    "calculation vs sentences": "auditory&visual calculation vs sentences",
    # Calculation + Sentences
    "calculation (auditory cue) and sentence listening": "auditory processing",
    "calculation (visual cue) and sentence reading": "visual processing",
    "calculation (visual cue) and sentence reading vs "
    "calculation (auditory cue) and sentence listening": "visual processing vs auditory processing",  # noqa: E501
    "calculation (auditory cue) and sentence listening vs "
    "calculation (visual cue) and sentence reading": "auditory processing vs visual processing",  # noqa: E501
    "calculation (visual cue) and sentence reading vs checkerboard": "visual processing vs checkerboard",  # noqa: E501
    "calculation and sentence listening/reading vs button press": "cognitive processing vs motor",  # noqa: E501
    # Button press
    "left button press (auditory cue)": "left auditory click",
    "left button press (visual cue)": "left visual click",
    "left button press": "left auditory&visual click",
    "left vs right button press": "left auditory & visual click vs right auditory&visual click",  # noqa: E501
    "right button press (auditory cue)": "right auditory click",
    "right button press (visual cue)": "right visual click",
    "right button press": "right auditory & visual click",
    "right vs left button press": "right auditory & visual click vs left auditory&visual click",  # noqa: E501
    "button press (auditory cue) vs sentence listening": "auditory click vs auditory sentences",  # noqa: E501
    "button press (visual cue) vs sentence reading": "visual click vs visual sentences",  # noqa: E501
    "button press vs calculation and sentence listening/reading": "auditory&visual motor vs cognitive processing",  # noqa: E501
}
ALLOWED_CONTRASTS = list(CONTRAST_NAME_WRAPPER.values())
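
# --- Editor's note: a tiny sketch of how the mapping above is used ----------
# A user-facing name resolves to its original Brainomics name, and original
# names are accepted as-is because they appear in ALLOWED_CONTRASTS.
def _example_contrast_names():
    assert CONTRAST_NAME_WRAPPER["sentence listening"] == "auditory sentences"
    assert "auditory sentences" in ALLOWED_CONTRASTS
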

@fill_doc
def fetch_localizer_contrasts(
    contrasts,
    n_subjects=None,
    get_tmaps=False,
    get_masks=False,
    get_anats=False,
    data_dir=None,
    resume=True,
    verbose=1,
):
    """Download and load Brainomics/Localizer dataset (94 subjects).

    "The Functional Localizer is a simple and fast acquisition
    procedure based on a 5-minute functional magnetic resonance
    imaging (fMRI) sequence that can be run as easily and as
    systematically as an anatomical scan. This protocol captures the
    cerebral bases of auditory and visual perception, motor actions,
    reading, language comprehension and mental calculation at an
    individual level. Individual functional maps are reliable and
    quite precise. The procedure is described in more detail on the
    Functional Localizer page."
    (see https://osf.io/vhtf6/)

    You may cite :footcite:t:`Papadopoulos-Orfanos2017`
    when using this dataset.

    Scientific results obtained using this dataset are described
    in :footcite:t:`Pinel2007`.

    Parameters
    ----------
    contrasts : :obj:`list` of :obj:`str`
        The contrasts to be fetched (for all 94 subjects available).
        Allowed values are::

            - "checkerboard"
            - "horizontal checkerboard"
            - "vertical checkerboard"
            - "horizontal vs vertical checkerboard"
            - "vertical vs horizontal checkerboard"
            - "sentence listening"
            - "sentence reading"
            - "sentence listening and reading"
            - "sentence reading vs checkerboard"
            - "calculation (auditory cue)"
            - "calculation (visual cue)"
            - "calculation (auditory and visual cue)"
            - "calculation (auditory cue) vs sentence listening"
            - "calculation (visual cue) vs sentence reading"
            - "calculation vs sentences"
            - "calculation (auditory cue) and sentence listening"
            - "calculation (visual cue) and sentence reading"
            - "calculation and sentence listening/reading"
            - "calculation (auditory cue) and sentence listening vs "
              "calculation (visual cue) and sentence reading"
            - "calculation (visual cue) and sentence reading vs checkerboard"
            - "calculation and sentence listening/reading vs button press"
            - "left button press (auditory cue)"
            - "left button press (visual cue)"
            - "left button press"
            - "left vs right button press"
            - "right button press (auditory cue)"
            - "right button press (visual cue)"
            - "right button press"
            - "right vs left button press"
            - "button press (auditory cue) vs sentence listening"
            - "button press (visual cue) vs sentence reading"
            - "button press vs calculation and sentence listening/reading"

        or equivalently one can use the original names::

            - "checkerboard"
            - "horizontal checkerboard"
            - "vertical checkerboard"
            - "horizontal vs vertical checkerboard"
            - "vertical vs horizontal checkerboard"
            - "auditory sentences"
            - "visual sentences"
            - "auditory&visual sentences"
            - "visual sentences vs checkerboard"
            - "auditory calculation"
            - "visual calculation"
            - "auditory&visual calculation"
            - "auditory calculation vs auditory sentences"
            - "visual calculation vs sentences"
            - "auditory&visual calculation vs sentences"
            - "auditory processing"
            - "visual processing"
            - "visual processing vs auditory processing"
            - "auditory processing vs visual processing"
            - "visual processing vs checkerboard"
            - "cognitive processing vs motor"
            - "left auditory click"
            - "left visual click"
            - "left auditory&visual click"
            - "left auditory & visual click vs right auditory&visual click"
            - "right auditory click"
            - "right visual click"
            - "right auditory&visual click"
            - "right auditory & visual click vs left auditory&visual click"
            - "auditory click vs auditory sentences"
            - "visual click vs visual sentences"
            - "auditory&visual motor vs cognitive processing"

    n_subjects : :obj:`int` or :obj:`list` or None, default=None
        The number or list of subjects to load. If None is given,
        all 94 subjects are used.

    get_tmaps : :obj:`bool`, default=False
        Whether t maps should be fetched or not.

    get_masks : :obj:`bool`, default=False
        Whether individual masks should be fetched or not.

    get_anats : :obj:`bool`, default=False
        Whether individual structural images should be fetched or not.

    %(data_dir)s

    %(resume)s

    %(verbose)s

    Returns
    -------
    data : Bunch
        Dictionary-like object, the interest attributes are :

        - 'cmaps': :obj:`list` of :obj:`str`
          Paths to nifti contrast maps
        - 'tmaps': :obj:`list` of :obj:`str` (if 'get_tmaps' set to True)
          Paths to nifti t maps
        - 'masks': :obj:`list` of :obj:`str`
          Paths to nifti files corresponding to the subjects individual masks
        - 'anats': :obj:`list` of :obj:`str`
          Paths to nifti files corresponding to the subjects structural images

    References
    ----------
    .. footbibliography::

    See Also
    --------
    nilearn.datasets.fetch_localizer_calculation_task
    nilearn.datasets.fetch_localizer_button_task

    """
    check_params(locals())

    _check_inputs_fetch_localizer_contrasts(contrasts)

    if n_subjects is None:
        n_subjects = 94  # 94 subjects available
    if isinstance(n_subjects, numbers.Number) and (
        (n_subjects > 94) or (n_subjects < 1)
    ):
        warnings.warn(
            f"Wrong value for 'n_subjects' ({n_subjects}). The maximum "
            "value will be used instead ('n_subjects=94').",
            stacklevel=find_stack_level(),
        )
        n_subjects = 94  # 94 subjects available

    # convert contrast names
    contrasts_wrapped = []
    # get a unique ID for each contrast. It is used to give a unique name to
    # each download file and avoid name collisions.
    contrasts_indices = []
    for contrast in contrasts:
        if contrast in ALLOWED_CONTRASTS:
            contrasts_wrapped.append(contrast.title().replace(" ", ""))
            contrasts_indices.append(ALLOWED_CONTRASTS.index(contrast))
        elif contrast in CONTRAST_NAME_WRAPPER:
            name = CONTRAST_NAME_WRAPPER[contrast]
            contrasts_wrapped.append(name.title().replace(" ", ""))
            contrasts_indices.append(ALLOWED_CONTRASTS.index(name))

    # Get the dataset OSF index
    dataset_name = "brainomics_localizer"
    index_url = "https://osf.io/hwbm2/download"
    data_dir = get_dataset_dir(
        dataset_name, data_dir=data_dir, verbose=verbose
    )

    index_file = fetch_single_file(
        index_url, data_dir, verbose=verbose, resume=resume
    )
    with index_file.open() as of:
        index = json.load(of)

    if isinstance(n_subjects, numbers.Number):
        subject_mask = np.arange(1, n_subjects + 1)
    else:
        subject_mask = np.array(n_subjects)
    subject_ids = [f"S{int(s):02}" for s in subject_mask]

    data_types = ["cmaps"]
    if get_tmaps:
        data_types.append("tmaps")

    # Build data URLs that will be fetched
    # Download from the relevant OSF project,
    # using hashes generated from the OSF API.
    # Note the trailing slash.
    # For more info, see:
    # https://gist.github.com/emdupre/3cb4d564511d495ea6bf89c6a577da74
    root_url = "https://osf.io/download/{0}/"
    files = {}
    filenames = []

    for subject_id, data_type, contrast in itertools.product(
        subject_ids, data_types, contrasts_wrapped
    ):
        name_aux = f"{data_type}_{contrast}"
        # keep the replaced string (str.replace does not work in place)
        name_aux = name_aux.replace(" ", "_")
        file_path = Path("brainomics_data", subject_id, f"{name_aux}.nii.gz")

        path = "/".join(
            [
                "/localizer",
                "derivatives",
                "spm_1st_level",
                f"sub-{subject_id}",
                (
                    f"sub-{subject_id}_task-localizer"
                    f"_acq-{contrast}_{data_type}.nii.gz"
                ),
            ]
        )

        if _is_valid_path(path, index, verbose=verbose):
            file_url = root_url.format(index[path][1:])
            opts = {"move": file_path}
            filenames.append((file_path, file_url, opts))
            files.setdefault(data_type, []).append(file_path)

    # Fetch masks if asked by user
    if get_masks:
        for subject_id in subject_ids:
            file_path = Path(
                "brainomics_data", subject_id, "boolean_mask_mask.nii.gz"
            )

            path = "/".join(
                [
                    "/localizer",
                    "derivatives",
                    "spm_1st_level",
                    f"sub-{subject_id}",
                    f"sub-{subject_id}_mask.nii.gz",
                ]
            )

            if _is_valid_path(path, index, verbose=verbose):
                file_url = root_url.format(index[path][1:])
                opts = {"move": file_path}
                filenames.append((file_path, file_url, opts))
                files.setdefault("masks", []).append(file_path)

    # Fetch anats if asked by user
    if get_anats:
        for subject_id in subject_ids:
            file_path = Path(
                "brainomics_data",
                subject_id,
                "normalized_T1_anat_defaced.nii.gz",
            )

            path = "/".join(
                [
                    "/localizer",
                    "derivatives",
                    "spm_preprocessing",
                    f"sub-{subject_id}",
                    f"sub-{subject_id}_T1w.nii.gz",
                ]
            )

            if _is_valid_path(path, index, verbose=verbose):
                file_url = root_url.format(index[path][1:])
                opts = {"move": file_path}
                filenames.append((file_path, file_url, opts))
                files.setdefault("anats", []).append(file_path)

    # Fetch subject characteristics
    participants_file = Path("brainomics_data", "participants.tsv")
    path = "/localizer/participants.tsv"
    if _is_valid_path(path, index, verbose=verbose):
        file_url = root_url.format(index[path][1:])
        opts = {"move": participants_file}
        filenames.append((participants_file, file_url, opts))

    # Fetch behavioral
    behavioural_file = Path("brainomics_data", "phenotype", "behavioural.tsv")

    path = "/localizer/phenotype/behavioural.tsv"
    if _is_valid_path(path, index, verbose=verbose):
        file_url = root_url.format(index[path][1:])
        opts = {"move": behavioural_file}
        filenames.append((behavioural_file, file_url, opts))

    # Actual data fetching
    fdescr = get_dataset_descr(dataset_name)
    fetch_files(data_dir, filenames, verbose=verbose)
    for key, value in files.items():
        files[key] = [str(data_dir / val) for val in value]

    # Load covariates file
    participants_file = data_dir / participants_file
    csv_data = pd.read_csv(participants_file, delimiter="\t")
    behavioural_file = data_dir / behavioural_file
    csv_data2 = pd.read_csv(behavioural_file, delimiter="\t")
    csv_data = csv_data.merge(csv_data2)
    subject_names = csv_data["participant_id"].tolist()
    subjects_indices = [
        subject_names.index(name)
        for name in subject_ids
        if name in subject_names
    ]
    csv_data = csv_data.iloc[subjects_indices]

    return Bunch(ext_vars=csv_data, description=fdescr, **files)
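
# --- Usage sketch (editor's addition) --------------------------------------
# Fetch contrast and t maps for two subjects; 'ext_vars' carries the merged
# participants/behavioural covariates as a pandas DataFrame. Hypothetical
# helper name; requires network access on the first call.
def _example_fetch_localizer_contrasts():
    data = fetch_localizer_contrasts(
        ["left vs right button press"], n_subjects=2, get_tmaps=True
    )
    print(data.cmaps)  # one contrast map per subject
    print(data.tmaps)  # present because get_tmaps=True
    print(data.ext_vars.shape)
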

def _check_inputs_fetch_localizer_contrasts(contrasts):
    """Check that requested contrast name exists."""
    if isinstance(contrasts, str):
        raise ValueError(
            "Contrasts should be a list of strings, but "
            f'a single string was given: "{contrasts}"'
        )
    unknown_contrasts = [
        x
        for x in contrasts
        if (x not in ALLOWED_CONTRASTS and x not in CONTRAST_NAME_WRAPPER)
    ]
    if unknown_contrasts:
        raise ValueError(
            "The following contrasts are not available:\n"
            f"- {'- '.join(unknown_contrasts)}"
        )


def _is_valid_path(path, index, verbose):
    if path not in index:
        logger.log(f"Skipping path '{path}'...", verbose)
        return False
    return True

@fill_doc
def fetch_localizer_calculation_task(n_subjects=1, data_dir=None, verbose=1):
    """Fetch calculation task contrast maps from the localizer.

    Parameters
    ----------
    n_subjects : :obj:`int`, default=1
        The number of subjects to load. If None is given,
        all 94 subjects are used.

    %(data_dir)s

    %(verbose)s

    Returns
    -------
    data : Bunch
        Dictionary-like object, the interest attributes are :
        'cmaps': string list, giving paths to nifti contrast maps

    Notes
    -----
    This function is only a thin wrapper around
    :func:`fetch_localizer_contrasts`, to make the examples easier
    to read and understand.
    The 'calculation (auditory and visual cue)' contrast is used.

    See Also
    --------
    nilearn.datasets.fetch_localizer_button_task
    nilearn.datasets.fetch_localizer_contrasts

    """
    check_params(locals())

    data = fetch_localizer_contrasts(
        ["calculation (auditory and visual cue)"],
        n_subjects=n_subjects,
        get_tmaps=False,
        get_masks=False,
        get_anats=False,
        data_dir=data_dir,
        resume=True,
        verbose=verbose,
    )
    return data
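
# --- Usage sketch (editor's addition) --------------------------------------
# One-subject fetch of the calculation contrast; hypothetical helper name.
def _example_fetch_localizer_calculation_task():
    calc = fetch_localizer_calculation_task(n_subjects=1)
    print(calc.cmaps[0])  # path to the calculation contrast map
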

@fill_doc
def fetch_localizer_button_task(data_dir=None, verbose=1):
    """Fetch left vs right button press :term:`contrast` maps \
    from the localizer.

    Parameters
    ----------
    %(data_dir)s

    %(verbose)s

    Returns
    -------
    data : Bunch
        Dictionary-like object, the interest attributes are :

        - 'cmaps': string list, giving paths to nifti :term:`contrast` maps
        - 'tmap': string, giving the path to the nifti t map
        - 'anat': string, giving the path to the normalized anatomical image

    Notes
    -----
    This function is only a thin wrapper around
    :func:`fetch_localizer_contrasts`, to make the examples easier
    to read and understand.
    The 'left vs right button press' contrast is used.

    See Also
    --------
    nilearn.datasets.fetch_localizer_calculation_task
    nilearn.datasets.fetch_localizer_contrasts

    """
    check_params(locals())

    data = fetch_localizer_contrasts(
        ["left vs right button press"],
        n_subjects=[2],
        get_tmaps=True,
        get_masks=False,
        get_anats=True,
        data_dir=data_dir,
        resume=True,
        verbose=verbose,
    )
    # Additional keys for backward compatibility
    data["tmap"] = data["tmaps"][0]
    data["anat"] = data["anats"][0]
    return data
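
# --- Usage sketch (editor's addition) --------------------------------------
# The backward-compatibility keys 'tmap' and 'anat' point at the first (and
# only) entries of 'tmaps' and 'anats'. Hypothetical helper name.
def _example_fetch_localizer_button_task():
    button = fetch_localizer_button_task()
    print(button.tmap)
    print(button.anat)
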

@fill_doc
def fetch_abide_pcp(
    data_dir=None,
    n_subjects=None,
    pipeline="cpac",
    band_pass_filtering=False,
    global_signal_regression=False,
    derivatives=None,
    quality_checked=True,
    url=None,
    verbose=1,
    **kwargs,
):
    """Fetch ABIDE dataset.

    Fetch the Autism Brain Imaging Data Exchange (ABIDE) dataset wrt criteria
    that can be passed as parameter. Note that this is the preprocessed
    version of ABIDE provided by the Preprocessed Connectomes Project (PCP).
    See :footcite:t:`Nielsen2013`.

    Parameters
    ----------
    %(data_dir)s
    n_subjects : :obj:`int`, default=None
        The number of subjects to load. If None is given,
        all available subjects are used (this number depends on the
        preprocessing pipeline used).

    pipeline : :obj:`str` {'cpac', 'ccs', 'dparsf', 'niak'}, default='cpac'
        Possible pipelines are "ccs", "cpac", "dparsf" and "niak".

    band_pass_filtering : :obj:`bool`, default=False
        Due to controversies in the literature, band pass filtering is
        optional. If true, signal is band filtered between 0.01Hz and 0.1Hz.

    global_signal_regression : :obj:`bool`, default=False
        Indicates if global signal regression should be applied on the
        signals.

    derivatives : :obj:`list` of :obj:`str`, default=None
        Types of downloaded files. Possible values are: alff, degree_binarize,
        degree_weighted, dual_regression, eigenvector_binarize,
        eigenvector_weighted, falff, func_mask, func_mean, func_preproc, lfcd,
        reho, rois_aal, rois_cc200, rois_cc400, rois_dosenbach160, rois_ez,
        rois_ho, rois_tt, and vmhc. Please refer to the PCP site for more
        details.
        Will default to ``['func_preproc']`` if ``None`` is passed.

    quality_checked : :obj:`bool`, default=True
        If true (default), restrict the list of the subjects to the ones that
        passed quality assessment for all raters.
    %(url)s
    %(verbose)s

    kwargs : extra parameters, optional
        Any extra keyword argument will be used to filter downloaded subjects
        according to the CSV phenotypic file. Some examples of filters are
        indicated below.

    SUB_ID : :obj:`list` of :obj:`int` in [50001, 50607], optional
        Ids of the subjects to be loaded.

    DX_GROUP : :obj:`int` in {1, 2}, optional
        1 is autism, 2 is control.

    DSM_IV_TR : :obj:`int` in [0, 4], optional
        0 is control, 1 is autism, 2 is Asperger, 3 is PDD-NOS,
        4 is Asperger or PDD-NOS.

    AGE_AT_SCAN : :obj:`float` in [6.47, 64], optional
        Age of the subject.

    SEX : :obj:`int` in {1, 2}, optional
        1 is male, 2 is female.

    HANDEDNESS_CATEGORY : :obj:`str` in {'R', 'L', 'Mixed', 'Ambi'}, optional
        R = Right, L = Left, Ambi = Ambidextrous.

    HANDEDNESS_SCORE : :obj:`int` in [-100, 100], optional
        Positive = Right, Negative = Left, 0 = Ambidextrous.

    Returns
    -------
    data : :class:`sklearn.utils.Bunch`
        Dictionary-like object, the keys are described below.

        - 'description': :obj:`str`, description of the dataset.

        - 'phenotypic': :obj:`pandas.DataFrame`
          phenotypic information for each subject.

        - Specific Derivative Keys:
          Additional keys, 'func_preproc' being the default, are
          introduced based on the provided 'derivatives'
          parameter during fetching. Any combination of the
          parameters below may occur.

            - 'func_preproc' (default): :obj:`numpy.ndarray`,
              paths to preprocessed functional MRI data in NIfTI format.
              This key is present by default when fetching the dataset.
            - 'alff': :obj:`numpy.ndarray`,
              amplitude values of low-frequency fluctuations
              in functional MRI data.
            - 'degree_binarize': :obj:`numpy.ndarray`,
              data specific to binarized node degree in brain networks.
            - 'degree_weighted': :obj:`numpy.ndarray`,
              data specific to weighted node degree,
              considering connectivity strength in brain networks.
            - 'dual_regression': :obj:`numpy.ndarray`,
              results from dual regression analysis,
              often involving the identification of resting-state networks.
            - 'eigenvector_binarize': :obj:`numpy.ndarray`,
              data specific to binarized eigenvector
              centrality, a measure of node influence in brain networks.
            - 'eigenvector_weighted': :obj:`numpy.ndarray`,
              data specific to weighted eigenvector
              centrality, reflecting node influence with consideration
              of connectivity strength.
            - 'falff': :obj:`numpy.ndarray`,
              data specific to fractional amplitude values of
              low-frequency fluctuations.
            - 'func_mask': :obj:`numpy.ndarray`,
              functional mask data, often used to define regions of interest.
            - 'func_mean': :obj:`numpy.ndarray`,
              mean functional MRI data,
              representing average activity across the brain.
            - 'lfcd': :obj:`numpy.ndarray`,
              data specific to local functional connectivity density
              in brain networks.
            - 'reho': :obj:`numpy.ndarray`,
              data specific to regional homogeneity in functional MRI data.
            - 'rois_aal': :obj:`numpy.ndarray`,
              data specific to anatomical regions
              defined by the Automatic Anatomical Labeling atlas.
            - 'rois_cc200': :obj:`numpy.ndarray`,
              data specific to regions defined by the Craddock 200 atlas.
            - 'rois_cc400': :obj:`numpy.ndarray`,
              data specific to regions defined by the Craddock 400 atlas.
            - 'rois_dosenbach160': :obj:`numpy.ndarray`,
              data specific to regions defined by the Dosenbach 160 atlas.
            - 'rois_ez': :obj:`numpy.ndarray`,
              data specific to regions defined by the EZ atlas.
            - 'rois_ho': :obj:`numpy.ndarray`,
              data specific to regions defined by the Harvard-Oxford atlas.
            - 'rois_tt': :obj:`numpy.ndarray`,
              data specific to regions defined by the Talairach atlas.
            - 'vmhc': :obj:`numpy.ndarray`,
              data specific to voxel-mirrored homotopic connectivity in
              functional MRI data.

    Notes
    -----
    Code and description of preprocessing pipelines are provided on the
    `PCP website <http://preprocessed-connectomes-project.org/>`_.

    References
    ----------
    .. footbibliography::

    """
    check_params(locals())

    if derivatives is None:
        derivatives = ["func_preproc"]
    # People keep getting it wrong and submitting a string instead of a
    # list of strings. We'll make their life easy.
    if isinstance(derivatives, str):
        derivatives = [derivatives]

    # Parameter check
    for derivative in derivatives:
        if derivative not in [
            "alff",
            "degree_binarize",
            "degree_weighted",
            "dual_regression",
            "eigenvector_binarize",
            "eigenvector_weighted",
            "falff",
            "func_mask",
            "func_mean",
            "func_preproc",
            "lfcd",
            "reho",
            "rois_aal",
            "rois_cc200",
            "rois_cc400",
            "rois_dosenbach160",
            "rois_ez",
            "rois_ho",
            "rois_tt",
            "vmhc",
        ]:
            raise KeyError(f"{derivative} is not a valid derivative")

    strategy = ""
    if not band_pass_filtering:
        strategy += "no"
    strategy += "filt_"
    if not global_signal_regression:
        strategy += "no"
    strategy += "global"

    # General file: phenotypic information
    dataset_name = "ABIDE_pcp"
    data_dir = get_dataset_dir(
        dataset_name, data_dir=data_dir, verbose=verbose
    )

    if url is None:
        url = (
            "https://s3.amazonaws.com/fcp-indi/data/Projects/ABIDE_Initiative"
        )

    if quality_checked:
        kwargs["qc_rater_1"] = "OK"
        kwargs["qc_anat_rater_2"] = ["OK", "maybe"]
        kwargs["qc_func_rater_2"] = ["OK", "maybe"]
        kwargs["qc_anat_rater_3"] = "OK"
        kwargs["qc_func_rater_3"] = "OK"

    # Fetch the phenotypic file and load it
    csv = "Phenotypic_V1_0b_preprocessed1.csv"
    path_csv = Path(
        fetch_files(data_dir, [(csv, f"{url}/{csv}", {})], verbose=verbose)[0]
    )

    # Note: the phenotypic file contains strings with embedded commas, which
    # break naive CSV loading. This is why we do a pass over the file to
    # replace the commas that appear inside double quotes. This could be
    # done simply with pandas but we don't want such a dependency ATM.
    # pheno = pandas.read_csv(path_csv).to_records()
    with path_csv.open() as pheno_f:
        pheno = [f"i{pheno_f.readline()}"]

        # This regexp replaces commas between double quotes
        pheno.extend(
            re.sub(r',(?=[^"]*"(?:[^"]*"[^"]*")*[^"]*$)', ";", line)
            for line in pheno_f
        )

    # bytes (encode()) needed for python 2/3 compat with numpy
    pheno = "\n".join(pheno).encode()
    pheno = BytesIO(pheno)
    pheno = pd.read_csv(pheno, comment="$")

    # First, filter subjects with no filename
    pheno = pheno[pheno["FILE_ID"] != "no_filename"]
    # Apply user defined filters
    user_filter = filter_columns(pheno, kwargs)
    pheno = pheno[user_filter]

    # Go into specific data folder and url
    data_dir = data_dir / pipeline / strategy
    url = f"{url}/Outputs/{pipeline}/{strategy}"

    # Get the files
    file_ids = pheno["FILE_ID"].tolist()
    if n_subjects is not None:
        file_ids = file_ids[:n_subjects]
        pheno = pheno[:n_subjects]

    results = {
        "description": get_dataset_descr(dataset_name),
        "phenotypic": pheno,
    }
    for derivative in derivatives:
        ext = ".1D" if derivative.startswith("rois") else ".nii.gz"
        files = []
        for file_id in file_ids:
            file_ = [
                (
                    f"{file_id}_{derivative}{ext}",
                    "/".join(
                        [url, derivative, f"{file_id}_{derivative}{ext}"]
                    ),
                    {},
                )
            ]
            files.append(fetch_files(data_dir, file_, verbose=verbose)[0])
        # Load derivatives if needed
        if ext == ".1D":
            files = [np.loadtxt(f) for f in files]
        results[derivative] = files
    return Bunch(**results)
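
# --- Usage sketch (editor's addition) --------------------------------------
# Fetch a few quality-checked ABIDE subjects and filter on a phenotypic
# column via **kwargs (here SEX == 1, i.e. male). Hypothetical helper name;
# the download is large on the first call.
def _example_fetch_abide_pcp():
    abide = fetch_abide_pcp(
        n_subjects=3,
        pipeline="cpac",
        derivatives=["func_preproc"],
        SEX=1,
    )
    print(abide.func_preproc)
    print(abide.phenotypic[["SUB_ID", "DX_GROUP"]])
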

def _load_mixed_gambles(zmap_imgs):
    """Ravel zmaps (one per subject) along the time axis, resulting \
    in n_subjects * n_trials 3D niimgs, then build a gain vector y \
    of the same length.
    """
    X = []
    y = []
    mask = []
    for zmap_img in zmap_imgs:
        # load subject data
        this_X = get_data(zmap_img)
        affine = zmap_img.affine
        finite_mask = np.all(np.isfinite(this_X), axis=-1)
        this_mask = np.logical_and(np.all(this_X != 0, axis=-1), finite_mask)
        this_y = np.array([np.arange(1, 9)] * 6).ravel()

        # gain levels
        if len(this_y) != this_X.shape[-1]:
            raise RuntimeError(
                f"{zmap_img}: Expecting {len(this_y)} volumes, "
                f"got {this_X.shape[-1]}!"
            )

        # standardize subject data
        this_X -= this_X.mean(axis=-1)[..., np.newaxis]
        std = this_X.std(axis=-1)
        std[std == 0] = 1
        this_X /= std[..., np.newaxis]

        # commit subject data
        X.append(this_X)
        y.extend(this_y)
        mask.append(this_mask)
    y = pd.DataFrame({"gain": y})
    X = np.concatenate(X, axis=-1)
    mask = np.sum(mask, axis=0) > 0.5 * len(mask)
    mask = np.logical_and(mask, np.all(np.isfinite(X), axis=-1))
    X = X[mask, :].T
    tmp = np.zeros([*mask.shape, len(X)])
    tmp[mask, :] = X.T
    mask_img = Nifti1Image(mask.astype("uint8"), affine)
    X = four_to_three(Nifti1Image(tmp, affine))
    return X, y, mask_img

@fill_doc
def fetch_mixed_gambles(
    n_subjects=1,
    data_dir=None,
    url=None,
    resume=True,
    return_raw_data=False,
    verbose=1,
):
    """Fetch Jimura "mixed gambles" dataset.

    See the :ref:`dataset description <mixed_gamble_maps>`
    for more information.

    Parameters
    ----------
    n_subjects : :obj:`int`, default=1
        The number of subjects to load. If ``None`` is given, all the
        subjects are used.
    %(data_dir)s
    %(url)s
    %(resume)s
    %(verbose)s
    return_raw_data : :obj:`bool`, default=False
        If ``False``, then the data will be transformed into an ``(X, y)``
        pair, suitable for machine learning routines. ``X`` is a list
        of ``n_subjects * 48`` :class:`~nibabel.nifti1.Nifti1Image`
        objects (where 48 is the number of trials), and ``y`` is an
        array of shape ``(n_subjects * 48,)``.

    Returns
    -------
    data : :class:`~sklearn.utils.Bunch`
        Dictionary-like object, the attributes of interest are:

        - 'zmaps': :obj:`list` of :obj:`str`
          Paths to realigned gain betamaps (one nifti per subject).
        - 'subject_id': pd.DataFrame of subjects IDs
        - 'gain': :obj:`list` of :class:`~nibabel.nifti1.Nifti1Image` \
          or ``None``
          If ``return_raw_data`` is ``True``,
          this is a list of
          ``n_subjects * 48`` :class:`~nibabel.nifti1.Nifti1Image` objects,
          else it is ``None``.
        - 'y': DataFrame of shape ``(n_subjects * 48,)`` or ``None``
          If ``return_raw_data`` is ``True``,
          then this is a DataFrame of shape ``(n_subjects * 48,)``,
          else it is ``None``.
        - 'description': data description

    """
    check_params(locals())

    if n_subjects > 16:
        warnings.warn(
            "Warning: there are only 16 subjects!",
            stacklevel=find_stack_level(),
        )
        n_subjects = 16
    if url is None:
        url = (
            "https://www.nitrc.org/frs/download.php/7229/"
            "jimura_poldrack_2012_zmaps.zip"
        )
    opts = {"uncompress": True}
    files = [
        (f"zmaps{os.sep}sub{int(j + 1):03}_zmaps.nii.gz", url, opts)
        for j in range(n_subjects)
    ]
    data_dir = get_dataset_dir("jimura_poldrack_2012_zmaps", data_dir=data_dir)
    zmap_fnames = fetch_files(data_dir, files, resume=resume, verbose=verbose)
    subject_id = pd.DataFrame(
        {"subject_id": np.repeat(np.arange(n_subjects), 6 * 8).tolist()}
    )
    description = get_dataset_descr("mixed_gambles")
    data = Bunch(
        zmaps=zmap_fnames, subject_id=subject_id, description=description
    )
    if not return_raw_data:
        X, y, mask_img = _load_mixed_gambles(
            check_niimg(data.zmaps, return_iterator=True)
        )
        data.zmaps, data.gain, data.mask_img = X, y, mask_img
    return data
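
# --- Usage sketch (editor's addition) --------------------------------------
# With return_raw_data=False the zmaps are split into single-volume images
# and paired with the gain levels in a DataFrame. Hypothetical helper name.
def _example_fetch_mixed_gambles():
    data = fetch_mixed_gambles(n_subjects=2, return_raw_data=False)
    print(len(data.zmaps))  # 2 subjects * 48 trials
    print(sorted(data.gain["gain"].unique()))  # gain levels 1..8
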

@fill_doc
def fetch_megatrawls_netmats(
    dimensionality=100,
    timeseries="eigen_regression",
    matrices="partial_correlation",
    data_dir=None,
    resume=True,
    verbose=1,
):
    """Download and return network matrices data \
    from the MegaTrawls release of the HCP.

    This data can be used to predict relationships between imaging data and
    non-imaging behavioral measures such as age, sex, education, etc.
    The network matrices are estimated from functional connectivity
    datasets of 461 subjects.

    .. admonition:: Technical details
       :class: important

       For more technical details about predicting the measures, refer to:
       Stephen Smith et al,
       HCP beta-release of the Functional Connectivity MegaTrawl.
       April 2015 "HCP500-MegaTrawl" release.
       https://db.humanconnectome.org/megatrawl/

    .. admonition:: Terms and conditions
       :class: attention

       This is open access data. You must agree to the Terms and conditions
       of using this data before using it, available at:
       http://humanconnectome.org/data/data-use-terms/open-access.html

    Parameters
    ----------
    dimensionality : :obj:`int`, default=100
        Valid inputs are 25, 50, 100, 200, 300. By default, network matrices
        estimated using Group :term:`ICA` brain :term:`parcellation`
        of 100 components/dimensions will be returned.

    timeseries : :obj:`str`, default='eigen_regression'
        Valid inputs are 'multiple_spatial_regression' or 'eigen_regression'.
        By default ('eigen_regression'), matrices estimated using first
        principal eigen component timeseries signals extracted from each
        subject's data parcellations will be returned.
        Otherwise ('multiple_spatial_regression'), matrices estimated
        using spatial-regressor-based timeseries signals extracted from
        each subject's data parcellations will be returned.

    matrices : :obj:`str`, default='partial_correlation'
        Valid inputs are 'full_correlation' or 'partial_correlation'.
        By default, partial correlation matrices will be returned;
        otherwise, if selected, full correlation matrices will be returned.
    %(data_dir)s
    %(resume)s
    %(verbose)s

    Returns
    -------
    data : Bunch
        Dictionary-like object, the attributes are :

        - 'dimensions': int, consists of given input in dimensions.

        - 'timeseries': str, consists of given input in timeseries method.

        - 'matrices': str, consists of given type of specific matrices.

        - 'correlation_matrices': pd.DataFrame
          consists of correlation matrices
          based on given type of matrices. Array size will depend on given
          dimensions (n, n).

        - 'description': data description

    Notes
    -----
    For more information
    see the :ref:`dataset description <megatrawls_maps>`.

    """
    check_params(locals())

    url = "http://www.nitrc.org/frs/download.php/8037/Megatrawls.tgz"
    opts = {"uncompress": True}

    error_message = (
        "Invalid {0} input is provided: {1}, choose one of them {2}"
    )
    # standard dataset terms
    dimensionalities = [25, 50, 100, 200, 300]
    if dimensionality not in dimensionalities:
        raise ValueError(
            error_message.format(
                "dimensionality", dimensionality, dimensionalities
            )
        )
    timeseries_methods = ["multiple_spatial_regression", "eigen_regression"]
    if timeseries not in timeseries_methods:
        raise ValueError(
            error_message.format("timeseries", timeseries, timeseries_methods)
        )
    output_matrices_names = ["full_correlation", "partial_correlation"]
    if matrices not in output_matrices_names:
        raise ValueError(
            error_message.format("matrices", matrices, output_matrices_names)
        )

    dataset_name = "Megatrawls"
    data_dir = get_dataset_dir(
        dataset_name, data_dir=data_dir, verbose=verbose
    )
    description = get_dataset_descr(dataset_name)

    timeseries_map = {
        "multiple_spatial_regression": "ts2",
        "eigen_regression": "ts3",
    }
    matrices_map = {
        "full_correlation": "Znet1.txt",
        "partial_correlation": "Znet2.txt",
    }
    filepath = [
        (
            Path(
                f"3T_Q1-Q6related468_MSMsulc_d{dimensionality}_{timeseries_map[timeseries]}",
                matrices_map[matrices],
            ),
            url,
            opts,
        )
    ]

    # Fetch all the files
    files = fetch_files(data_dir, filepath, resume=resume, verbose=verbose)

    # Load the files into dataframe
    correlation_matrices = pd.read_table(files[0], sep=r"\s+", header=None)

    return Bunch(
        dimensions=dimensionality,
        timeseries=timeseries,
        matrices=matrices,
        correlation_matrices=correlation_matrices,
        description=description,
    )
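
# --- Usage sketch (editor's addition) --------------------------------------
# Fetch the default 100-dimensional partial-correlation netmats; the result
# is a (100, 100) pandas DataFrame. Hypothetical helper name.
def _example_fetch_megatrawls_netmats():
    netmats = fetch_megatrawls_netmats(
        dimensionality=100,
        timeseries="eigen_regression",
        matrices="partial_correlation",
    )
    print(netmats.correlation_matrices.shape)
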

def nki_ids():
    """Return the subject ids of the NKI dataset."""
    return [
        "A00028185",
        "A00033747",
        "A00035072",
        "A00035827",
        "A00035840",
        "A00037112",
        "A00037511",
        "A00038998",
        "A00039391",
        "A00039431",
        "A00039488",
        "A00040524",
        "A00040623",
        "A00040944",
        "A00043299",
        "A00043520",
        "A00043677",
        "A00043722",
        "A00045589",
        "A00050998",
        "A00051063",
        "A00051064",
        "A00051456",
        "A00051457",
        "A00051477",
        "A00051513",
        "A00051514",
        "A00051517",
        "A00051528",
        "A00051529",
        "A00051539",
        "A00051604",
        "A00051638",
        "A00051658",
        "A00051676",
        "A00051678",
        "A00051679",
        "A00051726",
        "A00051774",
        "A00051796",
        "A00051835",
        "A00051882",
        "A00051925",
        "A00051927",
        "A00052070",
        "A00052117",
        "A00052118",
        "A00052126",
        "A00052180",
        "A00052197",
        "A00052214",
        "A00052234",
        "A00052307",
        "A00052319",
        "A00052499",
        "A00052502",
        "A00052577",
        "A00052612",
        "A00052639",
        "A00053202",
        "A00053369",
        "A00053456",
        "A00053474",
        "A00053546",
        "A00053576",
        "A00053577",
        "A00053578",
        "A00053625",
        "A00053626",
        "A00053627",
        "A00053874",
        "A00053901",
        "A00053927",
        "A00053949",
        "A00054038",
        "A00054153",
        "A00054173",
        "A00054358",
        "A00054482",
        "A00054532",
        "A00054533",
        "A00054534",
        "A00054621",
        "A00054895",
        "A00054897",
        "A00054913",
        "A00054929",
        "A00055061",
        "A00055215",
        "A00055352",
        "A00055353",
        "A00055542",
        "A00055738",
        "A00055763",
        "A00055806",
        "A00056097",
        "A00056098",
        "A00056164",
        "A00056372",
        "A00056452",
        "A00056489",
        "A00056949",
    ]

1707@fill_doc 

1708def fetch_surf_nki_enhanced( 

1709 n_subjects=10, data_dir=None, url=None, resume=True, verbose=1 

1710): 

1711 """Download and load the NKI enhanced :term:`resting-state` dataset, \ 

1712 preprocessed and projected to the fsaverage5 space surface. 

1713 

1714 .. versionadded:: 0.3 

1715 

1716 Parameters 

1717 ---------- 

1718 n_subjects : :obj:`int`, default=10 

1719 The number of subjects to load from maximum of 102 subjects. 

1720 By default, 10 subjects will be loaded. If None is given, 

1721 all 102 subjects will be loaded. 

1722 %(data_dir)s 

1723 %(url)s 

1724 %(resume)s 

1725 %(verbose)s 

1726 

1727 Returns 

1728 ------- 

1729 data : :obj:`sklearn.utils.Bunch` 

1730 Dictionary-like object, the interest attributes are : 

1731 

1732 - 'func_left': Paths to Gifti files containing resting state 

1733 time series left hemisphere 

1734 - 'func_right': Paths to Gifti files containing resting state 

1735 time series right hemisphere 

1736 - 'phenotypic': pd.DataFrame containing tuple with subject ID, age, 

1737 dominant hand and sex for each subject. 

1738 - 'description': data description of the release and references. 

1739 

1740 .. admonition:: scipy >= 0.14.0 compatibility 

1741 :class: important 

1742 

1743 It may be necessary 

1744 to coerce to float the data loaded from the Gifti files 

1745 to avoid issues with scipy >= 0.14.0. 

1746 

1747 Notes 

1748 ----- 

1749 For more information 

1750 see the :ref:`dataset description <nki_dataset>`. 

1751 """ 

1752 check_params(locals()) 

1753 

1754 if url is None: 

1755 url = "https://www.nitrc.org/frs/download.php/" 

1756 

1757 # Preliminary checks and declarations 

1758 dataset_name = "nki_enhanced_surface" 

1759 data_dir = get_dataset_dir( 

1760 dataset_name, data_dir=data_dir, verbose=verbose 

1761 ) 

1762 

1763 nitrc_ids = range(8260, 8464) 

1764 ids = nki_ids() 

1765 max_subjects = len(ids) 

1766 if n_subjects is None: 

1767 n_subjects = max_subjects 

1768 if n_subjects > max_subjects: 

1769 warnings.warn( 

1770 f"Warning: there are only {max_subjects} subjects.", 

1771 stacklevel=find_stack_level(), 

1772 ) 

1773 n_subjects = max_subjects 

1774 ids = ids[:n_subjects] 

1775 

1776 # Dataset description 

1777 fdescr = get_dataset_descr(dataset_name) 

1778 

1779 # First, get the metadata 

1780 phenotypic_file = "NKI_enhanced_surface_phenotypics.csv" 

1781 phenotypic = ( 

1782 phenotypic_file, 

1783 url + "8470/pheno_nki_nilearn.csv", 

1784 {"move": phenotypic_file}, 

1785 ) 

1786 

1787 phenotypic = fetch_files( 

1788 data_dir, [phenotypic], resume=resume, verbose=verbose 

1789 )[0] 

1790 

1791 # Load the csv file 

1792 phenotypic = pd.read_csv( 

1793 phenotypic, 

1794 header=1, 

1795 names=["Subject", "Age", "Dominant Hand", "Sex"], 

1796 ) 

1797 

1798 # Keep phenotypic information for selected subjects 

1799 mask = phenotypic["Subject"].apply(lambda x: str(x) in ids) 

1800 phenotypic = phenotypic[mask] 

1801 

1802 # Download subjects' datasets 

1803 func_right = [] 

1804 func_left = [] 

1805 for i, ids_i in enumerate(ids): 

1806 archive = f"{url}%i{os.sep}%s_%s_preprocessed_fsaverage5_fwhm6.gii" 

1807 func = f"%s{os.sep}%s_%s_preprocessed_fwhm6.gii" 

1808 rh = fetch_files( 

1809 data_dir, 

1810 [ 

1811 ( 

1812 func % (ids_i, ids_i, "right"), 

1813 archive % (nitrc_ids[2 * i + 1], ids_i, "rh"), 

1814 {"move": func % (ids_i, ids_i, "right")}, 

1815 ) 

1816 ], 

1817 resume=resume, 

1818 verbose=verbose, 

1819 ) 

1820 lh = fetch_files( 

1821 data_dir, 

1822 [ 

1823 ( 

1824 func % (ids_i, ids_i, "left"), 

1825 archive % (nitrc_ids[2 * i], ids_i, "lh"), 

1826 {"move": func % (ids_i, ids_i, "left")}, 

1827 ) 

1828 ], 

1829 resume=resume, 

1830 verbose=verbose, 

1831 ) 

1832 

1833 func_right.append(rh[0]) 

1834 func_left.append(lh[0]) 

1835 

1836 return Bunch( 

1837 func_left=func_left, 

1838 func_right=func_right, 

1839 phenotypic=phenotypic, 

1840 description=fdescr, 

1841 ) 



@fill_doc
def load_nki(
    mesh="fsaverage5",
    mesh_type="pial",
    n_subjects=1,
    data_dir=None,
    url=None,
    resume=True,
    verbose=1,
):
    """Load NKI enhanced surface data into a surface object.

    .. versionadded:: 0.11.0

    Parameters
    ----------
    mesh : :obj:`str`, default='fsaverage5'
        Which :term:`mesh` to fetch.
        Should be one of the following values:
        %(fsaverage_options)s

    mesh_type : :obj:`str`, default='pial'
        Must be one of:

        - ``"pial"``
        - ``"white_matter"``
        - ``"inflated"``
        - ``"sphere"``
        - ``"flat"``

    n_subjects : :obj:`int`, default=1
        The number of subjects to load, from a maximum of 102 subjects.
        By default, 1 subject will be loaded.
        If None is given, all 102 subjects will be loaded.

    %(data_dir)s

    %(url)s

    %(resume)s

    %(verbose)s

    Returns
    -------
    list of SurfaceImage objects
        One image per subject.

    Notes
    -----
    For more information
    see the :ref:`dataset description <nki_dataset>`.
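
    Examples
    --------
    A minimal sketch of typical usage (downloads data on the first call):

    .. code-block:: python

        from nilearn.datasets import load_nki

        images = load_nki(n_subjects=1, mesh_type="inflated")
        print(len(images))  # one SurfaceImage per subject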

1895 """ 

1896 check_params(locals()) 

1897 

1898 if mesh_type not in ALLOWED_MESH_TYPES: 

1899 raise ValueError( 

1900 f"'mesh_type' must be one of {ALLOWED_MESH_TYPES}.\n" 

1901 f"Got: {mesh_type}." 

1902 ) 

1903 

1904 fsaverage = load_fsaverage(mesh=mesh, data_dir=data_dir) 

1905 

1906 nki_dataset = fetch_surf_nki_enhanced( 

1907 n_subjects=n_subjects, 

1908 data_dir=data_dir, 

1909 url=url, 

1910 resume=resume, 

1911 verbose=verbose, 

1912 ) 

1913 

1914 images = [] 

1915 for i, (left, right) in enumerate( 

1916 zip(nki_dataset["func_left"], nki_dataset["func_right"]), start=1 

1917 ): 

1918 logger.log(f"Loading subject {i} of {n_subjects}.", verbose=verbose) 

1919 

1920 img = SurfaceImage( 

1921 mesh=fsaverage[mesh_type], 

1922 data={ 

1923 "left": left, 

1924 "right": right, 

1925 }, 

1926 ) 

1927 images.append(img) 

1928 

1929 return images 



@fill_doc
def _fetch_development_fmri_participants(data_dir, url, verbose):
    """Helper for the fetch_development_fmri function.

    This function helps in downloading and loading participants data from
    a .tsv file uploaded on Open Science Framework (OSF).

    The original .tsv file contains many columns but this function picks only
    those columns that are relevant.

    Parameters
    ----------
    %(data_dir)s
    %(url)s
    %(verbose)s

    Returns
    -------
    participants : pandas.DataFrame
        Contains each subject's age, age group (child or adult),
        gender, and handedness.

    """
    check_params(locals())

    dataset_name = "development_fmri"
    data_dir = get_dataset_dir(
        dataset_name, data_dir=data_dir, verbose=verbose
    )

    if url is None:
        url = "https://osf.io/yr3av/download"

    files = [("participants.tsv", url, {"move": "participants.tsv"})]
    path_to_participants = fetch_files(data_dir, files, verbose=verbose)[0]

    # Load path to participants
    names = [
        "participant_id",
        "Age",
        "AgeGroup",
        "Child_Adult",
        "Gender",
        "Handedness",
    ]
    participants = pd.read_table(path_to_participants, usecols=names)
    return participants


@fill_doc
def _fetch_development_fmri_functional(
    participants, data_dir, url, resume, verbose
):
    """Helper for fetch_development_fmri.

    This function helps in downloading the functional MRI data in Nifti
    format and the confounds corresponding to each subject.

    The files are downloaded from Open Science Framework (OSF).

    Parameters
    ----------
    participants : pandas.DataFrame
        Should contain a column participant_id with the subject ids. The
        files are fetched based on the ids in this column.
    %(data_dir)s
    %(url)s
    %(resume)s
    %(verbose)s

    Returns
    -------
    func : list of str (Nifti files)
        Paths to functional MRI data (4D) for each subject.

    regressors : list of str (tsv files)
        Paths to regressors related to each subject.

    """
    check_params(locals())

    dataset_name = "development_fmri"
    data_dir = get_dataset_dir(
        dataset_name, data_dir=data_dir, verbose=verbose
    )

    if url is None:
        # Download from the relevant OSF project, using hashes generated
        # from the OSF API. Note the trailing slash. For more info, see:
        # https://gist.github.com/emdupre/3cb4d564511d495ea6bf89c6a577da74
        url = "https://osf.io/download/{}/"

    confounds = "{}_task-pixar_desc-confounds_regressors.tsv"
    func = "{0}_task-pixar_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz"

    # The csv file contains unique download keys per Nifti file and confound
    # pre-extracted from OSF. Required for downloading files.
    dtype = [
        ("participant_id", "U12"),
        ("key_regressor", "U24"),
        ("key_bold", "U24"),
    ]
    names = ["participant_id", "key_r", "key_b"]
    # csv file contains download information related to OpenScience(osf)
    osf_data = csv_to_array(
        (PACKAGE_DIRECTORY / "data" / "development_fmri.csv"),
        skip_header=True,
        dtype=dtype,
        names=names,
    )

    funcs = []
    regressors = []

    for participant_id in participants["participant_id"]:
        this_osf_id = osf_data[osf_data["participant_id"] == participant_id]
        # Download regressors
        confound_url = url.format(this_osf_id["key_r"][0])
        regressor_file = [
            (
                confounds.format(participant_id),
                confound_url,
                {"move": confounds.format(participant_id)},
            )
        ]
        path_to_regressor = fetch_files(
            data_dir, regressor_file, verbose=verbose
        )[0]
        regressors.append(path_to_regressor)
        # Download bold images
        func_url = url.format(this_osf_id["key_b"][0])
        func_file = [
            (
                func.format(participant_id),
                func_url,
                {"move": func.format(participant_id)},
            )
        ]
        path_to_func = fetch_files(
            data_dir, func_file, resume=resume, verbose=verbose
        )[0]
        funcs.append(path_to_func)
    return funcs, regressors


@fill_doc
def fetch_development_fmri(
    n_subjects=None,
    reduce_confounds=True,
    data_dir=None,
    resume=True,
    verbose=1,
    age_group="both",
):
    """Fetch movie watching based brain development dataset (fMRI).

    The data is downsampled to 4mm resolution for convenience
    with a repetition time (t_r) of 2 seconds.
    The data come from OpenNeuro. See Notes below.

    Please cite :footcite:t:`Richardson2018`
    if you are using this dataset.

    .. versionadded:: 0.5.2

    Parameters
    ----------
    n_subjects : :obj:`int`, default=None
        The number of subjects to load. If None, all the subjects are
        loaded. Total 155 subjects.

    reduce_confounds : :obj:`bool`, default=True
        If True, the returned confounds only include 6 motion parameters,
        mean framewise displacement, signal from white matter, csf, and
        6 anatomical compcor parameters. This selection only serves the
        purpose of having realistic examples. Depending on your research
        question, other confounds might be more appropriate.
        If False, returns all :term:`fMRIPrep` confounds.
    %(data_dir)s
    %(resume)s
    %(verbose)s
    age_group : :obj:`str`, default='both'
        Which age group to fetch

        - 'adults' = fetch adults only (n=33, ages 18-39)
        - 'child' = fetch children only (n=122, ages 3-12)
        - 'both' = fetch full sample (n=155)

    Returns
    -------
    data : Bunch
        Dictionary-like object, the interest attributes are:

        - 'func': :obj:`list` of :obj:`str` (Nifti files)
            Paths to downsampled functional MRI data (4D) for each subject.

        - 'confounds': :obj:`list` of :obj:`str` (tsv files)
            Paths to confounds related to each subject.

        - 'phenotypic': pandas.DataFrame
            Contains each subject's age, age group (child or adult), gender,
            and handedness.

    Notes
    -----
    The original data is downloaded from OpenNeuro
    https://openneuro.org/datasets/ds000228/versions/1.0.0

    This fetcher downloads downsampled data that are available on Open
    Science Framework (OSF). Located here: https://osf.io/5hju4/files/

    Preprocessing details: https://osf.io/wjtyq/

    Note that if n_subjects > 2, and age_group is 'both',
    the fetcher will return a ratio of children and adults representative
    of the total sample.

    References
    ----------
    .. footbibliography::

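    Examples
    --------
    A minimal sketch of typical usage (downloads data on the first call):

    .. code-block:: python

        from nilearn.datasets import fetch_development_fmri

        data = fetch_development_fmri(n_subjects=2, age_group="both")
        # One adult and one child, each with a 4D image and a confounds file
        print(data.func[0], data.confounds[0])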

2153 """ 

2154 check_params(locals()) 

2155 

2156 dataset_name = "development_fmri" 

2157 data_dir = get_dataset_dir(dataset_name, data_dir=data_dir, verbose=1) 

2158 keep_confounds = [ 

2159 "trans_x", 

2160 "trans_y", 

2161 "trans_z", 

2162 "rot_x", 

2163 "rot_y", 

2164 "rot_z", 

2165 "framewise_displacement", 

2166 "a_comp_cor_00", 

2167 "a_comp_cor_01", 

2168 "a_comp_cor_02", 

2169 "a_comp_cor_03", 

2170 "a_comp_cor_04", 

2171 "a_comp_cor_05", 

2172 "csf", 

2173 "white_matter", 

2174 ] 

2175 

2176 # Dataset description 

2177 fdescr = get_dataset_descr(dataset_name) 

2178 

2179 # Participants data: ids, demographics, etc 

2180 participants = _fetch_development_fmri_participants( 

2181 data_dir=data_dir, url=None, verbose=verbose 

2182 ) 

2183 

2184 adult_count, child_count = _filter_func_regressors_by_participants( 

2185 participants, age_group 

2186 ) 

2187 max_subjects = adult_count + child_count 

2188 

2189 n_subjects = _set_invalid_n_subjects_to_max( 

2190 n_subjects, max_subjects, age_group 

2191 ) 

2192 

2193 # To keep the proportion of children versus adults 

2194 percent_total = float(n_subjects) / max_subjects 

2195 n_child = np.round(percent_total * child_count).astype(int) 

2196 n_adult = np.round(percent_total * adult_count).astype(int) 

2197 

2198 # We want to return adults by default (i.e., `age_group=both`) or 

2199 # if explicitly requested. 

2200 if (age_group != "child") and (n_subjects == 1): 

2201 n_adult, n_child = 1, 0 

2202 

2203 if (age_group == "both") and (n_subjects == 2): 

2204 n_adult, n_child = 1, 1 

2205 

2206 participants = _filter_csv_by_n_subjects(participants, n_adult, n_child) 

2207 

2208 funcs, regressors = _fetch_development_fmri_functional( 

2209 participants, 

2210 data_dir=data_dir, 

2211 url=None, 

2212 resume=resume, 

2213 verbose=verbose, 

2214 ) 

2215 

2216 if reduce_confounds: 

2217 regressors = _reduce_confounds(regressors, keep_confounds) 

2218 return Bunch( 

2219 func=funcs, 

2220 confounds=regressors, 

2221 phenotypic=participants, 

2222 description=fdescr, 

2223 ) 



def _filter_func_regressors_by_participants(participants, age_group):
    """Filter functional and regressors based on participants."""
    valid_age_groups = ("both", "child", "adult")
    if age_group not in valid_age_groups:
        raise ValueError(
            f"Wrong value for age_group={age_group}. "
            f"Valid arguments are: {valid_age_groups}"
        )

    child_adult = participants["Child_Adult"].to_list()

    child_count = child_adult.count("child") if age_group != "adult" else 0
    adult_count = child_adult.count("adult") if age_group != "child" else 0
    return adult_count, child_count


def _filter_csv_by_n_subjects(participants, n_adult, n_child):
    """Restrict the csv files to the adequate number of subjects."""
    child_ids = participants[participants["Child_Adult"] == "child"][
        "participant_id"
    ][:n_child]
    adult_ids = participants[participants["Child_Adult"] == "adult"][
        "participant_id"
    ][:n_adult]
    ids = np.hstack([adult_ids, child_ids])
    participants = participants[np.isin(participants["participant_id"], ids)]
    participants = participants.sort_values(by=["Child_Adult"])
    return participants
2255 

2256def _set_invalid_n_subjects_to_max(n_subjects, max_subjects, age_group): 

2257 """If n_subjects is invalid, sets it to max.""" 

2258 if n_subjects is None: 

2259 n_subjects = max_subjects 

2260 

2261 if isinstance(n_subjects, numbers.Number) and ( 

2262 (n_subjects > max_subjects) or (n_subjects < 1) 

2263 ): 

2264 warnings.warn( 

2265 f"Wrong value for n_subjects={n_subjects}. " 

2266 f"The maximum value (for age_group={age_group}) " 

2267 f"will be used instead: n_subjects={max_subjects}.", 

2268 stacklevel=find_stack_level(), 

2269 ) 

2270 n_subjects = max_subjects 

2271 return n_subjects 

2272 

2273 

2274def _reduce_confounds(regressors, keep_confounds): 

2275 reduced_regressors = [] 

2276 for in_file in regressors: 

2277 out_file = in_file.replace("desc-confounds", "desc-reducedConfounds") 

2278 if not Path(out_file).is_file(): 

2279 confounds = pd.read_csv(in_file, delimiter="\t").to_records() 

2280 selected_confounds = confounds[keep_confounds] 

2281 header = "\t".join(selected_confounds.dtype.names) 

2282 np.savetxt( 

2283 out_file, 

2284 np.array(selected_confounds.tolist()), 

2285 header=header, 

2286 delimiter="\t", 

2287 comments="", 

2288 ) 

2289 reduced_regressors.append(out_file) 

2290 return reduced_regressors 



# datasets originally belonging to nistats follow


@fill_doc
def fetch_language_localizer_demo_dataset(
    data_dir=None, verbose=1, legacy_output=True
):
    """Download language localizer demo dataset.

    Parameters
    ----------
    %(data_dir)s

    %(verbose)s

    legacy_output : :obj:`bool`, default=True

        .. versionadded:: 0.10.3
        .. deprecated:: 0.10.3

        Starting from version 0.13.0
        the ``legacy_output`` argument will be removed
        and the fetcher will always return
        a :obj:`sklearn.utils.Bunch`.


    Returns
    -------
    data : :class:`sklearn.utils.Bunch`
        Dictionary-like object, the interest attributes are:

        - ``'data_dir'``: :obj:`str` Path to downloaded dataset.

        - ``'func'``: :obj:`list` of :obj:`str`,
          Absolute paths of downloaded files on disk

        - ``'description'`` : :obj:`str`, dataset description

    .. warning::

        LEGACY OUTPUT:

        **data_dir** : :obj:`str`
            Path to downloaded dataset.

        **downloaded_files** : :obj:`list` of :obj:`str`
            Absolute paths of downloaded files on disk

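    Examples
    --------
    A minimal sketch of typical usage (downloads data on the first call):

    .. code-block:: python

        from nilearn.datasets import fetch_language_localizer_demo_dataset

        data = fetch_language_localizer_demo_dataset(legacy_output=False)
        print(data.data_dir)
        print(len(data.func))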

2341 """ 

2342 check_params(locals()) 

2343 

2344 url = "https://osf.io/3dj2a/download" 

2345 # When it starts working again change back to: 

2346 # url = 'https://osf.io/nh987/download' 

2347 main_folder = "fMRI-language-localizer-demo-dataset" 

2348 

2349 data_dir = get_dataset_dir(main_folder, data_dir=data_dir, verbose=verbose) 

2350 # The files_spec needed for fetch_files 

2351 files_spec = [(f"{main_folder}.zip", url, {"move": f"{main_folder}.zip"})] 

2352 # Only download if directory is empty 

2353 # Directory will have been created by the call to get_dataset_dir above 

2354 if not list(data_dir.iterdir()): 

2355 downloaded_files = fetch_files( 

2356 data_dir, files_spec, resume=True, verbose=verbose 

2357 ) 

2358 uncompress_file(downloaded_files[0]) 

2359 

2360 file_list = [str(path) for path in data_dir.rglob("*") if path.is_file()] 

2361 if legacy_output: 

2362 warnings.warn( 

2363 category=DeprecationWarning, 

2364 stacklevel=find_stack_level(), 

2365 message=( 

2366 "From version 0.13.0 this fetcher" 

2367 "will always return a Bunch.\n" 

2368 "Use `legacy_output=False` " 

2369 "to start switch to this new behavior." 

2370 ), 

2371 ) 

2372 return str(data_dir), sorted(file_list) 

2373 

2374 description = get_dataset_descr("language_localizer_demo") 

2375 return Bunch( 

2376 data_dir=str(data_dir), func=sorted(file_list), description=description 

2377 ) 



@fill_doc
def fetch_bids_langloc_dataset(data_dir=None, verbose=1):
    """Download language localizer example :term:`bids<BIDS>` dataset.

    .. deprecated:: 0.10.3

        This fetcher function will be removed as it returns the same data
        as :func:`nilearn.datasets.fetch_language_localizer_demo_dataset`.

        Please use
        :func:`nilearn.datasets.fetch_language_localizer_demo_dataset`
        instead.

    Parameters
    ----------
    %(data_dir)s
    %(verbose)s

    Returns
    -------
    data_dir : :obj:`str`
        Path to downloaded dataset.

    downloaded_files : :obj:`list` of :obj:`str`
        Absolute paths of downloaded files on disk.
    """
    check_params(locals())

    warnings.warn(
        (
            "The 'fetch_bids_langloc_dataset' function will be removed "
            "in version 0.13.0 as it returns the same data "
            "as 'fetch_language_localizer_demo_dataset'.\n"
            "Please use 'fetch_language_localizer_demo_dataset' instead."
        ),
        DeprecationWarning,
        stacklevel=find_stack_level(),
    )
    url = "https://files.osf.io/v1/resources/9q7dv/providers/osfstorage/5888d9a76c613b01fc6acc4e"
    dataset_name = "bids_langloc_example"
    main_folder = "bids_langloc_dataset"
    data_dir = get_dataset_dir(
        dataset_name, data_dir=data_dir, verbose=verbose
    )

    # The files_spec needed for fetch_files
    files_spec = [(f"{main_folder}.zip", url, {"move": f"{main_folder}.zip"})]
    if not (data_dir / main_folder).exists():
        downloaded_files = fetch_files(
            data_dir, files_spec, resume=True, verbose=verbose
        )
        uncompress_file(downloaded_files[0])
    main_path = data_dir / main_folder
    file_list = [str(path) for path in main_path.rglob("*") if path.is_file()]
    return str(data_dir / main_folder), sorted(file_list)


@fill_doc
def fetch_ds000030_urls(data_dir=None, verbose=1):
    """Fetch URLs for files from the ds000030 :term:`BIDS` dataset.

    .. versionadded:: 0.9.2

    This dataset is version 1.0.4 of the "UCLA Consortium for
    Neuropsychiatric Phenomics LA5c" dataset
    :footcite:p:`Poldrack2016`.

    Downloading the index allows users to explore the dataset directories
    to select specific files to download.
    The index is a sorted list of urls.

    Parameters
    ----------
    %(data_dir)s
    %(verbose)s

    Returns
    -------
    urls_path : :obj:`str`
        Path to downloaded dataset index.

    urls : :obj:`list` of :obj:`str`
        Sorted list of dataset directories.

    References
    ----------
    .. footbibliography::
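
    Examples
    --------
    A minimal sketch of typical usage (downloads the index on the first call):

    .. code-block:: python

        from nilearn.datasets import fetch_ds000030_urls

        urls_path, urls = fetch_ds000030_urls()
        print(urls_path)  # local path of the JSON index
        print(urls[:3])  # first few file URLs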

2467 """ 

2468 check_params(locals()) 

2469 

2470 DATA_PREFIX = "ds000030/ds000030_R1.0.4/uncompressed" 

2471 FILE_URL = "https://osf.io/86xj7/download" 

2472 

2473 data_dir = get_dataset_dir( 

2474 DATA_PREFIX, 

2475 data_dir=data_dir, 

2476 verbose=verbose, 

2477 ) 

2478 

2479 final_download_path = data_dir / "urls.json" 

2480 downloaded_file_path = fetch_files( 

2481 data_dir=data_dir, 

2482 files=[ 

2483 ( 

2484 final_download_path, 

2485 FILE_URL, 

2486 {"move": final_download_path}, 

2487 ) 

2488 ], 

2489 resume=True, 

2490 ) 

2491 urls_path = downloaded_file_path[0] 

2492 with Path(urls_path).open() as json_file: 

2493 urls = json.load(json_file) 

2494 

2495 return urls_path, urls 



def select_from_index(
    urls, inclusion_filters=None, exclusion_filters=None, n_subjects=None
):
    """Select subset of urls with given filters.

    Parameters
    ----------
    urls : :obj:`list` of :obj:`str`
        List of dataset urls obtained from index download.

    inclusion_filters : :obj:`list` of :obj:`str` or None, default=None
        List of unix shell-style wildcard strings
        that will be used to filter the url list.
        If a filter matches the url it is retained for download.
        Multiple filters work on top of each other,
        like a logical "and" operator, creating a more restrictive query.
        Inclusion and exclusion filters apply together.
        For example the filter '*task-rest*' would keep only urls
        that contain the 'task-rest' string.

    exclusion_filters : :obj:`list` of :obj:`str` or None, default=None
        List of unix shell-style wildcard strings
        that will be used to filter the url list.
        If a filter matches the url it is discarded for download.
        Multiple filters work on top of each other,
        like a logical "and" operator, creating a more restrictive query.
        Inclusion and exclusion filters apply together.
        For example the filter '*task-rest*' would discard all urls
        that contain the 'task-rest' string.

    n_subjects : :obj:`int`, default=None
        Number of subjects to download from the dataset. All by default.

    Returns
    -------
    urls : :obj:`list` of :obj:`str`
        Sorted list of filtered dataset directories.

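    Examples
    --------
    A minimal sketch; the wildcard patterns below are only illustrative,
    and ``urls`` is assumed to come from :func:`fetch_ds000030_urls`:

    .. code-block:: python

        from nilearn.datasets import fetch_ds000030_urls, select_from_index

        _, urls = fetch_ds000030_urls()
        # Keep T1w images of two subjects, excluding derivatives
        selected = select_from_index(
            urls,
            inclusion_filters=["*T1w*"],
            exclusion_filters=["*derivatives*"],
            n_subjects=2,
        )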

2536 """ 

2537 inclusion_filters = inclusion_filters or [] 

2538 exclusion_filters = exclusion_filters or [] 

2539 # We apply filters to the urls 

2540 for exclusion in exclusion_filters: 

2541 urls = [url for url in urls if not fnmatch.fnmatch(url, exclusion)] 

2542 for inclusion in inclusion_filters: 

2543 urls = [url for url in urls if fnmatch.fnmatch(url, inclusion)] 

2544 

2545 # subject selection filter 

2546 # from the url list we infer all available subjects like 'sub-xxx/' 

2547 subject_regex = "sub-[a-z|A-Z|0-9]*[_./]" 

2548 

2549 def infer_subjects(urls): 

2550 subjects = set() 

2551 for url in urls: 

2552 if "sub-" in url: 

2553 subjects.add(re.search(subject_regex, url)[0][:-1]) 

2554 return sorted(subjects) 

2555 

2556 # We get a list of subjects (for the moment the first n subjects) 

2557 selected_subjects = set(infer_subjects(urls)[:n_subjects]) 

2558 # We exclude urls of subjects not selected 

2559 urls = [ 

2560 url 

2561 for url in urls 

2562 if "sub-" not in url 

2563 or re.search(subject_regex, url)[0][:-1] in selected_subjects 

2564 ] 

2565 return urls 



def patch_openneuro_dataset(file_list):
    """Add symlinks for files not named according to :term:`BIDS` conventions.

    .. warning::
        This function uses a series of hardcoded patterns to generate the
        corrected filenames.
        These patterns are not comprehensive and this function is not
        guaranteed to produce BIDS-compliant files.

    Parameters
    ----------
    file_list : :obj:`list` of :obj:`str`
        A list of filenames to update.
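
    Examples
    --------
    A minimal sketch; the path below is hypothetical:

    .. code-block:: python

        from nilearn.datasets import patch_openneuro_dataset

        files = ["/tmp/sub-01_T1w_preproc.nii.gz"]  # hypothetical path
        patch_openneuro_dataset(files)
        # If it does not already exist, this creates the symlink
        # /tmp/sub-01_desc-preproc_T1w.nii.gz pointing to the original file.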

2581 """ 

2582 REPLACEMENTS = { 

2583 "_T1w_brainmask": "_desc-brain_mask", 

2584 "_T1w_preproc": "_desc-preproc_T1w", 

2585 "_T1w_space-MNI152NLin2009cAsym_brainmask": "_space-MNI152NLin2009cAsym_desc-brain_mask", # noqa: E501 

2586 "_T1w_space-MNI152NLin2009cAsym_class-": "_space-MNI152NLin2009cAsym_label-", # noqa: E501 

2587 "_T1w_space-MNI152NLin2009cAsym_preproc": "_space-MNI152NLin2009cAsym_desc-preproc_T1w", # noqa: E501 

2588 "_bold_confounds": "_desc-confounds_regressors", 

2589 "_bold_space-MNI152NLin2009cAsym_brainmask": "_space-MNI152NLin2009cAsym_desc-brain_mask", # noqa: E501 

2590 "_bold_space-MNI152NLin2009cAsym_preproc": "_space-MNI152NLin2009cAsym_desc-preproc_bold", # noqa: E501 

2591 } 

2592 

2593 # Create a symlink if a file with the modified filename does not exist 

2594 for old_pattern, new_pattern in REPLACEMENTS.items(): 

2595 for name in file_list: 

2596 if old_pattern in name: 

2597 new_name = name.replace(old_pattern, new_pattern) 

2598 if not Path(new_name).exists(): 

2599 os.symlink(name, new_name) 



@fill_doc
def fetch_openneuro_dataset(
    urls=None,
    data_dir=None,
    dataset_version="ds000030_R1.0.4",
    verbose=1,
):
    """Download OpenNeuro :term:`BIDS` dataset.

    This function specifically downloads files from a series of URLs.
    Unless you use :func:`fetch_ds000030_urls` or the default parameters,
    it is up to the user to ensure that the URLs are correct,
    and that they are associated with an OpenNeuro dataset.

    Parameters
    ----------
    urls : :obj:`list` of :obj:`str`, default=None
        List of URLs to dataset files to download.
        If not specified, all files from the default dataset
        (``ds000030_R1.0.4``) will be downloaded.
    %(data_dir)s
    dataset_version : :obj:`str`, default='ds000030_R1.0.4'
        Dataset version name. Assumes it is of the form [name]_[version].
    %(verbose)s

    Returns
    -------
    data_dir : :obj:`str`
        Path to downloaded dataset.

    downloaded_files : :obj:`list` of :obj:`str`
        Absolute paths of downloaded files on disk.

    Notes
    -----
    The default dataset downloaded by this function is the
    "UCLA Consortium for Neuropsychiatric Phenomics LA5c" dataset
    :footcite:p:`Poldrack2016`.

    This copy includes filenames that are not compliant with the current
    version of :term:`BIDS`, so this function also calls
    :func:`patch_openneuro_dataset` to generate BIDS-compliant symlinks.

    See Also
    --------
    :func:`fetch_ds000030_urls`
    :func:`patch_openneuro_dataset`

    References
    ----------
    .. footbibliography::
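
    Examples
    --------
    A minimal sketch chaining the index download, a filter, and the fetch;
    the subject label used in the filter is only illustrative:

    .. code-block:: python

        from nilearn.datasets import (
            fetch_ds000030_urls,
            fetch_openneuro_dataset,
            select_from_index,
        )

        _, urls = fetch_ds000030_urls()
        urls = select_from_index(urls, inclusion_filters=["*sub-10159*"])
        data_dir, files = fetch_openneuro_dataset(urls=urls)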

2653 """ 

2654 check_params(locals()) 

2655 

2656 # if urls are not specified we download the complete dataset index 

2657 if urls is None: 

2658 DATASET_VERSION = "ds000030_R1.0.4" 

2659 if dataset_version != DATASET_VERSION: 

2660 warnings.warn( 

2661 'If `dataset_version` is not "ds000030_R1.0.4", ' 

2662 '`urls` must be specified. Downloading "ds000030_R1.0.4".', 

2663 stacklevel=find_stack_level(), 

2664 ) 

2665 

2666 data_prefix = ( 

2667 f"{DATASET_VERSION.split('_')[0]}/{DATASET_VERSION}/uncompressed" 

2668 ) 

2669 orig_data_dir = data_dir 

2670 data_dir = get_dataset_dir( 

2671 data_prefix, 

2672 data_dir=data_dir, 

2673 verbose=verbose, 

2674 ) 

2675 

2676 _, urls = fetch_ds000030_urls( 

2677 data_dir=orig_data_dir, 

2678 verbose=verbose, 

2679 ) 

2680 else: 

2681 data_prefix = ( 

2682 f"{dataset_version.split('_')[0]}/{dataset_version}/uncompressed" 

2683 ) 

2684 data_dir = get_dataset_dir( 

2685 data_prefix, 

2686 data_dir=data_dir, 

2687 verbose=verbose, 

2688 ) 

2689 

2690 # The files_spec needed for fetch_files 

2691 files_spec = [] 

2692 files_dir = [] 

2693 

2694 # Check that data prefix is found in each URL 

2695 bad_urls = [url for url in urls if data_prefix not in url] 

2696 if bad_urls: 

2697 raise ValueError( 

2698 f"data_prefix ({data_prefix}) is not found in at least one URL. " 

2699 "This indicates that the URLs do not correspond to the " 

2700 "dataset_version provided.\n" 

2701 f"Affected URLs: {bad_urls}" 

2702 ) 

2703 

2704 for url in urls: 

2705 url_path = url.split(data_prefix + "/")[1] 

2706 file_dir = data_dir / url_path 

2707 files_spec.append((file_dir.name, url, {})) 

2708 files_dir.append(file_dir.parent) 

2709 

2710 # download the files 

2711 downloaded = [] 

2712 for file_spec, file_dir in zip(files_spec, files_dir): 

2713 # Timeout errors are common in the s3 connection so we try to avoid 

2714 # failure of the dataset download for a transient instability 

2715 success = False 

2716 download_attempts = 4 

2717 while download_attempts > 0 and not success: 

2718 try: 

2719 downloaded_files = fetch_files( 

2720 file_dir, 

2721 [file_spec], 

2722 resume=True, 

2723 verbose=verbose, 

2724 ) 

2725 downloaded += downloaded_files 

2726 success = True 

2727 except Exception: 

2728 download_attempts -= 1 

2729 

2730 if not success: 

2731 raise Exception(f"multiple failures downloading {file_spec[1]}") 

2732 

2733 patch_openneuro_dataset(downloaded) 

2734 

2735 return str(data_dir), sorted(downloaded) 

2736 

2737 

2738@fill_doc 

2739def fetch_localizer_first_level(data_dir=None, verbose=1): 

2740 """Download a first-level localizer :term:`fMRI` dataset. 

2741 

2742 Parameters 

2743 ---------- 

2744 %(data_dir)s 

2745 %(verbose)s 

2746 

2747 Returns 

2748 ------- 

2749 data : :obj:`sklearn.utils.Bunch` 

2750 Dictionary-like object, with the keys: 

2751 

2752 - epi_img: the input 4D image 

2753 

2754 - events: a csv file describing the paradigm 

2755 

2756 - description: data description 

2757 
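    Examples
    --------
    A minimal sketch of typical usage (downloads data on the first call):

    .. code-block:: python

        from nilearn.datasets import fetch_localizer_first_level

        data = fetch_localizer_first_level()
        print(data.epi_img)  # path to the 4D Nifti image
        print(data.events)  # path to the events TSV file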

2758 """ 

2759 check_params(locals()) 

2760 

2761 url = "https://osf.io/2bqxn/download" 

2762 epi_img = "sub-12069_task-localizer_space-MNI305.nii.gz" 

2763 events = "sub-12069_task-localizer_events.tsv" 

2764 opts = {"uncompress": True} 

2765 options = ("epi_img", "events", "description") 

2766 dir_ = Path("localizer_first_level") 

2767 filenames = [(dir_ / name, url, opts) for name in [epi_img, events]] 

2768 

2769 dataset_name = "localizer_first_level" 

2770 data_dir = get_dataset_dir( 

2771 dataset_name, data_dir=data_dir, verbose=verbose 

2772 ) 

2773 files = fetch_files(data_dir, filenames, verbose=verbose) 

2774 

2775 params = dict(list(zip(options, files))) 

2776 data = Bunch(**params) 

2777 

2778 description = get_dataset_descr(dataset_name) 

2779 data.description = description 

2780 return data 



def _download_spm_auditory_data(data_dir):
    logger.log("Data absent, downloading...")
    url = (
        "https://www.fil.ion.ucl.ac.uk/spm/download/data/MoAEpilot/"
        "MoAEpilot.bids.zip"
    )
    archive_path = data_dir / Path(url).name
    fetch_single_file(url, data_dir)
    try:
        uncompress_file(archive_path)
    except Exception:
        logger.log("Archive corrupted, trying to download it again.")
        return fetch_spm_auditory(data_dir=data_dir, data_name="")


@fill_doc
@remove_parameters(
    removed_params=["subject_id"],
    reason="The spm_auditory dataset contains only one subject.",
    end_version="0.13.0",
)
def fetch_spm_auditory(
    data_dir=None,
    data_name="spm_auditory",
    subject_id=None,
    verbose=1,
):
    """Fetch :term:`SPM` auditory single-subject data.

    See :footcite:t:`spm_auditory`.

    Parameters
    ----------
    %(data_dir)s

    data_name : :obj:`str`, default='spm_auditory'
        Name of the dataset.

    subject_id : :obj:`str`, default=None
        Indicates which subject to retrieve.
        Will be removed in version ``0.13.0``.

    %(verbose)s

    Returns
    -------
    data : :obj:`sklearn.utils.Bunch`
        Dictionary-like object, the interest attributes are:

        - 'anat': :obj:`list` of :obj:`str`. Path to anat image
        - 'func': :obj:`list` of :obj:`str`. Path to functional image
        - 'events': :obj:`list` of :obj:`str`. Path to events.tsv file
        - 'description': :obj:`str`. Data description

    References
    ----------
    .. footbibliography::

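    Examples
    --------
    A minimal sketch of typical usage (downloads data on the first call):

    .. code-block:: python

        from nilearn.datasets import fetch_spm_auditory

        data = fetch_spm_auditory()
        print(data.anat)  # path to the T1w image
        print(data.func)  # paths to the BOLD images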

2840 """ 

2841 check_params(locals()) 

2842 

2843 data_dir = get_dataset_dir(data_name, data_dir=data_dir, verbose=verbose) 

2844 

2845 if not (data_dir / "MoAEpilot" / "sub-01").exists(): 

2846 _download_spm_auditory_data(data_dir) 

2847 

2848 anat = get_bids_files( 

2849 main_path=data_dir / "MoAEpilot", 

2850 modality_folder="anat", 

2851 file_tag="T1w", 

2852 )[0] 

2853 func = get_bids_files( 

2854 main_path=data_dir / "MoAEpilot", 

2855 modality_folder="func", 

2856 file_tag="bold", 

2857 ) 

2858 events = get_bids_files( 

2859 main_path=data_dir / "MoAEpilot", 

2860 modality_folder="func", 

2861 file_tag="events", 

2862 )[0] 

2863 spm_auditory_data = { 

2864 "anat": anat, 

2865 "func": func, 

2866 "events": events, 

2867 "description": get_dataset_descr("spm_auditory"), 

2868 } 

2869 return Bunch(**spm_auditory_data) 

2870 

2871 

2872def _get_func_data_spm_multimodal(subject_dir, session, _subject_data): 

2873 session_func = sorted( 

2874 subject_dir.glob( 

2875 f"fMRI/Session{session}/fMETHODS-000{session + 4}-*-01.img" 

2876 ) 

2877 ) 

2878 if len(session_func) < 390: 

2879 logger.log( 

2880 f"Missing {390 - len(session_func)} functional scans " 

2881 f"for session {session}." 

2882 ) 

2883 return None 

2884 

2885 _subject_data[f"func{int(session)}"] = [str(path) for path in session_func] 

2886 return _subject_data 

2887 

2888 

2889def _get_session_trials_spm_multimodal(subject_dir, session, _subject_data): 

2890 sess_trials = subject_dir / f"fMRI/trials_ses{int(session)}.mat" 

2891 if not sess_trials.is_file(): 

2892 logger.log(f"Missing session file: {sess_trials}") 

2893 return None 

2894 

2895 _subject_data[f"trials_ses{int(session)}"] = str(sess_trials) 

2896 return _subject_data 

2897 

2898 

2899def _get_anatomical_data_spm_multimodal(subject_dir, _subject_data): 

2900 anat = subject_dir / "sMRI/smri.img" 

2901 if not anat.is_file(): 

2902 logger.log("Missing structural image.") 

2903 return None 

2904 

2905 _subject_data["anat"] = str(anat) 

2906 return _subject_data 

2907 

2908 

2909def _glob_spm_multimodal_fmri_data(subject_dir): 

2910 """Glob data from subject_dir.""" 

2911 _subject_data = {"slice_order": "descending"} 

2912 

2913 for session in range(1, 3): 

2914 # glob func data for session 

2915 _subject_data = _get_func_data_spm_multimodal( 

2916 subject_dir, session, _subject_data 

2917 ) 

2918 if not _subject_data: 

2919 return None 

2920 # glob trials .mat file 

2921 _subject_data = _get_session_trials_spm_multimodal( 

2922 subject_dir, session, _subject_data 

2923 ) 

2924 if not _subject_data: 

2925 return None 

2926 try: 

2927 events = _make_events_file_spm_multimodal_fmri( 

2928 _subject_data, session 

2929 ) 

2930 except MatReadError as mat_err: 

2931 warnings.warn( 

2932 f"{mat_err!s}. An events.tsv file cannot be generated", 

2933 stacklevel=find_stack_level(), 

2934 ) 

2935 else: 

2936 events_filepath = _make_events_filepath_spm_multimodal_fmri( 

2937 _subject_data, session 

2938 ) 

2939 events.to_csv(events_filepath, sep="\t", index=False) 

2940 _subject_data[f"events{session}"] = events_filepath 

2941 

2942 # glob for anat data 

2943 _subject_data = _get_anatomical_data_spm_multimodal( 

2944 subject_dir, _subject_data 

2945 ) 

2946 return Bunch(**_subject_data) if _subject_data else None 

2947 

2948 

2949def _download_data_spm_multimodal(data_dir, subject_dir): 

2950 logger.log("Data absent, downloading...") 

2951 urls = [ 

2952 # fmri 

2953 ( 

2954 "https://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/" 

2955 "multimodal_fmri.zip" 

2956 ), 

2957 # structural 

2958 ( 

2959 "https://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/" 

2960 "multimodal_smri.zip" 

2961 ), 

2962 ] 

2963 

2964 for url in urls: 

2965 archive_path = subject_dir / Path(url).name 

2966 fetch_single_file(url, subject_dir) 

2967 try: 

2968 uncompress_file(archive_path) 

2969 except Exception: 

2970 logger.log("Archive corrupted, trying to download it again.") 

2971 return fetch_spm_multimodal_fmri(data_dir=data_dir, data_name="") 

2972 

2973 return _glob_spm_multimodal_fmri_data(subject_dir) 

2974 

2975 

2976def _make_events_filepath_spm_multimodal_fmri(_subject_data, session): 

2977 key = f"trials_ses{session}" 

2978 events_file_location = Path(_subject_data[key]).parent 

2979 events_filename = f"session{session}_events.tsv" 

2980 events_filepath = str(events_file_location / events_filename) 

2981 return events_filepath 



def _make_events_file_spm_multimodal_fmri(_subject_data, session):
    t_r = 2.0
    timing = loadmat(
        _subject_data[f"trials_ses{int(session)}"],
        squeeze_me=True,
        struct_as_record=False,
    )
    faces_onsets = timing["onsets"][0].ravel()
    scrambled_onsets = timing["onsets"][1].ravel()
    onsets = np.hstack((faces_onsets, scrambled_onsets))
    onsets *= t_r  # because onsets were reported in 'scans' units
    conditions = ["faces"] * len(faces_onsets) + ["scrambled"] * len(
        scrambled_onsets
    )
    duration = np.ones_like(onsets)
    events = pd.DataFrame(
        {"trial_type": conditions, "onset": onsets, "duration": duration}
    )
    return events


@fill_doc
@remove_parameters(
    removed_params=["subject_id"],
    reason="The spm_multimodal_fmri dataset contains only one subject.",
    end_version="0.13.0",
)
def fetch_spm_multimodal_fmri(
    data_dir=None,
    data_name="spm_multimodal_fmri",
    subject_id=None,
    verbose=1,
):
    """Fetcher for Multi-modal Face Dataset.

    For more information,
    see the :ref:`dataset description <spm_multimodal_dataset>`.

    Parameters
    ----------
    %(data_dir)s

    data_name : :obj:`str`, default='spm_multimodal_fmri'
        Name of the dataset.

    subject_id : :obj:`str`, default=None

        .. deprecated:: 0.11.2

            Will be removed in version ``0.13.0``.

    %(verbose)s

    Returns
    -------
    data : :obj:`sklearn.utils.Bunch`
        Dictionary-like object, the interest attributes are:

        - 'func1' : list of :obj:`str`. Paths to functional images for run 1

        - 'func2' : list of :obj:`str`. Paths to functional images for run 2

        - 'events1' : :obj:`str`. Path to onsets TSV file for run 1

        - 'events2' : :obj:`str`. Path to onsets TSV file for run 2

        - 'trials_ses1' : :obj:`str`.
          Path to .mat file containing onsets for run 1

        - 'trials_ses2' : :obj:`str`.
          Path to .mat file containing onsets for run 2

        - 'anat' : :obj:`str`. Path to anat file

        - 'description' : :obj:`str`. Description of the data

        - 't_r' : :obj:`float`. Repetition time in seconds
          of the functional images.

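    Examples
    --------
    A minimal sketch of typical usage (downloads data on the first call):

    .. code-block:: python

        from nilearn.datasets import fetch_spm_multimodal_fmri

        data = fetch_spm_multimodal_fmri()
        print(len(data.func1), len(data.func2))  # scans for runs 1 and 2
        print(data.t_r)  # repetition time in seconds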

3064 """ 

3065 check_params(locals()) 

3066 

3067 data_dir = get_dataset_dir(data_name, data_dir=data_dir, verbose=verbose) 

3068 subject_id = "sub001" 

3069 subject_dir = data_dir / subject_id 

3070 

3071 description = get_dataset_descr("spm_multimodal") 

3072 

3073 # maybe data_dir already contains the data ? 

3074 data = _glob_spm_multimodal_fmri_data(subject_dir) 

3075 if data is None: 

3076 # No. Download the data 

3077 data = _download_data_spm_multimodal(data_dir, subject_dir) 

3078 

3079 data.description = description 

3080 data.t_r = 2 

3081 return data 



@fill_doc
def fetch_fiac_first_level(data_dir=None, verbose=1):
    """Download a first-level fiac :term:`fMRI` dataset (2 runs).

    Parameters
    ----------
    %(data_dir)s
    %(verbose)s

    Returns
    -------
    data : :obj:`sklearn.utils.Bunch`
        Dictionary-like object, the interest attributes are:

        - 'design_matrix1': :obj:`pandas.DataFrame`.
          Design matrix for run 1
        - 'func1': :obj:`str`. Path to Nifti file of run 1
        - 'design_matrix2': :obj:`pandas.DataFrame`.
          Design matrix for run 2
        - 'func2': :obj:`str`. Path to Nifti file of run 2
        - 'mask': :obj:`str`. Path to mask file
        - 'description': :obj:`str`. Data description

    Notes
    -----
    For more information
    see the :ref:`dataset description <fiac_dataset>`.

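    Examples
    --------
    A minimal sketch of typical usage (downloads data on the first call):

    .. code-block:: python

        from nilearn.datasets import fetch_fiac_first_level

        data = fetch_fiac_first_level()
        print(data.func1)  # path to the run 1 Nifti file
        print(data.design_matrix1.columns)  # condition names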

3112 """ 

3113 check_params(locals()) 

3114 

3115 data_dir = get_dataset_dir( 

3116 "fiac_nilearn.glm", data_dir=data_dir, verbose=verbose 

3117 ) 

3118 

3119 def _glob_fiac_data(): 

3120 """Glob data from subject_dir.""" 

3121 _subject_data = {} 

3122 subject_dir = data_dir / "nipy-data-0.2/data/fiac/fiac0" 

3123 for run in [1, 2]: 

3124 # glob func data for session 

3125 session_func = subject_dir / f"run{int(run)}.nii.gz" 

3126 if not session_func.is_file(): 

3127 logger.log(f"Missing functional scan for session {int(run)}.") 

3128 return None 

3129 

3130 _subject_data[f"func{int(run)}"] = str(session_func) 

3131 

3132 # glob design matrix .npz file 

3133 sess_dmtx = subject_dir / f"run{int(run)}_design.npz" 

3134 if not sess_dmtx.is_file(): 

3135 logger.log(f"Missing run file: {sess_dmtx}") 

3136 return None 

3137 

3138 design_matrix_data = np.load(str(sess_dmtx)) 

3139 columns = [x.decode() for x in design_matrix_data["conditions"]] 

3140 

3141 _subject_data[f"design_matrix{int(run)}"] = pd.DataFrame( 

3142 design_matrix_data["X"], columns=columns 

3143 ) 

3144 

3145 # glob for mask data 

3146 mask = subject_dir / "mask.nii.gz" 

3147 if not mask.is_file(): 

3148 logger.log("Missing mask image.") 

3149 return None 

3150 

3151 _subject_data["mask"] = str(mask) 

3152 return Bunch(**_subject_data) 

3153 

3154 description = get_dataset_descr("fiac") 

3155 

3156 # maybe data_dir already contains the data ? 

3157 data = _glob_fiac_data() 

3158 if data is not None: 

3159 data.description = description 

3160 return data 

3161 

3162 # No. Download the data 

3163 logger.log("Data absent, downloading...") 

3164 url = "https://nipy.org/data-packages/nipy-data-0.2.tar.gz" 

3165 

3166 archive_path = data_dir / Path(url).name 

3167 fetch_single_file(url, data_dir) 

3168 try: 

3169 uncompress_file(archive_path) 

3170 except Exception: 

3171 logger.log("Archive corrupted, trying to download it again.") 

3172 data = fetch_fiac_first_level(data_dir=data_dir) 

3173 data.description = description 

3174 return data 

3175 

3176 data = _glob_fiac_data() 

3177 data.description = description 

3178 return data 



@functools.lru_cache
def load_sample_motor_activation_image():
    """Load a single functional image showing motor activations.

    Returns
    -------
    str
        Path to the sample functional image.

    Notes
    -----
    The 'left vs right button press' contrast is used:
    https://neurovault.org/images/10426/

    See Also
    --------
    nilearn.datasets.fetch_neurovault_ids
    nilearn.datasets.fetch_neurovault
    nilearn.datasets.fetch_neurovault_auditory_computation_task
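
    Examples
    --------
    A minimal sketch; the image ships with nilearn, so nothing is downloaded:

    .. code-block:: python

        from nilearn.datasets import load_sample_motor_activation_image
        from nilearn.plotting import plot_stat_map

        stat_img = load_sample_motor_activation_image()
        plot_stat_map(stat_img, threshold=3)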

3200 """ 

3201 return str(Path(__file__).parent / "data" / "image_10426.nii.gz")