1"""Downloading NeuroImaging datasets: \
2functional datasets (task + resting-state).
3"""

import fnmatch
import functools
import itertools
import json
import numbers
import os
import re
import warnings
from io import BytesIO
from pathlib import Path

import numpy as np
import pandas as pd
from nibabel import Nifti1Image, four_to_three
from scipy.io import loadmat
from scipy.io.matlab import MatReadError
from sklearn.utils import Bunch

from nilearn._utils import check_niimg, fill_doc, logger, remove_parameters
from nilearn._utils.logger import find_stack_level
from nilearn._utils.param_validation import check_params
from nilearn.datasets._utils import (
    ALLOWED_MESH_TYPES,
    PACKAGE_DIRECTORY,
    fetch_files,
    fetch_single_file,
    filter_columns,
    get_dataset_descr,
    get_dataset_dir,
    read_md5_sum_file,
    tree,
    uncompress_file,
)
from nilearn.datasets.struct import load_fsaverage
from nilearn.image import get_data
from nilearn.interfaces.bids import get_bids_files
from nilearn.surface import SurfaceImage

from .._utils.numpy_conversions import csv_to_array


@fill_doc
def fetch_haxby(
    data_dir=None,
    subjects=(2,),
    fetch_stimuli=False,
    url=None,
    resume=True,
    verbose=1,
):
    """Download and load the complete Haxby dataset.

    See :footcite:t:`Haxby2001`.

    Parameters
    ----------
    %(data_dir)s
    subjects : :obj:`list` or :obj:`tuple` or :obj:`int`, default=(2,)
        Either a list of subjects or the number of subjects to load,
        from 1 to 6.
        By default, only the second subject is loaded.
        An empty list returns no subject data.

    fetch_stimuli : :obj:`bool`, default=False
        Whether to download the stimuli images.
        They will be presented as a dictionary of categories.
    %(url)s
    %(resume)s
    %(verbose)s

    Returns
    -------
    data : :obj:`sklearn.utils.Bunch`
        Dictionary-like object, the attributes of interest are:

        - 'anat': :obj:`list` of :obj:`str`. Paths to anatomic images.
        - 'func': :obj:`list` of :obj:`str`.
          Paths to nifti file with :term:`BOLD` data.
        - 'session_target': :obj:`list` of :obj:`str`.
          Paths to text file containing run and target data.
        - 'mask': :obj:`str`. Path to fullbrain mask file.
        - 'mask_vt': :obj:`list` of :obj:`str`.
          Paths to nifti ventral temporal mask file.
        - 'mask_face': :obj:`list` of :obj:`str`.
          Paths to nifti with face-responsive brain regions.
        - 'mask_face_little': :obj:`list` of :obj:`str`.
          Spatially more constrained version of the above.
        - 'mask_house': :obj:`list` of :obj:`str`.
          Paths to nifti with house-responsive brain regions.
        - 'mask_house_little': :obj:`list` of :obj:`str`.
          Spatially more constrained version of the above.

    References
    ----------
    .. footbibliography::

    Notes
    -----
    PyMVPA provides a tutorial making use of this dataset:
    http://www.pymvpa.org/tutorial.html

    More information about its structure:
    http://dev.pymvpa.org/datadb/haxby2001.html

    See `additional information
    <https://www.science.org/doi/10.1126/science.1063736>`_

    Run 8 in subject 5 does not contain any task labels.
    The anatomical image for subject 6 is unavailable.
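
    Examples
    --------
    A minimal usage sketch (illustrative, not exhaustive; the first call
    downloads the data, so network access and a writable ``data_dir``
    are assumed)::

        from nilearn.datasets import fetch_haxby

        haxby = fetch_haxby(subjects=[2])
        bold_path = haxby.func[0]  # 4D BOLD NIfTI for subject 2
        vt_mask_path = haxby.mask_vt[0]  # ventral temporal mask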
115 """
116 check_params(locals())
118 if isinstance(subjects, numbers.Number) and subjects > 6:
119 subjects = 6
121 if subjects is not None and isinstance(subjects, (list, tuple)):
122 for sub_id in subjects:
123 if sub_id not in [1, 2, 3, 4, 5, 6]:
124 raise ValueError(
125 f"You provided invalid subject id {sub_id} in a "
126 "list. Subjects must be selected in "
127 "[1, 2, 3, 4, 5, 6]"
128 )
130 dataset_name = "haxby2001"
131 data_dir = get_dataset_dir(
132 dataset_name, data_dir=data_dir, verbose=verbose
133 )
135 # Get the mask
136 url_mask = "https://www.nitrc.org/frs/download.php/7868/mask.nii.gz"
137 mask = fetch_files(
138 data_dir, [("mask.nii.gz", url_mask, {})], verbose=verbose
139 )[0]
141 # Dataset files
142 if url is None:
143 url = "http://data.pymvpa.org/datasets/haxby2001/"
144 md5sums = fetch_files(
145 data_dir, [("MD5SUMS", url + "MD5SUMS", {})], verbose=verbose
146 )[0]
147 md5sums = read_md5_sum_file(md5sums)
149 # definition of dataset files
150 sub_files = [
151 "bold.nii.gz",
152 "labels.txt",
153 "mask4_vt.nii.gz",
154 "mask8b_face_vt.nii.gz",
155 "mask8b_house_vt.nii.gz",
156 "mask8_face_vt.nii.gz",
157 "mask8_house_vt.nii.gz",
158 "anat.nii.gz",
159 ]
160 n_files = len(sub_files)
162 if subjects is None:
163 subjects = []
165 if isinstance(subjects, numbers.Number):
166 subject_mask = np.arange(1, subjects + 1)
167 else:
168 subject_mask = np.array(subjects)
170 files = [
171 (
172 Path(f"subj{int(i)}") / sub_file,
173 url + f"subj{int(i)}-2010.01.14.tar.gz",
174 {
175 "uncompress": True,
176 "md5sum": md5sums.get(f"subj{int(i)}-2010.01.14.tar.gz"),
177 },
178 )
179 for i in subject_mask
180 for sub_file in sub_files
181 if sub_file != "anat.nii.gz" or i != 6
182 ]
184 files = fetch_files(data_dir, files, resume=resume, verbose=verbose)
186 if (isinstance(subjects, numbers.Number) and subjects == 6) or np.any(
187 subject_mask == 6
188 ):
189 files.append(None) # None value because subject 6 has no anat
191 kwargs = {}
192 if fetch_stimuli:
193 stimuli_files = [
194 (
195 Path("stimuli") / "README",
196 url + "stimuli-2010.01.14.tar.gz",
197 {"uncompress": True},
198 )
199 ]
200 readme = fetch_files(
201 data_dir, stimuli_files, resume=resume, verbose=verbose
202 )[0]
203 kwargs["stimuli"] = tree(
204 Path(readme).parent, pattern="*.jpg", dictionary=True
205 )
207 fdescr = get_dataset_descr(dataset_name)
209 # return the data
210 return Bunch(
211 anat=files[7::n_files],
212 func=files[0::n_files],
213 session_target=files[1::n_files],
214 mask_vt=files[2::n_files],
215 mask_face=files[3::n_files],
216 mask_house=files[4::n_files],
217 mask_face_little=files[5::n_files],
218 mask_house_little=files[6::n_files],
219 mask=mask,
220 description=fdescr,
221 **kwargs,
222 )


def adhd_ids():
    """Return subject ids for the ADHD dataset."""
    return [
        "0010042",
        "0010064",
        "0010128",
        "0021019",
        "0023008",
        "0023012",
        "0027011",
        "0027018",
        "0027034",
        "0027037",
        "1019436",
        "1206380",
        "1418396",
        "1517058",
        "1552181",
        "1562298",
        "1679142",
        "2014113",
        "2497695",
        "2950754",
        "3007585",
        "3154996",
        "3205761",
        "3520880",
        "3624598",
        "3699991",
        "3884955",
        "3902469",
        "3994098",
        "4016887",
        "4046678",
        "4134561",
        "4164316",
        "4275075",
        "6115230",
        "7774305",
        "8409791",
        "8697774",
        "9744150",
        "9750701",
    ]


@fill_doc
def fetch_adhd(n_subjects=30, data_dir=None, url=None, resume=True, verbose=1):
    """Download and load the ADHD :term:`resting-state` dataset.

    See :footcite:t:`ADHDdataset`.

    Parameters
    ----------
    n_subjects : :obj:`int`, default=30
        The number of subjects to load from maximum of 40 subjects.
        By default, 30 subjects will be loaded. If None is given,
        all 40 subjects will be loaded.
    %(data_dir)s
    %(url)s
    %(resume)s
    %(verbose)s

    Returns
    -------
    data : :obj:`sklearn.utils.Bunch`
        Dictionary-like object, the attributes of interest are:

        - 'func': Paths to functional :term:`resting-state` images
        - 'phenotypic': :obj:`pandas.DataFrame` with motion parameters
          and phenotypic information for each subject
        - 'confounds': CSV files containing the nuisance variables

    References
    ----------
    .. footbibliography::
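
    Examples
    --------
    A minimal usage sketch (illustrative; downloads data on first call)::

        from nilearn.datasets import fetch_adhd

        adhd = fetch_adhd(n_subjects=2)
        func_path = adhd.func[0]  # preprocessed resting-state 4D image
        confounds_path = adhd.confounds[0]  # CSV of nuisance variables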
301 """
302 check_params(locals())
304 if url is None:
305 url = "https://www.nitrc.org/frs/download.php/"
307 # Preliminary checks and declarations
308 dataset_name = "adhd"
309 data_dir = get_dataset_dir(
310 dataset_name, data_dir=data_dir, verbose=verbose
311 )
312 ids = adhd_ids()
313 nitrc_ids = range(7782, 7822)
314 max_subjects = len(ids)
315 if n_subjects is None:
316 n_subjects = max_subjects
317 if n_subjects > max_subjects:
318 warnings.warn(
319 f"Warning: there are only {max_subjects} subjects.",
320 stacklevel=find_stack_level(),
321 )
322 n_subjects = max_subjects
323 ids = ids[:n_subjects]
324 nitrc_ids = nitrc_ids[:n_subjects]
326 opts = {"uncompress": True}
328 # Dataset description
329 fdescr = get_dataset_descr(dataset_name)
331 # First, get the metadata
332 phenotypic = (
333 "ADHD200_40subs_motion_parameters_and_phenotypics.csv",
334 url + "7781/adhd40_metadata.tgz",
335 opts,
336 )
338 phenotypic = fetch_files(
339 data_dir, [phenotypic], resume=resume, verbose=verbose
340 )[0]
342 # Load the csv file
343 phenotypic = pd.read_table(phenotypic, delimiter=",")
345 # Keep phenotypic information for selected subjects
346 mask = phenotypic["Subject"].apply(lambda x: str(x) in ids)
347 phenotypic = phenotypic[mask]
349 # Download dataset files
351 archives = [
352 url + f"{int(ni)}/adhd40_{ii}.tgz" for ni, ii in zip(nitrc_ids, ids)
353 ]
354 functionals = [
355 f"data/{i}/{i}_rest_tshift_RPI_voreg_mni.nii.gz" for i in ids
356 ]
357 confounds = [f"data/{i}/{i}_regressors.csv" for i in ids]
359 functionals = fetch_files(
360 data_dir,
361 zip(functionals, archives, (opts,) * n_subjects),
362 resume=resume,
363 verbose=verbose,
364 )
366 confounds = fetch_files(
367 data_dir,
368 zip(confounds, archives, (opts,) * n_subjects),
369 resume=resume,
370 verbose=verbose,
371 )
373 return Bunch(
374 func=functionals,
375 confounds=confounds,
376 phenotypic=phenotypic,
377 description=fdescr,
378 )


def miyawaki2008_file_mask():
    """Return file listing for the miyawaki 2008 dataset."""
    return [
        "mask.nii.gz",
        "LHlag0to1.nii.gz",
        "LHlag10to11.nii.gz",
        "LHlag1to2.nii.gz",
        "LHlag2to3.nii.gz",
        "LHlag3to4.nii.gz",
        "LHlag4to5.nii.gz",
        "LHlag5to6.nii.gz",
        "LHlag6to7.nii.gz",
        "LHlag7to8.nii.gz",
        "LHlag8to9.nii.gz",
        "LHlag9to10.nii.gz",
        "LHV1d.nii.gz",
        "LHV1v.nii.gz",
        "LHV2d.nii.gz",
        "LHV2v.nii.gz",
        "LHV3A.nii.gz",
        "LHV3.nii.gz",
        "LHV4v.nii.gz",
        "LHVP.nii.gz",
        "RHlag0to1.nii.gz",
        "RHlag10to11.nii.gz",
        "RHlag1to2.nii.gz",
        "RHlag2to3.nii.gz",
        "RHlag3to4.nii.gz",
        "RHlag4to5.nii.gz",
        "RHlag5to6.nii.gz",
        "RHlag6to7.nii.gz",
        "RHlag7to8.nii.gz",
        "RHlag8to9.nii.gz",
        "RHlag9to10.nii.gz",
        "RHV1d.nii.gz",
        "RHV1v.nii.gz",
        "RHV2d.nii.gz",
        "RHV2v.nii.gz",
        "RHV3A.nii.gz",
        "RHV3.nii.gz",
        "RHV4v.nii.gz",
        "RHVP.nii.gz",
    ]


@fill_doc
def fetch_miyawaki2008(data_dir=None, url=None, resume=True, verbose=1):
    """Download and load the Miyawaki et al. 2008 dataset (153MB).

    See :footcite:t:`Miyawaki2008`.

    Parameters
    ----------
    %(data_dir)s
    %(url)s
    %(resume)s
    %(verbose)s

    Returns
    -------
    data : Bunch
        Dictionary-like object, the attributes of interest are:

        - 'func': :obj:`list` of :obj:`str`
          Paths to nifti file with :term:`BOLD` data
        - 'label': :obj:`list` of :obj:`str`
          Paths to text file containing run and target data
        - 'mask': :obj:`str`
          Path to nifti mask file to define target volume in visual
          cortex
        - 'background': :obj:`str`
          Path to nifti file containing a background image usable as a
          background image for miyawaki images.

    References
    ----------
    .. footbibliography::

    Notes
    -----
    This dataset is available on the `brainliner website
    <http://brainliner.jp/restrictedProject.atr>`_

    See `additional information
    <https://bicr.atr.jp//dni/en/downloads/\
fmri-data-set-for-visual-image-reconstruction/>`_
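
    Examples
    --------
    A minimal usage sketch (illustrative; downloads ~153MB on first call)::

        from nilearn.datasets import fetch_miyawaki2008

        miyawaki = fetch_miyawaki2008()
        n_runs = len(miyawaki.func)  # 32 runs: 12 figure + 20 random
        mask_path = miyawaki.mask  # target volume in visual cortex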
468 """
469 check_params(locals())
471 url = (
472 "https://www.nitrc.org/frs/download.php"
473 "/8486/miyawaki2008.tgz?i_agree=1&download_now=1"
474 )
475 opts = {"uncompress": True}
477 # Dataset files
479 # Functional MRI:
480 # * 20 random scans (usually used for training)
481 # * 12 figure scans (usually used for testing)
483 func_figure = [
484 (Path("func", f"data_figure_run{int(i):02}.nii.gz"), url, opts)
485 for i in range(1, 13)
486 ]
488 func_random = [
489 (Path("func", f"data_random_run{int(i):02}.nii.gz"), url, opts)
490 for i in range(1, 21)
491 ]
493 # Labels, 10x10 patches, stimuli shown to the subject:
494 # * 20 random labels
495 # * 12 figure labels (letters and shapes)
497 label_filename = "data_%s_run%02d_label.csv"
498 label_figure = [
499 (Path("label", label_filename % ("figure", i)), url, opts)
500 for i in range(1, 13)
501 ]
503 label_random = [
504 (Path("label", label_filename % ("random", i)), url, opts)
505 for i in range(1, 21)
506 ]
508 # Masks
509 file_mask = [
510 (Path("mask", m), url, opts) for m in miyawaki2008_file_mask()
511 ]
513 file_names = (
514 func_figure + func_random + label_figure + label_random + file_mask
515 )
517 dataset_name = "miyawaki2008"
518 data_dir = get_dataset_dir(
519 dataset_name, data_dir=data_dir, verbose=verbose
520 )
521 files = fetch_files(data_dir, file_names, resume=resume, verbose=verbose)
523 # Fetch the background image
524 bg_img = fetch_files(
525 data_dir, [("bg.nii.gz", url, opts)], resume=resume, verbose=verbose
526 )[0]
528 fdescr = get_dataset_descr(dataset_name)
530 # Return the data
531 return Bunch(
532 func=files[:32],
533 label=files[32:64],
534 mask=files[64],
535 mask_roi=files[65:],
536 background=bg_img,
537 description=fdescr,
538 )


# we allow the user to use alternatives to Brainomics contrast names
CONTRAST_NAME_WRAPPER = {
    # Checkerboard
    "checkerboard": "checkerboard",
    "horizontal checkerboard": "horizontal checkerboard",
    "vertical checkerboard": "vertical checkerboard",
    "horizontal vs vertical checkerboard": "horizontal vs vertical checkerboard",  # noqa: E501
    "vertical vs horizontal checkerboard": "vertical vs horizontal checkerboard",  # noqa: E501
    # Sentences
    "sentence listening": "auditory sentences",
    "sentence reading": "visual sentences",
    "sentence listening and reading": "auditory&visual sentences",
    "sentence reading vs checkerboard": "visual sentences vs checkerboard",
    # Calculation
    "calculation (auditory cue)": "auditory calculation",
    "calculation (visual cue)": "visual calculation",
    "calculation (auditory and visual cue)": "auditory&visual calculation",
    "calculation (auditory cue) vs sentence listening": "auditory calculation vs auditory sentences",  # noqa: E501
    "calculation (visual cue) vs sentence reading": "visual calculation vs sentences",  # noqa: E501
    "calculation vs sentences": "auditory&visual calculation vs sentences",
    # Calculation + Sentences
    "calculation (auditory cue) and sentence listening": "auditory processing",
    "calculation (visual cue) and sentence reading": "visual processing",
    "calculation (visual cue) and sentence reading vs "
    "calculation (auditory cue) and sentence listening": "visual processing vs auditory processing",  # noqa: E501
    "calculation (auditory cue) and sentence listening vs "
    "calculation (visual cue) and sentence reading": "auditory processing vs visual processing",  # noqa: E501
    "calculation (visual cue) and sentence reading vs checkerboard": "visual processing vs checkerboard",  # noqa: E501
    "calculation and sentence listening/reading vs button press": "cognitive processing vs motor",  # noqa: E501
    # Button press
    "left button press (auditory cue)": "left auditory click",
    "left button press (visual cue)": "left visual click",
    "left button press": "left auditory&visual click",
    "left vs right button press": "left auditory & visual click vs right auditory&visual click",  # noqa: E501
    "right button press (auditory cue)": "right auditory click",
    "right button press (visual cue)": "right visual click",
    "right button press": "right auditory & visual click",
    "right vs left button press": "right auditory & visual click vs left auditory&visual click",  # noqa: E501
    "button press (auditory cue) vs sentence listening": "auditory click vs auditory sentences",  # noqa: E501
    "button press (visual cue) vs sentence reading": "visual click vs visual sentences",  # noqa: E501
    "button press vs calculation and sentence listening/reading": "auditory&visual motor vs cognitive processing",  # noqa: E501
}
ALLOWED_CONTRASTS = list(CONTRAST_NAME_WRAPPER.values())


@fill_doc
def fetch_localizer_contrasts(
    contrasts,
    n_subjects=None,
    get_tmaps=False,
    get_masks=False,
    get_anats=False,
    data_dir=None,
    resume=True,
    verbose=1,
):
    """Download and load Brainomics/Localizer dataset (94 subjects).

    "The Functional Localizer is a simple and fast acquisition
    procedure based on a 5-minute functional magnetic resonance
    imaging (fMRI) sequence that can be run as easily and as
    systematically as an anatomical scan. This protocol captures the
    cerebral bases of auditory and visual perception, motor actions,
    reading, language comprehension and mental calculation at an
    individual level. Individual functional maps are reliable and
    quite precise. The procedure is described in more detail on the
    Functional Localizer page."
    (see https://osf.io/vhtf6/)

    You may cite :footcite:t:`Papadopoulos-Orfanos2017`
    when using this dataset.

    Scientific results obtained using this dataset are described
    in :footcite:t:`Pinel2007`.

    Parameters
    ----------
    contrasts : :obj:`list` of :obj:`str`
        The contrasts to be fetched (for all 94 subjects available).
        Allowed values are::

          - "checkerboard"
          - "horizontal checkerboard"
          - "vertical checkerboard"
          - "horizontal vs vertical checkerboard"
          - "vertical vs horizontal checkerboard"
          - "sentence listening"
          - "sentence reading"
          - "sentence listening and reading"
          - "sentence reading vs checkerboard"
          - "calculation (auditory cue)"
          - "calculation (visual cue)"
          - "calculation (auditory and visual cue)"
          - "calculation (auditory cue) vs sentence listening"
          - "calculation (visual cue) vs sentence reading"
          - "calculation vs sentences"
          - "calculation (auditory cue) and sentence listening"
          - "calculation (visual cue) and sentence reading"
          - "calculation and sentence listening/reading"
          - "calculation (auditory cue) and sentence listening vs "
            "calculation (visual cue) and sentence reading"
          - "calculation (visual cue) and sentence reading vs checkerboard"
          - "calculation and sentence listening/reading vs button press"
          - "left button press (auditory cue)"
          - "left button press (visual cue)"
          - "left button press"
          - "left vs right button press"
          - "right button press (auditory cue)"
          - "right button press (visual cue)"
          - "right button press"
          - "right vs left button press"
          - "button press (auditory cue) vs sentence listening"
          - "button press (visual cue) vs sentence reading"
          - "button press vs calculation and sentence listening/reading"

        or equivalently one can use the original names::

          - "checkerboard"
          - "horizontal checkerboard"
          - "vertical checkerboard"
          - "horizontal vs vertical checkerboard"
          - "vertical vs horizontal checkerboard"
          - "auditory sentences"
          - "visual sentences"
          - "auditory&visual sentences"
          - "visual sentences vs checkerboard"
          - "auditory calculation"
          - "visual calculation"
          - "auditory&visual calculation"
          - "auditory calculation vs auditory sentences"
          - "visual calculation vs sentences"
          - "auditory&visual calculation vs sentences"
          - "auditory processing"
          - "visual processing"
          - "visual processing vs auditory processing"
          - "auditory processing vs visual processing"
          - "visual processing vs checkerboard"
          - "cognitive processing vs motor"
          - "left auditory click"
          - "left visual click"
          - "left auditory&visual click"
          - "left auditory & visual click vs right auditory&visual click"
          - "right auditory click"
          - "right visual click"
          - "right auditory&visual click"
          - "right auditory & visual click vs left auditory&visual click"
          - "auditory click vs auditory sentences"
          - "visual click vs visual sentences"
          - "auditory&visual motor vs cognitive processing"

    n_subjects : :obj:`int` or :obj:`list` or None, default=None
        The number or list of subjects to load. If None is given,
        all 94 subjects are used.

    get_tmaps : :obj:`bool`, default=False
        Whether t maps should be fetched or not.

    get_masks : :obj:`bool`, default=False
        Whether individual masks should be fetched or not.

    get_anats : :obj:`bool`, default=False
        Whether individual structural images should be fetched or not.

    %(data_dir)s

    %(resume)s

    %(verbose)s

    Returns
    -------
    data : Bunch
        Dictionary-like object, the attributes of interest are:

        - 'cmaps': :obj:`list` of :obj:`str`
          Paths to nifti contrast maps
        - 'tmaps': :obj:`list` of :obj:`str` (if 'get_tmaps' set to True)
          Paths to nifti t maps
        - 'masks': :obj:`list` of :obj:`str`
          Paths to nifti files corresponding to the subjects individual masks
        - 'anats': :obj:`str`
          Path to nifti files corresponding to the subjects structural images

    References
    ----------
    .. footbibliography::

    See Also
    --------
    nilearn.datasets.fetch_localizer_calculation_task
    nilearn.datasets.fetch_localizer_button_task
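
    Examples
    --------
    A minimal usage sketch (illustrative; downloads data on first call).
    Either naming convention listed above may be used::

        from nilearn.datasets import fetch_localizer_contrasts

        localizer = fetch_localizer_contrasts(
            ["left vs right button press"],
            n_subjects=2,
            get_tmaps=True,
        )
        cmap = localizer.cmaps[0]  # contrast map of the first subject
        tmap = localizer.tmaps[0]  # matching t map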
733 """
734 check_params(locals())
736 _check_inputs_fetch_localizer_contrasts(contrasts)
738 if n_subjects is None:
739 n_subjects = 94 # 94 subjects available
740 if isinstance(n_subjects, numbers.Number) and (
741 (n_subjects > 94) or (n_subjects < 1)
742 ):
743 warnings.warn(
744 "Wrong value for 'n_subjects' (%d). The maximum "
745 "value will be used instead ('n_subjects=94').",
746 stacklevel=find_stack_level(),
747 )
748 n_subjects = 94 # 94 subjects available
750 # convert contrast names
751 contrasts_wrapped = []
752 # get a unique ID for each contrast. It is used to give a unique name to
753 # each download file and avoid name collisions.
754 contrasts_indices = []
755 for contrast in contrasts:
756 if contrast in ALLOWED_CONTRASTS:
757 contrasts_wrapped.append(contrast.title().replace(" ", ""))
758 contrasts_indices.append(ALLOWED_CONTRASTS.index(contrast))
759 elif contrast in CONTRAST_NAME_WRAPPER:
760 name = CONTRAST_NAME_WRAPPER[contrast]
761 contrasts_wrapped.append(name.title().replace(" ", ""))
762 contrasts_indices.append(ALLOWED_CONTRASTS.index(name))
764 # Get the dataset OSF index
765 dataset_name = "brainomics_localizer"
766 index_url = "https://osf.io/hwbm2/download"
767 data_dir = get_dataset_dir(
768 dataset_name, data_dir=data_dir, verbose=verbose
769 )
771 index_file = fetch_single_file(
772 index_url, data_dir, verbose=verbose, resume=resume
773 )
774 with index_file.open() as of:
775 index = json.load(of)
777 if isinstance(n_subjects, numbers.Number):
778 subject_mask = np.arange(1, n_subjects + 1)
779 else:
780 subject_mask = np.array(n_subjects)
781 subject_ids = [f"S{int(s):02}" for s in subject_mask]
783 data_types = ["cmaps"]
784 if get_tmaps:
785 data_types.append("tmaps")
787 # Build data URLs that will be fetched
788 # Download from the relevant OSF project,
789 # using hashes generated from the OSF API.
790 # Note the trailing slash.
791 # For more info, see:
792 # https://gist.github.com/emdupre/3cb4d564511d495ea6bf89c6a577da74
793 root_url = "https://osf.io/download/{0}/"
794 files = {}
795 filenames = []
797 for subject_id, data_type, contrast in itertools.product(
798 subject_ids, data_types, contrasts_wrapped
799 ):
800 name_aux = f"{data_type}_{contrast}"
801 name_aux.replace(" ", "_")
802 file_path = Path("brainomics_data", subject_id, f"{name_aux}.nii.gz")
804 path = "/".join(
805 [
806 "/localizer",
807 "derivatives",
808 "spm_1st_level",
809 f"sub-{subject_id}",
810 (
811 f"sub-{subject_id}_task-localizer"
812 f"_acq-{contrast}_{data_type}.nii.gz"
813 ),
814 ]
815 )
817 if _is_valid_path(path, index, verbose=verbose):
818 file_url = root_url.format(index[path][1:])
819 opts = {"move": file_path}
820 filenames.append((file_path, file_url, opts))
821 files.setdefault(data_type, []).append(file_path)
823 # Fetch masks if asked by user
824 if get_masks:
825 for subject_id in subject_ids:
826 file_path = Path(
827 "brainomics_data", subject_id, "boolean_mask_mask.nii.gz"
828 )
830 path = "/".join(
831 [
832 "/localizer",
833 "derivatives",
834 "spm_1st_level",
835 f"sub-{subject_id}",
836 f"sub-{subject_id}_mask.nii.gz",
837 ]
838 )
840 if _is_valid_path(path, index, verbose=verbose):
841 file_url = root_url.format(index[path][1:])
842 opts = {"move": file_path}
843 filenames.append((file_path, file_url, opts))
844 files.setdefault("masks", []).append(file_path)
846 # Fetch anats if asked by user
847 if get_anats:
848 for subject_id in subject_ids:
849 file_path = Path(
850 "brainomics_data",
851 subject_id,
852 "normalized_T1_anat_defaced.nii.gz",
853 )
855 path = "/".join(
856 [
857 "/localizer",
858 "derivatives",
859 "spm_preprocessing",
860 f"sub-{subject_id}",
861 f"sub-{subject_id}_T1w.nii.gz",
862 ]
863 )
865 if _is_valid_path(path, index, verbose=verbose):
866 file_url = root_url.format(index[path][1:])
867 opts = {"move": file_path}
868 filenames.append((file_path, file_url, opts))
869 files.setdefault("anats", []).append(file_path)
871 # Fetch subject characteristics
872 participants_file = Path("brainomics_data", "participants.tsv")
873 path = "/localizer/participants.tsv"
874 if _is_valid_path(path, index, verbose=verbose):
875 file_url = root_url.format(index[path][1:])
876 opts = {"move": participants_file}
877 filenames.append((participants_file, file_url, opts))
879 # Fetch behavioral
880 behavioural_file = Path("brainomics_data", "phenotype", "behavioural.tsv")
882 path = "/localizer/phenotype/behavioural.tsv"
883 if _is_valid_path(path, index, verbose=verbose):
884 file_url = root_url.format(index[path][1:])
885 opts = {"move": behavioural_file}
886 filenames.append((behavioural_file, file_url, opts))
888 # Actual data fetching
889 fdescr = get_dataset_descr(dataset_name)
890 fetch_files(data_dir, filenames, verbose=verbose)
891 for key, value in files.items():
892 files[key] = [str(data_dir / val) for val in value]
894 # Load covariates file
895 participants_file = data_dir / participants_file
896 csv_data = pd.read_csv(participants_file, delimiter="\t")
897 behavioural_file = data_dir / behavioural_file
898 csv_data2 = pd.read_csv(behavioural_file, delimiter="\t")
899 csv_data = csv_data.merge(csv_data2)
900 subject_names = csv_data["participant_id"].tolist()
901 subjects_indices = [
902 subject_names.index(name)
903 for name in subject_ids
904 if name in subject_names
905 ]
906 csv_data = csv_data.iloc[subjects_indices]
908 return Bunch(ext_vars=csv_data, description=fdescr, **files)


def _check_inputs_fetch_localizer_contrasts(contrasts):
    """Check that the requested contrast names exist."""
    if isinstance(contrasts, str):
        raise ValueError(
            "Contrasts should be a list of strings, but "
            f'a single string was given: "{contrasts}"'
        )
    unknown_contrasts = [
        x
        for x in contrasts
        if (x not in ALLOWED_CONTRASTS and x not in CONTRAST_NAME_WRAPPER)
    ]
    if unknown_contrasts:
        unknown = "\n- ".join(unknown_contrasts)
        raise ValueError(
            f"The following contrasts are not available:\n- {unknown}"
        )


def _is_valid_path(path, index, verbose):
    if path not in index:
        logger.log(f"Skipping path '{path}'...", verbose)
        return False
    return True


@fill_doc
def fetch_localizer_calculation_task(n_subjects=1, data_dir=None, verbose=1):
    """Fetch calculation task contrast maps from the localizer.

    Parameters
    ----------
    n_subjects : :obj:`int`, default=1
        The number of subjects to load. If None is given,
        all 94 subjects are used.

    %(data_dir)s

    %(verbose)s

    Returns
    -------
    data : Bunch
        Dictionary-like object, the attributes of interest are:
        'cmaps': string list, giving paths to nifti contrast maps

    Notes
    -----
    This function is only a thin wrapper around fetch_localizer_contrasts,
    to make the examples easier to read and understand.
    The 'calculation (auditory and visual cue)' contrast is used.

    See Also
    --------
    nilearn.datasets.fetch_localizer_button_task
    nilearn.datasets.fetch_localizer_contrasts
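
    Examples
    --------
    A minimal usage sketch (illustrative; downloads data on first call)::

        from nilearn.datasets import fetch_localizer_calculation_task

        data = fetch_localizer_calculation_task(n_subjects=4)
        cmaps = data.cmaps  # one calculation contrast map per subject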
968 """
969 check_params(locals())
971 data = fetch_localizer_contrasts(
972 ["calculation (auditory and visual cue)"],
973 n_subjects=n_subjects,
974 get_tmaps=False,
975 get_masks=False,
976 get_anats=False,
977 data_dir=data_dir,
978 resume=True,
979 verbose=verbose,
980 )
981 return data


@fill_doc
def fetch_localizer_button_task(data_dir=None, verbose=1):
    """Fetch left vs right button press :term:`contrast` maps \
    from the localizer.

    Parameters
    ----------
    %(data_dir)s

    %(verbose)s

    Returns
    -------
    data : Bunch
        Dictionary-like object, the attributes of interest are:

        - 'cmaps': string list, giving paths to nifti :term:`contrast` maps
        - 'tmap': string, giving the path to the nifti t map
        - 'anat': string, giving the path to the normalized anatomical image

    Notes
    -----
    This function is only a thin wrapper around fetch_localizer_contrasts,
    to make the examples easier to read and understand.
    The 'left vs right button press' contrast is used.

    See Also
    --------
    nilearn.datasets.fetch_localizer_calculation_task
    nilearn.datasets.fetch_localizer_contrasts
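
    Examples
    --------
    A minimal usage sketch (illustrative; downloads data on first call)::

        from nilearn.datasets import fetch_localizer_button_task

        data = fetch_localizer_button_task()
        tmap_path = data.tmap  # t map of 'left vs right button press'
        anat_path = data.anat  # matching normalized anatomical image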
1015 """
1016 check_params(locals())
1018 data = fetch_localizer_contrasts(
1019 ["left vs right button press"],
1020 n_subjects=[2],
1021 get_tmaps=True,
1022 get_masks=False,
1023 get_anats=True,
1024 data_dir=data_dir,
1025 resume=True,
1026 verbose=verbose,
1027 )
1028 # Additional keys for backward compatibility
1029 data["tmap"] = data["tmaps"][0]
1030 data["anat"] = data["anats"][0]
1031 return data


@fill_doc
def fetch_abide_pcp(
    data_dir=None,
    n_subjects=None,
    pipeline="cpac",
    band_pass_filtering=False,
    global_signal_regression=False,
    derivatives=None,
    quality_checked=True,
    url=None,
    verbose=1,
    **kwargs,
):
    """Fetch ABIDE dataset.

    Fetch the Autism Brain Imaging Data Exchange (ABIDE) dataset according
    to criteria that can be passed as parameters. Note that this is the
    preprocessed version of ABIDE provided by the Preprocessed Connectomes
    Project (PCP).
    See :footcite:t:`Nielsen2013`.

    Parameters
    ----------
    %(data_dir)s
    n_subjects : :obj:`int`, default=None
        The number of subjects to load. If None is given,
        all available subjects are used (this number depends on the
        preprocessing pipeline used).

    pipeline : :obj:`str` {'ccs', 'cpac', 'dparsf', 'niak'}, default='cpac'
        Possible pipelines are "ccs", "cpac", "dparsf" and "niak".

    band_pass_filtering : :obj:`bool`, default=False
        Due to controversies in the literature, band pass filtering is
        optional. If true, signal is band filtered between 0.01Hz and 0.1Hz.

    global_signal_regression : :obj:`bool`, default=False
        Indicates if global signal regression should be applied on the
        signals.

    derivatives : :obj:`list` of :obj:`str`, default=None
        Types of downloaded files. Possible values are: alff, degree_binarize,
        degree_weighted, dual_regression, eigenvector_binarize,
        eigenvector_weighted, falff, func_mask, func_mean, func_preproc, lfcd,
        reho, rois_aal, rois_cc200, rois_cc400, rois_dosenbach160, rois_ez,
        rois_ho, rois_tt, and vmhc. Please refer to the PCP site for more
        details.
        Will default to ``['func_preproc']`` if ``None`` is passed.

    quality_checked : :obj:`bool`, default=True
        If true (default), restrict the list of the subjects to the ones that
        passed quality assessment for all raters.
    %(url)s
    %(verbose)s
    kwargs : extra parameters, optional
        Any extra keyword argument will be used to filter downloaded subjects
        according to the CSV phenotypic file. Some examples of filters are
        indicated below.

    SUB_ID : :obj:`list` of :obj:`int` in [50001, 50607], optional
        Ids of the subjects to be loaded.

    DX_GROUP : :obj:`int` in {1, 2}, optional
        1 is autism, 2 is control.

    DSM_IV_TR : :obj:`int` in [0, 4], optional
        0 is control, 1 is autism, 2 is Asperger, 3 is PDD-NOS,
        4 is Asperger or PDD-NOS.

    AGE_AT_SCAN : :obj:`float` in [6.47, 64], optional
        Age of the subject.

    SEX : :obj:`int` in {1, 2}, optional
        1 is male, 2 is female.

    HANDEDNESS_CATEGORY : :obj:`str` in {'R', 'L', 'Mixed', 'Ambi'}, optional
        R = Right, L = Left, Ambi = Ambidextrous.

    HANDEDNESS_SCORE : :obj:`int` in [-100, 100], optional
        Positive = Right, Negative = Left, 0 = Ambidextrous.

    Returns
    -------
    data : :class:`sklearn.utils.Bunch`
        Dictionary-like object, the keys are described below.

        - 'description': :obj:`str`, description of the dataset.

        - 'phenotypic': :obj:`pandas.DataFrame`
          phenotypic information for each subject.

        - Specific Derivative Keys:
          Additional keys, 'func_preproc' being the default, are
          introduced based on the provided 'derivatives'
          parameter during fetching. Any combination of the
          parameters below may occur.

          - 'func_preproc' (default): :obj:`numpy.ndarray`,
            paths to preprocessed functional MRI data in NIfTI format.
            This key is present by default when fetching the dataset.
          - 'alff': :obj:`numpy.ndarray`,
            amplitude values of low-frequency fluctuations
            in functional MRI data.
          - 'degree_binarize': :obj:`numpy.ndarray`,
            data specific to binarized node degree in brain networks.
          - 'degree_weighted': :obj:`numpy.ndarray`,
            data specific to weighted node degree,
            considering connectivity strength in brain networks.
          - 'dual_regression': :obj:`numpy.ndarray`,
            results from dual regression analysis,
            often involving the identification of resting-state networks.
          - 'eigenvector_binarize': :obj:`numpy.ndarray`,
            data specific to binarized eigenvector
            centrality, a measure of node influence in brain networks.
          - 'eigenvector_weighted': :obj:`numpy.ndarray`,
            data specific to weighted eigenvector
            centrality, reflecting node influence with consideration
            of connectivity strength.
          - 'falff': :obj:`numpy.ndarray`,
            data specific to fractional amplitude values of
            low-frequency fluctuations.
          - 'func_mask': :obj:`numpy.ndarray`,
            functional mask data, often used to define regions of interest.
          - 'func_mean': :obj:`numpy.ndarray`,
            mean functional MRI data,
            representing average activity across the brain.
          - 'lfcd': :obj:`numpy.ndarray`,
            data specific to local functional connectivity density
            in brain networks.
          - 'reho': :obj:`numpy.ndarray`,
            data specific to regional homogeneity in functional MRI data.
          - 'rois_aal': :obj:`numpy.ndarray`,
            data specific to anatomical regions
            defined by the Automatic Anatomical Labeling atlas.
          - 'rois_cc200': :obj:`numpy.ndarray`,
            data specific to regions defined by the Craddock 200 atlas.
          - 'rois_cc400': :obj:`numpy.ndarray`,
            data specific to regions defined by the Craddock 400 atlas.
          - 'rois_dosenbach160': :obj:`numpy.ndarray`,
            data specific to regions defined by the Dosenbach 160 atlas.
          - 'rois_ez': :obj:`numpy.ndarray`,
            data specific to regions defined by the EZ atlas.
          - 'rois_ho': :obj:`numpy.ndarray`,
            data specific to regions defined by the Harvard-Oxford atlas.
          - 'rois_tt': :obj:`numpy.ndarray`,
            data specific to regions defined by the Talairach atlas.
          - 'vmhc': :obj:`numpy.ndarray`,
            data specific to voxel-mirrored homotopic connectivity in
            functional MRI data.

    Notes
    -----
    Code and description of preprocessing pipelines are provided on the
    `PCP website <http://preprocessed-connectomes-project.org/>`_.

    References
    ----------
    .. footbibliography::
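
    Examples
    --------
    A minimal usage sketch (illustrative; downloads data on first call).
    Phenotypic filters are passed as extra keyword arguments::

        from nilearn.datasets import fetch_abide_pcp

        # two autistic subjects (DX_GROUP=1), default 'func_preproc' files
        abide = fetch_abide_pcp(n_subjects=2, DX_GROUP=1)
        func_path = abide.func_preproc[0]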
1193 """
1194 check_params(locals())
1196 if derivatives is None:
1197 derivatives = ["func_preproc"]
1198 # People keep getting it wrong and submitting a string instead of a
1199 # list of strings. We'll make their life easy
1200 if isinstance(derivatives, str):
1201 derivatives = [derivatives]
1203 # Parameter check
1204 for derivative in derivatives:
1205 if derivative not in [
1206 "alff",
1207 "degree_binarize",
1208 "degree_weighted",
1209 "dual_regression",
1210 "eigenvector_binarize",
1211 "eigenvector_weighted",
1212 "falff",
1213 "func_mask",
1214 "func_mean",
1215 "func_preproc",
1216 "lfcd",
1217 "reho",
1218 "rois_aal",
1219 "rois_cc200",
1220 "rois_cc400",
1221 "rois_dosenbach160",
1222 "rois_ez",
1223 "rois_ho",
1224 "rois_tt",
1225 "vmhc",
1226 ]:
1227 raise KeyError(f"{derivative} is not a valid derivative")
1229 strategy = ""
1230 if not band_pass_filtering:
1231 strategy += "no"
1232 strategy += "filt_"
1233 if not global_signal_regression:
1234 strategy += "no"
1235 strategy += "global"
1237 # General file: phenotypic information
1238 dataset_name = "ABIDE_pcp"
1239 data_dir = get_dataset_dir(
1240 dataset_name, data_dir=data_dir, verbose=verbose
1241 )
1243 if url is None:
1244 url = (
1245 "https://s3.amazonaws.com/fcp-indi/data/Projects/ABIDE_Initiative"
1246 )
1248 if quality_checked:
1249 kwargs["qc_rater_1"] = "OK"
1250 kwargs["qc_anat_rater_2"] = ["OK", "maybe"]
1251 kwargs["qc_func_rater_2"] = ["OK", "maybe"]
1252 kwargs["qc_anat_rater_3"] = "OK"
1253 kwargs["qc_func_rater_3"] = "OK"
1255 # Fetch the phenotypic file and load it
1256 csv = "Phenotypic_V1_0b_preprocessed1.csv"
1257 path_csv = Path(
1258 fetch_files(data_dir, [(csv, f"{url}/{csv}", {})], verbose=verbose)[0]
1259 )
1261 # Note: the phenotypic file contains string that contains comma which mess
1262 # up numpy array csv loading. This is why I do a pass to remove the last
1263 # field. This can be
1264 # done simply with pandas but we don't want such dependency ATM
1265 # pheno = pandas.read_csv(path_csv).to_records()
1266 with path_csv.open() as pheno_f:
1267 pheno = [f"i{pheno_f.readline()}"]
1269 # This regexp replaces commas between double quotes
1270 pheno.extend(
1271 re.sub(r',(?=[^"]*"(?:[^"]*"[^"]*")*[^"]*$)', ";", line)
1272 for line in pheno_f
1273 )
1274 # bytes (encode()) needed for python 2/3 compat with numpy
1275 pheno = "\n".join(pheno).encode()
1276 pheno = BytesIO(pheno)
1277 pheno = pd.read_csv(pheno, comment="$")
1279 # First, filter subjects with no filename
1280 pheno = pheno[pheno["FILE_ID"] != "no_filename"]
1281 # Apply user defined filters
1282 user_filter = filter_columns(pheno, kwargs)
1283 pheno = pheno[user_filter]
1285 # Go into specific data folder and url
1286 data_dir = data_dir / pipeline / strategy
1287 url = f"{url}/Outputs/{pipeline}/{strategy}"
1289 # Get the files
1290 file_ids = pheno["FILE_ID"].tolist()
1291 if n_subjects is not None:
1292 file_ids = file_ids[:n_subjects]
1293 pheno = pheno[:n_subjects]
1295 results = {
1296 "description": get_dataset_descr(dataset_name),
1297 "phenotypic": pheno,
1298 }
1299 for derivative in derivatives:
1300 ext = ".1D" if derivative.startswith("rois") else ".nii.gz"
1301 files = []
1302 for file_id in file_ids:
1303 file_ = [
1304 (
1305 f"{file_id}_{derivative}{ext}",
1306 "/".join(
1307 [url, derivative, f"{file_id}_{derivative}{ext}"]
1308 ),
1309 {},
1310 )
1311 ]
1312 files.append(fetch_files(data_dir, file_, verbose=verbose)[0])
1313 # Load derivatives if needed
1314 if ext == ".1D":
1315 files = [np.loadtxt(f) for f in files]
1316 results[derivative] = files
1317 return Bunch(**results)


def _load_mixed_gambles(zmap_imgs):
    """Ravel zmaps (one per subject) along the time axis, \
    resulting in a niimg of n_subjects * n_trials 3D volumes, \
    and then build a gain vector y of the same length.
    """
    X = []
    y = []
    mask = []
    for zmap_img in zmap_imgs:
        # load subject data
        this_X = get_data(zmap_img)
        affine = zmap_img.affine
        finite_mask = np.all(np.isfinite(this_X), axis=-1)
        this_mask = np.logical_and(np.all(this_X != 0, axis=-1), finite_mask)
        this_y = np.array([np.arange(1, 9)] * 6).ravel()

        # gain levels
        if len(this_y) != this_X.shape[-1]:
            raise RuntimeError(
                f"{zmap_img}: Expecting {len(this_y)} volumes, "
                f"got {this_X.shape[-1]}!"
            )

        # standardize subject data
        this_X -= this_X.mean(axis=-1)[..., np.newaxis]
        std = this_X.std(axis=-1)
        std[std == 0] = 1
        this_X /= std[..., np.newaxis]

        # commit subject data
        X.append(this_X)
        y.extend(this_y)
        mask.append(this_mask)
    y = pd.DataFrame({"gain": y})
    X = np.concatenate(X, axis=-1)
    # keep voxels that are valid in more than half of the subjects
    mask = np.sum(mask, axis=0) > 0.5 * len(mask)
    mask = np.logical_and(mask, np.all(np.isfinite(X), axis=-1))
    X = X[mask, :].T
    tmp = np.zeros([*mask.shape, len(X)])
    tmp[mask, :] = X.T
    mask_img = Nifti1Image(mask.astype("uint8"), affine)
    X = four_to_three(Nifti1Image(tmp, affine))
    return X, y, mask_img


@fill_doc
def fetch_mixed_gambles(
    n_subjects=1,
    data_dir=None,
    url=None,
    resume=True,
    return_raw_data=False,
    verbose=1,
):
    """Fetch Jimura "mixed gambles" dataset.

    See the :ref:`dataset description <mixed_gamble_maps>`
    for more information.

    Parameters
    ----------
    n_subjects : :obj:`int`, default=1
        The number of subjects to load. If ``None`` is given, all the
        subjects are used.
    %(data_dir)s
    %(url)s
    %(resume)s
    %(verbose)s
    return_raw_data : :obj:`bool`, default=False
        If ``False``, then the data will be transformed into an ``(X, y)``
        pair, suitable for machine learning routines. ``X`` is a list
        of ``n_subjects * 48`` :class:`~nibabel.nifti1.Nifti1Image`
        objects (where 48 is the number of trials), and ``y`` is an
        array of shape ``(n_subjects * 48,)``.

    Returns
    -------
    data : :class:`~sklearn.utils.Bunch`
        Dictionary-like object, the attributes of interest are:

        - 'zmaps': :obj:`list` of :obj:`str`
          Paths to realigned gain betamaps (one nifti per subject).
          If ``return_raw_data`` is ``False``, this is instead a list of
          ``n_subjects * 48`` single-trial 3D
          :class:`~nibabel.nifti1.Nifti1Image` objects.
        - 'subject_id': pd.DataFrame of subject IDs
        - 'gain': DataFrame of shape ``(n_subjects * 48,)``
          One gain value per trial; only present when
          ``return_raw_data`` is ``False``.
        - 'mask_img': :class:`~nibabel.nifti1.Nifti1Image`
          Mask image; only present when ``return_raw_data`` is ``False``.
        - 'description': data description
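
    Examples
    --------
    A minimal usage sketch (illustrative; downloads data on first call)::

        from nilearn.datasets import fetch_mixed_gambles

        data = fetch_mixed_gambles(n_subjects=1, return_raw_data=False)
        trial_imgs = data.zmaps  # 48 single-trial 3D images per subject
        gains = data.gain  # DataFrame with one gain value per trial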
1415 """
1416 check_params(locals())
1418 if n_subjects > 16:
1419 warnings.warn(
1420 "Warning: there are only 16 subjects!",
1421 stacklevel=find_stack_level(),
1422 )
1423 n_subjects = 16
1424 if url is None:
1425 url = (
1426 "https://www.nitrc.org/frs/download.php/7229/"
1427 "jimura_poldrack_2012_zmaps.zip"
1428 )
1429 opts = {"uncompress": True}
1430 files = [
1431 (f"zmaps{os.sep}sub{int(j + 1):03}_zmaps.nii.gz", url, opts)
1432 for j in range(n_subjects)
1433 ]
1434 data_dir = get_dataset_dir("jimura_poldrack_2012_zmaps", data_dir=data_dir)
1435 zmap_fnames = fetch_files(data_dir, files, resume=resume, verbose=verbose)
1436 subject_id = pd.DataFrame(
1437 {"subject_id": np.repeat(np.arange(n_subjects), 6 * 8).tolist()}
1438 )
1439 description = get_dataset_descr("mixed_gambles")
1440 data = Bunch(
1441 zmaps=zmap_fnames, subject_id=subject_id, description=description
1442 )
1443 if not return_raw_data:
1444 X, y, mask_img = _load_mixed_gambles(
1445 check_niimg(data.zmaps, return_iterator=True)
1446 )
1447 data.zmaps, data.gain, data.mask_img = X, y, mask_img
1448 return data


@fill_doc
def fetch_megatrawls_netmats(
    dimensionality=100,
    timeseries="eigen_regression",
    matrices="partial_correlation",
    data_dir=None,
    resume=True,
    verbose=1,
):
    """Download and return Network Matrices data \
    from MegaTrawls release in HCP.

    This data can be used to predict relationships between imaging data and
    non-imaging behavioral measures such as age, sex, education, etc.
    The network matrices are estimated from functional connectivity
    datasets of 461 subjects.

    .. admonition:: Technical details
        :class: important

        For more technical details about predicting the measures, refer to:
        Stephen Smith et al,
        HCP beta-release of the Functional Connectivity MegaTrawl.
        April 2015 "HCP500-MegaTrawl" release.
        https://db.humanconnectome.org/megatrawl/

    .. admonition:: Terms and conditions
        :class: attention

        This is open access data. You must agree to the Terms and conditions
        of using this data before using it, available at:
        http://humanconnectome.org/data/data-use-terms/open-access.html

    Parameters
    ----------
    dimensionality : :obj:`int`, default=100
        Valid inputs are 25, 50, 100, 200, 300. By default, network matrices
        estimated using Group :term:`ICA` brain :term:`parcellation`
        of 100 components/dimensions will be returned.

    timeseries : :obj:`str`, default='eigen_regression'
        Valid inputs are 'multiple_spatial_regression' or 'eigen_regression'.
        With 'eigen_regression' (the default), the returned matrices were
        estimated using first principal eigen component timeseries signals
        extracted from each subject's parcellation.
        With 'multiple_spatial_regression', they were estimated using
        spatial-regressor-based timeseries signals extracted from each
        subject's parcellation.

    matrices : :obj:`str`, default='partial_correlation'
        Valid inputs are 'full_correlation' or 'partial_correlation'.
        By default, partial correlation matrices are returned; if
        'full_correlation' is selected, full correlation matrices are
        returned.
    %(data_dir)s
    %(resume)s
    %(verbose)s

    Returns
    -------
    data : Bunch
        Dictionary-like object, the attributes are:

        - 'dimensions': int, the requested dimensionality.

        - 'timeseries': str, the requested timeseries method.

        - 'matrices': str, the requested type of matrices.

        - 'correlation_matrices': pd.DataFrame
          The correlation matrices of the requested type. Array size
          depends on the given dimensionality (n, n).

        - 'description': data description

    Notes
    -----
    For more information
    see the :ref:`dataset description <megatrawls_maps>`.
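
    Examples
    --------
    A minimal usage sketch (illustrative; downloads data on first call)::

        from nilearn.datasets import fetch_megatrawls_netmats

        netmats = fetch_megatrawls_netmats(
            dimensionality=100,
            timeseries="eigen_regression",
            matrices="partial_correlation",
        )
        matrices = netmats.correlation_matrices  # (100, 100) DataFrame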
1531 """
1532 check_params(locals())
1534 url = "http://www.nitrc.org/frs/download.php/8037/Megatrawls.tgz"
1535 opts = {"uncompress": True}
1537 error_message = (
1538 "Invalid {0} input is provided: {1}, choose one of them {2}"
1539 )
1540 # standard dataset terms
1541 dimensionalities = [25, 50, 100, 200, 300]
1542 if dimensionality not in dimensionalities:
1543 raise ValueError(
1544 error_message.format(
1545 "dimensionality", dimensionality, dimensionalities
1546 )
1547 )
1548 timeseries_methods = ["multiple_spatial_regression", "eigen_regression"]
1549 if timeseries not in timeseries_methods:
1550 raise ValueError(
1551 error_message.format("timeseries", timeseries, timeseries_methods)
1552 )
1553 output_matrices_names = ["full_correlation", "partial_correlation"]
1554 if matrices not in output_matrices_names:
1555 raise ValueError(
1556 error_message.format("matrices", matrices, output_matrices_names)
1557 )
1559 dataset_name = "Megatrawls"
1560 data_dir = get_dataset_dir(
1561 dataset_name, data_dir=data_dir, verbose=verbose
1562 )
1563 description = get_dataset_descr(dataset_name)
1565 timeseries_map = {
1566 "multiple_spatial_regression": "ts2",
1567 "eigen_regression": "ts3",
1568 }
1569 matrices_map = {
1570 "full_correlation": "Znet1.txt",
1571 "partial_correlation": "Znet2.txt",
1572 }
1573 filepath = [
1574 (
1575 Path(
1576 f"3T_Q1-Q6related468_MSMsulc_d{dimensionality}_{timeseries_map[timeseries]}",
1577 matrices_map[matrices],
1578 ),
1579 url,
1580 opts,
1581 )
1582 ]
1584 # Fetch all the files
1585 files = fetch_files(data_dir, filepath, resume=resume, verbose=verbose)
1587 # Load the files into dataframe
1588 correlation_matrices = pd.read_table(files[0], sep=r"\s+", header=None)
1590 return Bunch(
1591 dimensions=dimensionality,
1592 timeseries=timeseries,
1593 matrices=matrices,
1594 correlation_matrices=correlation_matrices,
1595 description=description,
1596 )


def nki_ids():
    """Return the subject ids of the NKI dataset."""
    return [
        "A00028185",
        "A00033747",
        "A00035072",
        "A00035827",
        "A00035840",
        "A00037112",
        "A00037511",
        "A00038998",
        "A00039391",
        "A00039431",
        "A00039488",
        "A00040524",
        "A00040623",
        "A00040944",
        "A00043299",
        "A00043520",
        "A00043677",
        "A00043722",
        "A00045589",
        "A00050998",
        "A00051063",
        "A00051064",
        "A00051456",
        "A00051457",
        "A00051477",
        "A00051513",
        "A00051514",
        "A00051517",
        "A00051528",
        "A00051529",
        "A00051539",
        "A00051604",
        "A00051638",
        "A00051658",
        "A00051676",
        "A00051678",
        "A00051679",
        "A00051726",
        "A00051774",
        "A00051796",
        "A00051835",
        "A00051882",
        "A00051925",
        "A00051927",
        "A00052070",
        "A00052117",
        "A00052118",
        "A00052126",
        "A00052180",
        "A00052197",
        "A00052214",
        "A00052234",
        "A00052307",
        "A00052319",
        "A00052499",
        "A00052502",
        "A00052577",
        "A00052612",
        "A00052639",
        "A00053202",
        "A00053369",
        "A00053456",
        "A00053474",
        "A00053546",
        "A00053576",
        "A00053577",
        "A00053578",
        "A00053625",
        "A00053626",
        "A00053627",
        "A00053874",
        "A00053901",
        "A00053927",
        "A00053949",
        "A00054038",
        "A00054153",
        "A00054173",
        "A00054358",
        "A00054482",
        "A00054532",
        "A00054533",
        "A00054534",
        "A00054621",
        "A00054895",
        "A00054897",
        "A00054913",
        "A00054929",
        "A00055061",
        "A00055215",
        "A00055352",
        "A00055353",
        "A00055542",
        "A00055738",
        "A00055763",
        "A00055806",
        "A00056097",
        "A00056098",
        "A00056164",
        "A00056372",
        "A00056452",
        "A00056489",
        "A00056949",
    ]


@fill_doc
def fetch_surf_nki_enhanced(
    n_subjects=10, data_dir=None, url=None, resume=True, verbose=1
):
    """Download and load the NKI enhanced :term:`resting-state` dataset, \
    preprocessed and projected to the fsaverage5 space surface.

    .. versionadded:: 0.3

    Parameters
    ----------
    n_subjects : :obj:`int`, default=10
        The number of subjects to load from maximum of 102 subjects.
        By default, 10 subjects will be loaded. If None is given,
        all 102 subjects will be loaded.
    %(data_dir)s
    %(url)s
    %(resume)s
    %(verbose)s

    Returns
    -------
    data : :obj:`sklearn.utils.Bunch`
        Dictionary-like object, the attributes of interest are:

        - 'func_left': Paths to Gifti files containing resting state
          time series, left hemisphere
        - 'func_right': Paths to Gifti files containing resting state
          time series, right hemisphere
        - 'phenotypic': pd.DataFrame with subject ID, age,
          dominant hand and sex for each subject.
        - 'description': data description of the release and references.

    .. admonition:: scipy >= 0.14.0 compatibility
        :class: important

        It may be necessary
        to coerce to float the data loaded from the Gifti files
        to avoid issues with scipy >= 0.14.0.

    Notes
    -----
    For more information
    see the :ref:`dataset description <nki_dataset>`.
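
    Examples
    --------
    A minimal usage sketch (illustrative; downloads data on first call)::

        from nilearn.datasets import fetch_surf_nki_enhanced

        nki = fetch_surf_nki_enhanced(n_subjects=1)
        lh_path = nki.func_left[0]  # GIfTI time series, left hemisphere
        rh_path = nki.func_right[0]  # GIfTI time series, right hemisphere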
1751 """
1752 check_params(locals())
1754 if url is None:
1755 url = "https://www.nitrc.org/frs/download.php/"
1757 # Preliminary checks and declarations
1758 dataset_name = "nki_enhanced_surface"
1759 data_dir = get_dataset_dir(
1760 dataset_name, data_dir=data_dir, verbose=verbose
1761 )
1763 nitrc_ids = range(8260, 8464)
1764 ids = nki_ids()
1765 max_subjects = len(ids)
1766 if n_subjects is None:
1767 n_subjects = max_subjects
1768 if n_subjects > max_subjects:
1769 warnings.warn(
1770 f"Warning: there are only {max_subjects} subjects.",
1771 stacklevel=find_stack_level(),
1772 )
1773 n_subjects = max_subjects
1774 ids = ids[:n_subjects]
1776 # Dataset description
1777 fdescr = get_dataset_descr(dataset_name)
1779 # First, get the metadata
1780 phenotypic_file = "NKI_enhanced_surface_phenotypics.csv"
1781 phenotypic = (
1782 phenotypic_file,
1783 url + "8470/pheno_nki_nilearn.csv",
1784 {"move": phenotypic_file},
1785 )
1787 phenotypic = fetch_files(
1788 data_dir, [phenotypic], resume=resume, verbose=verbose
1789 )[0]
1791 # Load the csv file
1792 phenotypic = pd.read_csv(
1793 phenotypic,
1794 header=1,
1795 names=["Subject", "Age", "Dominant Hand", "Sex"],
1796 )
1798 # Keep phenotypic information for selected subjects
1799 mask = phenotypic["Subject"].apply(lambda x: str(x) in ids)
1800 phenotypic = phenotypic[mask]
1802 # Download subjects' datasets
1803 func_right = []
1804 func_left = []
1805 for i, ids_i in enumerate(ids):
1806 archive = f"{url}%i{os.sep}%s_%s_preprocessed_fsaverage5_fwhm6.gii"
1807 func = f"%s{os.sep}%s_%s_preprocessed_fwhm6.gii"
1808 rh = fetch_files(
1809 data_dir,
1810 [
1811 (
1812 func % (ids_i, ids_i, "right"),
1813 archive % (nitrc_ids[2 * i + 1], ids_i, "rh"),
1814 {"move": func % (ids_i, ids_i, "right")},
1815 )
1816 ],
1817 resume=resume,
1818 verbose=verbose,
1819 )
1820 lh = fetch_files(
1821 data_dir,
1822 [
1823 (
1824 func % (ids_i, ids_i, "left"),
1825 archive % (nitrc_ids[2 * i], ids_i, "lh"),
1826 {"move": func % (ids_i, ids_i, "left")},
1827 )
1828 ],
1829 resume=resume,
1830 verbose=verbose,
1831 )
1833 func_right.append(rh[0])
1834 func_left.append(lh[0])
1836 return Bunch(
1837 func_left=func_left,
1838 func_right=func_right,
1839 phenotypic=phenotypic,
1840 description=fdescr,
1841 )


@fill_doc
def load_nki(
    mesh="fsaverage5",
    mesh_type="pial",
    n_subjects=1,
    data_dir=None,
    url=None,
    resume=True,
    verbose=1,
):
    """Load NKI enhanced surface data into a surface object.

    .. versionadded:: 0.11.0

    Parameters
    ----------
    mesh : :obj:`str`, default='fsaverage5'
        Which :term:`mesh` to fetch.
        Should be one of the following values:
        %(fsaverage_options)s

    mesh_type : :obj:`str`, default='pial'
        Must be one of:

        - ``"pial"``
        - ``"white_matter"``
        - ``"inflated"``
        - ``"sphere"``
        - ``"flat"``

    n_subjects : :obj:`int`, default=1
        The number of subjects to load from a maximum of 102 subjects.
        By default, 1 subject will be loaded.
        If None is given, all 102 subjects will be loaded.

    %(data_dir)s

    %(url)s

    %(resume)s

    %(verbose)s

    Returns
    -------
    list of SurfaceImage objects
        One image per subject.

    Notes
    -----
    For more information
    see the :ref:`dataset description <nki_dataset>`.
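
    Examples
    --------
    A minimal usage sketch (illustrative; downloads data on first call;
    assumes ``load_nki`` is exposed under ``nilearn.datasets``)::

        from nilearn.datasets import load_nki

        images = load_nki(mesh="fsaverage5", mesh_type="pial", n_subjects=1)
        img = images[0]  # SurfaceImage with left and right hemisphere data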
1895 """
1896 check_params(locals())
1898 if mesh_type not in ALLOWED_MESH_TYPES:
1899 raise ValueError(
1900 f"'mesh_type' must be one of {ALLOWED_MESH_TYPES}.\n"
1901 f"Got: {mesh_type}."
1902 )
1904 fsaverage = load_fsaverage(mesh=mesh, data_dir=data_dir)
1906 nki_dataset = fetch_surf_nki_enhanced(
1907 n_subjects=n_subjects,
1908 data_dir=data_dir,
1909 url=url,
1910 resume=resume,
1911 verbose=verbose,
1912 )
1914 images = []
1915 for i, (left, right) in enumerate(
1916 zip(nki_dataset["func_left"], nki_dataset["func_right"]), start=1
1917 ):
1918 logger.log(f"Loading subject {i} of {n_subjects}.", verbose=verbose)
1920 img = SurfaceImage(
1921 mesh=fsaverage[mesh_type],
1922 data={
1923 "left": left,
1924 "right": right,
1925 },
1926 )
1927 images.append(img)
1929 return images
1932@fill_doc
1933def _fetch_development_fmri_participants(data_dir, url, verbose):
1934 """Use in fetch_development_fmri function.
1936 This function helps in downloading and loading participants data from .tsv
1937 uploaded on Open Science Framework (OSF).
1939 The original .tsv file contains many columns but this function picks only
1940 those columns that are relevant.
1942 Parameters
1943 ----------
1944 %(data_dir)s
1945 %(url)s
1946 %(verbose)s
1948 Returns
1949 -------
1950 participants : pandas.DataFrame
1951 Contains each subject's age, age group (child or adult),
1952 gender, and handedness.
1954 """
1955 check_params(locals())
1957 dataset_name = "development_fmri"
1958 data_dir = get_dataset_dir(
1959 dataset_name, data_dir=data_dir, verbose=verbose
1960 )
1962 if url is None:
1963 url = "https://osf.io/yr3av/download"
1965 files = [("participants.tsv", url, {"move": "participants.tsv"})]
1966 path_to_participants = fetch_files(data_dir, files, verbose=verbose)[0]
1968 # Load path to participants
1969 names = [
1970 "participant_id",
1971 "Age",
1972 "AgeGroup",
1973 "Child_Adult",
1974 "Gender",
1975 "Handedness",
1976 ]
1977 participants = pd.read_table(path_to_participants, usecols=names)
1978 return participants
1981@fill_doc
1982def _fetch_development_fmri_functional(
1983 participants, data_dir, url, resume, verbose
1984):
1985 """Help to fetch_development_fmri.
1987 This function helps in downloading functional MRI data in Nifti
1988 and its confound corresponding to each subject.
1990 The files are downloaded from Open Science Framework (OSF).
1992 Parameters
1993 ----------
1994 participants : pandas.DataFrame
1995 Must contain a participant_id column listing the subject ids. Files
1996 are fetched for the ids in this column.
1997 %(data_dir)s
1998 %(url)s
1999 %(resume)s
2000 %(verbose)s
2002 Returns
2003 -------
2004 func : list of str (Nifti files)
2005 Paths to functional MRI data (4D) for each subject.
2007 regressors : list of str (tsv files)
2008 Paths to regressors related to each subject.
2010 """
2011 check_params(locals())
2013 dataset_name = "development_fmri"
2014 data_dir = get_dataset_dir(
2015 dataset_name, data_dir=data_dir, verbose=verbose
2016 )
2018 if url is None:
2019 # Download from the relevant OSF project, using hashes generated
2020 # from the OSF API. Note the trailing slash. For more info, see:
2021 # https://gist.github.com/emdupre/3cb4d564511d495ea6bf89c6a577da74
2022 url = "https://osf.io/download/{}/"
2024 confounds = "{}_task-pixar_desc-confounds_regressors.tsv"
2025 func = "{0}_task-pixar_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz"
2027 # The csv file contains unique download keys per Nifti file and confound
2028 # pre-extracted from OSF. These keys are required for downloading files.
2029 dtype = [
2030 ("participant_id", "U12"),
2031 ("key_regressor", "U24"),
2032 ("key_bold", "U24"),
2033 ]
2034 names = ["participant_id", "key_r", "key_b"]
2035 # The csv file contains download information from the Open Science Framework (OSF)
2036 osf_data = csv_to_array(
2037 (PACKAGE_DIRECTORY / "data" / "development_fmri.csv"),
2038 skip_header=True,
2039 dtype=dtype,
2040 names=names,
2041 )
2043 funcs = []
2044 regressors = []
2046 for participant_id in participants["participant_id"]:
2047 this_osf_id = osf_data[osf_data["participant_id"] == participant_id]
2048 # Download regressors
2049 confound_url = url.format(this_osf_id["key_r"][0])
2050 regressor_file = [
2051 (
2052 confounds.format(participant_id),
2053 confound_url,
2054 {"move": confounds.format(participant_id)},
2055 )
2056 ]
2057 path_to_regressor = fetch_files(
2058 data_dir, regressor_file, verbose=verbose
2059 )[0]
2060 regressors.append(path_to_regressor)
2061 # Download bold images
2062 func_url = url.format(this_osf_id["key_b"][0])
2063 func_file = [
2064 (
2065 func.format(participant_id),
2066 func_url,
2067 {"move": func.format(participant_id)},
2068 )
2069 ]
2070 path_to_func = fetch_files(
2071 data_dir, func_file, resume=resume, verbose=verbose
2072 )[0]
2073 funcs.append(path_to_func)
2074 return funcs, regressors
2077@fill_doc
2078def fetch_development_fmri(
2079 n_subjects=None,
2080 reduce_confounds=True,
2081 data_dir=None,
2082 resume=True,
2083 verbose=1,
2084 age_group="both",
2085):
2086 """Fetch movie watching based brain development dataset (fMRI).
2088 The data is downsampled to 4mm resolution for convenience
2089 with a repetition time (t_r) of 2 secs.
2090 The origin of the data is coming from OpenNeuro. See Notes below.
2092 Please cite :footcite:t:`Richardson2018`
2093 if you are using this dataset.
2095 .. versionadded:: 0.5.2
2097 Parameters
2098 ----------
2099 n_subjects : :obj:`int`, default=None
2100 The number of subjects to load. If None, all 155 subjects
2101 are loaded.
2103 reduce_confounds : :obj:`bool`, default=True
2104 If True, the returned confounds only include 6 motion parameters,
2105 mean framewise displacement, signal from white matter, csf, and
2106 6 anatomical compcor parameters. This selection only serves the
2107 purpose of having realistic examples. Depending on your research
2108 question, other confounds might be more appropriate.
2109 If False, returns all :term:`fMRIPrep` confounds.
2110 %(data_dir)s
2111 %(resume)s
2112 %(verbose)s
2113 age_group : :obj:`str`, default='both'
2114 Which age group to fetch
2116 - 'adults' = fetch adults only (n=33, ages 18-39)
2117 - 'child' = fetch children only (n=122, ages 3-12)
2118 - 'both' = fetch full sample (n=155)
2120 Returns
2121 -------
2122 data : Bunch
2123 Dictionary-like object, the interest attributes are :
2125 - 'func': :obj:`list` of :obj:`str` (Nifti files)
2126 Paths to downsampled functional MRI data (4D) for each subject.
2128 - 'confounds': :obj:`list` of :obj:`str` (tsv files)
2129 Paths to confounds related to each subject.
2131 - 'phenotypic': pandas.DataFrame
2132 Contains each subject age, age group, child or adult, gender,
2133 handedness.
2135 Notes
2136 -----
2137 The original data is downloaded from OpenNeuro
2138 https://openneuro.org/datasets/ds000228/versions/1.0.0
2140 This fetcher downloads downsampled data that are available on Open
2141 Science Framework (OSF). Located here: https://osf.io/5hju4/files/
2143 Preprocessing details: https://osf.io/wjtyq/
2145 Note that if n_subjects > 2 and age_group is 'both',
2146 the fetcher will return a ratio of children and adults representative
2147 of the total sample.
2149 References
2150 ----------
2151 .. footbibliography::
2153 """
2154 check_params(locals())
2156 dataset_name = "development_fmri"
2157 data_dir = get_dataset_dir(dataset_name, data_dir=data_dir, verbose=verbose)
2158 keep_confounds = [
2159 "trans_x",
2160 "trans_y",
2161 "trans_z",
2162 "rot_x",
2163 "rot_y",
2164 "rot_z",
2165 "framewise_displacement",
2166 "a_comp_cor_00",
2167 "a_comp_cor_01",
2168 "a_comp_cor_02",
2169 "a_comp_cor_03",
2170 "a_comp_cor_04",
2171 "a_comp_cor_05",
2172 "csf",
2173 "white_matter",
2174 ]
2176 # Dataset description
2177 fdescr = get_dataset_descr(dataset_name)
2179 # Participants data: ids, demographics, etc
2180 participants = _fetch_development_fmri_participants(
2181 data_dir=data_dir, url=None, verbose=verbose
2182 )
2184 adult_count, child_count = _filter_func_regressors_by_participants(
2185 participants, age_group
2186 )
2187 max_subjects = adult_count + child_count
2189 n_subjects = _set_invalid_n_subjects_to_max(
2190 n_subjects, max_subjects, age_group
2191 )
2193 # To keep the proportion of children versus adults
2194 percent_total = float(n_subjects) / max_subjects
2195 n_child = np.round(percent_total * child_count).astype(int)
2196 n_adult = np.round(percent_total * adult_count).astype(int)
2198 # We want to return adults by default (i.e., `age_group=both`) or
2199 # if explicitly requested.
2200 if (age_group != "child") and (n_subjects == 1):
2201 n_adult, n_child = 1, 0
2203 if (age_group == "both") and (n_subjects == 2):
2204 n_adult, n_child = 1, 1
2206 participants = _filter_csv_by_n_subjects(participants, n_adult, n_child)
2208 funcs, regressors = _fetch_development_fmri_functional(
2209 participants,
2210 data_dir=data_dir,
2211 url=None,
2212 resume=resume,
2213 verbose=verbose,
2214 )
2216 if reduce_confounds:
2217 regressors = _reduce_confounds(regressors, keep_confounds)
2218 return Bunch(
2219 func=funcs,
2220 confounds=regressors,
2221 phenotypic=participants,
2222 description=fdescr,
2223 )
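# Usage sketch for fetch_development_fmri (illustrative, kept as comments;
# requires network access). The masker settings are arbitrary choices:
#
# from nilearn.datasets import fetch_development_fmri
# from nilearn.maskers import NiftiMasker
#
# data = fetch_development_fmri(n_subjects=1, age_group="adult")
# masker = NiftiMasker(standardize="zscore_sample", t_r=2.0)
# time_series = masker.fit_transform(
#     data.func[0], confounds=data.confounds[0]
# )
# print(time_series.shape)  # (n_scans, n_voxels)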
2226def _filter_func_regressors_by_participants(participants, age_group):
2227 """Filter functional and regressors based on participants."""
2228 valid_age_groups = ("both", "child", "adult")
2229 if age_group not in valid_age_groups:
2230 raise ValueError(
2231 f"Wrong value for age_group={age_group}. "
2232 f"Valid arguments are: {valid_age_groups}"
2233 )
2235 child_adult = participants["Child_Adult"].to_list()
2237 child_count = child_adult.count("child") if age_group != "adult" else 0
2238 adult_count = child_adult.count("adult") if age_group != "child" else 0
2239 return adult_count, child_count
2242def _filter_csv_by_n_subjects(participants, n_adult, n_child):
2243 """Restrict the csv files to the adequate number of subjects."""
2244 child_ids = participants[participants["Child_Adult"] == "child"][
2245 "participant_id"
2246 ][:n_child]
2247 adult_ids = participants[participants["Child_Adult"] == "adult"][
2248 "participant_id"
2249 ][:n_adult]
2250 ids = np.hstack([adult_ids, child_ids])
2251 participants = participants[np.isin(participants["participant_id"], ids)]
2252 participants = participants.sort_values(by=["Child_Adult"])
2253 return participants
2256def _set_invalid_n_subjects_to_max(n_subjects, max_subjects, age_group):
2257 """If n_subjects is invalid, sets it to max."""
2258 if n_subjects is None:
2259 n_subjects = max_subjects
2261 if isinstance(n_subjects, numbers.Number) and (
2262 (n_subjects > max_subjects) or (n_subjects < 1)
2263 ):
2264 warnings.warn(
2265 f"Wrong value for n_subjects={n_subjects}. "
2266 f"The maximum value (for age_group={age_group}) "
2267 f"will be used instead: n_subjects={max_subjects}.",
2268 stacklevel=find_stack_level(),
2269 )
2270 n_subjects = max_subjects
2271 return n_subjects
2274def _reduce_confounds(regressors, keep_confounds):
2275 reduced_regressors = []
2276 for in_file in regressors:
2277 out_file = in_file.replace("desc-confounds", "desc-reducedConfounds")
2278 if not Path(out_file).is_file():
2279 confounds = pd.read_csv(in_file, delimiter="\t").to_records()
2280 selected_confounds = confounds[keep_confounds]
2281 header = "\t".join(selected_confounds.dtype.names)
2282 np.savetxt(
2283 out_file,
2284 np.array(selected_confounds.tolist()),
2285 header=header,
2286 delimiter="\t",
2287 comments="",
2288 )
2289 reduced_regressors.append(out_file)
2290 return reduced_regressors
2293# datasets originally belonging to nistats follow
2296@fill_doc
2297def fetch_language_localizer_demo_dataset(
2298 data_dir=None, verbose=1, legacy_output=True
2299):
2300 """Download language localizer demo dataset.
2302 Parameters
2303 ----------
2304 %(data_dir)s
2306 %(verbose)s
2308 legacy_output : :obj:`bool`, default=True
2310 .. versionadded:: 0.10.3
2311 .. deprecated:: 0.10.3
2313 Starting from version 0.13.0
2314 the ``legacy_output`` argument will be removed
2315 and the fetcher will always return
2316 a :obj:`sklearn.utils.Bunch`.
2319 Returns
2320 -------
2321 data : :class:`sklearn.utils.Bunch`
2322 Dictionary-like object, the interest attributes are :
2324 - ``'data_dir'``: :obj:`str` Path to downloaded dataset.
2326 - ``'func'``: :obj:`list` of :obj:`str`,
2327 Absolute paths of downloaded files on disk
2329 - ``'description'`` : :obj:`str`, dataset description
2331 .. warning::
2333 LEGACY OUTPUT:
2335 **data_dir** : :obj:`str`
2336 Path to downloaded dataset.
2338 **downloaded_files** : :obj:`list` of :obj:`str`
2339 Absolute paths of downloaded files on disk
2341 """
2342 check_params(locals())
2344 url = "https://osf.io/3dj2a/download"
2345 # When it starts working again change back to:
2346 # url = 'https://osf.io/nh987/download'
2347 main_folder = "fMRI-language-localizer-demo-dataset"
2349 data_dir = get_dataset_dir(main_folder, data_dir=data_dir, verbose=verbose)
2350 # The files_spec needed for fetch_files
2351 files_spec = [(f"{main_folder}.zip", url, {"move": f"{main_folder}.zip"})]
2352 # Only download if directory is empty
2353 # Directory will have been created by the call to get_dataset_dir above
2354 if not list(data_dir.iterdir()):
2355 downloaded_files = fetch_files(
2356 data_dir, files_spec, resume=True, verbose=verbose
2357 )
2358 uncompress_file(downloaded_files[0])
2360 file_list = [str(path) for path in data_dir.rglob("*") if path.is_file()]
2361 if legacy_output:
2362 warnings.warn(
2363 category=DeprecationWarning,
2364 stacklevel=find_stack_level(),
2365 message=(
2366 "From version 0.13.0 this fetcher"
2367 "will always return a Bunch.\n"
2368 "Use `legacy_output=False` "
2369 "to start switch to this new behavior."
2370 ),
2371 )
2372 return str(data_dir), sorted(file_list)
2374 description = get_dataset_descr("language_localizer_demo")
2375 return Bunch(
2376 data_dir=str(data_dir), func=sorted(file_list), description=description
2377 )
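# Usage sketch (illustrative, kept as comments; requires network access).
# Passing legacy_output=False opts into the Bunch return value that will
# become the only behavior in 0.13.0, and avoids the DeprecationWarning:
#
# from nilearn.datasets import fetch_language_localizer_demo_dataset
#
# data = fetch_language_localizer_demo_dataset(legacy_output=False)
# print(data.data_dir)   # root of the downloaded dataset
# print(len(data.func))  # number of files found on disk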
2380@fill_doc
2381def fetch_bids_langloc_dataset(data_dir=None, verbose=1):
2382 """Download language localizer example :term:`bids<BIDS>` dataset.
2384 .. deprecated:: 0.10.3
2386 This fetcher function will be removed as it returns the same data
2387 as :func:`nilearn.datasets.fetch_language_localizer_demo_dataset`.
2389 Please use
2390 :func:`nilearn.datasets.fetch_language_localizer_demo_dataset`
2391 instead.
2393 Parameters
2394 ----------
2395 %(data_dir)s
2396 %(verbose)s
2398 Returns
2399 -------
2400 data_dir : :obj:`str`
2401 Path to downloaded dataset.
2403 downloaded_files : :obj:`list` of :obj:`str`
2404 Absolute paths of downloaded files on disk.
2405 """
2406 check_params(locals())
2408 warnings.warn(
2409 (
2410 "The 'fetch_bids_langloc_dataset' function will be removed "
2411 "in version 0.13.0 as it returns the same data "
2412 "as 'fetch_language_localizer_demo_dataset'.\n"
2413 "Please use 'fetch_language_localizer_demo_dataset' instead.'"
2414 ),
2415 DeprecationWarning,
2416 stacklevel=find_stack_level(),
2417 )
2418 url = "https://files.osf.io/v1/resources/9q7dv/providers/osfstorage/5888d9a76c613b01fc6acc4e"
2419 dataset_name = "bids_langloc_example"
2420 main_folder = "bids_langloc_dataset"
2421 data_dir = get_dataset_dir(
2422 dataset_name, data_dir=data_dir, verbose=verbose
2423 )
2425 # The files_spec needed for fetch_files
2426 files_spec = [(f"{main_folder}.zip", url, {"move": f"{main_folder}.zip"})]
2427 if not (data_dir / main_folder).exists():
2428 downloaded_files = fetch_files(
2429 data_dir, files_spec, resume=True, verbose=verbose
2430 )
2431 uncompress_file(downloaded_files[0])
2432 main_path = data_dir / main_folder
2433 file_list = [str(path) for path in main_path.rglob("*") if path.is_file()]
2434 return str(data_dir / main_folder), sorted(file_list)
2437@fill_doc
2438def fetch_ds000030_urls(data_dir=None, verbose=1):
2439 """Fetch URLs for files from the ds000030 :term:`BIDS` dataset.
2441 .. versionadded:: 0.9.2
2443 This dataset is version 1.0.4 of the "UCLA Consortium for
2444 Neuropsychiatric Phenomics LA5c" dataset
2445 :footcite:p:`Poldrack2016`.
2447 Downloading the index allows users to explore the dataset directories
2448 to select specific files to download.
2449 The index is a sorted list of urls.
2451 Parameters
2452 ----------
2453 %(data_dir)s
2454 %(verbose)s
2456 Returns
2457 -------
2458 urls_path : :obj:`str`
2459 Path to downloaded dataset index.
2461 urls : :obj:`list` of :obj:`str`
2462 Sorted list of dataset directories.
2464 References
2465 ----------
2466 .. footbibliography::
2467 """
2468 check_params(locals())
2470 DATA_PREFIX = "ds000030/ds000030_R1.0.4/uncompressed"
2471 FILE_URL = "https://osf.io/86xj7/download"
2473 data_dir = get_dataset_dir(
2474 DATA_PREFIX,
2475 data_dir=data_dir,
2476 verbose=verbose,
2477 )
2479 final_download_path = data_dir / "urls.json"
2480 downloaded_file_path = fetch_files(
2481 data_dir=data_dir,
2482 files=[
2483 (
2484 final_download_path,
2485 FILE_URL,
2486 {"move": final_download_path},
2487 )
2488 ],
2489 resume=True,
2490 )
2491 urls_path = downloaded_file_path[0]
2492 with Path(urls_path).open() as json_file:
2493 urls = json.load(json_file)
2495 return urls_path, urls
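# Usage sketch (illustrative, kept as comments; requires network access):
# download the ds000030 index and look at the first few file URLs.
#
# from nilearn.datasets import fetch_ds000030_urls
#
# urls_path, urls = fetch_ds000030_urls()
# print(urls_path)  # local path to urls.json
# print(urls[:3])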
2498def select_from_index(
2499 urls, inclusion_filters=None, exclusion_filters=None, n_subjects=None
2500):
2501 """Select subset of urls with given filters.
2503 Parameters
2504 ----------
2505 urls : :obj:`list` of :obj:`str`
2506 List of dataset urls obtained from index download.
2508 inclusion_filters : :obj:`list` of :obj:`str` or None, default=None
2509 List of unix shell-style wildcard strings
2510 that will be used to filter the url list.
2511 If a filter matches the url, it is retained for download.
2512 Multiple filters are combined like a logical "and",
2513 creating a more restrictive query.
2514 Inclusion and exclusion filters apply together.
2515 For example, the filter '*task-rest*' would keep only urls
2516 that contain the 'task-rest' string.
2518 exclusion_filters : :obj:`list` of :obj:`str` or None, default=None
2519 List of unix shell-style wildcard strings
2520 that will be used to filter the url list.
2521 If a filter matches the url, it is discarded from the download.
2522 Multiple filters are combined like a logical "and",
2523 creating a more restrictive query.
2524 Inclusion and exclusion filters apply together.
2525 For example, the filter '*task-rest*' would discard all urls
2526 that contain the 'task-rest' string.
2528 n_subjects : :obj:`int`, default=None
2529 Number of subjects to download from the dataset. All by default.
2531 Returns
2532 -------
2533 urls : :obj:`list` of :obj:`str`
2534 Sorted list of filtered dataset directories.
2536 """
2537 inclusion_filters = inclusion_filters or []
2538 exclusion_filters = exclusion_filters or []
2539 # We apply filters to the urls
2540 for exclusion in exclusion_filters:
2541 urls = [url for url in urls if not fnmatch.fnmatch(url, exclusion)]
2542 for inclusion in inclusion_filters:
2543 urls = [url for url in urls if fnmatch.fnmatch(url, inclusion)]
2545 # subject selection filter
2546 # from the url list we infer all available subjects like 'sub-xxx/'
2547 subject_regex = "sub-[a-zA-Z0-9]*[_./]"
2549 def infer_subjects(urls):
2550 subjects = set()
2551 for url in urls:
2552 if "sub-" in url:
2553 subjects.add(re.search(subject_regex, url)[0][:-1])
2554 return sorted(subjects)
2556 # We get a list of subjects (for the moment the first n subjects)
2557 selected_subjects = set(infer_subjects(urls)[:n_subjects])
2558 # We exclude urls of subjects not selected
2559 urls = [
2560 url
2561 for url in urls
2562 if "sub-" not in url
2563 or re.search(subject_regex, url)[0][:-1] in selected_subjects
2564 ]
2565 return urls
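# Filtering sketch for select_from_index (no download needed once `urls`
# is in hand; the wildcard patterns below are illustrative assumptions):
# keep fmriprep derivatives for one subject, dropping task-rest runs.
#
# filtered = select_from_index(
#     urls,
#     inclusion_filters=["*fmriprep*"],
#     exclusion_filters=["*task-rest*"],
#     n_subjects=1,
# )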
2568def patch_openneuro_dataset(file_list):
2569 """Add symlinks for files not named according to :term:`BIDS` conventions.
2571 .. warning::
2572 This function uses a series of hardcoded patterns to generate the
2573 corrected filenames.
2574 These patterns are not comprehensive and this function is not
2575 guaranteed to produce BIDS-compliant files.
2577 Parameters
2578 ----------
2579 file_list : :obj:`list` of :obj:`str`
2580 A list of filenames to update.
2581 """
2582 REPLACEMENTS = {
2583 "_T1w_brainmask": "_desc-brain_mask",
2584 "_T1w_preproc": "_desc-preproc_T1w",
2585 "_T1w_space-MNI152NLin2009cAsym_brainmask": "_space-MNI152NLin2009cAsym_desc-brain_mask", # noqa: E501
2586 "_T1w_space-MNI152NLin2009cAsym_class-": "_space-MNI152NLin2009cAsym_label-", # noqa: E501
2587 "_T1w_space-MNI152NLin2009cAsym_preproc": "_space-MNI152NLin2009cAsym_desc-preproc_T1w", # noqa: E501
2588 "_bold_confounds": "_desc-confounds_regressors",
2589 "_bold_space-MNI152NLin2009cAsym_brainmask": "_space-MNI152NLin2009cAsym_desc-brain_mask", # noqa: E501
2590 "_bold_space-MNI152NLin2009cAsym_preproc": "_space-MNI152NLin2009cAsym_desc-preproc_bold", # noqa: E501
2591 }
2593 # Create a symlink if a file with the modified filename does not exist
2594 for old_pattern, new_pattern in REPLACEMENTS.items():
2595 for name in file_list:
2596 if old_pattern in name:
2597 new_name = name.replace(old_pattern, new_pattern)
2598 if not Path(new_name).exists():
2599 os.symlink(name, new_name)
2602@fill_doc
2603def fetch_openneuro_dataset(
2604 urls=None,
2605 data_dir=None,
2606 dataset_version="ds000030_R1.0.4",
2607 verbose=1,
2608):
2609 """Download OpenNeuro :term:`BIDS` dataset.
2611 This function specifically downloads files from a series of URLs.
2612 Unless you use :func:`fetch_ds000030_urls` or the default parameters,
2613 it is up to the user to ensure that the URLs are correct,
2614 and that they are associated with an OpenNeuro dataset.
2616 Parameters
2617 ----------
2618 urls : :obj:`list` of :obj:`str`, default=None
2619 List of URLs to dataset files to download.
2620 If not specified, all files from the default dataset
2621 (``ds000030_R1.0.4``) will be downloaded.
2622 %(data_dir)s
2623 dataset_version : :obj:`str`, default='ds000030_R1.0.4'
2624 Dataset version name. Assumes it is of the form [name]_[version].
2625 %(verbose)s
2627 Returns
2628 -------
2629 data_dir : :obj:`str`
2630 Path to downloaded dataset.
2632 downloaded_files : :obj:`list` of :obj:`str`
2633 Absolute paths of downloaded files on disk.
2635 Notes
2636 -----
2637 The default dataset downloaded by this function is the
2638 "UCLA Consortium for Neuropsychiatric Phenomics LA5c" dataset
2639 :footcite:p:`Poldrack2016`.
2641 This copy includes filenames that are not compliant with the current
2642 version of :term:`BIDS`, so this function also calls
2643 :func:`patch_openneuro_dataset` to generate BIDS-compliant symlinks.
2645 See Also
2646 --------
2647 :func:`fetch_ds000030_urls`
2648 :func:`patch_openneuro_dataset`
2650 References
2651 ----------
2652 .. footbibliography::
2653 """
2654 check_params(locals())
2656 # if urls are not specified we download the complete dataset index
2657 if urls is None:
2658 DATASET_VERSION = "ds000030_R1.0.4"
2659 if dataset_version != DATASET_VERSION:
2660 warnings.warn(
2661 'If `dataset_version` is not "ds000030_R1.0.4", '
2662 '`urls` must be specified. Downloading "ds000030_R1.0.4".',
2663 stacklevel=find_stack_level(),
2664 )
2666 data_prefix = (
2667 f"{DATASET_VERSION.split('_')[0]}/{DATASET_VERSION}/uncompressed"
2668 )
2669 orig_data_dir = data_dir
2670 data_dir = get_dataset_dir(
2671 data_prefix,
2672 data_dir=data_dir,
2673 verbose=verbose,
2674 )
2676 _, urls = fetch_ds000030_urls(
2677 data_dir=orig_data_dir,
2678 verbose=verbose,
2679 )
2680 else:
2681 data_prefix = (
2682 f"{dataset_version.split('_')[0]}/{dataset_version}/uncompressed"
2683 )
2684 data_dir = get_dataset_dir(
2685 data_prefix,
2686 data_dir=data_dir,
2687 verbose=verbose,
2688 )
2690 # The files_spec needed for fetch_files
2691 files_spec = []
2692 files_dir = []
2694 # Check that data prefix is found in each URL
2695 bad_urls = [url for url in urls if data_prefix not in url]
2696 if bad_urls:
2697 raise ValueError(
2698 f"data_prefix ({data_prefix}) is not found in at least one URL. "
2699 "This indicates that the URLs do not correspond to the "
2700 "dataset_version provided.\n"
2701 f"Affected URLs: {bad_urls}"
2702 )
2704 for url in urls:
2705 url_path = url.split(data_prefix + "/")[1]
2706 file_dir = data_dir / url_path
2707 files_spec.append((file_dir.name, url, {}))
2708 files_dir.append(file_dir.parent)
2710 # download the files
2711 downloaded = []
2712 for file_spec, file_dir in zip(files_spec, files_dir):
2713 # Timeout errors are common with the s3 connection, so we retry to
2714 # avoid failing the whole download because of a transient instability
2715 success = False
2716 download_attempts = 4
2717 while download_attempts > 0 and not success:
2718 try:
2719 downloaded_files = fetch_files(
2720 file_dir,
2721 [file_spec],
2722 resume=True,
2723 verbose=verbose,
2724 )
2725 downloaded += downloaded_files
2726 success = True
2727 except Exception:
2728 download_attempts -= 1
2730 if not success:
2731 raise Exception(f"multiple failures downloading {file_spec[1]}")
2733 patch_openneuro_dataset(downloaded)
2735 return str(data_dir), sorted(downloaded)
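# End-to-end sketch chaining the helpers above (illustrative, kept as
# comments; requires network access). The subject label is an assumption:
#
# from nilearn.datasets import (
#     fetch_ds000030_urls,
#     fetch_openneuro_dataset,
#     select_from_index,
# )
#
# _, urls = fetch_ds000030_urls()
# urls = select_from_index(urls, inclusion_filters=["*sub-10159*"])
# data_dir, files = fetch_openneuro_dataset(urls=urls)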
2738@fill_doc
2739def fetch_localizer_first_level(data_dir=None, verbose=1):
2740 """Download a first-level localizer :term:`fMRI` dataset.
2742 Parameters
2743 ----------
2744 %(data_dir)s
2745 %(verbose)s
2747 Returns
2748 -------
2749 data : :obj:`sklearn.utils.Bunch`
2750 Dictionary-like object, with the keys:
2752 - epi_img: the input 4D image
2754 - events: a tsv file describing the paradigm
2756 - description: data description
2758 """
2759 check_params(locals())
2761 url = "https://osf.io/2bqxn/download"
2762 epi_img = "sub-12069_task-localizer_space-MNI305.nii.gz"
2763 events = "sub-12069_task-localizer_events.tsv"
2764 opts = {"uncompress": True}
2765 options = ("epi_img", "events", "description")
2766 dir_ = Path("localizer_first_level")
2767 filenames = [(dir_ / name, url, opts) for name in [epi_img, events]]
2769 dataset_name = "localizer_first_level"
2770 data_dir = get_dataset_dir(
2771 dataset_name, data_dir=data_dir, verbose=verbose
2772 )
2773 files = fetch_files(data_dir, filenames, verbose=verbose)
2775 params = dict(list(zip(options, files)))
2776 data = Bunch(**params)
2778 description = get_dataset_descr(dataset_name)
2779 data.description = description
2780 return data
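# Usage sketch (illustrative, kept as comments; requires network access):
# fit a first-level GLM on this dataset. t_r=2.4 follows the localizer
# protocol used in the nilearn examples; treat it as an assumption here.
#
# import pandas as pd
# from nilearn.datasets import fetch_localizer_first_level
# from nilearn.glm.first_level import FirstLevelModel
#
# data = fetch_localizer_first_level()
# events = pd.read_table(data.events)
# model = FirstLevelModel(t_r=2.4, minimize_memory=True)
# model = model.fit(data.epi_img, events=events)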
2783def _download_spm_auditory_data(data_dir):
2784 logger.log("Data absent, downloading...")
2785 url = (
2786 "https://www.fil.ion.ucl.ac.uk/spm/download/data/MoAEpilot/"
2787 "MoAEpilot.bids.zip"
2788 )
2789 archive_path = data_dir / Path(url).name
2790 fetch_single_file(url, data_dir)
2791 try:
2792 uncompress_file(archive_path)
2793 except Exception:
2794 logger.log("Archive corrupted, trying to download it again.")
2795 return fetch_spm_auditory(data_dir=data_dir, data_name="")
2798@fill_doc
2799@remove_parameters(
2800 removed_params=["subject_id"],
2801 reason="The spm_auditory dataset contains only one subject.",
2802 end_version="0.13.0",
2803)
2804def fetch_spm_auditory(
2805 data_dir=None,
2806 data_name="spm_auditory",
2807 subject_id=None,
2808 verbose=1,
2809):
2810 """Fetch :term:`SPM` auditory single-subject data.
2812 See :footcite:t:`spm_auditory`.
2814 Parameters
2815 ----------
2816 %(data_dir)s
2818 data_name : :obj:`str`, default='spm_auditory'
2819 Name of the dataset.
2821 subject_id : :obj:`str`, default=None
2822 Indicates which subject to retrieve.
2823 Will be removed in version ``0.13.0``.
2825 %(verbose)s
2827 Returns
2828 -------
2829 data : :obj:`sklearn.utils.Bunch`
2830 Dictionary-like object, the interest attributes are:
2831 - 'anat': :obj:`list` of :obj:`str`. Path to anat image
2832 - 'func': :obj:`list` of :obj:`str`. Path to functional image
2833 - 'events': :obj:`list` of :obj:`str`. Path to events.tsv file
2834 - 'description': :obj:`str`. Data description
2836 References
2837 ----------
2838 .. footbibliography::
2840 """
2841 check_params(locals())
2843 data_dir = get_dataset_dir(data_name, data_dir=data_dir, verbose=verbose)
2845 if not (data_dir / "MoAEpilot" / "sub-01").exists():
2846 _download_spm_auditory_data(data_dir)
2848 anat = get_bids_files(
2849 main_path=data_dir / "MoAEpilot",
2850 modality_folder="anat",
2851 file_tag="T1w",
2852 )[0]
2853 func = get_bids_files(
2854 main_path=data_dir / "MoAEpilot",
2855 modality_folder="func",
2856 file_tag="bold",
2857 )
2858 events = get_bids_files(
2859 main_path=data_dir / "MoAEpilot",
2860 modality_folder="func",
2861 file_tag="events",
2862 )[0]
2863 spm_auditory_data = {
2864 "anat": anat,
2865 "func": func,
2866 "events": events,
2867 "description": get_dataset_descr("spm_auditory"),
2868 }
2869 return Bunch(**spm_auditory_data)
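# Usage sketch (illustrative, kept as comments; requires network access).
# The dataset follows the BIDS layout, so events load directly as a table:
#
# import pandas as pd
# from nilearn.datasets import fetch_spm_auditory
#
# subject_data = fetch_spm_auditory()
# print(subject_data.func)  # paths to the BOLD run(s)
# events = pd.read_table(subject_data.events)
# print(events.head())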
2872def _get_func_data_spm_multimodal(subject_dir, session, _subject_data):
2873 session_func = sorted(
2874 subject_dir.glob(
2875 f"fMRI/Session{session}/fMETHODS-000{session + 4}-*-01.img"
2876 )
2877 )
2878 if len(session_func) < 390:
2879 logger.log(
2880 f"Missing {390 - len(session_func)} functional scans "
2881 f"for session {session}."
2882 )
2883 return None
2885 _subject_data[f"func{int(session)}"] = [str(path) for path in session_func]
2886 return _subject_data
2889def _get_session_trials_spm_multimodal(subject_dir, session, _subject_data):
2890 sess_trials = subject_dir / f"fMRI/trials_ses{int(session)}.mat"
2891 if not sess_trials.is_file():
2892 logger.log(f"Missing session file: {sess_trials}")
2893 return None
2895 _subject_data[f"trials_ses{int(session)}"] = str(sess_trials)
2896 return _subject_data
2899def _get_anatomical_data_spm_multimodal(subject_dir, _subject_data):
2900 anat = subject_dir / "sMRI/smri.img"
2901 if not anat.is_file():
2902 logger.log("Missing structural image.")
2903 return None
2905 _subject_data["anat"] = str(anat)
2906 return _subject_data
2909def _glob_spm_multimodal_fmri_data(subject_dir):
2910 """Glob data from subject_dir."""
2911 _subject_data = {"slice_order": "descending"}
2913 for session in range(1, 3):
2914 # glob func data for session
2915 _subject_data = _get_func_data_spm_multimodal(
2916 subject_dir, session, _subject_data
2917 )
2918 if not _subject_data:
2919 return None
2920 # glob trials .mat file
2921 _subject_data = _get_session_trials_spm_multimodal(
2922 subject_dir, session, _subject_data
2923 )
2924 if not _subject_data:
2925 return None
2926 try:
2927 events = _make_events_file_spm_multimodal_fmri(
2928 _subject_data, session
2929 )
2930 except MatReadError as mat_err:
2931 warnings.warn(
2932 f"{mat_err!s}. An events.tsv file cannot be generated",
2933 stacklevel=find_stack_level(),
2934 )
2935 else:
2936 events_filepath = _make_events_filepath_spm_multimodal_fmri(
2937 _subject_data, session
2938 )
2939 events.to_csv(events_filepath, sep="\t", index=False)
2940 _subject_data[f"events{session}"] = events_filepath
2942 # glob for anat data
2943 _subject_data = _get_anatomical_data_spm_multimodal(
2944 subject_dir, _subject_data
2945 )
2946 return Bunch(**_subject_data) if _subject_data else None
2949def _download_data_spm_multimodal(data_dir, subject_dir):
2950 logger.log("Data absent, downloading...")
2951 urls = [
2952 # fmri
2953 (
2954 "https://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/"
2955 "multimodal_fmri.zip"
2956 ),
2957 # structural
2958 (
2959 "https://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/"
2960 "multimodal_smri.zip"
2961 ),
2962 ]
2964 for url in urls:
2965 archive_path = subject_dir / Path(url).name
2966 fetch_single_file(url, subject_dir)
2967 try:
2968 uncompress_file(archive_path)
2969 except Exception:
2970 logger.log("Archive corrupted, trying to download it again.")
2971 return fetch_spm_multimodal_fmri(data_dir=data_dir, data_name="")
2973 return _glob_spm_multimodal_fmri_data(subject_dir)
2976def _make_events_filepath_spm_multimodal_fmri(_subject_data, session):
2977 key = f"trials_ses{session}"
2978 events_file_location = Path(_subject_data[key]).parent
2979 events_filename = f"session{session}_events.tsv"
2980 events_filepath = str(events_file_location / events_filename)
2981 return events_filepath
2984def _make_events_file_spm_multimodal_fmri(_subject_data, session):
2985 t_r = 2.0
2986 timing = loadmat(
2987 _subject_data[f"trials_ses{int(session)}"],
2988 squeeze_me=True,
2989 struct_as_record=False,
2990 )
2991 faces_onsets = timing["onsets"][0].ravel()
2992 scrambled_onsets = timing["onsets"][1].ravel()
2993 onsets = np.hstack((faces_onsets, scrambled_onsets))
2994 onsets *= t_r # because onsets were reported in 'scans' units
2995 conditions = ["faces"] * len(faces_onsets) + ["scrambled"] * len(
2996 scrambled_onsets
2997 )
2998 duration = np.ones_like(onsets)
2999 events = pd.DataFrame(
3000 {"trial_type": conditions, "onset": onsets, "duration": duration}
3001 )
3002 return events
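# Worked example of the scans-to-seconds conversion above (numbers are
# made up for illustration): with t_r = 2.0, onsets recorded at scans
# [15, 42] become [30.0, 84.0] seconds.
#
# onsets_in_scans = np.array([15.0, 42.0])
# onsets_in_seconds = onsets_in_scans * 2.0  # array([30., 84.])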
3005@fill_doc
3006@remove_parameters(
3007 removed_params=["subject_id"],
3008 reason="The spm_multimodal_fmri dataset contains only one subject.",
3009 end_version="0.13.0",
3010)
3012def fetch_spm_multimodal_fmri(
3013 data_dir=None,
3014 data_name="spm_multimodal_fmri",
3015 subject_id=None,
3016 verbose=1,
3017):
3018 """Fetcher for Multi-modal Face Dataset.
3020 For more information,
3021 see the :ref:`dataset description <spm_multimodal_dataset>`.
3023 Parameters
3024 ----------
3025 %(data_dir)s
3027 data_name : :obj:`str`, default='spm_multimodal_fmri'
3028 Name of the dataset.
3030 subject_id : :obj:`str`, default=None
3032 .. deprecated:: 0.11.2
3034 Will be removed in version ``0.13.0``.
3036 %(verbose)s
3038 Returns
3039 -------
3040 data : :obj:`sklearn.utils.Bunch`
3041 Dictionary-like object, the interest attributes are:
3043 - 'func1' : list of :obj:`str`. Paths to functional images for run 1
3045 - 'func2' : list of :obj:`str`. Paths to functional images for run 2
3047 - 'events1' : :obj:`str`. Path to onsets TSV file for run 1
3049 - 'events2' : :obj:`str`. Path to onsets TSV file for run 2
3051 - 'trials_ses1' : :obj:`str`.
3052 Path to .mat file containing onsets for run 1
3054 - 'trials_ses2' : :obj:`str`.
3055 Path to .mat file containing onsets for run 2
3057 - 'anat' : :obj:`str`. Path to anat file
3059 - 'description' : :obj:`str`. Description of the data
3061 - 't_r' : :obj:`float`. Repetition time in seconds
3062 of the functional images.
3064 """
3065 check_params(locals())
3067 data_dir = get_dataset_dir(data_name, data_dir=data_dir, verbose=verbose)
3068 subject_id = "sub001"
3069 subject_dir = data_dir / subject_id
3071 description = get_dataset_descr("spm_multimodal")
3073 # maybe data_dir already contains the data?
3074 data = _glob_spm_multimodal_fmri_data(subject_dir)
3075 if data is None:
3076 # No. Download the data
3077 data = _download_data_spm_multimodal(data_dir, subject_dir)
3079 data.description = description
3080 data.t_r = 2.0
3081 return data
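# Usage sketch (illustrative, kept as comments; requires network access):
# fetch the dataset and inspect the first run.
#
# from nilearn.datasets import fetch_spm_multimodal_fmri
#
# subject_data = fetch_spm_multimodal_fmri()
# print(len(subject_data.func1))  # number of scans in run 1
# print(subject_data.t_r)         # repetition time in seconds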
3084@fill_doc
3085def fetch_fiac_first_level(data_dir=None, verbose=1):
3086 """Download a first-level fiac :term:`fMRI` dataset (2 runs).
3088 Parameters
3089 ----------
3090 %(data_dir)s
3091 %(verbose)s
3093 Returns
3094 -------
3095 data : :obj:`sklearn.utils.Bunch`
3096 Dictionary-like object, the interest attributes are:
3098 - 'design_matrix1': :obj:`pandas.DataFrame`.
3099 Design matrix for run 1
3100 - 'func1': :obj:`str`. Path to Nifti file of run 1
3101 - 'design_matrix2': :obj:`pandas.DataFrame`.
3102 Design matrix for run 2
3103 - 'func2': :obj:`str`. Path to Nifti file of run 2
3104 - 'mask': :obj:`str`. Path to mask file
3105 - 'description': :obj:`str`. Data description
3107 Notes
3108 -----
3109 For more information
3110 see the :ref:`dataset description <fiac_dataset>`.
3112 """
3113 check_params(locals())
3115 data_dir = get_dataset_dir(
3116 "fiac_nilearn.glm", data_dir=data_dir, verbose=verbose
3117 )
3119 def _glob_fiac_data():
3120 """Glob data from subject_dir."""
3121 _subject_data = {}
3122 subject_dir = data_dir / "nipy-data-0.2/data/fiac/fiac0"
3123 for run in [1, 2]:
3124 # glob func data for session
3125 session_func = subject_dir / f"run{int(run)}.nii.gz"
3126 if not session_func.is_file():
3127 logger.log(f"Missing functional scan for session {int(run)}.")
3128 return None
3130 _subject_data[f"func{int(run)}"] = str(session_func)
3132 # glob design matrix .npz file
3133 sess_dmtx = subject_dir / f"run{int(run)}_design.npz"
3134 if not sess_dmtx.is_file():
3135 logger.log(f"Missing run file: {sess_dmtx}")
3136 return None
3138 design_matrix_data = np.load(str(sess_dmtx))
3139 columns = [x.decode() for x in design_matrix_data["conditions"]]
3141 _subject_data[f"design_matrix{int(run)}"] = pd.DataFrame(
3142 design_matrix_data["X"], columns=columns
3143 )
3145 # glob for mask data
3146 mask = subject_dir / "mask.nii.gz"
3147 if not mask.is_file():
3148 logger.log("Missing mask image.")
3149 return None
3151 _subject_data["mask"] = str(mask)
3152 return Bunch(**_subject_data)
3154 description = get_dataset_descr("fiac")
3156 # maybe data_dir already contains the data?
3157 data = _glob_fiac_data()
3158 if data is not None:
3159 data.description = description
3160 return data
3162 # No. Download the data
3163 logger.log("Data absent, downloading...")
3164 url = "https://nipy.org/data-packages/nipy-data-0.2.tar.gz"
3166 archive_path = data_dir / Path(url).name
3167 fetch_single_file(url, data_dir)
3168 try:
3169 uncompress_file(archive_path)
3170 except Exception:
3171 logger.log("Archive corrupted, trying to download it again.")
3172 data = fetch_fiac_first_level(data_dir=data_dir)
3173 data.description = description
3174 return data
3176 data = _glob_fiac_data()
3177 data.description = description
3178 return data
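# Usage sketch (illustrative, kept as comments; requires network access):
# fit a GLM with the precomputed design matrices, so no timing parameters
# are needed on the model itself.
#
# from nilearn.datasets import fetch_fiac_first_level
# from nilearn.glm.first_level import FirstLevelModel
#
# data = fetch_fiac_first_level()
# model = FirstLevelModel(mask_img=data.mask, minimize_memory=True)
# model = model.fit(
#     [data.func1, data.func2],
#     design_matrices=[data.design_matrix1, data.design_matrix2],
# )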
3181@functools.lru_cache
3182def load_sample_motor_activation_image():
3183 """Load a single functional image showing motor activations.
3185 Returns
3186 -------
3187 str
3188 Path to the sample functional image.
3190 Notes
3191 -----
3192 The 'left vs right button press' contrast is used:
3193 https://neurovault.org/images/10426/
3195 See Also
3196 --------
3197 nilearn.datasets.fetch_neurovault_ids
3198 nilearn.datasets.fetch_neurovault
3199 nilearn.datasets.fetch_neurovault_auditory_computation_task
3200 """
3201 return str(Path(__file__).parent / "data" / "image_10426.nii.gz")
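# Usage sketch (kept as comments): plot the sample motor activation map.
# No download is needed; the image ships with nilearn.
#
# from nilearn.datasets import load_sample_motor_activation_image
# from nilearn.plotting import plot_stat_map
#
# stat_img = load_sample_motor_activation_image()
# plot_stat_map(stat_img, threshold=3, title="left vs right button press")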