Coverage for nilearn/datasets/tests/test_func.py: 0% (489 statements)
coverage.py v7.9.1, created at 2025-06-16 12:32 +0200
"""Test the datasets module."""

import json
import re
import shutil
import tempfile
import uuid
from collections import OrderedDict
from pathlib import Path

import numpy as np
import pandas as pd
import pytest
from nibabel import Nifti1Image
from sklearn.utils import Bunch

from nilearn._utils.data_gen import create_fake_bids_dataset
from nilearn.datasets import fetch_development_fmri, func
from nilearn.datasets._utils import PACKAGE_DIRECTORY, get_dataset_dir
from nilearn.datasets.tests._testing import (
    check_type_fetcher,
    dict_to_archive,
    list_to_archive,
)
from nilearn.image import load_img


def test_is_valid_path():
    assert func._is_valid_path(path="foo", index=["foo"], verbose=1)
    assert not func._is_valid_path(path="bar", index=["foo"], verbose=1)


@pytest.mark.parametrize(
    "fn",
    [
        func.fetch_localizer_first_level,
        func.fetch_miyawaki2008,
    ],
)
def test_func_fetcher_return_bunch(fn):
    data = fn()
    assert isinstance(data, Bunch)
    check_type_fetcher(data)


def _load_localizer_index():
    data_dir = Path(__file__).parent / "data"
    with (data_dir / "localizer_index.json").open() as of:
        localizer_template = json.load(of)
    localizer_index = {}
    for idx in range(1, 95):
        sid = f"S{idx:02}"
        localizer_index.update(
            {key.format(sid): uuid.uuid4().hex for key in localizer_template}
        )
    localizer_index["/localizer/phenotype/behavioural.tsv"] = uuid.uuid4().hex
    localizer_index["/localizer/participants.tsv"] = uuid.uuid4().hex
    tsv_files = {
        "/localizer/phenotype/behavioural.tsv": pd.read_csv(
            data_dir / "localizer_behavioural.tsv", sep="\t"
        )
    }
    tsv_files["/localizer/participants.tsv"] = pd.read_csv(
        data_dir / "localizer_participants.tsv", sep="\t"
    )
    return localizer_index, tsv_files


@pytest.fixture()
def localizer_mocker(request_mocker):
    """Mock the index for localizer dataset."""
    index, tsv_files = _load_localizer_index()
    request_mocker.url_mapping["https://osf.io/hwbm2/download"] = json.dumps(
        index
    )
    for k, v in tsv_files.items():
        request_mocker.url_mapping[f"*{index[k][1:]}?"] = v.to_csv(
            index=False, sep="\t"
        )
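
# The fixture above illustrates the mocking pattern used throughout this
# module: keys added to request_mocker.url_mapping are URLs, glob-style
# patterns, or compiled regexes, and values are the payloads the mocked
# download should return, so the fetchers under test never touch the network.
# A minimal sketch (the pattern and payload below are made up for
# illustration only):
#
#     request_mocker.url_mapping["*participants.csv"] = "participant_id\nS01\n"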


def _make_haxby_subject_data(match, response):  # noqa: ARG001
    sub_files = [
        "bold.nii.gz",
        "labels.txt",
        "mask4_vt.nii.gz",
        "mask8b_face_vt.nii.gz",
        "mask8b_house_vt.nii.gz",
        "mask8_face_vt.nii.gz",
        "mask8_house_vt.nii.gz",
        "anat.nii.gz",
    ]
    return list_to_archive(Path(match.group(1), f) for f in sub_files)
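
# When registered against a regex such as r".*(subj\d).*\.tar\.gz" in the
# tests below, the helper above receives the regex match and builds a small
# archive whose members are the files a Haxby subject directory is expected
# to contain; "response" is unused (hence the noqa) and match.group(1)
# supplies the subject directory name.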


@pytest.mark.parametrize("subjects", [None, 7])
def test_fetch_haxby_more_than_6(tmp_path, request_mocker, subjects):
    """Test edge cases to extend coverage."""
    request_mocker.url_mapping[re.compile(r".*(subj\d).*\.tar\.gz")] = (
        _make_haxby_subject_data
    )
    func.fetch_haxby(data_dir=tmp_path, subjects=subjects, verbose=1)


def test_fetch_haxby(tmp_path, request_mocker):
    request_mocker.url_mapping[re.compile(r".*(subj\d).*\.tar\.gz")] = (
        _make_haxby_subject_data
    )
    for i in range(1, 6):
        haxby = func.fetch_haxby(data_dir=tmp_path, subjects=[i], verbose=0)
        # subject_data + (md5 + mask if first subj)

        assert isinstance(haxby, Bunch)
        check_type_fetcher(haxby)

        assert request_mocker.url_count == i + 2
        assert len(haxby.func) == 1
        assert len(haxby.anat) == 1
        assert len(haxby.session_target) == 1
        assert haxby.mask is not None
        assert len(haxby.mask_vt) == 1
        assert len(haxby.mask_face) == 1
        assert len(haxby.mask_house) == 1
        assert len(haxby.mask_face_little) == 1
        assert len(haxby.mask_house_little) == 1

    # subjects with list
    subjects = [1, 2, 6]
    request_mocker.url_mapping[re.compile(r".*stimuli.*")] = list_to_archive(
        [Path("stimuli", "README")]
    )

    haxby = func.fetch_haxby(
        data_dir=tmp_path, subjects=subjects, fetch_stimuli=True, verbose=0
    )

    assert len(haxby.func) == len(subjects)
    assert len(haxby.mask_house_little) == len(subjects)
    assert len(haxby.anat) == len(subjects)
    assert haxby.anat[2] is None
    assert isinstance(haxby.mask, str)
    assert len(haxby.mask_face) == len(subjects)
    assert len(haxby.session_target) == len(subjects)
    assert len(haxby.mask_vt) == len(subjects)
    assert len(haxby.mask_face_little) == len(subjects)
    assert "stimuli" in haxby

    subjects = ["a", 8]
    message = "You provided invalid subject id {0} in a list"

    for sub_id in subjects:
        with pytest.raises(ValueError, match=message.format(sub_id)):
            func.fetch_haxby(data_dir=tmp_path, subjects=[sub_id])


def _adhd_example_subject(match, request):  # noqa: ARG001
    contents = [
        Path("data", match.group(1), match.expand(r"\1_regressors.csv")),
        Path(
            "data",
            match.group(1),
            match.expand(r"\1_rest_tshift_RPI_voreg_mni.nii.gz"),
        ),
    ]
    return list_to_archive(contents)


def _adhd_metadata():
    sub1 = [3902469, 7774305, 3699991]
    sub2 = [
        2014113,
        4275075,
        1019436,
        3154996,
        3884955,
        27034,
        4134561,
        27018,
        6115230,
        27037,
        8409791,
        27011,
    ]
    sub3 = [
        3007585,
        8697774,
        9750701,
        10064,
        21019,
        10042,
        10128,
        2497695,
        4164316,
        1552181,
        4046678,
        23012,
    ]
    sub4 = [
        1679142,
        1206380,
        23008,
        4016887,
        1418396,
        2950754,
        3994098,
        3520880,
        1517058,
        9744150,
        1562298,
        3205761,
        3624598,
    ]
    subs = pd.DataFrame({"Subject": sub1 + sub2 + sub3 + sub4})
    tmp = "ADHD200_40subs_motion_parameters_and_phenotypics.csv"
    return dict_to_archive({tmp: subs.to_csv(index=False)})
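
# The two helpers above stand in for what the ADHD-200 download provides: a
# phenotypic CSV (packaged by _adhd_metadata and mapped to "*metadata.tgz"
# below) and one small per-subject archive built on the fly by
# _adhd_example_subject from the subject id captured by the URL regex.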


@pytest.mark.parametrize("subjects", [None, 9999])
def test_fetch_adhd_edge_cases(tmp_path, request_mocker, subjects):
    request_mocker.url_mapping["*metadata.tgz"] = _adhd_metadata()
    request_mocker.url_mapping[re.compile(r".*adhd40_([0-9]+)\.tgz")] = (
        _adhd_example_subject
    )
    func.fetch_adhd(
        data_dir=tmp_path, n_subjects=subjects, verbose=0, url=None
    )


def test_fetch_adhd(tmp_path, request_mocker):
    request_mocker.url_mapping["*metadata.tgz"] = _adhd_metadata()
    request_mocker.url_mapping[re.compile(r".*adhd40_([0-9]+)\.tgz")] = (
        _adhd_example_subject
    )
    adhd = func.fetch_adhd(data_dir=tmp_path, n_subjects=12, verbose=0)

    assert isinstance(adhd, Bunch)
    check_type_fetcher(adhd)
    assert len(adhd.func) == 12
    assert len(adhd.confounds) == 12
    assert request_mocker.url_count == 13  # Subjects + phenotypic


def test_miyawaki2008(tmp_path, request_mocker):
    dataset = func.fetch_miyawaki2008(data_dir=tmp_path, verbose=0)

    assert len(dataset.func) == 32
    assert len(dataset.label) == 32
    assert isinstance(dataset.mask, str)
    assert len(dataset.mask_roi) == 38
    assert isinstance(dataset.background, str)
    assert request_mocker.url_count == 1


def test_fetch_localizer_contrasts_errors(
    tmp_path,
    localizer_mocker,  # noqa: ARG001
):
    with pytest.raises(ValueError, match="should be a list of strings"):
        func.fetch_localizer_contrasts(
            "checkerboard",
            n_subjects=2,
            data_dir=tmp_path,
        )
    with pytest.raises(
        ValueError, match="following contrasts are not available"
    ):
        func.fetch_localizer_contrasts(
            ["foo"],
            n_subjects=2,
            data_dir=tmp_path,
        )


@pytest.mark.parametrize("subjects", [None, 9999])
def test_fetch_localizer_contrasts_edge_cases(
    tmp_path,
    localizer_mocker,  # noqa: ARG001
    subjects,
):
    func.fetch_localizer_contrasts(
        ["checkerboard"], n_subjects=subjects, data_dir=tmp_path, verbose=1
    )


def test_fetch_localizer_contrasts(tmp_path, localizer_mocker):  # noqa: ARG001
    dataset = func.fetch_localizer_contrasts(
        ["checkerboard"], n_subjects=2, data_dir=tmp_path, verbose=1
    )

    check_type_fetcher(dataset)
    assert not hasattr(dataset, "anats")
    assert not hasattr(dataset, "tmaps")
    assert not hasattr(dataset, "masks")
    assert isinstance(dataset.cmaps[0], str)
    assert isinstance(dataset.ext_vars, pd.DataFrame)
    assert len(dataset.cmaps) == 2
    assert len(dataset["ext_vars"]) == 2


def test_fetch_localizer_contrasts_multiple_contrasts(
    tmp_path,
    localizer_mocker,  # noqa: ARG001
):
    dataset = func.fetch_localizer_contrasts(
        ["checkerboard", "horizontal checkerboard"],
        n_subjects=2,
        data_dir=tmp_path,
        verbose=1,
    )

    assert isinstance(dataset.ext_vars, pd.DataFrame)
    assert isinstance(dataset.cmaps[0], str)
    assert len(dataset.cmaps) == 2 * 2  # two contrasts are fetched
    assert len(dataset["ext_vars"]) == 2


def test_fetch_localizer_contrasts_get_all(tmp_path, localizer_mocker):  # noqa: ARG001
    # all get_*=True
    dataset = func.fetch_localizer_contrasts(
        ["checkerboard"],
        n_subjects=1,
        data_dir=tmp_path,
        get_anats=True,
        get_masks=True,
        get_tmaps=True,
        verbose=1,
    )

    assert isinstance(dataset.ext_vars, pd.DataFrame)
    assert isinstance(dataset.anats[0], str)
    assert isinstance(dataset.cmaps[0], str)
    assert isinstance(dataset.masks[0], str)
    assert isinstance(dataset.tmaps[0], str)
    assert len(dataset["ext_vars"]) == 1
    assert len(dataset.anats) == 1
    assert len(dataset.cmaps) == 1
    assert len(dataset.masks) == 1
    assert len(dataset.tmaps) == 1


def test_fetch_localizer_contrasts_list_subjects(tmp_path, localizer_mocker):  # noqa: ARG001
    # grab a given list of subjects
    dataset2 = func.fetch_localizer_contrasts(
        ["checkerboard"],
        n_subjects=[2, 3, 5],
        data_dir=tmp_path,
        verbose=1,
    )

    assert len(dataset2["ext_vars"]) == 3
    assert len(dataset2.cmaps) == 3
    assert list(dataset2["ext_vars"]["participant_id"].values) == [
        "S02",
        "S03",
        "S05",
    ]


def test_fetch_localizer_calculation_task(tmp_path, localizer_mocker):  # noqa: ARG001
    # 2 subjects
    dataset = func.fetch_localizer_calculation_task(
        n_subjects=2, data_dir=tmp_path, verbose=1
    )

    assert isinstance(dataset, Bunch)
    check_type_fetcher(dataset)
    assert isinstance(dataset.ext_vars, pd.DataFrame)
    assert isinstance(dataset.cmaps[0], str)
    assert len(dataset["ext_vars"]) == 2
    assert len(dataset.cmaps) == 2


def test_fetch_localizer_button_task(tmp_path, localizer_mocker):  # noqa: ARG001
    # Disabled: cannot be tested without actually fetching covariates CSV file
    # Only one subject
    dataset = func.fetch_localizer_button_task(data_dir=tmp_path, verbose=1)

    assert isinstance(dataset, Bunch)

    assert isinstance(dataset.tmaps, list)
    assert isinstance(dataset.anats, list)

    assert len(dataset.tmaps) == 1
    assert len(dataset.anats) == 1

    assert isinstance(dataset.tmap, str)
    assert isinstance(dataset.anat, str)


@pytest.mark.parametrize("quality_checked", [False, True])
def test_fetch_abide_pcp(tmp_path, request_mocker, quality_checked):
    n_subjects = 800
    ids = list(range(n_subjects))
    filenames = ["no_filename"] * n_subjects
    filenames[::2] = ["filename"] * (n_subjects // 2)
    qc_rater_1 = ["OK"] * n_subjects
    qc_rater_1[::4] = ["fail"] * (n_subjects // 4)
    pheno = pd.DataFrame(
        {
            "subject_id": ids,
            "FILE_ID": filenames,
            "qc_rater_1": qc_rater_1,
            "qc_anat_rater_2": qc_rater_1,
            "qc_func_rater_2": qc_rater_1,
            "qc_anat_rater_3": qc_rater_1,
            "qc_func_rater_3": qc_rater_1,
        },
        columns=[
            "subject_id",
            "FILE_ID",
            "qc_rater_1",
            "qc_anat_rater_2",
            "qc_func_rater_2",
            "qc_anat_rater_3",
            "qc_func_rater_3",
        ],
    )
    request_mocker.url_mapping["*rocessed1.csv"] = pheno.to_csv(index=False)

    # All subjects
    dataset = func.fetch_abide_pcp(
        data_dir=tmp_path, quality_checked=quality_checked, verbose=0
    )
    div = 4 if quality_checked else 2

    assert isinstance(dataset, Bunch)

    check_type_fetcher(dataset)

    assert len(dataset.func_preproc) == n_subjects / div

    # Smoke test using only a string, rather than a list of strings
    dataset = func.fetch_abide_pcp(
        data_dir=tmp_path,
        quality_checked=quality_checked,
        verbose=0,
        derivatives="func_preproc",
    )


def test__load_mixed_gambles(rng, affine_eye):
    n_trials = 48
    for n_subjects in [1, 5, 16]:
        zmaps = [
            Nifti1Image(rng.standard_normal((3, 4, 5, n_trials)), affine_eye)
            for _ in range(n_subjects)
        ]
        zmaps, gain, _ = func._load_mixed_gambles(zmaps)

        assert len(zmaps) == n_subjects * n_trials
        assert len(zmaps) == len(gain)


@pytest.mark.parametrize("n_subjects", [1, 5, 16])
def test_fetch_mixed_gambles(tmp_path, n_subjects):
    mgambles = func.fetch_mixed_gambles(
        n_subjects=n_subjects,
        data_dir=tmp_path,
        verbose=1,
        return_raw_data=True,
        url=None,
    )
    datasetdir = tmp_path / "jimura_poldrack_2012_zmaps"

    assert mgambles["zmaps"][0] == str(
        datasetdir / "zmaps" / "sub001_zmaps.nii.gz"
    )
    assert len(mgambles["zmaps"]) == n_subjects

    assert isinstance(mgambles, Bunch)
    check_type_fetcher(mgambles)


def test_check_parameters_megatrawls_datasets():
    # Check that the function raises the expected error message
    # when invalid input parameters are provided.
    message = "Invalid {0} input is provided: {1}."

    for invalid_input_dim in [1, 5, 30]:
        with pytest.raises(
            ValueError,
            match=message.format("dimensionality", invalid_input_dim),
        ):
            func.fetch_megatrawls_netmats(dimensionality=invalid_input_dim)

    for invalid_input_timeserie in ["asdf", "time", "st2"]:
        with pytest.raises(
            ValueError,
            match=message.format("timeseries", invalid_input_timeserie),
        ):
            func.fetch_megatrawls_netmats(timeseries=invalid_input_timeserie)

    for invalid_output_name in ["net1", "net2"]:
        with pytest.raises(
            ValueError, match=message.format("matrices", invalid_output_name)
        ):
            func.fetch_megatrawls_netmats(matrices=invalid_output_name)


def test_fetch_megatrawls_netmats(tmp_path):
    # Smoke test: create the expected files on disk
    # and check that they are found and read properly.
    for file, folder in zip(
        ["Znet2.txt", "Znet1.txt"],
        [
            "3T_Q1-Q6related468_MSMsulc_d100_ts3",
            "3T_Q1-Q6related468_MSMsulc_d300_ts2",
        ],
    ):
        files_dir = tmp_path / "Megatrawls" / folder
        files_dir.mkdir(parents=True, exist_ok=True)
        with (files_dir / file).open("w") as net_file:
            net_file.write(" 1 2\n 2 3")

    dataset = func.fetch_megatrawls_netmats(data_dir=tmp_path)

    assert isinstance(dataset, Bunch)

    check_type_fetcher(dataset)

    # the returned bunch should contain the expected number of fields
    assert len(dataset) == 5
    # and none of them should be empty
    # dimensions
    assert dataset.dimensions != ""
    # timeseries
    assert dataset.timeseries != ""
    # matrices
    assert dataset.matrices != ""
    # correlation matrices
    assert isinstance(dataset.correlation_matrices, pd.DataFrame)

    # dimensions, timeseries and matrices in the output
    # should match the user-provided settings
    dataset = func.fetch_megatrawls_netmats(
        data_dir=tmp_path,
        dimensionality=300,
        timeseries="multiple_spatial_regression",
        matrices="full_correlation",
    )
    check_type_fetcher(dataset)
    assert dataset.dimensions == 300
    assert dataset.timeseries == "multiple_spatial_regression"
    assert dataset.matrices == "full_correlation"


def test_fetch_surf_nki_enhanced(tmp_path, request_mocker):
    ids = np.asarray(
        [
            "A00028185",
            "A00035827",
            "A00037511",
            "A00039431",
            "A00033747",
            "A00035840",
            "A00038998",
            "A00035072",
            "A00037112",
            "A00039391",
        ],
        dtype="U9",
    )
    age = np.ones(len(ids), dtype="<f8")
    hand = np.asarray(len(ids) * ["x"], dtype="U1")
    sex = np.asarray(len(ids) * ["x"], dtype="U1")
    pheno_data = pd.DataFrame(
        OrderedDict([("id", ids), ("age", age), ("hand", hand), ("sex", sex)])
    )
    request_mocker.url_mapping["*pheno_nki_nilearn.csv"] = pheno_data.to_csv(
        index=False
    )
    nki_data = func.fetch_surf_nki_enhanced(data_dir=tmp_path)

    assert isinstance(nki_data, Bunch)
    check_type_fetcher(nki_data)
    assert len(nki_data.func_left) == 10
    assert len(nki_data.func_right) == 10
    assert isinstance(nki_data.phenotypic, pd.DataFrame)
    assert nki_data.phenotypic.shape == (9, 4)


def test_load_nki_error():
    """Give an incorrect mesh_type argument."""
    with pytest.raises(ValueError, match="'mesh_type' must be one of"):
        func.load_nki(mesh_type="foo")


def _mock_participants_data(n_ids=5):
    """At most 8 ids can be mocked."""
    ids = [
        "sub-pixar052",
        "sub-pixar073",
        "sub-pixar074",
        "sub-pixar110",
        "sub-pixar042",
        "sub-pixar109",
        "sub-pixar068",
        "sub-pixar007",
    ][:n_ids]
    age = np.ones(len(ids))
    age_group = len(ids) * ["2yo"]
    child_adult = [["child", "adult"][i % 2] for i in range(n_ids)]
    gender = len(ids) * ["m"]
    handedness = len(ids) * ["r"]
    participants = pd.DataFrame(
        OrderedDict(
            [
                ("participant_id", ids),
                ("Age", age),
                ("AgeGroup", age_group),
                ("Child_Adult", child_adult),
                ("Gender", gender),
                ("Handedness", handedness),
            ]
        )
    )
    return participants


def _mock_development_confounds():
    keep_confounds = [
        "trans_x",
        "trans_y",
        "trans_z",
        "rot_x",
        "rot_y",
        "rot_z",
        "framewise_displacement",
        "a_comp_cor_00",
        "a_comp_cor_01",
        "a_comp_cor_02",
        "a_comp_cor_03",
        "a_comp_cor_04",
        "a_comp_cor_05",
        "csf",
        "white_matter",
    ]
    other_confounds = ["some_confound"] * 13
    confounds = keep_confounds + other_confounds
    return pd.DataFrame(np.ones((10, len(confounds))), columns=confounds)
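
# The helper above builds a confounds table with the 15 columns that
# fetch_development_fmri is expected to keep when reduce_confounds=True plus
# 13 filler columns; the column-count assertions (15 vs. 28) in
# test_fetch_development_fmri_n_confounds below rely on exactly this layout.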


def test_fetch_development_fmri_participants(tmp_path, request_mocker):
    mock_participants = _mock_participants_data()
    request_mocker.url_mapping["https://osf.io/yr3av/download"] = (
        mock_participants.to_csv(index=False, sep="\t")
    )
    participants = func._fetch_development_fmri_participants(
        data_dir=tmp_path, url=None, verbose=1
    )

    assert isinstance(participants, pd.DataFrame)
    assert participants.shape == (5, 6)


def test_fetch_development_fmri_functional(tmp_path):
    mock_participants = _mock_participants_data(n_ids=8)
    funcs, confounds = func._fetch_development_fmri_functional(
        mock_participants, data_dir=tmp_path, url=None, resume=True, verbose=1
    )

    assert len(funcs) == 8
    assert len(confounds) == 8


def test_fetch_development_fmri(tmp_path, request_mocker):
    """Test for fetch_development_fmri."""
    mock_participants = _mock_participants_data()
    request_mocker.url_mapping["*"] = _mock_development_confounds().to_csv(
        index=False, sep="\t"
    )
    request_mocker.url_mapping["https://osf.io/yr3av/download"] = (
        mock_participants.to_csv(index=False, sep="\t")
    )

    data = fetch_development_fmri(n_subjects=2, data_dir=tmp_path, verbose=1)

    assert isinstance(data, Bunch)
    check_type_fetcher(data)
    assert len(data.func) == 2
    assert len(data.confounds) == 2
    assert isinstance(data.phenotypic, pd.DataFrame)
    assert data.phenotypic.shape == (2, 6)


def test_fetch_development_fmri_n_confounds(request_mocker):
    """Check number of confounds returned by fetch_development_fmri."""
    mock_participants = _mock_participants_data()
    request_mocker.url_mapping["*"] = _mock_development_confounds().to_csv(
        index=False, sep="\t"
    )
    request_mocker.url_mapping["https://osf.io/yr3av/download"] = (
        mock_participants.to_csv(index=False, sep="\t")
    )

    data = fetch_development_fmri(n_subjects=2, verbose=1)

    # check reduced confounds
    confounds = np.genfromtxt(data.confounds[0], delimiter="\t")

    assert len(confounds[0]) == 15

    # check full confounds
    data = fetch_development_fmri(
        n_subjects=2, reduce_confounds=False, verbose=1
    )
    confounds = np.genfromtxt(data.confounds[0], delimiter="\t")

    assert len(confounds[0]) == 28


def test_fetch_development_fmri_phenotype(request_mocker):
    """Check phenotype returned by fetch_development_fmri."""
    mock_participants = _mock_participants_data()
    request_mocker.url_mapping["*"] = _mock_development_confounds().to_csv(
        index=False, sep="\t"
    )
    request_mocker.url_mapping["https://osf.io/yr3av/download"] = (
        mock_participants.to_csv(index=False, sep="\t")
    )

    # check that the first subject is an adult
    data = fetch_development_fmri(n_subjects=1, verbose=1)
    age_group = data.phenotypic["Child_Adult"].to_list()[0]

    assert age_group == "adult"

    # check that one of each age group is returned
    # if n_subjects == 2 and age_group == 'both'
    data = fetch_development_fmri(n_subjects=2, verbose=1, age_group="both")
    age_group = data.phenotypic["Child_Adult"]

    assert all(age_group == ["adult", "child"])

    # check that the first subject is a child if requested with age_group
    data = fetch_development_fmri(n_subjects=1, verbose=1, age_group="child")
    age_group = data.phenotypic["Child_Adult"][0]

    assert age_group == "child"

    # check age_group
    data = fetch_development_fmri(n_subjects=2, verbose=1, age_group="child")

    assert all(x == "child" for x in data.phenotypic["Child_Adult"])


def test_fetch_development_fmri_invalid_n_subjects():
    max_subjects = 155
    n_subjects = func._set_invalid_n_subjects_to_max(
        n_subjects=None, max_subjects=max_subjects, age_group="adult"
    )

    assert n_subjects == max_subjects
    with pytest.warns(UserWarning, match="Wrong value for n_subjects="):
        func._set_invalid_n_subjects_to_max(
            n_subjects=-1, max_subjects=max_subjects, age_group="adult"
        )


def test_fetch_development_fmri_exception():
    with pytest.raises(ValueError, match="Wrong value for age_group"):
        func._filter_func_regressors_by_participants(
            participants="junk", age_group="junk for test"
        )


# datasets tests originally belonging to nistats follow

datadir = PACKAGE_DIRECTORY / "data"


def test_fetch_bids_langloc_dataset(tmp_path):
    data_dir = tmp_path / "bids_langloc_example"
    main_folder = data_dir / "bids_langloc_dataset"
    main_folder.mkdir(parents=True)

    datadir, dl_files = func.fetch_bids_langloc_dataset(tmp_path)

    assert isinstance(datadir, str)
    assert isinstance(dl_files, list)


def test_select_from_index():
    dataset_version = "ds000030_R1.0.4"
    data_prefix = (
        f"{dataset_version.split('_')[0]}/{dataset_version}/uncompressed"
    )
    # Prepare url files for subject and filter tests
    urls = [
        f"{data_prefix}/{f}"
        for f in [
            "stuff.html",
            "sub-xxx.html",
            "sub-yyy.html",
            "sub-xxx/ses-01_task-rest.txt",
            "sub-xxx/ses-01_task-other.txt",
            "sub-xxx/ses-02_task-rest.txt",
            "sub-xxx/ses-02_task-other.txt",
            "sub-yyy/ses-01.txt",
            "sub-yyy/ses-02.txt",
        ]
    ]

    # Only 1 subject and the non-subject-specific files get downloaded
    new_urls = func.select_from_index(urls, n_subjects=1)

    assert len(new_urls) == 6
    assert data_prefix + "/sub-yyy.html" not in new_urls

    # 2 subjects and the non-subject-specific files get downloaded
    new_urls = func.select_from_index(urls, n_subjects=2)

    assert len(new_urls) == 9
    assert data_prefix + "/sub-yyy.html" in new_urls

    # ALL subjects and the non-subject-specific files get downloaded
    new_urls = func.select_from_index(urls, n_subjects=None)

    assert len(new_urls) == 9

    # test inclusive filters. Only files with task-rest
    new_urls = func.select_from_index(urls, inclusion_filters=["*task-rest*"])

    assert len(new_urls) == 2
    assert data_prefix + "/stuff.html" not in new_urls

    # test exclusive filters. only files without ses-01
    new_urls = func.select_from_index(urls, exclusion_filters=["*ses-01*"])

    assert len(new_urls) == 6
    assert data_prefix + "/stuff.html" in new_urls

    # test filter combination. only files with task-rest and without ses-01
    new_urls = func.select_from_index(
        urls, inclusion_filters=["*task-rest*"], exclusion_filters=["*ses-01*"]
    )

    assert len(new_urls) == 1
    assert data_prefix + "/sub-xxx/ses-02_task-rest.txt" in new_urls


def test_fetch_ds000030_urls():
    with tempfile.TemporaryDirectory() as tmpdir:
        subdir_names = ["ds000030", "ds000030_R1.0.4", "uncompressed"]
        tmp_list = []
        for subdir in subdir_names:
            tmp_list.append(subdir)
            subdirpath = Path(tmpdir, *tmp_list)
            subdirpath.mkdir()

        filepath = subdirpath / "urls.json"
        mock_json_content = ["junk1", "junk2"]
        with filepath.open("w") as f:
            json.dump(mock_json_content, f)

        # fetch_ds000030_urls should retrieve the appropriate URLs
        urls_path, urls = func.fetch_ds000030_urls(
            data_dir=tmpdir,
            verbose=1,
        )

        assert urls_path == str(filepath)
        assert urls == mock_json_content


def test_fetch_openneuro_dataset(tmp_path):
    dataset_version = "ds000030_R1.0.4"
    data_prefix = (
        f"{dataset_version.split('_')[0]}/{dataset_version}/uncompressed"
    )
    data_dir = get_dataset_dir(
        data_prefix,
        data_dir=tmp_path,
        verbose=1,
    )
    url_file = data_dir / "urls.json"

    # Prepare url files for subject and filter tests
    urls = [
        f"https://example.com/{data_prefix}/stuff.html",
        f"https://example.com/{data_prefix}/sub-xxx.html",
        f"https://example.com/{data_prefix}/sub-yyy.html",
        f"https://example.com/{data_prefix}/sub-xxx/ses-01_task-rest.txt",
        f"https://example.com/{data_prefix}/sub-xxx/ses-01_task-other.txt",
        f"https://example.com/{data_prefix}/sub-xxx/ses-02_task-rest.txt",
        f"https://example.com/{data_prefix}/sub-xxx/ses-02_task-other.txt",
        f"https://example.com/{data_prefix}/sub-yyy/ses-01.txt",
        f"https://example.com/{data_prefix}/sub-yyy/ses-02.txt",
    ]
    with url_file.open("w") as f:
        json.dump(urls, f)

    # Only 1 subject and the non-subject-specific files get downloaded
    datadir, dl_files = func.fetch_openneuro_dataset(
        urls, tmp_path, dataset_version
    )

    assert isinstance(datadir, str)
    assert isinstance(dl_files, list)
    assert len(dl_files) == 9

    # Try downloading a different dataset without providing URLs.
    # This should raise a warning and download ds000030.
    with pytest.warns(
        UserWarning,
        match='Downloading "ds000030_R1.0.4".',
    ):
        _, urls = func.fetch_openneuro_dataset(
            urls=None,
            data_dir=tmp_path,
            dataset_version="ds500_v2",
            verbose=1,
        )


def test_fetch_openneuro_dataset_errors(tmp_path):
    dataset_version = "ds000030_R1.0.4"
    # URLs do not contain the data_prefix, which should raise a ValueError
    urls = [
        "https://example.com/stuff.html",
        "https://example.com/sub-yyy/ses-01.txt",
    ]
    with pytest.raises(ValueError, match="This indicates that the URLs"):
        func.fetch_openneuro_dataset(urls, tmp_path, dataset_version)


def test_fetch_localizer(tmp_path):
    dataset = func.fetch_localizer_first_level(data_dir=tmp_path)

    assert isinstance(dataset["events"], str)
    assert isinstance(dataset.epi_img, str)


@pytest.mark.parametrize("legacy", [True, False])
def test_fetch_language_localizer_demo_dataset(tmp_path, legacy):
    data_dir = tmp_path
    expected_data_dir = tmp_path / "fMRI-language-localizer-demo-dataset"
    contents_dir = Path(__file__).parent / "data" / "archive_contents"
    contents_list_file = contents_dir / "language_localizer.txt"
    with contents_list_file.open() as f:
        expected_files = [
            str(expected_data_dir / file_path.strip())
            for file_path in f.readlines()[1:]
        ]
    if legacy:
        with pytest.deprecated_call(match="Bunch"):
            (
                actual_dir,
                actual_subdirs,
            ) = func.fetch_language_localizer_demo_dataset(
                data_dir, legacy_output=legacy
            )

        assert actual_dir == str(expected_data_dir)
        assert actual_subdirs == sorted(expected_files)
    else:
        bunch = func.fetch_language_localizer_demo_dataset(
            data_dir, legacy_output=legacy
        )

        assert isinstance(bunch, Bunch)
        check_type_fetcher(bunch)
        assert bunch.data_dir == str(expected_data_dir)
        assert bunch.func == sorted(expected_files)


def test_download_spm_auditory_data(tmp_path, request_mocker):
    request_mocker.url_mapping[re.compile(r".*MoAEpilot.bids.zip")] = (
        list_to_archive([Path("spm_auditory", "MoAEpilot", "README.txt")])
    )
    func._download_spm_auditory_data(data_dir=tmp_path)

    assert (tmp_path / "spm_auditory" / "MoAEpilot" / "README.txt").exists()


def test_fetch_spm_auditory(tmp_path):
    create_fake_bids_dataset(
        base_dir=tmp_path,
        n_sub=1,
        n_ses=0,
        tasks=["auditory"],
        n_runs=[1],
        with_derivatives=False,
    )
    data_dir = tmp_path / "spm_auditory" / "MoAEpilot"
    shutil.move(tmp_path / "bids_dataset", data_dir)

    dataset = func.fetch_spm_auditory(data_dir=tmp_path)

    assert isinstance(dataset, Bunch)
    check_type_fetcher(dataset)
    assert isinstance(dataset.anat, str)
    assert isinstance(dataset.events, str)
    assert isinstance(dataset.func[0], str)


def _generate_spm_multimodal(subject_dir=None, n_sessions=2, n_vol=390):
    files = ["sMRI/smri.img"]
    for session in range(n_sessions):
        files.append(f"fMRI/trials_ses{int(session + 1)}.mat")
        files.extend(
            [
                f"fMRI/Session{int(session + 1)}/"
                f"fMETHODS-000{int(session + 5)}-{int(i)}-01.img"
                for i in range(n_vol)
            ]
        )

    if subject_dir is None:
        return list_to_archive(files, archive_format="zip")
    for file_ in files:
        file_ = subject_dir / file_
        file_.parent.mkdir(parents=True, exist_ok=True)
        file_.touch()
    return
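
# The helper above is used in two ways by the tests that follow: with
# subject_dir=None it returns a zip archive suitable as a mocked download
# (test_fetch_spm_multimodal_missing_data); otherwise it touches the expected
# dummy files under subject_dir so the fetcher finds them already on disk
# (test_fetch_spm_multimodal).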


def test_fetch_spm_multimodal(tmp_path):
    subject_dir = tmp_path / "spm_multimodal_fmri" / "sub001"
    _generate_spm_multimodal(subject_dir=subject_dir)

    dataset = func.fetch_spm_multimodal_fmri(data_dir=tmp_path, verbose=0)

    assert isinstance(dataset, Bunch)
    check_type_fetcher(dataset)
    assert isinstance(dataset.anat, str)
    assert isinstance(dataset.func1[0], str)
    assert len(dataset.func1) == 390
    assert isinstance(dataset.func2[0], str)
    assert len(dataset.func2) == 390
    assert dataset.slice_order == "descending"
    assert isinstance(dataset.trials_ses1, str)
    assert isinstance(dataset.trials_ses2, str)


def test_fetch_spm_multimodal_missing_data(tmp_path, request_mocker):
    request_mocker.url_mapping[re.compile(r".*multimodal_.*mri.zip")] = (
        _generate_spm_multimodal()
    )

    subject_id = "sub001"
    subject_dir = tmp_path / "spm_multimodal_fmri" / subject_id

    dataset = func.fetch_spm_multimodal_fmri(data_dir=tmp_path, verbose=1)
    assert (subject_dir / "fMRI").exists()
    assert (subject_dir / "sMRI").exists()
    assert isinstance(dataset, Bunch)
    check_type_fetcher(dataset)
    assert isinstance(dataset.anat, str)
    assert isinstance(dataset.func1[0], str)
    assert len(dataset.func1) == 390
    assert isinstance(dataset.func2[0], str)
    assert len(dataset.func2) == 390
    assert dataset.slice_order == "descending"
    assert isinstance(dataset.trials_ses1, str)
    assert isinstance(dataset.trials_ses2, str)


def test_fiac(tmp_path):
    # Create dummy 'files'
    fiac_dir = (
        tmp_path / "fiac_nilearn.glm" / "nipy-data-0.2" / "data" / "fiac"
    )
    fiac0_dir = fiac_dir / "fiac0"
    fiac0_dir.mkdir(parents=True)
    for run in [1, 2]:
        # glob func data for run + 1
        (fiac0_dir / f"run{int(run)}.nii.gz").touch()

        X = np.ones((2, 2))
        conditions = [b"cdt_1", b"cdt_2"]
        np.savez(
            fiac0_dir / f"run{int(run)}_design.npz", X=X, conditions=conditions
        )

    (fiac0_dir / "mask.nii.gz").touch()

    dataset = func.fetch_fiac_first_level(data_dir=tmp_path)

    assert isinstance(dataset, Bunch)
    check_type_fetcher(dataset)
    assert isinstance(dataset.func1, str)
    assert isinstance(dataset.func2, str)
    assert isinstance(dataset.design_matrix1, pd.DataFrame)
    assert isinstance(dataset.design_matrix2, pd.DataFrame)
    assert isinstance(dataset.mask, str)


def test_load_sample_motor_activation_image():
    path_img = func.load_sample_motor_activation_image()

    check_type_fetcher(path_img)
    assert Path(path_img).exists()
    assert load_img(path_img)