Coverage for nilearn/datasets/tests/test_func.py: 0% of 489 statements (coverage.py v7.9.1, created at 2025-06-16 12:32 +0200)

1"""Test the datasets module.""" 

2 

3import json 

4import re 

5import shutil 

6import tempfile 

7import uuid 

8from collections import OrderedDict 

9from pathlib import Path 

10 

11import numpy as np 

12import pandas as pd 

13import pytest 

14from nibabel import Nifti1Image 

15from sklearn.utils import Bunch 

16 

17from nilearn._utils.data_gen import create_fake_bids_dataset 

18from nilearn.datasets import fetch_development_fmri, func 

19from nilearn.datasets._utils import PACKAGE_DIRECTORY, get_dataset_dir 

20from nilearn.datasets.tests._testing import ( 

21 check_type_fetcher, 

22 dict_to_archive, 

23 list_to_archive, 

24) 

25from nilearn.image import load_img 


def test_is_valid_path():
    assert func._is_valid_path(path="foo", index=["foo"], verbose=1)
    assert not func._is_valid_path(path="bar", index=["foo"], verbose=1)


@pytest.mark.parametrize(
    "fn",
    [
        func.fetch_localizer_first_level,
        func.fetch_miyawaki2008,
    ],
)
def test_func_fetcher_return_bunch(fn):
    data = fn()
    assert isinstance(data, Bunch)
    check_type_fetcher(data)


def _load_localizer_index():
    data_dir = Path(__file__).parent / "data"
    with (data_dir / "localizer_index.json").open() as of:
        localizer_template = json.load(of)
    localizer_index = {}
    for idx in range(1, 95):
        sid = f"S{idx:02}"
        localizer_index.update(
            {key.format(sid): uuid.uuid4().hex for key in localizer_template}
        )
    localizer_index["/localizer/phenotype/behavioural.tsv"] = uuid.uuid4().hex
    localizer_index["/localizer/participants.tsv"] = uuid.uuid4().hex
    tsv_files = {
        "/localizer/phenotype/behavioural.tsv": pd.read_csv(
            data_dir / "localizer_behavioural.tsv", sep="\t"
        )
    }
    tsv_files["/localizer/participants.tsv"] = pd.read_csv(
        data_dir / "localizer_participants.tsv", sep="\t"
    )
    return localizer_index, tsv_files


@pytest.fixture()
def localizer_mocker(request_mocker):
    """Mock the index for localizer dataset."""
    index, tsv_files = _load_localizer_index()
    request_mocker.url_mapping["https://osf.io/hwbm2/download"] = json.dumps(
        index
    )
    for k, v in tsv_files.items():
        request_mocker.url_mapping[f"*{index[k][1:]}?"] = v.to_csv(
            index=False, sep="\t"
        )
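
# NOTE: the `localizer_mocker` fixture rewires `request_mocker` so that the
# OSF index URL returns the fake index built by `_load_localizer_index`, and
# the phenotype TSV entries resolve to the tables bundled with the tests.
# A test only has to request the fixture to exercise the localizer fetchers
# offline, roughly:
#
#     def test_example(tmp_path, localizer_mocker):
#         func.fetch_localizer_contrasts(
#             ["checkerboard"], n_subjects=1, data_dir=tmp_path
#         )
#
# (illustrative sketch only; the actual tests below follow this pattern)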


def _make_haxby_subject_data(match, response):  # noqa: ARG001
    sub_files = [
        "bold.nii.gz",
        "labels.txt",
        "mask4_vt.nii.gz",
        "mask8b_face_vt.nii.gz",
        "mask8b_house_vt.nii.gz",
        "mask8_face_vt.nii.gz",
        "mask8_house_vt.nii.gz",
        "anat.nii.gz",
    ]
    return list_to_archive(Path(match.group(1), f) for f in sub_files)
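
# NOTE: `_make_haxby_subject_data` is used below as a dynamic response for the
# request mocker: any URL matching ``.*(subj\d).*\.tar\.gz`` gets back a small
# archive whose members mimic the files expected for one Haxby subject
# (bold run, labels file, anatomical image and the ventral-temporal masks).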


@pytest.mark.parametrize("subjects", [None, 7])
def test_fetch_haxby_more_than_6(tmp_path, request_mocker, subjects):
    """Test edge cases to extend coverage."""
    request_mocker.url_mapping[re.compile(r".*(subj\d).*\.tar\.gz")] = (
        _make_haxby_subject_data
    )
    func.fetch_haxby(data_dir=tmp_path, subjects=subjects, verbose=1)


def test_fetch_haxby(tmp_path, request_mocker):
    request_mocker.url_mapping[re.compile(r".*(subj\d).*\.tar\.gz")] = (
        _make_haxby_subject_data
    )
    for i in range(1, 6):
        haxby = func.fetch_haxby(data_dir=tmp_path, subjects=[i], verbose=0)
        # subject_data + (md5 + mask if first subj)

        assert isinstance(haxby, Bunch)
        check_type_fetcher(haxby)

        assert request_mocker.url_count == i + 2
        assert len(haxby.func) == 1
        assert len(haxby.anat) == 1
        assert len(haxby.session_target) == 1
        assert haxby.mask is not None
        assert len(haxby.mask_vt) == 1
        assert len(haxby.mask_face) == 1
        assert len(haxby.mask_house) == 1
        assert len(haxby.mask_face_little) == 1
        assert len(haxby.mask_house_little) == 1

    # subjects with list
    subjects = [1, 2, 6]
    request_mocker.url_mapping[re.compile(r".*stimuli.*")] = list_to_archive(
        [Path("stimuli", "README")]
    )

    haxby = func.fetch_haxby(
        data_dir=tmp_path, subjects=subjects, fetch_stimuli=True, verbose=0
    )

    assert len(haxby.func) == len(subjects)
    assert len(haxby.mask_house_little) == len(subjects)
    assert len(haxby.anat) == len(subjects)
    assert haxby.anat[2] is None
    assert isinstance(haxby.mask, str)
    assert len(haxby.mask_face) == len(subjects)
    assert len(haxby.session_target) == len(subjects)
    assert len(haxby.mask_vt) == len(subjects)
    assert len(haxby.mask_face_little) == len(subjects)
    assert "stimuli" in haxby

    subjects = ["a", 8]
    message = "You provided invalid subject id {0} in a list"

    for sub_id in subjects:
        with pytest.raises(ValueError, match=message.format(sub_id)):
            func.fetch_haxby(data_dir=tmp_path, subjects=[sub_id])


def _adhd_example_subject(match, request):  # noqa: ARG001
    contents = [
        Path("data", match.group(1), match.expand(r"\1_regressors.csv")),
        Path(
            "data",
            match.group(1),
            match.expand(r"\1_rest_tshift_RPI_voreg_mni.nii.gz"),
        ),
    ]
    return list_to_archive(contents)


def _adhd_metadata():
    sub1 = [3902469, 7774305, 3699991]
    sub2 = [
        2014113,
        4275075,
        1019436,
        3154996,
        3884955,
        27034,
        4134561,
        27018,
        6115230,
        27037,
        8409791,
        27011,
    ]
    sub3 = [
        3007585,
        8697774,
        9750701,
        10064,
        21019,
        10042,
        10128,
        2497695,
        4164316,
        1552181,
        4046678,
        23012,
    ]
    sub4 = [
        1679142,
        1206380,
        23008,
        4016887,
        1418396,
        2950754,
        3994098,
        3520880,
        1517058,
        9744150,
        1562298,
        3205761,
        3624598,
    ]
    subs = pd.DataFrame({"Subject": sub1 + sub2 + sub3 + sub4})
    tmp = "ADHD200_40subs_motion_parameters_and_phenotypics.csv"
    return dict_to_archive({tmp: subs.to_csv(index=False)})
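
# NOTE: `_adhd_metadata` packs a minimal phenotypic CSV (40 subject ids) into
# an archive keyed by the filename the ADHD fetcher expects, while
# `_adhd_example_subject` builds one subject's archive (regressors and a
# preprocessed functional image) from the subject id captured in the URL.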


@pytest.mark.parametrize("subjects", [None, 9999])
def test_fetch_adhd_edge_cases(tmp_path, request_mocker, subjects):
    request_mocker.url_mapping["*metadata.tgz"] = _adhd_metadata()
    request_mocker.url_mapping[re.compile(r".*adhd40_([0-9]+)\.tgz")] = (
        _adhd_example_subject
    )
    func.fetch_adhd(
        data_dir=tmp_path, n_subjects=subjects, verbose=0, url=None
    )


def test_fetch_adhd(tmp_path, request_mocker):
    request_mocker.url_mapping["*metadata.tgz"] = _adhd_metadata()
    request_mocker.url_mapping[re.compile(r".*adhd40_([0-9]+)\.tgz")] = (
        _adhd_example_subject
    )
    adhd = func.fetch_adhd(data_dir=tmp_path, n_subjects=12, verbose=0)

    assert isinstance(adhd, Bunch)
    check_type_fetcher(adhd)
    assert len(adhd.func) == 12
    assert len(adhd.confounds) == 12
    assert request_mocker.url_count == 13  # Subjects + phenotypic


def test_miyawaki2008(tmp_path, request_mocker):
    dataset = func.fetch_miyawaki2008(data_dir=tmp_path, verbose=0)

    assert len(dataset.func) == 32
    assert len(dataset.label) == 32
    assert isinstance(dataset.mask, str)
    assert len(dataset.mask_roi) == 38
    assert isinstance(dataset.background, str)
    assert request_mocker.url_count == 1


def test_fetch_localizer_contrasts_errors(
    tmp_path,
    localizer_mocker,  # noqa: ARG001
):
    with pytest.raises(ValueError, match="should be a list of strings"):
        func.fetch_localizer_contrasts(
            "checkerboard",
            n_subjects=2,
            data_dir=tmp_path,
        )
    with pytest.raises(
        ValueError, match="following contrasts are not available"
    ):
        func.fetch_localizer_contrasts(
            ["foo"],
            n_subjects=2,
            data_dir=tmp_path,
        )


@pytest.mark.parametrize("subjects", [None, 9999])
def test_fetch_localizer_contrasts_edge_cases(
    tmp_path,
    localizer_mocker,  # noqa: ARG001
    subjects,
):
    func.fetch_localizer_contrasts(
        ["checkerboard"], n_subjects=subjects, data_dir=tmp_path, verbose=1
    )


def test_fetch_localizer_contrasts(tmp_path, localizer_mocker):  # noqa: ARG001
    dataset = func.fetch_localizer_contrasts(
        ["checkerboard"], n_subjects=2, data_dir=tmp_path, verbose=1
    )

    check_type_fetcher(dataset)
    assert not hasattr(dataset, "anats")
    assert not hasattr(dataset, "tmaps")
    assert not hasattr(dataset, "masks")
    assert isinstance(dataset.cmaps[0], str)
    assert isinstance(dataset.ext_vars, pd.DataFrame)
    assert len(dataset.cmaps) == 2
    assert len(dataset["ext_vars"]) == 2


def test_fetch_localizer_contrasts_multiple_contrasts(
    tmp_path,
    localizer_mocker,  # noqa: ARG001
):
    dataset = func.fetch_localizer_contrasts(
        ["checkerboard", "horizontal checkerboard"],
        n_subjects=2,
        data_dir=tmp_path,
        verbose=1,
    )

    assert isinstance(dataset.ext_vars, pd.DataFrame)
    assert isinstance(dataset.cmaps[0], str)
    assert len(dataset.cmaps) == 2 * 2  # two contrasts are fetched
    assert len(dataset["ext_vars"]) == 2


def test_fetch_localizer_contrasts_get_all(tmp_path, localizer_mocker):  # noqa: ARG001
    # all get_*=True
    dataset = func.fetch_localizer_contrasts(
        ["checkerboard"],
        n_subjects=1,
        data_dir=tmp_path,
        get_anats=True,
        get_masks=True,
        get_tmaps=True,
        verbose=1,
    )

    assert isinstance(dataset.ext_vars, pd.DataFrame)
    assert isinstance(dataset.anats[0], str)
    assert isinstance(dataset.cmaps[0], str)
    assert isinstance(dataset.masks[0], str)
    assert isinstance(dataset.tmaps[0], str)
    assert len(dataset["ext_vars"]) == 1
    assert len(dataset.anats) == 1
    assert len(dataset.cmaps) == 1
    assert len(dataset.masks) == 1
    assert len(dataset.tmaps) == 1


def test_fetch_localizer_contrasts_list_subjects(tmp_path, localizer_mocker):  # noqa: ARG001
    # grab a given list of subjects
    dataset2 = func.fetch_localizer_contrasts(
        ["checkerboard"],
        n_subjects=[2, 3, 5],
        data_dir=tmp_path,
        verbose=1,
    )

    assert len(dataset2["ext_vars"]) == 3
    assert len(dataset2.cmaps) == 3
    assert list(dataset2["ext_vars"]["participant_id"].values) == [
        "S02",
        "S03",
        "S05",
    ]


def test_fetch_localizer_calculation_task(tmp_path, localizer_mocker):  # noqa: ARG001
    # 2 subjects
    dataset = func.fetch_localizer_calculation_task(
        n_subjects=2, data_dir=tmp_path, verbose=1
    )

    assert isinstance(dataset, Bunch)
    check_type_fetcher(dataset)
    assert isinstance(dataset.ext_vars, pd.DataFrame)
    assert isinstance(dataset.cmaps[0], str)
    assert len(dataset["ext_vars"]) == 2
    assert len(dataset.cmaps) == 2


def test_fetch_localizer_button_task(tmp_path, localizer_mocker):  # noqa: ARG001
    # Disabled: cannot be tested without actually fetching covariates CSV file
    # Only one subject
    dataset = func.fetch_localizer_button_task(data_dir=tmp_path, verbose=1)

    assert isinstance(dataset, Bunch)

    assert isinstance(dataset.tmaps, list)
    assert isinstance(dataset.anats, list)

    assert len(dataset.tmaps) == 1
    assert len(dataset.anats) == 1

    assert isinstance(dataset.tmap, str)
    assert isinstance(dataset.anat, str)


@pytest.mark.parametrize("quality_checked", [False, True])
def test_fetch_abide_pcp(tmp_path, request_mocker, quality_checked):
    n_subjects = 800
    ids = list(range(n_subjects))
    filenames = ["no_filename"] * n_subjects
    filenames[::2] = ["filename"] * (n_subjects // 2)
    qc_rater_1 = ["OK"] * n_subjects
    qc_rater_1[::4] = ["fail"] * (n_subjects // 4)
    pheno = pd.DataFrame(
        {
            "subject_id": ids,
            "FILE_ID": filenames,
            "qc_rater_1": qc_rater_1,
            "qc_anat_rater_2": qc_rater_1,
            "qc_func_rater_2": qc_rater_1,
            "qc_anat_rater_3": qc_rater_1,
            "qc_func_rater_3": qc_rater_1,
        },
        columns=[
            "subject_id",
            "FILE_ID",
            "qc_rater_1",
            "qc_anat_rater_2",
            "qc_func_rater_2",
            "qc_anat_rater_3",
            "qc_func_rater_3",
        ],
    )
    request_mocker.url_mapping["*rocessed1.csv"] = pheno.to_csv(index=False)

    # All subjects
    dataset = func.fetch_abide_pcp(
        data_dir=tmp_path, quality_checked=quality_checked, verbose=0
    )
    div = 4 if quality_checked else 2

    assert isinstance(dataset, Bunch)

    check_type_fetcher(dataset)

    assert len(dataset.func_preproc) == n_subjects / div

    # Smoke test using only a string, rather than a list of strings
    dataset = func.fetch_abide_pcp(
        data_dir=tmp_path,
        quality_checked=quality_checked,
        verbose=0,
        derivatives="func_preproc",
    )


def test__load_mixed_gambles(rng, affine_eye):
    n_trials = 48
    for n_subjects in [1, 5, 16]:
        zmaps = [
            Nifti1Image(rng.standard_normal((3, 4, 5, n_trials)), affine_eye)
            for _ in range(n_subjects)
        ]
        zmaps, gain, _ = func._load_mixed_gambles(zmaps)

        assert len(zmaps) == n_subjects * n_trials
        assert len(zmaps) == len(gain)


@pytest.mark.parametrize("n_subjects", [1, 5, 16])
def test_fetch_mixed_gambles(tmp_path, n_subjects):
    mgambles = func.fetch_mixed_gambles(
        n_subjects=n_subjects,
        data_dir=tmp_path,
        verbose=1,
        return_raw_data=True,
        url=None,
    )
    datasetdir = tmp_path / "jimura_poldrack_2012_zmaps"

    assert mgambles["zmaps"][0] == str(
        datasetdir / "zmaps" / "sub001_zmaps.nii.gz"
    )
    assert len(mgambles["zmaps"]) == n_subjects

    assert isinstance(mgambles, Bunch)
    check_type_fetcher(mgambles)


def test_check_parameters_megatrawls_datasets():
    # check that the expected error message is raised
    # when invalid input parameters are provided
    message = "Invalid {0} input is provided: {1}."

    for invalid_input_dim in [1, 5, 30]:
        with pytest.raises(
            ValueError,
            match=message.format("dimensionality", invalid_input_dim),
        ):
            func.fetch_megatrawls_netmats(dimensionality=invalid_input_dim)

    for invalid_input_timeserie in ["asdf", "time", "st2"]:
        with pytest.raises(
            ValueError,
            match=message.format("timeseries", invalid_input_timeserie),
        ):
            func.fetch_megatrawls_netmats(timeseries=invalid_input_timeserie)

    for invalid_output_name in ["net1", "net2"]:
        with pytest.raises(
            ValueError, match=message.format("matrices", invalid_output_name)
        ):
            func.fetch_megatrawls_netmats(matrices=invalid_output_name)


def test_fetch_megatrawls_netmats(tmp_path):
    # smoke test to check that files are fetched and read properly,
    # since the fetcher only loads the data already present in data_dir
    for file, folder in zip(
        ["Znet2.txt", "Znet1.txt"],
        [
            "3T_Q1-Q6related468_MSMsulc_d100_ts3",
            "3T_Q1-Q6related468_MSMsulc_d300_ts2",
        ],
    ):
        files_dir = tmp_path / "Megatrawls" / folder
        files_dir.mkdir(parents=True, exist_ok=True)
        with (files_dir / file).open("w") as net_file:
            net_file.write(" 1 2\n 2 3")

    dataset = func.fetch_megatrawls_netmats(data_dir=tmp_path)

    assert isinstance(dataset, Bunch)

    check_type_fetcher(dataset)

    # the output bunch should contain the expected number of entries
    assert len(dataset) == 5
    # the returned values should not be empty
    # dimensions
    assert dataset.dimensions != ""
    # timeseries
    assert dataset.timeseries != ""
    # matrices
    assert dataset.matrices != ""
    # correlation matrices
    assert isinstance(dataset.correlation_matrices, pd.DataFrame)

    # the dimensions, timeseries and matrices values should match
    # the user-provided settings
    dataset = func.fetch_megatrawls_netmats(
        data_dir=tmp_path,
        dimensionality=300,
        timeseries="multiple_spatial_regression",
        matrices="full_correlation",
    )
    check_type_fetcher(dataset)
    assert dataset.dimensions == 300
    assert dataset.timeseries == "multiple_spatial_regression"
    assert dataset.matrices == "full_correlation"


def test_fetch_surf_nki_enhanced(tmp_path, request_mocker):
    ids = np.asarray(
        [
            "A00028185",
            "A00035827",
            "A00037511",
            "A00039431",
            "A00033747",
            "A00035840",
            "A00038998",
            "A00035072",
            "A00037112",
            "A00039391",
        ],
        dtype="U9",
    )
    age = np.ones(len(ids), dtype="<f8")
    hand = np.asarray(len(ids) * ["x"], dtype="U1")
    sex = np.asarray(len(ids) * ["x"], dtype="U1")
    pheno_data = pd.DataFrame(
        OrderedDict([("id", ids), ("age", age), ("hand", hand), ("sex", sex)])
    )
    request_mocker.url_mapping["*pheno_nki_nilearn.csv"] = pheno_data.to_csv(
        index=False
    )
    nki_data = func.fetch_surf_nki_enhanced(data_dir=tmp_path)

    assert isinstance(nki_data, Bunch)
    check_type_fetcher(nki_data)
    assert len(nki_data.func_left) == 10
    assert len(nki_data.func_right) == 10
    assert isinstance(nki_data.phenotypic, pd.DataFrame)
    assert nki_data.phenotypic.shape == (9, 4)


def test_load_nki_error():
    """Give incorrect mesh_type argument."""
    with pytest.raises(ValueError, match="'mesh_type' must be one of"):
        func.load_nki(mesh_type="foo")


def _mock_participants_data(n_ids=5):
    """Return mock participants data (at most 8 ids can be mocked)."""
    ids = [
        "sub-pixar052",
        "sub-pixar073",
        "sub-pixar074",
        "sub-pixar110",
        "sub-pixar042",
        "sub-pixar109",
        "sub-pixar068",
        "sub-pixar007",
    ][:n_ids]
    age = np.ones(len(ids))
    age_group = len(ids) * ["2yo"]
    child_adult = [["child", "adult"][i % 2] for i in range(n_ids)]
    gender = len(ids) * ["m"]
    handedness = len(ids) * ["r"]
    participants = pd.DataFrame(
        OrderedDict(
            [
                ("participant_id", ids),
                ("Age", age),
                ("AgeGroup", age_group),
                ("Child_Adult", child_adult),
                ("Gender", gender),
                ("Handedness", handedness),
            ]
        )
    )
    return participants
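
# NOTE: `_mock_participants_data` returns up to 8 fake "pixar" participants
# with alternating 'child'/'adult' labels (even indices are children, odd ones
# adults); the development-fMRI tests below rely on that alternation when
# checking the `age_group` filtering.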


def _mock_development_confounds():
    keep_confounds = [
        "trans_x",
        "trans_y",
        "trans_z",
        "rot_x",
        "rot_y",
        "rot_z",
        "framewise_displacement",
        "a_comp_cor_00",
        "a_comp_cor_01",
        "a_comp_cor_02",
        "a_comp_cor_03",
        "a_comp_cor_04",
        "a_comp_cor_05",
        "csf",
        "white_matter",
    ]
    other_confounds = ["some_confound"] * 13
    confounds = keep_confounds + other_confounds
    return pd.DataFrame(np.ones((10, len(confounds))), columns=confounds)
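
# NOTE: `_mock_development_confounds` builds a confounds table with the 15
# columns kept when `reduce_confounds=True` plus 13 dummy extra columns
# (28 in total); the 15 vs. 28 column assertions in
# `test_fetch_development_fmri_n_confounds` below check exactly this.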


def test_fetch_development_fmri_participants(tmp_path, request_mocker):
    mock_participants = _mock_participants_data()
    request_mocker.url_mapping["https://osf.io/yr3av/download"] = (
        mock_participants.to_csv(index=False, sep="\t")
    )
    participants = func._fetch_development_fmri_participants(
        data_dir=tmp_path, url=None, verbose=1
    )

    assert isinstance(participants, pd.DataFrame)
    assert participants.shape == (5, 6)


def test_fetch_development_fmri_functional(tmp_path):
    mock_participants = _mock_participants_data(n_ids=8)
    funcs, confounds = func._fetch_development_fmri_functional(
        mock_participants, data_dir=tmp_path, url=None, resume=True, verbose=1
    )

    assert len(funcs) == 8
    assert len(confounds) == 8


def test_fetch_development_fmri(tmp_path, request_mocker):
    """Test for fetch_development_fmri."""
    mock_participants = _mock_participants_data()
    request_mocker.url_mapping["*"] = _mock_development_confounds().to_csv(
        index=False, sep="\t"
    )
    request_mocker.url_mapping["https://osf.io/yr3av/download"] = (
        mock_participants.to_csv(index=False, sep="\t")
    )

    data = fetch_development_fmri(n_subjects=2, data_dir=tmp_path, verbose=1)

    assert isinstance(data, Bunch)
    check_type_fetcher(data)
    assert len(data.func) == 2
    assert len(data.confounds) == 2
    assert isinstance(data.phenotypic, pd.DataFrame)
    assert data.phenotypic.shape == (2, 6)


def test_fetch_development_fmri_n_confounds(request_mocker):
    """Check number of confounds returned by fetch_development_fmri."""
    mock_participants = _mock_participants_data()
    request_mocker.url_mapping["*"] = _mock_development_confounds().to_csv(
        index=False, sep="\t"
    )
    request_mocker.url_mapping["https://osf.io/yr3av/download"] = (
        mock_participants.to_csv(index=False, sep="\t")
    )

    data = fetch_development_fmri(n_subjects=2, verbose=1)

    # check reduced confounds
    confounds = np.genfromtxt(data.confounds[0], delimiter="\t")

    assert len(confounds[0]) == 15

    # check full confounds
    data = fetch_development_fmri(
        n_subjects=2, reduce_confounds=False, verbose=1
    )
    confounds = np.genfromtxt(data.confounds[0], delimiter="\t")

    assert len(confounds[0]) == 28


def test_fetch_development_fmri_phenotype(request_mocker):
    """Check phenotype returned by fetch_development_fmri."""
    mock_participants = _mock_participants_data()
    request_mocker.url_mapping["*"] = _mock_development_confounds().to_csv(
        index=False, sep="\t"
    )
    request_mocker.url_mapping["https://osf.io/yr3av/download"] = (
        mock_participants.to_csv(index=False, sep="\t")
    )

    # check first subject is an adult
    data = fetch_development_fmri(n_subjects=1, verbose=1)
    age_group = data.phenotypic["Child_Adult"].to_list()[0]

    assert age_group == "adult"

    # check one of each age group is returned if n_subjects == 2
    # and age_group == 'both'
    data = fetch_development_fmri(n_subjects=2, verbose=1, age_group="both")
    age_group = data.phenotypic["Child_Adult"]

    assert all(age_group == ["adult", "child"])

    # check first subject is a child if requested with age_group
    data = fetch_development_fmri(n_subjects=1, verbose=1, age_group="child")
    age_group = data.phenotypic["Child_Adult"][0]

    assert age_group == "child"

    # check all returned subjects are children when age_group == 'child'
    data = fetch_development_fmri(n_subjects=2, verbose=1, age_group="child")

    assert all(x == "child" for x in data.phenotypic["Child_Adult"])


def test_fetch_development_fmri_invalid_n_subjects():
    max_subjects = 155
    n_subjects = func._set_invalid_n_subjects_to_max(
        n_subjects=None, max_subjects=max_subjects, age_group="adult"
    )

    assert n_subjects == max_subjects
    with pytest.warns(UserWarning, match="Wrong value for n_subjects="):
        func._set_invalid_n_subjects_to_max(
            n_subjects=-1, max_subjects=max_subjects, age_group="adult"
        )


def test_fetch_development_fmri_exception():
    with pytest.raises(ValueError, match="Wrong value for age_group"):
        func._filter_func_regressors_by_participants(
            participants="junk", age_group="junk for test"
        )


# datasets tests originally belonging to nistats follow

datadir = PACKAGE_DIRECTORY / "data"


def test_fetch_bids_langloc_dataset(tmp_path):
    data_dir = tmp_path / "bids_langloc_example"
    main_folder = data_dir / "bids_langloc_dataset"
    main_folder.mkdir(parents=True)

    datadir, dl_files = func.fetch_bids_langloc_dataset(tmp_path)

    assert isinstance(datadir, str)
    assert isinstance(dl_files, list)


def test_select_from_index():
    dataset_version = "ds000030_R1.0.4"
    data_prefix = (
        f"{dataset_version.split('_')[0]}/{dataset_version}/uncompressed"
    )
    # Prepare url files for subject and filter tests
    urls = [
        f"{data_prefix}/{f}"
        for f in [
            "stuff.html",
            "sub-xxx.html",
            "sub-yyy.html",
            "sub-xxx/ses-01_task-rest.txt",
            "sub-xxx/ses-01_task-other.txt",
            "sub-xxx/ses-02_task-rest.txt",
            "sub-xxx/ses-02_task-other.txt",
            "sub-yyy/ses-01.txt",
            "sub-yyy/ses-02.txt",
        ]
    ]

    # Only 1 subject and the non-subject-specific files get downloaded
    new_urls = func.select_from_index(urls, n_subjects=1)

    assert len(new_urls) == 6
    assert data_prefix + "/sub-yyy.html" not in new_urls

    # 2 subjects and the non-subject-specific files get downloaded
    new_urls = func.select_from_index(urls, n_subjects=2)

    assert len(new_urls) == 9
    assert data_prefix + "/sub-yyy.html" in new_urls

    # ALL subjects and the non-subject-specific files get downloaded
    new_urls = func.select_from_index(urls, n_subjects=None)

    assert len(new_urls) == 9

    # test inclusive filters. Only files with task-rest
    new_urls = func.select_from_index(urls, inclusion_filters=["*task-rest*"])

    assert len(new_urls) == 2
    assert data_prefix + "/stuff.html" not in new_urls

    # test exclusive filters. only files without ses-01
    new_urls = func.select_from_index(urls, exclusion_filters=["*ses-01*"])

    assert len(new_urls) == 6
    assert data_prefix + "/stuff.html" in new_urls

    # test filter combination. only files with task-rest and without ses-01
    new_urls = func.select_from_index(
        urls, inclusion_filters=["*task-rest*"], exclusion_filters=["*ses-01*"]
    )

    assert len(new_urls) == 1
    assert data_prefix + "/sub-xxx/ses-02_task-rest.txt" in new_urls


def test_fetch_ds000030_urls():
    with tempfile.TemporaryDirectory() as tmpdir:
        subdir_names = ["ds000030", "ds000030_R1.0.4", "uncompressed"]
        tmp_list = []
        for subdir in subdir_names:
            tmp_list.append(subdir)
            subdirpath = Path(tmpdir, *tmp_list)
            subdirpath.mkdir()

        filepath = subdirpath / "urls.json"
        mock_json_content = ["junk1", "junk2"]
        with filepath.open("w") as f:
            json.dump(mock_json_content, f)

        # fetch_ds000030_urls should retrieve the appropriate URLs
        urls_path, urls = func.fetch_ds000030_urls(
            data_dir=tmpdir,
            verbose=1,
        )

        assert urls_path == str(filepath)
        assert urls == mock_json_content


def test_fetch_openneuro_dataset(tmp_path):
    dataset_version = "ds000030_R1.0.4"
    data_prefix = (
        f"{dataset_version.split('_')[0]}/{dataset_version}/uncompressed"
    )
    data_dir = get_dataset_dir(
        data_prefix,
        data_dir=tmp_path,
        verbose=1,
    )
    url_file = data_dir / "urls.json"

    # Prepare url files for subject and filter tests
    urls = [
        f"https://example.com/{data_prefix}/stuff.html",
        f"https://example.com/{data_prefix}/sub-xxx.html",
        f"https://example.com/{data_prefix}/sub-yyy.html",
        f"https://example.com/{data_prefix}/sub-xxx/ses-01_task-rest.txt",
        f"https://example.com/{data_prefix}/sub-xxx/ses-01_task-other.txt",
        f"https://example.com/{data_prefix}/sub-xxx/ses-02_task-rest.txt",
        f"https://example.com/{data_prefix}/sub-xxx/ses-02_task-other.txt",
        f"https://example.com/{data_prefix}/sub-yyy/ses-01.txt",
        f"https://example.com/{data_prefix}/sub-yyy/ses-02.txt",
    ]
    with url_file.open("w") as f:
        json.dump(urls, f)

    # Only 1 subject and the non-subject-specific files get downloaded
    datadir, dl_files = func.fetch_openneuro_dataset(
        urls, tmp_path, dataset_version
    )

    assert isinstance(datadir, str)
    assert isinstance(dl_files, list)
    assert len(dl_files) == 9

    # Try downloading a different dataset without providing URLs
    # This should raise a warning and download ds000030.
    with pytest.warns(
        UserWarning,
        match='Downloading "ds000030_R1.0.4".',
    ):
        _, urls = func.fetch_openneuro_dataset(
            urls=None,
            data_dir=tmp_path,
            dataset_version="ds500_v2",
            verbose=1,
        )


def test_fetch_openneuro_dataset_errors(tmp_path):
    dataset_version = "ds000030_R1.0.4"
    # URLs do not contain the data_prefix, which should raise a ValueError
    urls = [
        "https://example.com/stuff.html",
        "https://example.com/sub-yyy/ses-01.txt",
    ]
    with pytest.raises(ValueError, match="This indicates that the URLs"):
        func.fetch_openneuro_dataset(urls, tmp_path, dataset_version)


def test_fetch_localizer(tmp_path):
    dataset = func.fetch_localizer_first_level(data_dir=tmp_path)

    assert isinstance(dataset["events"], str)
    assert isinstance(dataset.epi_img, str)


@pytest.mark.parametrize("legacy", [True, False])
def test_fetch_language_localizer_demo_dataset(tmp_path, legacy):
    data_dir = tmp_path
    expected_data_dir = tmp_path / "fMRI-language-localizer-demo-dataset"
    contents_dir = Path(__file__).parent / "data" / "archive_contents"
    contents_list_file = contents_dir / "language_localizer.txt"
    with contents_list_file.open() as f:
        expected_files = [
            str(expected_data_dir / file_path.strip())
            for file_path in f.readlines()[1:]
        ]
    if legacy:
        with pytest.deprecated_call(match="Bunch"):
            (
                actual_dir,
                actual_subdirs,
            ) = func.fetch_language_localizer_demo_dataset(
                data_dir, legacy_output=legacy
            )

        assert actual_dir == str(expected_data_dir)
        assert actual_subdirs == sorted(expected_files)
    else:
        bunch = func.fetch_language_localizer_demo_dataset(
            data_dir, legacy_output=legacy
        )

        assert isinstance(bunch, Bunch)
        check_type_fetcher(bunch)
        assert bunch.data_dir == str(expected_data_dir)
        assert bunch.func == sorted(expected_files)


def test_download_spm_auditory_data(tmp_path, request_mocker):
    request_mocker.url_mapping[re.compile(r".*MoAEpilot.bids.zip")] = (
        list_to_archive([Path("spm_auditory", "MoAEpilot", "README.txt")])
    )
    func._download_spm_auditory_data(data_dir=tmp_path)

    assert (tmp_path / "spm_auditory" / "MoAEpilot" / "README.txt").exists()


def test_fetch_spm_auditory(tmp_path):
    create_fake_bids_dataset(
        base_dir=tmp_path,
        n_sub=1,
        n_ses=0,
        tasks=["auditory"],
        n_runs=[1],
        with_derivatives=False,
    )
    data_dir = tmp_path / "spm_auditory" / "MoAEpilot"
    shutil.move(tmp_path / "bids_dataset", data_dir)

    dataset = func.fetch_spm_auditory(data_dir=tmp_path)

    assert isinstance(dataset, Bunch)
    check_type_fetcher(dataset)
    assert isinstance(dataset.anat, str)
    assert isinstance(dataset.events, str)
    assert isinstance(dataset.func[0], str)


def _generate_spm_multimodal(subject_dir=None, n_sessions=2, n_vol=390):
    files = ["sMRI/smri.img"]
    for session in range(n_sessions):
        files.append(f"fMRI/trials_ses{int(session + 1)}.mat")
        files.extend(
            [
                f"fMRI/Session{int(session + 1)}/"
                f"fMETHODS-000{int(session + 5)}-{int(i)}-01.img"
                for i in range(n_vol)
            ]
        )

    if subject_dir is None:
        return list_to_archive(files, archive_format="zip")
    for file_ in files:
        file_ = subject_dir / file_
        file_.parent.mkdir(parents=True, exist_ok=True)
        file_.touch()
    return
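
# NOTE: `_generate_spm_multimodal` serves two purposes: called without a
# `subject_dir` it returns a zip archive suitable for mocking the download
# URL, and called with a `subject_dir` it creates the same (empty) files on
# disk so that `fetch_spm_multimodal_fmri` finds 390 volumes per session
# locally.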


def test_fetch_spm_multimodal(tmp_path):
    subject_dir = tmp_path / "spm_multimodal_fmri" / "sub001"
    _generate_spm_multimodal(subject_dir=subject_dir)

    dataset = func.fetch_spm_multimodal_fmri(data_dir=tmp_path, verbose=0)

    assert isinstance(dataset, Bunch)
    check_type_fetcher(dataset)
    assert isinstance(dataset.anat, str)
    assert isinstance(dataset.func1[0], str)
    assert len(dataset.func1) == 390
    assert isinstance(dataset.func2[0], str)
    assert len(dataset.func2) == 390
    assert dataset.slice_order == "descending"
    assert isinstance(dataset.trials_ses1, str)
    assert isinstance(dataset.trials_ses2, str)


def test_fetch_spm_multimodal_missing_data(tmp_path, request_mocker):
    request_mocker.url_mapping[re.compile(r".*multimodal_.*mri.zip")] = (
        _generate_spm_multimodal()
    )

    subject_id = "sub001"
    subject_dir = tmp_path / "spm_multimodal_fmri" / subject_id

    dataset = func.fetch_spm_multimodal_fmri(data_dir=tmp_path, verbose=1)
    assert (subject_dir / "fMRI").exists()
    assert (subject_dir / "sMRI").exists()
    assert isinstance(dataset, Bunch)
    check_type_fetcher(dataset)
    assert isinstance(dataset.anat, str)
    assert isinstance(dataset.func1[0], str)
    assert len(dataset.func1) == 390
    assert isinstance(dataset.func2[0], str)
    assert len(dataset.func2) == 390
    assert dataset.slice_order == "descending"
    assert isinstance(dataset.trials_ses1, str)
    assert isinstance(dataset.trials_ses2, str)


def test_fiac(tmp_path):
    # Create dummy 'files'
    fiac_dir = (
        tmp_path / "fiac_nilearn.glm" / "nipy-data-0.2" / "data" / "fiac"
    )
    fiac0_dir = fiac_dir / "fiac0"
    fiac0_dir.mkdir(parents=True)
    for run in [1, 2]:
        # glob func data for run + 1
        (fiac0_dir / f"run{int(run)}.nii.gz").touch()

        X = np.ones((2, 2))
        conditions = [b"cdt_1", b"cdt_2"]
        np.savez(
            fiac0_dir / f"run{int(run)}_design.npz", X=X, conditions=conditions
        )

    (fiac0_dir / "mask.nii.gz").touch()

    dataset = func.fetch_fiac_first_level(data_dir=tmp_path)

    assert isinstance(dataset, Bunch)
    check_type_fetcher(dataset)
    assert isinstance(dataset.func1, str)
    assert isinstance(dataset.func2, str)
    assert isinstance(dataset.design_matrix1, pd.DataFrame)
    assert isinstance(dataset.design_matrix2, pd.DataFrame)
    assert isinstance(dataset.mask, str)


def test_load_sample_motor_activation_image():
    path_img = func.load_sample_motor_activation_image()

    check_type_fetcher(path_img)
    assert Path(path_img).exists()
    assert load_img(path_img)