Coverage for nilearn/datasets/tests/test_neurovault.py: 0%

487 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-16 12:32 +0200

1"""Test the neurovault module.""" 

2 

3import hashlib 

4import json 

5import os 

6import re 

7import stat 

8from pathlib import Path 

9from urllib import parse 

10 

11import numpy as np 

12import pandas as pd 

13import pytest 

14import requests 

15 

16from nilearn._utils.data_gen import generate_fake_fmri 

17from nilearn.conftest import _rng 

18from nilearn.datasets import fetch_neurovault_ids, neurovault 

19from nilearn.image import load_img 

20 

21 

def _get_neurovault_data():
    """Build (and memoize) fake collections and images mocking neurovault.

    Returns two pandas DataFrames: collections and images. Each row holds
    some metadata (e.g. "map_type", "is_thresholded" for images, or
    "number_of_images" for collections) for a single (fake) image or
    collection.

    Together the two dataframes act as a fake neurovault database; the
    `_neurovault` function, which simulates responses from the neurovault
    API, reads from this data.

    """
    # Reuse the cached dataset so every test sees the same fake database.
    cached = getattr(_get_neurovault_data, "data", None)
    if cached is not None:
        return cached

    rng = _rng()

    n_collections = 73
    collection_ids = rng.choice(
        np.arange(1000), size=n_collections, replace=False
    )
    collections = pd.DataFrame({"id": collection_ids})

    n_images = 546
    image_ids = rng.choice(np.arange(10000), size=n_images, replace=False)
    images = pd.DataFrame({"id": image_ids})
    # roughly 10% of the collections are deliberately left empty
    non_empty = rng.binomial(1, 0.9, n_collections).astype(bool)
    images["collection_id"] = rng.choice(
        collection_ids[non_empty], size=n_images
    )

    collection_sizes = images.groupby("collection_id").count()

    collections["true_number_of_images"] = collection_sizes.reindex(
        index=collections["id"].to_numpy(), fill_value=0
    ).to_numpy()
    # the advertised image count is sometimes wrong, as on the real server
    collections["number_of_images"] = collections[
        "true_number_of_images"
    ] + rng.binomial(1, 0.1, n_collections) * rng.integers(
        0, 100, n_collections
    )

    images["not_mni"] = rng.binomial(1, 0.1, size=n_images).astype(bool)
    images["is_valid"] = rng.binomial(1, 0.1, size=n_images).astype(bool)
    images["is_thresholded"] = rng.binomial(1, 0.1, size=n_images).astype(bool)
    images["map_type"] = rng.choice(
        [
            "T map",
            "Z map",
            "ROI/mask",
            "anatomical",
            "parcellation",
            "something else",
        ],
        size=n_images,
        p=[0.4, 0.3, 0.1, 0.1, 0.05, 0.05],
    )
    images["image_type"] = rng.choice(
        ["statistic_map", "atlas", "other type"],
        size=n_images,
        p=[0.4, 0.4, 0.2],
    )
    images["some_key"] = "some_value"
    images[13] = rng.standard_normal(n_images)
    url_template = "https://neurovault.org/media/images/{}/{}.nii.gz"
    image_names = [
        hashlib.sha1(bytes(img_id)).hexdigest()[:4] for img_id in image_ids
    ]
    images["file"] = [
        url_template.format(col_id, name)
        for col_id, name in zip(images["collection_id"], image_names)
    ]

    collections = collections.set_index("id", drop=False)
    images = images.set_index("id", drop=False)
    _get_neurovault_data.data = collections, images

    return collections, images

101 

102 

103def _parse_query(query): 

104 """Extract key-value pairs from a url query string. 

105 

106 for example 

107 "collection=23&someoption&format=json" 

108 -> {"collection": "23", "someoption": None, "format": "json"} 

109 

110 """ 

111 parts = [p.split("=") for p in query.split("&")] 

112 result = {} 

113 for p in parts: 

114 if len(p) == 2: 

115 result[p[0]] = p[1] 

116 if len(p) == 1: 

117 result[p[0]] = None 

118 return result 

119 

120 

def _neurovault_collections(parts, query):
    """Mock the Neurovault API behind the `/api/collections/` path.

    parts : the parts of the URL path after "collections"
        ie [], ["<somecollectionid>"], or ["<somecollectionid>", "images"]

    query : the parsed query string, e.g. {"offset": "15", "limit": "5"}

    returns a dictionary of API results

    See the neurovault API docs for details: https://neurovault.org/api-docs

    """
    if parts:
        # a specific collection was requested
        return _neurovault_one_collection(parts)
    collections, _ = _get_neurovault_data()
    offset = int(query.get("offset", 0))
    limit = int(query.get("limit", 2))
    page = collections.iloc[offset : offset + limit]
    return {
        "count": len(collections),
        "results": page.to_dict(orient="records"),
    }

140 

141 

def _neurovault_one_collection(parts):
    """Mock Neurovault API \
    behind the `/api/collections/<somecollectionid>` path.

    parts : parts of the URL path after "collections",
        ie ["<somecollectionid>"] or ["<somecollectionid>", "images"]

    returns a dictionary of API results

    See the neurovault API docs for details: https://neurovault.org/api-docs

    """
    collection_id = int(parts[0])
    collections, images = _get_neurovault_data()
    if collection_id not in collections.index:
        return {"detail": "Not found."}
    if len(parts) == 1:
        # metadata for the collection itself
        return collections.loc[collection_id].to_dict()
    if parts[1] != "images":
        return ""
    # list the images belonging to this collection
    matching = images[images["collection_id"] == collection_id]
    return {
        "count": len(matching),
        "results": matching.to_dict(orient="records"),
    }

167 

168 

def _neurovault_images(parts, query):
    """Mock the Neurovault API behind the `/api/images/` path.

    parts : parts of the URL path after "images",
        ie [] or ["<someimageid>"]

    query : the parsed query string, e.g. {"offset": "15", "limit": "5"}

    returns a dictionary of API results

    See the neurovault API docs for details: https://neurovault.org/api-docs

    """
    if parts:
        # a specific image was requested
        return _neurovault_one_image(parts[0])
    _, images = _get_neurovault_data()
    offset = int(query.get("offset", 0))
    limit = int(query.get("limit", 2))
    page = images.iloc[offset : offset + limit]
    return {"count": len(images), "results": page.to_dict(orient="records")}

188 

189 

def _neurovault_one_image(img_id):
    """Mock the Neurovault API behind the `/api/images/<someimageid>` path.

    returns a dictionary of API results

    See the neurovault API docs for details: https://neurovault.org/api-docs

    """
    img_id = int(img_id)
    _, images = _get_neurovault_data()
    if img_id in images.index:
        return images.loc[img_id].to_dict()
    return {"detail": "Not found."}

203 

204 

def _neurovault_file(parts, query):  # noqa: ARG001
    """Mock the Neurovault API behind the `/media/images/` path."""
    # any request for an image file yields a small fake fMRI volume
    fake_volume = generate_fake_fmri(length=1)[0]
    return fake_volume

208 

209 

210class _NumpyJsonEncoder(json.JSONEncoder): 

211 """A json encoder that can handle numpy objects.""" 

212 

213 def default(self, obj): 

214 if hasattr(obj, "tolist"): 

215 return obj.tolist() 

216 return json.JSONEncoder.default(self, obj) 

217 

218 

def _neurovault(match, request):  # noqa: ARG001
    """Mock response content from the Neurovault API.

    The fake data used to generate responses is provided by
    `_get_neurovault_data`.

    See the neurovault API docs for details on the queries and corresponding
    responses: https://neurovault.org/api-docs

    """
    routes = {
        "media": {"images": _neurovault_file},
        "api": {
            "collections": _neurovault_collections,
            "images": _neurovault_images,
        },
    }
    parsed_url = parse.urlparse(request.url)
    path_parts = [part for part in parsed_url.path.split("/") if part]
    endpoint, section = path_parts[0], path_parts[1]

    handler = routes[endpoint][section]
    result = handler(path_parts[2:], _parse_query(parsed_url.query))
    if endpoint == "api":
        # API responses are JSON; media (file) responses are returned as-is
        return json.dumps(result, cls=_NumpyJsonEncoder).encode("UTF-8")
    return result

247 

248 

@pytest.fixture(autouse=True)
def neurovault_mocker(request_mocker):
    """Route every request to neurovault.org through the mock API."""
    request_mocker.url_mapping["*neurovault.org*"] = _neurovault

252 

253 

def test_remove_none_strings():
    """Check that none-like strings are mapped to None."""
    info = {
        "a": "None / Other",
        "b": "",
        "c": "N/A",
        "d": None,
        "e": 0,
        "f": "a",
        "g": "Name",
    }
    expected = {
        "a": None,
        "b": None,
        "c": None,
        "d": None,
        "e": 0,
        "f": "a",
        "g": "Name",
    }

    assert neurovault._remove_none_strings(info) == expected

274 

275 

def test_append_filters_to_query():
    """Test _append_filters_to_query."""
    # generic filters become query-string parameters
    filtered = neurovault._append_filters_to_query(
        neurovault._NEUROVAULT_COLLECTIONS_URL, {"DOI": 17}
    )
    assert filtered == "https://neurovault.org/api/collections/?DOI=17"

    # an "id" filter selects a single resource path instead
    filtered = neurovault._append_filters_to_query(
        neurovault._NEUROVAULT_COLLECTIONS_URL, {"id": 40}
    )
    assert filtered == "https://neurovault.org/api/collections/40"

288 

289 

def test_get_batch():
    """Test _get_batch."""
    batch = neurovault._get_batch(neurovault._NEUROVAULT_COLLECTIONS_URL)

    # a well-formed batch carries both the results and the total count
    for key in ("results", "count"):
        assert key in batch

295 

296 

def test_get_batch_error(tmp_path):
    """Test _get_batch errors."""
    # unreachable URL -> network error
    with pytest.raises(requests.RequestException):
        neurovault._get_batch("http://")
    # a local file that is not JSON -> ValueError
    with pytest.raises(ValueError):
        neurovault._get_batch(f"file://{tmp_path / 'test_nv.txt'!s}")

    # a JSON response missing the "results" key -> ValueError
    no_results_url = (
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        "esearch.fcgi?db=pmc&retmode=json&term=fmri"
    )
    with pytest.raises(ValueError):
        neurovault._get_batch(no_results_url)

313 

314 

def test_scroll_server_results():
    """Test _scroll_server_results."""
    # scrolling caps the number of returned results at max_results
    scrolled = list(
        neurovault._scroll_server_results(
            neurovault._NEUROVAULT_COLLECTIONS_URL, max_results=6, batch_size=3
        )
    )
    assert len(scrolled) == 6

    # a filter rejecting everything yields nothing
    rejected = list(
        neurovault._scroll_server_results(
            neurovault._NEUROVAULT_COLLECTIONS_URL,
            max_results=3,
            local_filter=lambda r: False,
        )
    )
    assert not rejected

    # a bad URL should not raise when the generator is advanced
    no_results = neurovault._scroll_server_results(
        "http://BAD_URL", max_results=3, local_filter=lambda r: True
    )
    next(no_results)

338 

339 

def test_is_null():
    """Test IsNull."""
    is_null = neurovault.IsNull()

    # matches only empty-ish values
    assert is_null == ""
    assert is_null != "a"
    assert str(is_null) == "IsNull()"

347 

348 

def test_not_null():
    """Test NotNull."""
    not_null = neurovault.NotNull()

    # matches anything except empty-ish values
    assert not_null != ""
    assert not_null == "a"
    assert str(not_null) == "NotNull()"

356 

357 

def test_not_equal():
    """Test NotEqual.

    NotEqual('a') compares equal to any value except 'a'.
    """
    not_equal = neurovault.NotEqual("a")

    assert not_equal == "b"
    assert not_equal == 1
    # the original had this assertion duplicated; once is enough
    assert not_equal != "a"
    assert str(not_equal) == "NotEqual('a')"

367 

368 

def test_order_comp():
    """Test lt, gt, goe, loe."""
    # ordering comparisons work on ISO date strings...
    geq = neurovault.GreaterOrEqual("2016-07-12T11:29:12.263046Z")
    assert geq == "2016-08-12T11:29:12.263046Z"
    assert geq != "2016-06-12T11:29:12.263046Z"
    assert str(geq) == "GreaterOrEqual('2016-07-12T11:29:12.263046Z')"

    # ...on arbitrary strings...
    gt = neurovault.GreaterThan("abc")
    assert gt != "abc"
    assert gt == "abd"  # codespell:ignore abd
    assert str(gt) == "GreaterThan('abc')"

    # ...and on numbers; comparing a number against a string never matches
    lt = neurovault.LessThan(7)
    assert lt != 7
    assert lt == 5
    assert lt != "a"
    assert str(lt) == "LessThan(7)"

    leq = neurovault.LessOrEqual(4.5)
    assert leq == 4.4
    assert leq != 4.6
    assert str(leq) == "LessOrEqual(4.5)"

395 

396 

def test_is_in():
    """Test IsIn."""
    is_in = neurovault.IsIn(0, 1)

    assert is_in == 0
    assert is_in != 2
    assert str(is_in) == "IsIn(0, 1)"

    # also works with a larger unpacked membership set
    countable = neurovault.IsIn(*range(11))
    assert countable == 7
    assert countable != 12

409 

410 

def test_not_in():
    """Test NotIn."""
    not_in = neurovault.NotIn(0, 1)

    # matches anything outside the listed values
    assert not_in == 2
    assert not_in != 0
    assert str(not_in) == "NotIn(0, 1)"

418 

419 

def test_contains():
    """Test Contains."""
    contains = neurovault.Contains("a", 0)

    # a container matches only when it holds every required element
    assert contains == ["b", 1, "a", 0]
    assert contains != ["b", 1, 0]
    assert contains != ["b", 1, "a"]
    # non-container values never match
    assert contains != 10
    assert str(contains) == "Contains('a', 0)"

    # strings are containers of substrings
    contains = neurovault.Contains("house", "face")
    assert contains == "face vs house"
    assert contains != "smiling face vs frowning face"

434 

435 

def test_not_contains():
    """Test NotContains.

    NotContains('ab') matches any value that does not contain 'ab'.
    """
    not_contains = neurovault.NotContains("ab")

    assert not_contains is not None
    assert not_contains == "a_b"
    assert not_contains == "bcd"
    # the original had this assertion duplicated; once is enough
    assert not_contains != "_abcd"
    assert str(not_contains) == "NotContains('ab',)"

446 

447 

def test_pattern():
    """Test Pattern."""
    # Python std lib doc poker hand example
    case_sensitive = neurovault.Pattern(r"[0-9akqj]{5}$")
    case_insensitive = neurovault.Pattern(r"[0-9akqj]{5}$", re.IGNORECASE)

    assert str(case_sensitive) == "Pattern(pattern='[0-9akqj]{5}$', flags=0)"

    # only the IGNORECASE pattern accepts the capitalized hand
    assert case_sensitive == "ak05q"
    assert case_sensitive != "Ak05q"
    assert case_sensitive != "ak05e"
    assert case_insensitive == "ak05q"
    assert case_insensitive == "Ak05q"
    assert case_insensitive != "ak05e"

463 

464 

def test_result_filter():
    """Test ResultFilter IsIn NotIn."""
    # a filter combining query terms, a callable, and a keyword term
    combined = neurovault.ResultFilter(
        query_terms={"a": 0}, callable_filter=lambda d: len(d) < 5, b=1
    )

    assert str(combined) == "ResultFilter"
    assert combined["a"] == 0
    assert combined({"a": 0, "b": 1, "c": 2})
    # too many keys -> rejected by the callable
    assert not combined({"a": 0, "b": 1, "c": 2, "d": 3, "e": 4})
    # missing or wrong term values -> rejected
    assert not combined({"b": 1, "c": 2, "d": 3})
    assert not combined({"a": 1, "b": 1, "c": 2})

    # terms can be added, replaced, and removed after construction
    mutable = neurovault.ResultFilter(query_terms={"c": 2})
    mutable["d"] = neurovault.NotNull()
    assert mutable({"c": 2, "d": 1})
    assert not mutable({"c": 2, "d": 0})

    mutable["d"] = neurovault.IsIn(0, 1)
    assert mutable({"c": 2, "d": 1})
    assert not mutable({"c": 2, "d": 2})

    del mutable["d"]
    assert mutable({"c": 2, "d": 2})

    mutable["d"] = neurovault.NotIn(0, 1)
    assert not mutable({"c": 2, "d": 1})
    assert mutable({"c": 2, "d": 3})

    mutable.add_filter(lambda d: len(d) > 2)
    assert not mutable({"c": 2, "d": 3})
    assert mutable({"c": 2, "d": 3, "e": 4})

502 

503 

def test_result_filter_combinations():
    """Test ResultFilter AND OR XOR NOT."""
    left = neurovault.ResultFilter(a=0, b=1)
    right = neurovault.ResultFilter(c=2, d=3)

    both = left.AND(right)
    assert both({"a": 0, "b": 1, "c": 2, "d": 3})
    assert not both({"a": 0, "b": 1, "c": 2, "d": None})
    assert not both({"a": None, "b": 1, "c": 2, "d": 3})

    either = left.OR(right)
    assert either({"a": 0, "b": 1, "c": 2, "d": 3})
    assert either({"a": 0, "b": 1, "c": 2, "d": None})
    assert either({"a": None, "b": 1, "c": 2, "d": 3})
    assert not either({"a": None, "b": 1, "c": 2, "d": None})

    exactly_one = left.XOR(right)
    assert not exactly_one({"a": 0, "b": 1, "c": 2, "d": 3})
    assert exactly_one({"a": 0, "b": 1, "c": 2, "d": None})
    assert exactly_one({"a": None, "b": 1, "c": 2, "d": 3})
    assert not exactly_one({"a": None, "b": 1, "c": 2, "d": None})

    negated = left.NOT()
    assert negated({})
    assert not negated({"a": 0, "b": 1})

    # filters also compose with plain callables
    with_callable = neurovault.ResultFilter({"a": neurovault.NotNull()}).AND(
        lambda d: len(d) < 2
    )
    assert with_callable({"a": "a"})
    assert not with_callable({"a": ""})
    assert not with_callable({"a": "a", "b": 0})

    # combinations nest
    nested = neurovault.ResultFilter(a=0).AND(
        neurovault.ResultFilter(b=1).OR(neurovault.ResultFilter(b=2))
    )
    assert nested({"a": 0, "b": 1})
    assert not nested({"a": 0, "b": 0})

548 

549 

def test_simple_download(tmp_path):
    """Test _simple_download."""
    target = tmp_path / "image_35.nii.gz"
    downloaded_file = neurovault._simple_download(
        "https://neurovault.org/media/images/35/Fig3B_zstat1.nii.gz",
        target,
        tmp_path,
    )
    # the file must exist on disk after the download
    assert downloaded_file.is_file()

558 

559 

def test_simple_download_error(tmp_path, request_mocker):
    """Test _simple_download error."""
    # make every request fail
    request_mocker.url_mapping["*"] = requests.RequestException()

    with pytest.raises(requests.RequestException):
        neurovault._simple_download(
            "http://", tmp_path / "bad.nii.gz", tmp_path
        )

570 

571 

def test_neurosynth_words_vectorized(tmp_path):
    """Test neurosynth_words_vectorized."""
    n_im = 5
    words = [str(i) for i in range(n_im)]
    words_files = [tmp_path / f"words_for_image_{i}.json" for i in range(n_im)]
    # give each file a one-hot weight vector over the vocabulary
    for position, file_name in enumerate(words_files):
        weights = np.zeros(n_im)
        weights[position] = 1
        payload = {"data": {"values": dict(zip(words, weights))}}
        file_name.write_bytes(json.dumps(payload).encode("utf-8"))

    freq, _ = neurovault.neurosynth_words_vectorized(words_files)

    # one row per image, one column per word, each column summing to one
    assert freq.shape == (n_im, n_im)
    assert (freq.sum(axis=0) == np.ones(n_im)).all()

588 

589 

def test_neurosynth_words_vectorized_warning(tmp_path):
    """Test neurosynth_words_vectorized warning."""
    # a missing words file should trigger a warning, not an error
    missing = ((tmp_path, "no_words_here.json"),)
    with pytest.warns(UserWarning):
        neurovault.neurosynth_words_vectorized(missing)

596 

597 

def test_write_read_metadata(tmp_path):
    """Test _write_metadata and _add_absolute_paths."""
    metadata = {
        "relative_path": "collection_1",
        "absolute_path": Path("tmp", "collection_1"),
    }
    metadata_path = tmp_path / "metadata.json"
    neurovault._write_metadata(metadata, metadata_path)

    written_metadata = json.loads(metadata_path.read_bytes().decode("utf-8"))

    # absolute paths are stripped before writing
    assert "relative_path" in written_metadata
    assert "absolute_path" not in written_metadata

    # ...and reconstructed from the root dir when reading back
    read_metadata = neurovault._add_absolute_paths(
        Path("tmp"), written_metadata
    )
    assert read_metadata["absolute_path"] == Path("tmp", "collection_1")

618 

619 

def test_add_absolute_paths():
    """Test _add_absolute_paths."""
    meta = {
        "col_relative_path": "collection_1",
        "col_absolute_path": Path("dir_0", "neurovault", "collection_1"),
    }

    # without force, an existing absolute path is kept
    meta = neurovault._add_absolute_paths(
        Path("dir_1", "neurovault"), meta, force=False
    )
    assert meta["col_absolute_path"] == Path(
        "dir_0", "neurovault", "collection_1"
    )

    # with force, it is recomputed from the new root
    meta = neurovault._add_absolute_paths(
        Path("dir_1", "neurovault"), meta, force=True
    )
    assert meta["col_absolute_path"] == Path(
        "dir_1", "neurovault", "collection_1"
    )

    # metadata without any *_relative_path key is left untouched
    meta = {"id": 0}
    meta_transformed = neurovault._add_absolute_paths(
        Path("dir_1", "neurovault"), meta, force=True
    )
    assert meta == meta_transformed

650 

651 

def test_json_add_collection_dir(tmp_path):
    """Test _json_add_collection_dir."""
    coll_dir = tmp_path / "collection_1"
    coll_dir.mkdir()
    coll_file_name = coll_dir / "collection_1.json"
    coll_file_name.write_bytes(json.dumps({"id": 1}).encode("utf-8"))

    loaded = neurovault._json_add_collection_dir(coll_file_name)

    # the collection directory is recorded both absolute and relative
    assert loaded["absolute_path"] == coll_dir.absolute()
    assert loaded["relative_path"] == coll_dir

664 

665 

def test_json_add_im_files_paths(tmp_path):
    """Test _json_add_im_files_paths."""
    coll_dir = tmp_path / "collection_1"
    coll_dir.mkdir()
    im_file_name = coll_dir / "image_1.json"
    im_file_name.write_bytes(json.dumps({"id": 1}).encode("utf-8"))

    loaded = neurovault._json_add_im_files_paths(im_file_name)

    # the image path is derived from the metadata file name; no
    # neurosynth words file is recorded when none exists
    assert loaded["relative_path"] == coll_dir / "image_1.nii.gz"
    assert loaded.get("neurosynth_words_relative_path") is None

678 

679 

def test_split_terms():
    """Test _split_terms."""
    requested = {
        "DOI": neurovault.NotNull(),
        "name": "my_name",
        "unknown_term": "something",
    }
    terms, server_terms = neurovault._split_terms(
        requested, neurovault._COL_FILTERS_AVAILABLE_ON_SERVER
    )

    # only plain values of server-supported fields go to the server;
    # special filters and unknown fields stay local
    assert terms == {"DOI": neurovault.NotNull(), "unknown_term": "something"}
    assert server_terms == {"name": "my_name"}

693 

694 

def test_move_unknown_terms_to_local_filter():
    """Test _move_unknown_terms_to_local_filter."""
    terms, new_filter = neurovault._move_unknown_terms_to_local_filter(
        {"a": 0, "b": 1}, neurovault.ResultFilter(), ("a",)
    )

    # known terms stay; unknown terms become part of the local filter
    assert terms == {"a": 0}
    assert new_filter({"b": 1})
    assert not new_filter({"b": 0})

704 

705 

def test_move_col_id():
    """Test _move_col_id."""
    im_terms, col_terms = neurovault._move_col_id(
        {"collection_id": 1, "not_mni": False}, {}
    )

    # "collection_id" moves from image terms to the collection "id" term
    assert im_terms == {"not_mni": False}
    assert col_terms == {"id": 1}

    # conflicting collection ids should trigger a warning
    with pytest.warns(UserWarning):
        neurovault._move_col_id(
            {"collection_id": 1, "not_mni": False}, {"id": 2}
        )

719 

720 

def test_download_image_terms(tmp_path, request_mocker):
    """Test _download_image_terms."""
    image_info = {"id": "a"}
    collection = {
        "relative_path": "collection",
        "absolute_path": tmp_path / "collection",
    }
    collection["absolute_path"].mkdir(parents=True)
    download_params = {
        "temp_dir": tmp_path,
        "verbose": 3,
        "fetch_neurosynth_words": True,
    }
    # make every request fail; by default the failure is tolerated
    request_mocker.url_mapping["*"] = requests.RequestException()

    neurovault._download_image_terms(image_info, collection, download_params)

737 

738 

def test_download_image_terms_error(tmp_path, request_mocker):
    """Test _download_image_terms errors."""
    image_info = {"id": "a"}
    collection = {
        "relative_path": "collection",
        "absolute_path": tmp_path / "collection",
    }
    collection["absolute_path"].mkdir(parents=True)
    download_params = {
        "temp_dir": tmp_path,
        "verbose": 3,
        "fetch_neurosynth_words": True,
    }
    # make every request fail and forbid tolerating the failure
    request_mocker.url_mapping["*"] = requests.RequestException()
    download_params["allow_neurosynth_failure"] = False

    with pytest.raises(RuntimeError):
        neurovault._download_image_terms(
            image_info,
            collection,
            download_params,
        )

    # no fail if file already exists
    words_file = Path(
        collection["absolute_path"], "neurosynth_words_for_image_a.json"
    )
    words_file.touch()

    neurovault._download_image_terms(image_info, collection, download_params)

769 

770 

def test_download_image():
    """Test _download_image."""
    # a missing image info yields no image
    assert neurovault._download_image(None, {}) is None

775 

776 

def test_fetch_neurovault(tmp_path):
    """Run several checks on fetch_neurovault."""
    # offline mode with an empty cache yields nothing
    data = neurovault.fetch_neurovault(mode="offline", data_dir=tmp_path)
    assert len(data.images) == 0

    # try to download an image
    data = neurovault.fetch_neurovault(
        max_images=11,
        fetch_neurosynth_words=True,
        mode="overwrite",
        data_dir=tmp_path,
    )
    # specifying a filter while leaving the default term
    # filters in place should raise a warning.
    with pytest.warns(UserWarning):
        neurovault.fetch_neurovault(
            image_filter=lambda x: True, max_images=1, mode="offline"
        )

    assert data.images
    assert len(data.images) == 11
    # the default filters should have excluded these image kinds
    excluded_map_types = (
        "ROI/mask",
        "anatomical",
        "parcellation",
    )
    for meta in data.images_meta:
        assert not meta["not_mni"]
        assert not meta["is_thresholded"]
        assert meta["map_type"] not in excluded_map_types
        assert meta["image_type"] != "atlas"

    # using a data directory we can't write into should raise a
    # warning unless mode is 'offline'
    read_only = stat.S_IREAD | stat.S_IEXEC
    tmp_path.chmod(read_only)
    (tmp_path / "neurovault").chmod(read_only)
    if os.access(tmp_path / "neurovault", os.W_OK):
        # chmod had no effect (e.g. running as root): nothing to check
        return

    with pytest.warns(UserWarning):
        neurovault.fetch_neurovault(data_dir=tmp_path)

818 

819 

def test_fetch_neurovault_errors(capsys, request_mocker):
    """Test that errors are logged when the server returns an error code.

    May "spam" your standard output with requests exceptions,
    but that's expected.
    """
    # every request returns a server error
    request_mocker.url_mapping["*"] = 500

    data = neurovault.fetch_neurovault()

    # the error must have been reported on stderr
    captured = capsys.readouterr()
    assert re.search(r"500 Error", captured.err) is not None

    # and nothing was fetched
    assert len(data.images) == 0

834 

835 

def test_fetch_neurovault_ids(tmp_path):
    """Test fetch_neurovault_ids."""
    collections, images = _get_neurovault_data()
    collections = collections.sort_values(
        by="true_number_of_images", ascending=False
    )
    # from the three largest collections: request three images of the
    # first one individually, and the other two collections whole
    other_col_id, *col_ids = collections["id"].to_numpy()[:3]
    img_ids = images[images["collection_id"] == other_col_id]["id"].to_numpy()[
        :3
    ]
    img_from_cols_ids = images[images["collection_id"].isin(col_ids)][
        "id"
    ].to_numpy()

    data = fetch_neurovault_ids(
        image_ids=img_ids, collection_ids=col_ids, data_dir=tmp_path
    )

    expected_images = list(img_ids) + list(img_from_cols_ids)
    assert len(data.images) == len(expected_images)
    assert {img["id"] for img in data["images_meta"]} == set(expected_images)
    # images are stored inside their collection's directory
    assert Path(data["images"][0]).parent == Path(
        data["collections_meta"][0]["absolute_path"]
    )

    # check that there are no Path objects
    for image in data.images:
        assert isinstance(image, str)
    for meta in data.images_meta + data.collections_meta:
        assert not any(isinstance(value, Path) for value in meta.values())

868 

869 

def test_fetch_neurovault_ids_error():
    """Test fetch_neurovault_ids errors."""
    # an unknown mode must be rejected with a helpful message
    expected_message = (
        "Supported download modes are: overwrite, download_new, offline"
    )
    with pytest.raises(ValueError, match=expected_message):
        fetch_neurovault_ids(mode="bad")

877 

878 

def test_fetch_neurovault_ids_offline(tmp_path):
    """Check image can be loaded again from disk."""
    collections, images = _get_neurovault_data()
    collections = collections.sort_values(
        by="true_number_of_images", ascending=False
    )
    other_col_id, *col_ids = collections["id"].to_numpy()[:3]
    img_ids = images[images["collection_id"] == other_col_id]["id"].to_numpy()[
        :3
    ]

    # populate the on-disk cache
    fetch_neurovault_ids(
        image_ids=img_ids, collection_ids=col_ids, data_dir=tmp_path
    )

    # then read one image back without network access
    data = fetch_neurovault_ids(
        image_ids=[img_ids[0]], data_dir=tmp_path, mode="offline"
    )

    assert len(data.images) == 1

899 

900 

def test_fetch_neurovault_ids_overwrite(tmp_path):
    """Check that only 'overwrite' mode replaces metadata already on disk."""
    collections, images = _get_neurovault_data()
    collections = collections.sort_values(
        by="true_number_of_images", ascending=False
    )
    other_col_id, *col_ids = collections["id"].to_numpy()[:3]
    img_ids = images[images["collection_id"] == other_col_id]["id"].to_numpy()[
        :3
    ]
    data = fetch_neurovault_ids(
        image_ids=img_ids, collection_ids=col_ids, data_dir=tmp_path
    )

    data = fetch_neurovault_ids(
        image_ids=[img_ids[0]], data_dir=tmp_path, mode="offline"
    )

    modified_meta = data["images_meta"][0]
    assert modified_meta["some_key"] == "some_value"

    # tamper with the metadata stored on disk
    modified_meta["some_key"] = "some_other_value"
    meta_path = (
        Path(modified_meta["absolute_path"]).parent
        / f"image_{img_ids[0]}_metadata.json"
    )
    # convert Path to str for JSON serialization
    serializable_meta = {
        key: str(value) if isinstance(value, Path) else value
        for key, value in modified_meta.items()
    }
    meta_path.write_bytes(json.dumps(serializable_meta).encode("UTF-8"))

    # fresh download
    data = fetch_neurovault_ids(
        image_ids=[img_ids[0]], data_dir=tmp_path, mode="download_new"
    )
    data = fetch_neurovault_ids(
        image_ids=[img_ids[0]], data_dir=tmp_path, mode="offline"
    )

    # download_new keeps the existing file: the tampered value survives
    assert data["images_meta"][0]["some_key"] == "some_other_value"

    data = fetch_neurovault_ids(
        image_ids=[img_ids[0]], data_dir=tmp_path, mode="overwrite"
    )
    data = fetch_neurovault_ids(
        image_ids=[img_ids[0]], data_dir=tmp_path, mode="offline"
    )

    # overwrite re-downloads: back to the original value
    assert data["images_meta"][0]["some_key"] == "some_value"

957 

958 

def test_should_download_resampled_images_only_if_no_previous_download(
    tmp_path,
):
    """With resample=True and an empty cache, only resampled files land."""
    collections, _ = _get_neurovault_data()

    first_collection = collections.iloc[0]
    first_collection_id = first_collection["id"]
    expected_number_of_images = first_collection["true_number_of_images"]

    data = fetch_neurovault_ids(
        collection_ids=[first_collection_id],
        data_dir=tmp_path,
        resample=True,
    )

    # Check the expected size of the dataset
    assert len(data["images_meta"]) == expected_number_of_images

    # only the resampled files should exist, with the neurovault affine
    _check_resampled_version_is_here(data)
    _check_all_affines_match_neurovault_affine(data)
    _check_original_version_is_not_here(data)

982 

983 

def test_download_original_images_along_resamp_images_if_previously_downloaded(
    tmp_path,
):
    """Fetching originals after resampled data triggers a real download."""
    collections, _ = _get_neurovault_data()

    first_collection = collections.iloc[0]
    first_collection_id = first_collection["id"]

    # Fetch resampled images first
    data = fetch_neurovault_ids(
        collection_ids=[first_collection_id],
        data_dir=tmp_path,
        resample=True,
    )

    # Check that only the resampled version is here
    _check_resampled_version_is_here(data)
    _check_original_version_is_not_here(data)

    # Get the time of the last access to the resampled data
    access_time_resampled = (
        Path(data["images_meta"][0]["resampled_absolute_path"]).stat().st_atime
    )

    # Download original data
    data_orig = fetch_neurovault_ids(
        collection_ids=[first_collection_id],
        data_dir=tmp_path,
        resample=False,
    )

    # Get the time of the last access to one of the original files
    # (which should be download time)
    access_time = (
        Path(data_orig["images_meta"][0]["absolute_path"]).stat().st_atime
    )

    # Check that the last access to the original data is after the access
    # to the resampled data
    assert access_time - access_time_resampled > 0

    # Check that the original version is now here
    # (previous test should have failed anyway if not)
    _check_original_version_is_here(data_orig)

    _check_no_affine_match_neurovault_affine(data_orig)

1030 

1031 

def test_download_resamp_images_along_original_images_if_previously_downloaded(
    tmp_path,
):
    """After an original-only fetch, resample=True resamples locally.

    Asking for the resampled version when the originals are already cached
    should only resample the cached files, not download anything again.
    """
    all_collections, _ = _get_neurovault_data()

    collection_id = all_collections.iloc[0]["id"]

    # First fetch: original (non-resampled) images.
    original_data = fetch_neurovault_ids(
        collection_ids=[collection_id],
        data_dir=tmp_path,
        resample=False,
    )

    _check_original_version_is_here(original_data)

    _check_resampled_version_is_not_here(original_data)

    # Record an original file's modification time before asking for the
    # resampled version.
    mtime_before = (
        Path(original_data["images_meta"][0]["absolute_path"]).stat().st_mtime
    )

    # Second fetch: resampled data; should resample, not re-download.
    resampled_data = fetch_neurovault_ids(
        collection_ids=[collection_id],
        data_dir=tmp_path,
        resample=True,
    )

    # Modification time of the same original file after the second fetch.
    mtime_after = (
        Path(resampled_data["images_meta"][0]["absolute_path"]).stat().st_mtime
    )

    # An unchanged mtime means the original file was not downloaded again.
    assert np.isclose(mtime_before, mtime_after)

    _check_resampled_version_is_here(resampled_data)
    # The original files must still be present as well.
    _check_original_version_is_here(resampled_data)

    _check_all_affines_match_neurovault_affine(resampled_data)

    _check_no_affine_match_neurovault_affine(original_data)

1080 

1081 

1082def _check_resampled_version_is_here(data): 

1083 assert np.all( 

1084 [ 

1085 Path(im_meta["resampled_absolute_path"]).is_file() 

1086 for im_meta in data["images_meta"] 

1087 ] 

1088 ) 

1089 

1090 

1091def _check_resampled_version_is_not_here(data): 

1092 assert not np.any( 

1093 [ 

1094 Path(im_meta["resampled_absolute_path"]).is_file() 

1095 for im_meta in data["images_meta"] 

1096 ] 

1097 ) 

1098 

1099 

1100def _check_original_version_is_here(data): 

1101 assert np.all( 

1102 [ 

1103 Path(im_meta["absolute_path"]).is_file() 

1104 for im_meta in data["images_meta"] 

1105 ] 

1106 ) 

1107 

1108 

1109def _check_original_version_is_not_here(data): 

1110 assert not np.any( 

1111 [ 

1112 Path(im_meta["absolute_path"]).is_file() 

1113 for im_meta in data["images_meta"] 

1114 ] 

1115 ) 

1116 

1117 

def _check_all_affines_match_neurovault_affine(data):
    """Assert every fetched image uses the neurovault standard affine."""
    for image_path in data["images"]:
        affine = load_img(image_path).affine
        assert (affine == neurovault.STD_AFFINE).all()

1124 

1125 

def _check_no_affine_match_neurovault_affine(data):
    """Assert none of the fetched images use the neurovault standard affine."""
    for image_path in data["images"]:
        affine = load_img(image_path).affine
        assert not (affine == neurovault.STD_AFFINE).all()

1132 

1133 

def test_timeout_error(capsys, request_mocker):
    """A network timeout yields an empty result and a helpful log message."""
    request_mocker.url_mapping["*"] = requests.exceptions.ReadTimeout()

    result = neurovault.fetch_neurovault(verbose=0)

    # Nothing could be downloaded.
    assert len(result.images) == 0

    # The user should be advised to increase the timeout.
    printed = capsys.readouterr().out
    assert re.search("Try increasing", printed) is not None

1144 

1145 

def test_fetch_neurovault_motor_task():
    """Calling the deprecated fetcher raises a DeprecationWarning."""
    with pytest.warns(DeprecationWarning, match="will be removed"):
        neurovault.fetch_neurovault_motor_task(verbose=0)