Coverage for nilearn/datasets/tests/test_neurovault.py: 0%
487 statements
"""Test the neurovault module."""

import hashlib
import json
import os
import re
import stat
from pathlib import Path
from urllib import parse

import numpy as np
import pandas as pd
import pytest
import requests

from nilearn._utils.data_gen import generate_fake_fmri
from nilearn.conftest import _rng
from nilearn.datasets import fetch_neurovault_ids, neurovault
from nilearn.image import load_img


def _get_neurovault_data():
    """Make fake images and collections to mock neurovault in the unit tests.

    Returns two pandas DataFrames: collections and images. Each row contains
    some metadata (e.g. "map_type", "is_thresholded" for images, or
    "number_of_images" for collections) for a single (fake) image or
    collection.

    Together, these two dataframes act as a fake neurovault database; the
    `_neurovault` function uses them to simulate responses from the
    neurovault API.

    """
    if getattr(_get_neurovault_data, "data", None) is not None:
        return _get_neurovault_data.data

    rng = _rng()

    n_collections = 73
    collection_ids = rng.choice(
        np.arange(1000), size=n_collections, replace=False
    )
    collections = pd.DataFrame({"id": collection_ids})

    n_images = 546
    image_ids = rng.choice(np.arange(10000), size=n_images, replace=False)
    images = pd.DataFrame({"id": image_ids})
    not_empty = rng.binomial(1, 0.9, n_collections).astype(bool)
    images["collection_id"] = rng.choice(
        collection_ids[not_empty], size=n_images
    )

    collection_sizes = images.groupby("collection_id").count()

    collections["true_number_of_images"] = collection_sizes.reindex(
        index=collections["id"].to_numpy(), fill_value=0
    ).to_numpy()
    collections["number_of_images"] = collections[
        "true_number_of_images"
    ] + rng.binomial(1, 0.1, n_collections) * rng.integers(
        0, 100, n_collections
    )

    images["not_mni"] = rng.binomial(1, 0.1, size=n_images).astype(bool)
    images["is_valid"] = rng.binomial(1, 0.1, size=n_images).astype(bool)
    images["is_thresholded"] = rng.binomial(1, 0.1, size=n_images).astype(bool)
    images["map_type"] = rng.choice(
        [
            "T map",
            "Z map",
            "ROI/mask",
            "anatomical",
            "parcellation",
            "something else",
        ],
        size=n_images,
        p=[0.4, 0.3, 0.1, 0.1, 0.05, 0.05],
    )
    images["image_type"] = rng.choice(
        ["statistic_map", "atlas", "other type"],
        size=n_images,
        p=[0.4, 0.4, 0.2],
    )
    images["some_key"] = "some_value"
    images[13] = rng.standard_normal(n_images)
    url = "https://neurovault.org/media/images/{}/{}.nii.gz"
    image_names = [
        hashlib.sha1(bytes(img_id)).hexdigest()[:4] for img_id in image_ids
    ]
    images["file"] = [
        url.format(col_id, img_name)
        for (col_id, img_name) in zip(images["collection_id"], image_names)
    ]

    collections = collections.set_index("id", drop=False)
    images = images.set_index("id", drop=False)
    _get_neurovault_data.data = collections, images

    return collections, images
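

# A minimal usage sketch (editor's note, not executed by the test suite): the fake
# database is memoized on the function object, so repeated calls return the very
# same DataFrames rather than regenerating them:
#
#     collections, images = _get_neurovault_data()
#     _, images_again = _get_neurovault_data()
#     assert images_again is images  # cached on _get_neurovault_data.data
#     assert images["collection_id"].isin(collections["id"]).all()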


def _parse_query(query):
    """Extract key-value pairs from a URL query string.

    For example
    "collection=23&someoption&format=json"
    -> {"collection": "23", "someoption": None, "format": "json"}

    """
    parts = [p.split("=") for p in query.split("&")]
    result = {}
    for p in parts:
        if len(p) == 2:
            result[p[0]] = p[1]
        if len(p) == 1:
            result[p[0]] = None
    return result
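

# For instance (illustrative note added by the editor):
# _parse_query("offset=15&limit=5&json") returns
# {"offset": "15", "limit": "5", "json": None}; values stay strings, and the
# callers below convert them to integers where needed.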


def _neurovault_collections(parts, query):
    """Mock the Neurovault API behind the `/api/collections/` path.

    parts : the parts of the URL path after "collections",
        i.e. [], ["<somecollectionid>"], or ["<somecollectionid>", "images"]

    query : the parsed query string, e.g. {"offset": "15", "limit": "5"}

    returns a dictionary of API results

    See the neurovault API docs for details: https://neurovault.org/api-docs

    """
    if parts:
        return _neurovault_one_collection(parts)
    collections, _ = _get_neurovault_data()
    offset, limit = int(query.get("offset", 0)), int(query.get("limit", 2))
    batch = collections.iloc[offset : offset + limit].to_dict(orient="records")
    return {"count": len(collections), "results": batch}
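

# Note (added for clarity): the paginated response mimics the real API closely
# enough for the helpers exercised below. It contains a "count" field and a
# "results" batch selected via the "offset" and "limit" query parameters,
# which is what neurovault._get_batch and _scroll_server_results consume in
# the tests further down.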


def _neurovault_one_collection(parts):
    """Mock Neurovault API \
    behind the `/api/collections/<somecollectionid>` path.

    parts : parts of the URL path after "collections",
        i.e. ["<somecollectionid>"] or ["<somecollectionid>", "images"]

    returns a dictionary of API results

    See the neurovault API docs for details: https://neurovault.org/api-docs

    """
    col_id = int(parts[0])
    collections, images = _get_neurovault_data()
    if col_id not in collections.index:
        return {"detail": "Not found."}
    if len(parts) == 1:
        return collections.loc[col_id].to_dict()
    if parts[1] != "images":
        return ""
    col_images = images[images["collection_id"] == col_id]
    return {
        "count": len(col_images),
        "results": col_images.to_dict(orient="records"),
    }


def _neurovault_images(parts, query):
    """Mock the Neurovault API behind the `/api/images/` path.

    parts : parts of the URL path after "images",
        i.e. [] or ["<someimageid>"]

    query : the parsed query string, e.g. {"offset": "15", "limit": "5"}

    returns a dictionary of API results

    See the neurovault API docs for details: https://neurovault.org/api-docs

    """
    if parts:
        return _neurovault_one_image(parts[0])
    _, images = _get_neurovault_data()
    offset, limit = int(query.get("offset", 0)), int(query.get("limit", 2))
    batch = images.iloc[offset : offset + limit].to_dict(orient="records")
    return {"count": len(images), "results": batch}


def _neurovault_one_image(img_id):
    """Mock the Neurovault API behind the `/api/images/<someimageid>` path.

    returns a dictionary of API results

    See the neurovault API docs for details: https://neurovault.org/api-docs

    """
    img_id = int(img_id)
    _, images = _get_neurovault_data()
    if img_id not in images.index:
        return {"detail": "Not found."}
    return images.loc[img_id].to_dict()


def _neurovault_file(parts, query):  # noqa: ARG001
    """Mock the Neurovault API behind the `/media/images/` path."""
    return generate_fake_fmri(length=1)[0]


class _NumpyJsonEncoder(json.JSONEncoder):
    """A json encoder that can handle numpy objects."""

    def default(self, obj):
        if hasattr(obj, "tolist"):
            return obj.tolist()
        return json.JSONEncoder.default(self, obj)
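

# Sketch of what the encoder handles (illustrative, not executed here): numpy
# scalars and arrays expose ``tolist``, so for example
#     json.dumps({"n": np.int64(3), "v": np.arange(2)}, cls=_NumpyJsonEncoder)
# yields '{"n": 3, "v": [0, 1]}' instead of raising a TypeError.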


def _neurovault(match, request):  # noqa: ARG001
    """Mock response content from the Neurovault API.

    The fake data used to generate responses is provided by
    `_get_neurovault_data`.

    See the neurovault API docs for details on the queries and corresponding
    responses: https://neurovault.org/api-docs

    """
    handlers = {
        "media": {"images": _neurovault_file},
        "api": {
            "collections": _neurovault_collections,
            "images": _neurovault_images,
        },
    }
    info = parse.urlparse(request.url)
    parts = list(filter(bool, info.path.split("/")))
    endpoint, section = parts[0], parts[1]

    result = handlers[endpoint][section](parts[2:], _parse_query(info.query))
    should_jsonify_response = endpoint == "api"
    return (
        json.dumps(result, cls=_NumpyJsonEncoder).encode("UTF-8")
        if should_jsonify_response
        else result
    )
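

# Dispatch sketch (added for clarity): a request for
# "https://neurovault.org/api/collections/?offset=0&limit=2" is split into the
# path parts ["api", "collections"], routed through the handler table above to
# _neurovault_collections([], {"offset": "0", "limit": "2"}), and the result is
# JSON-encoded because the endpoint is "api". A "/media/images/..." URL goes to
# _neurovault_file instead and is returned as-is (a fake Nifti image).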


@pytest.fixture(autouse=True)
def neurovault_mocker(request_mocker):
    request_mocker.url_mapping["*neurovault.org*"] = _neurovault
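

# Because the fixture above is autouse, every test in this module that goes
# through ``request_mocker`` talks to the fake API defined above instead of the
# real neurovault.org server.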


def test_remove_none_strings():
    info = {
        "a": "None / Other",
        "b": "",
        "c": "N/A",
        "d": None,
        "e": 0,
        "f": "a",
        "g": "Name",
    }

    assert neurovault._remove_none_strings(info) == {
        "a": None,
        "b": None,
        "c": None,
        "d": None,
        "e": 0,
        "f": "a",
        "g": "Name",
    }


def test_append_filters_to_query():
    "Test _append_filters_to_query."
    query = neurovault._append_filters_to_query(
        neurovault._NEUROVAULT_COLLECTIONS_URL, {"DOI": 17}
    )

    assert query == "https://neurovault.org/api/collections/?DOI=17"

    query = neurovault._append_filters_to_query(
        neurovault._NEUROVAULT_COLLECTIONS_URL, {"id": 40}
    )
    assert query == "https://neurovault.org/api/collections/40"


def test_get_batch():
    "Test _get_batch."
    batch = neurovault._get_batch(neurovault._NEUROVAULT_COLLECTIONS_URL)
    assert "results" in batch
    assert "count" in batch


def test_get_batch_error(tmp_path):
    "Test _get_batch errors."
    with pytest.raises(requests.RequestException):
        neurovault._get_batch("http://")
    with pytest.raises(ValueError):
        neurovault._get_batch(
            f"file://{tmp_path / 'test_nv.txt'!s}",
        )

    no_results_url = (
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        "esearch.fcgi?db=pmc&retmode=json&term=fmri"
    )

    with pytest.raises(ValueError):
        neurovault._get_batch(no_results_url)


def test_scroll_server_results():
    "Test _scroll_server_results."
    result = list(
        neurovault._scroll_server_results(
            neurovault._NEUROVAULT_COLLECTIONS_URL, max_results=6, batch_size=3
        )
    )

    assert len(result) == 6

    result = list(
        neurovault._scroll_server_results(
            neurovault._NEUROVAULT_COLLECTIONS_URL,
            max_results=3,
            local_filter=lambda r: False,
        )
    )
    assert not result

    no_results = neurovault._scroll_server_results(
        "http://BAD_URL", max_results=3, local_filter=lambda r: True
    )
    next(no_results)


def test_is_null():
    "Test IsNull."
    is_null = neurovault.IsNull()

    assert is_null != "a"
    assert is_null == ""
    assert str(is_null) == "IsNull()"


def test_not_null():
    "Test NotNull."
    not_null = neurovault.NotNull()

    assert not_null == "a"
    assert not_null != ""
    assert str(not_null) == "NotNull()"


def test_not_equal():
    "Test NotEqual."
    not_equal = neurovault.NotEqual("a")

    assert not_equal == "b"
    assert not_equal == 1
    assert not_equal != "a"
    assert not_equal != "a"
    assert str(not_equal) == "NotEqual('a')"


def test_order_comp():
    "Test lt, gt, goe, loe."
    geq = neurovault.GreaterOrEqual("2016-07-12T11:29:12.263046Z")

    assert geq == "2016-08-12T11:29:12.263046Z"
    assert geq != "2016-06-12T11:29:12.263046Z"
    assert str(geq) == "GreaterOrEqual('2016-07-12T11:29:12.263046Z')"

    gt = neurovault.GreaterThan("abc")

    assert gt != "abc"
    assert gt == "abd"  # codespell:ignore abd
    assert str(gt) == "GreaterThan('abc')"

    lt = neurovault.LessThan(7)

    assert lt != 7
    assert lt == 5
    assert lt != "a"
    assert str(lt) == "LessThan(7)"

    leq = neurovault.LessOrEqual(4.5)

    assert leq == 4.4
    assert leq != 4.6
    assert str(leq) == "LessOrEqual(4.5)"


def test_is_in():
    "Test IsIn."
    is_in = neurovault.IsIn(0, 1)

    assert is_in == 0
    assert is_in != 2
    assert str(is_in) == "IsIn(0, 1)"

    countable = neurovault.IsIn(*range(11))

    assert countable == 7
    assert countable != 12


def test_not_in():
    "Test NotIn."
    not_in = neurovault.NotIn(0, 1)

    assert not_in != 0
    assert not_in == 2
    assert str(not_in) == "NotIn(0, 1)"


def test_contains():
    "Test Contains."
    contains = neurovault.Contains("a", 0)

    assert contains != 10
    assert contains == ["b", 1, "a", 0]
    assert contains != ["b", 1, 0]
    assert contains != ["b", 1, "a"]
    assert str(contains) == "Contains('a', 0)"

    contains = neurovault.Contains("house", "face")

    assert contains == "face vs house"
    assert contains != "smiling face vs frowning face"


def test_not_contains():
    "Test NotContains."
    not_contains = neurovault.NotContains("ab")

    assert not_contains is not None
    assert not_contains == "a_b"
    assert not_contains == "bcd"
    assert not_contains != "_abcd"
    assert not_contains != "_abcd"
    assert str(not_contains) == "NotContains('ab',)"


def test_pattern():
    "Test Pattern."
    # Python std lib doc poker hand example
    pattern_0 = neurovault.Pattern(r"[0-9akqj]{5}$")

    assert str(pattern_0) == "Pattern(pattern='[0-9akqj]{5}$', flags=0)"

    pattern_1 = neurovault.Pattern(r"[0-9akqj]{5}$", re.IGNORECASE)

    assert pattern_0 == "ak05q"
    assert pattern_0 != "Ak05q"
    assert pattern_0 != "ak05e"
    assert pattern_1 == "ak05q"
    assert pattern_1 == "Ak05q"
    assert pattern_1 != "ak05e"


def test_result_filter():
    "Test ResultFilter IsIn NotIn."
    filter_0 = neurovault.ResultFilter(
        query_terms={"a": 0}, callable_filter=lambda d: len(d) < 5, b=1
    )

    assert str(filter_0) == "ResultFilter"
    assert filter_0["a"] == 0
    assert filter_0({"a": 0, "b": 1, "c": 2})
    assert not filter_0({"a": 0, "b": 1, "c": 2, "d": 3, "e": 4})
    assert not filter_0({"b": 1, "c": 2, "d": 3})
    assert not filter_0({"a": 1, "b": 1, "c": 2})

    filter_1 = neurovault.ResultFilter(query_terms={"c": 2})
    filter_1["d"] = neurovault.NotNull()

    assert filter_1({"c": 2, "d": 1})
    assert not filter_1({"c": 2, "d": 0})

    filter_1["d"] = neurovault.IsIn(0, 1)

    assert filter_1({"c": 2, "d": 1})
    assert not filter_1({"c": 2, "d": 2})

    del filter_1["d"]

    assert filter_1({"c": 2, "d": 2})

    filter_1["d"] = neurovault.NotIn(0, 1)

    assert not filter_1({"c": 2, "d": 1})
    assert filter_1({"c": 2, "d": 3})

    filter_1.add_filter(lambda d: len(d) > 2)

    assert not filter_1({"c": 2, "d": 3})
    assert filter_1({"c": 2, "d": 3, "e": 4})


def test_result_filter_combinations():
    "Test ResultFilter AND OR XOR NOT."
    filter_0 = neurovault.ResultFilter(a=0, b=1)
    filter_1 = neurovault.ResultFilter(c=2, d=3)

    filter_0_and_1 = filter_0.AND(filter_1)

    assert filter_0_and_1({"a": 0, "b": 1, "c": 2, "d": 3})
    assert not filter_0_and_1({"a": 0, "b": 1, "c": 2, "d": None})
    assert not filter_0_and_1({"a": None, "b": 1, "c": 2, "d": 3})

    filter_0_or_1 = filter_0.OR(filter_1)

    assert filter_0_or_1({"a": 0, "b": 1, "c": 2, "d": 3})
    assert filter_0_or_1({"a": 0, "b": 1, "c": 2, "d": None})
    assert filter_0_or_1({"a": None, "b": 1, "c": 2, "d": 3})
    assert not filter_0_or_1({"a": None, "b": 1, "c": 2, "d": None})

    filter_0_xor_1 = filter_0.XOR(filter_1)

    assert not filter_0_xor_1({"a": 0, "b": 1, "c": 2, "d": 3})
    assert filter_0_xor_1({"a": 0, "b": 1, "c": 2, "d": None})
    assert filter_0_xor_1({"a": None, "b": 1, "c": 2, "d": 3})
    assert not filter_0_xor_1({"a": None, "b": 1, "c": 2, "d": None})

    not_filter_0 = filter_0.NOT()

    assert not_filter_0({})
    assert not not_filter_0({"a": 0, "b": 1})

    filter_2 = neurovault.ResultFilter({"a": neurovault.NotNull()}).AND(
        lambda d: len(d) < 2
    )

    assert filter_2({"a": "a"})
    assert not filter_2({"a": ""})
    assert not filter_2({"a": "a", "b": 0})

    filt = neurovault.ResultFilter(a=0).AND(
        neurovault.ResultFilter(b=1).OR(neurovault.ResultFilter(b=2))
    )

    assert filt({"a": 0, "b": 1})
    assert not filt({"a": 0, "b": 0})


def test_simple_download(tmp_path):
    "Test _simple_download."
    downloaded_file = neurovault._simple_download(
        "https://neurovault.org/media/images/35/Fig3B_zstat1.nii.gz",
        tmp_path / "image_35.nii.gz",
        tmp_path,
    )
    assert downloaded_file.is_file()


def test_simple_download_error(tmp_path, request_mocker):
    "Test _simple_download error."
    request_mocker.url_mapping["*"] = requests.RequestException()
    with pytest.raises(requests.RequestException):
        neurovault._simple_download(
            "http://",
            tmp_path / "bad.nii.gz",
            tmp_path,
        )


def test_neurosynth_words_vectorized(tmp_path):
    "Test neurosynth_words_vectorized."
    n_im = 5
    words_files = [tmp_path / f"words_for_image_{i}.json" for i in range(n_im)]
    words = [str(i) for i in range(n_im)]
    for i, file_name in enumerate(words_files):
        word_weights = np.zeros(n_im)
        word_weights[i] = 1
        words_dict = {"data": {"values": dict(zip(words, word_weights))}}
        with file_name.open("wb") as words_file:
            words_file.write(json.dumps(words_dict).encode("utf-8"))

    freq, _ = neurovault.neurosynth_words_vectorized(words_files)

    assert freq.shape == (n_im, n_im)
    assert (freq.sum(axis=0) == np.ones(n_im)).all()


def test_neurosynth_words_vectorized_warning(tmp_path):
    "Test neurosynth_words_vectorized warning."
    with pytest.warns(UserWarning):
        neurovault.neurosynth_words_vectorized(
            (tmp_path / "no_words_here.json",)
        )


def test_write_read_metadata(tmp_path):
    "Test _write_metadata and _add_absolute_paths."
    metadata = {
        "relative_path": "collection_1",
        "absolute_path": Path("tmp", "collection_1"),
    }
    metadata_path = tmp_path / "metadata.json"

    neurovault._write_metadata(metadata, metadata_path)
    with metadata_path.open("rb") as meta_file:
        written_metadata = json.loads(meta_file.read().decode("utf-8"))

    assert "relative_path" in written_metadata
    assert "absolute_path" not in written_metadata

    read_metadata = neurovault._add_absolute_paths(
        Path("tmp"), written_metadata
    )

    assert read_metadata["absolute_path"] == Path("tmp", "collection_1")


def test_add_absolute_paths():
    "Test _add_absolute_paths."
    meta = {
        "col_relative_path": "collection_1",
        "col_absolute_path": Path("dir_0", "neurovault", "collection_1"),
    }

    meta = neurovault._add_absolute_paths(
        Path("dir_1", "neurovault"), meta, force=False
    )

    assert meta["col_absolute_path"] == Path(
        "dir_0", "neurovault", "collection_1"
    )

    meta = neurovault._add_absolute_paths(
        Path("dir_1", "neurovault"), meta, force=True
    )

    assert meta["col_absolute_path"] == Path(
        "dir_1", "neurovault", "collection_1"
    )

    meta = {"id": 0}

    meta_transformed = neurovault._add_absolute_paths(
        Path("dir_1", "neurovault"), meta, force=True
    )

    assert meta == meta_transformed


def test_json_add_collection_dir(tmp_path):
    "Test _json_add_collection_dir."
    coll_dir = tmp_path / "collection_1"
    coll_dir.mkdir()
    coll_file_name = coll_dir / "collection_1.json"
    with coll_file_name.open("wb") as coll_file:
        coll_file.write(json.dumps({"id": 1}).encode("utf-8"))

    loaded = neurovault._json_add_collection_dir(coll_file_name)

    assert loaded["absolute_path"] == coll_dir.absolute()
    assert loaded["relative_path"] == coll_dir


def test_json_add_im_files_paths(tmp_path):
    "Test _json_add_im_files_paths."
    coll_dir = tmp_path / "collection_1"
    coll_dir.mkdir()
    im_file_name = coll_dir / "image_1.json"
    with im_file_name.open("wb") as im_file:
        im_file.write(json.dumps({"id": 1}).encode("utf-8"))

    loaded = neurovault._json_add_im_files_paths(im_file_name)

    assert loaded["relative_path"] == coll_dir / "image_1.nii.gz"
    assert loaded.get("neurosynth_words_relative_path") is None


def test_split_terms():
    "Test _split_terms."
    terms, server_terms = neurovault._split_terms(
        {
            "DOI": neurovault.NotNull(),
            "name": "my_name",
            "unknown_term": "something",
        },
        neurovault._COL_FILTERS_AVAILABLE_ON_SERVER,
    )

    assert terms == {"DOI": neurovault.NotNull(), "unknown_term": "something"}
    assert server_terms == {"name": "my_name"}


def test_move_unknown_terms_to_local_filter():
    "Test _move_unknown_terms_to_local_filter."
    terms, new_filter = neurovault._move_unknown_terms_to_local_filter(
        {"a": 0, "b": 1}, neurovault.ResultFilter(), ("a",)
    )

    assert terms == {"a": 0}
    assert not new_filter({"b": 0})
    assert new_filter({"b": 1})


def test_move_col_id():
    "Test _move_col_id."
    im_terms, col_terms = neurovault._move_col_id(
        {"collection_id": 1, "not_mni": False}, {}
    )

    assert im_terms == {"not_mni": False}
    assert col_terms == {"id": 1}

    with pytest.warns(UserWarning):
        neurovault._move_col_id(
            {"collection_id": 1, "not_mni": False}, {"id": 2}
        )


def test_download_image_terms(tmp_path, request_mocker):
    """Test _download_image_terms."""
    image_info = {"id": "a"}
    collection = {
        "relative_path": "collection",
        "absolute_path": tmp_path / "collection",
    }
    collection["absolute_path"].mkdir(parents=True)
    download_params = {
        "temp_dir": tmp_path,
        "verbose": 3,
        "fetch_neurosynth_words": True,
    }
    request_mocker.url_mapping["*"] = requests.RequestException()

    neurovault._download_image_terms(image_info, collection, download_params)


def test_download_image_terms_error(tmp_path, request_mocker):
    """Test _download_image_terms errors."""
    image_info = {"id": "a"}
    collection = {
        "relative_path": "collection",
        "absolute_path": tmp_path / "collection",
    }
    collection["absolute_path"].mkdir(parents=True)
    download_params = {
        "temp_dir": tmp_path,
        "verbose": 3,
        "fetch_neurosynth_words": True,
    }
    request_mocker.url_mapping["*"] = requests.RequestException()
    download_params["allow_neurosynth_failure"] = False

    with pytest.raises(RuntimeError):
        neurovault._download_image_terms(
            image_info,
            collection,
            download_params,
        )

    # no fail if file already exists
    with Path(
        collection["absolute_path"], "neurosynth_words_for_image_a.json"
    ).open("w"):
        pass

    neurovault._download_image_terms(image_info, collection, download_params)


def test_download_image():
    """Test _download_image."""
    image = neurovault._download_image(None, {})
    assert image is None


def test_fetch_neurovault(tmp_path):
    """Run several checks on fetch_neurovault."""
    data = neurovault.fetch_neurovault(mode="offline", data_dir=tmp_path)

    assert len(data.images) == 0

    # try to download an image
    data = neurovault.fetch_neurovault(
        max_images=11,
        fetch_neurosynth_words=True,
        mode="overwrite",
        data_dir=tmp_path,
    )
    # specifying a filter while leaving the default term
    # filters in place should raise a warning.
    with pytest.warns(UserWarning):
        neurovault.fetch_neurovault(
            image_filter=lambda x: True, max_images=1, mode="offline"
        )

    assert data.images
    assert len(data.images) == 11
    for meta in data.images_meta:
        assert not meta["not_mni"]
        assert not meta["is_thresholded"]
        assert meta["map_type"] not in [
            "ROI/mask",
            "anatomical",
            "parcellation",
        ]
        assert meta["image_type"] != "atlas"

    # using a data directory we can't write into should raise a
    # warning unless mode is 'offline'
    tmp_path.chmod(stat.S_IREAD | stat.S_IEXEC)
    (tmp_path / "neurovault").chmod(stat.S_IREAD | stat.S_IEXEC)
    if os.access(tmp_path / "neurovault", os.W_OK):
        return

    with pytest.warns(UserWarning):
        neurovault.fetch_neurovault(data_dir=tmp_path)


def test_fetch_neurovault_errors(capsys, request_mocker):
    """Test that errors are logged when the server returns an error code.

    May "spam" your standard output with requests exceptions,
    but that's expected.
    """
    request_mocker.url_mapping["*"] = 500
    data = neurovault.fetch_neurovault()

    captured = capsys.readouterr()
    match = re.search(r"500 Error", captured.err)
    assert match is not None

    assert len(data.images) == 0


def test_fetch_neurovault_ids(tmp_path):
    """Test fetch_neurovault_ids."""
    collections, images = _get_neurovault_data()
    collections = collections.sort_values(
        by="true_number_of_images", ascending=False
    )
    other_col_id, *col_ids = collections["id"].to_numpy()[:3]
    img_ids = images[images["collection_id"] == other_col_id]["id"].to_numpy()[
        :3
    ]
    img_from_cols_ids = images[images["collection_id"].isin(col_ids)][
        "id"
    ].to_numpy()

    data = fetch_neurovault_ids(
        image_ids=img_ids, collection_ids=col_ids, data_dir=tmp_path
    )

    expected_images = list(img_ids) + list(img_from_cols_ids)

    assert len(data.images) == len(expected_images)
    assert {img["id"] for img in data["images_meta"]} == set(expected_images)
    assert Path(data["images"][0]).parent == Path(
        data["collections_meta"][0]["absolute_path"]
    )

    # check that there are no Path objects
    for image in data.images:
        assert isinstance(image, str)
    for meta in data.images_meta + data.collections_meta:
        for value in meta.values():
            assert not isinstance(value, Path)


def test_fetch_neurovault_ids_error():
    """Test fetch_neurovault_ids errors."""
    with pytest.raises(
        ValueError,
        match="Supported download modes are: overwrite, download_new, offline",
    ):
        fetch_neurovault_ids(mode="bad")


def test_fetch_neurovault_ids_offline(tmp_path):
    """Check that an image can be loaded again from disk."""
    collections, images = _get_neurovault_data()
    collections = collections.sort_values(
        by="true_number_of_images", ascending=False
    )
    other_col_id, *col_ids = collections["id"].to_numpy()[:3]
    img_ids = images[images["collection_id"] == other_col_id]["id"].to_numpy()[
        :3
    ]

    data = fetch_neurovault_ids(
        image_ids=img_ids, collection_ids=col_ids, data_dir=tmp_path
    )

    data = fetch_neurovault_ids(
        image_ids=[img_ids[0]], data_dir=tmp_path, mode="offline"
    )

    assert len(data.images) == 1


def test_fetch_neurovault_ids_overwrite(tmp_path):
    """Check that overwrite, unlike download_new, forces a fresh download."""
    collections, images = _get_neurovault_data()
    collections = collections.sort_values(
        by="true_number_of_images", ascending=False
    )
    other_col_id, *col_ids = collections["id"].to_numpy()[:3]
    img_ids = images[images["collection_id"] == other_col_id]["id"].to_numpy()[
        :3
    ]
    data = fetch_neurovault_ids(
        image_ids=img_ids, collection_ids=col_ids, data_dir=tmp_path
    )

    data = fetch_neurovault_ids(
        image_ids=[img_ids[0]], data_dir=tmp_path, mode="offline"
    )

    modified_meta = data["images_meta"][0]

    assert modified_meta["some_key"] == "some_value"

    modified_meta["some_key"] = "some_other_value"
    # mess it up on disk
    meta_path = (
        Path(modified_meta["absolute_path"]).parent
        / f"image_{img_ids[0]}_metadata.json"
    )
    # convert Path to str for JSON serialization
    modified_meta = {
        k: str(v) if isinstance(v, Path) else v
        for k, v in modified_meta.items()
    }
    with meta_path.open("wb") as meta_f:
        meta_f.write(json.dumps(modified_meta).encode("UTF-8"))

    # download_new leaves already-downloaded files untouched
    data = fetch_neurovault_ids(
        image_ids=[img_ids[0]], data_dir=tmp_path, mode="download_new"
    )
    data = fetch_neurovault_ids(
        image_ids=[img_ids[0]], data_dir=tmp_path, mode="offline"
    )

    # should not have changed
    assert data["images_meta"][0]["some_key"] == "some_other_value"

    data = fetch_neurovault_ids(
        image_ids=[img_ids[0]], data_dir=tmp_path, mode="overwrite"
    )
    data = fetch_neurovault_ids(
        image_ids=[img_ids[0]], data_dir=tmp_path, mode="offline"
    )

    # should be back to the original version
    assert data["images_meta"][0]["some_key"] == "some_value"


def test_should_download_resampled_images_only_if_no_previous_download(
    tmp_path,
):
    collections, _ = _get_neurovault_data()

    sample_collection = collections.iloc[0]
    sample_collection_id = sample_collection["id"]
    expected_number_of_images = sample_collection["true_number_of_images"]

    data = fetch_neurovault_ids(
        collection_ids=[sample_collection_id],
        data_dir=tmp_path,
        resample=True,
    )

    # Check the expected size of the dataset
    assert (len(data["images_meta"])) == expected_number_of_images

    _check_resampled_version_is_here(data)

    _check_all_affines_match_neurovault_affine(data)

    _check_original_version_is_not_here(data)


def test_download_original_images_along_resamp_images_if_previously_downloaded(
    tmp_path,
):
    collections, _ = _get_neurovault_data()

    sample_collection = collections.iloc[0]
    sample_collection_id = sample_collection["id"]

    # Fetch resampled images
    data = fetch_neurovault_ids(
        collection_ids=[sample_collection_id],
        data_dir=tmp_path,
        resample=True,
    )

    # Check that only the resampled version is here
    _check_resampled_version_is_here(data)
    _check_original_version_is_not_here(data)

    # Get the time of the last access to the resampled data
    access_time_resampled = (
        Path(data["images_meta"][0]["resampled_absolute_path"]).stat().st_atime
    )

    # Download original data
    data_orig = fetch_neurovault_ids(
        collection_ids=[sample_collection_id],
        data_dir=tmp_path,
        resample=False,
    )

    # Get the time of the last access to one of the original files
    # (which should be download time)
    access_time = (
        Path(data_orig["images_meta"][0]["absolute_path"]).stat().st_atime
    )

    # Check that the last access to the original data is after the access
    # to the resampled data
    assert access_time - access_time_resampled > 0

    # Check that the original version is now here
    # (the previous check would have failed anyway if it were not)
    _check_original_version_is_here(data_orig)

    _check_no_affine_match_neurovault_affine(data_orig)


def test_download_resamp_images_along_original_images_if_previously_downloaded(
    tmp_path,
):
    collections, _ = _get_neurovault_data()

    sample_collection = collections.iloc[0]
    sample_collection_id = sample_collection["id"]

    # Fetch non-resampled images
    data_orig = fetch_neurovault_ids(
        collection_ids=[sample_collection_id],
        data_dir=tmp_path,
        resample=False,
    )

    _check_original_version_is_here(data_orig)

    _check_resampled_version_is_not_here(data_orig)

    # Ask for the resampled version. This should only resample, not download.

    # Get the time of the last modification to the original data
    modif_time_original = (
        Path(data_orig["images_meta"][0]["absolute_path"]).stat().st_mtime
    )

    # Ask for resampled data, which should only trigger a resampling
    data = fetch_neurovault_ids(
        collection_ids=[sample_collection_id],
        data_dir=tmp_path,
        resample=True,
    )

    # Get the time of the last modification to the original data, after fetch
    modif_time_original_after = (
        Path(data["images_meta"][0]["absolute_path"]).stat().st_mtime
    )

    # The time difference should be 0
    assert np.isclose(modif_time_original, modif_time_original_after)

    _check_resampled_version_is_here(data)
    # And the original version should still be here as well
    _check_original_version_is_here(data)

    _check_all_affines_match_neurovault_affine(data)

    _check_no_affine_match_neurovault_affine(data_orig)


def _check_resampled_version_is_here(data):
    assert np.all(
        [
            Path(im_meta["resampled_absolute_path"]).is_file()
            for im_meta in data["images_meta"]
        ]
    )


def _check_resampled_version_is_not_here(data):
    assert not np.any(
        [
            Path(im_meta["resampled_absolute_path"]).is_file()
            for im_meta in data["images_meta"]
        ]
    )


def _check_original_version_is_here(data):
    assert np.all(
        [
            Path(im_meta["absolute_path"]).is_file()
            for im_meta in data["images_meta"]
        ]
    )


def _check_original_version_is_not_here(data):
    assert not np.any(
        [
            Path(im_meta["absolute_path"]).is_file()
            for im_meta in data["images_meta"]
        ]
    )


def _check_all_affines_match_neurovault_affine(data):
    affines = [load_img(cur_im).affine for cur_im in data["images"]]

    assert np.all(
        [np.all(affine == neurovault.STD_AFFINE) for affine in affines]
    )


def _check_no_affine_match_neurovault_affine(data):
    affines = [load_img(cur_im).affine for cur_im in data["images"]]

    assert not np.any(
        [np.all(affine == neurovault.STD_AFFINE) for affine in affines]
    )
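

# Note (editor's assumption based on the tests above): neurovault.STD_AFFINE is
# the affine that images are resampled to when ``resample=True``. The helpers
# above use it to distinguish resampled downloads (every affine matches it) from
# original downloads (no affine should match it, given the fake images produced
# by generate_fake_fmri).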


def test_timeout_error(capsys, request_mocker):
    """Check that the proper log message is printed on timeout."""
    request_mocker.url_mapping["*"] = requests.exceptions.ReadTimeout()
    data = neurovault.fetch_neurovault(verbose=0)

    assert len(data.images) == 0

    captured = capsys.readouterr()
    match = re.search("Try increasing", captured.out)
    assert match is not None


def test_fetch_neurovault_motor_task():
    with pytest.warns(DeprecationWarning, match="will be removed"):
        neurovault.fetch_neurovault_motor_task(verbose=0)