1"""Download statistical maps available \
2on Neurovault (https://neurovault.org).
3"""
5import json
6import os
7import re
8import shutil
9import traceback
10import uuid
11import warnings
12from collections.abc import Container
13from copy import copy, deepcopy
14from pathlib import Path
15from tempfile import mkdtemp
16from urllib.parse import urlencode, urljoin
18import numpy as np
19import requests
20from sklearn.feature_extraction import DictVectorizer
21from sklearn.utils import Bunch
23from nilearn._utils import fill_doc
24from nilearn._utils.logger import find_stack_level
25from nilearn._utils.param_validation import check_params
26from nilearn.image import resample_img
28from ._utils import (
29 fetch_single_file,
30 get_dataset_descr,
31 get_dataset_dir,
32 logger,
33)
_NEUROVAULT_BASE_URL = "https://neurovault.org/api/"
_NEUROVAULT_COLLECTIONS_URL = urljoin(_NEUROVAULT_BASE_URL, "collections/")
_NEUROVAULT_IMAGES_URL = urljoin(_NEUROVAULT_BASE_URL, "images/")
_NEUROSYNTH_FETCH_WORDS_URL = "https://neurosynth.org/api/decode/"

_COL_FILTERS_AVAILABLE_ON_SERVER = ("DOI", "name", "owner", "id")
_IM_FILTERS_AVAILABLE_ON_SERVER = ()

_DEFAULT_BATCH_SIZE = 100
_DEFAULT_MAX_IMAGES = 100

STD_AFFINE = np.array(
    [
        [3.0, 0.0, 0.0, -90.0],
        [0.0, 3.0, 0.0, -126.0],
        [0.0, 0.0, 3.0, -72.0],
        [0.0, 0.0, 0.0, 1.0],
    ]
)
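# Note: STD_AFFINE describes a 3 mm isotropic grid with an MNI-style origin
# (offsets -90, -126, -72). Images fetched with resampling enabled are
# brought onto this common grid so they can be stacked and compared.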

# if _MAX_CONSECUTIVE_FAILS downloads fail in a row, we consider there is a
# problem (e.g. no internet connection, or the Neurovault server is down),
# and we abort the fetching.
_MAX_CONSECUTIVE_FAILS = 100

# if _MAX_FAILS_IN_COLLECTION images fail to be downloaded from the same
# collection, we consider this collection garbage and we move on to the
# next collection.
_MAX_FAILS_IN_COLLECTION = 30

_DEFAULT_TIME_OUT = 10.0

_DEBUG = 3
_INFO = 2
_WARNING = 1
_ERROR = 0

def _requests_session():
    if getattr(_requests_session, "session", None) is None:
        _requests_session.session = requests.Session()
    return _requests_session.session
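
# Illustrative note: _requests_session caches a single requests.Session as an
# attribute on the function object itself, so repeated calls reuse one
# session (and its connection pool):
#
#     assert _requests_session() is _requests_session()
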
# Helpers for filtering images and collections.

class _SpecialValue:
    """Base class for special values used to filter terms.

    Derived classes should override ``__eq__`` in order to create
    objects that can be used for comparisons to particular sets of
    values in filters.

    """

    def __eq__(self, other):
        return NotImplemented

    def __req__(self, other):
        return self.__eq__(other)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __rne__(self, other):
        return self.__ne__(other)

    def __repr__(self):
        if hasattr(self, "repr_arg_"):
            return f"{self.__class__.__name__}({self.repr_arg_!r})"
        return f"{self.__class__.__name__}()"

class IsNull(_SpecialValue):
    """Special value used to filter terms.

    An instance of this class will always be equal to, and only to,
    any null value of any type (by null we mean for which bool
    returns False).

    See Also
    --------
    nilearn.datasets.neurovault.NotNull,
    nilearn.datasets.neurovault.NotEqual,
    nilearn.datasets.neurovault.GreaterOrEqual,
    nilearn.datasets.neurovault.GreaterThan,
    nilearn.datasets.neurovault.LessOrEqual,
    nilearn.datasets.neurovault.LessThan,
    nilearn.datasets.neurovault.IsIn,
    nilearn.datasets.neurovault.NotIn,
    nilearn.datasets.neurovault.Contains,
    nilearn.datasets.neurovault.NotContains,
    nilearn.datasets.neurovault.Pattern.

    Examples
    --------
    >>> from nilearn.datasets.neurovault import IsNull
    >>> null = IsNull()
    >>> null == 0
    True
    >>> null == ""
    True
    >>> null == None
    True
    >>> null == "a"
    False

    """

    def __eq__(self, other):
        return not bool(other)

class NotNull(_SpecialValue):
    """Special value used to filter terms.

    An instance of this class will always be equal to, and only to,
    any non-zero value of any type (by non-zero we mean for which bool
    returns True).

    See Also
    --------
    nilearn.datasets.neurovault.IsNull,
    nilearn.datasets.neurovault.NotEqual,
    nilearn.datasets.neurovault.GreaterOrEqual,
    nilearn.datasets.neurovault.GreaterThan,
    nilearn.datasets.neurovault.LessOrEqual,
    nilearn.datasets.neurovault.LessThan,
    nilearn.datasets.neurovault.IsIn,
    nilearn.datasets.neurovault.NotIn,
    nilearn.datasets.neurovault.Contains,
    nilearn.datasets.neurovault.NotContains,
    nilearn.datasets.neurovault.Pattern.

    Examples
    --------
    >>> from nilearn.datasets.neurovault import NotNull
    >>> not_null = NotNull()
    >>> not_null == 0
    False
    >>> not_null == ""
    False
    >>> not_null == None
    False
    >>> not_null == "a"
    True

    """

    def __eq__(self, other):
        return bool(other)

class NotEqual(_SpecialValue):
    """Special value used to filter terms.

    An instance of this class is constructed with `NotEqual(obj)`. It
    will always be equal to, and only to, any value for which
    ``obj == value`` is ``False``.

    Parameters
    ----------
    negated : object
        The object from which a candidate should be different in order
        to pass through the filter.

    See Also
    --------
    nilearn.datasets.neurovault.IsNull,
    nilearn.datasets.neurovault.NotNull,
    nilearn.datasets.neurovault.GreaterOrEqual,
    nilearn.datasets.neurovault.GreaterThan,
    nilearn.datasets.neurovault.LessOrEqual,
    nilearn.datasets.neurovault.LessThan,
    nilearn.datasets.neurovault.IsIn,
    nilearn.datasets.neurovault.NotIn,
    nilearn.datasets.neurovault.Contains,
    nilearn.datasets.neurovault.NotContains,
    nilearn.datasets.neurovault.Pattern.

    Examples
    --------
    >>> from nilearn.datasets.neurovault import NotEqual
    >>> not_0 = NotEqual(0)
    >>> not_0 == 0
    False
    >>> not_0 == "0"
    True

    """

    def __init__(self, negated):
        self.negated_ = negated
        self.repr_arg_ = self.negated_

    def __eq__(self, other):
        return not self.negated_ == other

class _OrderComp(_SpecialValue):
    """Base class for special values based on order comparisons."""

    def __init__(self, bound):
        self.bound_ = bound
        self._cast = type(bound)
        self.repr_arg_ = self.bound_

    def __eq__(self, other):
        try:
            return self._eq_impl(self._cast(other))
        except (TypeError, ValueError):
            return False

class GreaterOrEqual(_OrderComp):
    """Special value used to filter terms.

    An instance of this class is constructed with `GreaterOrEqual(obj)`. It
    will always be equal to, and only to, any value for which
    ``obj <= value`` is ``True``.

    Parameters
    ----------
    bound : object
        The object to which a candidate should be superior or equal in
        order to pass through the filter.

    See Also
    --------
    nilearn.datasets.neurovault.IsNull,
    nilearn.datasets.neurovault.NotNull,
    nilearn.datasets.neurovault.NotEqual,
    nilearn.datasets.neurovault.GreaterThan,
    nilearn.datasets.neurovault.LessOrEqual,
    nilearn.datasets.neurovault.LessThan,
    nilearn.datasets.neurovault.IsIn,
    nilearn.datasets.neurovault.NotIn,
    nilearn.datasets.neurovault.Contains,
    nilearn.datasets.neurovault.NotContains,
    nilearn.datasets.neurovault.Pattern.

    Examples
    --------
    >>> from nilearn.datasets.neurovault import GreaterOrEqual
    >>> nonnegative = GreaterOrEqual(0.0)
    >>> nonnegative == -0.1
    False
    >>> nonnegative == 0
    True
    >>> nonnegative == 0.1
    True

    """

    def _eq_impl(self, other):
        return self.bound_ <= other

class GreaterThan(_OrderComp):
    """Special value used to filter terms.

    An instance of this class is constructed with `GreaterThan(obj)`. It
    will always be equal to, and only to, any value for which
    ``obj < value`` is ``True``.

    Parameters
    ----------
    bound : object
        The object to which a candidate should be strictly superior in
        order to pass through the filter.

    See Also
    --------
    nilearn.datasets.neurovault.IsNull,
    nilearn.datasets.neurovault.NotNull,
    nilearn.datasets.neurovault.NotEqual,
    nilearn.datasets.neurovault.GreaterOrEqual,
    nilearn.datasets.neurovault.LessOrEqual,
    nilearn.datasets.neurovault.LessThan,
    nilearn.datasets.neurovault.IsIn,
    nilearn.datasets.neurovault.NotIn,
    nilearn.datasets.neurovault.Contains,
    nilearn.datasets.neurovault.NotContains,
    nilearn.datasets.neurovault.Pattern.

    Examples
    --------
    >>> from nilearn.datasets.neurovault import GreaterThan
    >>> positive = GreaterThan(0.0)
    >>> positive == 0.0
    False
    >>> positive == 1.0
    True
    >>> positive == -1.0
    False

    """

    def _eq_impl(self, other):
        return self.bound_ < other

class LessOrEqual(_OrderComp):
    """Special value used to filter terms.

    An instance of this class is constructed with `LessOrEqual(obj)`. It
    will always be equal to, and only to, any value for which
    ``value <= obj`` is ``True``.

    Parameters
    ----------
    bound : object
        The object to which a candidate should be inferior or equal in
        order to pass through the filter.

    See Also
    --------
    nilearn.datasets.neurovault.IsNull,
    nilearn.datasets.neurovault.NotNull,
    nilearn.datasets.neurovault.NotEqual,
    nilearn.datasets.neurovault.GreaterOrEqual,
    nilearn.datasets.neurovault.GreaterThan,
    nilearn.datasets.neurovault.LessThan,
    nilearn.datasets.neurovault.IsIn,
    nilearn.datasets.neurovault.NotIn,
    nilearn.datasets.neurovault.Contains,
    nilearn.datasets.neurovault.NotContains,
    nilearn.datasets.neurovault.Pattern.

    Examples
    --------
    >>> from nilearn.datasets.neurovault import LessOrEqual
    >>> nonpositive = LessOrEqual(0.0)
    >>> nonpositive == -1.0
    True
    >>> nonpositive == 0.0
    True
    >>> nonpositive == 1.0
    False

    """

    def _eq_impl(self, other):
        return other <= self.bound_

class LessThan(_OrderComp):
    """Special value used to filter terms.

    An instance of this class is constructed with `LessThan(obj)`. It
    will always be equal to, and only to, any value for which
    ``value < obj`` is ``True``.

    Parameters
    ----------
    bound : object
        The object to which a candidate should be strictly inferior in
        order to pass through the filter.

    See Also
    --------
    nilearn.datasets.neurovault.IsNull,
    nilearn.datasets.neurovault.NotNull,
    nilearn.datasets.neurovault.NotEqual,
    nilearn.datasets.neurovault.GreaterOrEqual,
    nilearn.datasets.neurovault.GreaterThan,
    nilearn.datasets.neurovault.LessOrEqual,
    nilearn.datasets.neurovault.IsIn,
    nilearn.datasets.neurovault.NotIn,
    nilearn.datasets.neurovault.Contains,
    nilearn.datasets.neurovault.NotContains,
    nilearn.datasets.neurovault.Pattern.

    Examples
    --------
    >>> from nilearn.datasets.neurovault import LessThan
    >>> negative = LessThan(0.0)
    >>> negative == -1.0
    True
    >>> negative == 0.0
    False
    >>> negative == 1.0
    False

    """

    def _eq_impl(self, other):
        return other < self.bound_

class IsIn(_SpecialValue):
    """Special value used to filter terms.

    An instance of this class is constructed with
    `IsIn(*accepted)`. It will always be equal to, and only to, any
    value for which ``value in accepted`` is ``True``.

    Parameters
    ----------
    accepted : container
        A value will pass through the filter if it is present in
        `accepted`.

    See Also
    --------
    nilearn.datasets.neurovault.IsNull,
    nilearn.datasets.neurovault.NotNull,
    nilearn.datasets.neurovault.NotEqual,
    nilearn.datasets.neurovault.GreaterOrEqual,
    nilearn.datasets.neurovault.GreaterThan,
    nilearn.datasets.neurovault.LessOrEqual,
    nilearn.datasets.neurovault.LessThan,
    nilearn.datasets.neurovault.NotIn,
    nilearn.datasets.neurovault.Contains,
    nilearn.datasets.neurovault.NotContains,
    nilearn.datasets.neurovault.Pattern.

    Examples
    --------
    >>> from nilearn.datasets.neurovault import IsIn
    >>> vowels = IsIn("a", "e", "i", "o", "u", "y")
    >>> "a" == vowels
    True
    >>> vowels == "b"
    False

    """

    def __init__(self, *accepted):
        self.accepted_ = accepted

    def __eq__(self, other):
        return other in self.accepted_

    def __repr__(self):
        return f"{self.__class__.__name__}{self.accepted_!r}"

class NotIn(_SpecialValue):
    """Special value used to filter terms.

    An instance of this class is constructed with
    `NotIn(*rejected)`. It will always be equal to, and only to, any
    value for which ``value in rejected`` is ``False``.

    Parameters
    ----------
    rejected : container
        A value will pass through the filter if it is absent from
        `rejected`.

    See Also
    --------
    nilearn.datasets.neurovault.IsNull,
    nilearn.datasets.neurovault.NotNull,
    nilearn.datasets.neurovault.NotEqual,
    nilearn.datasets.neurovault.GreaterOrEqual,
    nilearn.datasets.neurovault.GreaterThan,
    nilearn.datasets.neurovault.LessOrEqual,
    nilearn.datasets.neurovault.LessThan,
    nilearn.datasets.neurovault.IsIn,
    nilearn.datasets.neurovault.Contains,
    nilearn.datasets.neurovault.NotContains,
    nilearn.datasets.neurovault.Pattern.

    Examples
    --------
    >>> from nilearn.datasets.neurovault import NotIn
    >>> consonants = NotIn("a", "e", "i", "o", "u", "y")
    >>> "b" == consonants
    True
    >>> consonants == "a"
    False

    """

    def __init__(self, *rejected):
        self.rejected_ = rejected

    def __eq__(self, other):
        return other not in self.rejected_

    def __repr__(self):
        return f"{self.__class__.__name__}{self.rejected_!r}"

class Contains(_SpecialValue):
    """Special value used to filter terms.

    An instance of this class is constructed with
    `Contains(*must_be_contained)`. It will always be equal to, and
    only to, any value for which ``item in value`` is ``True`` for
    every item in ``must_be_contained``.

    Parameters
    ----------
    must_be_contained : container
        A value will pass through the filter if it contains all the
        items in must_be_contained.

    See Also
    --------
    nilearn.datasets.neurovault.IsNull,
    nilearn.datasets.neurovault.NotNull,
    nilearn.datasets.neurovault.NotEqual,
    nilearn.datasets.neurovault.GreaterOrEqual,
    nilearn.datasets.neurovault.GreaterThan,
    nilearn.datasets.neurovault.LessOrEqual,
    nilearn.datasets.neurovault.LessThan,
    nilearn.datasets.neurovault.IsIn,
    nilearn.datasets.neurovault.NotIn,
    nilearn.datasets.neurovault.NotContains,
    nilearn.datasets.neurovault.Pattern.

    Examples
    --------
    >>> from nilearn.datasets.neurovault import Contains
    >>> contains = Contains("house", "face")
    >>> "face vs house" == contains
    True
    >>> "smiling face vs frowning face" == contains
    False

    """

    def __init__(self, *must_be_contained):
        self.must_be_contained_ = must_be_contained

    def __eq__(self, other):
        return (
            all(item in other for item in self.must_be_contained_)
            if isinstance(other, Container)
            else False
        )

    def __repr__(self):
        return f"{self.__class__.__name__}{self.must_be_contained_!r}"

class NotContains(_SpecialValue):
    """Special value used to filter terms.

    An instance of this class is constructed with
    `NotContains(*must_not_be_contained)`. It will always be equal
    to, and only to, any value for which ``item in value`` is
    ``False`` for every item in ``must_not_be_contained``.

    Parameters
    ----------
    must_not_be_contained : container
        A value will pass through the filter if it does not contain
        any of the items in must_not_be_contained.

    See Also
    --------
    nilearn.datasets.neurovault.IsNull,
    nilearn.datasets.neurovault.NotNull,
    nilearn.datasets.neurovault.NotEqual,
    nilearn.datasets.neurovault.GreaterOrEqual,
    nilearn.datasets.neurovault.GreaterThan,
    nilearn.datasets.neurovault.LessOrEqual,
    nilearn.datasets.neurovault.LessThan,
    nilearn.datasets.neurovault.IsIn,
    nilearn.datasets.neurovault.NotIn,
    nilearn.datasets.neurovault.Contains,
    nilearn.datasets.neurovault.Pattern.

    Examples
    --------
    >>> from nilearn.datasets.neurovault import NotContains
    >>> no_garbage = NotContains("bad", "test")
    >>> no_garbage == "test image"
    False
    >>> no_garbage == "good image"
    True

    """

    def __init__(self, *must_not_be_contained):
        self.must_not_be_contained_ = must_not_be_contained

    def __eq__(self, other):
        return (
            all(item not in other for item in self.must_not_be_contained_)
            if isinstance(other, Container)
            else False
        )

    def __repr__(self):
        return f"{self.__class__.__name__}{self.must_not_be_contained_!r}"

class Pattern(_SpecialValue):
    """Special value used to filter terms.

    An instance of this class is constructed with
    `Pattern(pattern[, flags])`. It will always be equal to, and only
    to, any value for which ``re.match(pattern, value, flags)`` is
    ``True``.

    Parameters
    ----------
    pattern : str
        The pattern to try to match to candidates.

    flags : int, default=0
        Value for ``re.match`` `flags` parameter,
        e.g. ``re.IGNORECASE``. The default (0) is the default value
        used by ``re.match``.

    See Also
    --------
    nilearn.datasets.neurovault.IsNull,
    nilearn.datasets.neurovault.NotNull,
    nilearn.datasets.neurovault.NotEqual,
    nilearn.datasets.neurovault.GreaterOrEqual,
    nilearn.datasets.neurovault.GreaterThan,
    nilearn.datasets.neurovault.LessOrEqual,
    nilearn.datasets.neurovault.LessThan,
    nilearn.datasets.neurovault.IsIn,
    nilearn.datasets.neurovault.NotIn,
    nilearn.datasets.neurovault.Contains,
    nilearn.datasets.neurovault.NotContains.

    Documentation for standard library ``re`` module.

    Examples
    --------
    >>> from nilearn.datasets.neurovault import Pattern
    >>> poker = Pattern(r"[0-9akqj]{5}$")
    >>> "ak05q" == poker
    True
    >>> "ak05e" == poker
    False

    """

    def __init__(self, pattern, flags=0):
        # Don't use re.compile because compiled patterns
        # can't be deepcopied.
        self.pattern_ = pattern
        self.flags_ = flags

    def __eq__(self, other):
        return (
            isinstance(other, str)
            and re.match(self.pattern_, other, self.flags_) is not None
        )

    def __repr__(self):
        return (
            f"{self.__class__.__name__}(pattern={self.pattern_!r}, "
            f"flags={self.flags_})"
        )

def _empty_filter(result):  # noqa: ARG001
    """Placeholder for a filter which always returns True.

    This is the default ``image_filter`` and ``collection_filter``
    argument for ``fetch_neurovault``.

    The ``result`` parameter is necessary for the API consistency
    with other filters.
    """
    return True

class ResultFilter:
    """Easily create callable (local) filters for ``fetch_neurovault``.

    Constructed from a mapping of key-value pairs (optional) and a
    callable filter (also optional), instances of this class are meant
    to be used as ``image_filter`` or ``collection_filter`` parameters
    for ``fetch_neurovault``.

    Such filters can be combined using their methods ``AND``, ``OR``,
    ``XOR``, and ``NOT``, with the usual semantics.

    Key-value pairs can be added by treating a ``ResultFilter`` as a
    dictionary: after evaluating ``res_filter[key] = value``, only
    metadata such that ``metadata[key] == value`` can pass through the
    filter.

    Parameters
    ----------
    query_terms : dict, optional
        A ``metadata`` dictionary will be blocked by the filter if it
        does not respect ``metadata[key] == value`` for all
        ``key``, ``value`` pairs in `query_terms`. If ``None``, the
        empty dictionary is used.

    callable_filter : callable, default=_empty_filter
        A ``metadata`` dictionary will be blocked by the filter if
        `callable_filter` does not return ``True`` for ``metadata``.

    As an alternative to the `query_terms` dictionary parameter,
    key, value pairs can be passed as keyword arguments.

    Attributes
    ----------
    query_terms_ : :obj:`dict`
        In order to pass through the filter, metadata must verify
        ``metadata[key] == value`` for each ``key``, ``value`` pair in
        `query_terms_`.

    callable_filters_ : list of callables
        In addition to ``(key, value)`` pairs, we can use this
        attribute to specify more elaborate requirements. Called with
        a dict representing metadata for an image or collection, each
        element of this list returns ``True`` if the metadata should
        pass through the filter and ``False`` otherwise.

        A dict of metadata will only pass through the filter if it
        satisfies all the `query_terms` AND all the elements of
        `callable_filters_`.

    See Also
    --------
    nilearn.datasets.neurovault.IsNull,
    nilearn.datasets.neurovault.NotNull,
    nilearn.datasets.neurovault.NotEqual,
    nilearn.datasets.neurovault.GreaterOrEqual,
    nilearn.datasets.neurovault.GreaterThan,
    nilearn.datasets.neurovault.LessOrEqual,
    nilearn.datasets.neurovault.LessThan,
    nilearn.datasets.neurovault.IsIn,
    nilearn.datasets.neurovault.NotIn,
    nilearn.datasets.neurovault.Contains,
    nilearn.datasets.neurovault.NotContains,
    nilearn.datasets.neurovault.Pattern.

    Examples
    --------
    >>> from nilearn.datasets.neurovault import ResultFilter
    >>> filt = ResultFilter(a=0).AND(ResultFilter(b=1).OR(ResultFilter(b=2)))
    >>> filt({"a": 0, "b": 1})
    True
    >>> filt({"a": 0, "b": 0})
    False

    """

    def __init__(
        self, query_terms=None, callable_filter=_empty_filter, **kwargs
    ):
        if query_terms is None:
            query_terms = {}
        query_terms = dict(query_terms, **kwargs)
        self.query_terms_ = query_terms
        self.callable_filters_ = [callable_filter]

    def __call__(self, candidate):
        """Return True if candidate satisfies the requirements.

        Parameters
        ----------
        candidate : dict
            A dictionary representing metadata for a file or a
            collection, to be filtered.

        Returns
        -------
        bool
            ``True`` if `candidate` passes through the filter and ``False``
            otherwise.

        """
        for key, value in self.query_terms_.items():
            if value != candidate.get(key):
                return False
        return all(
            callable_filter(candidate)
            for callable_filter in self.callable_filters_
        )

    def OR(self, other_filter):  # noqa: N802
        """Implement the OR operator between two filters."""
        filt1, filt2 = deepcopy(self), deepcopy(other_filter)
        new_filter = ResultFilter(
            callable_filter=lambda r: filt1(r) or filt2(r)
        )
        return new_filter

    def AND(self, other_filter):  # noqa: N802
        """Implement the AND operator between two filters."""
        filt1, filt2 = deepcopy(self), deepcopy(other_filter)
        new_filter = ResultFilter(
            callable_filter=lambda r: filt1(r) and filt2(r)
        )
        return new_filter

    def XOR(self, other_filter):  # noqa: N802
        """Implement the XOR operator between two filters."""
        filt1, filt2 = deepcopy(self), deepcopy(other_filter)
        new_filter = ResultFilter(
            callable_filter=lambda r: filt1(r) != filt2(r)
        )
        return new_filter

    def NOT(self):  # noqa: N802
        """Implement the NOT operator, negating this filter."""
        filt = deepcopy(self)
        new_filter = ResultFilter(callable_filter=lambda r: not filt(r))
        return new_filter

    def __getitem__(self, item):
        """Get item from query_terms_."""
        return self.query_terms_[item]

    def __setitem__(self, item, value):
        """Set item in query_terms_."""
        self.query_terms_[item] = value

    def __delitem__(self, item):
        """Remove item from query_terms_."""
        if item in self.query_terms_:
            del self.query_terms_[item]

    def add_filter(self, callable_filter):
        """Add a function to the callable_filters_.

        After a call add_filter(additional_filt), in addition to all
        the previous requirements, a candidate must also verify
        additional_filt(candidate) in order to pass through the
        filter.

        """
        self.callable_filters_.append(callable_filter)

    def __str__(self):
        return self.__class__.__name__
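
# Illustrative sketch (not executed; field names below are assumptions, not
# guaranteed Neurovault metadata keys): combining key-value terms with an
# ad-hoc callable via ``ResultFilter.add_filter``.
#
#     filt = ResultFilter(modality="fMRI-BOLD")
#     filt.add_filter(lambda meta: meta.get("number_of_images", 0) > 10)
#     # filt(metadata) is True only if both conditions hold.
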

# Utilities for composing queries and interacting with
# neurovault and neurosynth

class _TemporaryDirectory:
    """Context manager that provides a temporary directory.

    A temporary directory is created on __enter__
    and removed on __exit__.

    Attributes
    ----------
    temp_dir_ : str or None
        location of temporary directory or None if not created.

    """

    def __init__(self):
        self.temp_dir_ = None

    def __enter__(self):
        self.temp_dir_ = mkdtemp()
        return Path(self.temp_dir_)

    def __exit__(self, *args):
        if self.temp_dir_ is None:
            return
        shutil.rmtree(self.temp_dir_)
        self.temp_dir_ = None
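
# Illustrative usage (a sketch, not part of the module's API):
#
#     with _TemporaryDirectory() as temp_dir:
#         ...  # temp_dir is a pathlib.Path; the directory and its
#         ...  # contents are removed when the block exits.
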
def _append_filters_to_query(query, filters):
    """Encode dict or sequence of key-value pairs into a URL query string.

    Parameters
    ----------
    query : str
        URL to which the filters should be appended.

    filters : dict or sequence of pairs
        Filters to append to the URL.

    Returns
    -------
    str
        The query with filters appended to it.

    Notes
    -----
    If one of the `filters` keys is 'id', we get the url that points
    directly to that id,
    e.g. 'https://neurovault.org/api/collections/40', and the other
    filters are ignored.

    """
    if not filters:
        return query
    if "id" in filters:
        return urljoin(query, str(filters["id"]))
    new_query = urljoin(query, f"?{urlencode(filters)}")
    return new_query
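
# For example (outputs follow from urljoin/urlencode; 'owner' is one of the
# server-side collection filters listed in
# _COL_FILTERS_AVAILABLE_ON_SERVER):
#
#     _append_filters_to_query(_NEUROVAULT_COLLECTIONS_URL, {"owner": "me"})
#     # -> 'https://neurovault.org/api/collections/?owner=me'
#     _append_filters_to_query(_NEUROVAULT_COLLECTIONS_URL, {"id": 40})
#     # -> 'https://neurovault.org/api/collections/40'
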
def _get_batch(query, prefix_msg="", timeout=_DEFAULT_TIME_OUT, verbose=3):
    """Given a URL, get the HTTP response and transform it to python dict.

    The URL is used to send an HTTP GET request and the response is
    transformed into a dictionary.

    Parameters
    ----------
    query : str
        The URL from which to get data.

    prefix_msg : str, default=''
        Prefix for all log messages.

    timeout : float, default=_DEFAULT_TIME_OUT
        Timeout in seconds.

    verbose : int, default=3
        An integer in [0, 1, 2, 3] to control the verbosity level.

    Returns
    -------
    batch : dict
        Python dict representing the response's content.

    Raises
    ------
    requests.RequestException
        If there was a problem opening the URL.

    ValueError
        If the response could not be decoded, was not json, or did not contain
        either 'id' (single result), or 'results' and 'count' (actual batch).

    """
    session = _requests_session()
    req = requests.Request(
        method="GET", url=query, headers={"Connection": "Keep-Alive"}
    )
    prepped = session.prepare_request(req)
    logger.log(
        f"{prefix_msg}getting new batch:\n\t{query}",
        verbose=verbose,
        msg_level=_DEBUG,
    )
    try:
        resp = session.send(prepped, timeout=timeout)
        resp.raise_for_status()
        batch = resp.json()
    except requests.exceptions.ReadTimeout:
        logger.log(
            (
                f"Could not get batch from {query}.\n\t"
                f"Timeout error with {timeout=} seconds.\n\t"
                "Try increasing 'timeout' value."
            ),
            msg_level=_ERROR,
            verbose=verbose,
            with_traceback=False,
        )
        raise
    except Exception:
        logger.log(
            f"Could not get batch from {query}",
            msg_level=_ERROR,
            verbose=verbose,
            with_traceback=True,
        )
        raise
    if "id" in batch:
        batch = {"count": 1, "results": [batch]}
    for key in ["results", "count"]:
        if batch.get(key) is None:
            msg = (
                f'Could not find required key "{key}" '
                f"in batch retrieved from {query}."
            )
            logger.log(msg, msg_level=_ERROR, verbose=verbose)
            raise ValueError(msg)

    return batch
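
# Shape of a successful batch (single results are wrapped above so that
# callers always see the same structure):
#
#     {"count": 2, "results": [{"id": 1, ...}, {"id": 2, ...}]}
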
def _scroll_server_results(
    url,
    local_filter=_empty_filter,
    query_terms=None,
    max_results=None,
    batch_size=None,
    prefix_msg="",
    timeout=_DEFAULT_TIME_OUT,
    verbose=3,
):
    """Download list of metadata from Neurovault.

    Parameters
    ----------
    url : str
        The base url (without the filters) from which to get data.

    local_filter : callable, default=_empty_filter
        Used to filter the results based on their metadata:
        must return True if the result is to be kept and False otherwise.
        Is called with the dict containing the metadata as sole argument.

    query_terms : dict, sequence of pairs or None, optional
        Key-value pairs to add to the base url in order to form query.
        If ``None``, nothing is added to the url.

    max_results : int or None, optional
        Maximum number of results to fetch; if ``None``, all available data
        that matches the query is fetched.

    batch_size : int or None, optional
        Neurovault returns the metadata for hits corresponding to a query
        in batches. batch_size is used to choose the (maximum) number of
        elements in a batch. If None, ``_DEFAULT_BATCH_SIZE`` is used.

    prefix_msg : str, default=''
        Prefix for all log messages.

    timeout : float, default=_DEFAULT_TIME_OUT
        Timeout in seconds.

    verbose : int, default=3
        An integer in [0, 1, 2, 3] to control the verbosity level.

    Yields
    ------
    result : dict
        A result in the retrieved batch.

    None
        Once for each batch that could not be downloaded or decoded,
        to indicate a failure.

    """
    query = _append_filters_to_query(url, query_terms)
    if batch_size is None:
        batch_size = _DEFAULT_BATCH_SIZE
    query = (
        f"{query}{'&' if '?' in query else '?'}limit={batch_size}&offset={{0}}"
    )
    downloaded = 0
    n_available = None
    while max_results is None or downloaded < max_results:
        new_query = query.format(downloaded)
        try:
            batch = _get_batch(
                new_query, prefix_msg, verbose=verbose, timeout=timeout
            )
        except Exception:
            yield None
            batch = None
        if batch is not None:
            batch_size = len(batch["results"])
            downloaded += batch_size
            logger.log(
                f"{prefix_msg}batch size: {batch_size}",
                msg_level=_DEBUG,
                verbose=verbose,
            )
            if n_available is None:
                n_available = batch["count"]
                max_results = (
                    n_available
                    if max_results is None
                    else min(max_results, n_available)
                )
            for result in batch["results"]:
                if local_filter(result):
                    yield result
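
# Pagination sketch: with the default batch size, the URLs requested above
# look like the following (the offset grows by the number of results
# actually received in each batch):
#
#     https://neurovault.org/api/collections/?limit=100&offset=0
#     https://neurovault.org/api/collections/?limit=100&offset=100
#     ...
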
def _yield_from_url_list(url_list, timeout=_DEFAULT_TIME_OUT, verbose=3):
    """Get metadata coming from an explicit list of URLs.

    This is different from ``_scroll_server_results``, which is used
    to get all the metadata that matches certain filters.

    Parameters
    ----------
    url_list : Container of str
        URLs from which to get data.

    timeout : float, default=_DEFAULT_TIME_OUT
        Timeout in seconds.

    verbose : int, default=3
        An integer in [0, 1, 2, 3] to control the verbosity level.

    Yields
    ------
    content : dict
        The metadata from one URL.

    None
        Once for each URL that resulted in an error, to signify failure.

    """
    for url in url_list:
        try:
            batch = _get_batch(url, verbose=verbose, timeout=timeout)
        except Exception:
            yield None
            batch = None
        if batch is not None:
            yield batch["results"][0]

def _simple_download(url, target_file, temp_dir, verbose=3):
    """Wrap around ``utils.fetch_single_file``.

    This allows specifying the target file name.

    Parameters
    ----------
    url : str
        URL of the file to download.

    target_file : str
        Location of the downloaded file on filesystem.

    temp_dir : pathlib.Path
        Location of sandbox directory used by ``fetch_single_file``.

    verbose : int, default=3
        An integer in [0, 1, 2, 3] to control the verbosity level.

    Returns
    -------
    target_file : str
        The location in which the file was downloaded.

    Raises
    ------
    RequestException, ValueError
        If an error occurred when downloading the file.

    See Also
    --------
    nilearn.datasets._utils.fetch_single_file

    """
    logger.log(
        f"Downloading file: {url}",
        msg_level=_DEBUG,
        verbose=verbose,
    )
    try:
        downloaded = fetch_single_file(
            url, temp_dir, resume=False, overwrite=True, verbose=0
        )
    except Exception:
        logger.log(
            f"Problem downloading file from {url}",
            msg_level=_ERROR,
            verbose=verbose,
        )
        raise
    shutil.move(downloaded, target_file)
    logger.log(
        f"Download succeeded, downloaded to: {target_file}",
        msg_level=_DEBUG,
        verbose=verbose,
    )
    return target_file

def neurosynth_words_vectorized(word_files, verbose=3, **kwargs):
    """Load Neurosynth data from disk into an (n images, voc size) matrix.

    Neurosynth data is saved on disk as ``{word: weight}``
    dictionaries for each image; this function reads them and returns a
    vocabulary list and a term weight matrix.

    Parameters
    ----------
    word_files : Container
        The paths to the files from which to read word weights (each
        is supposed to contain the Neurosynth response for a
        particular image).

    verbose : :obj:`int`, default=3
        An integer in [0, 1, 2, 3] to control the verbosity level.

    Keyword arguments are passed on to
    ``sklearn.feature_extraction.DictVectorizer``.

    Returns
    -------
    frequencies : numpy.ndarray
        An (n images, vocabulary size) array. Each row corresponds to
        an image, and each column corresponds to a word. The words are
        in the same order as in returned value `vocabulary`, so that
        `frequencies[i, j]` corresponds to the weight of
        `vocabulary[j]` for image ``i``. This matrix is computed by
        an ``sklearn.feature_extraction.DictVectorizer`` instance.

    vocabulary : list of str
        A list of all the words encountered in the word files.

    See Also
    --------
    sklearn.feature_extraction.DictVectorizer

    """
    logger.log("Computing word features.", msg_level=_INFO, verbose=verbose)
    words = []
    voc_empty = True
    for file_name in word_files:
        try:
            with Path(file_name).open("rb") as word_file:
                info = json.loads(word_file.read().decode("utf-8"))
                words.append(info["data"]["values"])
                if info["data"]["values"] != {}:
                    voc_empty = False
        except Exception:
            logger.log(
                (
                    f"Could not load words from file {file_name}; "
                    f"error: {traceback.format_exc()}"
                ),
                msg_level=_ERROR,
                verbose=verbose,
            )
            words.append({})
    if voc_empty:
        warnings.warn(
            "No word weight could be loaded, "
            "vectorizing Neurosynth words failed.",
            stacklevel=find_stack_level(),
        )
        return None, None
    vectorizer = DictVectorizer(**kwargs)
    frequencies = vectorizer.fit_transform(words).toarray()
    vocabulary = np.asarray(vectorizer.feature_names_)
    logger.log(
        f"Computing word features done; vocabulary size: {vocabulary.size}",
        msg_level=_INFO,
        verbose=verbose,
    )
    return frequencies, vocabulary
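
# A minimal sketch of what DictVectorizer does here (the weights are made
# up; columns are sorted alphabetically by default):
#
#     words = [{"face": 0.8, "house": 0.1}, {"house": 0.5}]
#     # fit_transform(words).toarray() ->
#     #     [[0.8, 0.1],
#     #      [0.0, 0.5]]
#     # feature_names_ -> ["face", "house"]
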
def _remove_none_strings(metadata):
    """Replace strings representing a null value with ``None``.

    Some collections and images in Neurovault, for some fields, use the
    string "None", "None / Other", or "null", instead of having ``null``
    in the json file; we replace these strings with ``None`` so that
    they are consistent with the rest and for correct behavior when we
    want to select or filter out null values.

    Parameters
    ----------
    metadata : dict
        Metadata to transform.

    Returns
    -------
    metadata : dict
        Original metadata in which strings representing null values
        have been replaced by ``None``.

    """
    metadata = metadata.copy()
    for key, value in metadata.items():
        if isinstance(value, str) and re.match(
            r"($|n/?a$|none|null)", value, re.IGNORECASE
        ):
            metadata[key] = None
    return metadata
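
# For example, given the regex above (empty strings, "n/a", and anything
# starting with "none" or "null", case-insensitively):
#
#     _remove_none_strings({"map_type": "None / Other", "doi": "n/a"})
#     # -> {"map_type": None, "doi": None}
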
def _write_metadata(metadata, file_name):
    """Save metadata to disk.

    Absolute paths are not written; they are recomputed using the
    relative paths when data is loaded again, so that if the
    Neurovault directory has been moved paths are still valid.

    Parameters
    ----------
    metadata : dict
        Dictionary representing metadata for a file or a
        collection. Any key containing 'absolute' is ignored.

    file_name : str or pathlib.Path
        Path to the file in which to write the data.

    """
    metadata = {k: v for k, v in metadata.items() if "absolute" not in k}

    # Path objects need to be converted to string for the JSON serialization
    for key, value in metadata.items():
        if isinstance(value, Path):
            metadata[key] = str(value)

    with Path(file_name).open("wb") as metadata_file:
        metadata_file.write(json.dumps(metadata).encode("utf-8"))

def _add_absolute_paths(root_dir, metadata, force=True):
    """Add absolute paths to a dictionary containing relative paths.

    Parameters
    ----------
    root_dir : pathlib.Path
        The root of the data directory, to prepend to relative paths
        in order to form absolute paths.

    metadata : dict
        Dictionary containing metadata for a file or a collection. Any
        key containing 'relative' is understood to be mapped to a
        relative path and the corresponding absolute path is added to
        the dictionary.

    force : bool, default=True
        If ``True``, if an absolute path is already present in the
        metadata, it is replaced with the recomputed value. If
        ``False``, already specified absolute paths have priority.

    Returns
    -------
    metadata : dict
        The metadata enriched with absolute paths.

    """
    absolute_paths = {}
    for name, value in metadata.items():
        match = re.match(r"(.*)relative_path(.*)", name)
        if match is not None:
            abs_name = f"{match.groups()[0]}absolute_path{match.groups()[1]}"
            absolute_paths[abs_name] = root_dir / value
    if not absolute_paths:
        return metadata
    new_metadata = metadata.copy()
    set_func = new_metadata.__setitem__ if force else new_metadata.setdefault
    for name, value in absolute_paths.items():
        set_func(name, value)
    return new_metadata
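
# Key-mapping sketch: any '*relative_path*' key gains an '*absolute_path*'
# twin. For instance, with root_dir = Path("/data/neurovault"), the entry
#
#     {"ns_words_relative_path": "collection_42/words.json"}
#
# gains "ns_words_absolute_path" mapped to
# Path("/data/neurovault/collection_42/words.json").
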
def _json_from_file(file_name):
    """Load a json file encoded with UTF-8.

    Parameters
    ----------
    file_name : str or pathlib.Path
    """
    with Path(file_name).open("rb") as dumped:
        loaded = json.loads(dumped.read().decode("utf-8"))
    return loaded

def _json_add_collection_dir(file_name, force=True):
    """Load a json file and add its parent dir to the resulting dict.

    Parameters
    ----------
    file_name : str or pathlib.Path

    force : bool
    """
    file_name = Path(file_name)
    loaded = _json_from_file(file_name)
    set_func = loaded.__setitem__ if force else loaded.setdefault
    dir_path = file_name.parent
    set_func("absolute_path", dir_path.absolute())
    set_func("relative_path", dir_path)
    return loaded

def _json_add_im_files_paths(file_name, force=True):
    """Load a json file and add image and words paths."""
    loaded = _json_from_file(file_name)
    set_func = loaded.__setitem__ if force else loaded.setdefault
    dir_path = file_name.parent
    image_file_name = f"image_{loaded['id']}.nii.gz"
    words_file_name = f"neurosynth_words_for_image_{loaded['id']}.json"
    set_func("relative_path", dir_path / image_file_name)
    if (dir_path / words_file_name).is_file():
        set_func(
            "ns_words_relative_path",
            dir_path / words_file_name,
        )
    loaded = _add_absolute_paths(dir_path.parent, loaded, force=force)
    return loaded

def _download_collection(collection, download_params):
    """Create directory and download metadata for a collection.

    Parameters
    ----------
    collection : dict
        Collection metadata.

    download_params : dict
        General information about the download session, containing e.g.
        the data directory (see `_read_download_params` and
        `_prepare_download_params` for details).

    Returns
    -------
    collection : dict
        Collection metadata, with local path added to it.

    """
    if collection is None:
        return None

    collection = _remove_none_strings(collection)
    collection_id = collection["id"]
    collection_name = f"collection_{collection_id}"
    collection_dir = Path(download_params["nv_data_dir"]) / collection_name
    collection["relative_path"] = collection_name
    collection["absolute_path"] = collection_dir.absolute()

    if not collection_dir.is_dir():
        collection_dir.mkdir(parents=True)

    metadata_file_path = collection_dir / "collection_metadata.json"
    _write_metadata(collection, metadata_file_path)

    return collection

def _fetch_collection_for_image(image_info, download_params):
    """Find the collection metadata for an image.

    If necessary, the collection metadata is downloaded and its
    directory is created.

    Parameters
    ----------
    image_info : dict
        Image metadata.

    download_params : dict
        General information about the download session, containing e.g.
        the data directory (see `_read_download_params` and
        `_prepare_download_params` for details).

    Returns
    -------
    collection : dict
        The collection metadata.

    """
    collection_id = image_info["collection_id"]
    collection_relative_path = f"collection_{collection_id}"
    collection_absolute_path = (
        Path(download_params["nv_data_dir"]) / collection_relative_path
    )
    if collection_absolute_path.is_dir():
        return _json_add_collection_dir(
            collection_absolute_path / "collection_metadata.json"
        )

    col_batch = _get_batch(
        urljoin(_NEUROVAULT_COLLECTIONS_URL, str(collection_id)),
        verbose=download_params["verbose"],
    )
    return _download_collection(col_batch["results"][0], download_params)

def _download_image_nii_file(image_info, collection, download_params):
    """Download an image (.nii.gz) file from Neurovault.

    Parameters
    ----------
    image_info : dict
        Image metadata.

    collection : dict
        Corresponding collection metadata.

    download_params : dict
        General information about the download session, containing e.g.
        the data directory (see `_read_download_params` and
        `_prepare_download_params` for details).

    Returns
    -------
    image_info : dict
        Image metadata with local paths added to it.

    collection : dict
        Corresponding collection metadata with local paths added to it.

    """
    image_info = image_info.copy()
    image_id = image_info["id"]
    image_url = image_info["file"]
    image_file_name = f"image_{image_id}.nii.gz"
    image_relative_path = Path(collection["relative_path"], image_file_name)
    image_absolute_path = Path(collection["absolute_path"], image_file_name)

    resampled_image_file_name = f"image_{image_id}_resampled.nii.gz"
    resampled_image_absolute_path = Path(
        collection["absolute_path"], resampled_image_file_name
    )
    resampled_image_relative_path = Path(
        collection["relative_path"], resampled_image_file_name
    )

    image_info["absolute_path"] = image_absolute_path
    image_info["relative_path"] = image_relative_path
    image_info["resampled_absolute_path"] = resampled_image_absolute_path
    image_info["resampled_relative_path"] = resampled_image_relative_path

    if download_params["resample"]:
        # Generate a temporary file name
        struuid = str(uuid.uuid1())
        tmp_file = f"tmp_{struuid}.nii.gz"
        tmp_path = Path(collection["absolute_path"], tmp_file)

        _simple_download(
            image_url,
            tmp_path,
            download_params["temp_dir"],
            verbose=download_params["verbose"],
        )

        # Resample here
        logger.log(
            "Resampling...",
        )
        # TODO switch to force_resample=True
        # when bumping to version > 0.13
        im_resampled = resample_img(
            img=tmp_path,
            target_affine=STD_AFFINE,
            interpolation=download_params["interpolation"],
            copy_header=True,
            force_resample=False,
        )
        im_resampled.to_filename(resampled_image_absolute_path)

        # Remove temporary file
        tmp_path.unlink()
    else:
        _simple_download(
            image_url,
            image_absolute_path,
            download_params["temp_dir"],
            verbose=download_params["verbose"],
        )
    return image_info, collection

def _check_has_words(file_name):
    """Return True if a Neurosynth words file contains usable weights.

    Files that exist but hold no word weights are removed from disk.
    """
    file_name = Path(file_name)
    if not file_name.is_file():
        return False
    info = _remove_none_strings(_json_from_file(file_name))
    try:
        assert len(info["data"]["values"])
        return True
    except (AttributeError, TypeError, AssertionError):
        pass
    file_name.unlink()
    return False

def _download_image_terms(image_info, collection, download_params):
    """Download Neurosynth words for an image.

    Parameters
    ----------
    image_info : dict
        Image metadata.

    collection : dict
        Corresponding collection metadata.

    download_params : dict
        General information about the download session, containing e.g.
        the data directory (see `_read_download_params` and
        `_prepare_download_params` for details).

    Returns
    -------
    image_info : dict
        Image metadata with neurosynth words file path added to it.

    collection : dict
        Corresponding collection metadata.

    """
    if not download_params["fetch_neurosynth_words"]:
        return image_info, collection

    ns_words_file_name = f"neurosynth_words_for_image_{image_info['id']}.json"
    image_info = image_info.copy()
    image_info["ns_words_relative_path"] = Path(
        collection["relative_path"], ns_words_file_name
    )
    image_info["ns_words_absolute_path"] = Path(
        collection["absolute_path"], ns_words_file_name
    )

    if Path(image_info["ns_words_absolute_path"]).is_file():
        return image_info, collection

    query = urljoin(
        _NEUROSYNTH_FETCH_WORDS_URL, f"?neurovault={image_info['id']}"
    )
    try:
        _simple_download(
            query,
            image_info["ns_words_absolute_path"],
            download_params["temp_dir"],
            verbose=download_params["verbose"],
        )
        assert _check_has_words(image_info["ns_words_absolute_path"])
    except Exception:
        message = f"Could not fetch words for image {image_info['id']}"
        if not download_params.get("allow_neurosynth_failure", True):
            raise RuntimeError(message)
        logger.log(
            message,
            msg_level=_ERROR,
            verbose=download_params["verbose"],
            with_traceback=True,
        )

    return image_info, collection
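
# The Neurosynth decoding endpoint queried above has the form
#
#     https://neurosynth.org/api/decode/?neurovault=<image_id>
#
# and its JSON response stores word weights under ["data"]["values"], which
# is what _check_has_words and neurosynth_words_vectorized read back.
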
def _download_image(image_info, download_params):
    """Download a Neurovault image.

    If necessary, create the corresponding collection's directory and
    download the collection's metadata.

    Parameters
    ----------
    image_info : dict
        Image metadata.

    download_params : dict
        General information about the download session, containing e.g.
        the data directory (see `_read_download_params` and
        `_prepare_download_params` for details).

    Returns
    -------
    image_info : dict
        Image metadata with local paths added to it.

    """
    if image_info is None:
        return None
    image_info = _remove_none_strings(image_info)

    # image_info = self._image_hook(image_info)
    collection = _fetch_collection_for_image(image_info, download_params)
    image_info, collection = _download_image_nii_file(
        image_info, collection, download_params
    )
    image_info, collection = _download_image_terms(
        image_info, collection, download_params
    )
    metadata_file_path = Path(
        collection["absolute_path"], f"image_{image_info['id']}_metadata.json"
    )
    _write_metadata(image_info, metadata_file_path)

    return image_info

def _update_image(image_info, download_params):
    """Update local metadata for an image.

    If required and necessary, download the Neurosynth tags.

    Parameters
    ----------
    image_info : dict
        Image metadata.

    download_params : dict
        General information about the download session, containing e.g.
        the data directory (see `_read_download_params` and
        `_prepare_download_params` for details).

    Returns
    -------
    image_info : dict
        Image metadata.

    """
    if not download_params["write_ok"]:
        return image_info
    try:
        collection = _fetch_collection_for_image(image_info, download_params)
        image_info, collection = _download_image_terms(
            image_info, collection, download_params
        )
        metadata_file_path = (
            Path(image_info["absolute_path"]).parent
            / f"image_{image_info['id']}_metadata.json"
        )
        _write_metadata(image_info, metadata_file_path)
    except OSError:
        warnings.warn(
            f"Could not update metadata for image {image_info['id']}, "
            "most likely because you do not have "
            "write permissions to its metadata file.",
            stacklevel=find_stack_level(),
        )
    return image_info

def _update(image_info, collection, download_params):
    """Update local metadata for an image and its collection."""
    image_info = _update_image(image_info, download_params)
    return image_info, collection

def _scroll_local(download_params):
    """Iterate over local neurovault data.

    Parameters
    ----------
    download_params : dict
        General information about the download session, containing e.g.
        the data directory (see `_read_download_params` and
        `_prepare_download_params` for details).

    Yields
    ------
    image : dict
        Metadata for an image.

    collection : dict
        Metadata for the corresponding collection.

    """
    logger.log(
        "Reading local neurovault data.",
        msg_level=_DEBUG,
        verbose=download_params["verbose"],
    )

    collections = Path(download_params["nv_data_dir"]).rglob(
        "collection_metadata.json"
    )

    good_collections = (
        col
        for col in (_json_add_collection_dir(col) for col in collections)
        if download_params["local_collection_filter"](col)
    )
    for collection in good_collections:
        images = Path(collection["absolute_path"]).glob(
            "image_*_metadata.json"
        )

        good_images = (
            img
            for img in (_json_add_im_files_paths(img) for img in images)
            if download_params["local_image_filter"](img)
        )
        for image in good_images:
            image, collection = _update(image, collection, download_params)
            if download_params["resample"]:
                if not Path(image["resampled_absolute_path"]).is_file():
                    # TODO switch to force_resample=True
                    # when bumping to version > 0.13
                    im_resampled = resample_img(
                        img=image["absolute_path"],
                        target_affine=STD_AFFINE,
                        interpolation=download_params["interpolation"],
                        copy_header=True,
                        force_resample=False,
                    )
                    im_resampled.to_filename(
                        image["resampled_absolute_path"]
                    )
                download_params["visited_images"].add(image["id"])
                download_params["visited_collections"].add(collection["id"])
                yield image, collection
            elif Path(image["absolute_path"]).is_file():
                download_params["visited_images"].add(image["id"])
                download_params["visited_collections"].add(collection["id"])
                yield image, collection

def _scroll_collection(collection, download_params):
    """Iterate over the content of a collection on Neurovault server.

    Images that are found and match filter criteria are downloaded.

    Parameters
    ----------
    collection : dict
        Metadata for the collection.

    download_params : dict
        General information about the download session, containing e.g.
        the data directory (see `_read_download_params` and
        `_prepare_download_params` for details).

    Yields
    ------
    image : dict
        Metadata for an image.

    Notes
    -----
    ``image`` can be ``None`` to signify a failed download.

    """
    if collection is None:
        yield None
        return
    n_im_in_collection = 0
    fails_in_collection = 0
    query = urljoin(_NEUROVAULT_COLLECTIONS_URL, f"{collection['id']}/images/")
    images = _scroll_server_results(
        query,
        query_terms=download_params["image_terms"],
        local_filter=download_params["image_filter"],
        prefix_msg=f"Scroll images from collection {collection['id']}: ",
        batch_size=download_params["batch_size"],
        timeout=download_params["timeout"],
        verbose=download_params["verbose"],
    )

    for image in images:
        if image is None:
            yield None
        try:
            image = _download_image(image, download_params)
            fails_in_collection = 0
            n_im_in_collection += 1
            yield image
        except Exception:
            fails_in_collection += 1
            logger.log(
                f"_scroll_collection: bad image: {image}",
                msg_level=_ERROR,
                verbose=download_params["verbose"],
                with_traceback=True,
            )
            yield None
        if fails_in_collection == download_params["max_fails_in_collection"]:
            logger.log(
                f"Too many bad images in collection {collection['id']}: "
                f"{fails_in_collection} bad images.",
                msg_level=_ERROR,
                verbose=download_params["verbose"],
            )
            return
    logger.log(
        "On neurovault.org: "
        f"{n_im_in_collection or 'no'} "
        f"image{'s' if n_im_in_collection > 1 else ''} "
        f"matched query in collection {collection['id']}",
        msg_level=_INFO,
        verbose=download_params["verbose"],
    )

def _scroll_filtered(download_params):
    """Iterate over Neurovault data that matches specified filters.

    Images and collections which match the filters provided in the
    download parameters are fetched from the server.

    Parameters
    ----------
    download_params : dict
        General information about the download session, containing e.g.
        the data directory (see `_read_download_params` and
        `_prepare_download_params` for details).

    Yields
    ------
    image : dict
        Metadata for an image.

    collection : dict
        Metadata for the corresponding collection.

    Notes
    -----
    ``image``, ``collection`` can be ``None``, ``None`` to signify a
    failed download.

    """
    logger.log(
        "Reading server neurovault data.",
        msg_level=_DEBUG,
        verbose=download_params["verbose"],
    )

    download_params["collection_filter"] = ResultFilter(
        {"id": NotIn(*download_params["visited_collections"])}
    ).AND(download_params["collection_filter"])

    download_params["image_filter"] = ResultFilter(
        {"id": NotIn(*download_params["visited_images"])}
    ).AND(download_params["image_filter"])

    collections = _scroll_server_results(
        _NEUROVAULT_COLLECTIONS_URL,
        query_terms=download_params["collection_terms"],
        local_filter=download_params["collection_filter"],
        prefix_msg="Scroll collections: ",
        batch_size=download_params["batch_size"],
        timeout=download_params["timeout"],
        verbose=download_params["verbose"],
    )

    for collection in collections:
        collection = _download_collection(collection, download_params)
        collection_content = _scroll_collection(collection, download_params)
        for image in collection_content:
            yield image, collection

def _scroll_collection_ids(download_params):
    """Download a specific list of collections from Neurovault.

    The collections listed in the download parameters, and all
    the images they contain, are downloaded.

    Parameters
    ----------
    download_params : dict
        General information about the download session, containing e.g.
        the data directory (see `_read_download_params` and
        `_prepare_download_params` for details).

    Yields
    ------
    image : dict
        Metadata for an image.

    collection : dict
        Metadata for the corresponding collection.

    Notes
    -----
    ``image``, ``collection`` can be ``None``, ``None`` to signify a
    failed download.

    """
    collection_urls = [
        urljoin(_NEUROVAULT_COLLECTIONS_URL, str(col_id))
        for col_id in download_params["wanted_collection_ids"]
    ]

    if collection_urls:
        logger.log(
            "Reading collections from server neurovault data.",
            msg_level=_DEBUG,
            verbose=download_params["verbose"],
        )

    collections = _yield_from_url_list(
        collection_urls,
        verbose=download_params["verbose"],
        timeout=download_params["timeout"],
    )
    for collection in collections:
        collection = _download_collection(collection, download_params)
        for image in _scroll_collection(collection, download_params):
            yield image, collection

2010def _scroll_image_ids(download_params):
2011 """Download a specific list of images from Neurovault.
2013 The images listed in the download parameters, and the metadata for
2014 the collections they belong to, are downloaded.
2016 Parameters
2017 ----------
2018 download_params : dict
2019 General information about the download session, e.g. the
2020 data directory (see `_read_download_params` and
2021 `_prepare_download_params` for details).
2023 Yields
2024 ------
2025 image : dict
2026 Metadata for an image.
2028 collection : dict
2029 Metadata for the corresponding collection.
2031 Notes
2032 -----
2033 ``image`` and ``collection`` are both ``None`` when a download
2034 has failed.
2036 """
2037 image_urls = [
2038 urljoin(_NEUROVAULT_IMAGES_URL, str(im_id))
2039 for im_id in download_params["wanted_image_ids"]
2040 ]
2042 images = _yield_from_url_list(
2043 image_urls, verbose=download_params["verbose"], timeout=download_params["timeout"]
2044 )
2045 for image in images:
2046 try:
2047 image = _download_image(image, download_params)
2048 collection = _json_add_collection_dir(
2049 Path(image["absolute_path"]).parent
2050 / "collection_metadata.json",
2051 )
2052 except Exception:
2053 image, collection = None, None
2054 yield image, collection
2057def _scroll_explicit(download_params):
2058 """Download specific lists of collections and images from Neurovault.
2060 Parameters
2061 ----------
2062 download_params : dict
2063 General information about the download session, e.g. the
2064 data directory (see `_read_download_params` and
2065 `_prepare_download_params` for details).
2067 Yields
2068 ------
2069 image : dict
2070 Metadata for an image.
2072 collection : dict
2073 Metadata for the corresponding collection.
2075 Notes
2076 -----
2077 ``image`` and ``collection`` are both ``None`` when a download
2078 has failed.
2080 """
2081 download_params["wanted_collection_ids"] = set(
2082 download_params["wanted_collection_ids"]
2083 ).difference(download_params["visited_collections"])
2084 for image, collection in _scroll_collection_ids(download_params):
2085 if image is not None:
2086 download_params["visited_images"].add(image["id"])
2087 yield image, collection
2089 download_params["wanted_image_ids"] = set(
2090 download_params["wanted_image_ids"]
2091 ).difference(download_params["visited_images"])
2093 yield from _scroll_image_ids(download_params)
2096def _print_progress(found, download_params, level=_INFO):
2097 """Print number of images fetched so far."""
2098 logger.log(
2099 f"Already fetched {found} image{'s' if found > 1 else ''}",
2100 msg_level=level,
2101 verbose=download_params["verbose"],
2102 )
2105def _scroll(download_params):
2106 """Iterate over Neurovault data.
2108 Relevant images and collections are loaded from local disk, then
2109 from neurovault.org
2111 Parameters
2112 ----------
2113 download_params : dict
2114 General information about the download session, e.g. the
2115 data directory (see `_read_download_params` and
2116 `_prepare_download_params` for details).
2118 Yields
2119 ------
2120 image : dict
2121 Metadata for an image.
2123 collection : dict
2124 Metadata for the corresponding collection.
2126 Notes
2127 -----
2128 Stops if:
2129 - All available images have been fetched.
2130 - Or a max number of images has been specified by user and
2131 reached.
2132 - Or too many downloads have failed in a row.
2134 """
2135 scroll_modes = {"filtered": _scroll_filtered, "explicit": _scroll_explicit}
2136 if download_params["max_images"] == 0:
2137 return
2138 found = 0
2140 if download_params["download_mode"] != "overwrite":
2141 for image, collection in _scroll_local(download_params):
2142 found = len(download_params["visited_images"])
2143 _print_progress(found, download_params, _DEBUG)
2144 yield image, collection
2145 if found == download_params["max_images"]:
2146 break
2147 logger.log(
2148 f"{found or 'No'} "
2149 f"image{'s' if found > 1 else ''} "
2150 "found on local disk.",
2151 msg_level=_INFO,
2152 verbose=download_params["verbose"],
2153 )
2155 if download_params["download_mode"] == "offline":
2156 return
2157 if found == download_params["max_images"]:
2158 return
2159 server_data = scroll_modes[download_params["scroll_mode"]](download_params)
2160 n_consecutive_fails = 0
2161 for image, collection in server_data:
2162 if image is None or collection is None:
2163 n_consecutive_fails += 1
2164 else:
2165 n_consecutive_fails = 0
2166 found += 1
2167 _print_progress(found, download_params)
2168 yield image, collection
2170 if n_consecutive_fails >= download_params["max_consecutive_fails"]:
2171 warnings.warn(
2172 "Neurovault download stopped early: "
2173 f"too many downloads failed in a row ({n_consecutive_fails})",
2174 stacklevel=find_stack_level(),
2175 )
2176 return
2177 if found == download_params["max_images"]:
2178 return
2181# Utilities for providing defaults and transforming input and output
2184def _split_terms(terms, available_on_server):
2185 """Isolate term filters that can be applied by server."""
2186 terms_ = dict(terms)
2187 server_terms = {
2188 k: terms_.pop(k)
2189 for k in available_on_server
2190 if k in terms_ and (isinstance(terms_[k], (str, int)))
2191 }
2192 return terms_, server_terms
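
# Illustrative sketch, not part of the original module: how `_split_terms`
# separates server-applicable filters from local ones (the term values
# below are made up).
def _example_split_terms():  # pragma: no cover - illustration only
    terms = {"DOI": NotNull(), "name": "my collection", "id": 40}
    local, server = _split_terms(terms, _COL_FILTERS_AVAILABLE_ON_SERVER)
    # server == {"name": "my collection", "id": 40}: plain str/int values
    # for keys the server knows can be encoded in the URL query.
    # local == {"DOI": NotNull()}: special values cannot be serialized,
    # so they must be checked locally against downloaded metadata.
    return local, server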
2195def _move_unknown_terms_to_local_filter(
2196 terms, local_filter, available_on_server
2197):
2198 """Move filters handled by the server inside URL.
2200 Some filters are available on the server and can be inserted into
2201 the URL query. The rest will have to be applied on metadata
2202 locally.
2204 """
2205 local_terms, server_terms = _split_terms(terms, available_on_server)
2206 local_filter = ResultFilter(query_terms=local_terms).AND(local_filter)
2207 return server_terms, local_filter
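
# Illustrative sketch, not part of the original module: `ResultFilter`
# instances (defined earlier in this file) are callables over metadata
# dicts and compose with ``.AND``/``.OR``; this is how the helper above
# folds leftover terms into the local filter.
def _example_compose_filters():  # pragma: no cover - illustration only
    keep = ResultFilter(number_of_images=NotNull()).AND(
        ResultFilter({"id": IsIn(35, 40)})
    )
    return keep({"number_of_images": 3, "id": 40})  # True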
2210def basic_collection_terms():
2211 """Return a term filter that excludes empty collections."""
2212 return {"number_of_images": NotNull()}
2215def basic_image_terms():
2216 """Filter that selects unthresholded F, T and Z maps in :term:`MNI` space.
2218 More precisely, an image is excluded if one of the following is
2219 true:
2221 - It is not in :term:`MNI` space.
2222 - It is thresholded.
2223 - Its map type is one of "ROI/mask", "anatomical", or "parcellation".
2224 - Its image type is "atlas".
2226 """
2227 return {
2228 "not_mni": False,
2229 "is_thresholded": False,
2230 "map_type": NotIn("ROI/mask", "anatomical", "parcellation"),
2231 "image_type": NotEqual("atlas"),
2232 }
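
# Illustrative sketch, not part of the original module: keep the default
# criteria from `basic_image_terms` but additionally require Z maps.
def _example_image_terms():  # pragma: no cover - illustration only
    terms = dict(basic_image_terms(), map_type="Z map")
    # Still excludes thresholded and non-MNI images, but `map_type` must
    # now equal "Z map" instead of merely avoiding masks, anatomical
    # images, and parcellations.
    return terms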
2235def _move_col_id(im_terms, col_terms):
2236 """Reposition 'collection_id' term.
2238 If the collection id was specified in image filters, move it to
2239 the collection filters for efficiency.
2241 This makes specifying the collection id as a keyword argument for
2242 ``fetch_neurovault`` efficient.
2244 """
2245 if "collection_id" not in im_terms:
2246 return im_terms, col_terms
2247 im_terms = copy(im_terms)
2248 col_terms = copy(col_terms)
2249 if "id" not in col_terms or col_terms["id"] == im_terms["collection_id"]:
2250 col_terms["id"] = im_terms.pop("collection_id")
2251 else:
2252 warnings.warn(
2253 "You specified contradictory collection ids, "
2254 "one in the image filters and one in the "
2255 "collection filters",
2256 stacklevel=find_stack_level(),
2257 )
2258 return im_terms, col_terms
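
# Illustrative sketch, not part of the original module: the effect of
# `_move_col_id` on a pair of term dicts (the id is made up).
def _example_move_col_id():  # pragma: no cover - illustration only
    im_terms, col_terms = _move_col_id(
        {"collection_id": 35, "map_type": "Z map"}, {}
    )
    # im_terms == {"map_type": "Z map"}, col_terms == {"id": 35}:
    # the id can now be filtered server-side at the collection level.
    return im_terms, col_terms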
2261def _read_download_params(
2262 data_dir,
2263 download_mode="download_new",
2264 collection_terms=None,
2265 collection_filter=_empty_filter,
2266 image_terms=None,
2267 image_filter=_empty_filter,
2268 wanted_collection_ids=None,
2269 wanted_image_ids=None,
2270 max_images=None,
2271 max_consecutive_fails=_MAX_CONSECUTIVE_FAILS,
2272 max_fails_in_collection=_MAX_FAILS_IN_COLLECTION,
2273 resample=False,
2274 interpolation="linear",
2275 batch_size=None,
2276 timeout=_DEFAULT_TIME_OUT,
2277 verbose=3,
2278 fetch_neurosynth_words=False,
2279 vectorize_words=True,
2280):
2281 """Create a dictionary containing download information."""
2282 download_params = {"verbose": verbose}
2283 download_mode = download_mode.lower()
2284 if download_mode not in ["overwrite", "download_new", "offline"]:
2285 raise ValueError(
2286 "Supported download modes are: overwrite, download_new, offline. "
2287 f"Got {download_mode}."
2288 )
2289 download_params["download_mode"] = download_mode
2290 if collection_terms is None:
2291 collection_terms = {}
2292 if image_terms is None:
2293 image_terms = {}
2294 if max_images is not None and max_images < 0:
2295 max_images = None
2296 download_params["nv_data_dir"] = data_dir
2297 download_params["collection_terms"] = dict(collection_terms)
2298 download_params["collection_filter"] = collection_filter
2299 download_params["image_terms"] = dict(image_terms)
2300 download_params["image_filter"] = image_filter
2301 download_params["visited_images"] = set()
2302 download_params["visited_collections"] = set()
2303 download_params["max_images"] = max_images
2304 download_params["max_consecutive_fails"] = max_consecutive_fails
2305 download_params["max_fails_in_collection"] = max_fails_in_collection
2306 download_params["batch_size"] = batch_size
2307 download_params["resample"] = resample
2308 download_params["interpolation"] = interpolation
2309 download_params["wanted_image_ids"] = wanted_image_ids
2310 download_params["wanted_collection_ids"] = wanted_collection_ids
2311 download_params["fetch_neurosynth_words"] = fetch_neurosynth_words
2312 download_params["write_ok"] = os.access(
2313 download_params["nv_data_dir"], os.W_OK
2314 )
2315 download_params["vectorize_words"] = vectorize_words
2316 download_params["timeout"] = timeout
2317 return download_params
2320def _prepare_explicit_ids_download_params(download_params):
2321 """Prepare the download parameters if explicit ids are specified."""
2322 if download_params.get("wanted_image_ids") is None:
2323 download_params["wanted_image_ids"] = []
2324 if download_params.get("wanted_collection_ids") is None:
2325 download_params["wanted_collection_ids"] = []
2326 download_params["max_images"] = None
2327 download_params["scroll_mode"] = "explicit"
2328 download_params["image_terms"] = {}
2329 download_params["image_filter"] = _empty_filter
2330 download_params["collection_terms"] = {}
2331 download_params["collection_filter"] = _empty_filter
2332 download_params["local_collection_filter"] = _empty_filter
2333 download_params["local_image_filter"] = ResultFilter(
2334 {"id": IsIn(*download_params["wanted_image_ids"])}
2335 ).OR(
2336 ResultFilter(
2337 collection_id=IsIn(*download_params["wanted_collection_ids"])
2338 )
2339 )
2340 return download_params
2343def _prepare_filtered_download_params(download_params):
2344 """Prepare the download parameters if filters are used."""
2345 (
2346 download_params["image_terms"],
2347 download_params["collection_terms"],
2348 ) = _move_col_id(
2349 download_params["image_terms"], download_params["collection_terms"]
2350 )
2351 (
2352 download_params["collection_terms"],
2353 download_params["collection_filter"],
2354 ) = _move_unknown_terms_to_local_filter(
2355 download_params["collection_terms"],
2356 download_params["collection_filter"],
2357 _COL_FILTERS_AVAILABLE_ON_SERVER,
2358 )
2360 (
2361 download_params["image_terms"],
2362 download_params["image_filter"],
2363 ) = _move_unknown_terms_to_local_filter(
2364 download_params["image_terms"],
2365 download_params["image_filter"],
2366 _IM_FILTERS_AVAILABLE_ON_SERVER,
2367 )
2369 download_params["local_collection_filter"] = ResultFilter(
2370 **download_params["collection_terms"]
2371 ).AND(download_params["collection_filter"])
2372 download_params["local_image_filter"] = ResultFilter(
2373 **download_params["image_terms"]
2374 ).AND(download_params["image_filter"])
2376 download_params["scroll_mode"] = "filtered"
2377 return download_params
2380def _prepare_download_params(download_params):
2381 """Adjust the download parameters.
2383 Information for the downloaders is added. The result depends on
2384 whether we are downloading a set of collections and images
2385 explicitly specified by the user (by id), or we are downloading
2386 all the collections and images that match certain filters.
2388 """
2389 if (
2390 download_params["wanted_collection_ids"] is not None
2391 or download_params["wanted_image_ids"] is not None
2392 ):
2393 return _prepare_explicit_ids_download_params(download_params)
2394 return _prepare_filtered_download_params(download_params)
2397def _result_list_to_bunch(result_list, download_params):
2398 """Transform a list of results into a Bunch.
2400 If necessary, a vocabulary list and a matrix of vectorized tags are
2401 added.
2403 """
2404 if not result_list:
2405 images_meta, collections_meta = [], []
2406 else:
2407 images_meta, collections_meta = zip(*result_list)
2408 images_meta = list(images_meta)
2409 collections_meta = list(collections_meta)
2411 if download_params["resample"]:
2412 images = [
2413 im_meta.get("resampled_absolute_path") for im_meta in images_meta
2414 ]
2415 else:
2416 images = [im_meta.get("absolute_path") for im_meta in images_meta]
2418 # make sure all paths are strings instead of Path objects
2419 images = [str(image) for image in images]
2420 images_meta = [
2421 {k: str(v) if isinstance(v, Path) else v for k, v in meta.items()}
2422 for meta in images_meta
2423 ]
2424 collections_meta = [
2425 {k: str(v) if isinstance(v, Path) else v for k, v in meta.items()}
2426 for meta in collections_meta
2427 ]
2429 result = Bunch(
2430 images=images,
2431 images_meta=images_meta,
2432 collections_meta=collections_meta,
2433 description=get_dataset_descr("neurovault"),
2434 )
2435 if (
2436 download_params["fetch_neurosynth_words"]
2437 and download_params["vectorize_words"]
2438 ):
2439 (
2440 result["word_frequencies"],
2441 result["vocabulary"],
2442 ) = neurosynth_words_vectorized(
2443 [meta.get("ns_words_absolute_path") for meta in images_meta],
2444 verbose=download_params["verbose"],
2445 )
2446 return result
2449# High-level functions that provide access to neurovault and neurosynth.
2450# _fetch_neurovault_implementation does the work, and two interfaces
2451# are available:
2452# fetch_neurovault, to filter results based on metadata
2453# fetch_neurovault_ids, to ask for specific images or collections
2456def _fetch_neurovault_implementation(
2457 max_images=_DEFAULT_MAX_IMAGES,
2458 collection_terms=None,
2459 collection_filter=_empty_filter,
2460 image_terms=None,
2461 image_filter=_empty_filter,
2462 collection_ids=None,
2463 image_ids=None,
2464 mode="download_new",
2465 data_dir=None,
2466 fetch_neurosynth_words=False,
2467 resample=False,
2468 interpolation="continuous",
2469 vectorize_words=True,
2470 timeout=_DEFAULT_TIME_OUT,
2471 verbose=3,
2472 **kwarg_image_filters,
2473):
2474 """Download data from neurovault.org and neurosynth.org."""
2475 if collection_terms is None:
2476 collection_terms = basic_collection_terms()
2477 if image_terms is None:
2478 image_terms = basic_image_terms()
2479 image_terms = dict(image_terms, **kwarg_image_filters)
2480 neurovault_data_dir = get_dataset_dir("neurovault", data_dir)
2481 if mode != "offline" and not os.access(neurovault_data_dir, os.W_OK):
2482 warnings.warn(
2483 "You don't have write access to neurovault dir: "
2484 f"{neurovault_data_dir}. "
2485 "fetch_neurovault is working offline.",
2486 stacklevel=find_stack_level(),
2487 )
2488 mode = "offline"
2490 download_params = _read_download_params(
2491 neurovault_data_dir,
2492 download_mode=mode,
2493 collection_terms=collection_terms,
2494 collection_filter=collection_filter,
2495 image_terms=image_terms,
2496 image_filter=image_filter,
2497 wanted_collection_ids=collection_ids,
2498 wanted_image_ids=image_ids,
2499 max_images=max_images,
2500 resample=resample,
2501 interpolation=interpolation,
2502 timeout=timeout,
2503 verbose=verbose,
2504 fetch_neurosynth_words=fetch_neurosynth_words,
2505 vectorize_words=vectorize_words,
2506 )
2507 download_params = _prepare_download_params(download_params)
2509 with _TemporaryDirectory() as temp_dir:
2510 download_params["temp_dir"] = temp_dir
2511 scroller = list(_scroll(download_params))
2513 return _result_list_to_bunch(scroller, download_params)
2516@fill_doc
2517def fetch_neurovault(
2518 max_images=_DEFAULT_MAX_IMAGES,
2519 collection_terms=None,
2520 collection_filter=_empty_filter,
2521 image_terms=None,
2522 image_filter=_empty_filter,
2523 mode="download_new",
2524 data_dir=None,
2525 fetch_neurosynth_words=False,
2526 resample=False,
2527 vectorize_words=True,
2528 timeout=_DEFAULT_TIME_OUT,
2529 verbose=3,
2530 **kwarg_image_filters,
2531):
2532 """Download data from neurovault.org that match certain criteria.
2534 Any downloaded data is saved on the local disk and subsequent
2535 calls to this function will first look for the data locally before
2536 querying the server for more if necessary.
2538 We explore the metadata for :term:`Neurovault` collections and images,
2539 keeping those that match a certain set of criteria, until we have
2540 skimmed through the whole database or until an (optional) maximum
2541 number of images to fetch has been reached.
2543 For more information, see :footcite:t:`Gorgolewski2015`,
2544 and :footcite:t:`Yarkoni2011`.
2546 Parameters
2547 ----------
2548 max_images : :obj:`int`, default=100
2549 Maximum number of images to fetch.
2551 collection_terms : :obj:`dict` or None, default=None
2552 Key, value pairs used to filter collection
2553 metadata. Collections for which
2554 ``collection_metadata['key'] == value`` is not ``True`` for
2555 every key, value pair will be discarded.
2556 See documentation for ``basic_collection_terms`` for a
2557 description of the default selection criteria.
2558 If ``None`` is passed, will default to ``basic_collection_terms()``.
2560 collection_filter : Callable, default=_empty_filter
2561 Collections for which `collection_filter(collection_metadata)`
2562 is ``False`` will be discarded.
2564 image_terms : :obj:`dict` or None, default=None
2565 Key, value pairs used to filter image metadata. Images for
2566 which ``image_metadata['key'] == value`` is not ``True`` for
2567 if image_filter != _empty_filter and image_terms =
2568 every key, value pair will be discarded.
2569 See documentation for ``basic_image_terms`` for a
2570 description of the default selection criteria.
2571 Will default to ``basic_image_terms()`` if ``None`` is passed.
2573 image_filter : Callable, default=_empty_filter
2574 Images for which `image_filter(image_metadata)` is ``False``
2575 will be discarded.
2577 mode : {'download_new', 'overwrite', 'offline'}, default="download_new"
2578 When to fetch an image from the server rather than the local
2579 disk.
2581 - 'download_new' (the default) means download only files that
2582 are not already on disk (regardless of modify date).
2583 - 'overwrite' means ignore files on disk and overwrite them.
2584 - 'offline' means load only data from disk; don't query server.
2586 %(data_dir)s
2588 fetch_neurosynth_words : :obj:`bool`, default=False
2589 Whether to collect words from Neurosynth.
2591 vectorize_words : :obj:`bool`, default=True
2592 If neurosynth words are downloaded, create a matrix of word
2593 counts and add it to the result. Also add to the result a
2594 vocabulary list. See ``sklearn.feature_extraction.text.CountVectorizer`` for more info.
2596 resample : :obj:`bool`, default=False
2597 Whether to resample downloaded images to a 3x3x3 mm grid before
2598 saving them, to save disk space. Resampling uses 'continuous'
2599 interpolation (see nilearn.image.resample_img).
2605 timeout : float, default=_DEFAULT_TIME_OUT
2606 Timeout in seconds.
2608 verbose : :obj:`int`, default=3
2609 An integer in [0, 1, 2, 3] to control the verbosity level.
2611 kwarg_image_filters
2612 Keyword arguments are understood to be filter terms for
2613 images, so for example ``map_type='Z map'`` means only
2614 download Z-maps; ``collection_id=35`` means download images
2615 from collection 35 only.
2617 Returns
2618 -------
2619 Bunch
2620 A dict-like object which exposes its items as attributes. It contains:
2622 - 'images', the paths to downloaded files.
2623 - 'images_meta', the metadata for the images in a list of
2624 dictionaries.
2625 - 'collections_meta', the metadata for the
2626 collections.
2627 - 'description', a short description
2628 of the :term:`Neurovault` dataset.
2630 If `fetch_neurosynth_words` and `vectorize_words` were set, it
2631 also contains:
2633 - 'vocabulary', a list of words
2634 - 'word_frequencies', the weight of the words returned by
2635 neurosynth.org for each image, such that the weight of word
2636 `vocabulary[j]` for the image found in `images[i]` is
2637 `word_frequencies[i, j]`
2639 See Also
2640 --------
2641 nilearn.datasets.fetch_neurovault_ids
2642 Fetch collections and images from Neurovault by explicitly specifying
2643 their ids.
2645 Notes
2646 -----
2647 Images and collections from disk are fetched before remote data.
2649 Some helpers are provided in the ``neurovault`` module to express
2650 filtering criteria more concisely:
2652 ``ResultFilter``, ``IsNull``, ``NotNull``, ``NotEqual``,
2653 ``GreaterOrEqual``, ``GreaterThan``, ``LessOrEqual``,
2654 ``LessThan``, ``IsIn``, ``NotIn``, ``Contains``,
2655 ``NotContains``, ``Pattern``.
2657 If you pass a single value to match against the collection id
2658 (whether as the 'id' field of the collection metadata or as the
2659 'collection_id' field of the image metadata), the server is
2660 directly queried for that collection, so
2661 ``fetch_neurovault(collection_id=40)`` is as efficient as
2662 ``fetch_neurovault(collection_ids=[40])`` (but in the former
2663 version the other filters will still be applied). This is not true
2664 for the image ids. If you pass a single value to match against any
2665 of the other fields listed in ``_COL_FILTERS_AVAILABLE_ON_SERVER``
2666 ('DOI', 'name', and 'owner'), these filters can be
2667 applied by the server, limiting the amount of metadata we have to
2668 download: filtering on those fields makes the fetching faster
2669 because the filtering takes place on the server side.
2671 In `download_new` mode, if a file exists on disk, it is not
2672 downloaded again, even if the version on the server is newer. Use
2673 `overwrite` mode to force a new download (you can filter on the
2674 field ``modify_date`` to re-download the files that are newer on
2675 the server - see Examples section).
2677 Tries to yield `max_images` images; stops early if we have fetched
2678 all the images matching the filters or if too many images fail to
2679 be downloaded in a row.
2681 References
2682 ----------
2683 .. footbibliography::
2685 Examples
2686 --------
2687 To download **all** the collections and images from Neurovault::
2689 fetch_neurovault(max_images=None, collection_terms={}, image_terms={})
2691 To further limit the default selection to collections which
2692 specify a DOI (which reference a published paper, as they may be
2693 more likely to contain good images)::
2695 fetch_neurovault(
2696 max_images=None,
2697 collection_terms=dict(basic_collection_terms(), DOI=NotNull()),
2698 )
2700 To update all the images matching the default filters, where ``newest`` is the most recent ``modify_date`` among the files already on disk::
2702 fetch_neurovault(
2703 max_images=None, mode="overwrite", modify_date=GreaterThan(newest)
2704 )
2706 """
2707 check_params(locals())
2709 if collection_terms is None:
2710 collection_terms = basic_collection_terms()
2711 if image_terms is None:
2712 image_terms = basic_image_terms()
2714 if max_images == _DEFAULT_MAX_IMAGES:
2715 logger.log(
2716 "fetch_neurovault: "
2717 f"using default value of {_DEFAULT_MAX_IMAGES} "
2718 "for max_images. "
2719 "Set max_images to another value or None "
2720 "if you want more images.",
2721 msg_level=_INFO,
2722 verbose=verbose,
2723 )
2724 # Users may get confused if they write their image_filter function
2725 # and the default filters contained in image_terms still apply, so we
2726 # issue a warning.
2727 if (
2728 image_filter is not _empty_filter
2729 and image_terms == basic_image_terms()
2730 ):
2731 warnings.warn(
2732 "You specified a value for `image_filter` but the "
2733 "default filters in `image_terms` still apply. "
2734 "If you want to disable them, pass `image_terms={}`",
2735 stacklevel=find_stack_level(),
2736 )
2737 if (
2738 collection_filter is not _empty_filter
2739 and collection_terms == basic_collection_terms()
2740 ):
2741 warnings.warn(
2742 "You specified a value for `collection_filter` but the "
2743 "default filters in `collection_terms` still apply. "
2744 "If you want to disable them, pass `collection_terms={}`",
2745 stacklevel=find_stack_level(),
2746 )
2748 return _fetch_neurovault_implementation(
2749 max_images=max_images,
2750 collection_terms=collection_terms,
2751 collection_filter=collection_filter,
2752 image_terms=image_terms,
2753 image_filter=image_filter,
2754 mode=mode,
2755 data_dir=data_dir,
2756 fetch_neurosynth_words=fetch_neurosynth_words,
2757 resample=resample,
2758 vectorize_words=vectorize_words,
2759 timeout=timeout,
2760 verbose=verbose,
2761 **kwarg_image_filters,
2762 )
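
# Usage sketch, not part of the original module: a hedged example call
# that keeps the default criteria, restricts the results to Z maps from
# collections declaring a DOI, and caps the download at 20 images.
def _example_fetch_neurovault():  # pragma: no cover - illustration only
    data = fetch_neurovault(
        max_images=20,
        collection_terms=dict(basic_collection_terms(), DOI=NotNull()),
        map_type="Z map",  # extra keyword arguments are image filter terms
    )
    return data.images, data.images_meta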
2765@fill_doc
2766def fetch_neurovault_ids(
2767 collection_ids=(),
2768 image_ids=(),
2769 mode="download_new",
2770 data_dir=None,
2771 fetch_neurosynth_words=False,
2772 resample=False,
2773 vectorize_words=True,
2774 timeout=_DEFAULT_TIME_OUT,
2775 verbose=3,
2776):
2777 """Download specific images and collections from neurovault.org.
2779 Any downloaded data is saved on the local disk and subsequent
2780 calls to this function will first look for the data locally before
2781 querying the server for more if necessary.
2783 This is the fast way to get the data from the server if we already
2784 know which images or collections we want.
2786 For more information, see :footcite:t:`Gorgolewski2015`,
2787 and :footcite:t:`Yarkoni2011`.
2789 Parameters
2790 ----------
2791 collection_ids : Container, default=()
2792 The ids of whole collections to be downloaded.
2794 image_ids : Container, default=()
2795 The ids of particular images to be downloaded. The metadata for the
2796 corresponding collections is also downloaded.
2798 mode : {'download_new', 'overwrite', 'offline'}, default='download_new'
2799 When to fetch an image from the server rather than the local
2800 disk.
2802 - 'download_new' (the default) means download only files that
2803 are not already on disk (regardless of modify date).
2804 - 'overwrite' means ignore files on disk and overwrite them.
2805 - 'offline' means load only data from disk; don't query server.
2807 %(data_dir)s
2809 fetch_neurosynth_words : :obj:`bool`, default=False
2810 Whether to collect words from Neurosynth.
2812 resample : :obj:`bool`, default=False
2813 Whether to resample downloaded images to a 3x3x3 mm grid before
2814 saving them, to save disk space. Resampling uses 'continuous'
2815 interpolation (see nilearn.image.resample_img).
2816 vectorize_words : :obj:`bool`, default=True
2817 If neurosynth words are downloaded, create a matrix of word
2818 counts and add it to the result. Also add to the result a
2819 vocabulary list. See ``sklearn.feature_extraction.text.CountVectorizer`` for more info.
2821 timeout : float, default=_DEFAULT_TIME_OUT
2822 Timeout in seconds.
2824 verbose : :obj:`int`, default=3
2825 An integer in [0, 1, 2, 3] to control the verbosity level.
2827 Returns
2828 -------
2829 Bunch
2830 A dict-like object which exposes its items as attributes. It contains:
2832 - 'images', the paths to downloaded files.
2833 - 'images_meta', the metadata for the images in a list of
2834 dictionaries.
2835 - 'collections_meta', the metadata for the
2836 collections.
2837 - 'description', a short description
2838 of the :term:`Neurovault` dataset.
2840 If `fetch_neurosynth_words` and `vectorize_words` were set, it
2841 also contains:
2843 - 'vocabulary', a list of words
2844 - 'word_frequencies', the weight of the words returned by
2845 neurosynth.org for each image, such that the weight of word
2846 `vocabulary[j]` for the image found in `images[i]` is
2847 `word_frequencies[i, j]`
2849 See Also
2850 --------
2851 nilearn.datasets.fetch_neurovault
2852 Fetch data from Neurovault, but use filters on metadata to select
2853 images and collections rather than giving explicit lists of ids.
2855 Notes
2856 -----
2857 Images and collections from disk are fetched before remote data.
2859 In `download_new` mode, if a file exists on disk, it is not
2860 downloaded again, even if the version on the server is newer. Use
2861 `overwrite` mode to force a new download.
2863 Stops early if too many images fail to be downloaded in a row.
2865 References
2866 ----------
2867 .. footbibliography::
2869 """
2870 check_params(locals())
2872 return _fetch_neurovault_implementation(
2873 mode=mode,
2874 collection_ids=collection_ids,
2875 image_ids=image_ids,
2876 data_dir=data_dir,
2877 fetch_neurosynth_words=fetch_neurosynth_words,
2878 resample=resample,
2879 vectorize_words=vectorize_words,
2880 timeout=timeout,
2881 verbose=verbose,
2882 )
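
# Usage sketch, not part of the original module: fetch one whole
# collection plus two individual images by id (collection 35 is only
# illustrative; 10426 and 32980 are the images cited below).
def _example_fetch_neurovault_ids():  # pragma: no cover - illustration only
    data = fetch_neurovault_ids(collection_ids=[35], image_ids=[10426, 32980])
    # The metadata of the collections owning images 10426 and 32980 is
    # downloaded as well, although only those two images are fetched.
    return data.images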
2885@fill_doc
2886def fetch_neurovault_motor_task(
2887 data_dir=None, timeout=_DEFAULT_TIME_OUT, verbose=1
2888):
2889 """Fetch left vs right button press \
2890 group :term:`contrast` map from :term:`Neurovault`.
2892 .. deprecated:: 0.11.2dev
2894 This fetcher function will be removed in version>0.13.1
2895 as it returns the same data
2896 as :func:`nilearn.datasets.load_sample_motor_activation_image`.
2898 Please use
2899 :func:`nilearn.datasets.load_sample_motor_activation_image`
2900 instead.
2902 Parameters
2903 ----------
2904 %(data_dir)s
2906 %(verbose)s
2908 Returns
2909 -------
2910 data : Bunch
2911 A dict-like object which exposes its items as attributes. It contains:
2912 - 'images', the paths to downloaded files.
2913 - 'images_meta', the metadata for the images in a list of
2914 dictionaries.
2915 - 'collections_meta', the metadata for the
2916 collections.
2917 - 'description', a short description
2918 of the :term:`Neurovault` dataset.
2920 Notes
2921 -----
2922 The 'left vs right button press' contrast is used:
2923 https://neurovault.org/images/10426/
2925 See Also
2926 --------
2927 nilearn.datasets.fetch_neurovault_ids
2928 nilearn.datasets.fetch_neurovault
2929 nilearn.datasets.fetch_neurovault_auditory_computation_task
2931 """
2932 check_params(locals())
2934 warnings.warn(
2935 (
2936 "The 'fetch_neurovault_motor_task' function will be removed "
2937 "in version>0.13.1 as it returns the same data "
2938 "as 'load_sample_motor_activation_image'.\n"
2939 "Please use 'load_sample_motor_activation_image' instead.'"
2940 ),
2941 DeprecationWarning,
2942 stacklevel=find_stack_level(),
2943 )
2945 data = fetch_neurovault_ids(
2946 image_ids=[10426], data_dir=data_dir, verbose=verbose, timeout=timeout
2947 )
2948 return data
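
# As the deprecation above states, the same map is available through
# `nilearn.datasets.load_sample_motor_activation_image`; a sketch of the
# recommended replacement:
def _example_motor_replacement():  # pragma: no cover - illustration only
    from nilearn.datasets import load_sample_motor_activation_image

    return load_sample_motor_activation_image()  # path to the contrast map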
2951@fill_doc
2952def fetch_neurovault_auditory_computation_task(
2953 data_dir=None, verbose=1, timeout=_DEFAULT_TIME_OUT
2954):
2955 """Fetch a :term:`contrast` map from :term:`Neurovault` showing \
2956 the effect of mental subtraction upon auditory instructions.
2958 Parameters
2959 ----------
2960 %(data_dir)s
2962 %(verbose)s
2964 Returns
2965 -------
2966 data : Bunch
2967 A dict-like object which exposes its items as attributes. It contains:
2968 - 'images', the paths to downloaded files.
2969 - 'images_meta', the metadata for the images in a list of
2970 dictionaries.
2971 - 'collections_meta', the metadata for the
2972 collections.
2973 - 'description', a short description
2974 of the :term:`Neurovault` dataset.
2976 Notes
2977 -----
2978 The 'auditory_calculation_vs_baseline' contrast is used:
2979 https://neurovault.org/images/32980/
2981 See Also
2982 --------
2983 nilearn.datasets.fetch_neurovault_ids
2984 nilearn.datasets.fetch_neurovault
2986 """
2987 check_params(locals())
2989 data = fetch_neurovault_ids(
2990 image_ids=[32980], data_dir=data_dir, verbose=verbose, timeout=timeout
2991 )
2992 return data