Coverage for nilearn/_utils/data_gen.py: 8%
343 statements
coverage.py v7.9.1, created at 2025-06-20 10:58 +0200
1"""Data generation utilities."""
3from __future__ import annotations
5import itertools
6import json
7import string
8from pathlib import Path
10import numpy as np
11import pandas as pd
12import scipy.signal
13from nibabel import Nifti1Image, gifti
14from scipy.ndimage import binary_dilation
16from nilearn import datasets, image, maskers, masking
17from nilearn._utils import as_ndarray, logger
18from nilearn.interfaces.bids.utils import (
19 bids_entities,
20 check_bids_label,
21 create_bids_filename,
22)
24# TODO get legal_confounds out of private testing module
25from nilearn.interfaces.fmriprep.tests._testing import get_legal_confound
28def generate_mni_space_img(n_scans=1, res=30, random_state=0, mask_dilation=2):
29 """Generate MNI space img.
31 Parameters
32 ----------
33 n_scans : :obj:`int`, default=1
34 Number of scans.
36 res : :obj:`int`, default=30
37 Desired resolution, in mm, of output images.
39 %(random_state)s
40 default=0
42 mask_dilation : :obj:`int`, default=2
43 The number of times the binary :term:`dilation<Dilation>` is repeated
44 on the mask.
46 Returns
47 -------
48 inverse_img : Niimg-like object
49 Image transformed back to MNI space.
51 mask_img : Niimg-like object
52 Generated mask in MNI space.
54 """
55 rand_gen = np.random.default_rng(random_state)
56 mask_img = datasets.load_mni152_brain_mask(resolution=res)
57 masker = maskers.NiftiMasker(mask_img).fit()
58 n_voxels = image.get_data(mask_img).sum()
59 data = rand_gen.standard_normal((n_scans, n_voxels))
60 if mask_dilation is not None and mask_dilation > 0:
61 mask_img = image.new_img_like(
62 mask_img,
63 binary_dilation(
64 image.get_data(mask_img), iterations=mask_dilation
65 ),
66 )
67 inverse_img = masker.inverse_transform(data)
68 return inverse_img, mask_img
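# Usage sketch for generate_mni_space_img (a minimal example; assumes nilearn
# is installed and the MNI152 template at the requested resolution can be
# loaded or fetched):
from nilearn._utils.data_gen import generate_mni_space_img

img, mask = generate_mni_space_img(n_scans=2, res=30, random_state=42)
print(img.shape)   # 4D image: (x, y, z, 2) in MNI space
print(mask.shape)  # 3D MNI brain mask, dilated twice by default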
71def generate_timeseries(n_timepoints, n_features, random_state=0):
72 """Generate some random timeseries.
74 Parameters
75 ----------
76 n_timepoints : :obj:`int`
77 Number of timepoints
79 n_features : :obj:`int`
80 Number of features
82 %(random_state)s
83 default=0
85 Returns
86 -------
87 :obj:`numpy.ndarray` of shape (n_timepoints, n_features)
88 Generated time series.
90 """
91 rand_gen = np.random.default_rng(random_state)
92 return rand_gen.standard_normal((n_timepoints, n_features))
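# Usage sketch for generate_timeseries (minimal example):
from nilearn._utils.data_gen import generate_timeseries

ts = generate_timeseries(n_timepoints=100, n_features=10, random_state=42)
print(ts.shape)  # (100, 10): standard-normal samples, one column per feature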
95def generate_regions_ts(
96 n_features,
97 n_regions,
98 overlap=0,
99 random_state=0,
100 window="boxcar",
101 negative_regions=False,
102):
103 """Generate some regions as timeseries.
105 Parameters
106 ----------
107 n_features : :obj:`int`
108 Number of features.
110 n_regions : :obj:`int`
111 Number of regions.
113 overlap : :obj:`int`, default=0
114 Approximate number of overlapping voxels between two regions.
116 %(random_state)s
117 default=0
119 window : :obj:`str`, default='boxcar'
120 Name of a window in scipy.signal. e.g. "hamming".
122 negative_regions : :obj:`bool`, default=False
123 If True, creates negative and positive valued regions randomly; all
124 generated region values are positive otherwise.
126 .. versionadded:: 0.11.1
128 Returns
129 -------
130 regions : :obj:`numpy.ndarray`
131 Regions, represented as signals.
132 shape (n_regions, n_features)
134 """
135 rand_gen = np.random.default_rng(random_state)
136 if window is None:
137 window = "boxcar"
139 assert n_features > n_regions
141 # Compute region boundaries indices.
142 # Start at 1 to avoid getting an empty region
143 boundaries = np.zeros(n_regions + 1)
144 boundaries[-1] = n_features
145 boundaries[1:-1] = rand_gen.permutation(np.arange(1, n_features))[
146 : n_regions - 1
147 ]
148 boundaries.sort()
150 regions = np.zeros((n_regions, n_features), order="C")
151 overlap_end = int((overlap + 1) / 2.0)
152 overlap_start = int(overlap / 2.0)
153 for n in range(len(boundaries) - 1):
154 start = int(max(0, boundaries[n] - overlap_start))
155 end = int(min(n_features, boundaries[n + 1] + overlap_end))
156 win = scipy.signal.get_window(window, end - start)
157 win /= win.mean() # unity mean
158 if negative_regions and rand_gen.choice(a=[True, False]):
159 win = -1 * win
160 regions[n, start:end] = win
162 return regions
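# Usage sketch for generate_regions_ts (minimal example): 10 regions laid out
# over 500 features, with a Hamming profile and roughly 2 overlapping voxels
# between neighboring regions.
import numpy as np

from nilearn._utils.data_gen import generate_regions_ts

regions = generate_regions_ts(
    n_features=500, n_regions=10, overlap=2, window="hamming", random_state=42
)
print(regions.shape)                      # (10, 500): one row per region
print(np.count_nonzero(regions, axis=1))  # number of voxels in each region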
165def generate_maps(
166 shape,
167 n_regions,
168 overlap=0,
169 border=1,
170 window="boxcar",
171 random_state=0,
172 affine=None,
173 negative_regions=False,
174):
175 """Generate a 4D volume containing several maps.
177 Parameters
178 ----------
shape : :obj:`tuple` of :obj:`int`
Shape of the 3D background volume in which the regions are generated.
179 n_regions : :obj:`int`
180 Number of regions to generate.
182 overlap : :obj:`int`, default=0
183 Approximate number of voxels common to two neighboring regions.
185 window : :obj:`str`, default='boxcar'
186 Name of a window in scipy.signal. Used to get non-uniform regions.
188 %(random_state)s
189 default=0
191 affine : :obj:`numpy.ndarray`, default=None
192 Affine transformation to use.
193 Will default to ``np.eye(4)`` if ``None`` is passed.
195 border : :obj:`int`, default=1
196 Number of background voxels on each side of the 3D volumes.
198 negative_regions : :obj:`bool`, default=False
199 If True, creates negative and positive valued regions randomly; all
200 generated region values are positive otherwise.
202 .. versionadded:: 0.11.1
204 Returns
205 -------
206 maps : Niimg-like object
207 4D image object containing the maps.
209 mask_img : Niimg-like object
210 3D mask giving non-zero voxels.
212 """
213 if affine is None:
214 affine = np.eye(4)
215 mask = np.zeros(shape, dtype=np.int8)
216 mask[border:-border, border:-border, border:-border] = 1
217 ts = generate_regions_ts(
218 mask.sum(),
219 n_regions,
220 overlap=overlap,
221 random_state=random_state,
222 window=window,
223 negative_regions=negative_regions,
224 )
225 mask_img = Nifti1Image(mask, affine)
226 return masking.unmask(ts, mask_img), mask_img
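# Usage sketch for generate_maps (minimal example): five maps inside a
# (12, 13, 14) volume with a one-voxel background border.
from nilearn._utils.data_gen import generate_maps

maps_img, mask_img = generate_maps(shape=(12, 13, 14), n_regions=5, border=1)
print(maps_img.shape)  # (12, 13, 14, 5): one 3D map per region
print(mask_img.shape)  # (12, 13, 14): non-zero inside the border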
229def generate_labeled_regions(
230 shape,
231 n_regions,
232 random_state=0,
233 labels=None,
234 affine=None,
235 dtype="int32",
236):
237 """Generate a 3D volume with labeled regions.
239 Parameters
240 ----------
241 shape : :obj:`tuple`
242 Shape of returned array.
244 n_regions : :obj:`int`
245 Number of regions to generate. If "labels" is None,
246 a background region with value zero is added.
248 %(random_state)s
249 default=0
251 labels : iterable, optional
252 Labels to use for each zone. If provided, n_regions is unused.
254 affine : :obj:`numpy.ndarray`, default=None
255 Affine of returned image.
256 Will default to ``np.eye(4)`` if ``None`` is passed.
258 dtype : :obj:`type`, default='int32'
259 Data type of image.
261 Returns
262 -------
263 Niimg-like object
264 Data has shape "shape", containing region labels.
266 """
267 if affine is None:
268 affine = np.eye(4)
269 n_voxels = shape[0] * shape[1] * shape[2]
270 if labels is None:
271 n_regions += 1
272 labels = range(n_regions)
273 else:
274 n_regions = len(labels)
276 regions = generate_regions_ts(
277 n_voxels, n_regions, random_state=random_state
278 )
279 # replace weights with labels
280 for n, row in zip(labels, regions):
281 row[row > 0] = n
282 data = np.zeros(shape, dtype=dtype)
283 data[np.ones(shape, dtype=bool)] = regions.sum(axis=0).T
284 return Nifti1Image(data, affine)
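# Usage sketch for generate_labeled_regions (minimal example): a small
# parcellation with integer labels and a zero-valued background.
import numpy as np

from nilearn._utils.data_gen import generate_labeled_regions

labels_img = generate_labeled_regions(shape=(10, 10, 10), n_regions=4)
print(np.unique(labels_img.get_fdata()))  # 0 (background) plus labels 1..4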
287def generate_fake_fmri(
288 shape=(10, 11, 12),
289 length=17,
290 kind="noise",
291 affine=None,
292 n_blocks=None,
293 block_size=3,
294 block_type="classification",
295 random_state=0,
296):
297 """Generate a signal which can be used for testing.
299 The return value is a 4D image, representing 3D volumes along time.
300 Only the voxels in the center are non-zero, to mimic the presence of
301 brain voxels in real signals. Setting n_blocks to an integer generates
302 condition blocks, with the remainder of the timeseries corresponding
303 to a 'rest' or 'baseline' condition.
305 Parameters
306 ----------
307 shape : :obj:`tuple`, default=(10, 11, 12)
308 Shape of 3D volume.
310 length : :obj:`int`, default=17
311 Number of time instants.
313 kind : :obj:`str`, default='noise'
314 Kind of signal used as timeseries.
315 "noise": uniformly sampled values in [0..255]
316 "step": 0.5 for the first half then 1.
318 affine : :obj:`numpy.ndarray`, default=None
319 Affine of returned images.
320 Will default to ``np.eye(4)`` if ``None`` is passed.
322 n_blocks : :obj:`int` or None, default=None
323 Number of condition blocks.
325 block_size : :obj:`int` or None, default=3
326 Number of timepoints in a block.
327 Used only if n_blocks is not None.
329 block_type : :obj:`str`, default='classification'
330 Defines if the returned target should be used for
331 'classification' or 'regression'.
332 Used only if n_blocks is not None.
334 %(random_state)s
335 default=0
337 Returns
338 -------
339 Niimg-like object
340 Fake fmri signal.
341 shape: shape + (length,)
343 Niimg-like object
344 Mask giving non-zero voxels.
346 target : :obj:`numpy.ndarray`
347 Classification or regression target.
348 A 1D array with one element for each time point.
349 Returned only if ``n_blocks`` is not None.
351 """
352 if affine is None:
353 affine = np.eye(4)
354 full_shape = (*shape, length)
355 fmri = np.zeros(full_shape)
356 # Fill central voxels timeseries with random signals
357 width = [s // 2 for s in shape]
358 shift = [s // 4 for s in shape]
360 rand_gen = np.random.default_rng(random_state)
361 if kind == "noise":
362 signals = rand_gen.integers(256, size=([*width, length]))
363 elif kind == "step":
364 signals = np.ones([*width, length])
365 signals[..., : length // 2] = 0.5
366 else:
367 raise ValueError("Unhandled value for parameter 'kind'")
369 fmri[
370 shift[0] : shift[0] + width[0],
371 shift[1] : shift[1] + width[1],
372 shift[2] : shift[2] + width[2],
373 :,
374 ] = signals
376 mask = np.zeros(shape)
377 mask[
378 shift[0] : shift[0] + width[0],
379 shift[1] : shift[1] + width[1],
380 shift[2] : shift[2] + width[2],
381 ] = 1
383 if n_blocks is None:
384 return (Nifti1Image(fmri, affine), Nifti1Image(mask, affine))
386 flat_fmri = fmri[mask.astype(bool)]
387 flat_fmri /= np.abs(flat_fmri).max()
388 target = np.zeros(length, dtype=int)
389 rest_max_size = (length - (n_blocks * block_size)) // n_blocks
390 if rest_max_size < 0:
391 raise ValueError(
392 f"{length} is too small "
393 f"to put {n_blocks} blocks of size {block_size}"
394 )
395 t_start = (
396 rand_gen.integers(0, rest_max_size, 1)[0] if rest_max_size > 0 else 0
397 )
398 for block in range(n_blocks):
399 if block_type == "classification":
400 # Select a random voxel and add some signal to the background
401 voxel_idx = rand_gen.integers(0, flat_fmri.shape[0], 1)[0]
402 trials_effect = (rand_gen.random(block_size) + 1) * 3.0
403 else:
404 # Select the voxel in the image center and add some signal
405 # that increases with each block
406 voxel_idx = flat_fmri.shape[0] // 2
407 trials_effect = (rand_gen.random(block_size) + 1) * block
408 t_rest = 0
409 if rest_max_size > 0:
410 t_rest = rand_gen.integers(0, rest_max_size, 1)[0]
411 flat_fmri[voxel_idx, t_start : t_start + block_size] += trials_effect
412 target[t_start : t_start + block_size] = block + 1
413 t_start += t_rest + block_size
414 target = (
415 target if block_type == "classification" else target.astype(np.float64)
416 )
417 fmri = np.zeros(fmri.shape)
418 fmri[mask.astype(bool)] = flat_fmri
419 return (Nifti1Image(fmri, affine), Nifti1Image(mask, affine), target)
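# Usage sketch for generate_fake_fmri (minimal example): without n_blocks the
# function returns (fmri, mask); with n_blocks it also returns a target vector
# marking the block each timepoint belongs to (0 meaning rest).
from nilearn._utils.data_gen import generate_fake_fmri

fmri_img, mask_img = generate_fake_fmri(shape=(10, 11, 12), length=20)
print(fmri_img.shape)  # (10, 11, 12, 20)

fmri_img, mask_img, target = generate_fake_fmri(
    shape=(10, 11, 12),
    length=40,
    n_blocks=3,
    block_size=4,
    block_type="classification",
    random_state=42,
)
print(target.shape)  # (40,): block labels 1..3, 0 elsewhere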
422def generate_fake_fmri_data_and_design(
423 shapes, rk=3, affine=None, random_state=0
424):
425 """Generate random :term:`fMRI` time series \
426 and design matrices of given shapes.
428 Parameters
429 ----------
430 shapes : :obj:`list` of length-4 :obj:`tuple`s of :obj:`int`
431 Shapes of the fmri data to be generated.
433 rk : :obj:`int`, default=3
434 Number of columns in the design matrix to be generated.
436 affine : :obj:`numpy.ndarray`, default=None
437 Affine of returned images. Must be a 4x4 array.
438 Will default to ``np.eye(4)`` if ``None`` is passed.
440 %(random_state)s
441 default=0
443 Returns
444 -------
445 mask : Niimg-like object
446 3D mask giving non-zero voxels.
448 fmri_data : :obj:`list`
449 A list of 4D Niimg-like objects, one per shape in ``shapes``.
451 design_matrices : :obj:`list`
452 A list of :obj:`pandas.DataFrame`, one per shape in ``shapes``.
454 """
455 if affine is None:
456 affine = np.eye(4)
457 fmri_data = []
458 design_matrices = []
459 rand_gen = np.random.default_rng(random_state)
460 for shape in shapes:
461 data = rand_gen.standard_normal(shape)
462 data[1:-1, 1:-1, 1:-1] += 100
463 fmri_data.append(Nifti1Image(data, affine))
464 columns = rand_gen.choice(
465 list(string.ascii_lowercase), size=rk, replace=False
466 )
467 design_matrices.append(
468 pd.DataFrame(
469 rand_gen.standard_normal((shape[3], rk)), columns=columns
470 )
471 )
472 mask = Nifti1Image(
473 (rand_gen.random(shape[:3]) > 0.5).astype(np.int8), affine
474 )
475 return mask, fmri_data, design_matrices
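# Usage sketch for generate_fake_fmri_data_and_design (minimal example): two
# runs with different numbers of volumes and a 3-column design matrix per run.
from nilearn._utils.data_gen import generate_fake_fmri_data_and_design

shapes = [(7, 8, 9, 10), (7, 8, 9, 15)]
mask, fmri_imgs, design_matrices = generate_fake_fmri_data_and_design(
    shapes, rk=3, random_state=42
)
print(len(fmri_imgs), fmri_imgs[0].shape)  # 2 runs, first is (7, 8, 9, 10)
print(design_matrices[0].shape)            # (10, 3)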
478def write_fake_fmri_data_and_design(
479 shapes, rk=3, affine=None, random_state=0, file_path=None
480):
481 """Generate random :term:`fMRI` data \
482 and design matrices and write them to disk.
484 Parameters
485 ----------
486 shapes : :obj:`list` of :obj:`tuple`s of :obj:`int`
487 A list of shapes in tuple format.
489 rk : :obj:`int`, default=3
490 Number of columns in the design matrix to be generated.
492 affine : :obj:`numpy.ndarray`, default=None
493 Affine of returned images.
494 Will default to ``np.eye(4)`` if ``None`` is passed.
496 %(random_state)s
497 default=0
499 file_path : :obj:`str` or :obj:`pathlib.Path`, default=None
500 Output file path.
502 Returns
503 -------
504 mask_file : :obj:`pathlib.Path`
505 Path to the mask file.
507 fmri_files : :obj:`list` of :obj:`str`
508 A list of paths to the generated fmri files.
510 design_files : :obj:`list` of :obj:`str`
511 A list of paths to the generated design matrix files.
513 See Also
514 --------
515 nilearn._utils.data_gen.generate_fake_fmri_data_and_design
517 """
518 file_path = Path.cwd() if file_path is None else Path(file_path)
520 mask, fmri_data, design_matrices = generate_fake_fmri_data_and_design(
521 shapes, rk=rk, affine=affine, random_state=random_state
522 )
524 mask_file, fmri_files, design_files = file_path / "mask.nii", [], []
526 mask.to_filename(mask_file)
527 for i, fmri in enumerate(fmri_data):
528 fmri_files.append(str(file_path / f"fmri_run{i:d}.nii"))
529 fmri.to_filename(fmri_files[-1])
530 for i, design in enumerate(design_matrices):
531 design_files.append(str(file_path / f"dmtx_{i:d}.tsv"))
532 design.to_csv(design_files[-1], sep="\t", index=False)
534 return mask_file, fmri_files, design_files
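# Usage sketch for write_fake_fmri_data_and_design (minimal example, writing
# into a temporary directory so nothing is left behind):
import tempfile

from nilearn._utils.data_gen import write_fake_fmri_data_and_design

with tempfile.TemporaryDirectory() as tmp_dir:
    mask_file, fmri_files, design_files = write_fake_fmri_data_and_design(
        shapes=[(5, 5, 5, 10)], rk=3, file_path=tmp_dir
    )
    print(mask_file)     # <tmp_dir>/mask.nii
    print(fmri_files)    # ['<tmp_dir>/fmri_run0.nii']
    print(design_files)  # ['<tmp_dir>/dmtx_0.tsv']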
537def _write_fake_bold_gifti(
538 file_path, n_time_points, n_vertices, random_state=0
539):
540 """Generate a gifti image and write it to disk.
542 Note this only generates an empty file
543 if the number of vertices demanded is 0.
545 Parameters
546 ----------
547 file_path : :obj:`str`
548 Output file path.
550 n_time_points : :obj:`int`
552 n_vertices : :obj:`int`
554 %(random_state)s
555 default=0
556 Returns
557 -------
558 file_path : :obj:`str`
559 Output file path.
565 """
566 rand_gen = np.random.default_rng(random_state)
567 data = rand_gen.standard_normal((n_time_points, n_vertices))
568 darray = gifti.GiftiDataArray(data=data, datatype="NIFTI_TYPE_FLOAT32")
569 gii = gifti.GiftiImage(darrays=[darray])
570 gii.to_filename(file_path)
572 return file_path
575def write_fake_bold_img(file_path, shape, affine=None, random_state=0):
576 """Generate a random image of given shape and write it to disk.
578 Parameters
579 ----------
580 file_path : :obj:`str`
581 Output file path.
583 shape : :obj:`tuple` of :obj:`int`
584 Shape of output array. Should be at least 3D.
586 affine : :obj:`numpy.ndarray`, default=None
587 Affine of returned images.
588 Will default to ``np.eye(4)`` if ``None`` is passed.
590 %(random_state)s
591 default=0
593 Returns
594 -------
595 file_path : :obj:`str`
596 Output file path.
598 """
599 if affine is None:
600 affine = np.eye(4)
601 rand_gen = np.random.default_rng(random_state)
602 data = rand_gen.standard_normal(shape)
603 data[1:-1, 1:-1, 1:-1] += 100
604 Nifti1Image(data, affine).to_filename(file_path)
605 return file_path
608def _generate_signals_from_precisions(
609 precisions, min_n_samples=50, max_n_samples=100, random_state=0
610):
611 """Generate timeseries according to some given precision matrices.
613 Signals all have zero mean.
615 Parameters
616 ----------
617 precisions : :obj:`list` of :obj:`numpy.ndarray`
618 A list of precision matrices. Every matrix must be square (with the
619 same size) and positive definite.
621 min_n_samples, max_n_samples : :obj:`int`, optional
622 The number of samples drawn for each timeseries is taken at random
623 between these two numbers. Defaults are 50 and 100.
625 %(random_state)s
626 default=0
628 Returns
629 -------
630 signals : :obj:`list` of :obj:`numpy.ndarray`
631 Output signals. signals[n] corresponds to precisions[n], and has shape
632 (sample number, precisions[n].shape[0]).
634 """
635 rand_gen = np.random.default_rng(random_state)
637 signals = []
638 n_samples = rand_gen.integers(
639 min_n_samples, high=max_n_samples, size=len(precisions), endpoint=True
640 )
642 mean = np.zeros(precisions[0].shape[0])
643 signals.extend(
644 rand_gen.multivariate_normal(mean, np.linalg.inv(prec), (n,))
645 for n, prec in zip(n_samples, precisions)
646 )
647 return signals
650def generate_group_sparse_gaussian_graphs(
651 n_subjects=5,
652 n_features=30,
653 min_n_samples=30,
654 max_n_samples=50,
655 density=0.1,
656 random_state=0,
657 verbose=0,
658):
659 """Generate signals drawn from a sparse Gaussian graphical model.
661 Parameters
662 ----------
663 n_subjects : :obj:`int`, default=5
664 Number of subjects.
666 n_features : :obj:`int`, default=30
667 Number of signals per subject to generate.
669 min_n_samples, max_n_samples : :obj:`int`, optional
670 Each subject has a random number of samples, between these two
671 numbers. All signals for a given subject have the same number of
672 samples. Defaults are 30 and 50.
674 density : :obj:`float`, default=0.1
675 Density of edges in graph topology.
677 %(random_state)s
678 default=0
680 %(verbose0)s
682 Returns
683 -------
684 signals : :obj:`list` of :obj:`numpy.ndarray`
685 signals[n] contains the signals for subject n,
686 as an array of shape (n_samples, n_features).
687 The list has length n_subjects; n_samples varies from subject to subject.
689 precisions : :obj:`list` of :obj:`numpy.ndarray`
690 Precision matrices.
692 topology : :obj:`numpy.ndarray`
693 Binary array giving the graph topology used for generating covariances
694 and signals.
696 """
697 rand_gen = np.random.default_rng(random_state)
698 # Generate topology (upper triangular binary matrix, with zeros on the
699 # diagonal)
700 topology = np.empty((n_features, n_features))
701 topology[:, :] = np.triu(
702 (
703 rand_gen.integers(
704 0, high=int(1.0 / density), size=n_features * n_features
705 )
706 ).reshape(n_features, n_features)
707 == 0,
708 k=1,
709 )
711 # Generate edges weights on topology
712 precisions = []
713 mask = topology > 0
714 for _ in range(n_subjects):
715 # See also sklearn.datasets.make_sparse_spd_matrix
716 prec = topology.copy()
717 prec[mask] = rand_gen.uniform(low=0.1, high=0.8, size=(mask.sum()))
718 prec += np.eye(prec.shape[0])
719 prec = np.dot(prec.T, prec)
721 # Assert precision matrix is spd
722 np.testing.assert_almost_equal(prec, prec.T)
723 eigenvalues = np.linalg.eigvalsh(prec)
724 if eigenvalues.min() < 0:
725 raise ValueError(
726 "Failed generating a positive definite precision "
727 "matrix. Decreasing n_features can help solving "
728 "this problem."
729 )
730 precisions.append(prec)
732 # Returns the topology matrix of precision matrices.
733 topology += np.eye(*topology.shape)
734 topology = np.dot(topology.T, topology)
735 topology = topology > 0
736 assert np.all(topology == topology.T)
737 logger.log(
738 f"Sparsity: {1.0 * topology.sum() / topology.shape[0] ** 2:f}",
739 verbose=verbose,
740 )
742 # Generate temporal signals
743 signals = _generate_signals_from_precisions(
744 precisions,
745 min_n_samples=min_n_samples,
746 max_n_samples=max_n_samples,
747 random_state=rand_gen,
748 )
749 return signals, precisions, topology
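# Usage sketch for generate_group_sparse_gaussian_graphs (minimal example):
# three subjects sharing one sparse graph topology.
from nilearn._utils.data_gen import generate_group_sparse_gaussian_graphs

signals, precisions, topology = generate_group_sparse_gaussian_graphs(
    n_subjects=3, n_features=10, density=0.2, random_state=42
)
print(len(signals), signals[0].shape)  # 3 subjects, each (n_samples, 10)
print(precisions[0].shape)             # (10, 10) precision matrix
print(topology.shape)                  # (10, 10) boolean adjacency pattern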
752def basic_paradigm(condition_names_have_spaces=False):
753 """Generate basic paradigm.
755 Parameters
756 ----------
757 condition_names_have_spaces : :obj:`bool`, default=False
758 If True, the generated condition names contain spaces.
760 Returns
761 -------
762 events : pd.DataFrame
763 Basic experimental paradigm with events data.
765 """
766 conditions = [
767 "c 0",
768 "c 0",
769 "c 0",
770 "c 1",
771 "c 1",
772 "c 1",
773 "c 2",
774 "c 2",
775 "c 2",
776 ]
778 if not condition_names_have_spaces:
779 conditions = [c.replace(" ", "") for c in conditions]
780 onsets = [30, 70, 100, 10, 30, 90, 30, 40, 60]
781 durations = 1 * np.ones(9)
782 events = pd.DataFrame(
783 {"trial_type": conditions, "onset": onsets, "duration": durations}
784 )
785 return events
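# Usage sketch for basic_paradigm (minimal example):
from nilearn._utils.data_gen import basic_paradigm

events = basic_paradigm()
print(events.columns.tolist())        # ['trial_type', 'onset', 'duration']
print(events["trial_type"].unique())  # ['c0' 'c1' 'c2']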
788def _basic_confounds(length, random_state=0):
789 """Generate random motion parameters \
790 (3 translation directions, 3 rotation directions).
792 Parameters
793 ----------
794 length : :obj:`int`
795 Length of basic confounds.
797 %(random_state)s
798 default=0
800 Returns
801 -------
802 confounds : :obj:`pandas.DataFrame`.
803 Basic confounds.
804 This DataFrame will have 9 columns:
805 'csf', 'white_matter', 'global_signal'
806 'rot_x', 'rot_y', 'rot_z',
807 'trans_x', 'trans_y', 'trans_z'.
809 """
810 rand_gen = np.random.default_rng(random_state)
811 columns = [
812 "csf",
813 "white_matter",
814 "global_signal",
815 "rot_x",
816 "rot_y",
817 "rot_z",
818 "trans_x",
819 "trans_y",
820 "trans_z",
821 ]
822 data = rand_gen.random((length, len(columns)))
823 confounds = pd.DataFrame(data, columns=columns)
824 return confounds
827def add_metadata_to_bids_dataset(bids_path, metadata, json_file=None):
828 """Add JSON file with specific metadata to BIDS dataset.
830 Note that no BIDS validation is performed on the metadata
831 or on the file path.
833 Parameters
834 ----------
835 bids_path : :obj:`str` or :obj:`pathlib.Path`
836 Path to the BIDS dataset where the file is to be added.
838 metadata : :obj:`dict`
839 Dictionary with metadata to be added to the JSON file.
841 json_file : :obj:`str` or :obj:`pathlib.Path`, default=None
842 Path to the json file relative to the root of the BIDS dataset.
843 If no json_file is specified, a default path is used
844 that matches the defaults of `create_fake_bids_dataset`,
845 to make it easier to modify datasets used during tests.
848 Returns
849 -------
850 pathlib.Path
851 Full path to the json file created.
852 """
853 if json_file is None:
854 json_file = (
855 Path(bids_path)
856 / "derivatives"
857 / "sub-01"
858 / "ses-01"
859 / "func"
860 / "sub-01_ses-01_task-main_run-01_space-MNI_desc-preproc_bold.json"
861 )
862 else:
863 json_file = Path(bids_path) / json_file
865 with json_file.open("w") as f:
866 json.dump(metadata, f)
868 return json_file
871def generate_random_img(
872 shape,
873 affine=None,
874 random_state=0,
875):
876 """Create a random 3D or 4D image with a given shape and affine.
878 Parameters
879 ----------
880 shape : length-3 or length-4 tuple
881 The shape of the image being generated.
882 The number of elements determines the dimensionality of the image.
884 affine : 4x4 numpy.ndarray, default=None
885 The affine of the image
886 Will default to ``np.eye(4)`` if ``None`` is passed.
888 %(random_state)s
889 default=0
891 Returns
892 -------
893 data_img : 3D or 4D niimg
894 The data image.
896 mask_img : 3D niimg
897 The mask image.
898 """
899 if affine is None:
900 affine = np.eye(4)
901 rng = np.random.default_rng(random_state)
902 data = rng.standard_normal(size=shape)
903 data_img = Nifti1Image(data, affine)
904 if len(shape) == 4:
905 mask_data = as_ndarray(data[..., 0] > 0.2, dtype=np.int8)
906 else:
907 mask_data = as_ndarray(data > 0.2, dtype=np.int8)
909 mask_img = Nifti1Image(mask_data, affine)
911 return data_img, mask_img
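# Usage sketch for generate_random_img (minimal example): a 4D image and the
# 3D mask derived from its first volume.
from nilearn._utils.data_gen import generate_random_img

data_img, mask_img = generate_random_img(shape=(9, 9, 9, 5), random_state=42)
print(data_img.shape)  # (9, 9, 9, 5)
print(mask_img.shape)  # (9, 9, 9)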
914def create_fake_bids_dataset(
915 base_dir=None,
916 n_sub=10,
917 n_ses=2,
918 tasks=None,
919 n_runs=None,
920 with_derivatives=True,
921 with_confounds=True,
922 confounds_tag="desc-confounds_timeseries",
923 random_state=0,
924 entities=None,
925 n_vertices=0,
926 n_voxels=4,
927 spaces=None,
928):
929 """Create a fake :term:`BIDS` dataset directory with dummy files.
931 Returns fake dataset directory name.
933 Parameters
934 ----------
935 base_dir : :obj:`str` or :obj:`pathlib.Path` (Absolute path). \
936 default=pathlib.Path()
937 Absolute directory path in which to create the fake :term:`BIDS`
938 dataset dir.
940 n_sub : :obj:`int`, default=10
941 Number of subjects to be simulated in the dataset.
943 n_ses : :obj:`int`, default=2
944 Number of sessions to be simulated in the dataset.
946 Specifying n_ses=0 will only produce runs and files without the
947 optional session field.
949 tasks : :obj:`list` of :obj:`str`, default=["localizer", "main"]
950 List of tasks to be simulated in the dataset.
952 n_runs : :obj:`list` of :obj:`int`, default=[1, 3]
953 Number of runs to create, where each element indicates the
954 number of runs for the corresponding task.
955 The length of this list must match the number of items in ``tasks``.
956 Each run contains 30 volumes.
957 Files will be generated without run entity
958 if a value is equal to 0 or less.
960 with_derivatives : :obj:`bool`, default=True
961 In the case derivatives are included, they come with two spaces and
962 descriptions.
963 Spaces are 'MNI' and 'T1w'.
964 Descriptions are 'preproc' and :term:`fMRIPrep`.
965 Only space 'T1w' includes both descriptions.
967 with_confounds : :obj:`bool`, default=True
968 Whether to generate associated confounds files or not.
970 confounds_tag : :obj:`str`, default="desc-confounds_timeseries"
971 Filename "suffix" to use for the confounds files,
972 if they are generated.
973 Defaults to `desc-confounds_timeseries` as in :term:`fMRIPrep` >= 20.2
974 but can be other values (e.g. "desc-confounds_regressors" as
975 in :term:`fMRIPrep` < 20.2).
977 %(random_state)s
978 default=0
980 entities : :obj:`dict`, optional
981 Extra entity to add to the :term:`BIDS` filename with a list of values.
982 For example, if you want to add an 'echo' entity
983 with values '1' for some files and '2' for others,
984 you would pass: ``entities={"echo": ['1', '2']}``.
986 n_vertices : :obj:`int`, default = 0
987 Number of vertices for surface data.
988 If n_vertices == 0 only dummy gifti files will be generated.
989 Use n_vertices == 10242 to match the number of vertices
990 in fsaverage5.
992 n_voxels : :obj:`int`, default = 4
993 Number of voxels along x, y, z dimensions for volume data.
995 spaces : :obj:`list` of :obj:`str`, optional.
996 Defaults to ``("MNI", "T1w")``
998 Returns
999 -------
1000 dataset directory name : :obj:`pathlib.Path`
1001 'bids_dataset'.
1003 Notes
1004 -----
1005 Creates a directory with dummy files.
1007 """
1008 if base_dir is None:
1009 base_dir = Path()
1010 if tasks is None:
1011 tasks = ["localizer", "main"]
1012 if n_runs is None:
1013 n_runs = [1, 3]
1014 if spaces is None:
1015 spaces = ("MNI", "T1w")
1017 rand_gen = np.random.default_rng(random_state)
1019 bids_dataset_dir = "bids_dataset"
1020 bids_path = Path(base_dir) / bids_dataset_dir
1022 for task_ in tasks:
1023 check_bids_label(task_)
1025 if not isinstance(n_runs, list) or not all(
1026 isinstance(x, int) for x in n_runs
1027 ):
1028 raise TypeError("n_runs must be a list of integers.")
1030 if len(tasks) != len(n_runs):
1031 raise ValueError(
1032 "The number of tasks and number of runs must be the same."
1033 f"Got {len(tasks)} tasks and {len(n_runs)} runs."
1034 )
1036 if entities is None:
1037 entities = {}
1038 _check_entities_and_labels(entities)
1040 _mock_bids_dataset(
1041 bids_path=bids_path,
1042 n_sub=n_sub,
1043 n_ses=n_ses,
1044 tasks=tasks,
1045 n_runs=n_runs,
1046 entities=entities,
1047 n_voxels=n_voxels,
1048 rand_gen=rand_gen,
1049 )
1051 if with_derivatives:
1052 if not with_confounds:
1053 confounds_tag = None
1055 _mock_bids_derivatives(
1056 bids_path=bids_path,
1057 n_sub=n_sub,
1058 n_ses=n_ses,
1059 tasks=tasks,
1060 n_runs=n_runs,
1061 confounds_tag=confounds_tag,
1062 entities=entities,
1063 n_voxels=n_voxels,
1064 rand_gen=rand_gen,
1065 n_vertices=n_vertices,
1066 spaces=spaces,
1067 )
1069 return bids_path
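# Usage sketch for create_fake_bids_dataset (minimal example, written to a
# temporary directory; assumes nilearn's bundled test helpers, which provide
# the confounds files, are installed):
import tempfile

from nilearn._utils.data_gen import create_fake_bids_dataset

with tempfile.TemporaryDirectory() as tmp_dir:
    bids_path = create_fake_bids_dataset(
        base_dir=tmp_dir, n_sub=2, n_ses=1, tasks=["main"], n_runs=[2]
    )
    print(bids_path.name)                               # 'bids_dataset'
    print(sorted(p.name for p in bids_path.iterdir()))  # subjects, derivatives, README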
1072def _check_entities_and_labels(entities):
1073 """Check entities and labels are BIDS compliant.
1075 Parameters
1076 ----------
1077 entities : :obj:`dict`, optional
1078 Extra entity to add to the BIDS filename with a list of values.
1079 For example, if you want to add an 'echo' entity
1080 with values '1' for some files and '2' for others,
1081 you would pass: ``entities={"echo": ['1', '2']}``.
1082 """
1083 if len(entities.keys()) > 1:
1084 # Generating dataset with more than one extra entity
1085 # becomes too complex.
1086 # Won't be implemented until there is a need.
1087 raise ValueError("Only a single extra entity is supported for now.")
1089 for key, value in entities.items():
1090 if key not in [
1091 *bids_entities()["raw"],
1092 *bids_entities()["derivatives"],
1093 ]:
1094 allowed_entities = [
1095 *bids_entities()["raw"],
1096 *bids_entities()["derivatives"],
1097 ]
1098 raise ValueError(
1099 f"Invalid entity: {key}. Allowed entities are: "
1100 f"{allowed_entities}"
1101 )
1102 for label_ in value:
1103 check_bids_label(label_)
1106def _mock_bids_dataset(
1107 bids_path,
1108 n_sub,
1109 n_ses,
1110 tasks,
1111 n_runs,
1112 entities,
1113 n_voxels,
1114 rand_gen,
1115):
1116 """Create a fake raw :term:`bids<BIDS>` dataset directory with dummy files.
1118 Parameters
1119 ----------
1120 bids_path : :obj:`pathlib.Path`
1121 Path where to create the fake :term:`BIDS` dataset.
1123 n_sub : :obj:`int`
1124 Number of subjects to be simulated in the dataset.
1126 n_ses : :obj:`int`
1127 Number of sessions to be simulated in the dataset.
1128 If 0, no session level is created.
1130 tasks : :obj:`list` of :obj:`str`
1131 List of tasks to be simulated in the dataset.
1133 n_runs : :obj:`list` of :obj:`int`
1134 Number of runs to create, where each element indicates the
1135 number of runs for the corresponding task.
1136 No run entity will be used if a value is equal to 0 or less.
1138 entities : :obj:`dict`, optional
1139 Extra entities to add to the BIDS filename with a list of values.
1141 n_voxels : :obj:`int`
1142 Number of voxels along a given axis in the functional image.
1144 rand_gen : :obj:`numpy.random.Generator` instance
1145 Random number generator.
1147 """
1148 bids_path.mkdir(parents=True, exist_ok=True)
1150 bids_path.joinpath("README.txt").write_text("")
1152 for subject, session in itertools.product(
1153 _listify(n_sub), _listify(n_ses)
1154 ):
1155 subses_dir = bids_path / f"sub-{subject}"
1156 if session != "":
1157 subses_dir = subses_dir / f"ses-{session}"
1159 if session in ("01", ""):
1160 _write_bids_raw_anat(subses_dir, subject, session)
1162 func_path = subses_dir / "func"
1163 func_path.mkdir(parents=True, exist_ok=True)
1165 for task, n_run in zip(tasks, n_runs):
1166 for run in _listify(n_run):
1167 if entities:
1168 for key in entities:
1169 for label in entities[key]:
1170 fields = _init_fields(
1171 subject=subject,
1172 session=session,
1173 task=task,
1174 run=run,
1175 )
1176 if key in bids_entities()["raw"]:
1177 fields["entities"][key] = label
1178 _write_bids_raw_func(
1179 func_path=func_path,
1180 fields=fields,
1181 n_voxels=n_voxels,
1182 rand_gen=rand_gen,
1183 )
1185 else:
1186 fields = _init_fields(
1187 subject=subject, session=session, task=task, run=run
1188 )
1189 _write_bids_raw_func(
1190 func_path=func_path,
1191 fields=fields,
1192 n_voxels=n_voxels,
1193 rand_gen=rand_gen,
1194 )
1197def _mock_bids_derivatives(
1198 bids_path,
1199 n_sub,
1200 n_ses,
1201 tasks,
1202 n_runs,
1203 confounds_tag,
1204 entities,
1205 n_voxels,
1206 rand_gen,
1207 n_vertices,
1208 spaces,
1209):
1210 """Create a fake derivatives :term:`bids<BIDS>` dataset directory \
1211 with dummy files.
1213 Parameters
1214 ----------
1215 bids_path : :obj:`pathlib.Path`
1216 Path where to create the fake :term:`BIDS` dataset.
1218 n_sub : :obj:`int`
1219 Number of subjects to be simulated in the dataset.
1221 n_ses : :obj:`int`
1222 Number of sessions to be simulated in the dataset.
1223 If 0, no session level is created.
1225 tasks : :obj:`list` of :obj:`str`
1226 List of tasks to be simulated in the dataset.
1228 n_runs : :obj:`list` of :obj:`int`
1229 Number of runs to create, where each element indicates the
1230 number of runs for the corresponding task.
1231 No run entity will be used if a value is equal to 0 or less.
1233 confounds_tag : :obj:`str`
1234 Filename "suffix":
1235 For example: `desc-confounds_timeseries`
1236 or "desc-confounds_regressors".
1238 entities : :obj:`dict`
1239 Extra entity to add to the BIDS filename with a list of values.
1241 n_voxels : :obj:`int`
1242 Number of voxels along a given axis in the functional image.
1244 rand_gen : :obj:`numpy.random.Generator` instance
1245 Random number generator.
1247 n_vertices : :obj:`int`
1248 Number of vertices for surface data.
1249 If n_vertices == 0 only dummy gifti files will be generated.
1250 Use n_vertices == 10242 to match the number of vertices
1251 in fsaverage5.
1253 spaces : :obj:`list` of :obj:`str`, optional.
1254 """
1255 bids_path = bids_path / "derivatives"
1256 bids_path.mkdir(parents=True, exist_ok=True)
1258 for subject, session in itertools.product(
1259 _listify(n_sub), _listify(n_ses)
1260 ):
1261 subses_dir = bids_path / f"sub-{subject}"
1262 if session != "":
1263 subses_dir = subses_dir / f"ses-{session}"
1265 func_path = subses_dir / "func"
1266 func_path.mkdir(parents=True, exist_ok=True)
1268 for task, n_run in zip(tasks, n_runs):
1269 for run in _listify(n_run):
1270 if entities:
1271 for key in entities:
1272 for label in entities[key]:
1273 fields = _init_fields(
1274 subject=subject,
1275 session=session,
1276 task=task,
1277 run=run,
1278 )
1279 fields["entities"][key] = label
1280 _write_bids_derivative_func(
1281 func_path=func_path,
1282 fields=fields,
1283 n_voxels=n_voxels,
1284 rand_gen=rand_gen,
1285 confounds_tag=confounds_tag,
1286 n_vertices=n_vertices,
1287 spaces=spaces,
1288 )
1290 else:
1291 fields = _init_fields(
1292 subject=subject, session=session, task=task, run=run
1293 )
1294 _write_bids_derivative_func(
1295 func_path=func_path,
1296 fields=fields,
1297 n_voxels=n_voxels,
1298 rand_gen=rand_gen,
1299 confounds_tag=confounds_tag,
1300 n_vertices=n_vertices,
1301 spaces=spaces,
1302 )
1305def _listify(n):
1306 """Return a list of zero padded BIDS labels.
1308 If n is 0 or less, return a list containing a single empty string.
1310 Parameters
1311 ----------
1312 n : :obj:`int`
1313 Number of labels to create.
1315 Returns
1316 -------
1317 List of labels : :obj:`list` of :obj:`str`
1319 """
1320 return [""] if n <= 0 else [f"{label:02}" for label in range(1, n + 1)]
1323def _init_fields(subject, session, task, run):
1324 """Initialize fields to help create a valid BIDS filename.
1326 Parameters
1327 ----------
1328 subject : :obj:`str`
1329 Subject label
1331 session : :obj:`str`
1332 Session label
1334 task : :obj:`str`
1335 Task label
1337 run : :obj:`str`
1338 Run label
1340 Returns
1341 -------
1342 dict
1343 Fields used to create a BIDS filename.
1345 See Also
1346 --------
1347 create_bids_filename
1349 """
1350 fields = {
1351 "suffix": "bold",
1352 "extension": "nii.gz",
1353 "entities": {
1354 "sub": subject,
1355 "ses": session,
1356 "task": task,
1357 "run": run,
1358 },
1359 }
1360 return fields
1363def _write_bids_raw_anat(subses_dir, subject, session) -> None:
1364 """Create a dummy anat T1w file.
1366 Parameters
1367 ----------
1368 subses_dir : :obj:`pathlib.Path`
1369 Subject session directory
1371 subject : :obj:`str`
1372 Subject label
1374 session : :obj:`str`
1375 Session label
1376 """
1377 anat_path = subses_dir / "anat"
1378 anat_path.mkdir(parents=True, exist_ok=True)
1379 fields = {
1380 "suffix": "T1w",
1381 "extension": "nii.gz",
1382 "entities": {"sub": subject, "ses": session},
1383 }
1384 (anat_path / create_bids_filename(fields)).write_text("")
1387def _write_bids_raw_func(
1388 func_path,
1389 fields,
1390 n_voxels,
1391 rand_gen,
1392):
1393 """Create BIDS functional raw nifti, json sidecar and events files.
1395 Parameters
1396 ----------
1397 func_path : :obj:`pathlib.Path`
1398 Path to a subject functional directory.
1400 fields : :obj:`dict`
1401 Fields (suffix, extension, entities) used to build the BIDS filename,
1402 as returned by ``_init_fields``.
1404 n_voxels : :obj:`int`
1405 Number of voxels along a given axis in the functional image.
1407 rand_gen : :obj:`numpy.random.Generator` instance
1408 Random number generator.
1410 """
1411 n_time_points = 30
1412 bold_path = func_path / create_bids_filename(fields)
1414 write_fake_bold_img(
1415 bold_path,
1416 [n_voxels, n_voxels, n_voxels, n_time_points],
1417 random_state=rand_gen,
1418 )
1420 repetition_time = 1.5
1421 fields["extension"] = "json"
1422 param_path = func_path / create_bids_filename(fields)
1423 param_path.write_text(json.dumps({"RepetitionTime": repetition_time}))
1425 fields["suffix"] = "events"
1426 fields["extension"] = "tsv"
1427 events_path = func_path / create_bids_filename(fields)
1428 basic_paradigm().to_csv(events_path, sep="\t", index=None)
1431def _write_bids_derivative_func(
1432 func_path,
1433 fields,
1434 n_voxels,
1435 rand_gen,
1436 confounds_tag,
1437 n_vertices=0,
1438 spaces=None,
1439):
1440 """Create BIDS functional derivative and confounds files.
1442 Nifti files created come with two spaces and descriptions.
1443 Spaces are: 'MNI' and 'T1w'.
1444 Descriptions are: 'preproc' and :term:`fMRIPrep`.
1445 Only space 'T1w' includes both descriptions.
1447 Gifti files are in "fsaverage5" space for both hemispheres.
1449 Parameters
1450 ----------
1451 func_path : :obj:`pathlib.Path`
1452 Path to a subject functional directory.
1454 fields : :obj:`dict`
1455 Fields (suffix, extension, entities) used to build the BIDS filename,
1456 as returned by ``_init_fields``.
1458 n_voxels : :obj:`int`
1459 Number of voxels along a given axis in the functional image.
1461 rand_gen : :obj:`numpy.random.Generator` instance
1462 Random number generator.
1464 confounds_tag : :obj:`str`, optional.
1465 Filename "suffix":
1466 For example: `desc-confounds_timeseries`
1467 or "desc-confounds_regressors".
1469 n_vertices : :obj:`int`, default = 0
1470 Number of vertices for surface data.
1471 If n_vertices == 0 only dummy gifti files will be generated.
1472 Use n_vertices == 10242 to match the number of vertices
1473 in fsaverage5.
1475 spaces : :obj:`list` of :obj:`str`, optional.
1476 Defaults to ``("MNI", "T1w")``
1477 """
1478 n_time_points = 30
1480 if confounds_tag is not None:
1481 fields["suffix"] = confounds_tag
1482 fields["extension"] = "tsv"
1483 confounds_path = func_path / create_bids_filename(
1484 fields=fields, entities_to_include=bids_entities()["raw"]
1485 )
1486 confounds, metadata = get_legal_confound()
1487 confounds.to_csv(
1488 confounds_path, sep="\t", index=None, encoding="utf-8"
1489 )
1490 with confounds_path.with_suffix(".json").open("w") as f:
1491 json.dump(metadata, f)
1493 fields["suffix"] = "bold"
1494 fields["extension"] = "nii.gz"
1496 shape = [n_voxels, n_voxels, n_voxels, n_time_points]
1498 entities_to_include = [
1499 *bids_entities()["raw"],
1500 *bids_entities()["derivatives"],
1501 ]
1503 for space in spaces:
1504 for desc in ("preproc", "fmriprep"):
1505 # Only space 'T1w' includes both descriptions.
1506 if space == "MNI" and desc == "fmriprep":
1507 continue
1509 fields["entities"]["space"] = space
1510 fields["entities"]["desc"] = desc
1512 bold_path = func_path / create_bids_filename(
1513 fields=fields, entities_to_include=entities_to_include
1514 )
1515 write_fake_bold_img(bold_path, shape=shape, random_state=rand_gen)
1517 fields["entities"]["space"] = "fsaverage5"
1518 fields["extension"] = "func.gii"
1519 fields["entities"].pop("desc")
1520 for hemi in ["L", "R"]:
1521 fields["entities"]["hemi"] = hemi
1522 gifti_path = func_path / create_bids_filename(
1523 fields=fields, entities_to_include=entities_to_include
1524 )
1525 _write_fake_bold_gifti(
1526 gifti_path, n_time_points=n_time_points, n_vertices=n_vertices
1527 )