import os
from typing import Any, Dict, List, Optional

import mne
import numpy as np
[docs]
def dreams_dataset(
    path: str = "Data/DatabaseREMs/",
    subject: int = 1,
    dataGroups: Optional[List[str]] = None,
    dataType: str = "raw",
) -> Dict[str, Any]:
    """
    Load one subject of the DREAMS REMs Database.

    Description
    -----------
    Reads ``excerpt{subject}.edf`` from ``path`` and keeps only the channels
    that belong to the requested signal groups (EEG, EOG, EMG, ECG,
    respiratory, oximeter, others). The signals are returned either as one
    continuous recording or as consecutive, non-overlapping 5-second
    windows. If ``Hypnogram_excerpt{subject}.txt`` is present in the same
    folder, its sleep-stage codes are returned as labels.

    The dataset can be found at:
        - https://zenodo.org/records/2650142

    Parameters
    ----------
    path : str
        Folder containing the DREAMS dataset files (EDF and hypnogram text
        files).
    subject : int
        Index of the subject to load (e.g., subject=1 loads `excerpt1.edf`).
    dataGroups : list of str, optional
        Signal groups to include. Options are
        ["EEG","EOG","EMG","ECG","Resp","Oxímetro","Outros"].
        ``None`` (the default) selects every group. Unknown group names and
        channels that are absent from the EDF file are ignored.
    dataType : str
        Type of data representation:
        - "raw"    : continuous signals, one single trial.
        - "epochs" : consecutive non-overlapping windows of 5 s.

    Returns
    -------
    dict
        Dictionary with the following fields:
        - X : numpy array
            * raw mode:    (1, 1, n_channels, n_windows * samples_per_window)
            * epochs mode: (n_windows, 1, n_channels, samples_per_window)
            Trailing samples that do not fill a whole window are dropped in
            both modes.
        - y : numpy array or None
            Hypnogram codes: one per sample in raw mode (each code repeated
            ``samples_per_window`` times), one per line of the hypnogram
            file in epochs mode. ``None`` when no hypnogram file exists.
        - sfreq : float
            Sampling frequency reported by the EDF file.
        - y_dict : dict
            Mapping of sleep stage names to integers.
        - events : None
            Placeholder for event dictionary (not implemented).
        - ch_names : list
            Names of the channels present in ``X``, in the order of its
            channel axis.
        - tmin : float
            Start time of the signals (always 0.0).
        - data_type : str
            Type of data returned ("raw" or "epochs").

    Raises
    ------
    FileNotFoundError
        If the EDF file for the given subject is not found.
    ValueError
        If ``dataType`` is neither "raw" nor "epochs", if none of the
        requested groups has a channel in the EDF file, or if the sampling
        frequency yields an empty window.

    Examples
    --------
    Load subject 1 data in epochs format:

    >>> from bciflow.datasets import dreams_dataset
    >>> eeg_data = dreams_dataset(subject=1, dataType="epochs")
    >>> print(eeg_data['X'].shape)  # Shape of the EEG data
    >>> print(eeg_data['y'].shape)  # Labels aligned with epochs

    Load subject 2 raw continuous data with EEG + ECG only:

    >>> eeg_data = dreams_dataset(subject=2, dataGroups=["EEG","ECG"], dataType="raw")
    >>> print(eeg_data['X'].shape)
    >>> print(len(eeg_data['ch_names']))
    """
    CHANNEL_GROUPS = {
        "EEG": ["FP1-A2", "FP2-A1", "CZ-A1", "CZ2-A1", "O1-A2", "O2-A1"],
        "EOG": ["EOG1", "EOG2"],
        "EMG": ["EMG1", "EMG2", "EMG3"],
        "ECG": ["ECG", "PULSE"],
        "Resp": ["VTH", "VAB", "VTOT", "NAF1", "NAF2P-A1", "PR", "PCPAP"],
        "Oxímetro": ["SAO2"],
        "Outros": ["PHONO", "POS"],
    }
    window_size = 5.0  # seconds per window

    # Reject a bad mode before doing any I/O; previously it only surfaced as
    # an UnboundLocalError at the very end of the function.
    if dataType not in ("raw", "epochs"):
        raise ValueError(
            f"dataType must be 'raw' or 'epochs', got {dataType!r}"
        )

    if dataGroups is None:
        dataGroups = list(CHANNEL_GROUPS)
    elif isinstance(dataGroups, str):
        # A bare string would otherwise be iterated character by character.
        dataGroups = [dataGroups]

    # Build the expected file names directly so the error message can name
    # the file that is missing.
    edf_file = os.path.join(path, f"excerpt{subject}.edf")
    if not os.path.isfile(edf_file):
        raise FileNotFoundError(f"EDF file not found: {edf_file}")

    hypnogram_file = os.path.join(path, f"Hypnogram_excerpt{subject}.txt")
    if not os.path.isfile(hypnogram_file):
        print(f"Hypnogram file not found: {hypnogram_file} (proceeding without labels)")
        hypnogram_file = None

    # Load the recording
    raw = mne.io.read_raw_edf(edf_file, preload=True, verbose="ERROR")
    sfreq = raw.info['sfreq']
    all_ch_names = list(raw.ch_names)
    signals = raw.get_data()  # (channels, samples)

    # Keep the channels of every requested group (not only the last one).
    picked = _dreams_pick_channels(all_ch_names, dataGroups, CHANNEL_GROUPS)
    if not picked:
        raise ValueError(
            f"None of the requested groups {list(dataGroups)} has a channel "
            f"in {edf_file}"
        )
    selected = signals[picked, :]
    selected_names = [all_ch_names[i] for i in picked]

    # Load hypnogram if available
    stages = _dreams_read_hypnogram(hypnogram_file) if hypnogram_file else None

    # Cut the recording into whole, non-overlapping windows.
    samples_per_window = int(window_size * sfreq)
    if samples_per_window <= 0:
        raise ValueError(
            f"Sampling frequency {sfreq} gives an empty {window_size}s window"
        )
    n_windows = selected.shape[1] // samples_per_window
    usable = selected[:, : n_windows * samples_per_window]

    # Map sleep stage codes
    # NOTE(review): verify this coding against the dataset documentation.
    y_dict = {
        "Unknown": 0,
        "Stage 1": 1,
        "Stage 2": 2,
        "Stage 3": 3,
        "REM": 4,
        "Awake": 5
    }

    # NOTE(review): the number of hypnogram entries is not reconciled with
    # n_windows; confirm they match for the files in use.
    if dataType == "raw":
        # The continuous signal is used as-is. Reshaping the windowed array
        # without moving the channel axis first would interleave channels.
        X = usable[np.newaxis, np.newaxis, :, :]
        y = None if stages is None else np.repeat(stages, samples_per_window)
    else:
        windows = usable.reshape(len(picked), n_windows, samples_per_window)
        # (channels, windows, samples) -> (windows, 1, channels, samples)
        X = np.ascontiguousarray(windows.transpose(1, 0, 2)[:, np.newaxis, :, :])
        y = stages

    return {
        "X": X,
        "y": y,
        "sfreq": sfreq,
        "y_dict": y_dict,
        "events": None,
        "ch_names": selected_names,
        "tmin": 0.0,
        "data_type": dataType
    }


def _dreams_pick_channels(ch_names, groups, channel_groups):
    """Return the indices in ``ch_names`` of every channel of ``groups``.

    Indices follow the order in which the groups were requested. Unknown
    groups, channels missing from ``ch_names`` and duplicates are skipped.
    """
    picked = []
    for group in groups:
        for channel in channel_groups.get(group, ()):
            if channel in ch_names:
                index = ch_names.index(channel)
                if index not in picked:
                    picked.append(index)
    return picked


def _dreams_read_hypnogram(hypnogram_file):
    """Read the integer stage codes from a hypnogram text file.

    The first line is treated as a header and skipped; blank lines are
    ignored so a trailing newline does not break the integer parsing.
    """
    with open(hypnogram_file, "r") as f:
        lines = f.readlines()
    codes = [int(text) for text in (line.strip() for line in lines[1:]) if text]
    return np.array(codes, dtype=int)