import numpy as np
import h5py
from typing import List, Optional, Dict, Any
[docs]
def mengu(subject: int = 1,
session_list: Optional[List[str]] = None,
labels: Optional[List[str]] = None,
depth: Optional[List[str]] = None,
path='data/mengu/'):
'''
Description
-----------
This function loads EEG data for a specific subject and session from the MenGu dataset.
It processes the data to fit the structure of the `eegdata` dictionary, which is used
for further processing and analysis.
The dataset can be found at:
- https://springernature.figshare.com/collections/An_open_dataset_for_human_SSVEPs_in_the_frequency_range_of_1-60_Hz/6752910/1
Parameters
----------
subject : int
index of the subject to retrieve the data from.
session_list : list, optional
list of session codes.
default state is None, which results on the collection of all session.
labels : list
list of labels used in the dataset.
default state is None, which results on all labels being used.
depth : list
list of depths used.
default state is None, which results on all depths being used.
path : str
path to the foldar that contains all dataset files.
Returns
----------
dict
A dictionary containing the following keys:
- X: EEG data as a numpy array.
- y: Labels corresponding to the EEG data.
- sfreq: Sampling frequency of the EEG data.
- y_dict: Mapping of labels to integers.
- events: Dictionary describing event markers.
- ch_names: List of channel names.
- tmin: Start time of the EEG data.
- data_type: Type of the data ('epochs').
Raises
------
ValueError
If any of the input parameters are invalid or if the specified file does not exist.
Examples
--------
Load EEG data for subject 1, all sessions, and default labels:
>>> from bciflow.datasets import mengu
>>> eeg_data = mengu(subject=1)
>>> print(eeg_data['X'].shape) # Shape of the EEG data
>>> print(eeg_data['y']) # Labels
'''
# Check if the subject input is valid
if type(subject) != int:
raise ValueError("subject has to be a int type value")
if subject > 30 or subject < 1:
raise ValueError("subject has to be between 1 and 30")
# Check if the session_list input is valid
_available_sessions = ['s%02d'%i for i in range(1, 12+1)]
if session_list == None:
session_list = _available_sessions
elif type(session_list) != list:
raise ValueError("session_list has to be an List or None type")
else:
for i in session_list:
if i not in _available_sessions:
raise ValueError("session_list has to be a sublist of ['s1', 's2', ..., 's12']")
# Check if the labels input is valid
_available_labels = ['f%02d'%i for i in range(1, 60+1)]
if labels == None:
labels = _available_labels
elif type(labels) != list:
raise ValueError("labels has to be a list type value")
else:
for i in labels:
if i not in _available_labels:
raise ValueError("labels has to be a sublist of ['f1', 'f2', ..., 'f60']")
# Check if the depth input is valid
_available_depths = ['low', 'high']
if depth == None:
depth = _available_depths
elif type(depth) != list:
raise ValueError("depth has to be a list type value")
else:
for i in depth:
if i not in _available_depths:
raise ValueError("depth has to be a sublist of ['low', 'high']")
# Check if the path input is valid
if type(path) != str:
raise ValueError("path has to be a str type value")
if path[-1] != '/':
path += '/'
# Set basic parameters of the clinical BCI challenge dataset
sfreq = 1000.
events = {'task_exec': [0, 5]}
ch_names = np.array(["FP1", "FPZ", "FP2", "AF3", "AF4", "F7", "F5", "F3", "F1",
"FZ", "F2", "F4", "F6", "F8", "FT7", "FC5", "FC3", "FC1",
"FCZ", "FC2", "FC4", "FC6", "FT8", "T7", "C5", "C3", "C1",
"CZ", "C2", "C4", "C6", "T8", "M1", "TP7", "CP5", "CP3",
"CP1", "CPZ", "CP2", "CP4", "CP6", "TP8", "M2", "P7", "P5",
"P3", "P1", "PZ", "P2", "P4", "P6", "P8", "PO7", "PO5", "PO3",
"POZ", "PO4", "PO6", "PO8", "CB1", "O1", "OZ", "O2", "CB2",])
tmin = 0.
with h5py.File(path+'data_s%d_64.mat'%subject, "r") as f:
data = np.asarray(f["datas"])
session_id = np.where(np.isin(_available_sessions, session_list))[0]
labels_id = np.where(np.isin(_available_labels, labels))[0]
depth_id = np.where(np.isin(_available_depths, depth))[0]
data = data[session_id, :, :, :, :]
data = data[:, labels_id, :, :, :]
data = data[:, :, :, :, depth_id]
X, y = [], []
for i in range(data.shape[0]):
for j in range(data.shape[1]):
for k in range(data.shape[4]):
X.append(data[i, j, :, :, k])
y.append(_available_labels[labels_id[j]])
X, y = np.array(X), np.array(y)
y_dict = {label: i for i, label in enumerate(labels)}
y = np.array([y_dict[i] for i in y])
print(X.shape, y.shape, y)
print(y_dict)
return {'X': X,
'y': y,
'sfreq': sfreq,
'y_dict': y_dict,
'events': events,
'ch_names': ch_names,
'tmin': tmin,
'data_type': "epochs"}