gaitsetpy.dataset.daphnet
Daphnet Dataset Loader and Utils. Maintainer: @aharshit123456
This file contains the Daphnet dataset loader class that inherits from BaseDatasetLoader.
'''
Daphnet Dataset Loader and Utils.
Maintainer: @aharshit123456

This file contains the Daphnet dataset loader class that inherits from BaseDatasetLoader.
'''

import os
import pandas as pd
import numpy as np
from typing import List, Dict, Tuple
from glob import glob
from ..core.base_classes import BaseDatasetLoader
from .utils import download_dataset, extract_dataset, sliding_window


class DaphnetLoader(BaseDatasetLoader):
    """
    Daphnet dataset loader class.

    This class handles loading and processing of the Daphnet dataset for gait analysis.
    """

    def __init__(self, max_workers: int = 8):
        """
        Initialize Daphnet loader with concurrent download support.

        Args:
            max_workers: Maximum number of concurrent download threads (default: 8)
        """
        super().__init__(
            name="daphnet",
            description="Daphnet Freezing of Gait Dataset - Contains accelerometer data from subjects with Parkinson's disease",
            max_workers=max_workers
        )
        # Static dataset facts: sensor placements, axis components per sensor,
        # sampling rate, and the meaning of each annotation code.
        self.metadata = {
            'sensors': ['shank', 'thigh', 'trunk'],
            'components': ['h_fd', 'v', 'h_l'],  # horizontal forward, vertical, horizontal lateral
            'sampling_frequency': 64,
            'annotations': {
                0: 'not_valid',
                1: 'no_freeze',
                2: 'freeze'
            }
        }

    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load Daphnet dataset from the specified directory.

        Args:
            data_dir: Directory to store/find the dataset
            **kwargs: Additional arguments (unused for Daphnet)

        Returns:
            Tuple of (data_list, names_list)
        """
        # Download and extract if needed
        download_dataset("daphnet", data_dir)
        extract_dataset("daphnet", data_dir)

        # FIX: join the path components individually instead of embedding a
        # hard-coded "/" separator, so the path is built portably.
        file_path = os.path.join(data_dir, "dataset_fog_release", "dataset")
        daphnet_data = []
        daphnet_names = []

        # Raw file layout: timestamp, three axes per sensor, annotation label.
        column_names = [
            "time", "shank_h_fd", "shank_v", "shank_h_l",
            "thigh_h_fd", "thigh_v", "thigh_h_l",
            "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"
        ]

        # Load all subject files (S*.txt), in deterministic sorted order.
        for file in sorted(glob(os.path.join(file_path, "S*.txt"))):
            daphnet_names.append(os.path.basename(file))

            df = pd.read_csv(file, sep=" ", names=column_names)
            df = df.set_index("time")

            # Euclidean magnitude of each sensor from its three axis components
            # (single loop replaces three copy-pasted expressions).
            for sensor in ("thigh", "shank", "trunk"):
                df[sensor] = np.sqrt(
                    df[f"{sensor}_h_l"] ** 2
                    + df[f"{sensor}_v"] ** 2
                    + df[f"{sensor}_h_fd"] ** 2
                )

            # Reorder columns for consistency: magnitude first, then axes, per sensor.
            df = df[["shank", "shank_h_fd", "shank_v", "shank_h_l",
                     "thigh", "thigh_h_fd", "thigh_v", "thigh_h_l",
                     "trunk", "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"]]

            daphnet_data.append(df)

        # Cache the loaded data on the instance for later reuse.
        self.data = daphnet_data
        self.names = daphnet_names

        return daphnet_data, daphnet_names

    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                               window_size: int = 192, step_size: int = 32) -> List[Dict]:
        """
        Create sliding windows from the Daphnet dataset.

        Args:
            data: List of DataFrames containing Daphnet data
            names: List of names corresponding to the data
            window_size: Size of the sliding window (default: 192)
            step_size: Step size for the sliding window (default: 32)

        Returns:
            List of dictionaries containing sliding windows for each DataFrame
        """
        windows_data = []

        for idx, df in enumerate(data):
            # Filter out invalid samples (annotation 0 means "not valid").
            df_filtered = df[df.annotations > 0]

            if df_filtered.empty:
                continue

            # Window every sensor column. DataFrame columns are unique, so the
            # original "processed_columns" dedup set was redundant and is removed.
            windows = [
                {"name": col, "data": sliding_window(df_filtered[col], window_size, step_size)}
                for col in df_filtered.columns if col != "annotations"
            ]

            # Annotations are windowed last so labels align with sensor windows.
            annotations_window = sliding_window(df_filtered["annotations"], window_size, step_size)
            windows.append({"name": "annotations", "data": annotations_window})

            windows_data.append({"name": names[idx], "windows": windows})

        return windows_data

    def get_supported_formats(self) -> List[str]:
        """
        Get list of supported file formats for Daphnet dataset.

        Returns:
            List of supported file extensions
        """
        return ['.txt']

    def get_sensor_info(self) -> Dict[str, List[str]]:
        """
        Get information about sensors in the dataset.

        Returns:
            Dictionary containing sensor information
        """
        return {
            'sensors': self.metadata['sensors'],
            'components': self.metadata['components'],
            'sampling_frequency': self.metadata['sampling_frequency']
        }

    def get_annotation_info(self) -> Dict[int, str]:
        """
        Get information about annotations in the dataset.

        Returns:
            Dictionary mapping annotation values to descriptions
        """
        return self.metadata['annotations']


# Legacy function wrappers for backward compatibility
def load_daphnet_data(data_dir: str):
    """
    Legacy function for loading Daphnet data.

    Args:
        data_dir: Directory to store the dataset

    Returns:
        Tuple of (data_list, names_list)
    """
    loader = DaphnetLoader()
    return loader.load_data(data_dir)


def create_sliding_windows(daphnet, daphnet_names, window_size=192, step_size=32):
    """
    Legacy function for creating sliding windows.

    Args:
        daphnet: List of dataframes containing Daphnet data
        daphnet_names: List of names of the Daphnet dataframes
        window_size: Size of the sliding window
        step_size: Step size for the sliding window

    Returns:
        List of dictionaries containing sliding windows for each DataFrame
    """
    loader = DaphnetLoader()
    return loader.create_sliding_windows(daphnet, daphnet_names, window_size, step_size)


def plot_dataset_sample():
    """Placeholder for dataset sample plotting."""
    pass


def plot_sliding_window():
    """Placeholder for sliding window plotting."""
    pass
class DaphnetLoader(BaseDatasetLoader):
    """
    Loader for the Daphnet Freezing-of-Gait dataset.

    Handles downloading, parsing, and windowing of the Daphnet
    accelerometer recordings for gait analysis.
    """

    def __init__(self, max_workers: int = 8):
        """
        Create the loader and record static dataset metadata.

        Args:
            max_workers: Maximum number of concurrent download threads (default: 8)
        """
        super().__init__(
            name="daphnet",
            description="Daphnet Freezing of Gait Dataset - Contains accelerometer data from subjects with Parkinson's disease",
            max_workers=max_workers,
        )
        # Fixed facts about the dataset: sensor placements, the three axis
        # components per sensor, the sample rate, and the label meanings.
        self.metadata = {
            'sensors': ['shank', 'thigh', 'trunk'],
            'components': ['h_fd', 'v', 'h_l'],  # horizontal forward, vertical, horizontal lateral
            'sampling_frequency': 64,
            'annotations': {0: 'not_valid', 1: 'no_freeze', 2: 'freeze'},
        }

    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Fetch (if necessary) and parse every subject recording.

        Args:
            data_dir: Directory to store/find the dataset
            **kwargs: Accepted for interface compatibility; unused here

        Returns:
            Tuple of (data_list, names_list)
        """
        download_dataset("daphnet", data_dir)
        extract_dataset("daphnet", data_dir)

        dataset_dir = os.path.join(data_dir, "dataset_fog_release/dataset")
        frames: List[pd.DataFrame] = []
        labels: List[str] = []

        # Raw column layout of the space-separated files.
        raw_columns = [
            "time", "shank_h_fd", "shank_v", "shank_h_l",
            "thigh_h_fd", "thigh_v", "thigh_h_l",
            "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"
        ]
        ordered_columns = ["shank", "shank_h_fd", "shank_v", "shank_h_l",
                           "thigh", "thigh_h_fd", "thigh_v", "thigh_h_l",
                           "trunk", "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"]

        for path in sorted(glob(os.path.join(dataset_dir, "S*.txt"))):
            labels.append(os.path.basename(path))

            frame = pd.read_csv(path, sep=" ", names=raw_columns).set_index("time")

            # Per-sensor resultant acceleration from the three axis channels.
            frame["thigh"] = np.sqrt(frame["thigh_h_l"] ** 2 + frame["thigh_v"] ** 2 + frame["thigh_h_fd"] ** 2)
            frame["shank"] = np.sqrt(frame["shank_h_l"] ** 2 + frame["shank_v"] ** 2 + frame["shank_h_fd"] ** 2)
            frame["trunk"] = np.sqrt(frame["trunk_h_l"] ** 2 + frame["trunk_v"] ** 2 + frame["trunk_h_fd"] ** 2)

            frames.append(frame[ordered_columns])

        # Keep the results on the instance as well as returning them.
        self.data = frames
        self.names = labels

        return frames, labels

    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                               window_size: int = 192, step_size: int = 32) -> List[Dict]:
        """
        Slice each recording's valid samples into overlapping windows.

        Args:
            data: List of DataFrames containing Daphnet data
            names: List of names corresponding to the data
            window_size: Size of the sliding window (default: 192)
            step_size: Step size for the sliding window (default: 32)

        Returns:
            List of dictionaries containing sliding windows for each DataFrame
        """
        results: List[Dict] = []

        for position, frame in enumerate(data):
            valid = frame[frame.annotations > 0]  # annotation 0 marks invalid samples
            if valid.empty:
                continue

            per_column = []
            for column in valid.columns:
                if column == "annotations":
                    continue  # labels are appended last, below
                per_column.append({
                    "name": column,
                    "data": sliding_window(valid[column], window_size, step_size),
                })

            per_column.append({
                "name": "annotations",
                "data": sliding_window(valid["annotations"], window_size, step_size),
            })

            results.append({"name": names[position], "windows": per_column})

        return results

    def get_supported_formats(self) -> List[str]:
        """Return the file extensions this loader can read."""
        return ['.txt']

    def get_sensor_info(self) -> Dict[str, List[str]]:
        """Return sensor placements, axis components, and sampling rate."""
        info_keys = ('sensors', 'components', 'sampling_frequency')
        return {key: self.metadata[key] for key in info_keys}

    def get_annotation_info(self) -> Dict[int, str]:
        """Return the mapping from annotation codes to their labels."""
        return self.metadata['annotations']
Daphnet dataset loader class.
This class handles loading and processing of the Daphnet dataset for gait analysis.
25 def __init__(self, max_workers: int = 8): 26 """ 27 Initialize Daphnet loader with concurrent download support. 28 29 Args: 30 max_workers: Maximum number of concurrent download threads (default: 8) 31 """ 32 super().__init__( 33 name="daphnet", 34 description="Daphnet Freezing of Gait Dataset - Contains accelerometer data from subjects with Parkinson's disease", 35 max_workers=max_workers 36 ) 37 self.metadata = { 38 'sensors': ['shank', 'thigh', 'trunk'], 39 'components': ['h_fd', 'v', 'h_l'], # horizontal forward, vertical, horizontal lateral 40 'sampling_frequency': 64, 41 'annotations': { 42 0: 'not_valid', 43 1: 'no_freeze', 44 2: 'freeze' 45 } 46 }
Initialize Daphnet loader with concurrent download support.
Args: max_workers: Maximum number of concurrent download threads (default: 8)
48 def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]: 49 """ 50 Load Daphnet dataset from the specified directory. 51 52 Args: 53 data_dir: Directory to store/find the dataset 54 **kwargs: Additional arguments (unused for Daphnet) 55 56 Returns: 57 Tuple of (data_list, names_list) 58 """ 59 # Download and extract if needed 60 download_dataset("daphnet", data_dir) 61 extract_dataset("daphnet", data_dir) 62 63 file_path = os.path.join(data_dir, "dataset_fog_release/dataset") 64 daphnet_data = [] 65 daphnet_names = [] 66 67 # Load all subject files 68 for file in sorted(glob(os.path.join(file_path, "S*.txt"))): 69 # Extract filename from path 70 filename = os.path.basename(file) 71 daphnet_names.append(filename) 72 73 # Load CSV with proper column names 74 column_names = [ 75 "time", "shank_h_fd", "shank_v", "shank_h_l", 76 "thigh_h_fd", "thigh_v", "thigh_h_l", 77 "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations" 78 ] 79 80 df = pd.read_csv(file, sep=" ", names=column_names) 81 82 # Set time as index 83 df = df.set_index("time") 84 85 # Calculate magnitude for each sensor 86 df["thigh"] = np.sqrt(df["thigh_h_l"]**2 + df["thigh_v"]**2 + df["thigh_h_fd"]**2) 87 df["shank"] = np.sqrt(df["shank_h_l"]**2 + df["shank_v"]**2 + df["shank_h_fd"]**2) 88 df["trunk"] = np.sqrt(df["trunk_h_l"]**2 + df["trunk_v"]**2 + df["trunk_h_fd"]**2) 89 90 # Reorder columns for consistency 91 df = df[["shank", "shank_h_fd", "shank_v", "shank_h_l", 92 "thigh", "thigh_h_fd", "thigh_v", "thigh_h_l", 93 "trunk", "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"]] 94 95 daphnet_data.append(df) 96 97 # Store loaded data 98 self.data = daphnet_data 99 self.names = daphnet_names 100 101 return daphnet_data, daphnet_names
Load Daphnet dataset from the specified directory.
Args: data_dir: Directory to store/find the dataset **kwargs: Additional arguments (unused for Daphnet)
Returns: Tuple of (data_list, names_list)
103 def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 104 window_size: int = 192, step_size: int = 32) -> List[Dict]: 105 """ 106 Create sliding windows from the Daphnet dataset. 107 108 Args: 109 data: List of DataFrames containing Daphnet data 110 names: List of names corresponding to the data 111 window_size: Size of the sliding window (default: 192) 112 step_size: Step size for the sliding window (default: 32) 113 114 Returns: 115 List of dictionaries containing sliding windows for each DataFrame 116 """ 117 windows_data = [] 118 119 for idx, df in enumerate(data): 120 # Filter out invalid data (annotations == 0) 121 df_filtered = df[df.annotations > 0] 122 123 if df_filtered.empty: 124 continue 125 126 windows = [] 127 processed_columns = set() 128 129 # Process each sensor column 130 for col in df_filtered.columns: 131 if col != "annotations" and col not in processed_columns: 132 window_data = sliding_window(df_filtered[col], window_size, step_size) 133 windows.append({"name": col, "data": window_data}) 134 processed_columns.add(col) 135 136 # Include annotations separately 137 annotations_window = sliding_window(df_filtered["annotations"], window_size, step_size) 138 windows.append({"name": "annotations", "data": annotations_window}) 139 140 windows_data.append({"name": names[idx], "windows": windows}) 141 142 return windows_data
Create sliding windows from the Daphnet dataset.
Args: data: List of DataFrames containing Daphnet data names: List of names corresponding to the data window_size: Size of the sliding window (default: 192) step_size: Step size for the sliding window (default: 32)
Returns: List of dictionaries containing sliding windows for each DataFrame
144 def get_supported_formats(self) -> List[str]: 145 """ 146 Get list of supported file formats for Daphnet dataset. 147 148 Returns: 149 List of supported file extensions 150 """ 151 return ['.txt']
Get list of supported file formats for Daphnet dataset.
Returns: List of supported file extensions
153 def get_sensor_info(self) -> Dict[str, List[str]]: 154 """ 155 Get information about sensors in the dataset. 156 157 Returns: 158 Dictionary containing sensor information 159 """ 160 return { 161 'sensors': self.metadata['sensors'], 162 'components': self.metadata['components'], 163 'sampling_frequency': self.metadata['sampling_frequency'] 164 }
Get information about sensors in the dataset.
Returns: Dictionary containing sensor information
166 def get_annotation_info(self) -> Dict[int, str]: 167 """ 168 Get information about annotations in the dataset. 169 170 Returns: 171 Dictionary mapping annotation values to descriptions 172 """ 173 return self.metadata['annotations']
Get information about annotations in the dataset.
Returns: Dictionary mapping annotation values to descriptions
177def load_daphnet_data(data_dir: str): 178 """ 179 Legacy function for loading Daphnet data. 180 181 Args: 182 data_dir: Directory to store the dataset 183 184 Returns: 185 Tuple of (data_list, names_list) 186 """ 187 loader = DaphnetLoader() 188 return loader.load_data(data_dir)
Legacy function for loading Daphnet data.
Args: data_dir: Directory to store the dataset
Returns: Tuple of (data_list, names_list)
191def create_sliding_windows(daphnet, daphnet_names, window_size=192, step_size=32): 192 """ 193 Legacy function for creating sliding windows. 194 195 Args: 196 daphnet: List of dataframes containing Daphnet data 197 daphnet_names: List of names of the Daphnet dataframes 198 window_size: Size of the sliding window 199 step_size: Step size for the sliding window 200 201 Returns: 202 List of dictionaries containing sliding windows for each DataFrame 203 """ 204 loader = DaphnetLoader() 205 return loader.create_sliding_windows(daphnet, daphnet_names, window_size, step_size)
Legacy function for creating sliding windows.
Args: daphnet: List of dataframes containing Daphnet data daphnet_names: List of names of the Daphnet dataframes window_size: Size of the sliding window step_size: Step size for the sliding window
Returns: List of dictionaries containing sliding windows for each DataFrame
Placeholder for dataset sample plotting.
Placeholder for sliding window plotting.