gaitsetpy.dataset
dataset: Handles loading and processing of supported datasets.
This module provides both the new class-based dataset loaders and legacy function-based API. All dataset loaders inherit from BaseDatasetLoader and are registered with the DatasetManager.
Supported datasets:
- Daphnet: Freezing of Gait dataset
- MobiFall: Fall detection dataset
- Arduous: Daily activity recognition dataset
- PhysioNet: VGRF dataset for Parkinson's disease gait analysis
- HAR-UP: Multimodal System for Fall Detection and Human Activity Recognition
- UrFall: University of Rzeszow Fall Detection Dataset with multimodal data
"""
dataset: Handles loading and processing of supported datasets.

This module provides both the new class-based dataset loaders and legacy function-based API.
All dataset loaders inherit from BaseDatasetLoader and are registered with the DatasetManager.

Supported datasets:
- Daphnet: Freezing of Gait dataset
- MobiFall: Fall detection dataset
- Arduous: Daily activity recognition dataset
- PhysioNet: VGRF dataset for Parkinson's disease gait analysis
- HAR-UP: Multimodal System for Fall Detection and Human Activity Recognition
- UrFall: University of Rzeszow Fall Detection Dataset with multimodal data

"""

# Class-based loaders
from .daphnet import DaphnetLoader
from .mobifall import MobiFallLoader
from .arduous import ArduousLoader
from .physionet import PhysioNetLoader
from .harup import HARUPLoader
from .urfall import UrFallLoader

# Legacy function-based API, kept for backward compatibility
from .daphnet import load_daphnet_data, create_sliding_windows
from .mobifall import load_mobifall_data
from .arduous import load_arduous_data
from .physionet import load_physionet_data, create_physionet_windows
from .harup import load_harup_data, create_harup_windows, extract_harup_features
from .urfall import load_urfall_data, create_urfall_windows
from .utils import download_dataset, extract_dataset, sliding_window

# Manager used as the loader registry
from ..core.managers import DatasetManager


def _register_datasets():
    """Register each bundled loader class with a DatasetManager under its short name."""
    registry = DatasetManager()
    # Pairs are listed in registration order.
    loader_table = (
        ("daphnet", DaphnetLoader),
        ("mobifall", MobiFallLoader),
        ("arduous", ArduousLoader),
        ("physionet", PhysioNetLoader),
        ("harup", HARUPLoader),
        ("urfall", UrFallLoader),
    )
    for dataset_key, loader_cls in loader_table:
        registry.register_dataset(dataset_key, loader_cls)


# Import-time side effect: loaders are registered as soon as the package is imported.
_register_datasets()


def get_dataset_manager():
    """Return a DatasetManager obtained by calling its constructor.

    NOTE(review): the upstream docstring describes this as a singleton; that
    is defined in ``core.managers`` and is not visible from this module.
    """
    manager = DatasetManager()
    return manager


def get_available_datasets():
    """Return whatever ``DatasetManager().get_available_components()`` reports."""
    manager = DatasetManager()
    return manager.get_available_components()


def load_dataset(name: str, data_dir: str, **kwargs):
    """
    Load a dataset by delegating to ``DatasetManager.load_dataset``.

    Args:
        name: Name of the dataset loader
        data_dir: Directory containing the dataset
        **kwargs: Extra keyword arguments forwarded unchanged to the manager

    Returns:
        The value returned by ``DatasetManager().load_dataset(...)``
    """
    manager = DatasetManager()
    return manager.load_dataset(name, data_dir, **kwargs)


__all__ = [
    # Class-based loaders
    'DaphnetLoader',
    'MobiFallLoader',
    'ArduousLoader',
    'PhysioNetLoader',
    'HARUPLoader',
    'UrFallLoader',
    # Legacy functions for backward compatibility
    'load_daphnet_data',
    'create_sliding_windows',
    'load_mobifall_data',
    'load_arduous_data',
    'load_physionet_data',
    'create_physionet_windows',
    'load_harup_data',
    'create_harup_windows',
    'extract_harup_features',
    'load_urfall_data',
    'create_urfall_windows',
    'download_dataset',
    'extract_dataset',
    'sliding_window',
    # Manager helpers
    'get_dataset_manager',
    'get_available_datasets',
    'load_dataset'
]
class DaphnetLoader(BaseDatasetLoader):
    """
    Loader for the Daphnet Freezing of Gait dataset.

    Reads the per-subject ``S*.txt`` files, adds one magnitude column per
    sensor, and can slice the result into sliding windows.
    """

    def __init__(self, max_workers: int = 8):
        """
        Set up the loader and its static metadata.

        Args:
            max_workers: Maximum number of concurrent download threads (default: 8)
        """
        super().__init__(
            name="daphnet",
            description="Daphnet Freezing of Gait Dataset - Contains accelerometer data from subjects with Parkinson's disease",
            max_workers=max_workers
        )
        annotation_labels = {
            0: 'not_valid',
            1: 'no_freeze',
            2: 'freeze'
        }
        self.metadata = {
            'sensors': ['shank', 'thigh', 'trunk'],
            # horizontal forward, vertical, horizontal lateral
            'components': ['h_fd', 'v', 'h_l'],
            'sampling_frequency': 64,
            'annotations': annotation_labels
        }

    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Download/extract the dataset if required and load every subject file.

        Args:
            data_dir: Directory to store/find the dataset
            **kwargs: Accepted for interface compatibility; not used here

        Returns:
            Tuple of (data_list, names_list); the same lists are also stored
            on ``self.data`` and ``self.names``.
        """
        download_dataset("daphnet", data_dir)
        extract_dataset("daphnet", data_dir)

        subject_dir = os.path.join(data_dir, "dataset_fog_release/dataset")

        # Column layout of the raw space-separated files.
        raw_columns = [
            "time", "shank_h_fd", "shank_v", "shank_h_l",
            "thigh_h_fd", "thigh_v", "thigh_h_l",
            "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"
        ]
        # Output layout: each sensor's magnitude precedes its three axes.
        ordered_columns = [
            "shank", "shank_h_fd", "shank_v", "shank_h_l",
            "thigh", "thigh_h_fd", "thigh_v", "thigh_h_l",
            "trunk", "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"
        ]

        frames = []
        file_names = []
        for path in sorted(glob(os.path.join(subject_dir, "S*.txt"))):
            file_names.append(os.path.basename(path))

            frame = pd.read_csv(path, sep=" ", names=raw_columns)
            frame = frame.set_index("time")

            # Euclidean magnitude of the three axes of each sensor.
            for sensor in ("thigh", "shank", "trunk"):
                frame[sensor] = np.sqrt(
                    frame[f"{sensor}_h_l"]**2 + frame[f"{sensor}_v"]**2 + frame[f"{sensor}_h_fd"]**2
                )

            frames.append(frame[ordered_columns])

        self.data = frames
        self.names = file_names

        return frames, file_names

    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                               window_size: int = 192, step_size: int = 32) -> List[Dict]:
        """
        Window every column of every recording after dropping invalid rows.

        Args:
            data: List of DataFrames containing Daphnet data
            names: List of names corresponding to the data
            window_size: Size of the sliding window (default: 192)
            step_size: Step size for the sliding window (default: 32)

        Returns:
            One dict per non-empty recording: ``{"name": ..., "windows": [...]}``
            where each window entry is ``{"name": column, "data": windows}``
            and the ``annotations`` entry comes last.
        """
        results = []

        for position, frame in enumerate(data):
            # Rows annotated 0 are excluded before windowing.
            valid_rows = frame[frame.annotations > 0]
            if valid_rows.empty:
                continue

            # dict.fromkeys drops repeated labels while keeping first-seen order.
            sensor_columns = [
                label for label in dict.fromkeys(valid_rows.columns)
                if label != "annotations"
            ]
            per_column = [
                {"name": label, "data": sliding_window(valid_rows[label], window_size, step_size)}
                for label in sensor_columns
            ]
            per_column.append({
                "name": "annotations",
                "data": sliding_window(valid_rows["annotations"], window_size, step_size),
            })

            results.append({"name": names[position], "windows": per_column})

        return results

    def get_supported_formats(self) -> List[str]:
        """
        Return the file extensions this loader reads.

        Returns:
            List of supported file extensions
        """
        extensions = ['.txt']
        return extensions

    def get_sensor_info(self) -> Dict[str, List[str]]:
        """
        Return the sensor-related subset of ``self.metadata``.

        Returns:
            Dictionary with the 'sensors', 'components' and
            'sampling_frequency' entries
        """
        wanted = ('sensors', 'components', 'sampling_frequency')
        return {key: self.metadata[key] for key in wanted}

    def get_annotation_info(self) -> Dict[int, str]:
        """
        Return the annotation-code mapping stored in ``self.metadata``.

        Returns:
            Dictionary mapping annotation values to descriptions
        """
        annotation_map = self.metadata['annotations']
        return annotation_map
Daphnet dataset loader class.
This class handles loading and processing of the Daphnet dataset for gait analysis.
def __init__(self, max_workers: int = 8):
    """
    Set up the Daphnet loader and its static metadata.

    Args:
        max_workers: Maximum number of concurrent download threads (default: 8)
    """
    super().__init__(
        name="daphnet",
        description="Daphnet Freezing of Gait Dataset - Contains accelerometer data from subjects with Parkinson's disease",
        max_workers=max_workers
    )
    annotation_labels = {
        0: 'not_valid',
        1: 'no_freeze',
        2: 'freeze'
    }
    self.metadata = {
        'sensors': ['shank', 'thigh', 'trunk'],
        # horizontal forward, vertical, horizontal lateral
        'components': ['h_fd', 'v', 'h_l'],
        'sampling_frequency': 64,
        'annotations': annotation_labels
    }
Initialize Daphnet loader with concurrent download support.
Args: max_workers: Maximum number of concurrent download threads (default: 8)
def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
    """
    Download/extract the Daphnet dataset if required and load every subject file.

    Args:
        data_dir: Directory to store/find the dataset
        **kwargs: Accepted for interface compatibility; not used here

    Returns:
        Tuple of (data_list, names_list); the same lists are also stored on
        ``self.data`` and ``self.names``.
    """
    download_dataset("daphnet", data_dir)
    extract_dataset("daphnet", data_dir)

    subject_dir = os.path.join(data_dir, "dataset_fog_release/dataset")

    # Column layout of the raw space-separated files.
    raw_columns = [
        "time", "shank_h_fd", "shank_v", "shank_h_l",
        "thigh_h_fd", "thigh_v", "thigh_h_l",
        "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"
    ]
    # Output layout: each sensor's magnitude precedes its three axes.
    ordered_columns = [
        "shank", "shank_h_fd", "shank_v", "shank_h_l",
        "thigh", "thigh_h_fd", "thigh_v", "thigh_h_l",
        "trunk", "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"
    ]

    frames = []
    file_names = []
    for path in sorted(glob(os.path.join(subject_dir, "S*.txt"))):
        file_names.append(os.path.basename(path))

        frame = pd.read_csv(path, sep=" ", names=raw_columns)
        frame = frame.set_index("time")

        # Euclidean magnitude of the three axes of each sensor.
        for sensor in ("thigh", "shank", "trunk"):
            frame[sensor] = np.sqrt(
                frame[f"{sensor}_h_l"]**2 + frame[f"{sensor}_v"]**2 + frame[f"{sensor}_h_fd"]**2
            )

        frames.append(frame[ordered_columns])

    self.data = frames
    self.names = file_names

    return frames, file_names
Load Daphnet dataset from the specified directory.
Args: data_dir: Directory to store/find the dataset **kwargs: Additional arguments (unused for Daphnet)
Returns: Tuple of (data_list, names_list)
def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                           window_size: int = 192, step_size: int = 32) -> List[Dict]:
    """
    Window every column of every Daphnet recording after dropping invalid rows.

    Args:
        data: List of DataFrames containing Daphnet data
        names: List of names corresponding to the data
        window_size: Size of the sliding window (default: 192)
        step_size: Step size for the sliding window (default: 32)

    Returns:
        One dict per non-empty recording: ``{"name": ..., "windows": [...]}``
        where each window entry is ``{"name": column, "data": windows}`` and
        the ``annotations`` entry comes last.
    """
    results = []

    for position, frame in enumerate(data):
        # Rows annotated 0 are excluded before windowing.
        valid_rows = frame[frame.annotations > 0]
        if valid_rows.empty:
            continue

        # dict.fromkeys drops repeated labels while keeping first-seen order.
        sensor_columns = [
            label for label in dict.fromkeys(valid_rows.columns)
            if label != "annotations"
        ]
        per_column = [
            {"name": label, "data": sliding_window(valid_rows[label], window_size, step_size)}
            for label in sensor_columns
        ]
        per_column.append({
            "name": "annotations",
            "data": sliding_window(valid_rows["annotations"], window_size, step_size),
        })

        results.append({"name": names[position], "windows": per_column})

    return results
Create sliding windows from the Daphnet dataset.
Args: data: List of DataFrames containing Daphnet data names: List of names corresponding to the data window_size: Size of the sliding window (default: 192) step_size: Step size for the sliding window (default: 32)
Returns: List of dictionaries containing sliding windows for each DataFrame
def get_supported_formats(self) -> List[str]:
    """
    Return the file extensions the Daphnet loader reads.

    Returns:
        List of supported file extensions
    """
    extensions = ['.txt']
    return extensions
Get list of supported file formats for Daphnet dataset.
Returns: List of supported file extensions
def get_sensor_info(self) -> Dict[str, List[str]]:
    """
    Return the sensor-related subset of ``self.metadata``.

    Returns:
        Dictionary with the 'sensors', 'components' and 'sampling_frequency'
        entries
    """
    wanted = ('sensors', 'components', 'sampling_frequency')
    return {key: self.metadata[key] for key in wanted}
Get information about sensors in the dataset.
Returns: Dictionary containing sensor information
def get_annotation_info(self) -> Dict[int, str]:
    """
    Return the annotation-code mapping stored in ``self.metadata``.

    Returns:
        Dictionary mapping annotation values to descriptions
    """
    annotation_map = self.metadata['annotations']
    return annotation_map
Get information about annotations in the dataset.
Returns: Dictionary mapping annotation values to descriptions
class MobiFallLoader(BaseDatasetLoader):
    """
    Loader for the MobiFall dataset.

    Loading and windowing are placeholders: both print a notice and return
    empty results.
    """

    def __init__(self, max_workers: int = 8):
        """
        Set up the loader and its static metadata.

        Args:
            max_workers: Maximum number of concurrent download threads (default: 8)
        """
        super().__init__(
            name="mobifall",
            description="MobiFall Dataset - Contains accelerometer and gyroscope data for fall detection",
            max_workers=max_workers
        )
        sensor_names = ['accelerometer', 'gyroscope']
        axis_names = ['x', 'y', 'z']
        self.metadata = {
            'sensors': sensor_names,
            'components': axis_names,
            'sampling_frequency': 100,  # Typical for MobiFall
            'activities': ['ADL', 'FALL']  # Activities of Daily Living and Falls
        }

    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Placeholder loader: prints a notice and returns two empty lists.

        Args:
            data_dir: Directory to store/find the dataset (currently ignored)
            **kwargs: Additional arguments (currently ignored)

        Returns:
            Tuple of (data_list, names_list), both empty
        """
        # TODO: Implement MobiFall data loading
        no_frames: List[pd.DataFrame] = []
        no_names: List[str] = []
        print("MobiFall data loading is not yet implemented")
        return no_frames, no_names

    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                               window_size: int = 192, step_size: int = 32) -> List[Dict]:
        """
        Placeholder windowing: prints a notice and returns an empty list.

        Args:
            data: List of DataFrames containing MobiFall data (currently ignored)
            names: List of names corresponding to the data (currently ignored)
            window_size: Size of the sliding window (default: 192)
            step_size: Step size for the sliding window (default: 32)

        Returns:
            An empty list
        """
        # TODO: Implement MobiFall sliding window creation
        no_windows: List[Dict] = []
        print("MobiFall sliding window creation is not yet implemented")
        return no_windows

    def get_supported_formats(self) -> List[str]:
        """
        Return the file extensions listed for this loader.

        Returns:
            List of supported file extensions
        """
        extensions = ['.csv', '.txt']
        return extensions

    def get_sensor_info(self) -> Dict[str, List[str]]:
        """
        Return the sensor-related subset of ``self.metadata``.

        Returns:
            Dictionary with the 'sensors', 'components' and
            'sampling_frequency' entries
        """
        wanted = ('sensors', 'components', 'sampling_frequency')
        return {key: self.metadata[key] for key in wanted}

    def get_activity_info(self) -> List[str]:
        """
        Return the activity labels stored in ``self.metadata``.

        Returns:
            List of activity types
        """
        activity_labels = self.metadata['activities']
        return activity_labels
MobiFall dataset loader class.
This class handles loading and processing of the MobiFall dataset for gait analysis.
def __init__(self, max_workers: int = 8):
    """
    Set up the MobiFall loader and its static metadata.

    Args:
        max_workers: Maximum number of concurrent download threads (default: 8)
    """
    super().__init__(
        name="mobifall",
        description="MobiFall Dataset - Contains accelerometer and gyroscope data for fall detection",
        max_workers=max_workers
    )
    sensor_names = ['accelerometer', 'gyroscope']
    axis_names = ['x', 'y', 'z']
    self.metadata = {
        'sensors': sensor_names,
        'components': axis_names,
        'sampling_frequency': 100,  # Typical for MobiFall
        'activities': ['ADL', 'FALL']  # Activities of Daily Living and Falls
    }
Initialize MobiFall loader with concurrent download support.
Args: max_workers: Maximum number of concurrent download threads (default: 8)
def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
    """
    Placeholder MobiFall loader: prints a notice and returns two empty lists.

    Args:
        data_dir: Directory to store/find the dataset (currently ignored)
        **kwargs: Additional arguments (currently ignored)

    Returns:
        Tuple of (data_list, names_list), both empty
    """
    # TODO: Implement MobiFall data loading
    no_frames: List[pd.DataFrame] = []
    no_names: List[str] = []
    print("MobiFall data loading is not yet implemented")
    return no_frames, no_names
Load MobiFall dataset from the specified directory.
Args: data_dir: Directory to store/find the dataset **kwargs: Additional arguments (unused for MobiFall)
Returns: Tuple of (data_list, names_list)
def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                           window_size: int = 192, step_size: int = 32) -> List[Dict]:
    """
    Placeholder MobiFall windowing: prints a notice and returns an empty list.

    Args:
        data: List of DataFrames containing MobiFall data (currently ignored)
        names: List of names corresponding to the data (currently ignored)
        window_size: Size of the sliding window (default: 192)
        step_size: Step size for the sliding window (default: 32)

    Returns:
        An empty list
    """
    # TODO: Implement MobiFall sliding window creation
    no_windows: List[Dict] = []
    print("MobiFall sliding window creation is not yet implemented")
    return no_windows
Create sliding windows from the MobiFall dataset.
Args: data: List of DataFrames containing MobiFall data names: List of names corresponding to the data window_size: Size of the sliding window (default: 192) step_size: Step size for the sliding window (default: 32)
Returns: List of dictionaries containing sliding windows for each DataFrame
def get_supported_formats(self) -> List[str]:
    """
    Return the file extensions listed for the MobiFall loader.

    Returns:
        List of supported file extensions
    """
    extensions = ['.csv', '.txt']
    return extensions
Get list of supported file formats for MobiFall dataset.
Returns: List of supported file extensions
def get_sensor_info(self) -> Dict[str, List[str]]:
    """
    Return the sensor-related subset of ``self.metadata``.

    Returns:
        Dictionary with the 'sensors', 'components' and 'sampling_frequency'
        entries
    """
    wanted = ('sensors', 'components', 'sampling_frequency')
    return {key: self.metadata[key] for key in wanted}
Get information about sensors in the dataset.
Returns: Dictionary containing sensor information
def get_activity_info(self) -> List[str]:
    """
    Return the activity labels stored in ``self.metadata``.

    Returns:
        List of activity types
    """
    activity_labels = self.metadata['activities']
    return activity_labels
Get information about activities in the dataset.
Returns: List of activity types
class ArduousLoader(BaseDatasetLoader):
    """
    Loader for the Arduous dataset.

    Loading and windowing are placeholders: both print a notice and return
    empty results.
    """

    def __init__(self, max_workers: int = 8):
        """
        Set up the loader and its static metadata.

        Args:
            max_workers: Maximum number of concurrent download threads (default: 8)
        """
        super().__init__(
            name="arduous",
            description="Arduous Dataset - Contains multi-sensor wearable data for daily activity recognition",
            max_workers=max_workers
        )
        sensor_names = ['accelerometer', 'gyroscope', 'magnetometer']
        axis_names = ['x', 'y', 'z']
        self.metadata = {
            'sensors': sensor_names,
            'components': axis_names,
            'sampling_frequency': 50,  # Typical for Arduous
            'activities': ['walking', 'running', 'sitting', 'standing', 'lying']
        }

    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Placeholder loader: prints a notice and returns two empty lists.

        Args:
            data_dir: Directory to store/find the dataset (currently ignored)
            **kwargs: Additional arguments (currently ignored)

        Returns:
            Tuple of (data_list, names_list), both empty
        """
        # TODO: Implement Arduous data loading
        no_frames: List[pd.DataFrame] = []
        no_names: List[str] = []
        print("Arduous data loading is not yet implemented")
        return no_frames, no_names

    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                               window_size: int = 192, step_size: int = 32) -> List[Dict]:
        """
        Placeholder windowing: prints a notice and returns an empty list.

        Args:
            data: List of DataFrames containing Arduous data (currently ignored)
            names: List of names corresponding to the data (currently ignored)
            window_size: Size of the sliding window (default: 192)
            step_size: Step size for the sliding window (default: 32)

        Returns:
            An empty list
        """
        # TODO: Implement Arduous sliding window creation
        no_windows: List[Dict] = []
        print("Arduous sliding window creation is not yet implemented")
        return no_windows

    def get_supported_formats(self) -> List[str]:
        """
        Return the file extensions listed for this loader.

        Returns:
            List of supported file extensions
        """
        extensions = ['.csv', '.txt']
        return extensions

    def get_sensor_info(self) -> Dict[str, List[str]]:
        """
        Return the sensor-related subset of ``self.metadata``.

        Returns:
            Dictionary with the 'sensors', 'components' and
            'sampling_frequency' entries
        """
        wanted = ('sensors', 'components', 'sampling_frequency')
        return {key: self.metadata[key] for key in wanted}

    def get_activity_info(self) -> List[str]:
        """
        Return the activity labels stored in ``self.metadata``.

        Returns:
            List of activity types
        """
        activity_labels = self.metadata['activities']
        return activity_labels
Arduous dataset loader class.
This class handles loading and processing of the Arduous dataset for gait analysis.
def __init__(self, max_workers: int = 8):
    """
    Set up the Arduous loader and its static metadata.

    Args:
        max_workers: Maximum number of concurrent download threads (default: 8)
    """
    super().__init__(
        name="arduous",
        description="Arduous Dataset - Contains multi-sensor wearable data for daily activity recognition",
        max_workers=max_workers
    )
    sensor_names = ['accelerometer', 'gyroscope', 'magnetometer']
    axis_names = ['x', 'y', 'z']
    self.metadata = {
        'sensors': sensor_names,
        'components': axis_names,
        'sampling_frequency': 50,  # Typical for Arduous
        'activities': ['walking', 'running', 'sitting', 'standing', 'lying']
    }
Initialize Arduous loader with concurrent download support.
Args: max_workers: Maximum number of concurrent download threads (default: 8)
def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
    """
    Placeholder Arduous loader: prints a notice and returns two empty lists.

    Args:
        data_dir: Directory to store/find the dataset (currently ignored)
        **kwargs: Additional arguments (currently ignored)

    Returns:
        Tuple of (data_list, names_list), both empty
    """
    # TODO: Implement Arduous data loading
    no_frames: List[pd.DataFrame] = []
    no_names: List[str] = []
    print("Arduous data loading is not yet implemented")
    return no_frames, no_names
Load Arduous dataset from the specified directory.
Args: data_dir: Directory to store/find the dataset **kwargs: Additional arguments (unused for Arduous)
Returns: Tuple of (data_list, names_list)
def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                           window_size: int = 192, step_size: int = 32) -> List[Dict]:
    """
    Placeholder Arduous windowing: prints a notice and returns an empty list.

    Args:
        data: List of DataFrames containing Arduous data (currently ignored)
        names: List of names corresponding to the data (currently ignored)
        window_size: Size of the sliding window (default: 192)
        step_size: Step size for the sliding window (default: 32)

    Returns:
        An empty list
    """
    # TODO: Implement Arduous sliding window creation
    no_windows: List[Dict] = []
    print("Arduous sliding window creation is not yet implemented")
    return no_windows
Create sliding windows from the Arduous dataset.
Args: data: List of DataFrames containing Arduous data names: List of names corresponding to the data window_size: Size of the sliding window (default: 192) step_size: Step size for the sliding window (default: 32)
Returns: List of dictionaries containing sliding windows for each DataFrame
def get_supported_formats(self) -> List[str]:
    """
    Return the file extensions listed for the Arduous loader.

    Returns:
        List of supported file extensions
    """
    extensions = ['.csv', '.txt']
    return extensions
Get list of supported file formats for Arduous dataset.
Returns: List of supported file extensions
def get_sensor_info(self) -> Dict[str, List[str]]:
    """
    Return the sensor-related subset of ``self.metadata``.

    Returns:
        Dictionary with the 'sensors', 'components' and 'sampling_frequency'
        entries
    """
    wanted = ('sensors', 'components', 'sampling_frequency')
    return {key: self.metadata[key] for key in wanted}
Get information about sensors in the dataset.
Returns: Dictionary containing sensor information
def get_activity_info(self) -> List[str]:
    """
    Return the activity labels stored in ``self.metadata``.

    Returns:
        List of activity types
    """
    activity_labels = self.metadata['activities']
    return activity_labels
Get information about activities in the dataset.
Returns: List of activity types
25class PhysioNetLoader(BaseDatasetLoader): 26 """ 27 PhysioNet VGRF dataset loader class. 28 29 This class handles loading and processing of the PhysioNet Gait in Parkinson's Disease dataset. 30 The dataset contains vertical ground reaction force (VGRF) data from subjects with Parkinson's 31 disease and healthy controls. 32 33 Features concurrent downloading for efficient data retrieval. 34 """ 35 36 def __init__(self, max_workers: int = 8): 37 """ 38 Initialize PhysioNet loader with concurrent download support. 39 40 Args: 41 max_workers: Maximum number of concurrent download threads (default: 8) 42 """ 43 super().__init__( 44 name="physionet", 45 description="PhysioNet Gait in Parkinson's Disease Dataset - Contains VGRF data from subjects with Parkinson's disease and healthy controls", 46 max_workers=max_workers 47 ) 48 self.metadata = { 49 'sensors': ['VGRF_L1', 'VGRF_L2', 'VGRF_L3', 'VGRF_L4', 'VGRF_L5', 'VGRF_L6', 'VGRF_L7', 'VGRF_L8', 50 'VGRF_R1', 'VGRF_R2', 'VGRF_R3', 'VGRF_R4', 'VGRF_R5', 'VGRF_R6', 'VGRF_R7', 'VGRF_R8'], 51 'sampling_frequency': 100, # 100 Hz sampling frequency 52 'subjects': { 53 'Co': 'Control subjects', 54 'Pt': 'Parkinson\'s disease patients' 55 }, 56 'window_size': 600, # 6 seconds at 100 Hz 57 'url': 'https://physionet.org/files/gaitpdb/1.0.0/' 58 } 59 self.labels = [] 60 self.subject_types = [] 61 62 def _download_physionet_data(self, data_dir: str) -> str: 63 """ 64 Download PhysioNet dataset if not already present using concurrent downloads. 65 66 This method uses multi-threaded downloading to significantly speed up the 67 download process for the 100+ files in the PhysioNet dataset. 
68 69 Args: 70 data_dir: Directory to store the dataset 71 72 Returns: 73 Path to the downloaded/existing dataset directory 74 """ 75 dataset_path = os.path.join(data_dir, "physionet_gaitpdb") 76 77 if os.path.exists(dataset_path) and len(os.listdir(dataset_path)) > 0: 78 print(f"PhysioNet dataset already exists at: {dataset_path}") 79 return dataset_path 80 81 os.makedirs(dataset_path, exist_ok=True) 82 83 # Download the dataset files 84 base_url = "https://physionet.org/files/gaitpdb/1.0.0/" 85 86 # Get list of files (basic file names based on the reference) 87 file_patterns = [ 88 # Control subjects - Ga prefix 89 *[f"GaCo{i:02d}_{j:02d}.txt" for i in range(1, 18) for j in range(1, 3)], 90 "GaCo22_01.txt", "GaCo22_10.txt", 91 92 # Parkinson's patients - Ga prefix 93 *[f"GaPt{i:02d}_{j:02d}.txt" for i in range(3, 10) for j in range(1, 3)], 94 *[f"GaPt{i:02d}_{j:02d}.txt" for i in range(12, 34) for j in range(1, 3)], 95 *[f"GaPt{i:02d}_10.txt" for i in range(13, 34)], 96 97 # Control subjects - Ju prefix 98 *[f"JuCo{i:02d}_01.txt" for i in range(1, 27)], 99 100 # Parkinson's patients - Ju prefix 101 *[f"JuPt{i:02d}_{j:02d}.txt" for i in range(1, 30) for j in range(1, 8)], 102 103 # Control subjects - Si prefix 104 *[f"SiCo{i:02d}_01.txt" for i in range(1, 31)], 105 106 # Parkinson's patients - Si prefix 107 *[f"SiPt{i:02d}_01.txt" for i in range(2, 41)] 108 ] 109 110 # Prepare download tasks for concurrent execution 111 download_tasks = [ 112 { 113 'url': base_url + filename, 114 'dest_path': os.path.join(dataset_path, filename) 115 } 116 for filename in file_patterns 117 ] 118 119 print(f"Downloading PhysioNet dataset to {dataset_path} using {self.max_workers} threads") 120 121 # Use concurrent downloading from base class 122 results = self.download_files_concurrent( 123 download_tasks, 124 show_progress=True, 125 desc="Downloading PhysioNet files" 126 ) 127 128 # Print summary 129 print(f"\nDownload Summary:") 130 print(f" Total files: {results['total']}") 131 
print(f" Successfully downloaded: {results['success']}") 132 print(f" Already existed (skipped): {results['skipped']}") 133 print(f" Failed: {results['failed']}") 134 135 if results['failed'] > 0 and len(results['failed_downloads']) > 0: 136 print(f"\nFailed downloads (showing first 10):") 137 for failed in results['failed_downloads'][:10]: 138 print(f" - {os.path.basename(failed['dest_path'])}: {failed['error']}") 139 if len(results['failed_downloads']) > 10: 140 print(f" ... and {len(results['failed_downloads']) - 10} more failures") 141 142 return dataset_path 143 144 def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]: 145 """ 146 Load PhysioNet VGRF dataset from the specified directory. 147 148 Args: 149 data_dir: Directory to store/find the dataset 150 **kwargs: Additional arguments (unused for PhysioNet) 151 152 Returns: 153 Tuple of (data_list, names_list) 154 """ 155 # Download dataset if needed 156 dataset_path = self._download_physionet_data(data_dir) 157 158 physionet_data = [] 159 physionet_names = [] 160 self.labels = [] 161 self.subject_types = [] 162 163 # Load all available files 164 for filepath in sorted(glob(os.path.join(dataset_path, "Ga*.txt"))): 165 filename = os.path.basename(filepath) 166 167 # Extract subject type from filename 168 if 'Co' in filename: 169 subject_type = 'Control' 170 label = 'Co' 171 elif 'Pt' in filename: 172 subject_type = 'Patient' 173 label = 'Pt' 174 else: 175 continue # Skip files that don't match expected pattern 176 177 try: 178 # Read the file - PhysioNet files are tab-delimited with variable columns 179 # Column 0: time, Columns 1-16: VGRF sensors, additional columns may exist 180 df = pd.read_csv(filepath, delimiter='\t', header=None) 181 182 # Handle variable number of columns 183 n_cols = min(df.shape[1], 19) # Limit to 19 columns max 184 df = df.iloc[:, :n_cols] 185 186 # Create column names 187 col_names = ['time'] 188 for i in range(1, n_cols): 189 if i <= 8: 190 
col_names.append(f'VGRF_L{i}') 191 elif i <= 16: 192 col_names.append(f'VGRF_R{i-8}') 193 else: 194 col_names.append(f'sensor_{i}') 195 196 df.columns = col_names 197 198 # Set time as index 199 df = df.set_index('time') 200 201 # Add subject metadata 202 df['subject_type'] = subject_type 203 df['label'] = label 204 205 physionet_data.append(df) 206 physionet_names.append(filename) 207 self.labels.append(label) 208 self.subject_types.append(subject_type) 209 210 except Exception as e: 211 print(f"Error loading {filename}: {e}") 212 continue 213 214 # Store loaded data 215 self.data = physionet_data 216 self.names = physionet_names 217 218 print(f"Loaded {len(physionet_data)} PhysioNet files") 219 print(f"Subject distribution: {dict(zip(*np.unique(self.subject_types, return_counts=True)))}") 220 221 return physionet_data, physionet_names 222 223 def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 224 window_size: int = 600, step_size: int = 100) -> List[Dict]: 225 """ 226 Create sliding windows from the PhysioNet dataset. 
227 228 Args: 229 data: List of DataFrames containing PhysioNet data 230 names: List of names corresponding to the data 231 window_size: Size of the sliding window (default: 600 for 6 seconds at 100Hz) 232 step_size: Step size for the sliding window (default: 100) 233 234 Returns: 235 List of dictionaries containing sliding windows for each DataFrame 236 """ 237 windows_data = [] 238 239 for idx, df in enumerate(data): 240 # Remove metadata columns for windowing 241 sensor_columns = [col for col in df.columns if col.startswith('VGRF_') or col.startswith('sensor_')] 242 df_sensors = df[sensor_columns] 243 244 if df_sensors.empty or len(df_sensors) < window_size: 245 continue 246 247 windows = [] 248 249 # Create windows for each sensor 250 for col in sensor_columns: 251 try: 252 window_data = sliding_window(df_sensors[col].values, window_size, step_size) 253 windows.append({"name": col, "data": window_data}) 254 except Exception as e: 255 print(f"Error creating windows for {col} in {names[idx]}: {e}") 256 continue 257 258 if windows: 259 windows_data.append({ 260 "name": names[idx], 261 "windows": windows, 262 "metadata": { 263 "subject_type": df['subject_type'].iloc[0] if 'subject_type' in df.columns else 'Unknown', 264 "label": df['label'].iloc[0] if 'label' in df.columns else 'Unknown', 265 "window_size": window_size, 266 "step_size": step_size, 267 "num_windows": len(windows[0]["data"]) if windows else 0 268 } 269 }) 270 271 return windows_data 272 273 def get_supported_formats(self) -> List[str]: 274 """ 275 Get list of supported file formats for PhysioNet dataset. 276 277 Returns: 278 List of supported file extensions 279 """ 280 return ['.txt'] 281 282 def get_sensor_info(self) -> Dict[str, List[str]]: 283 """ 284 Get information about sensors in the dataset. 
285 286 Returns: 287 Dictionary containing sensor information 288 """ 289 return { 290 'sensors': self.metadata['sensors'], 291 'sampling_frequency': self.metadata['sampling_frequency'], 292 'window_size': self.metadata['window_size'] 293 } 294 295 def get_subject_info(self) -> Dict[str, str]: 296 """ 297 Get information about subjects in the dataset. 298 299 Returns: 300 Dictionary containing subject information 301 """ 302 return self.metadata['subjects'] 303 304 def get_labels(self) -> List[str]: 305 """ 306 Get labels for loaded data. 307 308 Returns: 309 List of labels corresponding to loaded data 310 """ 311 return self.labels 312 313 def filter_by_subject_type(self, subject_type: str) -> Tuple[List[pd.DataFrame], List[str]]: 314 """ 315 Filter loaded data by subject type. 316 317 Args: 318 subject_type: 'Control' or 'Patient' 319 320 Returns: 321 Tuple of (filtered_data, filtered_names) 322 """ 323 if not self.data: 324 raise ValueError("No data loaded. Call load_data() first.") 325 326 filtered_data = [] 327 filtered_names = [] 328 329 for i, df in enumerate(self.data): 330 if df['subject_type'].iloc[0] == subject_type: 331 filtered_data.append(df) 332 filtered_names.append(self.names[i]) 333 334 return filtered_data, filtered_names
PhysioNet VGRF dataset loader class.
This class handles loading and processing of the PhysioNet Gait in Parkinson's Disease dataset. The dataset contains vertical ground reaction force (VGRF) data from subjects with Parkinson's disease and healthy controls.
Features concurrent downloading for efficient data retrieval.
def __init__(self, max_workers: int = 8):
    """
    Set up the PhysioNet loader and its static dataset metadata.

    Args:
        max_workers: Maximum number of concurrent download threads (default: 8)
    """
    loader_description = (
        "PhysioNet Gait in Parkinson's Disease Dataset - Contains VGRF data "
        "from subjects with Parkinson's disease and healthy controls"
    )
    super().__init__(
        name="physionet",
        description=loader_description,
        max_workers=max_workers,
    )

    # Sensor names run VGRF_L1..VGRF_L8 followed by VGRF_R1..VGRF_R8.
    sensor_names = [
        f'VGRF_{side}{number}'
        for side in ('L', 'R')
        for number in range(1, 9)
    ]

    self.metadata = {
        'sensors': sensor_names,
        'sampling_frequency': 100,  # 100 Hz sampling frequency
        'subjects': {
            'Co': 'Control subjects',
            'Pt': "Parkinson's disease patients",
        },
        'window_size': 600,  # 6 seconds at 100 Hz
        'url': 'https://physionet.org/files/gaitpdb/1.0.0/',
    }

    # Start empty; load_data() resets and fills both lists.
    self.labels = []
    self.subject_types = []
Initialize PhysioNet loader with concurrent download support.
Args: max_workers: Maximum number of concurrent download threads (default: 8)
def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
    """
    Load PhysioNet VGRF dataset from the specified directory.

    The dataset is downloaded first if needed. Every matching file is read as a
    tab-delimited table without a header, truncated to at most 19 columns,
    indexed by its first column ('time') and tagged with 'subject_type' and
    'label' columns derived from the file name. Files that fail to load are
    reported and skipped.

    Side effects: resets and fills ``self.labels`` and ``self.subject_types``,
    and stores the results in ``self.data`` / ``self.names``.

    Args:
        data_dir: Directory to store/find the dataset
        **kwargs: Optional settings.
            file_pattern (str): glob pattern, relative to the dataset
                directory, selecting the files to load. Defaults to
                "Ga*.txt" (the historical behaviour); pass e.g. "*.txt" to
                also load the Ju*/Si* recordings.

    Returns:
        Tuple of (data_list, names_list)
    """
    # Local import keeps this method self-contained.
    from collections import Counter

    # Download dataset if needed
    dataset_path = self._download_physionet_data(data_dir)
    file_pattern = kwargs.get('file_pattern', 'Ga*.txt')

    # Column 0 is time, columns 1-16 are the VGRF sensors (8 left, 8 right);
    # any further columns (up to 19 in total) get generic names.
    all_column_names = (
        ['time']
        + [f'VGRF_L{i}' for i in range(1, 9)]
        + [f'VGRF_R{i}' for i in range(1, 9)]
        + [f'sensor_{i}' for i in range(17, 19)]
    )

    # (marker in file name, subject type, label) - checked in this order.
    subject_markers = (
        ('Co', 'Control', 'Co'),
        ('Pt', 'Patient', 'Pt'),
    )

    physionet_data = []
    physionet_names = []
    self.labels = []
    self.subject_types = []

    for filepath in sorted(glob(os.path.join(dataset_path, file_pattern))):
        filename = os.path.basename(filepath)

        # Extract subject type from filename
        match = next((m for m in subject_markers if m[0] in filename), None)
        if match is None:
            continue  # Skip files that don't match expected pattern
        _, subject_type, label = match

        try:
            df = pd.read_csv(filepath, delimiter='\t', header=None)

            # Handle variable number of columns (limit to 19 columns max)
            n_cols = min(df.shape[1], len(all_column_names))
            df = df.iloc[:, :n_cols]
            df.columns = all_column_names[:n_cols]

            # Set time as index
            df = df.set_index('time')

            # Add subject metadata
            df['subject_type'] = subject_type
            df['label'] = label
        except Exception as e:
            # Best-effort loading: one unreadable file must not abort the rest.
            print(f"Error loading {filename}: {e}")
            continue

        physionet_data.append(df)
        physionet_names.append(filename)
        self.labels.append(label)
        self.subject_types.append(subject_type)

    # Store loaded data
    self.data = physionet_data
    self.names = physionet_names

    # Counter yields plain str/int, so the printed dict stays readable on
    # NumPy 2, where np.unique would surface np.str_/np.int64 reprs.
    distribution = dict(sorted(Counter(self.subject_types).items()))

    print(f"Loaded {len(physionet_data)} PhysioNet files")
    print(f"Subject distribution: {distribution}")

    return physionet_data, physionet_names
Load PhysioNet VGRF dataset from the specified directory.
Args: data_dir: Directory to store/find the dataset **kwargs: Additional arguments (unused for PhysioNet)
Returns: Tuple of (data_list, names_list)
def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                           window_size: int = 600, step_size: int = 100) -> List[Dict]:
    """
    Cut every PhysioNet recording into per-sensor sliding windows.

    Recordings without sensor columns, or shorter than ``window_size`` rows,
    are left out of the result.

    Args:
        data: List of DataFrames containing PhysioNet data
        names: List of names corresponding to the data
        window_size: Size of the sliding window (default: 600 for 6 seconds at 100Hz)
        step_size: Step size for the sliding window (default: 100)

    Returns:
        List of dictionaries containing sliding windows for each DataFrame
    """
    results = []

    for position, frame in enumerate(data):
        # Only the signal channels are windowed; metadata columns stay out.
        channel_names = [
            column for column in frame.columns
            if column.startswith(('VGRF_', 'sensor_'))
        ]
        signals = frame[channel_names]

        too_short = len(signals) < window_size
        if signals.empty or too_short:
            continue

        per_channel = []
        for channel in channel_names:
            try:
                chunks = sliding_window(signals[channel].values, window_size, step_size)
            except Exception as e:
                print(f"Error creating windows for {channel} in {names[position]}: {e}")
                continue
            per_channel.append({"name": channel, "data": chunks})

        # Every channel failed: nothing to report for this recording.
        if not per_channel:
            continue

        entry_name = names[position]

        if 'subject_type' in frame.columns:
            subject_type = frame['subject_type'].iloc[0]
        else:
            subject_type = 'Unknown'

        if 'label' in frame.columns:
            label = frame['label'].iloc[0]
        else:
            label = 'Unknown'

        results.append({
            "name": entry_name,
            "windows": per_channel,
            "metadata": {
                "subject_type": subject_type,
                "label": label,
                "window_size": window_size,
                "step_size": step_size,
                # Taken from the first channel's window count.
                "num_windows": len(per_channel[0]["data"]),
            },
        })

    return results
Create sliding windows from the PhysioNet dataset.
Args: data: List of DataFrames containing PhysioNet data names: List of names corresponding to the data window_size: Size of the sliding window (default: 600 for 6 seconds at 100Hz) step_size: Step size for the sliding window (default: 100)
Returns: List of dictionaries containing sliding windows for each DataFrame
def get_supported_formats(self) -> List[str]:
    """
    Report the file extensions this PhysioNet loader reads.

    Returns:
        List of supported file extensions
    """
    extensions = ['.txt']
    return extensions
Get list of supported file formats for PhysioNet dataset.
Returns: List of supported file extensions
def get_sensor_info(self) -> Dict[str, List[str]]:
    """
    Summarise the sensor-related entries of the loader metadata.

    Returns:
        Dictionary with the 'sensors', 'sampling_frequency' and
        'window_size' entries of ``self.metadata``
    """
    exposed_keys = ('sensors', 'sampling_frequency', 'window_size')
    return {key: self.metadata[key] for key in exposed_keys}
Get information about sensors in the dataset.
Returns: Dictionary containing sensor information
def get_subject_info(self) -> Dict[str, str]:
    """
    Return the subject-group descriptions stored in the loader metadata.

    Returns:
        Dictionary containing subject information
    """
    subject_groups = self.metadata['subjects']
    return subject_groups
Get information about subjects in the dataset.
Returns: Dictionary containing subject information
def get_labels(self) -> List[str]:
    """
    Return the labels recorded for the loaded data.

    Returns:
        List of labels corresponding to loaded data
    """
    loaded_labels = self.labels
    return loaded_labels
Get labels for loaded data.
Returns: List of labels corresponding to loaded data
def filter_by_subject_type(self, subject_type: str) -> Tuple[List[pd.DataFrame], List[str]]:
    """
    Select the loaded recordings that belong to one subject type.

    The type of a recording is read from the first row of its
    'subject_type' column.

    Args:
        subject_type: 'Control' or 'Patient'

    Returns:
        Tuple of (filtered_data, filtered_names)

    Raises:
        ValueError: If no data has been loaded yet.
    """
    if not self.data:
        raise ValueError("No data loaded. Call load_data() first.")

    # Positions of the recordings whose first row carries the requested type.
    selected = [
        position
        for position, frame in enumerate(self.data)
        if frame['subject_type'].iloc[0] == subject_type
    ]

    matching_frames = [self.data[position] for position in selected]
    matching_names = [self.names[position] for position in selected]
    return matching_frames, matching_names
Filter loaded data by subject type.
Args: subject_type: 'Control' or 'Patient'
Returns: Tuple of (filtered_data, filtered_names)
class HARUPLoader(BaseDatasetLoader):
    """
    HAR-UP dataset loader class.

    This class handles loading and processing of the HAR-UP dataset for human activity recognition
    and fall detection analysis.
    """

    def __init__(self, max_workers: int = 8):
        """
        Initialize HAR-UP loader with concurrent download support.

        Args:
            max_workers: Maximum number of concurrent download threads (default: 8)
        """
        super().__init__(
            name="harup",
            description="HAR-UP Dataset - Multimodal System for Fall Detection and Human Activity Recognition",
            max_workers=max_workers
        )
        self.metadata = {
            'sensors': [
                'AnkleAccelerometer', 'AnkleAngularVelocity', 'AnkleLuminosity',
                'RightPocketAccelerometer', 'RightPocketAngularVelocity', 'RightPocketLuminosity',
                'BeltAccelerometer', 'BeltAngularVelocity', 'BeltLuminosity',
                'NeckAccelerometer', 'NeckAngularVelocity', 'NeckLuminosity',
                'WristAccelerometer', 'WristAngularVelocity', 'WristLuminosity',
                'BrainSensor', 'Infrared'
            ],
            'components': {
                'Accelerometer': ['x', 'y', 'z'],
                'AngularVelocity': ['x', 'y', 'z'],
                'Luminosity': ['illuminance'],
                'BrainSensor': ['value'],
                'Infrared': ['value']
            },
            'sampling_frequency': 100,  # Hz
            'activities': {
                1: 'Walking',
                2: 'Walking upstairs',
                3: 'Walking downstairs',
                4: 'Sitting',
                5: 'Standing',
                6: 'Lying',
                7: 'Falling forward using hands',
                8: 'Falling forward using knees',
                9: 'Falling backwards',
                10: 'Falling sideward',
                11: 'Falling sitting in empty chair'
            }
        }

        # Features used in HAR-UP
        self.features = [
            'Mean', 'StandardDeviation', 'RootMeanSquare', 'MaximalAmplitude',
            'MinimalAmplitude', 'Median', 'Number of zero-crossing', 'Skewness',
            'Kurtosis', 'First Quartile', 'Third Quartile', 'Autocorrelation',
            'Energy'
        ]

    def download_harup_data(self, data_dir: str) -> Optional[str]:
        """
        Download HAR-UP dataset if not already present.

        Args:
            data_dir: Directory to store the dataset

        Returns:
            Path to the extracted dataset or None if not found
        """
        # Use the utility function to download and extract the dataset
        download_dataset("harup", data_dir)
        extract_dataset("harup", data_dir)

        # Check if dataset exists after download attempt
        # NOTE(review): this looks for "DataSet", while load_data() searches for a
        # folder starting with "UP_Fall_Detection_Dataset" - confirm which layout
        # extract_dataset() actually produces.
        dataset_path = os.path.join(data_dir, "DataSet")
        if not os.path.exists(dataset_path):
            print("HAR-UP dataset not found after download attempt.")
            print("Please ensure the dataset is organized in the following structure:")
            print("DataSet/Subject{i}/Activity{j}/Trial{k}/Subject{i}Activity{j}Trial{k}.csv")
            return None

        return dataset_path

    def load_data(self, data_dir: str, subjects: Optional[List[int]] = None,
                  activities: Optional[List[int]] = None, trials: Optional[List[int]] = None,
                  **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load HAR-UP dataset from the specified directory.

        Trial files are read from
        ``<data_dir>/UP_Fall_Detection_Dataset*/Subject_XX/AXX/SXX_AXX_TXX.csv``.
        All trials of one subject are concatenated into a single DataFrame
        carrying 'subject_id', 'activity_id', 'trial_id' and 'activity_label'
        columns. Files that cannot be read are reported and skipped.

        Args:
            data_dir: Directory containing the dataset
            subjects: List of subject IDs to load (default: 1-4)
            activities: List of activity IDs to load (default: 1-11)
            trials: List of trial IDs to load (default: 1-3)
            **kwargs: Additional arguments (unused)

        Returns:
            Tuple of (data_list, names_list). data_list holds one DataFrame per
            subject; names_list holds one entry per successfully read trial file.
        """
        # Set default values if not provided
        if subjects is None:
            subjects = list(range(1, 5))
        if activities is None:
            activities = list(range(1, 12))
        if trials is None:
            trials = list(range(1, 4))

        # If data_dir does not exist, trigger download
        if not os.path.exists(data_dir):
            print(f"Directory {data_dir} does not exist. Attempting to download HAR-UP dataset...")
            self.download_harup_data(data_dir)
            # If still doesn't exist, error out
            if not os.path.exists(data_dir):
                print(f"Failed to create or download dataset directory: {data_dir}")
                return [], []

        # Find the UP_Fall_Detection_Dataset directory
        dataset_path = None
        for entry in os.listdir(data_dir):
            entry_path = os.path.join(data_dir, entry)
            if os.path.isdir(entry_path) and entry.startswith("UP_Fall_Detection_Dataset"):
                dataset_path = entry_path
                break
        if dataset_path is None:
            print("UP_Fall_Detection_Dataset directory not found in", data_dir)
            print("No data loaded. Please make sure you've downloaded the HAR-UP dataset.")
            print("Visit https://sites.google.com/up.edu.mx/har-up/ to download the dataset.")
            return [], []

        harup_data = []
        harup_names = []

        for subject_id in subjects:
            subject_folder = f"Subject_{subject_id:02d}"
            subject_path = os.path.join(dataset_path, subject_folder)
            if not os.path.isdir(subject_path):
                continue

            # Trial frames are collected and concatenated once per subject:
            # growing a DataFrame inside the loop is quadratic, and concatenating
            # onto an empty frame is deprecated in recent pandas.
            trial_frames = []

            for activity_id in sorted(activities):
                activity_folder = f"A{activity_id:02d}"
                activity_path = os.path.join(subject_path, activity_folder)
                if not os.path.isdir(activity_path):
                    continue

                for trial_id in sorted(trials):
                    file_name = f"S{subject_id:02d}_A{activity_id:02d}_T{trial_id:02d}.csv"
                    file_path = os.path.join(activity_path, file_name)
                    name = f"{subject_folder}_{activity_folder}_T{trial_id:02d}"

                    try:
                        df = pd.read_csv(file_path, header=0)
                        print(f"[HARUP] Loaded columns for {file_name}: {list(df.columns)}")
                        df['subject_id'] = subject_id
                        df['activity_id'] = activity_id
                        df['trial_id'] = trial_id
                        df['activity_label'] = self.metadata['activities'].get(activity_id, f"A{activity_id:02d}")
                    except Exception as e:
                        # Best-effort: report the file and keep loading the others.
                        print(f"Error loading {file_path}: {e}")
                        continue

                    trial_frames.append(df)
                    harup_names.append(name)

            if trial_frames:
                subject_df = pd.concat(trial_frames, ignore_index=True)
                # Add complete subject DataFrame to data list
                if not subject_df.empty:
                    harup_data.append(subject_df)

        # NOTE(review): harup_data has one entry per subject while harup_names has
        # one entry per trial, so names[i] does not describe data[i]. Kept as-is
        # for backward compatibility - confirm the intended contract.
        self.data = harup_data
        self.names = harup_names

        return harup_data, harup_names

    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                               window_size: int = 100, step_size: int = 50) -> List[Dict]:
        """
        Create sliding windows from the HAR-UP dataset.

        Args:
            data: List of DataFrames containing HAR-UP data
            names: List of names corresponding to the data
            window_size: Size of the sliding window (default: 100 = 1 second at 100Hz)
            step_size: Step size for the sliding window (default: 50 = 0.5 seconds at 100Hz)

        Returns:
            List of dictionaries containing sliding windows for each DataFrame.
            Each entry's "windows" list holds one item per numeric sensor column,
            followed by "activity_id" windows and the per-window "labels".
        """
        metadata_columns = ['subject_id', 'activity_id', 'trial_id', 'activity_label', 'TIME']
        windows_data = []

        for idx, df in enumerate(data):
            if df.empty:
                continue

            windows = []
            processed_columns = set()

            # Only use numeric columns (skip TIME and any non-numeric)
            sensor_columns = [col for col in df.columns
                              if col not in metadata_columns
                              and pd.api.types.is_numeric_dtype(df[col])]

            # Process each sensor column once, even if a column name repeats
            for col in sensor_columns:
                if col in processed_columns:
                    continue
                window_data = sliding_window(df[col], window_size, step_size)
                windows.append({"name": col, "data": window_data})
                processed_columns.add(col)

            # Include activity ID for each window
            activity_windows = sliding_window(df["activity_id"], window_size, step_size)
            windows.append({"name": "activity_id", "data": activity_windows})

            # For each window, take the most common activity ID as the label
            labels = []
            for window in activity_windows:
                unique_vals, counts = np.unique(window, return_counts=True)
                labels.append(unique_vals[np.argmax(counts)])

            windows.append({"name": "labels", "data": np.array(labels)})

            windows_data.append({"name": names[idx], "windows": windows})

        return windows_data

    def extract_features(self, windows_data: List[Dict], time_domain_features: bool = True,
                         freq_domain_features: bool = True) -> List[Dict]:
        """
        Extract features from sliding windows using HAR-UP feature extraction methods.

        Entries without a "labels" window are reported and skipped. Sensor
        columns that are expected but missing are reported; the remaining ones
        are still processed.

        Args:
            windows_data: List of dictionaries containing sliding windows
            time_domain_features: Whether to extract time domain features
            freq_domain_features: Whether to extract frequency domain features

        Returns:
            List of dictionaries containing extracted features
        """
        # Mapping from original sensor names to actual CSV column names
        sensor_map = {
            'BeltAccelerometer: x-axis (g)': 'BELT_ACC_X',
            'BeltAccelerometer: y-axis (g)': 'BELT_ACC_Y',
            'BeltAccelerometer: z-axis (g)': 'BELT_ACC_Z',
            'BeltAngularVelocity: x-axis (deg/s)': 'BELT_ANG_X',
            'BeltAngularVelocity: y-axis (deg/s)': 'BELT_ANG_Y',
            'BeltAngularVelocity: z-axis (deg/s)': 'BELT_ANG_Z',
            'BeltLuminosity: illuminance (lx)': 'BELT_LUMINOSITY',
            'NeckAccelerometer: x-axis (g)': 'NECK_ACC_X',
            'NeckAccelerometer: y-axis (g)': 'NECK_ACC_Y',
            'NeckAccelerometer: z-axis (g)': 'NECK_ACC_Z',
            'NeckAngularVelocity: x-axis (deg/s)': 'NECK_ANG_X',
            'NeckAngularVelocity: y-axis (deg/s)': 'NECK_ANG_Y',
            'NeckAngularVelocity: z-axis (deg/s)': 'NECK_ANG_Z',
            'NeckLuminosity: illuminance (lx)': 'NECK_LUMINOSITY',
            'PocketAccelerometer: x-axis (g)': 'PCKT_ACC_X',
            'PocketAccelerometer: y-axis (g)': 'PCKT_ACC_Y',
            'PocketAccelerometer: z-axis (g)': 'PCKT_ACC_Z',
            'PocketAngularVelocity: x-axis (deg/s)': 'PCKT_ANG_X',
            'PocketAngularVelocity: y-axis (deg/s)': 'PCKT_ANG_Y',
            'PocketAngularVelocity: z-axis (deg/s)': 'PCKT_ANG_Z',
            'PocketLuminosity: illuminance (lx)': 'PCKT_LUMINOSITY',
            'WristAccelerometer: x-axis (g)': 'WRST_ACC_X',
            'WristAccelerometer: y-axis (g)': 'WRST_ACC_Y',
            'WristAccelerometer: z-axis (g)': 'WRST_ACC_Z',
            'WristAngularVelocity: x-axis (deg/s)': 'WRST_ANG_X',
            'WristAngularVelocity: y-axis (deg/s)': 'WRST_ANG_Y',
            'WristAngularVelocity: z-axis (deg/s)': 'WRST_ANG_Z',
            'WristLuminosity: illuminance (lx)': 'WRST_LUMINOSITY',
            'BrainSensor': 'HELMET_RAW',
            'Infrared1': 'IR_1',
            'Infrared2': 'IR_2',
            'Infrared3': 'IR_3',
            'Infrared4': 'IR_4',
        }
        extractor = HARUPFeatureExtractor(verbose=True)
        extractor.config['time_domain'] = time_domain_features
        extractor.config['frequency_domain'] = freq_domain_features

        all_features = []
        for window_dict in windows_data:
            name = window_dict["name"]
            windows = window_dict["windows"]

            # Index the windows by name once (first occurrence wins) instead of
            # rescanning the list for every sensor.
            first_by_name = {}
            for window in windows:
                first_by_name.setdefault(window["name"], window)

            labels_window = first_by_name.get("labels")
            if labels_window is None:
                print(f"No labels found for {name}, skipping feature extraction")
                continue
            labels = labels_window["data"]

            filtered_windows = []
            missing_columns = []
            for csv_col in sensor_map.values():
                sensor_window = first_by_name.get(csv_col)
                if sensor_window is None:
                    missing_columns.append(csv_col)
                else:
                    filtered_windows.append(sensor_window)
            if missing_columns:
                print(f"[HARUP] Missing columns for {name}: {missing_columns}")

            # Bookkeeping windows are passed along after the sensor windows.
            filtered_windows.extend(
                window for window in windows
                if window["name"] in ("activity_id", "labels")
            )

            features = extractor.extract_features(filtered_windows, fs=self.metadata['sampling_frequency'])

            # Subtract 2 for labels and activity_id.
            # NOTE(review): the index arithmetic below presumably relies on how
            # HARUPFeatureExtractor orders its output - confirm against it.
            sensor_count = len(filtered_windows) - 2
            if sensor_count > 0:  # avoids ZeroDivisionError when no sensor column matched
                for i, feature in enumerate(features):
                    window_idx = i // sensor_count
                    if window_idx < len(labels):
                        feature["label"] = labels[window_idx]

            all_features.append({"name": name, "features": features})
        return all_features

    def get_supported_formats(self) -> List[str]:
        """
        Get list of supported file formats for HAR-UP dataset.

        Returns:
            List of supported file extensions
        """
        return ['.csv']

    def get_sensor_info(self) -> Dict[str, List[str]]:
        """
        Get information about sensors in the dataset.

        Returns:
            Dictionary with the 'sensors', 'components' and
            'sampling_frequency' entries of the loader metadata
        """
        return {
            'sensors': self.metadata['sensors'],
            'components': self.metadata['components'],
            'sampling_frequency': self.metadata['sampling_frequency']
        }

    def get_activity_info(self) -> Dict[int, str]:
        """
        Get information about activities in the dataset.

        Returns:
            Dictionary mapping activity IDs to descriptions
        """
        return self.metadata['activities']
HAR-UP dataset loader class.
This class handles loading and processing of the HAR-UP dataset for human activity recognition and fall detection analysis.
def __init__(self, max_workers: int = 8):
    """
    Set up the HAR-UP loader together with its static metadata.

    Args:
        max_workers: Maximum number of concurrent download threads (default: 8)
    """
    super().__init__(
        name="harup",
        description="HAR-UP Dataset - Multimodal System for Fall Detection and Human Activity Recognition",
        max_workers=max_workers,
    )

    # Each body location carries the same three sensor kinds; the two
    # stand-alone sensors are appended at the end.
    locations = ('Ankle', 'RightPocket', 'Belt', 'Neck', 'Wrist')
    kinds = ('Accelerometer', 'AngularVelocity', 'Luminosity')
    sensor_names = [f'{location}{kind}' for location in locations for kind in kinds]
    sensor_names += ['BrainSensor', 'Infrared']

    # Activity IDs are 1-based and follow the order of this tuple.
    activity_names = (
        'Walking',
        'Walking upstairs',
        'Walking downstairs',
        'Sitting',
        'Standing',
        'Lying',
        'Falling forward using hands',
        'Falling forward using knees',
        'Falling backwards',
        'Falling sideward',
        'Falling sitting in empty chair',
    )

    self.metadata = {
        'sensors': sensor_names,
        'components': {
            'Accelerometer': ['x', 'y', 'z'],
            'AngularVelocity': ['x', 'y', 'z'],
            'Luminosity': ['illuminance'],
            'BrainSensor': ['value'],
            'Infrared': ['value'],
        },
        'sampling_frequency': 100,  # Hz
        'activities': dict(enumerate(activity_names, start=1)),
    }

    # Features used in HAR-UP
    self.features = [
        'Mean',
        'StandardDeviation',
        'RootMeanSquare',
        'MaximalAmplitude',
        'MinimalAmplitude',
        'Median',
        'Number of zero-crossing',
        'Skewness',
        'Kurtosis',
        'First Quartile',
        'Third Quartile',
        'Autocorrelation',
        'Energy',
    ]
Initialize HAR-UP loader with concurrent download support.
Args: max_workers: Maximum number of concurrent download threads (default: 8)
def download_harup_data(self, data_dir: str) -> Optional[str]:
    """
    Fetch and unpack the HAR-UP dataset, then locate its "DataSet" folder.

    Args:
        data_dir: Directory to store the dataset

    Returns:
        Path to the extracted dataset or None if not found
    """
    # Download and extraction are delegated to the shared utility helpers.
    download_dataset("harup", data_dir)
    extract_dataset("harup", data_dir)

    expected_root = os.path.join(data_dir, "DataSet")
    if os.path.exists(expected_root):
        return expected_root

    # Nothing usable after the attempt: explain the expected layout.
    guidance = (
        "HAR-UP dataset not found after download attempt.",
        "Please ensure the dataset is organized in the following structure:",
        "DataSet/Subject{i}/Activity{j}/Trial{k}/Subject{i}Activity{j}Trial{k}.csv",
    )
    for line in guidance:
        print(line)
    return None
Download HAR-UP dataset if not already present.
Args: data_dir: Directory to store the dataset
Returns: Path to the extracted dataset or None if not found
def load_data(self, data_dir: str, subjects: Optional[List[int]] = None,
              activities: Optional[List[int]] = None, trials: Optional[List[int]] = None,
              **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
    """
    Load HAR-UP dataset from the specified directory.

    Trial files are read from
    ``<data_dir>/UP_Fall_Detection_Dataset*/Subject_XX/AXX/SXX_AXX_TXX.csv``.
    All trials of one subject are concatenated into a single DataFrame
    carrying 'subject_id', 'activity_id', 'trial_id' and 'activity_label'
    columns. Files that cannot be read are reported and skipped.

    Args:
        data_dir: Directory containing the dataset
        subjects: List of subject IDs to load (default: 1-4)
        activities: List of activity IDs to load (default: 1-11)
        trials: List of trial IDs to load (default: 1-3)
        **kwargs: Additional arguments (unused)

    Returns:
        Tuple of (data_list, names_list). data_list holds one DataFrame per
        subject; names_list holds one entry per successfully read trial file.
    """
    # Set default values if not provided
    if subjects is None:
        subjects = list(range(1, 5))
    if activities is None:
        activities = list(range(1, 12))
    if trials is None:
        trials = list(range(1, 4))

    # If data_dir does not exist, trigger download
    if not os.path.exists(data_dir):
        print(f"Directory {data_dir} does not exist. Attempting to download HAR-UP dataset...")
        self.download_harup_data(data_dir)
        # If still doesn't exist, error out
        if not os.path.exists(data_dir):
            print(f"Failed to create or download dataset directory: {data_dir}")
            return [], []

    # Find the UP_Fall_Detection_Dataset directory
    dataset_path = None
    for entry in os.listdir(data_dir):
        entry_path = os.path.join(data_dir, entry)
        if os.path.isdir(entry_path) and entry.startswith("UP_Fall_Detection_Dataset"):
            dataset_path = entry_path
            break
    if dataset_path is None:
        print("UP_Fall_Detection_Dataset directory not found in", data_dir)
        print("No data loaded. Please make sure you've downloaded the HAR-UP dataset.")
        print("Visit https://sites.google.com/up.edu.mx/har-up/ to download the dataset.")
        return [], []

    harup_data = []
    harup_names = []

    for subject_id in subjects:
        subject_folder = f"Subject_{subject_id:02d}"
        subject_path = os.path.join(dataset_path, subject_folder)
        if not os.path.isdir(subject_path):
            continue

        # Trial frames are collected and concatenated once per subject:
        # growing a DataFrame inside the loop is quadratic, and concatenating
        # onto an empty frame is deprecated in recent pandas.
        trial_frames = []

        for activity_id in sorted(activities):
            activity_folder = f"A{activity_id:02d}"
            activity_path = os.path.join(subject_path, activity_folder)
            if not os.path.isdir(activity_path):
                continue

            for trial_id in sorted(trials):
                file_name = f"S{subject_id:02d}_A{activity_id:02d}_T{trial_id:02d}.csv"
                file_path = os.path.join(activity_path, file_name)
                name = f"{subject_folder}_{activity_folder}_T{trial_id:02d}"

                try:
                    df = pd.read_csv(file_path, header=0)
                    print(f"[HARUP] Loaded columns for {file_name}: {list(df.columns)}")
                    df['subject_id'] = subject_id
                    df['activity_id'] = activity_id
                    df['trial_id'] = trial_id
                    df['activity_label'] = self.metadata['activities'].get(activity_id, f"A{activity_id:02d}")
                except Exception as e:
                    # Best-effort: report the file and keep loading the others.
                    print(f"Error loading {file_path}: {e}")
                    continue

                trial_frames.append(df)
                harup_names.append(name)

        if trial_frames:
            subject_df = pd.concat(trial_frames, ignore_index=True)
            # Add complete subject DataFrame to data list
            if not subject_df.empty:
                harup_data.append(subject_df)

    # NOTE(review): harup_data has one entry per subject while harup_names has
    # one entry per trial, so names[i] does not describe data[i]. Kept as-is
    # for backward compatibility - confirm the intended contract.
    self.data = harup_data
    self.names = harup_names

    return harup_data, harup_names
Load HAR-UP dataset from the specified directory. Args: data_dir: Directory containing the dataset subjects: List of subject IDs to load (default: all subjects) activities: List of activity IDs to load (default: all activities) trials: List of trial IDs to load (default: all trials) **kwargs: Additional arguments Returns: Tuple of (data_list, names_list)
def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                           window_size: int = 100, step_size: int = 50) -> List[Dict]:
    """
    Create sliding windows from the HAR-UP dataset.

    Every numeric column other than the bookkeeping columns is windowed with
    ``sliding_window``. The ``activity_id`` column is windowed as well, and a
    ``labels`` entry holds the most frequent activity ID of each window.

    Args:
        data: List of DataFrames containing HAR-UP data
        names: List of names; ``names[i]`` is attached to ``data[i]``
        window_size: Size of the sliding window (default: 100)
        step_size: Step size for the sliding window (default: 50)

    Returns:
        List of ``{"name": ..., "windows": [...]}`` dictionaries, one per
        non-empty DataFrame
    """
    bookkeeping = ('subject_id', 'activity_id', 'trial_id', 'activity_label', 'TIME')
    results = []

    for position, frame in enumerate(data):
        if frame.empty:
            continue

        numeric_cols = [
            column for column in frame.columns
            if column not in bookkeeping and pd.api.types.is_numeric_dtype(frame[column])
        ]

        # dict.fromkeys keeps first-seen order while dropping repeated names.
        entries = [
            {"name": column, "data": sliding_window(frame[column], window_size, step_size)}
            for column in dict.fromkeys(numeric_cols)
        ]

        activity_windows = sliding_window(frame["activity_id"], window_size, step_size)
        entries.append({"name": "activity_id", "data": activity_windows})

        # Majority vote: label each window with its most frequent activity ID.
        majority = []
        for chunk in activity_windows:
            values, tallies = np.unique(chunk, return_counts=True)
            majority.append(values[np.argmax(tallies)])
        entries.append({"name": "labels", "data": np.array(majority)})

        results.append({"name": names[position], "windows": entries})

    return results
Create sliding windows from the HAR-UP dataset.
Args: data: List of DataFrames containing HAR-UP data names: List of names corresponding to the data window_size: Size of the sliding window (default: 100 = 1 second at 100Hz) step_size: Step size for the sliding window (default: 50 = 0.5 seconds at 100Hz)
Returns: List of dictionaries containing sliding windows for each DataFrame
def extract_features(self, windows_data: List[Dict], time_domain_features: bool = True,
                     freq_domain_features: bool = True) -> List[Dict]:
    """
    Extract features from sliding windows using HAR-UP feature extraction methods.

    For each entry, the windows whose names appear among the CSV column names
    in ``sensor_map`` are selected (in map order), followed by the
    ``activity_id`` / ``labels`` windows, and handed to ``HARUPFeatureExtractor``.
    Each returned feature dict then gets a ``"label"`` taken from the entry's
    ``labels`` window.

    Args:
        windows_data: List of dictionaries containing sliding windows
        time_domain_features: Whether to extract time domain features
        freq_domain_features: Whether to extract frequency domain features

    Returns:
        List of ``{"name": ..., "features": [...]}`` dictionaries; entries
        without a ``labels`` window are skipped
    """
    # Mapping from original sensor names to actual CSV column names
    sensor_map = {
        'BeltAccelerometer: x-axis (g)': 'BELT_ACC_X',
        'BeltAccelerometer: y-axis (g)': 'BELT_ACC_Y',
        'BeltAccelerometer: z-axis (g)': 'BELT_ACC_Z',
        'BeltAngularVelocity: x-axis (deg/s)': 'BELT_ANG_X',
        'BeltAngularVelocity: y-axis (deg/s)': 'BELT_ANG_Y',
        'BeltAngularVelocity: z-axis (deg/s)': 'BELT_ANG_Z',
        'BeltLuminosity: illuminance (lx)': 'BELT_LUMINOSITY',
        'NeckAccelerometer: x-axis (g)': 'NECK_ACC_X',
        'NeckAccelerometer: y-axis (g)': 'NECK_ACC_Y',
        'NeckAccelerometer: z-axis (g)': 'NECK_ACC_Z',
        'NeckAngularVelocity: x-axis (deg/s)': 'NECK_ANG_X',
        'NeckAngularVelocity: y-axis (deg/s)': 'NECK_ANG_Y',
        'NeckAngularVelocity: z-axis (deg/s)': 'NECK_ANG_Z',
        'NeckLuminosity: illuminance (lx)': 'NECK_LUMINOSITY',
        'PocketAccelerometer: x-axis (g)': 'PCKT_ACC_X',
        'PocketAccelerometer: y-axis (g)': 'PCKT_ACC_Y',
        'PocketAccelerometer: z-axis (g)': 'PCKT_ACC_Z',
        'PocketAngularVelocity: x-axis (deg/s)': 'PCKT_ANG_X',
        'PocketAngularVelocity: y-axis (deg/s)': 'PCKT_ANG_Y',
        'PocketAngularVelocity: z-axis (deg/s)': 'PCKT_ANG_Z',
        'PocketLuminosity: illuminance (lx)': 'PCKT_LUMINOSITY',
        'WristAccelerometer: x-axis (g)': 'WRST_ACC_X',
        'WristAccelerometer: y-axis (g)': 'WRST_ACC_Y',
        'WristAccelerometer: z-axis (g)': 'WRST_ACC_Z',
        'WristAngularVelocity: x-axis (deg/s)': 'WRST_ANG_X',
        'WristAngularVelocity: y-axis (deg/s)': 'WRST_ANG_Y',
        'WristAngularVelocity: z-axis (deg/s)': 'WRST_ANG_Z',
        'WristLuminosity: illuminance (lx)': 'WRST_LUMINOSITY',
        'BrainSensor': 'HELMET_RAW',
        'Infrared1': 'IR_1',
        'Infrared2': 'IR_2',
        'Infrared3': 'IR_3',
        'Infrared4': 'IR_4',
    }

    extractor = HARUPFeatureExtractor(verbose=True)
    extractor.config['time_domain'] = time_domain_features
    extractor.config['frequency_domain'] = freq_domain_features

    results = []
    for entry in windows_data:
        name = entry["name"]
        windows = entry["windows"]

        # The first window called "labels" supplies the per-window labels.
        labels = next((w["data"] for w in windows if w["name"] == "labels"), None)
        if labels is None:
            print(f"No labels found for {name}, skipping feature extraction")
            continue

        # Pick sensor windows in sensor_map order, noting columns not present.
        selected = []
        absent = []
        for csv_col in sensor_map.values():
            match = next((w for w in windows if w["name"] == csv_col), None)
            if match is None:
                absent.append(csv_col)
            else:
                selected.append(match)
        if absent:
            print(f"[HARUP] Missing columns for {name}: {absent}")

        selected.extend(w for w in windows if w["name"] in ("activity_id", "labels"))

        features = extractor.extract_features(selected, fs=self.metadata['sampling_frequency'])

        # Subtract 2 for labels and activity_id
        sensor_count = len(selected) - 2
        for position, feature in enumerate(features):
            window_idx = position // sensor_count
            if window_idx < len(labels):
                feature["label"] = labels[window_idx]

        results.append({"name": name, "features": features})

    return results
Extract features from sliding windows using HAR-UP feature extraction methods. Args: windows_data: List of dictionaries containing sliding windows time_domain_features: Whether to extract time domain features freq_domain_features: Whether to extract frequency domain features Returns: List of dictionaries containing extracted features
def get_supported_formats(self) -> List[str]:
    """
    Get list of supported file formats for HAR-UP dataset.

    Returns:
        List of supported file extensions (only ``'.csv'``)
    """
    extensions = ['.csv']
    return extensions
Get list of supported file formats for HAR-UP dataset.
Returns: List of supported file extensions
def get_sensor_info(self) -> Dict[str, List[str]]:
    """
    Get information about sensors in the dataset.

    Returns:
        Dictionary with the ``sensors``, ``components`` and
        ``sampling_frequency`` entries copied from ``self.metadata``
    """
    wanted_keys = ('sensors', 'components', 'sampling_frequency')
    return {key: self.metadata[key] for key in wanted_keys}
Get information about sensors in the dataset.
Returns: Dictionary containing sensor information
def get_activity_info(self) -> Dict[int, str]:
    """
    Get information about activities in the dataset.

    Returns:
        The ``activities`` mapping stored in ``self.metadata``
        (activity IDs to descriptions)
    """
    activity_table = self.metadata['activities']
    return activity_table
Get information about activities in the dataset.
Returns: Dictionary mapping activity IDs to descriptions
class UrFallLoader(BaseDatasetLoader):
    """
    UrFall dataset loader class.

    This class handles loading and processing of the UrFall dataset for fall detection.
    Supports multiple data types: Depth, RGB, Accelerometer, Synchronization, Video,
    and pre-extracted features from depth maps. Only the CSV-based types
    (features, accelerometer, synchronization) are loaded into DataFrames; for
    depth/RGB/video archives use get_file_paths().
    """

    def __init__(self, max_workers: int = 8):
        """
        Initialize UrFall loader with concurrent download support.

        Args:
            max_workers: Maximum number of concurrent download threads (default: 8)
        """
        super().__init__(
            name="urfall",
            description="UrFall Dataset - University of Rzeszow Fall Detection Dataset with multimodal data",
            max_workers=max_workers
        )
        self.metadata = {
            'data_types': ['depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features'],
            'camera': 'cam0',  # Front camera
            'sampling_frequency': 30,  # Depth/RGB camera fps
            'accelerometer_frequency': 100,  # Accelerometer sampling frequency (typical)
            'activities': {
                -1: 'Not lying (standing/walking)',
                0: 'Falling (transient)',
                1: 'Lying on ground'
            },
            'fall_sequences': list(range(1, 31)),  # fall-01 to fall-30
            'adl_sequences': list(range(1, 21)),  # adl-01 to adl-20
            'feature_columns': [
                'sequence_name',
                'frame_number',
                'label',
                'HeightWidthRatio',
                'MajorMinorRatio',
                'BoundingBoxOccupancy',
                'MaxStdXZ',
                'HHmaxRatio',
                'H',
                'D',
                'P40'
            ],
            'feature_descriptions': {
                'HeightWidthRatio': 'Bounding box height to width ratio',
                'MajorMinorRatio': 'Major to minor axis ratio from BLOB segmentation',
                'BoundingBoxOccupancy': 'Ratio of bounding box occupied by person pixels',
                'MaxStdXZ': 'Standard deviation of pixels from centroid (X and Z axis)',
                'HHmaxRatio': 'Human height in frame to standing height ratio',
                'H': 'Actual height in mm',
                'D': 'Distance of person center to floor in mm',
                'P40': 'Ratio of point clouds in 40cm cuboid to full height cuboid'
            }
        }

    def load_data(self, data_dir: str,
                  data_types: Optional[List[str]] = None,
                  sequences: Optional[List[str]] = None,
                  use_falls: bool = True,
                  use_adls: bool = True,
                  **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load UrFall dataset from the specified directory.

        Args:
            data_dir: Directory containing the dataset (created if missing)
            data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer',
                       'synchronization', 'video', 'features' (default: ['features'])
            sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01'])
                      If None, loads all based on use_falls and use_adls
            use_falls: Whether to load fall sequences (default: True)
            use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True)
            **kwargs: Additional arguments (unused)

        Returns:
            Tuple of (data_list, names_list)

        Raises:
            ValueError: If data_types contains a type not listed in the metadata
        """
        # Default to loading pre-extracted features if not specified
        if data_types is None:
            data_types = ['features']

        # Validate data types
        valid_types = set(self.metadata['data_types'])
        requested_types = set(data_types)
        invalid_types = requested_types - valid_types
        if invalid_types:
            raise ValueError(f"Invalid data types: {invalid_types}. Valid types: {valid_types}")

        # Create directory if it doesn't exist
        os.makedirs(data_dir, exist_ok=True)

        data_list = []
        names_list = []

        # CSV-backed types are always loaded in this fixed order.
        csv_loaders = (
            ('features', self._load_features),
            ('accelerometer', self._load_accelerometer),
            ('synchronization', self._load_synchronization),
        )
        for type_name, loader in csv_loaders:
            if type_name in data_types:
                frames, frame_names = loader(data_dir, sequences, use_falls, use_adls)
                data_list.extend(frames)
                names_list.extend(frame_names)

        # Depth, RGB, and Video data are image/video files and are not loaded
        # into DataFrames; point the user at get_file_paths() instead.
        if 'depth' in data_types or 'rgb' in data_types or 'video' in data_types:
            print("Note: Depth, RGB, and Video data types contain image/video files.")
            print("These are not loaded into DataFrames but their paths can be accessed.")
            print("Use the get_file_paths() method to retrieve paths to these files.")

        self.data = data_list
        return data_list, names_list

    def _resolve_sequences(self, sequences: Optional[List[str]],
                           use_falls: bool, use_adls: bool) -> List[str]:
        """
        Return the sequence names to process.

        An explicit ``sequences`` list is used as given (use_falls/use_adls are
        ignored); otherwise the names are built from the metadata ranges as
        ``fall-XX`` followed by ``adl-XX``.
        """
        if sequences is not None:
            return list(sequences)

        resolved = []
        if use_falls:
            resolved.extend(f"fall-{i:02d}" for i in self.metadata['fall_sequences'])
        if use_adls:
            resolved.extend(f"adl-{i:02d}" for i in self.metadata['adl_sequences'])
        return resolved

    def _load_sequence_csvs(self, data_dir: str, seq_list: List[str],
                            file_suffix: str, kind: str) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load ``<sequence><file_suffix>`` for every sequence whose file exists.

        Each frame is tagged with ``sequence_name``, ``activity_type`` and
        ``activity_id`` (1 for ``fall-`` sequences, 0 otherwise) and named
        ``<sequence>-<kind>``. Unreadable files are reported and skipped.
        """
        data_list = []
        names_list = []

        for seq in seq_list:
            csv_path = os.path.join(data_dir, f"{seq}{file_suffix}")
            if not os.path.exists(csv_path):
                continue
            try:
                df = pd.read_csv(csv_path)
                is_fall = seq.startswith('fall-')
                df['sequence_name'] = seq
                df['activity_type'] = 'fall' if is_fall else 'adl'
                df['activity_id'] = 1 if is_fall else 0
                data_list.append(df)
                names_list.append(f"{seq}-{kind}")
            except Exception as e:
                print(f"Warning: Could not load {kind} data from {csv_path}: {e}")

        return data_list, names_list

    def _load_features(self, data_dir: str, sequences: Optional[List[str]],
                       use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load pre-extracted features from CSV files.

        Args:
            data_dir: Directory containing the dataset
            sequences: Specific sequences to load; when given, only rows whose
                sequence_name is listed are kept, and a group (falls/ADLs) with
                no remaining rows is left out of the result
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Tuple of (data_list, names_list)
        """
        data_list = []
        names_list = []

        # (enabled, activity_type, sequence prefix, file stem, activity_id, warning label)
        groups = (
            (use_falls, 'fall', 'fall-', "urfall-cam0-falls", 1, "Falls"),
            (use_adls, 'adl', 'adl-', "urfall-cam0-adls", 0, "ADLs"),
        )

        for enabled, activity_type, prefix, stem, activity_id, label in groups:
            if not enabled:
                continue

            csv_path = os.path.join(data_dir, f"{stem}.csv")
            if not os.path.exists(csv_path):
                print(f"Warning: {label} features file not found at {csv_path}")
                continue

            df = pd.read_csv(csv_path, header=None, names=self.metadata['feature_columns'])

            if sequences is not None:
                # Always filter when an explicit list is given: previously a
                # list with no entries for this group loaded the whole file.
                wanted = [s for s in sequences if s.startswith(prefix)]
                # .copy() so the column assignments below do not write into a
                # slice of the unfiltered frame.
                df = df[df['sequence_name'].isin(wanted)].copy()
                if df.empty:
                    continue

            # Add metadata columns
            df['activity_type'] = activity_type
            df['activity_id'] = activity_id

            data_list.append(df)
            names_list.append(stem)

        return data_list, names_list

    def _load_accelerometer(self, data_dir: str, sequences: Optional[List[str]],
                            use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load accelerometer CSV data files (``<sequence>-acc.csv``).

        Args:
            data_dir: Directory containing the dataset
            sequences: Specific sequences to load
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Tuple of (data_list, names_list)
        """
        seq_list = self._resolve_sequences(sequences, use_falls, use_adls)
        return self._load_sequence_csvs(data_dir, seq_list, "-acc.csv", "accelerometer")

    def _load_synchronization(self, data_dir: str, sequences: Optional[List[str]],
                              use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load synchronization CSV data files (``<sequence>-data.csv``).

        Args:
            data_dir: Directory containing the dataset
            sequences: Specific sequences to load
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Tuple of (data_list, names_list)
        """
        seq_list = self._resolve_sequences(sequences, use_falls, use_adls)
        return self._load_sequence_csvs(data_dir, seq_list, "-data.csv", "synchronization")

    def get_file_paths(self, data_dir: str, data_type: str,
                       sequences: Optional[List[str]] = None,
                       use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]:
        """
        Get file paths for image/video data types (depth, RGB, video).

        Args:
            data_dir: Directory containing the dataset
            data_type: Type of data ('depth', 'rgb', 'video')
            sequences: Specific sequences to get paths for
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Dictionary mapping sequence names to file paths (existing files only)

        Raises:
            ValueError: If data_type is not 'depth', 'rgb' or 'video'
        """
        if data_type not in ['depth', 'rgb', 'video']:
            raise ValueError(f"data_type must be one of: 'depth', 'rgb', 'video'. Got: {data_type}")

        # Map data type to file name suffix
        extension_map = {
            'depth': '-cam0-d.zip',
            'rgb': '-cam0-rgb.zip',
            'video': '-cam0.mp4'
        }
        ext = extension_map[data_type]

        file_paths = {}
        for seq in self._resolve_sequences(sequences, use_falls, use_adls):
            file_path = os.path.join(data_dir, f"{seq}{ext}")
            if os.path.exists(file_path):
                file_paths[seq] = file_path

        return file_paths

    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                               window_size: int = 30, step_size: int = 15) -> List[Dict]:
        """
        Create sliding windows from the loaded data.

        Args:
            data: List of DataFrames containing the dataset
            names: List of names corresponding to each DataFrame
            window_size: Size of the sliding window (default: 30 frames for depth features)
            step_size: Step size for sliding window (default: 15 frames)

        Returns:
            List of dictionaries containing windowed data; empty frames and
            frames without numeric feature columns are skipped
        """
        windows_data = []

        # Metadata columns are never windowed as features.
        exclude_cols = ['sequence_name', 'frame_number', 'label', 'activity_type', 'activity_id']

        for idx, df in enumerate(data):
            if df.empty:
                continue

            feature_cols = [col for col in df.columns
                            if col not in exclude_cols and pd.api.types.is_numeric_dtype(df[col])]
            if not feature_cols:
                continue

            windows = [
                {"name": col, "data": sliding_window(df[col].values, window_size, step_size)}
                for col in feature_cols
            ]

            # Per-window label = most frequent 'label' value (majority vote).
            if 'label' in df.columns:
                label_windows = sliding_window(df['label'].values, window_size, step_size)
                labels = []
                for w in label_windows:
                    vals, counts = np.unique(w, return_counts=True)
                    labels.append(vals[np.argmax(counts)])
                windows.append({"name": "labels", "data": np.array(labels)})

            if 'activity_id' in df.columns:
                activity_windows = sliding_window(df['activity_id'].values, window_size, step_size)
                windows.append({"name": "activity_id", "data": activity_windows})

            windows_data.append({"name": names[idx], "windows": windows})

        return windows_data

    def get_supported_formats(self) -> List[str]:
        """
        Get list of supported file formats for UrFall dataset.

        Returns:
            List of supported file extensions
        """
        return ['.csv', '.zip', '.mp4']

    def get_sensor_info(self) -> Dict[str, object]:
        """
        Get information about sensors in the dataset.

        Returns:
            Dictionary containing the data types, camera name and the two
            sampling frequencies from the metadata
        """
        return {
            'data_types': self.metadata['data_types'],
            'camera': self.metadata['camera'],
            'sampling_frequency': self.metadata['sampling_frequency'],
            'accelerometer_frequency': self.metadata['accelerometer_frequency']
        }

    def get_activity_info(self) -> Dict[int, str]:
        """
        Get information about activities in the dataset.

        Returns:
            Dictionary mapping activity IDs to labels
        """
        return self.metadata['activities']

    def get_feature_info(self) -> Dict[str, str]:
        """
        Get information about pre-extracted features.

        Returns:
            Dictionary mapping feature names to descriptions
        """
        return self.metadata['feature_descriptions']
UrFall dataset loader class.
This class handles loading and processing of the UrFall dataset for fall detection. Supports multiple data types: Depth, RGB, Accelerometer, Synchronization, Video, and pre-extracted features from depth maps.
def __init__(self, max_workers: int = 8):
    """
    Initialize UrFall loader with concurrent download support.

    Registers the loader under the name ``"urfall"`` and fills
    ``self.metadata`` with the data types, sampling rates, label meanings,
    sequence ranges and feature column names/descriptions.

    Args:
        max_workers: Maximum number of concurrent download threads (default: 8)
    """
    super().__init__(
        name="urfall",
        description="UrFall Dataset - University of Rzeszow Fall Detection Dataset with multimodal data",
        max_workers=max_workers
    )

    label_meanings = {
        -1: 'Not lying (standing/walking)',
        0: 'Falling (transient)',
        1: 'Lying on ground'
    }
    feature_descriptions = {
        'HeightWidthRatio': 'Bounding box height to width ratio',
        'MajorMinorRatio': 'Major to minor axis ratio from BLOB segmentation',
        'BoundingBoxOccupancy': 'Ratio of bounding box occupied by person pixels',
        'MaxStdXZ': 'Standard deviation of pixels from centroid (X and Z axis)',
        'HHmaxRatio': 'Human height in frame to standing height ratio',
        'H': 'Actual height in mm',
        'D': 'Distance of person center to floor in mm',
        'P40': 'Ratio of point clouds in 40cm cuboid to full height cuboid'
    }
    # Three bookkeeping columns followed by the described features, in the
    # same order as the descriptions above.
    feature_columns = ['sequence_name', 'frame_number', 'label', *feature_descriptions]

    self.metadata = {
        'data_types': ['depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features'],
        'camera': 'cam0',  # Front camera
        'sampling_frequency': 30,  # Depth/RGB camera fps
        'accelerometer_frequency': 100,  # Accelerometer sampling frequency (typical)
        'activities': label_meanings,
        'fall_sequences': list(range(1, 31)),  # fall-01 to fall-30
        'adl_sequences': list(range(1, 21)),  # adl-01 to adl-20
        'feature_columns': feature_columns,
        'feature_descriptions': feature_descriptions
    }
Initialize UrFall loader with concurrent download support.
Args: max_workers: Maximum number of concurrent download threads (default: 8)
def load_data(self, data_dir: str,
              data_types: Optional[List[str]] = None,
              sequences: Optional[List[str]] = None,
              use_falls: bool = True,
              use_adls: bool = True,
              **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
    """
    Load UrFall dataset from the specified directory.

    Args:
        data_dir: Directory containing the dataset (created if it is missing)
        data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer',
                   'synchronization', 'video', 'features' (default: ['features'])
        sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01'])
                  If None, loads all based on use_falls and use_adls
        use_falls: Whether to load fall sequences (default: True)
        use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True)
        **kwargs: Additional arguments (unused)

    Returns:
        Tuple of (data_list, names_list); data_list is also stored on self.data

    Raises:
        ValueError: If data_types names a type missing from the metadata
    """
    # Pre-extracted features are the default selection.
    if data_types is None:
        data_types = ['features']

    valid_types = set(self.metadata['data_types'])
    invalid_types = set(data_types) - valid_types
    if invalid_types:
        raise ValueError(f"Invalid data types: {invalid_types}. Valid types: {valid_types}")

    os.makedirs(data_dir, exist_ok=True)

    data_list = []
    names_list = []

    # CSV-backed types, processed in this fixed order regardless of the order
    # in data_types. Methods are looked up lazily, only for requested types.
    csv_loaders = (
        ('features', '_load_features'),
        ('accelerometer', '_load_accelerometer'),
        ('synchronization', '_load_synchronization'),
    )
    for type_name, method_name in csv_loaders:
        if type_name not in data_types:
            continue
        frames, frame_names = getattr(self, method_name)(data_dir, sequences, use_falls, use_adls)
        data_list.extend(frames)
        names_list.extend(frame_names)

    # Image/video types are not loaded into DataFrames; tell the caller where
    # to look instead.
    if any(media in data_types for media in ('depth', 'rgb', 'video')):
        print("Note: Depth, RGB, and Video data types contain image/video files.")
        print("These are not loaded into DataFrames but their paths can be accessed.")
        print("Use the get_file_paths() method to retrieve paths to these files.")

    self.data = data_list
    return data_list, names_list
Load UrFall dataset from the specified directory.
Args: data_dir: Directory containing the dataset data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features' (default: ['features']) sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01']) If None, loads all based on use_falls and use_adls use_falls: Whether to load fall sequences (default: True) use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True) **kwargs: Additional arguments
Returns: Tuple of (data_list, names_list)
def get_file_paths(self, data_dir: str, data_type: str,
                   sequences: Optional[List[str]] = None,
                   use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]:
    """
    Get file paths for image/video data types (depth, RGB, video).

    Args:
        data_dir: Directory containing the dataset
        data_type: Type of data ('depth', 'rgb', 'video')
        sequences: Specific sequences to get paths for; when given,
            use_falls and use_adls are not consulted
        use_falls: Whether to include fall sequences (fall-01 .. fall-30)
        use_adls: Whether to include ADL sequences (adl-01 .. adl-20)

    Returns:
        Dictionary mapping sequence names to file paths, restricted to files
        that exist on disk

    Raises:
        ValueError: If data_type is not 'depth', 'rgb' or 'video'
    """
    if data_type not in ('depth', 'rgb', 'video'):
        raise ValueError(f"data_type must be one of: 'depth', 'rgb', 'video'. Got: {data_type}")

    # Work out which sequence names to probe.
    if sequences is None:
        wanted = [f"fall-{i:02d}" for i in range(1, 31)] if use_falls else []
        if use_adls:
            wanted += [f"adl-{i:02d}" for i in range(1, 21)]
    else:
        wanted = sequences

    # File name suffix used by each data type.
    suffix = {
        'depth': '-cam0-d.zip',
        'rgb': '-cam0-rgb.zip',
        'video': '-cam0.mp4'
    }[data_type]

    candidates = ((seq, os.path.join(data_dir, f"{seq}{suffix}")) for seq in wanted)
    return {seq: path for seq, path in candidates if os.path.exists(path)}
Get file paths for image/video data types (depth, RGB, video).
Args: data_dir: Directory containing the dataset data_type: Type of data ('depth', 'rgb', 'video') sequences: Specific sequences to get paths for use_falls: Whether to include fall sequences use_adls: Whether to include ADL sequences
Returns: Dictionary mapping sequence names to file paths
def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                           window_size: int = 30, step_size: int = 15) -> List[Dict]:
    """
    Create sliding windows from the loaded data.

    Numeric columns other than the metadata columns are windowed with
    ``sliding_window``. When a ``label`` column exists, a ``labels`` entry
    carries the most frequent label of each window; when ``activity_id``
    exists, its raw windows are appended too.

    Args:
        data: List of DataFrames containing the dataset
        names: List of names corresponding to each DataFrame
        window_size: Size of the sliding window (default: 30 frames for depth features)
        step_size: Step size for sliding window (default: 15 frames)

    Returns:
        List of dictionaries containing windowed data; empty frames and frames
        with no numeric feature column produce no entry
    """
    metadata_cols = ('sequence_name', 'frame_number', 'label', 'activity_type', 'activity_id')
    results = []

    for position, frame in enumerate(data):
        if frame.empty:
            continue

        numeric_cols = [
            column for column in frame.columns
            if column not in metadata_cols and pd.api.types.is_numeric_dtype(frame[column])
        ]
        if not numeric_cols:
            continue

        entries = [
            {"name": column, "data": sliding_window(frame[column].values, window_size, step_size)}
            for column in numeric_cols
        ]

        if 'label' in frame.columns:
            label_chunks = sliding_window(frame['label'].values, window_size, step_size)
            # Majority voting for each window
            votes = []
            for chunk in label_chunks:
                values, tallies = np.unique(chunk, return_counts=True)
                votes.append(values[np.argmax(tallies)])
            entries.append({"name": "labels", "data": np.array(votes)})

        if 'activity_id' in frame.columns:
            entries.append({
                "name": "activity_id",
                "data": sliding_window(frame['activity_id'].values, window_size, step_size),
            })

        results.append({"name": names[position], "windows": entries})

    return results
Create sliding windows from the loaded data.
Args: data: List of DataFrames containing the dataset names: List of names corresponding to each DataFrame window_size: Size of the sliding window (default: 30 frames for depth features) step_size: Step size for sliding window (default: 15 frames)
Returns: List of dictionaries containing windowed data
def get_supported_formats(self) -> List[str]:
    """
    Get list of supported file formats for UrFall dataset.

    Returns:
        List of supported file extensions: CSV tables, ZIP archives and
        MP4 videos
    """
    extensions = ['.csv', '.zip', '.mp4']
    return extensions
Get list of supported file formats for UrFall dataset.
Returns: List of supported file extensions
def get_sensor_info(self) -> Dict[str, object]:
    """
    Get information about sensors in the dataset.

    Returns:
        Dictionary with the ``data_types``, ``camera``, ``sampling_frequency``
        and ``accelerometer_frequency`` entries of ``self.metadata``. The
        values are of mixed types, so the annotation uses ``object`` (the
        previous ``any`` was the builtin function, not a type).
    """
    return {
        'data_types': self.metadata['data_types'],
        'camera': self.metadata['camera'],
        'sampling_frequency': self.metadata['sampling_frequency'],
        'accelerometer_frequency': self.metadata['accelerometer_frequency']
    }
Get information about sensors in the dataset.
Returns: Dictionary containing sensor information
def get_activity_info(self) -> Dict[int, str]:
    """
    Look up the activity table stored in the loader metadata.

    Returns:
        The object stored under ``self.metadata['activities']`` (returned
        as-is, not copied), mapping activity IDs to labels
    """
    activity_map = self.metadata['activities']
    return activity_map
Get information about activities in the dataset.
Returns: Dictionary mapping activity IDs to labels
def get_feature_info(self) -> Dict[str, str]:
    """
    Look up the descriptions of the pre-extracted features.

    Returns:
        The object stored under ``self.metadata['feature_descriptions']``
        (returned as-is, not copied), mapping feature names to descriptions
    """
    descriptions = self.metadata['feature_descriptions']
    return descriptions
Get information about pre-extracted features.
Returns: Dictionary mapping feature names to descriptions
def load_daphnet_data(data_dir: str):
    """
    Load Daphnet data through the legacy function interface.

    Builds a fresh DaphnetLoader and delegates to its ``load_data``.

    Args:
        data_dir: Dataset directory, forwarded unchanged to the loader

    Returns:
        Whatever ``DaphnetLoader.load_data`` returns; documented as a
        tuple of (data_list, names_list)
    """
    return DaphnetLoader().load_data(data_dir)
Legacy function for loading Daphnet data.
Args: data_dir: Directory to store the dataset
Returns: Tuple of (data_list, names_list)
def create_sliding_windows(daphnet, daphnet_names, window_size=192, step_size=32):
    """
    Create sliding windows for Daphnet data through the legacy interface.

    Builds a fresh DaphnetLoader and delegates to its
    ``create_sliding_windows`` with the arguments passed positionally.

    Args:
        daphnet: List of dataframes containing Daphnet data
        daphnet_names: List of names of the Daphnet dataframes
        window_size: Size of the sliding window (default: 192)
        step_size: Step size for the sliding window (default: 32)

    Returns:
        The loader's result; documented as a list of dictionaries
        containing sliding windows for each DataFrame
    """
    daphnet_loader = DaphnetLoader()
    windowed = daphnet_loader.create_sliding_windows(
        daphnet, daphnet_names, window_size, step_size
    )
    return windowed
Legacy function for creating sliding windows.
Args: daphnet: List of dataframes containing Daphnet data daphnet_names: List of names of the Daphnet dataframes window_size: Size of the sliding window step_size: Step size for the sliding window
Returns: List of dictionaries containing sliding windows for each DataFrame
def load_mobifall_data():
    """
    Load MobiFall data through the legacy function interface.

    Builds a fresh MobiFallLoader and calls ``load_data`` with an empty
    string as the data directory.

    Returns:
        Whatever ``MobiFallLoader.load_data`` returns; documented as a
        tuple of (data_list, names_list)
    """
    return MobiFallLoader().load_data("")
Legacy function for loading MobiFall data.
Returns: Tuple of (data_list, names_list)
def load_arduous_data():
    """
    Load Arduous data through the legacy function interface.

    Builds a fresh ArduousLoader and calls ``load_data`` with an empty
    string as the data directory.

    Returns:
        Whatever ``ArduousLoader.load_data`` returns; documented as a
        tuple of (data_list, names_list)
    """
    return ArduousLoader().load_data("")
Legacy function for loading Arduous data.
Returns: Tuple of (data_list, names_list)
def load_physionet_data(data_dir: str) -> Tuple[List[pd.DataFrame], List[str]]:
    """
    Load PhysioNet data through the legacy function interface.

    Builds a fresh PhysioNetLoader and delegates to its ``load_data``.

    Args:
        data_dir: Dataset directory, forwarded unchanged to the loader

    Returns:
        Tuple of (data_list, names_list)
    """
    return PhysioNetLoader().load_data(data_dir)
Legacy function to load PhysioNet data.
Args: data_dir: Directory containing the dataset
Returns: Tuple of (data_list, names_list)
def create_physionet_windows(data: List[pd.DataFrame], names: List[str],
                             window_size: int = 600, step_size: int = 100) -> List[Dict]:
    """
    Create sliding windows for PhysioNet data through the legacy interface.

    Builds a fresh PhysioNetLoader and delegates to its
    ``create_sliding_windows`` with the arguments passed positionally.

    Args:
        data: List of DataFrames
        names: List of names
        window_size: Size of sliding window (default: 600)
        step_size: Step size for sliding window (default: 100)

    Returns:
        List of sliding window dictionaries
    """
    physionet_loader = PhysioNetLoader()
    windowed = physionet_loader.create_sliding_windows(
        data, names, window_size, step_size
    )
    return windowed
Legacy function to create sliding windows from PhysioNet data.
Args: data: List of DataFrames names: List of names window_size: Size of sliding window step_size: Step size for sliding window
Returns: List of sliding window dictionaries
def load_harup_data(data_dir: str, subjects=None, activities=None, trials=None):
    """
    Load HAR-UP data through the legacy function interface.

    Builds a fresh HARUPLoader and delegates to its ``load_data`` with
    all four arguments passed positionally.

    Args:
        data_dir: Directory containing the dataset
        subjects: List of subject IDs to load (default: None, documented
            as meaning all subjects)
        activities: List of activity IDs to load (default: None,
            documented as meaning all activities)
        trials: List of trial IDs to load (default: None, documented as
            meaning all trials)

    Returns:
        Whatever ``HARUPLoader.load_data`` returns; documented as a
        tuple of (data_list, names_list)
    """
    return HARUPLoader().load_data(data_dir, subjects, activities, trials)
Legacy function for loading HAR-UP data.
Args: data_dir: Directory containing the dataset subjects: List of subject IDs to load (default: all subjects) activities: List of activity IDs to load (default: all activities) trials: List of trial IDs to load (default: all trials)
Returns: Tuple of (data_list, names_list)
def create_harup_windows(harup_data, harup_names, window_size=100, step_size=50):
    """
    Create sliding windows for HAR-UP data through the legacy interface.

    Builds a fresh HARUPLoader and delegates to its
    ``create_sliding_windows`` with the arguments passed positionally.

    Args:
        harup_data: List of dataframes containing HAR-UP data
        harup_names: List of names of the HAR-UP dataframes
        window_size: Size of the sliding window (default: 100)
        step_size: Step size for the sliding window (default: 50)

    Returns:
        The loader's result; documented as a list of dictionaries
        containing sliding windows for each DataFrame
    """
    harup_loader = HARUPLoader()
    windowed = harup_loader.create_sliding_windows(
        harup_data, harup_names, window_size, step_size
    )
    return windowed
Legacy function for creating sliding windows from HAR-UP data.
Args: harup_data: List of dataframes containing HAR-UP data harup_names: List of names of the HAR-UP dataframes window_size: Size of the sliding window step_size: Step size for the sliding window
Returns: List of dictionaries containing sliding windows for each DataFrame
def extract_harup_features(windows_data, time_domain=True, freq_domain=True):
    """
    Extract features from HAR-UP windows through the legacy interface.

    Builds a fresh HARUPLoader and delegates to its ``extract_features``
    with the arguments passed positionally.

    Args:
        windows_data: List of dictionaries containing sliding windows
        time_domain: Whether to extract time domain features (default: True)
        freq_domain: Whether to extract frequency domain features (default: True)

    Returns:
        The loader's result; documented as a list of dictionaries
        containing extracted features
    """
    harup_loader = HARUPLoader()
    extracted = harup_loader.extract_features(windows_data, time_domain, freq_domain)
    return extracted
Legacy function for extracting features from HAR-UP windows.
Args: windows_data: List of dictionaries containing sliding windows time_domain: Whether to extract time domain features freq_domain: Whether to extract frequency domain features
Returns: List of dictionaries containing extracted features
def load_urfall_data(data_dir: str, data_types: Optional[List[str]] = None,
                     sequences: Optional[List[str]] = None,
                     use_falls: bool = True, use_adls: bool = True):
    """
    Load the UrFall dataset through the legacy function interface.

    Builds a fresh UrFallLoader and delegates to its ``load_data``;
    everything except ``data_dir`` is forwarded by keyword.

    Args:
        data_dir: Directory containing the dataset
        data_types: List of data types to load (default: None)
        sequences: List of specific sequences to load (default: None)
        use_falls: Whether to load fall sequences (default: True)
        use_adls: Whether to load ADL sequences (default: True)

    Returns:
        Whatever ``UrFallLoader.load_data`` returns; documented as a
        tuple of (data_list, names_list)
    """
    urfall_loader = UrFallLoader()
    loaded = urfall_loader.load_data(
        data_dir,
        data_types=data_types,
        sequences=sequences,
        use_falls=use_falls,
        use_adls=use_adls,
    )
    return loaded
Load UrFall dataset using the legacy function interface.
Args: data_dir: Directory containing the dataset data_types: List of data types to load sequences: List of specific sequences to load use_falls: Whether to load fall sequences use_adls: Whether to load ADL sequences
Returns: Tuple of (data_list, names_list)
def create_urfall_windows(urfall_data, urfall_names, window_size=30, step_size=15):
    """
    Create sliding windows for UrFall data through the legacy interface.

    Builds a fresh UrFallLoader and delegates to its
    ``create_sliding_windows`` with the arguments passed positionally.

    Args:
        urfall_data: List of DataFrames
        urfall_names: List of names
        window_size: Size of sliding window (default: 30)
        step_size: Step size for sliding window (default: 15)

    Returns:
        List of dictionaries containing windowed data
    """
    urfall_loader = UrFallLoader()
    windowed = urfall_loader.create_sliding_windows(
        urfall_data, urfall_names, window_size, step_size
    )
    return windowed
Create sliding windows from UrFall data using the legacy function interface.
Args: urfall_data: List of DataFrames urfall_names: List of names window_size: Size of sliding window step_size: Step size for sliding window
Returns: List of dictionaries containing windowed data
def download_dataset(dataset_name, data_dir):
    """
    Download the named dataset into ``data_dir``.

    Dispatches on ``dataset_name`` to the matching ``download_*_data``
    helper. "physionet" is accepted but does nothing here, because that
    dataset is handled by the PhysioNetLoader itself.

    Args:
        dataset_name: One of "daphnet", "mobifall", "arduous", "harup",
            "urfall" or "physionet"
        data_dir: Target directory, forwarded to the download helper

    Raises:
        ValueError: If ``dataset_name`` is not one of the names above
    """
    if dataset_name == "daphnet":
        download_daphnet_data(data_dir)
        return
    if dataset_name == "mobifall":
        download_mobifall_data(data_dir)
        return
    if dataset_name == "arduous":
        download_arduous_data(data_dir)
        return
    if dataset_name == "harup":
        download_harup_data(data_dir)
        return
    if dataset_name == "urfall":
        download_urfall_data(data_dir)
        return
    if dataset_name == "physionet":
        # PhysioNet dataset is handled by the PhysioNetLoader itself
        return
    raise ValueError(f"Dataset {dataset_name} not supported.")
Download the dataset.
def extract_dataset(dataset_name, data_dir):
    """
    Extract the named dataset inside ``data_dir``.

    Dispatches on ``dataset_name`` to the matching ``extract_*_data``
    helper. "physionet" is accepted but does nothing here, because that
    dataset is handled by the PhysioNetLoader itself.

    Args:
        dataset_name: One of "daphnet", "mobifall", "arduous", "harup",
            "urfall" or "physionet"
        data_dir: Dataset directory, forwarded to the extraction helper

    Raises:
        ValueError: If ``dataset_name`` is not one of the names above
    """
    if dataset_name == "daphnet":
        extract_daphnet_data(data_dir)
        return
    if dataset_name == "mobifall":
        extract_mobifall_data(data_dir)
        return
    if dataset_name == "arduous":
        extract_arduous_data(data_dir)
        return
    if dataset_name == "harup":
        extract_harup_data(data_dir)
        return
    if dataset_name == "urfall":
        extract_urfall_data(data_dir)
        return
    if dataset_name == "physionet":
        # PhysioNet dataset is handled by the PhysioNetLoader itself
        return
    raise ValueError(f"Dataset {dataset_name} not supported.")
Extract the dataset.
def get_dataset_manager():
    """
    Return the DatasetManager obtained by calling ``DatasetManager()``.

    Described in this module as the singleton manager instance; any
    instance sharing is implemented by DatasetManager itself, not here.
    """
    manager = DatasetManager()
    return manager
Get the singleton DatasetManager instance.
def get_available_datasets():
    """
    Get list of available dataset names.

    Returns:
        The result of ``DatasetManager().get_available_components()``
    """
    manager = DatasetManager()
    return manager.get_available_components()
Get list of available dataset names.
def load_dataset(name: str, data_dir: str, **kwargs):
    """
    Load a dataset by name via the DatasetManager.

    Args:
        name: Name of the dataset loader
        data_dir: Directory containing the dataset
        **kwargs: Extra keyword arguments, forwarded untouched to
            ``DatasetManager.load_dataset``

    Returns:
        The result of ``DatasetManager().load_dataset(...)``; documented
        as the dataset loader instance with loaded data
    """
    manager = DatasetManager()
    loaded = manager.load_dataset(name, data_dir, **kwargs)
    return loaded
Load a dataset using the DatasetManager.
Args: name: Name of the dataset loader data_dir: Directory containing the dataset **kwargs: Additional arguments for the loader
Returns: Dataset loader instance with loaded data