'''
UrFall Dataset Loader and Utils.
Maintainer: @aharshit123456

This file contains the UrFall dataset loader class that inherits from BaseDatasetLoader.
UrFall is a fall detection dataset with multimodal data including depth, RGB, accelerometer,
and pre-extracted features from depth maps.

Reference:
- Website: https://fenix.ur.edu.pl/~mkepski/ds/uf.html
- Dataset: University of Rzeszow Fall Detection Dataset
'''

import os
import pandas as pd
import numpy as np
from typing import Any, List, Dict, Tuple, Optional, Set
from glob import glob
from ..core.base_classes import BaseDatasetLoader
from .utils import download_dataset, extract_dataset, sliding_window


class UrFallLoader(BaseDatasetLoader):
    """
    UrFall dataset loader class.

    This class handles loading and processing of the UrFall dataset for fall detection.
    Supports multiple data types: Depth, RGB, Accelerometer, Synchronization, Video,
    and pre-extracted features from depth maps.
    """

    def __init__(self, max_workers: int = 8):
        """
        Initialize UrFall loader with concurrent download support.

        Args:
            max_workers: Maximum number of concurrent download threads (default: 8)
        """
        super().__init__(
            name="urfall",
            description="UrFall Dataset - University of Rzeszow Fall Detection Dataset with multimodal data",
            max_workers=max_workers
        )
        self.metadata = {
            'data_types': ['depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features'],
            'camera': 'cam0',  # Front camera
            'sampling_frequency': 30,  # Depth/RGB camera fps
            'accelerometer_frequency': 100,  # Accelerometer sampling frequency (typical)
            # Per-frame labels used in the pre-extracted feature CSVs.
            'activities': {
                -1: 'Not lying (standing/walking)',
                0: 'Falling (transient)',
                1: 'Lying on ground'
            },
            'fall_sequences': list(range(1, 31)),  # fall-01 to fall-30
            'adl_sequences': list(range(1, 21)),   # adl-01 to adl-20
            # Column layout of the header-less urfall-cam0-*.csv feature files.
            'feature_columns': [
                'sequence_name',
                'frame_number',
                'label',
                'HeightWidthRatio',
                'MajorMinorRatio',
                'BoundingBoxOccupancy',
                'MaxStdXZ',
                'HHmaxRatio',
                'H',
                'D',
                'P40'
            ],
            'feature_descriptions': {
                'HeightWidthRatio': 'Bounding box height to width ratio',
                'MajorMinorRatio': 'Major to minor axis ratio from BLOB segmentation',
                'BoundingBoxOccupancy': 'Ratio of bounding box occupied by person pixels',
                'MaxStdXZ': 'Standard deviation of pixels from centroid (X and Z axis)',
                'HHmaxRatio': 'Human height in frame to standing height ratio',
                'H': 'Actual height in mm',
                'D': 'Distance of person center to floor in mm',
                'P40': 'Ratio of point clouds in 40cm cuboid to full height cuboid'
            }
        }

    def _resolve_sequences(self, sequences: Optional[List[str]],
                           use_falls: bool, use_adls: bool) -> List[str]:
        """
        Resolve the list of sequence names to process.

        Args:
            sequences: Explicit sequence names; returned as-is (copied) when provided
            use_falls: Include the default fall-01..fall-30 sequences
            use_adls: Include the default adl-01..adl-20 sequences

        Returns:
            List of sequence names such as 'fall-01' or 'adl-07'
        """
        if sequences is not None:
            return list(sequences)
        seq_list: List[str] = []
        if use_falls:
            seq_list.extend(f"fall-{i:02d}" for i in self.metadata['fall_sequences'])
        if use_adls:
            seq_list.extend(f"adl-{i:02d}" for i in self.metadata['adl_sequences'])
        return seq_list

    def load_data(self, data_dir: str,
                  data_types: Optional[List[str]] = None,
                  sequences: Optional[List[str]] = None,
                  use_falls: bool = True,
                  use_adls: bool = True,
                  **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load UrFall dataset from the specified directory.

        Args:
            data_dir: Directory containing the dataset
            data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer',
                       'synchronization', 'video', 'features' (default: ['features'])
            sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01'])
                      If None, loads all based on use_falls and use_adls
            use_falls: Whether to load fall sequences (default: True)
            use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True)
            **kwargs: Additional arguments

        Returns:
            Tuple of (data_list, names_list)

        Raises:
            ValueError: If data_types contains an unsupported type name
        """
        # Default to loading pre-extracted features if not specified
        if data_types is None:
            data_types = ['features']

        # Validate data types
        valid_types = set(self.metadata['data_types'])
        requested_types = set(data_types)
        invalid_types = requested_types - valid_types
        if invalid_types:
            raise ValueError(f"Invalid data types: {invalid_types}. Valid types: {valid_types}")

        # Create directory if it doesn't exist
        os.makedirs(data_dir, exist_ok=True)

        data_list = []
        names_list = []

        # Load pre-extracted features (CSV files)
        if 'features' in data_types:
            features_data, features_names = self._load_features(data_dir, sequences, use_falls, use_adls)
            data_list.extend(features_data)
            names_list.extend(features_names)

        # Load raw accelerometer data
        if 'accelerometer' in data_types:
            accel_data, accel_names = self._load_accelerometer(data_dir, sequences, use_falls, use_adls)
            data_list.extend(accel_data)
            names_list.extend(accel_names)

        # Load synchronization data
        if 'synchronization' in data_types:
            sync_data, sync_names = self._load_synchronization(data_dir, sequences, use_falls, use_adls)
            data_list.extend(sync_data)
            names_list.extend(sync_names)

        # Note: Depth, RGB, and Video data are image/video files
        # These would require specialized loading and are not typically loaded into DataFrames
        if 'depth' in data_types or 'rgb' in data_types or 'video' in data_types:
            print("Note: Depth, RGB, and Video data types contain image/video files.")
            print("These are not loaded into DataFrames but their paths can be accessed.")
            print("Use the get_file_paths() method to retrieve paths to these files.")

        self.data = data_list
        return data_list, names_list

    def _load_features(self, data_dir: str, sequences: Optional[List[str]],
                       use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load pre-extracted features from CSV files.

        Args:
            data_dir: Directory containing the dataset
            sequences: Specific sequences to load
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Tuple of (data_list, names_list)
        """
        data_list = []
        names_list = []

        # Load falls features
        if use_falls:
            falls_csv = os.path.join(data_dir, "urfall-cam0-falls.csv")
            if os.path.exists(falls_csv):
                # Feature CSVs ship without a header row; apply the documented column names.
                df = pd.read_csv(falls_csv, header=None, names=self.metadata['feature_columns'])

                # Filter by specific sequences if provided
                if sequences is not None:
                    fall_sequences = [s for s in sequences if s.startswith('fall-')]
                    if fall_sequences:
                        df = df[df['sequence_name'].isin(fall_sequences)]

                # Add metadata columns
                df['activity_type'] = 'fall'
                df['activity_id'] = 1  # Falls are labeled as 1

                data_list.append(df)
                names_list.append("urfall-cam0-falls")
            else:
                print(f"Warning: Falls features file not found at {falls_csv}")

        # Load ADLs features
        if use_adls:
            adls_csv = os.path.join(data_dir, "urfall-cam0-adls.csv")
            if os.path.exists(adls_csv):
                df = pd.read_csv(adls_csv, header=None, names=self.metadata['feature_columns'])

                # Filter by specific sequences if provided
                if sequences is not None:
                    adl_sequences = [s for s in sequences if s.startswith('adl-')]
                    if adl_sequences:
                        df = df[df['sequence_name'].isin(adl_sequences)]

                # Add metadata columns
                df['activity_type'] = 'adl'
                df['activity_id'] = 0  # ADLs are labeled as 0

                data_list.append(df)
                names_list.append("urfall-cam0-adls")
            else:
                print(f"Warning: ADLs features file not found at {adls_csv}")

        return data_list, names_list

    def _load_sequence_csvs(self, data_dir: str, seq_list: List[str],
                            file_suffix: str, name_suffix: str,
                            kind: str) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load one CSV per sequence and tag each DataFrame with sequence metadata.

        Shared implementation for accelerometer and synchronization loading,
        which differ only in filename/name suffixes.

        Args:
            data_dir: Directory containing the dataset
            seq_list: Sequence names to load
            file_suffix: Filename suffix, e.g. '-acc.csv' or '-data.csv'
            name_suffix: Suffix appended to the sequence name in names_list
            kind: Human-readable data kind used in warning messages

        Returns:
            Tuple of (data_list, names_list)
        """
        data_list = []
        names_list = []
        for seq in seq_list:
            csv_file = os.path.join(data_dir, f"{seq}{file_suffix}")
            if not os.path.exists(csv_file):
                # Missing sequences are skipped silently, matching prior behavior.
                continue
            try:
                df = pd.read_csv(csv_file)
                df['sequence_name'] = seq
                df['activity_type'] = 'fall' if seq.startswith('fall-') else 'adl'
                df['activity_id'] = 1 if seq.startswith('fall-') else 0
                data_list.append(df)
                names_list.append(f"{seq}-{name_suffix}")
            except Exception as e:
                # Best-effort loading: warn and continue with remaining sequences.
                print(f"Warning: Could not load {kind} data from {csv_file}: {e}")
        return data_list, names_list

    def _load_accelerometer(self, data_dir: str, sequences: Optional[List[str]],
                            use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load accelerometer CSV data files.

        Args:
            data_dir: Directory containing the dataset
            sequences: Specific sequences to load
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Tuple of (data_list, names_list)
        """
        seq_list = self._resolve_sequences(sequences, use_falls, use_adls)
        return self._load_sequence_csvs(data_dir, seq_list, "-acc.csv",
                                        "accelerometer", "accelerometer")

    def _load_synchronization(self, data_dir: str, sequences: Optional[List[str]],
                              use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load synchronization CSV data files.

        Args:
            data_dir: Directory containing the dataset
            sequences: Specific sequences to load
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Tuple of (data_list, names_list)
        """
        seq_list = self._resolve_sequences(sequences, use_falls, use_adls)
        return self._load_sequence_csvs(data_dir, seq_list, "-data.csv",
                                        "synchronization", "synchronization")

    def get_file_paths(self, data_dir: str, data_type: str,
                       sequences: Optional[List[str]] = None,
                       use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]:
        """
        Get file paths for image/video data types (depth, RGB, video).

        Args:
            data_dir: Directory containing the dataset
            data_type: Type of data ('depth', 'rgb', 'video')
            sequences: Specific sequences to get paths for
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Dictionary mapping sequence names to file paths (existing files only)

        Raises:
            ValueError: If data_type is not 'depth', 'rgb', or 'video'
        """
        if data_type not in ['depth', 'rgb', 'video']:
            raise ValueError(f"data_type must be one of: 'depth', 'rgb', 'video'. Got: {data_type}")

        # Map data type to file extension
        extension_map = {
            'depth': '-cam0-d.zip',
            'rgb': '-cam0-rgb.zip',
            'video': '-cam0.mp4'
        }
        ext = extension_map[data_type]

        file_paths = {}
        for seq in self._resolve_sequences(sequences, use_falls, use_adls):
            file_path = os.path.join(data_dir, f"{seq}{ext}")
            if os.path.exists(file_path):
                file_paths[seq] = file_path

        return file_paths

    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                               window_size: int = 30, step_size: int = 15) -> List[Dict]:
        """
        Create sliding windows from the loaded data.

        Args:
            data: List of DataFrames containing the dataset
            names: List of names corresponding to each DataFrame
            window_size: Size of the sliding window (default: 30 frames for depth features)
            step_size: Step size for sliding window (default: 15 frames)

        Returns:
            List of dictionaries containing windowed data
        """
        windows_data = []

        for idx, df in enumerate(data):
            if df.empty:
                continue

            # Get numeric feature columns (exclude metadata columns)
            exclude_cols = ['sequence_name', 'frame_number', 'label', 'activity_type', 'activity_id']
            feature_cols = [col for col in df.columns
                            if col not in exclude_cols and pd.api.types.is_numeric_dtype(df[col])]

            if not feature_cols:
                continue

            windows = []

            # Create windows for each feature column
            for col in feature_cols:
                win = sliding_window(df[col].values, window_size, step_size)
                windows.append({"name": col, "data": win})

            # Create windows for labels if present
            if 'label' in df.columns:
                label_windows = sliding_window(df['label'].values, window_size, step_size)
                # Majority voting for each window: pick the most frequent label
                labels = []
                for w in label_windows:
                    vals, counts = np.unique(w, return_counts=True)
                    labels.append(vals[np.argmax(counts)])
                windows.append({"name": "labels", "data": np.array(labels)})

            # Create activity_id windows
            if 'activity_id' in df.columns:
                activity_windows = sliding_window(df['activity_id'].values, window_size, step_size)
                windows.append({"name": "activity_id", "data": activity_windows})

            windows_data.append({"name": names[idx], "windows": windows})

        return windows_data

    def get_supported_formats(self) -> List[str]:
        """
        Get list of supported file formats for UrFall dataset.

        Returns:
            List of supported file extensions
        """
        return ['.csv', '.zip', '.mp4']

    def get_sensor_info(self) -> Dict[str, Any]:
        """
        Get information about sensors in the dataset.

        Returns:
            Dictionary containing sensor information
        """
        return {
            'data_types': self.metadata['data_types'],
            'camera': self.metadata['camera'],
            'sampling_frequency': self.metadata['sampling_frequency'],
            'accelerometer_frequency': self.metadata['accelerometer_frequency']
        }

    def get_activity_info(self) -> Dict[int, str]:
        """
        Get information about activities in the dataset.

        Returns:
            Dictionary mapping activity IDs to labels
        """
        return self.metadata['activities']

    def get_feature_info(self) -> Dict[str, str]:
        """
        Get information about pre-extracted features.

        Returns:
            Dictionary mapping feature names to descriptions
        """
        return self.metadata['feature_descriptions']


# Legacy function wrappers for backward compatibility
def load_urfall_data(data_dir: str, data_types: Optional[List[str]] = None,
                     sequences: Optional[List[str]] = None,
                     use_falls: bool = True, use_adls: bool = True):
    """
    Load UrFall dataset using the legacy function interface.

    Args:
        data_dir: Directory containing the dataset
        data_types: List of data types to load
        sequences: List of specific sequences to load
        use_falls: Whether to load fall sequences
        use_adls: Whether to load ADL sequences

    Returns:
        Tuple of (data_list, names_list)
    """
    loader = UrFallLoader()
    return loader.load_data(data_dir, data_types=data_types, sequences=sequences,
                            use_falls=use_falls, use_adls=use_adls)


def create_urfall_windows(urfall_data, urfall_names, window_size=30, step_size=15):
    """
    Create sliding windows from UrFall data using the legacy function interface.

    Args:
        urfall_data: List of DataFrames
        urfall_names: List of names
        window_size: Size of sliding window
        step_size: Step size for sliding window

    Returns:
        List of dictionaries containing windowed data
    """
    loader = UrFallLoader()
    return loader.create_sliding_windows(urfall_data, urfall_names, window_size, step_size)
# NOTE(review): the class below is a duplicated copy of the UrFallLoader
# definition that already appears earlier in this file (apparent paste
# artifact; this copy lacks the legacy wrapper functions). It is reproduced
# here in cleaned-up form, but it is redundant and a candidate for removal.
class UrFallLoader(BaseDatasetLoader):
    """
    UrFall dataset loader class.

    This class handles loading and processing of the UrFall dataset for fall detection.
    Supports multiple data types: Depth, RGB, Accelerometer, Synchronization, Video,
    and pre-extracted features from depth maps.
    """

    def __init__(self, max_workers: int = 8):
        """
        Initialize UrFall loader with concurrent download support.

        Args:
            max_workers: Maximum number of concurrent download threads (default: 8)
        """
        super().__init__(
            name="urfall",
            description="UrFall Dataset - University of Rzeszow Fall Detection Dataset with multimodal data",
            max_workers=max_workers
        )
        self.metadata = {
            'data_types': ['depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features'],
            'camera': 'cam0',  # Front camera
            'sampling_frequency': 30,  # Depth/RGB camera fps
            'accelerometer_frequency': 100,  # Accelerometer sampling frequency (typical)
            'activities': {
                -1: 'Not lying (standing/walking)',
                0: 'Falling (transient)',
                1: 'Lying on ground'
            },
            'fall_sequences': list(range(1, 31)),  # fall-01 to fall-30
            'adl_sequences': list(range(1, 21)),  # adl-01 to adl-20
            # Column layout of the header-less urfall-cam0-*.csv feature files.
            'feature_columns': [
                'sequence_name',
                'frame_number',
                'label',
                'HeightWidthRatio',
                'MajorMinorRatio',
                'BoundingBoxOccupancy',
                'MaxStdXZ',
                'HHmaxRatio',
                'H',
                'D',
                'P40'
            ],
            'feature_descriptions': {
                'HeightWidthRatio': 'Bounding box height to width ratio',
                'MajorMinorRatio': 'Major to minor axis ratio from BLOB segmentation',
                'BoundingBoxOccupancy': 'Ratio of bounding box occupied by person pixels',
                'MaxStdXZ': 'Standard deviation of pixels from centroid (X and Z axis)',
                'HHmaxRatio': 'Human height in frame to standing height ratio',
                'H': 'Actual height in mm',
                'D': 'Distance of person center to floor in mm',
                'P40': 'Ratio of point clouds in 40cm cuboid to full height cuboid'
            }
        }

    def load_data(self, data_dir: str,
                  data_types: Optional[List[str]] = None,
                  sequences: Optional[List[str]] = None,
                  use_falls: bool = True,
                  use_adls: bool = True,
                  **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load UrFall dataset from the specified directory.

        Args:
            data_dir: Directory containing the dataset
            data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer',
                       'synchronization', 'video', 'features' (default: ['features'])
            sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01'])
                      If None, loads all based on use_falls and use_adls
            use_falls: Whether to load fall sequences (default: True)
            use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True)
            **kwargs: Additional arguments

        Returns:
            Tuple of (data_list, names_list)
        """
        # Default to loading pre-extracted features if not specified
        if data_types is None:
            data_types = ['features']

        # Validate data types
        valid_types = set(self.metadata['data_types'])
        requested_types = set(data_types)
        invalid_types = requested_types - valid_types
        if invalid_types:
            raise ValueError(f"Invalid data types: {invalid_types}. Valid types: {valid_types}")

        # Create directory if it doesn't exist
        os.makedirs(data_dir, exist_ok=True)

        data_list = []
        names_list = []

        # Load pre-extracted features (CSV files)
        if 'features' in data_types:
            features_data, features_names = self._load_features(data_dir, sequences, use_falls, use_adls)
            data_list.extend(features_data)
            names_list.extend(features_names)

        # Load raw accelerometer data
        if 'accelerometer' in data_types:
            accel_data, accel_names = self._load_accelerometer(data_dir, sequences, use_falls, use_adls)
            data_list.extend(accel_data)
            names_list.extend(accel_names)

        # Load synchronization data
        if 'synchronization' in data_types:
            sync_data, sync_names = self._load_synchronization(data_dir, sequences, use_falls, use_adls)
            data_list.extend(sync_data)
            names_list.extend(sync_names)

        # Note: Depth, RGB, and Video data are image/video files
        # These would require specialized loading and are not typically loaded into DataFrames
        if 'depth' in data_types or 'rgb' in data_types or 'video' in data_types:
            print("Note: Depth, RGB, and Video data types contain image/video files.")
            print("These are not loaded into DataFrames but their paths can be accessed.")
            print("Use the get_file_paths() method to retrieve paths to these files.")

        self.data = data_list
        return data_list, names_list

    def _load_features(self, data_dir: str, sequences: Optional[List[str]],
                       use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load pre-extracted features from CSV files.

        Args:
            data_dir: Directory containing the dataset
            sequences: Specific sequences to load
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Tuple of (data_list, names_list)
        """
        data_list = []
        names_list = []

        # Load falls features
        if use_falls:
            falls_csv = os.path.join(data_dir, "urfall-cam0-falls.csv")
            if os.path.exists(falls_csv):
                df = pd.read_csv(falls_csv, header=None, names=self.metadata['feature_columns'])

                # Filter by specific sequences if provided
                if sequences is not None:
                    fall_sequences = [s for s in sequences if s.startswith('fall-')]
                    if fall_sequences:
                        df = df[df['sequence_name'].isin(fall_sequences)]

                # Add metadata columns
                df['activity_type'] = 'fall'
                df['activity_id'] = 1  # Falls are labeled as 1

                data_list.append(df)
                names_list.append("urfall-cam0-falls")
            else:
                print(f"Warning: Falls features file not found at {falls_csv}")

        # Load ADLs features
        if use_adls:
            adls_csv = os.path.join(data_dir, "urfall-cam0-adls.csv")
            if os.path.exists(adls_csv):
                df = pd.read_csv(adls_csv, header=None, names=self.metadata['feature_columns'])

                # Filter by specific sequences if provided
                if sequences is not None:
                    adl_sequences = [s for s in sequences if s.startswith('adl-')]
                    if adl_sequences:
                        df = df[df['sequence_name'].isin(adl_sequences)]

                # Add metadata columns
                df['activity_type'] = 'adl'
                df['activity_id'] = 0  # ADLs are labeled as 0

                data_list.append(df)
                names_list.append("urfall-cam0-adls")
            else:
                print(f"Warning: ADLs features file not found at {adls_csv}")

        return data_list, names_list

    def _load_accelerometer(self, data_dir: str, sequences: Optional[List[str]],
                            use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load accelerometer CSV data files.

        Args:
            data_dir: Directory containing the dataset
            sequences: Specific sequences to load
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Tuple of (data_list, names_list)
        """
        data_list = []
        names_list = []

        # Determine which sequences to load
        seq_list = []
        if sequences is not None:
            seq_list = sequences
        else:
            if use_falls:
                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
            if use_adls:
                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])

        # Load accelerometer data for each sequence
        for seq in seq_list:
            accel_file = os.path.join(data_dir, f"{seq}-acc.csv")
            if os.path.exists(accel_file):
                try:
                    df = pd.read_csv(accel_file)
                    df['sequence_name'] = seq
                    df['activity_type'] = 'fall' if seq.startswith('fall-') else 'adl'
                    df['activity_id'] = 1 if seq.startswith('fall-') else 0
                    data_list.append(df)
                    names_list.append(f"{seq}-accelerometer")
                except Exception as e:
                    print(f"Warning: Could not load accelerometer data from {accel_file}: {e}")

        return data_list, names_list

    def _load_synchronization(self, data_dir: str, sequences: Optional[List[str]],
                              use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load synchronization CSV data files.

        Args:
            data_dir: Directory containing the dataset
            sequences: Specific sequences to load
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Tuple of (data_list, names_list)
        """
        data_list = []
        names_list = []

        # Determine which sequences to load
        seq_list = []
        if sequences is not None:
            seq_list = sequences
        else:
            if use_falls:
                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
            if use_adls:
                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])

        # Load synchronization data for each sequence
        for seq in seq_list:
            sync_file = os.path.join(data_dir, f"{seq}-data.csv")
            if os.path.exists(sync_file):
                try:
                    df = pd.read_csv(sync_file)
                    df['sequence_name'] = seq
                    df['activity_type'] = 'fall' if seq.startswith('fall-') else 'adl'
                    df['activity_id'] = 1 if seq.startswith('fall-') else 0
                    data_list.append(df)
                    names_list.append(f"{seq}-synchronization")
                except Exception as e:
                    print(f"Warning: Could not load synchronization data from {sync_file}: {e}")

        return data_list, names_list

    def get_file_paths(self, data_dir: str, data_type: str,
                       sequences: Optional[List[str]] = None,
                       use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]:
        """
        Get file paths for image/video data types (depth, RGB, video).

        Args:
            data_dir: Directory containing the dataset
            data_type: Type of data ('depth', 'rgb', 'video')
            sequences: Specific sequences to get paths for
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Dictionary mapping sequence names to file paths
        """
        if data_type not in ['depth', 'rgb', 'video']:
            raise ValueError(f"data_type must be one of: 'depth', 'rgb', 'video'. Got: {data_type}")

        file_paths = {}

        # Determine which sequences to include
        seq_list = []
        if sequences is not None:
            seq_list = sequences
        else:
            if use_falls:
                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
            if use_adls:
                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])

        # Map data type to file extension
        extension_map = {
            'depth': '-cam0-d.zip',
            'rgb': '-cam0-rgb.zip',
            'video': '-cam0.mp4'
        }

        ext = extension_map[data_type]

        for seq in seq_list:
            file_path = os.path.join(data_dir, f"{seq}{ext}")
            if os.path.exists(file_path):
                file_paths[seq] = file_path

        return file_paths

    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                               window_size: int = 30, step_size: int = 15) -> List[Dict]:
        """
        Create sliding windows from the loaded data.

        Args:
            data: List of DataFrames containing the dataset
            names: List of names corresponding to each DataFrame
            window_size: Size of the sliding window (default: 30 frames for depth features)
            step_size: Step size for sliding window (default: 15 frames)

        Returns:
            List of dictionaries containing windowed data
        """
        windows_data = []

        for idx, df in enumerate(data):
            if df.empty:
                continue

            # Get numeric feature columns (exclude metadata columns)
            exclude_cols = ['sequence_name', 'frame_number', 'label', 'activity_type', 'activity_id']
            feature_cols = [col for col in df.columns
                            if col not in exclude_cols and pd.api.types.is_numeric_dtype(df[col])]

            if not feature_cols:
                continue

            windows = []

            # Create windows for each feature column
            for col in feature_cols:
                win = sliding_window(df[col].values, window_size, step_size)
                windows.append({"name": col, "data": win})

            # Create windows for labels if present
            if 'label' in df.columns:
                label_windows = sliding_window(df['label'].values, window_size, step_size)
                # Majority voting for each window
                labels = []
                for w in label_windows:
                    vals, counts = np.unique(w, return_counts=True)
                    labels.append(vals[np.argmax(counts)])
                windows.append({"name": "labels", "data": np.array(labels)})

            # Create activity_id windows
            if 'activity_id' in df.columns:
                activity_windows = sliding_window(df['activity_id'].values, window_size, step_size)
                windows.append({"name": "activity_id", "data": activity_windows})

            windows_data.append({"name": names[idx], "windows": windows})

        return windows_data

    def get_supported_formats(self) -> List[str]:
        """
        Get list of supported file formats for UrFall dataset.

        Returns:
            List of supported file extensions
        """
        return ['.csv', '.zip', '.mp4']

    def get_sensor_info(self) -> Dict[str, any]:
        """
        Get information about sensors in the dataset.

        Returns:
            Dictionary containing sensor information
        """
        return {
            'data_types': self.metadata['data_types'],
            'camera': self.metadata['camera'],
            'sampling_frequency': self.metadata['sampling_frequency'],
            'accelerometer_frequency': self.metadata['accelerometer_frequency']
        }

    def get_activity_info(self) -> Dict[int, str]:
        """
        Get information about activities in the dataset.

        Returns:
            Dictionary mapping activity IDs to labels
        """
        return self.metadata['activities']

    def get_feature_info(self) -> Dict[str, str]:
        """
        Get information about pre-extracted features.

        Returns:
            Dictionary mapping feature names to descriptions
        """
        return self.metadata['feature_descriptions']
UrFall dataset loader class.
This class handles loading and processing of the UrFall dataset for fall detection. Supports multiple data types: Depth, RGB, Accelerometer, Synchronization, Video, and pre-extracted features from depth maps.
33 def __init__(self, max_workers: int = 8): 34 """ 35 Initialize UrFall loader with concurrent download support. 36 37 Args: 38 max_workers: Maximum number of concurrent download threads (default: 8) 39 """ 40 super().__init__( 41 name="urfall", 42 description="UrFall Dataset - University of Rzeszow Fall Detection Dataset with multimodal data", 43 max_workers=max_workers 44 ) 45 self.metadata = { 46 'data_types': ['depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features'], 47 'camera': 'cam0', # Front camera 48 'sampling_frequency': 30, # Depth/RGB camera fps 49 'accelerometer_frequency': 100, # Accelerometer sampling frequency (typical) 50 'activities': { 51 -1: 'Not lying (standing/walking)', 52 0: 'Falling (transient)', 53 1: 'Lying on ground' 54 }, 55 'fall_sequences': list(range(1, 31)), # fall-01 to fall-30 56 'adl_sequences': list(range(1, 21)), # adl-01 to adl-20 57 'feature_columns': [ 58 'sequence_name', 59 'frame_number', 60 'label', 61 'HeightWidthRatio', 62 'MajorMinorRatio', 63 'BoundingBoxOccupancy', 64 'MaxStdXZ', 65 'HHmaxRatio', 66 'H', 67 'D', 68 'P40' 69 ], 70 'feature_descriptions': { 71 'HeightWidthRatio': 'Bounding box height to width ratio', 72 'MajorMinorRatio': 'Major to minor axis ratio from BLOB segmentation', 73 'BoundingBoxOccupancy': 'Ratio of bounding box occupied by person pixels', 74 'MaxStdXZ': 'Standard deviation of pixels from centroid (X and Z axis)', 75 'HHmaxRatio': 'Human height in frame to standing height ratio', 76 'H': 'Actual height in mm', 77 'D': 'Distance of person center to floor in mm', 78 'P40': 'Ratio of point clouds in 40cm cuboid to full height cuboid' 79 } 80 }
Initialize UrFall loader with concurrent download support.
Args: max_workers: Maximum number of concurrent download threads (default: 8)
    def load_data(self, data_dir: str,
                  data_types: Optional[List[str]] = None,
                  sequences: Optional[List[str]] = None,
                  use_falls: bool = True,
                  use_adls: bool = True,
                  **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load UrFall dataset from the specified directory.

        Only tabular modalities ('features', 'accelerometer', 'synchronization')
        are loaded into DataFrames; image/video modalities ('depth', 'rgb',
        'video') are announced via a console note and must be accessed through
        get_file_paths() instead.

        Args:
            data_dir: Directory containing the dataset (created if missing)
            data_types: List of data types to load. Options: 'depth', 'rgb',
                'accelerometer', 'synchronization', 'video', 'features'
                (default: ['features'])
            sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01']).
                If None, loads all based on use_falls and use_adls
            use_falls: Whether to load fall sequences (default: True)
            use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True)
            **kwargs: Additional arguments (currently unused)

        Returns:
            Tuple of (data_list, names_list) — parallel lists of DataFrames
            and their sequence names

        Raises:
            ValueError: If data_types contains an entry not listed in
                self.metadata['data_types']
        """
        # Default to loading pre-extracted features if not specified
        if data_types is None:
            data_types = ['features']

        # Validate data types against the modalities declared in metadata
        valid_types = set(self.metadata['data_types'])
        requested_types = set(data_types)
        invalid_types = requested_types - valid_types
        if invalid_types:
            raise ValueError(f"Invalid data types: {invalid_types}. Valid types: {valid_types}")

        # Create directory if it doesn't exist (download targets land here)
        os.makedirs(data_dir, exist_ok=True)

        data_list = []
        names_list = []

        # Load pre-extracted features (CSV files)
        if 'features' in data_types:
            features_data, features_names = self._load_features(data_dir, sequences, use_falls, use_adls)
            data_list.extend(features_data)
            names_list.extend(features_names)

        # Load raw accelerometer data
        if 'accelerometer' in data_types:
            accel_data, accel_names = self._load_accelerometer(data_dir, sequences, use_falls, use_adls)
            data_list.extend(accel_data)
            names_list.extend(accel_names)

        # Load synchronization data
        if 'synchronization' in data_types:
            sync_data, sync_names = self._load_synchronization(data_dir, sequences, use_falls, use_adls)
            data_list.extend(sync_data)
            names_list.extend(sync_names)

        # Note: Depth, RGB, and Video data are image/video files.
        # These would require specialized loading and are not loaded into DataFrames.
        if 'depth' in data_types or 'rgb' in data_types or 'video' in data_types:
            print("Note: Depth, RGB, and Video data types contain image/video files.")
            print("These are not loaded into DataFrames but their paths can be accessed.")
            print("Use the get_file_paths() method to retrieve paths to these files.")

        # Cache the loaded frames on the instance before returning.
        self.data = data_list
        return data_list, names_list
Load UrFall dataset from the specified directory.
Args: data_dir: Directory containing the dataset data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features' (default: ['features']) sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01']) If None, loads all based on use_falls and use_adls use_falls: Whether to load fall sequences (default: True) use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True) **kwargs: Additional arguments
Returns: Tuple of (data_list, names_list)
296 def get_file_paths(self, data_dir: str, data_type: str, 297 sequences: Optional[List[str]] = None, 298 use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]: 299 """ 300 Get file paths for image/video data types (depth, RGB, video). 301 302 Args: 303 data_dir: Directory containing the dataset 304 data_type: Type of data ('depth', 'rgb', 'video') 305 sequences: Specific sequences to get paths for 306 use_falls: Whether to include fall sequences 307 use_adls: Whether to include ADL sequences 308 309 Returns: 310 Dictionary mapping sequence names to file paths 311 """ 312 if data_type not in ['depth', 'rgb', 'video']: 313 raise ValueError(f"data_type must be one of: 'depth', 'rgb', 'video'. Got: {data_type}") 314 315 file_paths = {} 316 317 # Determine which sequences to include 318 seq_list = [] 319 if sequences is not None: 320 seq_list = sequences 321 else: 322 if use_falls: 323 seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)]) 324 if use_adls: 325 seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)]) 326 327 # Map data type to file extension 328 extension_map = { 329 'depth': '-cam0-d.zip', 330 'rgb': '-cam0-rgb.zip', 331 'video': '-cam0.mp4' 332 } 333 334 ext = extension_map[data_type] 335 336 for seq in seq_list: 337 file_path = os.path.join(data_dir, f"{seq}{ext}") 338 if os.path.exists(file_path): 339 file_paths[seq] = file_path 340 341 return file_paths
Get file paths for image/video data types (depth, RGB, video).
Args: data_dir: Directory containing the dataset data_type: Type of data ('depth', 'rgb', 'video') sequences: Specific sequences to get paths for use_falls: Whether to include fall sequences use_adls: Whether to include ADL sequences
Returns: Dictionary mapping sequence names to file paths
343 def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 344 window_size: int = 30, step_size: int = 15) -> List[Dict]: 345 """ 346 Create sliding windows from the loaded data. 347 348 Args: 349 data: List of DataFrames containing the dataset 350 names: List of names corresponding to each DataFrame 351 window_size: Size of the sliding window (default: 30 frames for depth features) 352 step_size: Step size for sliding window (default: 15 frames) 353 354 Returns: 355 List of dictionaries containing windowed data 356 """ 357 windows_data = [] 358 359 for idx, df in enumerate(data): 360 if df.empty: 361 continue 362 363 # Get numeric feature columns (exclude metadata columns) 364 exclude_cols = ['sequence_name', 'frame_number', 'label', 'activity_type', 'activity_id'] 365 feature_cols = [col for col in df.columns 366 if col not in exclude_cols and pd.api.types.is_numeric_dtype(df[col])] 367 368 if not feature_cols: 369 continue 370 371 windows = [] 372 373 # Create windows for each feature column 374 for col in feature_cols: 375 win = sliding_window(df[col].values, window_size, step_size) 376 windows.append({"name": col, "data": win}) 377 378 # Create windows for labels if present 379 if 'label' in df.columns: 380 label_windows = sliding_window(df['label'].values, window_size, step_size) 381 # Majority voting for each window 382 labels = [] 383 for w in label_windows: 384 vals, counts = np.unique(w, return_counts=True) 385 labels.append(vals[np.argmax(counts)]) 386 windows.append({"name": "labels", "data": np.array(labels)}) 387 388 # Create activity_id windows 389 if 'activity_id' in df.columns: 390 activity_windows = sliding_window(df['activity_id'].values, window_size, step_size) 391 windows.append({"name": "activity_id", "data": activity_windows}) 392 393 windows_data.append({"name": names[idx], "windows": windows}) 394 395 return windows_data
Create sliding windows from the loaded data.
Args: data: List of DataFrames containing the dataset names: List of names corresponding to each DataFrame window_size: Size of the sliding window (default: 30 frames for depth features) step_size: Step size for sliding window (default: 15 frames)
Returns: List of dictionaries containing windowed data
397 def get_supported_formats(self) -> List[str]: 398 """ 399 Get list of supported file formats for UrFall dataset. 400 401 Returns: 402 List of supported file extensions 403 """ 404 return ['.csv', '.zip', '.mp4']
Get list of supported file formats for UrFall dataset.
Returns: List of supported file extensions
406 def get_sensor_info(self) -> Dict[str, any]: 407 """ 408 Get information about sensors in the dataset. 409 410 Returns: 411 Dictionary containing sensor information 412 """ 413 return { 414 'data_types': self.metadata['data_types'], 415 'camera': self.metadata['camera'], 416 'sampling_frequency': self.metadata['sampling_frequency'], 417 'accelerometer_frequency': self.metadata['accelerometer_frequency'] 418 }
Get information about sensors in the dataset.
Returns: Dictionary containing sensor information
420 def get_activity_info(self) -> Dict[int, str]: 421 """ 422 Get information about activities in the dataset. 423 424 Returns: 425 Dictionary mapping activity IDs to labels 426 """ 427 return self.metadata['activities']
Get information about activities in the dataset.
Returns: Dictionary mapping activity IDs to labels
429 def get_feature_info(self) -> Dict[str, str]: 430 """ 431 Get information about pre-extracted features. 432 433 Returns: 434 Dictionary mapping feature names to descriptions 435 """ 436 return self.metadata['feature_descriptions']
Get information about pre-extracted features.
Returns: Dictionary mapping feature names to descriptions
def load_urfall_data(data_dir: str, data_types: Optional[List[str]] = None,
                     sequences: Optional[List[str]] = None,
                     use_falls: bool = True, use_adls: bool = True):
    """
    Load UrFall dataset via the legacy function interface.

    Thin wrapper that constructs a UrFallLoader and delegates to load_data.

    Args:
        data_dir: Directory containing the dataset
        data_types: List of data types to load
        sequences: List of specific sequences to load
        use_falls: Whether to load fall sequences
        use_adls: Whether to load ADL sequences

    Returns:
        Tuple of (data_list, names_list)
    """
    return UrFallLoader().load_data(
        data_dir,
        data_types=data_types,
        sequences=sequences,
        use_falls=use_falls,
        use_adls=use_adls,
    )
Load UrFall dataset using the legacy function interface.
Args: data_dir: Directory containing the dataset data_types: List of data types to load sequences: List of specific sequences to load use_falls: Whether to load fall sequences use_adls: Whether to load ADL sequences
Returns: Tuple of (data_list, names_list)
def create_urfall_windows(urfall_data, urfall_names, window_size=30, step_size=15):
    """
    Create sliding windows from UrFall data via the legacy function interface.

    Thin wrapper that constructs a UrFallLoader and delegates to
    create_sliding_windows.

    Args:
        urfall_data: List of DataFrames
        urfall_names: List of names
        window_size: Size of sliding window
        step_size: Step size for sliding window

    Returns:
        List of dictionaries containing windowed data
    """
    return UrFallLoader().create_sliding_windows(
        urfall_data, urfall_names, window_size, step_size
    )
Create sliding windows from UrFall data using the legacy function interface.
Args: urfall_data: List of DataFrames urfall_names: List of names window_size: Size of sliding window step_size: Step size for sliding window
Returns: List of dictionaries containing windowed data