gaitsetpy.dataset.urfall

UrFall Dataset Loader and Utils. Maintainer: @aharshit123456

This file contains the UrFall dataset loader class that inherits from BaseDatasetLoader. UrFall is a fall detection dataset with multimodal data including depth, RGB, accelerometer, and pre-extracted features from depth maps.

Reference:
- Website: https://fenix.ur.edu.pl/~mkepski/ds/uf.html
- Dataset: University of Rzeszow Fall Detection Dataset
  1'''
  2UrFall Dataset Loader and Utils.
  3Maintainer: @aharshit123456
  4
  5This file contains the UrFall dataset loader class that inherits from BaseDatasetLoader.
  6UrFall is a fall detection dataset with multimodal data including depth, RGB, accelerometer,
  7and pre-extracted features from depth maps.
  8
  9Reference:
 10- Website: https://fenix.ur.edu.pl/~mkepski/ds/uf.html
 11- Dataset: University of Rzeszow Fall Detection Dataset
 12'''
 13
import os
from glob import glob
from typing import Any, Dict, List, Optional, Set, Tuple

import numpy as np
import pandas as pd

from ..core.base_classes import BaseDatasetLoader
from .utils import download_dataset, extract_dataset, sliding_window
 21
 22
 23class UrFallLoader(BaseDatasetLoader):
 24    """
 25    UrFall dataset loader class.
 26    
 27    This class handles loading and processing of the UrFall dataset for fall detection.
 28    Supports multiple data types: Depth, RGB, Accelerometer, Synchronization, Video,
 29    and pre-extracted features from depth maps.
 30    """
 31    
 32    def __init__(self, max_workers: int = 8):
 33        """
 34        Initialize UrFall loader with concurrent download support.
 35        
 36        Args:
 37            max_workers: Maximum number of concurrent download threads (default: 8)
 38        """
 39        super().__init__(
 40            name="urfall",
 41            description="UrFall Dataset - University of Rzeszow Fall Detection Dataset with multimodal data",
 42            max_workers=max_workers
 43        )
 44        self.metadata = {
 45            'data_types': ['depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features'],
 46            'camera': 'cam0',  # Front camera
 47            'sampling_frequency': 30,  # Depth/RGB camera fps
 48            'accelerometer_frequency': 100,  # Accelerometer sampling frequency (typical)
 49            'activities': {
 50                -1: 'Not lying (standing/walking)',
 51                0: 'Falling (transient)',
 52                1: 'Lying on ground'
 53            },
 54            'fall_sequences': list(range(1, 31)),  # fall-01 to fall-30
 55            'adl_sequences': list(range(1, 21)),  # adl-01 to adl-20
 56            'feature_columns': [
 57                'sequence_name',
 58                'frame_number',
 59                'label',
 60                'HeightWidthRatio',
 61                'MajorMinorRatio',
 62                'BoundingBoxOccupancy',
 63                'MaxStdXZ',
 64                'HHmaxRatio',
 65                'H',
 66                'D',
 67                'P40'
 68            ],
 69            'feature_descriptions': {
 70                'HeightWidthRatio': 'Bounding box height to width ratio',
 71                'MajorMinorRatio': 'Major to minor axis ratio from BLOB segmentation',
 72                'BoundingBoxOccupancy': 'Ratio of bounding box occupied by person pixels',
 73                'MaxStdXZ': 'Standard deviation of pixels from centroid (X and Z axis)',
 74                'HHmaxRatio': 'Human height in frame to standing height ratio',
 75                'H': 'Actual height in mm',
 76                'D': 'Distance of person center to floor in mm',
 77                'P40': 'Ratio of point clouds in 40cm cuboid to full height cuboid'
 78            }
 79        }
 80    
 81    def load_data(self, data_dir: str, 
 82                  data_types: Optional[List[str]] = None,
 83                  sequences: Optional[List[str]] = None,
 84                  use_falls: bool = True,
 85                  use_adls: bool = True,
 86                  **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
 87        """
 88        Load UrFall dataset from the specified directory.
 89        
 90        Args:
 91            data_dir: Directory containing the dataset
 92            data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer',
 93                       'synchronization', 'video', 'features' (default: ['features'])
 94            sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01'])
 95                      If None, loads all based on use_falls and use_adls
 96            use_falls: Whether to load fall sequences (default: True)
 97            use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True)
 98            **kwargs: Additional arguments
 99            
100        Returns:
101            Tuple of (data_list, names_list)
102        """
103        # Default to loading pre-extracted features if not specified
104        if data_types is None:
105            data_types = ['features']
106        
107        # Validate data types
108        valid_types = set(self.metadata['data_types'])
109        requested_types = set(data_types)
110        invalid_types = requested_types - valid_types
111        if invalid_types:
112            raise ValueError(f"Invalid data types: {invalid_types}. Valid types: {valid_types}")
113        
114        # Create directory if it doesn't exist
115        os.makedirs(data_dir, exist_ok=True)
116        
117        data_list = []
118        names_list = []
119        
120        # Load pre-extracted features (CSV files)
121        if 'features' in data_types:
122            features_data, features_names = self._load_features(data_dir, sequences, use_falls, use_adls)
123            data_list.extend(features_data)
124            names_list.extend(features_names)
125        
126        # Load raw accelerometer data
127        if 'accelerometer' in data_types:
128            accel_data, accel_names = self._load_accelerometer(data_dir, sequences, use_falls, use_adls)
129            data_list.extend(accel_data)
130            names_list.extend(accel_names)
131        
132        # Load synchronization data
133        if 'synchronization' in data_types:
134            sync_data, sync_names = self._load_synchronization(data_dir, sequences, use_falls, use_adls)
135            data_list.extend(sync_data)
136            names_list.extend(sync_names)
137        
138        # Note: Depth, RGB, and Video data are image/video files
139        # These would require specialized loading and are not typically loaded into DataFrames
140        if 'depth' in data_types or 'rgb' in data_types or 'video' in data_types:
141            print("Note: Depth, RGB, and Video data types contain image/video files.")
142            print("These are not loaded into DataFrames but their paths can be accessed.")
143            print("Use the get_file_paths() method to retrieve paths to these files.")
144        
145        self.data = data_list
146        return data_list, names_list
147    
148    def _load_features(self, data_dir: str, sequences: Optional[List[str]], 
149                       use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
150        """
151        Load pre-extracted features from CSV files.
152        
153        Args:
154            data_dir: Directory containing the dataset
155            sequences: Specific sequences to load
156            use_falls: Whether to include fall sequences
157            use_adls: Whether to include ADL sequences
158            
159        Returns:
160            Tuple of (data_list, names_list)
161        """
162        data_list = []
163        names_list = []
164        
165        # Load falls features
166        if use_falls:
167            falls_csv = os.path.join(data_dir, "urfall-cam0-falls.csv")
168            if os.path.exists(falls_csv):
169                df = pd.read_csv(falls_csv, header=None, names=self.metadata['feature_columns'])
170                
171                # Filter by specific sequences if provided
172                if sequences is not None:
173                    fall_sequences = [s for s in sequences if s.startswith('fall-')]
174                    if fall_sequences:
175                        df = df[df['sequence_name'].isin(fall_sequences)]
176                
177                # Add metadata columns
178                df['activity_type'] = 'fall'
179                df['activity_id'] = 1  # Falls are labeled as 1
180                
181                data_list.append(df)
182                names_list.append("urfall-cam0-falls")
183            else:
184                print(f"Warning: Falls features file not found at {falls_csv}")
185        
186        # Load ADLs features
187        if use_adls:
188            adls_csv = os.path.join(data_dir, "urfall-cam0-adls.csv")
189            if os.path.exists(adls_csv):
190                df = pd.read_csv(adls_csv, header=None, names=self.metadata['feature_columns'])
191                
192                # Filter by specific sequences if provided
193                if sequences is not None:
194                    adl_sequences = [s for s in sequences if s.startswith('adl-')]
195                    if adl_sequences:
196                        df = df[df['sequence_name'].isin(adl_sequences)]
197                
198                # Add metadata columns
199                df['activity_type'] = 'adl'
200                df['activity_id'] = 0  # ADLs are labeled as 0
201                
202                data_list.append(df)
203                names_list.append("urfall-cam0-adls")
204            else:
205                print(f"Warning: ADLs features file not found at {adls_csv}")
206        
207        return data_list, names_list
208    
209    def _load_accelerometer(self, data_dir: str, sequences: Optional[List[str]],
210                            use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
211        """
212        Load accelerometer CSV data files.
213        
214        Args:
215            data_dir: Directory containing the dataset
216            sequences: Specific sequences to load
217            use_falls: Whether to include fall sequences
218            use_adls: Whether to include ADL sequences
219            
220        Returns:
221            Tuple of (data_list, names_list)
222        """
223        data_list = []
224        names_list = []
225        
226        # Determine which sequences to load
227        seq_list = []
228        if sequences is not None:
229            seq_list = sequences
230        else:
231            if use_falls:
232                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
233            if use_adls:
234                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])
235        
236        # Load accelerometer data for each sequence
237        for seq in seq_list:
238            accel_file = os.path.join(data_dir, f"{seq}-acc.csv")
239            if os.path.exists(accel_file):
240                try:
241                    df = pd.read_csv(accel_file)
242                    df['sequence_name'] = seq
243                    df['activity_type'] = 'fall' if seq.startswith('fall-') else 'adl'
244                    df['activity_id'] = 1 if seq.startswith('fall-') else 0
245                    data_list.append(df)
246                    names_list.append(f"{seq}-accelerometer")
247                except Exception as e:
248                    print(f"Warning: Could not load accelerometer data from {accel_file}: {e}")
249        
250        return data_list, names_list
251    
252    def _load_synchronization(self, data_dir: str, sequences: Optional[List[str]],
253                              use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
254        """
255        Load synchronization CSV data files.
256        
257        Args:
258            data_dir: Directory containing the dataset
259            sequences: Specific sequences to load
260            use_falls: Whether to include fall sequences
261            use_adls: Whether to include ADL sequences
262            
263        Returns:
264            Tuple of (data_list, names_list)
265        """
266        data_list = []
267        names_list = []
268        
269        # Determine which sequences to load
270        seq_list = []
271        if sequences is not None:
272            seq_list = sequences
273        else:
274            if use_falls:
275                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
276            if use_adls:
277                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])
278        
279        # Load synchronization data for each sequence
280        for seq in seq_list:
281            sync_file = os.path.join(data_dir, f"{seq}-data.csv")
282            if os.path.exists(sync_file):
283                try:
284                    df = pd.read_csv(sync_file)
285                    df['sequence_name'] = seq
286                    df['activity_type'] = 'fall' if seq.startswith('fall-') else 'adl'
287                    df['activity_id'] = 1 if seq.startswith('fall-') else 0
288                    data_list.append(df)
289                    names_list.append(f"{seq}-synchronization")
290                except Exception as e:
291                    print(f"Warning: Could not load synchronization data from {sync_file}: {e}")
292        
293        return data_list, names_list
294    
295    def get_file_paths(self, data_dir: str, data_type: str, 
296                       sequences: Optional[List[str]] = None,
297                       use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]:
298        """
299        Get file paths for image/video data types (depth, RGB, video).
300        
301        Args:
302            data_dir: Directory containing the dataset
303            data_type: Type of data ('depth', 'rgb', 'video')
304            sequences: Specific sequences to get paths for
305            use_falls: Whether to include fall sequences
306            use_adls: Whether to include ADL sequences
307            
308        Returns:
309            Dictionary mapping sequence names to file paths
310        """
311        if data_type not in ['depth', 'rgb', 'video']:
312            raise ValueError(f"data_type must be one of: 'depth', 'rgb', 'video'. Got: {data_type}")
313        
314        file_paths = {}
315        
316        # Determine which sequences to include
317        seq_list = []
318        if sequences is not None:
319            seq_list = sequences
320        else:
321            if use_falls:
322                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
323            if use_adls:
324                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])
325        
326        # Map data type to file extension
327        extension_map = {
328            'depth': '-cam0-d.zip',
329            'rgb': '-cam0-rgb.zip',
330            'video': '-cam0.mp4'
331        }
332        
333        ext = extension_map[data_type]
334        
335        for seq in seq_list:
336            file_path = os.path.join(data_dir, f"{seq}{ext}")
337            if os.path.exists(file_path):
338                file_paths[seq] = file_path
339        
340        return file_paths
341    
342    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
343                               window_size: int = 30, step_size: int = 15) -> List[Dict]:
344        """
345        Create sliding windows from the loaded data.
346        
347        Args:
348            data: List of DataFrames containing the dataset
349            names: List of names corresponding to each DataFrame
350            window_size: Size of the sliding window (default: 30 frames for depth features)
351            step_size: Step size for sliding window (default: 15 frames)
352            
353        Returns:
354            List of dictionaries containing windowed data
355        """
356        windows_data = []
357        
358        for idx, df in enumerate(data):
359            if df.empty:
360                continue
361            
362            # Get numeric feature columns (exclude metadata columns)
363            exclude_cols = ['sequence_name', 'frame_number', 'label', 'activity_type', 'activity_id']
364            feature_cols = [col for col in df.columns 
365                          if col not in exclude_cols and pd.api.types.is_numeric_dtype(df[col])]
366            
367            if not feature_cols:
368                continue
369            
370            windows = []
371            
372            # Create windows for each feature column
373            for col in feature_cols:
374                win = sliding_window(df[col].values, window_size, step_size)
375                windows.append({"name": col, "data": win})
376            
377            # Create windows for labels if present
378            if 'label' in df.columns:
379                label_windows = sliding_window(df['label'].values, window_size, step_size)
380                # Majority voting for each window
381                labels = []
382                for w in label_windows:
383                    vals, counts = np.unique(w, return_counts=True)
384                    labels.append(vals[np.argmax(counts)])
385                windows.append({"name": "labels", "data": np.array(labels)})
386            
387            # Create activity_id windows
388            if 'activity_id' in df.columns:
389                activity_windows = sliding_window(df['activity_id'].values, window_size, step_size)
390                windows.append({"name": "activity_id", "data": activity_windows})
391            
392            windows_data.append({"name": names[idx], "windows": windows})
393        
394        return windows_data
395    
396    def get_supported_formats(self) -> List[str]:
397        """
398        Get list of supported file formats for UrFall dataset.
399        
400        Returns:
401            List of supported file extensions
402        """
403        return ['.csv', '.zip', '.mp4']
404    
405    def get_sensor_info(self) -> Dict[str, any]:
406        """
407        Get information about sensors in the dataset.
408        
409        Returns:
410            Dictionary containing sensor information
411        """
412        return {
413            'data_types': self.metadata['data_types'],
414            'camera': self.metadata['camera'],
415            'sampling_frequency': self.metadata['sampling_frequency'],
416            'accelerometer_frequency': self.metadata['accelerometer_frequency']
417        }
418    
419    def get_activity_info(self) -> Dict[int, str]:
420        """
421        Get information about activities in the dataset.
422        
423        Returns:
424            Dictionary mapping activity IDs to labels
425        """
426        return self.metadata['activities']
427    
428    def get_feature_info(self) -> Dict[str, str]:
429        """
430        Get information about pre-extracted features.
431        
432        Returns:
433            Dictionary mapping feature names to descriptions
434        """
435        return self.metadata['feature_descriptions']
436
437
438# Legacy function wrappers for backward compatibility
def load_urfall_data(data_dir: str, data_types: Optional[List[str]] = None,
                     sequences: Optional[List[str]] = None,
                     use_falls: bool = True, use_adls: bool = True):
    """
    Load UrFall dataset using the legacy function interface.

    Thin wrapper around :class:`UrFallLoader` kept for backward compatibility.

    Args:
        data_dir: Directory containing the dataset
        data_types: List of data types to load
        sequences: List of specific sequences to load
        use_falls: Whether to load fall sequences
        use_adls: Whether to load ADL sequences

    Returns:
        Tuple of (data_list, names_list)
    """
    legacy_loader = UrFallLoader()
    return legacy_loader.load_data(
        data_dir,
        data_types=data_types,
        sequences=sequences,
        use_falls=use_falls,
        use_adls=use_adls,
    )
458
459
def create_urfall_windows(urfall_data, urfall_names, window_size=30, step_size=15):
    """
    Create sliding windows from UrFall data using the legacy function interface.

    Thin wrapper around :meth:`UrFallLoader.create_sliding_windows` kept for
    backward compatibility.

    Args:
        urfall_data: List of DataFrames
        urfall_names: List of names
        window_size: Size of sliding window
        step_size: Step size for sliding window

    Returns:
        List of dictionaries containing windowed data
    """
    legacy_loader = UrFallLoader()
    return legacy_loader.create_sliding_windows(
        urfall_data, urfall_names, window_size, step_size
    )
class UrFallLoader(gaitsetpy.core.base_classes.BaseDatasetLoader):
 24class UrFallLoader(BaseDatasetLoader):
 25    """
 26    UrFall dataset loader class.
 27    
 28    This class handles loading and processing of the UrFall dataset for fall detection.
 29    Supports multiple data types: Depth, RGB, Accelerometer, Synchronization, Video,
 30    and pre-extracted features from depth maps.
 31    """
 32    
 33    def __init__(self, max_workers: int = 8):
 34        """
 35        Initialize UrFall loader with concurrent download support.
 36        
 37        Args:
 38            max_workers: Maximum number of concurrent download threads (default: 8)
 39        """
 40        super().__init__(
 41            name="urfall",
 42            description="UrFall Dataset - University of Rzeszow Fall Detection Dataset with multimodal data",
 43            max_workers=max_workers
 44        )
 45        self.metadata = {
 46            'data_types': ['depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features'],
 47            'camera': 'cam0',  # Front camera
 48            'sampling_frequency': 30,  # Depth/RGB camera fps
 49            'accelerometer_frequency': 100,  # Accelerometer sampling frequency (typical)
 50            'activities': {
 51                -1: 'Not lying (standing/walking)',
 52                0: 'Falling (transient)',
 53                1: 'Lying on ground'
 54            },
 55            'fall_sequences': list(range(1, 31)),  # fall-01 to fall-30
 56            'adl_sequences': list(range(1, 21)),  # adl-01 to adl-20
 57            'feature_columns': [
 58                'sequence_name',
 59                'frame_number',
 60                'label',
 61                'HeightWidthRatio',
 62                'MajorMinorRatio',
 63                'BoundingBoxOccupancy',
 64                'MaxStdXZ',
 65                'HHmaxRatio',
 66                'H',
 67                'D',
 68                'P40'
 69            ],
 70            'feature_descriptions': {
 71                'HeightWidthRatio': 'Bounding box height to width ratio',
 72                'MajorMinorRatio': 'Major to minor axis ratio from BLOB segmentation',
 73                'BoundingBoxOccupancy': 'Ratio of bounding box occupied by person pixels',
 74                'MaxStdXZ': 'Standard deviation of pixels from centroid (X and Z axis)',
 75                'HHmaxRatio': 'Human height in frame to standing height ratio',
 76                'H': 'Actual height in mm',
 77                'D': 'Distance of person center to floor in mm',
 78                'P40': 'Ratio of point clouds in 40cm cuboid to full height cuboid'
 79            }
 80        }
 81    
 82    def load_data(self, data_dir: str, 
 83                  data_types: Optional[List[str]] = None,
 84                  sequences: Optional[List[str]] = None,
 85                  use_falls: bool = True,
 86                  use_adls: bool = True,
 87                  **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
 88        """
 89        Load UrFall dataset from the specified directory.
 90        
 91        Args:
 92            data_dir: Directory containing the dataset
 93            data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer',
 94                       'synchronization', 'video', 'features' (default: ['features'])
 95            sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01'])
 96                      If None, loads all based on use_falls and use_adls
 97            use_falls: Whether to load fall sequences (default: True)
 98            use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True)
 99            **kwargs: Additional arguments
100            
101        Returns:
102            Tuple of (data_list, names_list)
103        """
104        # Default to loading pre-extracted features if not specified
105        if data_types is None:
106            data_types = ['features']
107        
108        # Validate data types
109        valid_types = set(self.metadata['data_types'])
110        requested_types = set(data_types)
111        invalid_types = requested_types - valid_types
112        if invalid_types:
113            raise ValueError(f"Invalid data types: {invalid_types}. Valid types: {valid_types}")
114        
115        # Create directory if it doesn't exist
116        os.makedirs(data_dir, exist_ok=True)
117        
118        data_list = []
119        names_list = []
120        
121        # Load pre-extracted features (CSV files)
122        if 'features' in data_types:
123            features_data, features_names = self._load_features(data_dir, sequences, use_falls, use_adls)
124            data_list.extend(features_data)
125            names_list.extend(features_names)
126        
127        # Load raw accelerometer data
128        if 'accelerometer' in data_types:
129            accel_data, accel_names = self._load_accelerometer(data_dir, sequences, use_falls, use_adls)
130            data_list.extend(accel_data)
131            names_list.extend(accel_names)
132        
133        # Load synchronization data
134        if 'synchronization' in data_types:
135            sync_data, sync_names = self._load_synchronization(data_dir, sequences, use_falls, use_adls)
136            data_list.extend(sync_data)
137            names_list.extend(sync_names)
138        
139        # Note: Depth, RGB, and Video data are image/video files
140        # These would require specialized loading and are not typically loaded into DataFrames
141        if 'depth' in data_types or 'rgb' in data_types or 'video' in data_types:
142            print("Note: Depth, RGB, and Video data types contain image/video files.")
143            print("These are not loaded into DataFrames but their paths can be accessed.")
144            print("Use the get_file_paths() method to retrieve paths to these files.")
145        
146        self.data = data_list
147        return data_list, names_list
148    
149    def _load_features(self, data_dir: str, sequences: Optional[List[str]], 
150                       use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
151        """
152        Load pre-extracted features from CSV files.
153        
154        Args:
155            data_dir: Directory containing the dataset
156            sequences: Specific sequences to load
157            use_falls: Whether to include fall sequences
158            use_adls: Whether to include ADL sequences
159            
160        Returns:
161            Tuple of (data_list, names_list)
162        """
163        data_list = []
164        names_list = []
165        
166        # Load falls features
167        if use_falls:
168            falls_csv = os.path.join(data_dir, "urfall-cam0-falls.csv")
169            if os.path.exists(falls_csv):
170                df = pd.read_csv(falls_csv, header=None, names=self.metadata['feature_columns'])
171                
172                # Filter by specific sequences if provided
173                if sequences is not None:
174                    fall_sequences = [s for s in sequences if s.startswith('fall-')]
175                    if fall_sequences:
176                        df = df[df['sequence_name'].isin(fall_sequences)]
177                
178                # Add metadata columns
179                df['activity_type'] = 'fall'
180                df['activity_id'] = 1  # Falls are labeled as 1
181                
182                data_list.append(df)
183                names_list.append("urfall-cam0-falls")
184            else:
185                print(f"Warning: Falls features file not found at {falls_csv}")
186        
187        # Load ADLs features
188        if use_adls:
189            adls_csv = os.path.join(data_dir, "urfall-cam0-adls.csv")
190            if os.path.exists(adls_csv):
191                df = pd.read_csv(adls_csv, header=None, names=self.metadata['feature_columns'])
192                
193                # Filter by specific sequences if provided
194                if sequences is not None:
195                    adl_sequences = [s for s in sequences if s.startswith('adl-')]
196                    if adl_sequences:
197                        df = df[df['sequence_name'].isin(adl_sequences)]
198                
199                # Add metadata columns
200                df['activity_type'] = 'adl'
201                df['activity_id'] = 0  # ADLs are labeled as 0
202                
203                data_list.append(df)
204                names_list.append("urfall-cam0-adls")
205            else:
206                print(f"Warning: ADLs features file not found at {adls_csv}")
207        
208        return data_list, names_list
209    
210    def _load_accelerometer(self, data_dir: str, sequences: Optional[List[str]],
211                            use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
212        """
213        Load accelerometer CSV data files.
214        
215        Args:
216            data_dir: Directory containing the dataset
217            sequences: Specific sequences to load
218            use_falls: Whether to include fall sequences
219            use_adls: Whether to include ADL sequences
220            
221        Returns:
222            Tuple of (data_list, names_list)
223        """
224        data_list = []
225        names_list = []
226        
227        # Determine which sequences to load
228        seq_list = []
229        if sequences is not None:
230            seq_list = sequences
231        else:
232            if use_falls:
233                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
234            if use_adls:
235                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])
236        
237        # Load accelerometer data for each sequence
238        for seq in seq_list:
239            accel_file = os.path.join(data_dir, f"{seq}-acc.csv")
240            if os.path.exists(accel_file):
241                try:
242                    df = pd.read_csv(accel_file)
243                    df['sequence_name'] = seq
244                    df['activity_type'] = 'fall' if seq.startswith('fall-') else 'adl'
245                    df['activity_id'] = 1 if seq.startswith('fall-') else 0
246                    data_list.append(df)
247                    names_list.append(f"{seq}-accelerometer")
248                except Exception as e:
249                    print(f"Warning: Could not load accelerometer data from {accel_file}: {e}")
250        
251        return data_list, names_list
252    
253    def _load_synchronization(self, data_dir: str, sequences: Optional[List[str]],
254                              use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
255        """
256        Load synchronization CSV data files.
257        
258        Args:
259            data_dir: Directory containing the dataset
260            sequences: Specific sequences to load
261            use_falls: Whether to include fall sequences
262            use_adls: Whether to include ADL sequences
263            
264        Returns:
265            Tuple of (data_list, names_list)
266        """
267        data_list = []
268        names_list = []
269        
270        # Determine which sequences to load
271        seq_list = []
272        if sequences is not None:
273            seq_list = sequences
274        else:
275            if use_falls:
276                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
277            if use_adls:
278                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])
279        
280        # Load synchronization data for each sequence
281        for seq in seq_list:
282            sync_file = os.path.join(data_dir, f"{seq}-data.csv")
283            if os.path.exists(sync_file):
284                try:
285                    df = pd.read_csv(sync_file)
286                    df['sequence_name'] = seq
287                    df['activity_type'] = 'fall' if seq.startswith('fall-') else 'adl'
288                    df['activity_id'] = 1 if seq.startswith('fall-') else 0
289                    data_list.append(df)
290                    names_list.append(f"{seq}-synchronization")
291                except Exception as e:
292                    print(f"Warning: Could not load synchronization data from {sync_file}: {e}")
293        
294        return data_list, names_list
295    
296    def get_file_paths(self, data_dir: str, data_type: str, 
297                       sequences: Optional[List[str]] = None,
298                       use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]:
299        """
300        Get file paths for image/video data types (depth, RGB, video).
301        
302        Args:
303            data_dir: Directory containing the dataset
304            data_type: Type of data ('depth', 'rgb', 'video')
305            sequences: Specific sequences to get paths for
306            use_falls: Whether to include fall sequences
307            use_adls: Whether to include ADL sequences
308            
309        Returns:
310            Dictionary mapping sequence names to file paths
311        """
312        if data_type not in ['depth', 'rgb', 'video']:
313            raise ValueError(f"data_type must be one of: 'depth', 'rgb', 'video'. Got: {data_type}")
314        
315        file_paths = {}
316        
317        # Determine which sequences to include
318        seq_list = []
319        if sequences is not None:
320            seq_list = sequences
321        else:
322            if use_falls:
323                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
324            if use_adls:
325                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])
326        
327        # Map data type to file extension
328        extension_map = {
329            'depth': '-cam0-d.zip',
330            'rgb': '-cam0-rgb.zip',
331            'video': '-cam0.mp4'
332        }
333        
334        ext = extension_map[data_type]
335        
336        for seq in seq_list:
337            file_path = os.path.join(data_dir, f"{seq}{ext}")
338            if os.path.exists(file_path):
339                file_paths[seq] = file_path
340        
341        return file_paths
342    
343    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
344                               window_size: int = 30, step_size: int = 15) -> List[Dict]:
345        """
346        Create sliding windows from the loaded data.
347        
348        Args:
349            data: List of DataFrames containing the dataset
350            names: List of names corresponding to each DataFrame
351            window_size: Size of the sliding window (default: 30 frames for depth features)
352            step_size: Step size for sliding window (default: 15 frames)
353            
354        Returns:
355            List of dictionaries containing windowed data
356        """
357        windows_data = []
358        
359        for idx, df in enumerate(data):
360            if df.empty:
361                continue
362            
363            # Get numeric feature columns (exclude metadata columns)
364            exclude_cols = ['sequence_name', 'frame_number', 'label', 'activity_type', 'activity_id']
365            feature_cols = [col for col in df.columns 
366                          if col not in exclude_cols and pd.api.types.is_numeric_dtype(df[col])]
367            
368            if not feature_cols:
369                continue
370            
371            windows = []
372            
373            # Create windows for each feature column
374            for col in feature_cols:
375                win = sliding_window(df[col].values, window_size, step_size)
376                windows.append({"name": col, "data": win})
377            
378            # Create windows for labels if present
379            if 'label' in df.columns:
380                label_windows = sliding_window(df['label'].values, window_size, step_size)
381                # Majority voting for each window
382                labels = []
383                for w in label_windows:
384                    vals, counts = np.unique(w, return_counts=True)
385                    labels.append(vals[np.argmax(counts)])
386                windows.append({"name": "labels", "data": np.array(labels)})
387            
388            # Create activity_id windows
389            if 'activity_id' in df.columns:
390                activity_windows = sliding_window(df['activity_id'].values, window_size, step_size)
391                windows.append({"name": "activity_id", "data": activity_windows})
392            
393            windows_data.append({"name": names[idx], "windows": windows})
394        
395        return windows_data
396    
397    def get_supported_formats(self) -> List[str]:
398        """
399        Get list of supported file formats for UrFall dataset.
400        
401        Returns:
402            List of supported file extensions
403        """
404        return ['.csv', '.zip', '.mp4']
405    
406    def get_sensor_info(self) -> Dict[str, any]:
407        """
408        Get information about sensors in the dataset.
409        
410        Returns:
411            Dictionary containing sensor information
412        """
413        return {
414            'data_types': self.metadata['data_types'],
415            'camera': self.metadata['camera'],
416            'sampling_frequency': self.metadata['sampling_frequency'],
417            'accelerometer_frequency': self.metadata['accelerometer_frequency']
418        }
419    
420    def get_activity_info(self) -> Dict[int, str]:
421        """
422        Get information about activities in the dataset.
423        
424        Returns:
425            Dictionary mapping activity IDs to labels
426        """
427        return self.metadata['activities']
428    
429    def get_feature_info(self) -> Dict[str, str]:
430        """
431        Get information about pre-extracted features.
432        
433        Returns:
434            Dictionary mapping feature names to descriptions
435        """
436        return self.metadata['feature_descriptions']

UrFall dataset loader class.

This class handles loading and processing of the UrFall dataset for fall detection. Supports multiple data types: Depth, RGB, Accelerometer, Synchronization, Video, and pre-extracted features from depth maps.

UrFallLoader(max_workers: int = 8)
33    def __init__(self, max_workers: int = 8):
34        """
35        Initialize UrFall loader with concurrent download support.
36        
37        Args:
38            max_workers: Maximum number of concurrent download threads (default: 8)
39        """
40        super().__init__(
41            name="urfall",
42            description="UrFall Dataset - University of Rzeszow Fall Detection Dataset with multimodal data",
43            max_workers=max_workers
44        )
45        self.metadata = {
46            'data_types': ['depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features'],
47            'camera': 'cam0',  # Front camera
48            'sampling_frequency': 30,  # Depth/RGB camera fps
49            'accelerometer_frequency': 100,  # Accelerometer sampling frequency (typical)
50            'activities': {
51                -1: 'Not lying (standing/walking)',
52                0: 'Falling (transient)',
53                1: 'Lying on ground'
54            },
55            'fall_sequences': list(range(1, 31)),  # fall-01 to fall-30
56            'adl_sequences': list(range(1, 21)),  # adl-01 to adl-20
57            'feature_columns': [
58                'sequence_name',
59                'frame_number',
60                'label',
61                'HeightWidthRatio',
62                'MajorMinorRatio',
63                'BoundingBoxOccupancy',
64                'MaxStdXZ',
65                'HHmaxRatio',
66                'H',
67                'D',
68                'P40'
69            ],
70            'feature_descriptions': {
71                'HeightWidthRatio': 'Bounding box height to width ratio',
72                'MajorMinorRatio': 'Major to minor axis ratio from BLOB segmentation',
73                'BoundingBoxOccupancy': 'Ratio of bounding box occupied by person pixels',
74                'MaxStdXZ': 'Standard deviation of pixels from centroid (X and Z axis)',
75                'HHmaxRatio': 'Human height in frame to standing height ratio',
76                'H': 'Actual height in mm',
77                'D': 'Distance of person center to floor in mm',
78                'P40': 'Ratio of point clouds in 40cm cuboid to full height cuboid'
79            }
80        }

Initialize UrFall loader with concurrent download support.

Args: max_workers: Maximum number of concurrent download threads (default: 8)

metadata
def load_data( self, data_dir: str, data_types: Optional[List[str]] = None, sequences: Optional[List[str]] = None, use_falls: bool = True, use_adls: bool = True, **kwargs) -> Tuple[List[pandas.core.frame.DataFrame], List[str]]:
 82    def load_data(self, data_dir: str, 
 83                  data_types: Optional[List[str]] = None,
 84                  sequences: Optional[List[str]] = None,
 85                  use_falls: bool = True,
 86                  use_adls: bool = True,
 87                  **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
 88        """
 89        Load UrFall dataset from the specified directory.
 90        
 91        Args:
 92            data_dir: Directory containing the dataset
 93            data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer',
 94                       'synchronization', 'video', 'features' (default: ['features'])
 95            sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01'])
 96                      If None, loads all based on use_falls and use_adls
 97            use_falls: Whether to load fall sequences (default: True)
 98            use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True)
 99            **kwargs: Additional arguments
100            
101        Returns:
102            Tuple of (data_list, names_list)
103        """
104        # Default to loading pre-extracted features if not specified
105        if data_types is None:
106            data_types = ['features']
107        
108        # Validate data types
109        valid_types = set(self.metadata['data_types'])
110        requested_types = set(data_types)
111        invalid_types = requested_types - valid_types
112        if invalid_types:
113            raise ValueError(f"Invalid data types: {invalid_types}. Valid types: {valid_types}")
114        
115        # Create directory if it doesn't exist
116        os.makedirs(data_dir, exist_ok=True)
117        
118        data_list = []
119        names_list = []
120        
121        # Load pre-extracted features (CSV files)
122        if 'features' in data_types:
123            features_data, features_names = self._load_features(data_dir, sequences, use_falls, use_adls)
124            data_list.extend(features_data)
125            names_list.extend(features_names)
126        
127        # Load raw accelerometer data
128        if 'accelerometer' in data_types:
129            accel_data, accel_names = self._load_accelerometer(data_dir, sequences, use_falls, use_adls)
130            data_list.extend(accel_data)
131            names_list.extend(accel_names)
132        
133        # Load synchronization data
134        if 'synchronization' in data_types:
135            sync_data, sync_names = self._load_synchronization(data_dir, sequences, use_falls, use_adls)
136            data_list.extend(sync_data)
137            names_list.extend(sync_names)
138        
139        # Note: Depth, RGB, and Video data are image/video files
140        # These would require specialized loading and are not typically loaded into DataFrames
141        if 'depth' in data_types or 'rgb' in data_types or 'video' in data_types:
142            print("Note: Depth, RGB, and Video data types contain image/video files.")
143            print("These are not loaded into DataFrames but their paths can be accessed.")
144            print("Use the get_file_paths() method to retrieve paths to these files.")
145        
146        self.data = data_list
147        return data_list, names_list

Load UrFall dataset from the specified directory.

Args: data_dir: Directory containing the dataset data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features' (default: ['features']) sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01']) If None, loads all based on use_falls and use_adls use_falls: Whether to load fall sequences (default: True) use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True) **kwargs: Additional arguments

Returns: Tuple of (data_list, names_list)

def get_file_paths( self, data_dir: str, data_type: str, sequences: Optional[List[str]] = None, use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]:
296    def get_file_paths(self, data_dir: str, data_type: str, 
297                       sequences: Optional[List[str]] = None,
298                       use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]:
299        """
300        Get file paths for image/video data types (depth, RGB, video).
301        
302        Args:
303            data_dir: Directory containing the dataset
304            data_type: Type of data ('depth', 'rgb', 'video')
305            sequences: Specific sequences to get paths for
306            use_falls: Whether to include fall sequences
307            use_adls: Whether to include ADL sequences
308            
309        Returns:
310            Dictionary mapping sequence names to file paths
311        """
312        if data_type not in ['depth', 'rgb', 'video']:
313            raise ValueError(f"data_type must be one of: 'depth', 'rgb', 'video'. Got: {data_type}")
314        
315        file_paths = {}
316        
317        # Determine which sequences to include
318        seq_list = []
319        if sequences is not None:
320            seq_list = sequences
321        else:
322            if use_falls:
323                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
324            if use_adls:
325                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])
326        
327        # Map data type to file extension
328        extension_map = {
329            'depth': '-cam0-d.zip',
330            'rgb': '-cam0-rgb.zip',
331            'video': '-cam0.mp4'
332        }
333        
334        ext = extension_map[data_type]
335        
336        for seq in seq_list:
337            file_path = os.path.join(data_dir, f"{seq}{ext}")
338            if os.path.exists(file_path):
339                file_paths[seq] = file_path
340        
341        return file_paths

Get file paths for image/video data types (depth, RGB, video).

Args: data_dir: Directory containing the dataset data_type: Type of data ('depth', 'rgb', 'video') sequences: Specific sequences to get paths for use_falls: Whether to include fall sequences use_adls: Whether to include ADL sequences

Returns: Dictionary mapping sequence names to file paths

def create_sliding_windows( self, data: List[pandas.core.frame.DataFrame], names: List[str], window_size: int = 30, step_size: int = 15) -> List[Dict]:
343    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
344                               window_size: int = 30, step_size: int = 15) -> List[Dict]:
345        """
346        Create sliding windows from the loaded data.
347        
348        Args:
349            data: List of DataFrames containing the dataset
350            names: List of names corresponding to each DataFrame
351            window_size: Size of the sliding window (default: 30 frames for depth features)
352            step_size: Step size for sliding window (default: 15 frames)
353            
354        Returns:
355            List of dictionaries containing windowed data
356        """
357        windows_data = []
358        
359        for idx, df in enumerate(data):
360            if df.empty:
361                continue
362            
363            # Get numeric feature columns (exclude metadata columns)
364            exclude_cols = ['sequence_name', 'frame_number', 'label', 'activity_type', 'activity_id']
365            feature_cols = [col for col in df.columns 
366                          if col not in exclude_cols and pd.api.types.is_numeric_dtype(df[col])]
367            
368            if not feature_cols:
369                continue
370            
371            windows = []
372            
373            # Create windows for each feature column
374            for col in feature_cols:
375                win = sliding_window(df[col].values, window_size, step_size)
376                windows.append({"name": col, "data": win})
377            
378            # Create windows for labels if present
379            if 'label' in df.columns:
380                label_windows = sliding_window(df['label'].values, window_size, step_size)
381                # Majority voting for each window
382                labels = []
383                for w in label_windows:
384                    vals, counts = np.unique(w, return_counts=True)
385                    labels.append(vals[np.argmax(counts)])
386                windows.append({"name": "labels", "data": np.array(labels)})
387            
388            # Create activity_id windows
389            if 'activity_id' in df.columns:
390                activity_windows = sliding_window(df['activity_id'].values, window_size, step_size)
391                windows.append({"name": "activity_id", "data": activity_windows})
392            
393            windows_data.append({"name": names[idx], "windows": windows})
394        
395        return windows_data

Create sliding windows from the loaded data.

Args: data: List of DataFrames containing the dataset names: List of names corresponding to each DataFrame window_size: Size of the sliding window (default: 30 frames for depth features) step_size: Step size for sliding window (default: 15 frames)

Returns: List of dictionaries containing windowed data

def get_supported_formats(self) -> List[str]:
397    def get_supported_formats(self) -> List[str]:
398        """
399        Get list of supported file formats for UrFall dataset.
400        
401        Returns:
402            List of supported file extensions
403        """
404        return ['.csv', '.zip', '.mp4']

Get list of supported file formats for UrFall dataset.

Returns: List of supported file extensions

def get_sensor_info(self) -> Dict[str, Any]:
406    def get_sensor_info(self) -> Dict[str, any]:
407        """
408        Get information about sensors in the dataset.
409        
410        Returns:
411            Dictionary containing sensor information
412        """
413        return {
414            'data_types': self.metadata['data_types'],
415            'camera': self.metadata['camera'],
416            'sampling_frequency': self.metadata['sampling_frequency'],
417            'accelerometer_frequency': self.metadata['accelerometer_frequency']
418        }

Get information about sensors in the dataset.

Returns: Dictionary containing sensor information

def get_activity_info(self) -> Dict[int, str]:
420    def get_activity_info(self) -> Dict[int, str]:
421        """
422        Get information about activities in the dataset.
423        
424        Returns:
425            Dictionary mapping activity IDs to labels
426        """
427        return self.metadata['activities']

Get information about activities in the dataset.

Returns: Dictionary mapping activity IDs to labels

def get_feature_info(self) -> Dict[str, str]:
429    def get_feature_info(self) -> Dict[str, str]:
430        """
431        Get information about pre-extracted features.
432        
433        Returns:
434            Dictionary mapping feature names to descriptions
435        """
436        return self.metadata['feature_descriptions']

Get information about pre-extracted features.

Returns: Dictionary mapping feature names to descriptions

def load_urfall_data( data_dir: str, data_types: Optional[List[str]] = None, sequences: Optional[List[str]] = None, use_falls: bool = True, use_adls: bool = True):
def load_urfall_data(data_dir: str, data_types: Optional[List[str]] = None,
                     sequences: Optional[List[str]] = None,
                     use_falls: bool = True, use_adls: bool = True):
    """
    Load UrFall dataset using the legacy function interface.

    Thin wrapper that constructs a UrFallLoader and delegates to its
    load_data() method.

    Args:
        data_dir: Directory containing the dataset
        data_types: List of data types to load
        sequences: List of specific sequences to load
        use_falls: Whether to load fall sequences
        use_adls: Whether to load ADL sequences

    Returns:
        Tuple of (data_list, names_list)
    """
    return UrFallLoader().load_data(
        data_dir,
        data_types=data_types,
        sequences=sequences,
        use_falls=use_falls,
        use_adls=use_adls,
    )

Load UrFall dataset using the legacy function interface.

Args: data_dir: Directory containing the dataset data_types: List of data types to load sequences: List of specific sequences to load use_falls: Whether to load fall sequences use_adls: Whether to load ADL sequences

Returns: Tuple of (data_list, names_list)

def create_urfall_windows(urfall_data, urfall_names, window_size=30, step_size=15):
def create_urfall_windows(urfall_data, urfall_names, window_size=30, step_size=15):
    """
    Create sliding windows from UrFall data using the legacy function interface.

    Thin wrapper that constructs a UrFallLoader and delegates to its
    create_sliding_windows() method.

    Args:
        urfall_data: List of DataFrames
        urfall_names: List of names
        window_size: Size of sliding window
        step_size: Step size for sliding window

    Returns:
        List of dictionaries containing windowed data
    """
    return UrFallLoader().create_sliding_windows(
        urfall_data, urfall_names, window_size, step_size
    )

Create sliding windows from UrFall data using the legacy function interface.

Args: urfall_data: List of DataFrames urfall_names: List of names window_size: Size of sliding window step_size: Step size for sliding window

Returns: List of dictionaries containing windowed data