gaitsetpy.dataset

dataset: Handles loading and processing of supported datasets.

This module provides both the new class-based dataset loaders and the legacy function-based API. All dataset loaders inherit from BaseDatasetLoader and are registered with the DatasetManager.

Supported datasets:

  • Daphnet: Freezing of Gait dataset
  • MobiFall: Fall detection dataset
  • Arduous: Daily activity recognition dataset
  • PhysioNet: VGRF dataset for Parkinson's disease gait analysis
  • HAR-UP: Multimodal System for Fall Detection and Human Activity Recognition
  • UrFall: University of Rzeszow Fall Detection Dataset with multimodal data
  1"""
  2dataset: Handles loading and processing of supported datasets.
  3
  4This module provides both the new class-based dataset loaders and the legacy function-based API.
  5All dataset loaders inherit from BaseDatasetLoader and are registered with the DatasetManager.
  6
  7Supported datasets:
  8- Daphnet: Freezing of Gait dataset
  9- MobiFall: Fall detection dataset
 10- Arduous: Daily activity recognition dataset
 11- PhysioNet: VGRF dataset for Parkinson's disease gait analysis
 12- HAR-UP: Multimodal System for Fall Detection and Human Activity Recognition
 13- UrFall: University of Rzeszow Fall Detection Dataset with multimodal data
 14
 15"""
 16
 17# Import the new class-based loaders
 18from .daphnet import DaphnetLoader
 19from .mobifall import MobiFallLoader
 20from .arduous import ArduousLoader
 21from .physionet import PhysioNetLoader
 22from .harup import HARUPLoader
 23from .urfall import UrFallLoader
 24
 25# Import legacy functions for backward compatibility
 26from .daphnet import load_daphnet_data, create_sliding_windows
 27from .mobifall import load_mobifall_data
 28from .arduous import load_arduous_data
 29from .physionet import load_physionet_data, create_physionet_windows
 30from .harup import load_harup_data, create_harup_windows, extract_harup_features
 31from .urfall import load_urfall_data, create_urfall_windows
 32from .utils import download_dataset, extract_dataset, sliding_window
 33
 34# Import managers
 35from ..core.managers import DatasetManager
 36
 37# Register all dataset loaders with the manager
 38def _register_datasets():
 39    """Register all available dataset loaders with the DatasetManager."""
 40    manager = DatasetManager()
 41    manager.register_dataset("daphnet", DaphnetLoader)
 42    manager.register_dataset("mobifall", MobiFallLoader)
 43    manager.register_dataset("arduous", ArduousLoader)
 44    manager.register_dataset("physionet", PhysioNetLoader)
 45    manager.register_dataset("harup", HARUPLoader)
 46    manager.register_dataset("urfall", UrFallLoader)
 47
 48# Auto-register datasets when module is imported
 49_register_datasets()
 50
 51# Convenient access to the dataset manager
 52def get_dataset_manager():
 53    """Get the singleton DatasetManager instance."""
 54    return DatasetManager()
 55
 56# Helper function to get available datasets
 57def get_available_datasets():
 58    """Get list of available dataset names."""
 59    return DatasetManager().get_available_components()
 60
 61# Helper function to load dataset using manager
 62def load_dataset(name: str, data_dir: str, **kwargs):
 63    """
 64    Load a dataset using the DatasetManager.
 65    
 66    Args:
 67        name: Name of the dataset loader
 68        data_dir: Directory containing the dataset
 69        **kwargs: Additional arguments for the loader
 70        
 71    Returns:
 72        Dataset loader instance with loaded data
 73    """
 74    return DatasetManager().load_dataset(name, data_dir, **kwargs)
 75
 76__all__ = [
 77    # New class-based loaders
 78    'DaphnetLoader',
 79    'MobiFallLoader', 
 80    'ArduousLoader',
 81    'PhysioNetLoader',
 82    'HARUPLoader',
 83    'UrFallLoader',
 84    # Legacy functions for backward compatibility
 85    'load_daphnet_data',
 86    'create_sliding_windows',
 87    'load_mobifall_data',
 88    'load_arduous_data',
 89    'load_physionet_data',
 90    'create_physionet_windows',
 91    'load_harup_data',
 92    'create_harup_windows',
 93    'extract_harup_features',
 94    'load_urfall_data',
 95    'create_urfall_windows',
 96    'download_dataset',
 97    'extract_dataset',
 98    'sliding_window',
 99    # Manager functions
100    'get_dataset_manager',
101    'get_available_datasets',
102    'load_dataset'
103]
class DaphnetLoader(gaitsetpy.core.base_classes.BaseDatasetLoader):
 18class DaphnetLoader(BaseDatasetLoader):
 19    """
 20    Daphnet dataset loader class.
 21    
 22    This class handles loading and processing of the Daphnet dataset for gait analysis.
 23    """
 24    
 25    def __init__(self, max_workers: int = 8):
 26        """
 27        Initialize Daphnet loader with concurrent download support.
 28        
 29        Args:
 30            max_workers: Maximum number of concurrent download threads (default: 8)
 31        """
 32        super().__init__(
 33            name="daphnet",
 34            description="Daphnet Freezing of Gait Dataset - Contains accelerometer data from subjects with Parkinson's disease",
 35            max_workers=max_workers
 36        )
 37        self.metadata = {
 38            'sensors': ['shank', 'thigh', 'trunk'],
 39            'components': ['h_fd', 'v', 'h_l'],  # horizontal forward, vertical, horizontal lateral
 40            'sampling_frequency': 64,
 41            'annotations': {
 42                0: 'not_valid',
 43                1: 'no_freeze',
 44                2: 'freeze'
 45            }
 46        }
 47    
 48    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
 49        """
 50        Load Daphnet dataset from the specified directory.
 51        
 52        Args:
 53            data_dir: Directory to store/find the dataset
 54            **kwargs: Additional arguments (unused for Daphnet)
 55            
 56        Returns:
 57            Tuple of (data_list, names_list)
 58        """
 59        # Download and extract if needed
 60        download_dataset("daphnet", data_dir)
 61        extract_dataset("daphnet", data_dir)
 62        
 63        file_path = os.path.join(data_dir, "dataset_fog_release/dataset")
 64        daphnet_data = []
 65        daphnet_names = []
 66        
 67        # Load all subject files
 68        for file in sorted(glob(os.path.join(file_path, "S*.txt"))):
 69            # Extract filename from path
 70            filename = os.path.basename(file)
 71            daphnet_names.append(filename)
 72            
 73            # Load CSV with proper column names
 74            column_names = [
 75                "time", "shank_h_fd", "shank_v", "shank_h_l", 
 76                "thigh_h_fd", "thigh_v", "thigh_h_l", 
 77                "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"
 78            ]
 79            
 80            df = pd.read_csv(file, sep=" ", names=column_names)
 81            
 82            # Set time as index
 83            df = df.set_index("time")
 84            
 85            # Calculate magnitude for each sensor
 86            df["thigh"] = np.sqrt(df["thigh_h_l"]**2 + df["thigh_v"]**2 + df["thigh_h_fd"]**2)
 87            df["shank"] = np.sqrt(df["shank_h_l"]**2 + df["shank_v"]**2 + df["shank_h_fd"]**2)
 88            df["trunk"] = np.sqrt(df["trunk_h_l"]**2 + df["trunk_v"]**2 + df["trunk_h_fd"]**2)
 89            
 90            # Reorder columns for consistency
 91            df = df[["shank", "shank_h_fd", "shank_v", "shank_h_l", 
 92                    "thigh", "thigh_h_fd", "thigh_v", "thigh_h_l", 
 93                    "trunk", "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"]]
 94            
 95            daphnet_data.append(df)
 96        
 97        # Store loaded data
 98        self.data = daphnet_data
 99        self.names = daphnet_names
100        
101        return daphnet_data, daphnet_names
102    
103    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
104                             window_size: int = 192, step_size: int = 32) -> List[Dict]:
105        """
106        Create sliding windows from the Daphnet dataset.
107        
108        Args:
109            data: List of DataFrames containing Daphnet data
110            names: List of names corresponding to the data
111            window_size: Size of the sliding window (default: 192)
112            step_size: Step size for the sliding window (default: 32)
113            
114        Returns:
115            List of dictionaries containing sliding windows for each DataFrame
116        """
117        windows_data = []
118        
119        for idx, df in enumerate(data):
120            # Filter out invalid data (annotations == 0)
121            df_filtered = df[df.annotations > 0]
122            
123            if df_filtered.empty:
124                continue
125                
126            windows = []
127            processed_columns = set()
128            
129            # Process each sensor column
130            for col in df_filtered.columns:
131                if col != "annotations" and col not in processed_columns:
132                    window_data = sliding_window(df_filtered[col], window_size, step_size)
133                    windows.append({"name": col, "data": window_data})
134                    processed_columns.add(col)
135            
136            # Include annotations separately
137            annotations_window = sliding_window(df_filtered["annotations"], window_size, step_size)
138            windows.append({"name": "annotations", "data": annotations_window})
139            
140            windows_data.append({"name": names[idx], "windows": windows})
141        
142        return windows_data
143    
144    def get_supported_formats(self) -> List[str]:
145        """
146        Get list of supported file formats for Daphnet dataset.
147        
148        Returns:
149            List of supported file extensions
150        """
151        return ['.txt']
152    
153    def get_sensor_info(self) -> Dict[str, List[str]]:
154        """
155        Get information about sensors in the dataset.
156        
157        Returns:
158            Dictionary containing sensor information
159        """
160        return {
161            'sensors': self.metadata['sensors'],
162            'components': self.metadata['components'],
163            'sampling_frequency': self.metadata['sampling_frequency']
164        }
165    
166    def get_annotation_info(self) -> Dict[int, str]:
167        """
168        Get information about annotations in the dataset.
169        
170        Returns:
171            Dictionary mapping annotation values to descriptions
172        """
173        return self.metadata['annotations']

Daphnet dataset loader class.

This class handles loading and processing of the Daphnet dataset for gait analysis.

DaphnetLoader(max_workers: int = 8)
25    def __init__(self, max_workers: int = 8):
26        """
27        Initialize Daphnet loader with concurrent download support.
28        
29        Args:
30            max_workers: Maximum number of concurrent download threads (default: 8)
31        """
32        super().__init__(
33            name="daphnet",
34            description="Daphnet Freezing of Gait Dataset - Contains accelerometer data from subjects with Parkinson's disease",
35            max_workers=max_workers
36        )
37        self.metadata = {
38            'sensors': ['shank', 'thigh', 'trunk'],
39            'components': ['h_fd', 'v', 'h_l'],  # horizontal forward, vertical, horizontal lateral
40            'sampling_frequency': 64,
41            'annotations': {
42                0: 'not_valid',
43                1: 'no_freeze',
44                2: 'freeze'
45            }
46        }

Initialize Daphnet loader with concurrent download support.

Args: max_workers: Maximum number of concurrent download threads (default: 8)

metadata
def load_data( self, data_dir: str, **kwargs) -> Tuple[List[pandas.core.frame.DataFrame], List[str]]:
 48    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
 49        """
 50        Load Daphnet dataset from the specified directory.
 51        
 52        Args:
 53            data_dir: Directory to store/find the dataset
 54            **kwargs: Additional arguments (unused for Daphnet)
 55            
 56        Returns:
 57            Tuple of (data_list, names_list)
 58        """
 59        # Download and extract if needed
 60        download_dataset("daphnet", data_dir)
 61        extract_dataset("daphnet", data_dir)
 62        
 63        file_path = os.path.join(data_dir, "dataset_fog_release/dataset")
 64        daphnet_data = []
 65        daphnet_names = []
 66        
 67        # Load all subject files
 68        for file in sorted(glob(os.path.join(file_path, "S*.txt"))):
 69            # Extract filename from path
 70            filename = os.path.basename(file)
 71            daphnet_names.append(filename)
 72            
 73            # Load CSV with proper column names
 74            column_names = [
 75                "time", "shank_h_fd", "shank_v", "shank_h_l", 
 76                "thigh_h_fd", "thigh_v", "thigh_h_l", 
 77                "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"
 78            ]
 79            
 80            df = pd.read_csv(file, sep=" ", names=column_names)
 81            
 82            # Set time as index
 83            df = df.set_index("time")
 84            
 85            # Calculate magnitude for each sensor
 86            df["thigh"] = np.sqrt(df["thigh_h_l"]**2 + df["thigh_v"]**2 + df["thigh_h_fd"]**2)
 87            df["shank"] = np.sqrt(df["shank_h_l"]**2 + df["shank_v"]**2 + df["shank_h_fd"]**2)
 88            df["trunk"] = np.sqrt(df["trunk_h_l"]**2 + df["trunk_v"]**2 + df["trunk_h_fd"]**2)
 89            
 90            # Reorder columns for consistency
 91            df = df[["shank", "shank_h_fd", "shank_v", "shank_h_l", 
 92                    "thigh", "thigh_h_fd", "thigh_v", "thigh_h_l", 
 93                    "trunk", "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"]]
 94            
 95            daphnet_data.append(df)
 96        
 97        # Store loaded data
 98        self.data = daphnet_data
 99        self.names = daphnet_names
100        
101        return daphnet_data, daphnet_names

Load Daphnet dataset from the specified directory.

Args: data_dir: Directory to store/find the dataset **kwargs: Additional arguments (unused for Daphnet)

Returns: Tuple of (data_list, names_list)

def create_sliding_windows( self, data: List[pandas.core.frame.DataFrame], names: List[str], window_size: int = 192, step_size: int = 32) -> List[Dict]:
103    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
104                             window_size: int = 192, step_size: int = 32) -> List[Dict]:
105        """
106        Create sliding windows from the Daphnet dataset.
107        
108        Args:
109            data: List of DataFrames containing Daphnet data
110            names: List of names corresponding to the data
111            window_size: Size of the sliding window (default: 192)
112            step_size: Step size for the sliding window (default: 32)
113            
114        Returns:
115            List of dictionaries containing sliding windows for each DataFrame
116        """
117        windows_data = []
118        
119        for idx, df in enumerate(data):
120            # Filter out invalid data (annotations == 0)
121            df_filtered = df[df.annotations > 0]
122            
123            if df_filtered.empty:
124                continue
125                
126            windows = []
127            processed_columns = set()
128            
129            # Process each sensor column
130            for col in df_filtered.columns:
131                if col != "annotations" and col not in processed_columns:
132                    window_data = sliding_window(df_filtered[col], window_size, step_size)
133                    windows.append({"name": col, "data": window_data})
134                    processed_columns.add(col)
135            
136            # Include annotations separately
137            annotations_window = sliding_window(df_filtered["annotations"], window_size, step_size)
138            windows.append({"name": "annotations", "data": annotations_window})
139            
140            windows_data.append({"name": names[idx], "windows": windows})
141        
142        return windows_data

Create sliding windows from the Daphnet dataset.

Args: data: List of DataFrames containing Daphnet data names: List of names corresponding to the data window_size: Size of the sliding window (default: 192) step_size: Step size for the sliding window (default: 32)

Returns: List of dictionaries containing sliding windows for each DataFrame

def get_supported_formats(self) -> List[str]:
144    def get_supported_formats(self) -> List[str]:
145        """
146        Get list of supported file formats for Daphnet dataset.
147        
148        Returns:
149            List of supported file extensions
150        """
151        return ['.txt']

Get list of supported file formats for Daphnet dataset.

Returns: List of supported file extensions

def get_sensor_info(self) -> Dict[str, List[str]]:
153    def get_sensor_info(self) -> Dict[str, List[str]]:
154        """
155        Get information about sensors in the dataset.
156        
157        Returns:
158            Dictionary containing sensor information
159        """
160        return {
161            'sensors': self.metadata['sensors'],
162            'components': self.metadata['components'],
163            'sampling_frequency': self.metadata['sampling_frequency']
164        }

Get information about sensors in the dataset.

Returns: Dictionary containing sensor information

def get_annotation_info(self) -> Dict[int, str]:
166    def get_annotation_info(self) -> Dict[int, str]:
167        """
168        Get information about annotations in the dataset.
169        
170        Returns:
171            Dictionary mapping annotation values to descriptions
172        """
173        return self.metadata['annotations']

Get information about annotations in the dataset.

Returns: Dictionary mapping annotation values to descriptions

class MobiFallLoader(gaitsetpy.core.base_classes.BaseDatasetLoader):
 17class MobiFallLoader(BaseDatasetLoader):
 18    """
 19    MobiFall dataset loader class.
 20    
 21    This class handles loading and processing of the MobiFall dataset for gait analysis.
 22    """
 23    
 24    def __init__(self, max_workers: int = 8):
 25        """
 26        Initialize MobiFall loader with concurrent download support.
 27        
 28        Args:
 29            max_workers: Maximum number of concurrent download threads (default: 8)
 30        """
 31        super().__init__(
 32            name="mobifall",
 33            description="MobiFall Dataset - Contains accelerometer and gyroscope data for fall detection",
 34            max_workers=max_workers
 35        )
 36        self.metadata = {
 37            'sensors': ['accelerometer', 'gyroscope'],
 38            'components': ['x', 'y', 'z'],
 39            'sampling_frequency': 100,  # Typical for MobiFall
 40            'activities': ['ADL', 'FALL']  # Activities of Daily Living and Falls
 41        }
 42    
 43    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
 44        """
 45        Load MobiFall dataset from the specified directory.
 46        
 47        Args:
 48            data_dir: Directory to store/find the dataset
 49            **kwargs: Additional arguments (unused for MobiFall)
 50            
 51        Returns:
 52            Tuple of (data_list, names_list)
 53        """
 54        # TODO: Implement MobiFall data loading
 55        # This is a placeholder implementation
 56        print("MobiFall data loading is not yet implemented")
 57        return [], []
 58    
 59    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
 60                             window_size: int = 192, step_size: int = 32) -> List[Dict]:
 61        """
 62        Create sliding windows from the MobiFall dataset.
 63        
 64        Args:
 65            data: List of DataFrames containing MobiFall data
 66            names: List of names corresponding to the data
 67            window_size: Size of the sliding window (default: 192)
 68            step_size: Step size for the sliding window (default: 32)
 69            
 70        Returns:
 71            List of dictionaries containing sliding windows for each DataFrame
 72        """
 73        # TODO: Implement MobiFall sliding window creation
 74        # This is a placeholder implementation
 75        print("MobiFall sliding window creation is not yet implemented")
 76        return []
 77    
 78    def get_supported_formats(self) -> List[str]:
 79        """
 80        Get list of supported file formats for MobiFall dataset.
 81        
 82        Returns:
 83            List of supported file extensions
 84        """
 85        return ['.csv', '.txt']
 86    
 87    def get_sensor_info(self) -> Dict[str, List[str]]:
 88        """
 89        Get information about sensors in the dataset.
 90        
 91        Returns:
 92            Dictionary containing sensor information
 93        """
 94        return {
 95            'sensors': self.metadata['sensors'],
 96            'components': self.metadata['components'],
 97            'sampling_frequency': self.metadata['sampling_frequency']
 98        }
 99    
100    def get_activity_info(self) -> List[str]:
101        """
102        Get information about activities in the dataset.
103        
104        Returns:
105            List of activity types
106        """
107        return self.metadata['activities']

MobiFall dataset loader class.

This class handles loading and processing of the MobiFall dataset for gait analysis.

MobiFallLoader(max_workers: int = 8)
24    def __init__(self, max_workers: int = 8):
25        """
26        Initialize MobiFall loader with concurrent download support.
27        
28        Args:
29            max_workers: Maximum number of concurrent download threads (default: 8)
30        """
31        super().__init__(
32            name="mobifall",
33            description="MobiFall Dataset - Contains accelerometer and gyroscope data for fall detection",
34            max_workers=max_workers
35        )
36        self.metadata = {
37            'sensors': ['accelerometer', 'gyroscope'],
38            'components': ['x', 'y', 'z'],
39            'sampling_frequency': 100,  # Typical for MobiFall
40            'activities': ['ADL', 'FALL']  # Activities of Daily Living and Falls
41        }

Initialize MobiFall loader with concurrent download support.

Args: max_workers: Maximum number of concurrent download threads (default: 8)

metadata
def load_data( self, data_dir: str, **kwargs) -> Tuple[List[pandas.core.frame.DataFrame], List[str]]:
43    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
44        """
45        Load MobiFall dataset from the specified directory.
46        
47        Args:
48            data_dir: Directory to store/find the dataset
49            **kwargs: Additional arguments (unused for MobiFall)
50            
51        Returns:
52            Tuple of (data_list, names_list)
53        """
54        # TODO: Implement MobiFall data loading
55        # This is a placeholder implementation
56        print("MobiFall data loading is not yet implemented")
57        return [], []

Load MobiFall dataset from the specified directory.

Args: data_dir: Directory to store/find the dataset **kwargs: Additional arguments (unused for MobiFall)

Returns: Tuple of (data_list, names_list)

def create_sliding_windows( self, data: List[pandas.core.frame.DataFrame], names: List[str], window_size: int = 192, step_size: int = 32) -> List[Dict]:
59    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
60                             window_size: int = 192, step_size: int = 32) -> List[Dict]:
61        """
62        Create sliding windows from the MobiFall dataset.
63        
64        Args:
65            data: List of DataFrames containing MobiFall data
66            names: List of names corresponding to the data
67            window_size: Size of the sliding window (default: 192)
68            step_size: Step size for the sliding window (default: 32)
69            
70        Returns:
71            List of dictionaries containing sliding windows for each DataFrame
72        """
73        # TODO: Implement MobiFall sliding window creation
74        # This is a placeholder implementation
75        print("MobiFall sliding window creation is not yet implemented")
76        return []

Create sliding windows from the MobiFall dataset.

Args: data: List of DataFrames containing MobiFall data names: List of names corresponding to the data window_size: Size of the sliding window (default: 192) step_size: Step size for the sliding window (default: 32)

Returns: List of dictionaries containing sliding windows for each DataFrame

def get_supported_formats(self) -> List[str]:
78    def get_supported_formats(self) -> List[str]:
79        """
80        Get list of supported file formats for MobiFall dataset.
81        
82        Returns:
83            List of supported file extensions
84        """
85        return ['.csv', '.txt']

Get list of supported file formats for MobiFall dataset.

Returns: List of supported file extensions

def get_sensor_info(self) -> Dict[str, List[str]]:
87    def get_sensor_info(self) -> Dict[str, List[str]]:
88        """
89        Get information about sensors in the dataset.
90        
91        Returns:
92            Dictionary containing sensor information
93        """
94        return {
95            'sensors': self.metadata['sensors'],
96            'components': self.metadata['components'],
97            'sampling_frequency': self.metadata['sampling_frequency']
98        }

Get information about sensors in the dataset.

Returns: Dictionary containing sensor information

def get_activity_info(self) -> List[str]:
100    def get_activity_info(self) -> List[str]:
101        """
102        Get information about activities in the dataset.
103        
104        Returns:
105            List of activity types
106        """
107        return self.metadata['activities']

Get information about activities in the dataset.

Returns: List of activity types

class ArduousLoader(gaitsetpy.core.base_classes.BaseDatasetLoader):
 17class ArduousLoader(BaseDatasetLoader):
 18    """
 19    Arduous dataset loader class.
 20    
 21    This class handles loading and processing of the Arduous dataset for gait analysis.
 22    """
 23    
 24    def __init__(self, max_workers: int = 8):
 25        """
 26        Initialize Arduous loader with concurrent download support.
 27        
 28        Args:
 29            max_workers: Maximum number of concurrent download threads (default: 8)
 30        """
 31        super().__init__(
 32            name="arduous",
 33            description="Arduous Dataset - Contains multi-sensor wearable data for daily activity recognition",
 34            max_workers=max_workers
 35        )
 36        self.metadata = {
 37            'sensors': ['accelerometer', 'gyroscope', 'magnetometer'],
 38            'components': ['x', 'y', 'z'],
 39            'sampling_frequency': 50,  # Typical for Arduous
 40            'activities': ['walking', 'running', 'sitting', 'standing', 'lying']
 41        }
 42    
 43    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
 44        """
 45        Load Arduous dataset from the specified directory.
 46        
 47        Args:
 48            data_dir: Directory to store/find the dataset
 49            **kwargs: Additional arguments (unused for Arduous)
 50            
 51        Returns:
 52            Tuple of (data_list, names_list)
 53        """
 54        # TODO: Implement Arduous data loading
 55        # This is a placeholder implementation
 56        print("Arduous data loading is not yet implemented")
 57        return [], []
 58    
 59    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
 60                             window_size: int = 192, step_size: int = 32) -> List[Dict]:
 61        """
 62        Create sliding windows from the Arduous dataset.
 63        
 64        Args:
 65            data: List of DataFrames containing Arduous data
 66            names: List of names corresponding to the data
 67            window_size: Size of the sliding window (default: 192)
 68            step_size: Step size for the sliding window (default: 32)
 69            
 70        Returns:
 71            List of dictionaries containing sliding windows for each DataFrame
 72        """
 73        # TODO: Implement Arduous sliding window creation
 74        # This is a placeholder implementation
 75        print("Arduous sliding window creation is not yet implemented")
 76        return []
 77    
 78    def get_supported_formats(self) -> List[str]:
 79        """
 80        Get list of supported file formats for Arduous dataset.
 81        
 82        Returns:
 83            List of supported file extensions
 84        """
 85        return ['.csv', '.txt']
 86    
 87    def get_sensor_info(self) -> Dict[str, List[str]]:
 88        """
 89        Get information about sensors in the dataset.
 90        
 91        Returns:
 92            Dictionary containing sensor information
 93        """
 94        return {
 95            'sensors': self.metadata['sensors'],
 96            'components': self.metadata['components'],
 97            'sampling_frequency': self.metadata['sampling_frequency']
 98        }
 99    
100    def get_activity_info(self) -> List[str]:
101        """
102        Get information about activities in the dataset.
103        
104        Returns:
105            List of activity types
106        """
107        return self.metadata['activities']

Arduous dataset loader class.

This class handles loading and processing of the Arduous dataset for gait analysis.

ArduousLoader(max_workers: int = 8)
24    def __init__(self, max_workers: int = 8):
25        """
26        Initialize Arduous loader with concurrent download support.
27        
28        Args:
29            max_workers: Maximum number of concurrent download threads (default: 8)
30        """
31        super().__init__(
32            name="arduous",
33            description="Arduous Dataset - Contains multi-sensor wearable data for daily activity recognition",
34            max_workers=max_workers
35        )
36        self.metadata = {
37            'sensors': ['accelerometer', 'gyroscope', 'magnetometer'],
38            'components': ['x', 'y', 'z'],
39            'sampling_frequency': 50,  # Typical for Arduous
40            'activities': ['walking', 'running', 'sitting', 'standing', 'lying']
41        }

Initialize Arduous loader with concurrent download support.

Args: max_workers: Maximum number of concurrent download threads (default: 8)

metadata
def load_data( self, data_dir: str, **kwargs) -> Tuple[List[pandas.core.frame.DataFrame], List[str]]:
43    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
44        """
45        Load Arduous dataset from the specified directory.
46        
47        Args:
48            data_dir: Directory to store/find the dataset
49            **kwargs: Additional arguments (unused for Arduous)
50            
51        Returns:
52            Tuple of (data_list, names_list)
53        """
54        # TODO: Implement Arduous data loading
55        # This is a placeholder implementation
56        print("Arduous data loading is not yet implemented")
57        return [], []

Load Arduous dataset from the specified directory.

Args: data_dir: Directory to store/find the dataset **kwargs: Additional arguments (unused for Arduous)

Returns: Tuple of (data_list, names_list)

def create_sliding_windows( self, data: List[pandas.core.frame.DataFrame], names: List[str], window_size: int = 192, step_size: int = 32) -> List[Dict]:
59    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
60                             window_size: int = 192, step_size: int = 32) -> List[Dict]:
61        """
62        Create sliding windows from the Arduous dataset.
63        
64        Args:
65            data: List of DataFrames containing Arduous data
66            names: List of names corresponding to the data
67            window_size: Size of the sliding window (default: 192)
68            step_size: Step size for the sliding window (default: 32)
69            
70        Returns:
71            List of dictionaries containing sliding windows for each DataFrame
72        """
73        # TODO: Implement Arduous sliding window creation
74        # This is a placeholder implementation
75        print("Arduous sliding window creation is not yet implemented")
76        return []

Create sliding windows from the Arduous dataset.

Args: data: List of DataFrames containing Arduous data names: List of names corresponding to the data window_size: Size of the sliding window (default: 192) step_size: Step size for the sliding window (default: 32)

Returns: List of dictionaries containing sliding windows for each DataFrame

def get_supported_formats(self) -> List[str]:
78    def get_supported_formats(self) -> List[str]:
79        """
80        Get list of supported file formats for Arduous dataset.
81        
82        Returns:
83            List of supported file extensions
84        """
85        return ['.csv', '.txt']

Get list of supported file formats for Arduous dataset.

Returns: List of supported file extensions

def get_sensor_info(self) -> Dict[str, List[str]]:
87    def get_sensor_info(self) -> Dict[str, List[str]]:
88        """
89        Get information about sensors in the dataset.
90        
91        Returns:
92            Dictionary containing sensor information
93        """
94        return {
95            'sensors': self.metadata['sensors'],
96            'components': self.metadata['components'],
97            'sampling_frequency': self.metadata['sampling_frequency']
98        }

Get information about sensors in the dataset.

Returns: Dictionary containing sensor information

def get_activity_info(self) -> List[str]:
100    def get_activity_info(self) -> List[str]:
101        """
102        Get information about activities in the dataset.
103        
104        Returns:
105            List of activity types
106        """
107        return self.metadata['activities']

Get information about activities in the dataset.

Returns: List of activity types

class PhysioNetLoader(gaitsetpy.core.base_classes.BaseDatasetLoader):
 25class PhysioNetLoader(BaseDatasetLoader):
 26    """
 27    PhysioNet VGRF dataset loader class.
 28    
 29    This class handles loading and processing of the PhysioNet Gait in Parkinson's Disease dataset.
 30    The dataset contains vertical ground reaction force (VGRF) data from subjects with Parkinson's 
 31    disease and healthy controls.
 32    
 33    Features concurrent downloading for efficient data retrieval.
 34    """
 35    
 36    def __init__(self, max_workers: int = 8):
 37        """
 38        Initialize PhysioNet loader with concurrent download support.
 39        
 40        Args:
 41            max_workers: Maximum number of concurrent download threads (default: 8)
 42        """
 43        super().__init__(
 44            name="physionet",
 45            description="PhysioNet Gait in Parkinson's Disease Dataset - Contains VGRF data from subjects with Parkinson's disease and healthy controls",
 46            max_workers=max_workers
 47        )
 48        self.metadata = {
 49            'sensors': ['VGRF_L1', 'VGRF_L2', 'VGRF_L3', 'VGRF_L4', 'VGRF_L5', 'VGRF_L6', 'VGRF_L7', 'VGRF_L8',
 50                       'VGRF_R1', 'VGRF_R2', 'VGRF_R3', 'VGRF_R4', 'VGRF_R5', 'VGRF_R6', 'VGRF_R7', 'VGRF_R8'],
 51            'sampling_frequency': 100,  # 100 Hz sampling frequency
 52            'subjects': {
 53                'Co': 'Control subjects',
 54                'Pt': 'Parkinson\'s disease patients'
 55            },
 56            'window_size': 600,  # 6 seconds at 100 Hz
 57            'url': 'https://physionet.org/files/gaitpdb/1.0.0/'
 58        }
 59        self.labels = []
 60        self.subject_types = []
 61    
 62    def _download_physionet_data(self, data_dir: str) -> str:
 63        """
 64        Download PhysioNet dataset if not already present using concurrent downloads.
 65        
 66        This method uses multi-threaded downloading to significantly speed up the
 67        download process for the 100+ files in the PhysioNet dataset.
 68        
 69        Args:
 70            data_dir: Directory to store the dataset
 71            
 72        Returns:
 73            Path to the downloaded/existing dataset directory
 74        """
 75        dataset_path = os.path.join(data_dir, "physionet_gaitpdb")
 76        
 77        if os.path.exists(dataset_path) and len(os.listdir(dataset_path)) > 0:
 78            print(f"PhysioNet dataset already exists at: {dataset_path}")
 79            return dataset_path
 80        
 81        os.makedirs(dataset_path, exist_ok=True)
 82        
 83        # Download the dataset files
 84        base_url = "https://physionet.org/files/gaitpdb/1.0.0/"
 85        
 86        # Get list of files (basic file names based on the reference)
 87        file_patterns = [
 88            # Control subjects - Ga prefix
 89            *[f"GaCo{i:02d}_{j:02d}.txt" for i in range(1, 18) for j in range(1, 3)],
 90            "GaCo22_01.txt", "GaCo22_10.txt",
 91            
 92            # Parkinson's patients - Ga prefix
 93            *[f"GaPt{i:02d}_{j:02d}.txt" for i in range(3, 10) for j in range(1, 3)],
 94            *[f"GaPt{i:02d}_{j:02d}.txt" for i in range(12, 34) for j in range(1, 3)],
 95            *[f"GaPt{i:02d}_10.txt" for i in range(13, 34)],
 96            
 97            # Control subjects - Ju prefix
 98            *[f"JuCo{i:02d}_01.txt" for i in range(1, 27)],
 99            
100            # Parkinson's patients - Ju prefix
101            *[f"JuPt{i:02d}_{j:02d}.txt" for i in range(1, 30) for j in range(1, 8)],
102            
103            # Control subjects - Si prefix
104            *[f"SiCo{i:02d}_01.txt" for i in range(1, 31)],
105            
106            # Parkinson's patients - Si prefix
107            *[f"SiPt{i:02d}_01.txt" for i in range(2, 41)]
108        ]
109        
110        # Prepare download tasks for concurrent execution
111        download_tasks = [
112            {
113                'url': base_url + filename,
114                'dest_path': os.path.join(dataset_path, filename)
115            }
116            for filename in file_patterns
117        ]
118        
119        print(f"Downloading PhysioNet dataset to {dataset_path} using {self.max_workers} threads")
120        
121        # Use concurrent downloading from base class
122        results = self.download_files_concurrent(
123            download_tasks, 
124            show_progress=True, 
125            desc="Downloading PhysioNet files"
126        )
127        
128        # Print summary
129        print(f"\nDownload Summary:")
130        print(f"  Total files: {results['total']}")
131        print(f"  Successfully downloaded: {results['success']}")
132        print(f"  Already existed (skipped): {results['skipped']}")
133        print(f"  Failed: {results['failed']}")
134        
135        if results['failed'] > 0 and len(results['failed_downloads']) > 0:
136            print(f"\nFailed downloads (showing first 10):")
137            for failed in results['failed_downloads'][:10]:
138                print(f"  - {os.path.basename(failed['dest_path'])}: {failed['error']}")
139            if len(results['failed_downloads']) > 10:
140                print(f"  ... and {len(results['failed_downloads']) - 10} more failures")
141        
142        return dataset_path
143    
144    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
145        """
146        Load PhysioNet VGRF dataset from the specified directory.
147        
148        Args:
149            data_dir: Directory to store/find the dataset
150            **kwargs: Additional arguments (unused for PhysioNet)
151            
152        Returns:
153            Tuple of (data_list, names_list)
154        """
155        # Download dataset if needed
156        dataset_path = self._download_physionet_data(data_dir)
157        
158        physionet_data = []
159        physionet_names = []
160        self.labels = []
161        self.subject_types = []
162        
163        # Load all available files
164        for filepath in sorted(glob(os.path.join(dataset_path, "Ga*.txt"))):
165            filename = os.path.basename(filepath)
166            
167            # Extract subject type from filename
168            if 'Co' in filename:
169                subject_type = 'Control'
170                label = 'Co'
171            elif 'Pt' in filename:
172                subject_type = 'Patient'
173                label = 'Pt'
174            else:
175                continue  # Skip files that don't match expected pattern
176            
177            try:
178                # Read the file - PhysioNet files are tab-delimited with variable columns
179                # Column 0: time, Columns 1-16: VGRF sensors, additional columns may exist
180                df = pd.read_csv(filepath, delimiter='\t', header=None)
181                
182                # Handle variable number of columns
183                n_cols = min(df.shape[1], 19)  # Limit to 19 columns max
184                df = df.iloc[:, :n_cols]
185                
186                # Create column names
187                col_names = ['time']
188                for i in range(1, n_cols):
189                    if i <= 8:
190                        col_names.append(f'VGRF_L{i}')
191                    elif i <= 16:
192                        col_names.append(f'VGRF_R{i-8}')
193                    else:
194                        col_names.append(f'sensor_{i}')
195                
196                df.columns = col_names
197                
198                # Set time as index
199                df = df.set_index('time')
200                
201                # Add subject metadata
202                df['subject_type'] = subject_type
203                df['label'] = label
204                
205                physionet_data.append(df)
206                physionet_names.append(filename)
207                self.labels.append(label)
208                self.subject_types.append(subject_type)
209                
210            except Exception as e:
211                print(f"Error loading {filename}: {e}")
212                continue
213        
214        # Store loaded data
215        self.data = physionet_data
216        self.names = physionet_names
217        
218        print(f"Loaded {len(physionet_data)} PhysioNet files")
219        print(f"Subject distribution: {dict(zip(*np.unique(self.subject_types, return_counts=True)))}")
220        
221        return physionet_data, physionet_names
222    
223    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
224                             window_size: int = 600, step_size: int = 100) -> List[Dict]:
225        """
226        Create sliding windows from the PhysioNet dataset.
227        
228        Args:
229            data: List of DataFrames containing PhysioNet data
230            names: List of names corresponding to the data
231            window_size: Size of the sliding window (default: 600 for 6 seconds at 100Hz)
232            step_size: Step size for the sliding window (default: 100)
233            
234        Returns:
235            List of dictionaries containing sliding windows for each DataFrame
236        """
237        windows_data = []
238        
239        for idx, df in enumerate(data):
240            # Remove metadata columns for windowing
241            sensor_columns = [col for col in df.columns if col.startswith('VGRF_') or col.startswith('sensor_')]
242            df_sensors = df[sensor_columns]
243            
244            if df_sensors.empty or len(df_sensors) < window_size:
245                continue
246                
247            windows = []
248            
249            # Create windows for each sensor
250            for col in sensor_columns:
251                try:
252                    window_data = sliding_window(df_sensors[col].values, window_size, step_size)
253                    windows.append({"name": col, "data": window_data})
254                except Exception as e:
255                    print(f"Error creating windows for {col} in {names[idx]}: {e}")
256                    continue
257            
258            if windows:
259                windows_data.append({
260                    "name": names[idx],
261                    "windows": windows,
262                    "metadata": {
263                        "subject_type": df['subject_type'].iloc[0] if 'subject_type' in df.columns else 'Unknown',
264                        "label": df['label'].iloc[0] if 'label' in df.columns else 'Unknown',
265                        "window_size": window_size,
266                        "step_size": step_size,
267                        "num_windows": len(windows[0]["data"]) if windows else 0
268                    }
269                })
270        
271        return windows_data
272    
273    def get_supported_formats(self) -> List[str]:
274        """
275        Get list of supported file formats for PhysioNet dataset.
276        
277        Returns:
278            List of supported file extensions
279        """
280        return ['.txt']
281    
282    def get_sensor_info(self) -> Dict[str, List[str]]:
283        """
284        Get information about sensors in the dataset.
285        
286        Returns:
287            Dictionary containing sensor information
288        """
289        return {
290            'sensors': self.metadata['sensors'],
291            'sampling_frequency': self.metadata['sampling_frequency'],
292            'window_size': self.metadata['window_size']
293        }
294    
295    def get_subject_info(self) -> Dict[str, str]:
296        """
297        Get information about subjects in the dataset.
298        
299        Returns:
300            Dictionary containing subject information
301        """
302        return self.metadata['subjects']
303    
304    def get_labels(self) -> List[str]:
305        """
306        Get labels for loaded data.
307        
308        Returns:
309            List of labels corresponding to loaded data
310        """
311        return self.labels
312    
313    def filter_by_subject_type(self, subject_type: str) -> Tuple[List[pd.DataFrame], List[str]]:
314        """
315        Filter loaded data by subject type.
316        
317        Args:
318            subject_type: 'Control' or 'Patient'
319            
320        Returns:
321            Tuple of (filtered_data, filtered_names)
322        """
323        if not self.data:
324            raise ValueError("No data loaded. Call load_data() first.")
325        
326        filtered_data = []
327        filtered_names = []
328        
329        for i, df in enumerate(self.data):
330            if df['subject_type'].iloc[0] == subject_type:
331                filtered_data.append(df)
332                filtered_names.append(self.names[i])
333        
334        return filtered_data, filtered_names

PhysioNet VGRF dataset loader class.

This class handles loading and processing of the PhysioNet Gait in Parkinson's Disease dataset. The dataset contains vertical ground reaction force (VGRF) data from subjects with Parkinson's disease and healthy controls.

Features concurrent downloading for efficient data retrieval.

PhysioNetLoader(max_workers: int = 8)
36    def __init__(self, max_workers: int = 8):
37        """
38        Initialize PhysioNet loader with concurrent download support.
39        
40        Args:
41            max_workers: Maximum number of concurrent download threads (default: 8)
42        """
43        super().__init__(
44            name="physionet",
45            description="PhysioNet Gait in Parkinson's Disease Dataset - Contains VGRF data from subjects with Parkinson's disease and healthy controls",
46            max_workers=max_workers
47        )
48        self.metadata = {
49            'sensors': ['VGRF_L1', 'VGRF_L2', 'VGRF_L3', 'VGRF_L4', 'VGRF_L5', 'VGRF_L6', 'VGRF_L7', 'VGRF_L8',
50                       'VGRF_R1', 'VGRF_R2', 'VGRF_R3', 'VGRF_R4', 'VGRF_R5', 'VGRF_R6', 'VGRF_R7', 'VGRF_R8'],
51            'sampling_frequency': 100,  # 100 Hz sampling frequency
52            'subjects': {
53                'Co': 'Control subjects',
54                'Pt': 'Parkinson\'s disease patients'
55            },
56            'window_size': 600,  # 6 seconds at 100 Hz
57            'url': 'https://physionet.org/files/gaitpdb/1.0.0/'
58        }
59        self.labels = []
60        self.subject_types = []

Initialize PhysioNet loader with concurrent download support.

Args: max_workers: Maximum number of concurrent download threads (default: 8)

metadata
labels
subject_types
def load_data( self, data_dir: str, **kwargs) -> Tuple[List[pandas.core.frame.DataFrame], List[str]]:
144    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
145        """
146        Load PhysioNet VGRF dataset from the specified directory.
147        
148        Args:
149            data_dir: Directory to store/find the dataset
150            **kwargs: Additional arguments (unused for PhysioNet)
151            
152        Returns:
153            Tuple of (data_list, names_list)
154        """
155        # Download dataset if needed
156        dataset_path = self._download_physionet_data(data_dir)
157        
158        physionet_data = []
159        physionet_names = []
160        self.labels = []
161        self.subject_types = []
162        
163        # Load all available files
164        for filepath in sorted(glob(os.path.join(dataset_path, "Ga*.txt"))):
165            filename = os.path.basename(filepath)
166            
167            # Extract subject type from filename
168            if 'Co' in filename:
169                subject_type = 'Control'
170                label = 'Co'
171            elif 'Pt' in filename:
172                subject_type = 'Patient'
173                label = 'Pt'
174            else:
175                continue  # Skip files that don't match expected pattern
176            
177            try:
178                # Read the file - PhysioNet files are tab-delimited with variable columns
179                # Column 0: time, Columns 1-16: VGRF sensors, additional columns may exist
180                df = pd.read_csv(filepath, delimiter='\t', header=None)
181                
182                # Handle variable number of columns
183                n_cols = min(df.shape[1], 19)  # Limit to 19 columns max
184                df = df.iloc[:, :n_cols]
185                
186                # Create column names
187                col_names = ['time']
188                for i in range(1, n_cols):
189                    if i <= 8:
190                        col_names.append(f'VGRF_L{i}')
191                    elif i <= 16:
192                        col_names.append(f'VGRF_R{i-8}')
193                    else:
194                        col_names.append(f'sensor_{i}')
195                
196                df.columns = col_names
197                
198                # Set time as index
199                df = df.set_index('time')
200                
201                # Add subject metadata
202                df['subject_type'] = subject_type
203                df['label'] = label
204                
205                physionet_data.append(df)
206                physionet_names.append(filename)
207                self.labels.append(label)
208                self.subject_types.append(subject_type)
209                
210            except Exception as e:
211                print(f"Error loading {filename}: {e}")
212                continue
213        
214        # Store loaded data
215        self.data = physionet_data
216        self.names = physionet_names
217        
218        print(f"Loaded {len(physionet_data)} PhysioNet files")
219        print(f"Subject distribution: {dict(zip(*np.unique(self.subject_types, return_counts=True)))}")
220        
221        return physionet_data, physionet_names

Load PhysioNet VGRF dataset from the specified directory.

Args: data_dir: Directory to store/find the dataset **kwargs: Additional arguments (unused for PhysioNet)

Returns: Tuple of (data_list, names_list)

def create_sliding_windows( self, data: List[pandas.core.frame.DataFrame], names: List[str], window_size: int = 600, step_size: int = 100) -> List[Dict]:
223    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
224                             window_size: int = 600, step_size: int = 100) -> List[Dict]:
225        """
226        Create sliding windows from the PhysioNet dataset.
227        
228        Args:
229            data: List of DataFrames containing PhysioNet data
230            names: List of names corresponding to the data
231            window_size: Size of the sliding window (default: 600 for 6 seconds at 100Hz)
232            step_size: Step size for the sliding window (default: 100)
233            
234        Returns:
235            List of dictionaries containing sliding windows for each DataFrame
236        """
237        windows_data = []
238        
239        for idx, df in enumerate(data):
240            # Remove metadata columns for windowing
241            sensor_columns = [col for col in df.columns if col.startswith('VGRF_') or col.startswith('sensor_')]
242            df_sensors = df[sensor_columns]
243            
244            if df_sensors.empty or len(df_sensors) < window_size:
245                continue
246                
247            windows = []
248            
249            # Create windows for each sensor
250            for col in sensor_columns:
251                try:
252                    window_data = sliding_window(df_sensors[col].values, window_size, step_size)
253                    windows.append({"name": col, "data": window_data})
254                except Exception as e:
255                    print(f"Error creating windows for {col} in {names[idx]}: {e}")
256                    continue
257            
258            if windows:
259                windows_data.append({
260                    "name": names[idx],
261                    "windows": windows,
262                    "metadata": {
263                        "subject_type": df['subject_type'].iloc[0] if 'subject_type' in df.columns else 'Unknown',
264                        "label": df['label'].iloc[0] if 'label' in df.columns else 'Unknown',
265                        "window_size": window_size,
266                        "step_size": step_size,
267                        "num_windows": len(windows[0]["data"]) if windows else 0
268                    }
269                })
270        
271        return windows_data

Create sliding windows from the PhysioNet dataset.

Args: data: List of DataFrames containing PhysioNet data names: List of names corresponding to the data window_size: Size of the sliding window (default: 600 for 6 seconds at 100Hz) step_size: Step size for the sliding window (default: 100)

Returns: List of dictionaries containing sliding windows for each DataFrame

def get_supported_formats(self) -> List[str]:
273    def get_supported_formats(self) -> List[str]:
274        """
275        Get list of supported file formats for PhysioNet dataset.
276        
277        Returns:
278            List of supported file extensions
279        """
280        return ['.txt']

Get list of supported file formats for PhysioNet dataset.

Returns: List of supported file extensions

def get_sensor_info(self) -> Dict[str, List[str]]:
282    def get_sensor_info(self) -> Dict[str, List[str]]:
283        """
284        Get information about sensors in the dataset.
285        
286        Returns:
287            Dictionary containing sensor information
288        """
289        return {
290            'sensors': self.metadata['sensors'],
291            'sampling_frequency': self.metadata['sampling_frequency'],
292            'window_size': self.metadata['window_size']
293        }

Get information about sensors in the dataset.

Returns: Dictionary containing sensor information

def get_subject_info(self) -> Dict[str, str]:
295    def get_subject_info(self) -> Dict[str, str]:
296        """
297        Get information about subjects in the dataset.
298        
299        Returns:
300            Dictionary containing subject information
301        """
302        return self.metadata['subjects']

Get information about subjects in the dataset.

Returns: Dictionary containing subject information

def get_labels(self) -> List[str]:
304    def get_labels(self) -> List[str]:
305        """
306        Get labels for loaded data.
307        
308        Returns:
309            List of labels corresponding to loaded data
310        """
311        return self.labels

Get labels for loaded data.

Returns: List of labels corresponding to loaded data

def filter_by_subject_type( self, subject_type: str) -> Tuple[List[pandas.core.frame.DataFrame], List[str]]:
313    def filter_by_subject_type(self, subject_type: str) -> Tuple[List[pd.DataFrame], List[str]]:
314        """
315        Filter loaded data by subject type.
316        
317        Args:
318            subject_type: 'Control' or 'Patient'
319            
320        Returns:
321            Tuple of (filtered_data, filtered_names)
322        """
323        if not self.data:
324            raise ValueError("No data loaded. Call load_data() first.")
325        
326        filtered_data = []
327        filtered_names = []
328        
329        for i, df in enumerate(self.data):
330            if df['subject_type'].iloc[0] == subject_type:
331                filtered_data.append(df)
332                filtered_names.append(self.names[i])
333        
334        return filtered_data, filtered_names

Filter loaded data by subject type.

Args: subject_type: 'Control' or 'Patient'

Returns: Tuple of (filtered_data, filtered_names)

class HARUPLoader(gaitsetpy.core.base_classes.BaseDatasetLoader):
 26class HARUPLoader(BaseDatasetLoader):
 27    """
 28    HAR-UP dataset loader class.
 29    
 30    This class handles loading and processing of the HAR-UP dataset for human activity recognition
 31    and fall detection analysis.
 32    """
 33    
 34    def __init__(self, max_workers: int = 8):
 35        """
 36        Initialize HAR-UP loader with concurrent download support.
 37        
 38        Args:
 39            max_workers: Maximum number of concurrent download threads (default: 8)
 40        """
 41        super().__init__(
 42            name="harup",
 43            description="HAR-UP Dataset - Multimodal System for Fall Detection and Human Activity Recognition",
 44            max_workers=max_workers
 45        )
 46        self.metadata = {
 47            'sensors': [
 48                'AnkleAccelerometer', 'AnkleAngularVelocity', 'AnkleLuminosity',
 49                'RightPocketAccelerometer', 'RightPocketAngularVelocity', 'RightPocketLuminosity',
 50                'BeltAccelerometer', 'BeltAngularVelocity', 'BeltLuminosity',
 51                'NeckAccelerometer', 'NeckAngularVelocity', 'NeckLuminosity',
 52                'WristAccelerometer', 'WristAngularVelocity', 'WristLuminosity',
 53                'BrainSensor', 'Infrared'
 54            ],
 55            'components': {
 56                'Accelerometer': ['x', 'y', 'z'],
 57                'AngularVelocity': ['x', 'y', 'z'],
 58                'Luminosity': ['illuminance'],
 59                'BrainSensor': ['value'],
 60                'Infrared': ['value']
 61            },
 62            'sampling_frequency': 100,  # Hz
 63            'activities': {
 64                1: 'Walking',
 65                2: 'Walking upstairs',
 66                3: 'Walking downstairs',
 67                4: 'Sitting',
 68                5: 'Standing',
 69                6: 'Lying',
 70                7: 'Falling forward using hands',
 71                8: 'Falling forward using knees',
 72                9: 'Falling backwards',
 73                10: 'Falling sideward',
 74                11: 'Falling sitting in empty chair'
 75            }
 76        }
 77        
 78        # Features used in HAR-UP
 79        self.features = [
 80            'Mean', 'StandardDeviation', 'RootMeanSquare', 'MaximalAmplitude',
 81            'MinimalAmplitude', 'Median', 'Number of zero-crossing', 'Skewness',
 82            'Kurtosis', 'First Quartile', 'Third Quartile', 'Autocorrelation',
 83            'Energy'
 84        ]
 85    
 86    def download_harup_data(self, data_dir: str) -> Optional[str]:
 87        """
 88        Download HAR-UP dataset if not already present.
 89        
 90        Args:
 91            data_dir: Directory to store the dataset
 92            
 93        Returns:
 94            Path to the extracted dataset or None if not found
 95        """
 96        # Use the utility function to download and extract the dataset
 97        download_dataset("harup", data_dir)
 98        extract_dataset("harup", data_dir)
 99        
100        # Check if dataset exists after download attempt
101        dataset_path = os.path.join(data_dir, "DataSet")
102        if not os.path.exists(dataset_path):
103            print("HAR-UP dataset not found after download attempt.")
104            print("Please ensure the dataset is organized in the following structure:")
105            print("DataSet/Subject{i}/Activity{j}/Trial{k}/Subject{i}Activity{j}Trial{k}.csv")
106            return None
107        
108        return dataset_path
109    
110    def load_data(self, data_dir: str, subjects: Optional[List[int]] = None, 
111                activities: Optional[List[int]] = None, trials: Optional[List[int]] = None,
112                **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
113        """
114        Load HAR-UP dataset from the specified directory.
115        Args:
116            data_dir: Directory containing the dataset
117            subjects: List of subject IDs to load (default: all subjects)
118            activities: List of activity IDs to load (default: all activities)
119            trials: List of trial IDs to load (default: all trials)
120            **kwargs: Additional arguments
121        Returns:
122            Tuple of (data_list, names_list)
123        """
124        import re
125        import os
126        # Set default values if not provided (HAR-UP: 4 subjects, 11 activities, 3 trials)
127        if subjects is None:
128            subjects = list(range(1, 5))  # 4 subjects
129        if activities is None:
130            activities = list(range(1, 12))  # 11 activities
131        if trials is None:
132            trials = list(range(1, 4))  # 3 trials
133
134        # Column names as per official HAR-UP documentation
135        columns = [
136            "Timestamp",
137            "EEG_NeuroSky",
138            "Belt_Acc_X", "Belt_Acc_Y", "Belt_Acc_Z",
139            "Belt_Gyro_X", "Belt_Gyro_Y", "Belt_Gyro_Z",
140            "Belt_Luminosity",
141            "Neck_Acc_X", "Neck_Acc_Y", "Neck_Acc_Z",
142            "Neck_Gyro_X", "Neck_Gyro_Y", "Neck_Gyro_Z",
143            "Neck_Luminosity",
144            "Pocket_Acc_X", "Pocket_Acc_Y", "Pocket_Acc_Z",
145            "Pocket_Gyro_X", "Pocket_Gyro_Y", "Pocket_Gyro_Z",
146            "Pocket_Luminosity",
147            "Wrist_Acc_X", "Wrist_Acc_Y", "Wrist_Acc_Z",
148            "Wrist_Gyro_X", "Wrist_Gyro_Y", "Wrist_Gyro_Z",
149            "Wrist_Luminosity",
150            "Infrared_1", "Infrared_2", "Infrared_3", "Infrared_4"
151        ]
152
153        # If data_dir does not exist, trigger interactive download
154        if not os.path.exists(data_dir):
155            print(f"Directory {data_dir} does not exist. Attempting to download HAR-UP dataset...")
156            self.download_harup_data(data_dir)
157        # If still doesn't exist, error out
158        if not os.path.exists(data_dir):
159            print(f"Failed to create or download dataset directory: {data_dir}")
160            return [], []
161
162        # Find the UP_Fall_Detection_Dataset directory
163        dataset_path = None
164        for entry in os.listdir(data_dir):
165            entry_path = os.path.join(data_dir, entry)
166            if os.path.isdir(entry_path) and entry.startswith("UP_Fall_Detection_Dataset"):
167                dataset_path = entry_path
168                break
169        if dataset_path is None:
170            print("UP_Fall_Detection_Dataset directory not found in", data_dir)
171            print("No data loaded. Please make sure you've downloaded the HAR-UP dataset.")
172            print("Visit https://sites.google.com/up.edu.mx/har-up/ to download the dataset.")
173            return [], []
174
175        harup_data = []
176        harup_names = []
177
178        # Iterate over subjects
179        for subject_id in subjects:
180            subject_folder = f"Subject_{subject_id:02d}"
181            subject_path = os.path.join(dataset_path, subject_folder)
182            if not os.path.isdir(subject_path):
183                continue
184            
185            # Initialize empty DataFrame for this subject
186            subject_df = pd.DataFrame()
187            
188            # Iterate over activities in order
189            for activity_id in sorted(activities):
190                activity_folder = f"A{activity_id:02d}"
191                activity_path = os.path.join(subject_path, activity_folder)
192                if not os.path.isdir(activity_path):
193                    continue
194                
195                # Iterate over trials in order
196                for trial_id in sorted(trials):
197                    file_name = f"S{subject_id:02d}_A{activity_id:02d}_T{trial_id:02d}.csv"
198                    file_path = os.path.join(activity_path, file_name)
199                    name = f"{subject_folder}_{activity_folder}_T{trial_id:02d}"
200                    
201                    try:
202                        df = pd.read_csv(file_path, header=0)
203                        print(f"[HARUP] Loaded columns for {file_name}: {list(df.columns)}")
204                        df['subject_id'] = subject_id
205                        df['activity_id'] = activity_id 
206                        df['trial_id'] = trial_id
207                        df['activity_label'] = self.metadata['activities'].get(activity_id, f"A{activity_id:02d}")
208                        
209                        # Concatenate to subject's DataFrame
210                        subject_df = pd.concat([subject_df, df], ignore_index=True)
211                        harup_names.append(name)
212                        
213                    except Exception as e:
214                        print(f"Error loading {file_path}: {e}")
215            
216            # Add complete subject DataFrame to data list
217            if not subject_df.empty:
218                harup_data.append(subject_df)
219                
220        self.data = harup_data
221        self.names = harup_names
222
223        return harup_data, harup_names
224    
225    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
226                             window_size: int = 100, step_size: int = 50) -> List[Dict]:
227        """
228        Create sliding windows from the HAR-UP dataset.
229        
230        Args:
231            data: List of DataFrames containing HAR-UP data
232            names: List of names corresponding to the data
233            window_size: Size of the sliding window (default: 100 = 1 second at 100Hz)
234            step_size: Step size for the sliding window (default: 50 = 0.5 seconds at 100Hz)
235            
236        Returns:
237            List of dictionaries containing sliding windows for each DataFrame
238        """
239        windows_data = []
240        
241        for idx, df in enumerate(data):
242            if df.empty:
243                continue
244                
245            windows = []
246            processed_columns = set()
247            
248            # Only use numeric columns (skip TIME and any non-numeric)
249            sensor_columns = [col for col in df.columns if col not in 
250                             ['subject_id', 'activity_id', 'trial_id', 'activity_label', 'TIME']
251                             and pd.api.types.is_numeric_dtype(df[col])]
252            
253
254            # Process each sensor column
255            for col in sensor_columns:
256                if col not in processed_columns:
257                    
258                    window_data = sliding_window(df[col], window_size, step_size)
259                    windows.append({"name": col, "data": window_data})
260                    processed_columns.add(col)
261            
262            # Include activity ID for each window
263            activity_windows = sliding_window(df["activity_id"], window_size, step_size)
264            windows.append({"name": "activity_id", "data": activity_windows})
265            
266            # For each window, take the most common activity ID as the label
267            labels = []
268            for window in activity_windows:
269                # Get most common activity in this window
270                unique_vals, counts = np.unique(window, return_counts=True)
271                most_common_idx = np.argmax(counts)
272                labels.append(unique_vals[most_common_idx])
273            
274            windows.append({"name": "labels", "data": np.array(labels)})
275            
276            windows_data.append({"name": names[idx], "windows": windows})
277        
278        return windows_data
279    
280    def extract_features(self, windows_data: List[Dict], time_domain_features: bool = True,
281                       freq_domain_features: bool = True) -> List[Dict]:
282        """
283        Extract features from sliding windows using HAR-UP feature extraction methods.
284        Args:
285            windows_data: List of dictionaries containing sliding windows
286            time_domain_features: Whether to extract time domain features
287            freq_domain_features: Whether to extract frequency domain features
288        Returns:
289            List of dictionaries containing extracted features
290        """
291        # Mapping from original sensor names to actual CSV column names
292        sensor_map = {
293            'BeltAccelerometer: x-axis (g)': 'BELT_ACC_X',
294            'BeltAccelerometer: y-axis (g)': 'BELT_ACC_Y',
295            'BeltAccelerometer: z-axis (g)': 'BELT_ACC_Z',
296            'BeltAngularVelocity: x-axis (deg/s)': 'BELT_ANG_X',
297            'BeltAngularVelocity: y-axis (deg/s)': 'BELT_ANG_Y',
298            'BeltAngularVelocity: z-axis (deg/s)': 'BELT_ANG_Z',
299            'BeltLuminosity: illuminance (lx)': 'BELT_LUMINOSITY',
300            'NeckAccelerometer: x-axis (g)': 'NECK_ACC_X',
301            'NeckAccelerometer: y-axis (g)': 'NECK_ACC_Y',
302            'NeckAccelerometer: z-axis (g)': 'NECK_ACC_Z',
303            'NeckAngularVelocity: x-axis (deg/s)': 'NECK_ANG_X',
304            'NeckAngularVelocity: y-axis (deg/s)': 'NECK_ANG_Y',
305            'NeckAngularVelocity: z-axis (deg/s)': 'NECK_ANG_Z',
306            'NeckLuminosity: illuminance (lx)': 'NECK_LUMINOSITY',
307            'PocketAccelerometer: x-axis (g)': 'PCKT_ACC_X',
308            'PocketAccelerometer: y-axis (g)': 'PCKT_ACC_Y',
309            'PocketAccelerometer: z-axis (g)': 'PCKT_ACC_Z',
310            'PocketAngularVelocity: x-axis (deg/s)': 'PCKT_ANG_X',
311            'PocketAngularVelocity: y-axis (deg/s)': 'PCKT_ANG_Y',
312            'PocketAngularVelocity: z-axis (deg/s)': 'PCKT_ANG_Z',
313            'PocketLuminosity: illuminance (lx)': 'PCKT_LUMINOSITY',
314            'WristAccelerometer: x-axis (g)': 'WRST_ACC_X',
315            'WristAccelerometer: y-axis (g)': 'WRST_ACC_Y',
316            'WristAccelerometer: z-axis (g)': 'WRST_ACC_Z',
317            'WristAngularVelocity: x-axis (deg/s)': 'WRST_ANG_X',
318            'WristAngularVelocity: y-axis (deg/s)': 'WRST_ANG_Y',
319            'WristAngularVelocity: z-axis (deg/s)': 'WRST_ANG_Z',
320            'WristLuminosity: illuminance (lx)': 'WRST_LUMINOSITY',
321            'BrainSensor': 'HELMET_RAW',
322            'Infrared1': 'IR_1',
323            'Infrared2': 'IR_2',
324            'Infrared3': 'IR_3',
325            'Infrared4': 'IR_4',
326        }
327        extractor = HARUPFeatureExtractor(verbose=True)
328        extractor.config['time_domain'] = time_domain_features
329        extractor.config['frequency_domain'] = freq_domain_features
330        all_features = []
331        for window_dict in windows_data:
332            name = window_dict["name"]
333            windows = window_dict["windows"]
334            labels = None
335            for window in windows:
336                if window["name"] == "labels":
337                    labels = window["data"]
338                    break
339            if labels is None:
340                print(f"No labels found for {name}, skipping feature extraction")
341                continue
342            filtered_windows = []
343            missing = []
344            for orig_sensor, csv_col in sensor_map.items():
345                found = False
346                for window in windows:
347                    if window["name"] == csv_col:
348                        filtered_windows.append(window)
349                        found = True
350                        break
351                if not found:
352                    missing.append((orig_sensor, csv_col))
353            if missing:
354                print(f"[HARUP] Missing columns for {name}: {[m[1] for m in missing]}")
355            for window in windows:
356                if window["name"] == "activity_id" or window["name"] == "labels":
357                    filtered_windows.append(window)
358            features = extractor.extract_features(filtered_windows, fs=self.metadata['sampling_frequency'])
359            for i, feature in enumerate(features):
360                window_idx = i // (len(filtered_windows) - 2)  # Subtract 2 for labels and activity_id
361                if window_idx < len(labels):
362                    feature["label"] = labels[window_idx]
363            all_features.append({"name": name, "features": features})
364        return all_features
365    
366    def get_supported_formats(self) -> List[str]:
367        """
368        Get list of supported file formats for HAR-UP dataset.
369        
370        Returns:
371            List of supported file extensions
372        """
373        return ['.csv']
374    
375    def get_sensor_info(self) -> Dict[str, List[str]]:
376        """
377        Get information about sensors in the dataset.
378        
379        Returns:
380            Dictionary containing sensor information
381        """
382        return {
383            'sensors': self.metadata['sensors'],
384            'components': self.metadata['components'],
385            'sampling_frequency': self.metadata['sampling_frequency']
386        }
387    
388    def get_activity_info(self) -> Dict[int, str]:
389        """
390        Get information about activities in the dataset.
391        
392        Returns:
393            Dictionary mapping activity IDs to descriptions
394        """
395        return self.metadata['activities']

HAR-UP dataset loader class.

This class handles loading and processing of the HAR-UP dataset for human activity recognition and fall detection analysis.

HARUPLoader(max_workers: int = 8)
34    def __init__(self, max_workers: int = 8):
35        """
36        Initialize HAR-UP loader with concurrent download support.
37        
38        Args:
39            max_workers: Maximum number of concurrent download threads (default: 8)
40        """
41        super().__init__(
42            name="harup",
43            description="HAR-UP Dataset - Multimodal System for Fall Detection and Human Activity Recognition",
44            max_workers=max_workers
45        )
46        self.metadata = {
47            'sensors': [
48                'AnkleAccelerometer', 'AnkleAngularVelocity', 'AnkleLuminosity',
49                'RightPocketAccelerometer', 'RightPocketAngularVelocity', 'RightPocketLuminosity',
50                'BeltAccelerometer', 'BeltAngularVelocity', 'BeltLuminosity',
51                'NeckAccelerometer', 'NeckAngularVelocity', 'NeckLuminosity',
52                'WristAccelerometer', 'WristAngularVelocity', 'WristLuminosity',
53                'BrainSensor', 'Infrared'
54            ],
55            'components': {
56                'Accelerometer': ['x', 'y', 'z'],
57                'AngularVelocity': ['x', 'y', 'z'],
58                'Luminosity': ['illuminance'],
59                'BrainSensor': ['value'],
60                'Infrared': ['value']
61            },
62            'sampling_frequency': 100,  # Hz
63            'activities': {
64                1: 'Walking',
65                2: 'Walking upstairs',
66                3: 'Walking downstairs',
67                4: 'Sitting',
68                5: 'Standing',
69                6: 'Lying',
70                7: 'Falling forward using hands',
71                8: 'Falling forward using knees',
72                9: 'Falling backwards',
73                10: 'Falling sideward',
74                11: 'Falling sitting in empty chair'
75            }
76        }
77        
78        # Features used in HAR-UP
79        self.features = [
80            'Mean', 'StandardDeviation', 'RootMeanSquare', 'MaximalAmplitude',
81            'MinimalAmplitude', 'Median', 'Number of zero-crossing', 'Skewness',
82            'Kurtosis', 'First Quartile', 'Third Quartile', 'Autocorrelation',
83            'Energy'
84        ]

Initialize HAR-UP loader with concurrent download support.

Args: max_workers: Maximum number of concurrent download threads (default: 8)

metadata
features
def download_harup_data(self, data_dir: str) -> Optional[str]:
 86    def download_harup_data(self, data_dir: str) -> Optional[str]:
 87        """
 88        Download HAR-UP dataset if not already present.
 89        
 90        Args:
 91            data_dir: Directory to store the dataset
 92            
 93        Returns:
 94            Path to the extracted dataset or None if not found
 95        """
 96        # Use the utility function to download and extract the dataset
 97        download_dataset("harup", data_dir)
 98        extract_dataset("harup", data_dir)
 99        
100        # Check if dataset exists after download attempt
101        dataset_path = os.path.join(data_dir, "DataSet")
102        if not os.path.exists(dataset_path):
103            print("HAR-UP dataset not found after download attempt.")
104            print("Please ensure the dataset is organized in the following structure:")
105            print("DataSet/Subject{i}/Activity{j}/Trial{k}/Subject{i}Activity{j}Trial{k}.csv")
106            return None
107        
108        return dataset_path

Download HAR-UP dataset if not already present.

Args: data_dir: Directory to store the dataset

Returns: Path to the extracted dataset or None if not found

def load_data( self, data_dir: str, subjects: Optional[List[int]] = None, activities: Optional[List[int]] = None, trials: Optional[List[int]] = None, **kwargs) -> Tuple[List[pandas.core.frame.DataFrame], List[str]]:
110    def load_data(self, data_dir: str, subjects: Optional[List[int]] = None, 
111                activities: Optional[List[int]] = None, trials: Optional[List[int]] = None,
112                **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
113        """
114        Load HAR-UP dataset from the specified directory.
115        Args:
116            data_dir: Directory containing the dataset
117            subjects: List of subject IDs to load (default: all subjects)
118            activities: List of activity IDs to load (default: all activities)
119            trials: List of trial IDs to load (default: all trials)
120            **kwargs: Additional arguments
121        Returns:
122            Tuple of (data_list, names_list)
123        """
124        import re
125        import os
126        # Set default values if not provided (HAR-UP: 4 subjects, 11 activities, 3 trials)
127        if subjects is None:
128            subjects = list(range(1, 5))  # 4 subjects
129        if activities is None:
130            activities = list(range(1, 12))  # 11 activities
131        if trials is None:
132            trials = list(range(1, 4))  # 3 trials
133
134        # Column names as per official HAR-UP documentation
135        columns = [
136            "Timestamp",
137            "EEG_NeuroSky",
138            "Belt_Acc_X", "Belt_Acc_Y", "Belt_Acc_Z",
139            "Belt_Gyro_X", "Belt_Gyro_Y", "Belt_Gyro_Z",
140            "Belt_Luminosity",
141            "Neck_Acc_X", "Neck_Acc_Y", "Neck_Acc_Z",
142            "Neck_Gyro_X", "Neck_Gyro_Y", "Neck_Gyro_Z",
143            "Neck_Luminosity",
144            "Pocket_Acc_X", "Pocket_Acc_Y", "Pocket_Acc_Z",
145            "Pocket_Gyro_X", "Pocket_Gyro_Y", "Pocket_Gyro_Z",
146            "Pocket_Luminosity",
147            "Wrist_Acc_X", "Wrist_Acc_Y", "Wrist_Acc_Z",
148            "Wrist_Gyro_X", "Wrist_Gyro_Y", "Wrist_Gyro_Z",
149            "Wrist_Luminosity",
150            "Infrared_1", "Infrared_2", "Infrared_3", "Infrared_4"
151        ]
152
153        # If data_dir does not exist, trigger interactive download
154        if not os.path.exists(data_dir):
155            print(f"Directory {data_dir} does not exist. Attempting to download HAR-UP dataset...")
156            self.download_harup_data(data_dir)
157        # If still doesn't exist, error out
158        if not os.path.exists(data_dir):
159            print(f"Failed to create or download dataset directory: {data_dir}")
160            return [], []
161
162        # Find the UP_Fall_Detection_Dataset directory
163        dataset_path = None
164        for entry in os.listdir(data_dir):
165            entry_path = os.path.join(data_dir, entry)
166            if os.path.isdir(entry_path) and entry.startswith("UP_Fall_Detection_Dataset"):
167                dataset_path = entry_path
168                break
169        if dataset_path is None:
170            print("UP_Fall_Detection_Dataset directory not found in", data_dir)
171            print("No data loaded. Please make sure you've downloaded the HAR-UP dataset.")
172            print("Visit https://sites.google.com/up.edu.mx/har-up/ to download the dataset.")
173            return [], []
174
175        harup_data = []
176        harup_names = []
177
178        # Iterate over subjects
179        for subject_id in subjects:
180            subject_folder = f"Subject_{subject_id:02d}"
181            subject_path = os.path.join(dataset_path, subject_folder)
182            if not os.path.isdir(subject_path):
183                continue
184            
185            # Initialize empty DataFrame for this subject
186            subject_df = pd.DataFrame()
187            
188            # Iterate over activities in order
189            for activity_id in sorted(activities):
190                activity_folder = f"A{activity_id:02d}"
191                activity_path = os.path.join(subject_path, activity_folder)
192                if not os.path.isdir(activity_path):
193                    continue
194                
195                # Iterate over trials in order
196                for trial_id in sorted(trials):
197                    file_name = f"S{subject_id:02d}_A{activity_id:02d}_T{trial_id:02d}.csv"
198                    file_path = os.path.join(activity_path, file_name)
199                    name = f"{subject_folder}_{activity_folder}_T{trial_id:02d}"
200                    
201                    try:
202                        df = pd.read_csv(file_path, header=0)
203                        print(f"[HARUP] Loaded columns for {file_name}: {list(df.columns)}")
204                        df['subject_id'] = subject_id
205                        df['activity_id'] = activity_id 
206                        df['trial_id'] = trial_id
207                        df['activity_label'] = self.metadata['activities'].get(activity_id, f"A{activity_id:02d}")
208                        
209                        # Concatenate to subject's DataFrame
210                        subject_df = pd.concat([subject_df, df], ignore_index=True)
211                        harup_names.append(name)
212                        
213                    except Exception as e:
214                        print(f"Error loading {file_path}: {e}")
215            
216            # Add complete subject DataFrame to data list
217            if not subject_df.empty:
218                harup_data.append(subject_df)
219                
220        self.data = harup_data
221        self.names = harup_names
222
223        return harup_data, harup_names

Load HAR-UP dataset from the specified directory. Args: data_dir: Directory containing the dataset; subjects: List of subject IDs to load (default: all subjects); activities: List of activity IDs to load (default: all activities); trials: List of trial IDs to load (default: all trials); **kwargs: Additional arguments. Returns: Tuple of (data_list, names_list).

def create_sliding_windows( self, data: List[pandas.core.frame.DataFrame], names: List[str], window_size: int = 100, step_size: int = 50) -> List[Dict]:
225    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
226                             window_size: int = 100, step_size: int = 50) -> List[Dict]:
227        """
228        Create sliding windows from the HAR-UP dataset.
229        
230        Args:
231            data: List of DataFrames containing HAR-UP data
232            names: List of names corresponding to the data
233            window_size: Size of the sliding window (default: 100 = 1 second at 100Hz)
234            step_size: Step size for the sliding window (default: 50 = 0.5 seconds at 100Hz)
235            
236        Returns:
237            List of dictionaries containing sliding windows for each DataFrame
238        """
239        windows_data = []
240        
241        for idx, df in enumerate(data):
242            if df.empty:
243                continue
244                
245            windows = []
246            processed_columns = set()
247            
248            # Only use numeric columns (skip TIME and any non-numeric)
249            sensor_columns = [col for col in df.columns if col not in 
250                             ['subject_id', 'activity_id', 'trial_id', 'activity_label', 'TIME']
251                             and pd.api.types.is_numeric_dtype(df[col])]
252            
253
254            # Process each sensor column
255            for col in sensor_columns:
256                if col not in processed_columns:
257                    
258                    window_data = sliding_window(df[col], window_size, step_size)
259                    windows.append({"name": col, "data": window_data})
260                    processed_columns.add(col)
261            
262            # Include activity ID for each window
263            activity_windows = sliding_window(df["activity_id"], window_size, step_size)
264            windows.append({"name": "activity_id", "data": activity_windows})
265            
266            # For each window, take the most common activity ID as the label
267            labels = []
268            for window in activity_windows:
269                # Get most common activity in this window
270                unique_vals, counts = np.unique(window, return_counts=True)
271                most_common_idx = np.argmax(counts)
272                labels.append(unique_vals[most_common_idx])
273            
274            windows.append({"name": "labels", "data": np.array(labels)})
275            
276            windows_data.append({"name": names[idx], "windows": windows})
277        
278        return windows_data

Create sliding windows from the HAR-UP dataset.

Args: data: List of DataFrames containing HAR-UP data; names: List of names corresponding to the data; window_size: Size of the sliding window (default: 100 = 1 second at 100Hz); step_size: Step size for the sliding window (default: 50 = 0.5 seconds at 100Hz)

Returns: List of dictionaries containing sliding windows for each DataFrame

def extract_features( self, windows_data: List[Dict], time_domain_features: bool = True, freq_domain_features: bool = True) -> List[Dict]:
280    def extract_features(self, windows_data: List[Dict], time_domain_features: bool = True,
281                       freq_domain_features: bool = True) -> List[Dict]:
282        """
283        Extract features from sliding windows using HAR-UP feature extraction methods.
284        Args:
285            windows_data: List of dictionaries containing sliding windows
286            time_domain_features: Whether to extract time domain features
287            freq_domain_features: Whether to extract frequency domain features
288        Returns:
289            List of dictionaries containing extracted features
290        """
291        # Mapping from original sensor names to actual CSV column names
292        sensor_map = {
293            'BeltAccelerometer: x-axis (g)': 'BELT_ACC_X',
294            'BeltAccelerometer: y-axis (g)': 'BELT_ACC_Y',
295            'BeltAccelerometer: z-axis (g)': 'BELT_ACC_Z',
296            'BeltAngularVelocity: x-axis (deg/s)': 'BELT_ANG_X',
297            'BeltAngularVelocity: y-axis (deg/s)': 'BELT_ANG_Y',
298            'BeltAngularVelocity: z-axis (deg/s)': 'BELT_ANG_Z',
299            'BeltLuminosity: illuminance (lx)': 'BELT_LUMINOSITY',
300            'NeckAccelerometer: x-axis (g)': 'NECK_ACC_X',
301            'NeckAccelerometer: y-axis (g)': 'NECK_ACC_Y',
302            'NeckAccelerometer: z-axis (g)': 'NECK_ACC_Z',
303            'NeckAngularVelocity: x-axis (deg/s)': 'NECK_ANG_X',
304            'NeckAngularVelocity: y-axis (deg/s)': 'NECK_ANG_Y',
305            'NeckAngularVelocity: z-axis (deg/s)': 'NECK_ANG_Z',
306            'NeckLuminosity: illuminance (lx)': 'NECK_LUMINOSITY',
307            'PocketAccelerometer: x-axis (g)': 'PCKT_ACC_X',
308            'PocketAccelerometer: y-axis (g)': 'PCKT_ACC_Y',
309            'PocketAccelerometer: z-axis (g)': 'PCKT_ACC_Z',
310            'PocketAngularVelocity: x-axis (deg/s)': 'PCKT_ANG_X',
311            'PocketAngularVelocity: y-axis (deg/s)': 'PCKT_ANG_Y',
312            'PocketAngularVelocity: z-axis (deg/s)': 'PCKT_ANG_Z',
313            'PocketLuminosity: illuminance (lx)': 'PCKT_LUMINOSITY',
314            'WristAccelerometer: x-axis (g)': 'WRST_ACC_X',
315            'WristAccelerometer: y-axis (g)': 'WRST_ACC_Y',
316            'WristAccelerometer: z-axis (g)': 'WRST_ACC_Z',
317            'WristAngularVelocity: x-axis (deg/s)': 'WRST_ANG_X',
318            'WristAngularVelocity: y-axis (deg/s)': 'WRST_ANG_Y',
319            'WristAngularVelocity: z-axis (deg/s)': 'WRST_ANG_Z',
320            'WristLuminosity: illuminance (lx)': 'WRST_LUMINOSITY',
321            'BrainSensor': 'HELMET_RAW',
322            'Infrared1': 'IR_1',
323            'Infrared2': 'IR_2',
324            'Infrared3': 'IR_3',
325            'Infrared4': 'IR_4',
326        }
327        extractor = HARUPFeatureExtractor(verbose=True)
328        extractor.config['time_domain'] = time_domain_features
329        extractor.config['frequency_domain'] = freq_domain_features
330        all_features = []
331        for window_dict in windows_data:
332            name = window_dict["name"]
333            windows = window_dict["windows"]
334            labels = None
335            for window in windows:
336                if window["name"] == "labels":
337                    labels = window["data"]
338                    break
339            if labels is None:
340                print(f"No labels found for {name}, skipping feature extraction")
341                continue
342            filtered_windows = []
343            missing = []
344            for orig_sensor, csv_col in sensor_map.items():
345                found = False
346                for window in windows:
347                    if window["name"] == csv_col:
348                        filtered_windows.append(window)
349                        found = True
350                        break
351                if not found:
352                    missing.append((orig_sensor, csv_col))
353            if missing:
354                print(f"[HARUP] Missing columns for {name}: {[m[1] for m in missing]}")
355            for window in windows:
356                if window["name"] == "activity_id" or window["name"] == "labels":
357                    filtered_windows.append(window)
358            features = extractor.extract_features(filtered_windows, fs=self.metadata['sampling_frequency'])
359            for i, feature in enumerate(features):
360                window_idx = i // (len(filtered_windows) - 2)  # Subtract 2 for labels and activity_id
361                if window_idx < len(labels):
362                    feature["label"] = labels[window_idx]
363            all_features.append({"name": name, "features": features})
364        return all_features

Extract features from sliding windows using HAR-UP feature extraction methods. Args: windows_data: List of dictionaries containing sliding windows; time_domain_features: Whether to extract time domain features; freq_domain_features: Whether to extract frequency domain features. Returns: List of dictionaries containing extracted features.

def get_supported_formats(self) -> List[str]:
366    def get_supported_formats(self) -> List[str]:
367        """
368        Get list of supported file formats for HAR-UP dataset.
369        
370        Returns:
371            List of supported file extensions
372        """
373        return ['.csv']

Get list of supported file formats for HAR-UP dataset.

Returns: List of supported file extensions

def get_sensor_info(self) -> Dict[str, List[str]]:
375    def get_sensor_info(self) -> Dict[str, List[str]]:
376        """
377        Get information about sensors in the dataset.
378        
379        Returns:
380            Dictionary containing sensor information
381        """
382        return {
383            'sensors': self.metadata['sensors'],
384            'components': self.metadata['components'],
385            'sampling_frequency': self.metadata['sampling_frequency']
386        }

Get information about sensors in the dataset.

Returns: Dictionary containing sensor information

def get_activity_info(self) -> Dict[int, str]:
388    def get_activity_info(self) -> Dict[int, str]:
389        """
390        Get information about activities in the dataset.
391        
392        Returns:
393            Dictionary mapping activity IDs to descriptions
394        """
395        return self.metadata['activities']

Get information about activities in the dataset.

Returns: Dictionary mapping activity IDs to descriptions

class UrFallLoader(gaitsetpy.core.base_classes.BaseDatasetLoader):
 24class UrFallLoader(BaseDatasetLoader):
 25    """
 26    UrFall dataset loader class.
 27    
 28    This class handles loading and processing of the UrFall dataset for fall detection.
 29    Supports multiple data types: Depth, RGB, Accelerometer, Synchronization, Video,
 30    and pre-extracted features from depth maps.
 31    """
 32    
 33    def __init__(self, max_workers: int = 8):
 34        """
 35        Initialize UrFall loader with concurrent download support.
 36        
 37        Args:
 38            max_workers: Maximum number of concurrent download threads (default: 8)
 39        """
 40        super().__init__(
 41            name="urfall",
 42            description="UrFall Dataset - University of Rzeszow Fall Detection Dataset with multimodal data",
 43            max_workers=max_workers
 44        )
 45        self.metadata = {
 46            'data_types': ['depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features'],
 47            'camera': 'cam0',  # Front camera
 48            'sampling_frequency': 30,  # Depth/RGB camera fps
 49            'accelerometer_frequency': 100,  # Accelerometer sampling frequency (typical)
 50            'activities': {
 51                -1: 'Not lying (standing/walking)',
 52                0: 'Falling (transient)',
 53                1: 'Lying on ground'
 54            },
 55            'fall_sequences': list(range(1, 31)),  # fall-01 to fall-30
 56            'adl_sequences': list(range(1, 21)),  # adl-01 to adl-20
 57            'feature_columns': [
 58                'sequence_name',
 59                'frame_number',
 60                'label',
 61                'HeightWidthRatio',
 62                'MajorMinorRatio',
 63                'BoundingBoxOccupancy',
 64                'MaxStdXZ',
 65                'HHmaxRatio',
 66                'H',
 67                'D',
 68                'P40'
 69            ],
 70            'feature_descriptions': {
 71                'HeightWidthRatio': 'Bounding box height to width ratio',
 72                'MajorMinorRatio': 'Major to minor axis ratio from BLOB segmentation',
 73                'BoundingBoxOccupancy': 'Ratio of bounding box occupied by person pixels',
 74                'MaxStdXZ': 'Standard deviation of pixels from centroid (X and Z axis)',
 75                'HHmaxRatio': 'Human height in frame to standing height ratio',
 76                'H': 'Actual height in mm',
 77                'D': 'Distance of person center to floor in mm',
 78                'P40': 'Ratio of point clouds in 40cm cuboid to full height cuboid'
 79            }
 80        }
 81    
 82    def load_data(self, data_dir: str, 
 83                  data_types: Optional[List[str]] = None,
 84                  sequences: Optional[List[str]] = None,
 85                  use_falls: bool = True,
 86                  use_adls: bool = True,
 87                  **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
 88        """
 89        Load UrFall dataset from the specified directory.
 90        
 91        Args:
 92            data_dir: Directory containing the dataset
 93            data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer',
 94                       'synchronization', 'video', 'features' (default: ['features'])
 95            sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01'])
 96                      If None, loads all based on use_falls and use_adls
 97            use_falls: Whether to load fall sequences (default: True)
 98            use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True)
 99            **kwargs: Additional arguments
100            
101        Returns:
102            Tuple of (data_list, names_list)
103        """
104        # Default to loading pre-extracted features if not specified
105        if data_types is None:
106            data_types = ['features']
107        
108        # Validate data types
109        valid_types = set(self.metadata['data_types'])
110        requested_types = set(data_types)
111        invalid_types = requested_types - valid_types
112        if invalid_types:
113            raise ValueError(f"Invalid data types: {invalid_types}. Valid types: {valid_types}")
114        
115        # Create directory if it doesn't exist
116        os.makedirs(data_dir, exist_ok=True)
117        
118        data_list = []
119        names_list = []
120        
121        # Load pre-extracted features (CSV files)
122        if 'features' in data_types:
123            features_data, features_names = self._load_features(data_dir, sequences, use_falls, use_adls)
124            data_list.extend(features_data)
125            names_list.extend(features_names)
126        
127        # Load raw accelerometer data
128        if 'accelerometer' in data_types:
129            accel_data, accel_names = self._load_accelerometer(data_dir, sequences, use_falls, use_adls)
130            data_list.extend(accel_data)
131            names_list.extend(accel_names)
132        
133        # Load synchronization data
134        if 'synchronization' in data_types:
135            sync_data, sync_names = self._load_synchronization(data_dir, sequences, use_falls, use_adls)
136            data_list.extend(sync_data)
137            names_list.extend(sync_names)
138        
139        # Note: Depth, RGB, and Video data are image/video files
140        # These would require specialized loading and are not typically loaded into DataFrames
141        if 'depth' in data_types or 'rgb' in data_types or 'video' in data_types:
142            print("Note: Depth, RGB, and Video data types contain image/video files.")
143            print("These are not loaded into DataFrames but their paths can be accessed.")
144            print("Use the get_file_paths() method to retrieve paths to these files.")
145        
146        self.data = data_list
147        return data_list, names_list
148    
149    def _load_features(self, data_dir: str, sequences: Optional[List[str]], 
150                       use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
151        """
152        Load pre-extracted features from CSV files.
153        
154        Args:
155            data_dir: Directory containing the dataset
156            sequences: Specific sequences to load
157            use_falls: Whether to include fall sequences
158            use_adls: Whether to include ADL sequences
159            
160        Returns:
161            Tuple of (data_list, names_list)
162        """
163        data_list = []
164        names_list = []
165        
166        # Load falls features
167        if use_falls:
168            falls_csv = os.path.join(data_dir, "urfall-cam0-falls.csv")
169            if os.path.exists(falls_csv):
170                df = pd.read_csv(falls_csv, header=None, names=self.metadata['feature_columns'])
171                
172                # Filter by specific sequences if provided
173                if sequences is not None:
174                    fall_sequences = [s for s in sequences if s.startswith('fall-')]
175                    if fall_sequences:
176                        df = df[df['sequence_name'].isin(fall_sequences)]
177                
178                # Add metadata columns
179                df['activity_type'] = 'fall'
180                df['activity_id'] = 1  # Falls are labeled as 1
181                
182                data_list.append(df)
183                names_list.append("urfall-cam0-falls")
184            else:
185                print(f"Warning: Falls features file not found at {falls_csv}")
186        
187        # Load ADLs features
188        if use_adls:
189            adls_csv = os.path.join(data_dir, "urfall-cam0-adls.csv")
190            if os.path.exists(adls_csv):
191                df = pd.read_csv(adls_csv, header=None, names=self.metadata['feature_columns'])
192                
193                # Filter by specific sequences if provided
194                if sequences is not None:
195                    adl_sequences = [s for s in sequences if s.startswith('adl-')]
196                    if adl_sequences:
197                        df = df[df['sequence_name'].isin(adl_sequences)]
198                
199                # Add metadata columns
200                df['activity_type'] = 'adl'
201                df['activity_id'] = 0  # ADLs are labeled as 0
202                
203                data_list.append(df)
204                names_list.append("urfall-cam0-adls")
205            else:
206                print(f"Warning: ADLs features file not found at {adls_csv}")
207        
208        return data_list, names_list
209    
210    def _load_accelerometer(self, data_dir: str, sequences: Optional[List[str]],
211                            use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
212        """
213        Load accelerometer CSV data files.
214        
215        Args:
216            data_dir: Directory containing the dataset
217            sequences: Specific sequences to load
218            use_falls: Whether to include fall sequences
219            use_adls: Whether to include ADL sequences
220            
221        Returns:
222            Tuple of (data_list, names_list)
223        """
224        data_list = []
225        names_list = []
226        
227        # Determine which sequences to load
228        seq_list = []
229        if sequences is not None:
230            seq_list = sequences
231        else:
232            if use_falls:
233                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
234            if use_adls:
235                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])
236        
237        # Load accelerometer data for each sequence
238        for seq in seq_list:
239            accel_file = os.path.join(data_dir, f"{seq}-acc.csv")
240            if os.path.exists(accel_file):
241                try:
242                    df = pd.read_csv(accel_file)
243                    df['sequence_name'] = seq
244                    df['activity_type'] = 'fall' if seq.startswith('fall-') else 'adl'
245                    df['activity_id'] = 1 if seq.startswith('fall-') else 0
246                    data_list.append(df)
247                    names_list.append(f"{seq}-accelerometer")
248                except Exception as e:
249                    print(f"Warning: Could not load accelerometer data from {accel_file}: {e}")
250        
251        return data_list, names_list
252    
253    def _load_synchronization(self, data_dir: str, sequences: Optional[List[str]],
254                              use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
255        """
256        Load synchronization CSV data files.
257        
258        Args:
259            data_dir: Directory containing the dataset
260            sequences: Specific sequences to load
261            use_falls: Whether to include fall sequences
262            use_adls: Whether to include ADL sequences
263            
264        Returns:
265            Tuple of (data_list, names_list)
266        """
267        data_list = []
268        names_list = []
269        
270        # Determine which sequences to load
271        seq_list = []
272        if sequences is not None:
273            seq_list = sequences
274        else:
275            if use_falls:
276                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
277            if use_adls:
278                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])
279        
280        # Load synchronization data for each sequence
281        for seq in seq_list:
282            sync_file = os.path.join(data_dir, f"{seq}-data.csv")
283            if os.path.exists(sync_file):
284                try:
285                    df = pd.read_csv(sync_file)
286                    df['sequence_name'] = seq
287                    df['activity_type'] = 'fall' if seq.startswith('fall-') else 'adl'
288                    df['activity_id'] = 1 if seq.startswith('fall-') else 0
289                    data_list.append(df)
290                    names_list.append(f"{seq}-synchronization")
291                except Exception as e:
292                    print(f"Warning: Could not load synchronization data from {sync_file}: {e}")
293        
294        return data_list, names_list
295    
296    def get_file_paths(self, data_dir: str, data_type: str, 
297                       sequences: Optional[List[str]] = None,
298                       use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]:
299        """
300        Get file paths for image/video data types (depth, RGB, video).
301        
302        Args:
303            data_dir: Directory containing the dataset
304            data_type: Type of data ('depth', 'rgb', 'video')
305            sequences: Specific sequences to get paths for
306            use_falls: Whether to include fall sequences
307            use_adls: Whether to include ADL sequences
308            
309        Returns:
310            Dictionary mapping sequence names to file paths
311        """
312        if data_type not in ['depth', 'rgb', 'video']:
313            raise ValueError(f"data_type must be one of: 'depth', 'rgb', 'video'. Got: {data_type}")
314        
315        file_paths = {}
316        
317        # Determine which sequences to include
318        seq_list = []
319        if sequences is not None:
320            seq_list = sequences
321        else:
322            if use_falls:
323                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
324            if use_adls:
325                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])
326        
327        # Map data type to file extension
328        extension_map = {
329            'depth': '-cam0-d.zip',
330            'rgb': '-cam0-rgb.zip',
331            'video': '-cam0.mp4'
332        }
333        
334        ext = extension_map[data_type]
335        
336        for seq in seq_list:
337            file_path = os.path.join(data_dir, f"{seq}{ext}")
338            if os.path.exists(file_path):
339                file_paths[seq] = file_path
340        
341        return file_paths
342    
343    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
344                               window_size: int = 30, step_size: int = 15) -> List[Dict]:
345        """
346        Create sliding windows from the loaded data.
347        
348        Args:
349            data: List of DataFrames containing the dataset
350            names: List of names corresponding to each DataFrame
351            window_size: Size of the sliding window (default: 30 frames for depth features)
352            step_size: Step size for sliding window (default: 15 frames)
353            
354        Returns:
355            List of dictionaries containing windowed data
356        """
357        windows_data = []
358        
359        for idx, df in enumerate(data):
360            if df.empty:
361                continue
362            
363            # Get numeric feature columns (exclude metadata columns)
364            exclude_cols = ['sequence_name', 'frame_number', 'label', 'activity_type', 'activity_id']
365            feature_cols = [col for col in df.columns 
366                          if col not in exclude_cols and pd.api.types.is_numeric_dtype(df[col])]
367            
368            if not feature_cols:
369                continue
370            
371            windows = []
372            
373            # Create windows for each feature column
374            for col in feature_cols:
375                win = sliding_window(df[col].values, window_size, step_size)
376                windows.append({"name": col, "data": win})
377            
378            # Create windows for labels if present
379            if 'label' in df.columns:
380                label_windows = sliding_window(df['label'].values, window_size, step_size)
381                # Majority voting for each window
382                labels = []
383                for w in label_windows:
384                    vals, counts = np.unique(w, return_counts=True)
385                    labels.append(vals[np.argmax(counts)])
386                windows.append({"name": "labels", "data": np.array(labels)})
387            
388            # Create activity_id windows
389            if 'activity_id' in df.columns:
390                activity_windows = sliding_window(df['activity_id'].values, window_size, step_size)
391                windows.append({"name": "activity_id", "data": activity_windows})
392            
393            windows_data.append({"name": names[idx], "windows": windows})
394        
395        return windows_data
396    
397    def get_supported_formats(self) -> List[str]:
398        """
399        Get list of supported file formats for UrFall dataset.
400        
401        Returns:
402            List of supported file extensions
403        """
404        return ['.csv', '.zip', '.mp4']
405    
406    def get_sensor_info(self) -> Dict[str, any]:
407        """
408        Get information about sensors in the dataset.
409        
410        Returns:
411            Dictionary containing sensor information
412        """
413        return {
414            'data_types': self.metadata['data_types'],
415            'camera': self.metadata['camera'],
416            'sampling_frequency': self.metadata['sampling_frequency'],
417            'accelerometer_frequency': self.metadata['accelerometer_frequency']
418        }
419    
420    def get_activity_info(self) -> Dict[int, str]:
421        """
422        Get information about activities in the dataset.
423        
424        Returns:
425            Dictionary mapping activity IDs to labels
426        """
427        return self.metadata['activities']
428    
429    def get_feature_info(self) -> Dict[str, str]:
430        """
431        Get information about pre-extracted features.
432        
433        Returns:
434            Dictionary mapping feature names to descriptions
435        """
436        return self.metadata['feature_descriptions']

UrFall dataset loader class.

This class handles loading and processing of the UrFall dataset for fall detection. Supports multiple data types: Depth, RGB, Accelerometer, Synchronization, Video, and pre-extracted features from depth maps.

UrFallLoader(max_workers: int = 8)
33    def __init__(self, max_workers: int = 8):
34        """
35        Initialize UrFall loader with concurrent download support.
36        
37        Args:
38            max_workers: Maximum number of concurrent download threads (default: 8)
39        """
40        super().__init__(
41            name="urfall",
42            description="UrFall Dataset - University of Rzeszow Fall Detection Dataset with multimodal data",
43            max_workers=max_workers
44        )
45        self.metadata = {
46            'data_types': ['depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features'],
47            'camera': 'cam0',  # Front camera
48            'sampling_frequency': 30,  # Depth/RGB camera fps
49            'accelerometer_frequency': 100,  # Accelerometer sampling frequency (typical)
50            'activities': {
51                -1: 'Not lying (standing/walking)',
52                0: 'Falling (transient)',
53                1: 'Lying on ground'
54            },
55            'fall_sequences': list(range(1, 31)),  # fall-01 to fall-30
56            'adl_sequences': list(range(1, 21)),  # adl-01 to adl-20
57            'feature_columns': [
58                'sequence_name',
59                'frame_number',
60                'label',
61                'HeightWidthRatio',
62                'MajorMinorRatio',
63                'BoundingBoxOccupancy',
64                'MaxStdXZ',
65                'HHmaxRatio',
66                'H',
67                'D',
68                'P40'
69            ],
70            'feature_descriptions': {
71                'HeightWidthRatio': 'Bounding box height to width ratio',
72                'MajorMinorRatio': 'Major to minor axis ratio from BLOB segmentation',
73                'BoundingBoxOccupancy': 'Ratio of bounding box occupied by person pixels',
74                'MaxStdXZ': 'Standard deviation of pixels from centroid (X and Z axis)',
75                'HHmaxRatio': 'Human height in frame to standing height ratio',
76                'H': 'Actual height in mm',
77                'D': 'Distance of person center to floor in mm',
78                'P40': 'Ratio of point clouds in 40cm cuboid to full height cuboid'
79            }
80        }

Initialize UrFall loader with concurrent download support.

Args: max_workers: Maximum number of concurrent download threads (default: 8)

metadata
def load_data( self, data_dir: str, data_types: Optional[List[str]] = None, sequences: Optional[List[str]] = None, use_falls: bool = True, use_adls: bool = True, **kwargs) -> Tuple[List[pandas.core.frame.DataFrame], List[str]]:
 82    def load_data(self, data_dir: str, 
 83                  data_types: Optional[List[str]] = None,
 84                  sequences: Optional[List[str]] = None,
 85                  use_falls: bool = True,
 86                  use_adls: bool = True,
 87                  **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
 88        """
 89        Load UrFall dataset from the specified directory.
 90        
 91        Args:
 92            data_dir: Directory containing the dataset
 93            data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer',
 94                       'synchronization', 'video', 'features' (default: ['features'])
 95            sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01'])
 96                      If None, loads all based on use_falls and use_adls
 97            use_falls: Whether to load fall sequences (default: True)
 98            use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True)
 99            **kwargs: Additional arguments
100            
101        Returns:
102            Tuple of (data_list, names_list)
103        """
104        # Default to loading pre-extracted features if not specified
105        if data_types is None:
106            data_types = ['features']
107        
108        # Validate data types
109        valid_types = set(self.metadata['data_types'])
110        requested_types = set(data_types)
111        invalid_types = requested_types - valid_types
112        if invalid_types:
113            raise ValueError(f"Invalid data types: {invalid_types}. Valid types: {valid_types}")
114        
115        # Create directory if it doesn't exist
116        os.makedirs(data_dir, exist_ok=True)
117        
118        data_list = []
119        names_list = []
120        
121        # Load pre-extracted features (CSV files)
122        if 'features' in data_types:
123            features_data, features_names = self._load_features(data_dir, sequences, use_falls, use_adls)
124            data_list.extend(features_data)
125            names_list.extend(features_names)
126        
127        # Load raw accelerometer data
128        if 'accelerometer' in data_types:
129            accel_data, accel_names = self._load_accelerometer(data_dir, sequences, use_falls, use_adls)
130            data_list.extend(accel_data)
131            names_list.extend(accel_names)
132        
133        # Load synchronization data
134        if 'synchronization' in data_types:
135            sync_data, sync_names = self._load_synchronization(data_dir, sequences, use_falls, use_adls)
136            data_list.extend(sync_data)
137            names_list.extend(sync_names)
138        
139        # Note: Depth, RGB, and Video data are image/video files
140        # These would require specialized loading and are not typically loaded into DataFrames
141        if 'depth' in data_types or 'rgb' in data_types or 'video' in data_types:
142            print("Note: Depth, RGB, and Video data types contain image/video files.")
143            print("These are not loaded into DataFrames but their paths can be accessed.")
144            print("Use the get_file_paths() method to retrieve paths to these files.")
145        
146        self.data = data_list
147        return data_list, names_list

Load UrFall dataset from the specified directory.

Args: data_dir: Directory containing the dataset; data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features' (default: ['features']); sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01']). If None, loads all based on use_falls and use_adls; use_falls: Whether to load fall sequences (default: True); use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True); **kwargs: Additional arguments

Returns: Tuple of (data_list, names_list)

def get_file_paths( self, data_dir: str, data_type: str, sequences: Optional[List[str]] = None, use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]:
def get_file_paths(self, data_dir: str, data_type: str,
                   sequences: Optional[List[str]] = None,
                   use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]:
    """
    Locate the on-disk archives/videos for one image-like data type.

    Only files that actually exist under ``data_dir`` are reported; missing
    sequences are silently left out of the result.

    Args:
        data_dir: Directory containing the dataset
        data_type: One of 'depth', 'rgb' or 'video'
        sequences: Explicit sequence names to look up. When given, it takes
            precedence and use_falls/use_adls are ignored.
        use_falls: Include 'fall-01' .. 'fall-30' when sequences is None
        use_adls: Include 'adl-01' .. 'adl-20' when sequences is None

    Returns:
        Dictionary mapping sequence names to the path of the existing file

    Raises:
        ValueError: If data_type is not 'depth', 'rgb' or 'video'
    """
    # Reject unsupported data types before doing any filesystem work.
    if data_type not in ['depth', 'rgb', 'video']:
        raise ValueError(f"data_type must be one of: 'depth', 'rgb', 'video'. Got: {data_type}")

    # File-name suffix appended to the sequence name for each data type.
    suffix_by_type = {
        'depth': '-cam0-d.zip',
        'rgb': '-cam0-rgb.zip',
        'video': '-cam0.mp4'
    }
    suffix = suffix_by_type[data_type]

    # Work out which sequence names to probe.
    if sequences is not None:
        wanted = sequences
    else:
        wanted = []
        if use_falls:
            wanted.extend([f"fall-{n:02d}" for n in range(1, 31)])
        if use_adls:
            wanted.extend([f"adl-{n:02d}" for n in range(1, 21)])

    # Keep only the sequences whose file is present on disk.
    found = {}
    for seq_name in wanted:
        candidate = os.path.join(data_dir, f"{seq_name}{suffix}")
        if os.path.exists(candidate):
            found[seq_name] = candidate

    return found

Get file paths for image/video data types (depth, RGB, video).

Args: data_dir: Directory containing the dataset data_type: Type of data ('depth', 'rgb', 'video') sequences: Specific sequences to get paths for use_falls: Whether to include fall sequences use_adls: Whether to include ADL sequences

Returns: Dictionary mapping sequence names to file paths

def create_sliding_windows( self, data: List[pandas.core.frame.DataFrame], names: List[str], window_size: int = 30, step_size: int = 15) -> List[Dict]:
def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                           window_size: int = 30, step_size: int = 15) -> List[Dict]:
    """
    Cut every loaded DataFrame into sliding windows, column by column.

    Empty DataFrames, and DataFrames without any numeric non-metadata
    column, produce no entry in the result.

    Args:
        data: List of DataFrames containing the dataset
        names: List of names corresponding to each DataFrame
        window_size: Size of the sliding window (default: 30 frames for depth features)
        step_size: Step size for sliding window (default: 15 frames)

    Returns:
        List of dictionaries of the form {"name": ..., "windows": [...]},
        where each window entry is {"name": <column>, "data": <windows>}.
        A 'label' column is reduced to one majority label per window under
        the name "labels"; an 'activity_id' column is windowed as-is.
    """
    # Columns that describe the sample rather than carry a feature value.
    metadata_cols = ['sequence_name', 'frame_number', 'label', 'activity_type', 'activity_id']
    results = []

    for position, frame in enumerate(data):
        if frame.empty:
            continue

        numeric_features = [
            column for column in frame.columns
            if column not in metadata_cols and pd.api.types.is_numeric_dtype(frame[column])
        ]
        if not numeric_features:
            continue

        # One window entry per numeric feature column.
        per_frame = [
            {"name": column, "data": sliding_window(frame[column].values, window_size, step_size)}
            for column in numeric_features
        ]

        # Labels: one value per window, chosen by majority vote.
        if 'label' in frame.columns:
            voted = []
            for chunk in sliding_window(frame['label'].values, window_size, step_size):
                values, freq = np.unique(chunk, return_counts=True)
                voted.append(values[np.argmax(freq)])
            per_frame.append({"name": "labels", "data": np.array(voted)})

        # Activity ids are kept as raw windows (no voting).
        if 'activity_id' in frame.columns:
            id_windows = sliding_window(frame['activity_id'].values, window_size, step_size)
            per_frame.append({"name": "activity_id", "data": id_windows})

        results.append({"name": names[position], "windows": per_frame})

    return results

Create sliding windows from the loaded data.

Args: data: List of DataFrames containing the dataset names: List of names corresponding to each DataFrame window_size: Size of the sliding window (default: 30 frames for depth features) step_size: Step size for sliding window (default: 15 frames)

Returns: List of dictionaries containing windowed data

def get_supported_formats(self) -> List[str]:
def get_supported_formats(self) -> List[str]:
    """
    Report the file extensions the UrFall loader deals with.

    Returns:
        A new list with the supported extensions: '.csv', '.zip', '.mp4'
    """
    extensions = ['.csv', '.zip', '.mp4']
    return extensions

Get list of supported file formats for UrFall dataset.

Returns: List of supported file extensions

def get_sensor_info(self) -> Dict[str, <built-in function any>]:
def get_sensor_info(self) -> Dict[str, object]:
    """
    Get information about sensors in the dataset.

    The values are read straight from ``self.metadata``; no copy is made.

    Returns:
        Dictionary with the keys 'data_types', 'camera',
        'sampling_frequency' and 'accelerometer_frequency'

    Raises:
        KeyError: If ``self.metadata`` lacks one of those keys
    """
    # The return annotation previously used the builtin function ``any``
    # as the value type; ``object`` is a real type and needs no import.
    sensor_keys = ('data_types', 'camera', 'sampling_frequency', 'accelerometer_frequency')
    return {key: self.metadata[key] for key in sensor_keys}

Get information about sensors in the dataset.

Returns: Dictionary containing sensor information

def get_activity_info(self) -> Dict[int, str]:
def get_activity_info(self) -> Dict[int, str]:
    """
    Look up the activity table stored in the loader metadata.

    Returns:
        The ``self.metadata['activities']`` object itself (not a copy),
        a dictionary mapping activity IDs to labels
    """
    activity_map = self.metadata['activities']
    return activity_map

Get information about activities in the dataset.

Returns: Dictionary mapping activity IDs to labels

def get_feature_info(self) -> Dict[str, str]:
def get_feature_info(self) -> Dict[str, str]:
    """
    Look up the descriptions of the pre-extracted features.

    Returns:
        The ``self.metadata['feature_descriptions']`` object itself (not a
        copy), a dictionary mapping feature names to descriptions
    """
    descriptions = self.metadata['feature_descriptions']
    return descriptions

Get information about pre-extracted features.

Returns: Dictionary mapping feature names to descriptions

def load_daphnet_data(data_dir: str):
def load_daphnet_data(data_dir: str):
    """
    Load Daphnet data through the legacy function interface.

    Thin wrapper: builds a fresh DaphnetLoader and delegates to its
    ``load_data`` method.

    Args:
        data_dir: Dataset directory handed to ``DaphnetLoader.load_data``

    Returns:
        Tuple of (data_list, names_list)
    """
    return DaphnetLoader().load_data(data_dir)

Legacy function for loading Daphnet data.

Args: data_dir: Directory to store the dataset

Returns: Tuple of (data_list, names_list)

def create_sliding_windows(daphnet, daphnet_names, window_size=192, step_size=32):
def create_sliding_windows(daphnet, daphnet_names, window_size=192, step_size=32):
    """
    Build sliding windows over Daphnet data through the legacy function interface.

    Thin wrapper: builds a fresh DaphnetLoader and delegates to its
    ``create_sliding_windows`` method.

    Args:
        daphnet: List of dataframes containing Daphnet data
        daphnet_names: List of names of the Daphnet dataframes
        window_size: Size of the sliding window
        step_size: Step size for the sliding window

    Returns:
        List of dictionaries containing sliding windows for each DataFrame
    """
    return DaphnetLoader().create_sliding_windows(
        daphnet, daphnet_names, window_size, step_size
    )

Legacy function for creating sliding windows.

Args: daphnet: List of dataframes containing Daphnet data daphnet_names: List of names of the Daphnet dataframes window_size: Size of the sliding window step_size: Step size for the sliding window

Returns: List of dictionaries containing sliding windows for each DataFrame

def load_mobifall_data():
def load_mobifall_data(data_dir: str = ""):
    """
    Legacy function for loading MobiFall data.

    Args:
        data_dir: Dataset directory passed to ``MobiFallLoader.load_data``.
            Defaults to the empty string, which is what this function
            always passed before the parameter existed, so existing
            zero-argument calls behave exactly as they did.

    Returns:
        Tuple of (data_list, names_list)
    """
    loader = MobiFallLoader()
    return loader.load_data(data_dir)

Legacy function for loading MobiFall data.

Returns: Tuple of (data_list, names_list)

def load_arduous_data():
def load_arduous_data(data_dir: str = ""):
    """
    Legacy function for loading Arduous data.

    Args:
        data_dir: Dataset directory passed to ``ArduousLoader.load_data``.
            Defaults to the empty string, which is what this function
            always passed before the parameter existed, so existing
            zero-argument calls behave exactly as they did.

    Returns:
        Tuple of (data_list, names_list)
    """
    loader = ArduousLoader()
    return loader.load_data(data_dir)

Legacy function for loading Arduous data.

Returns: Tuple of (data_list, names_list)

def load_physionet_data(data_dir: str) -> Tuple[List[pandas.core.frame.DataFrame], List[str]]:
def load_physionet_data(data_dir: str) -> Tuple[List[pd.DataFrame], List[str]]:
    """
    Load PhysioNet data through the legacy function interface.

    Thin wrapper: builds a fresh PhysioNetLoader and delegates to its
    ``load_data`` method.

    Args:
        data_dir: Directory containing the dataset

    Returns:
        Tuple of (data_list, names_list)
    """
    return PhysioNetLoader().load_data(data_dir)

Legacy function to load PhysioNet data.

Args: data_dir: Directory containing the dataset

Returns: Tuple of (data_list, names_list)

def create_physionet_windows( data: List[pandas.core.frame.DataFrame], names: List[str], window_size: int = 600, step_size: int = 100) -> List[Dict]:
def create_physionet_windows(data: List[pd.DataFrame], names: List[str],
                             window_size: int = 600, step_size: int = 100) -> List[Dict]:
    """
    Build sliding windows over PhysioNet data through the legacy function interface.

    Thin wrapper: builds a fresh PhysioNetLoader and delegates to its
    ``create_sliding_windows`` method.

    Args:
        data: List of DataFrames
        names: List of names
        window_size: Size of sliding window
        step_size: Step size for sliding window

    Returns:
        List of sliding window dictionaries
    """
    return PhysioNetLoader().create_sliding_windows(
        data, names, window_size, step_size
    )

Legacy function to create sliding windows from PhysioNet data.

Args: data: List of DataFrames names: List of names window_size: Size of sliding window step_size: Step size for sliding window

Returns: List of sliding window dictionaries

def load_harup_data(data_dir: str, subjects=None, activities=None, trials=None):
def load_harup_data(data_dir: str, subjects=None, activities=None, trials=None):
    """
    Load HAR-UP data through the legacy function interface.

    Thin wrapper: builds a fresh HARUPLoader and forwards all arguments
    positionally to its ``load_data`` method.

    Args:
        data_dir: Directory containing the dataset
        subjects: List of subject IDs to load (default: all subjects)
        activities: List of activity IDs to load (default: all activities)
        trials: List of trial IDs to load (default: all trials)

    Returns:
        Tuple of (data_list, names_list)
    """
    return HARUPLoader().load_data(data_dir, subjects, activities, trials)

Legacy function for loading HAR-UP data.

Args: data_dir: Directory containing the dataset subjects: List of subject IDs to load (default: all subjects) activities: List of activity IDs to load (default: all activities) trials: List of trial IDs to load (default: all trials)

Returns: Tuple of (data_list, names_list)

def create_harup_windows(harup_data, harup_names, window_size=100, step_size=50):
def create_harup_windows(harup_data, harup_names, window_size=100, step_size=50):
    """
    Build sliding windows over HAR-UP data through the legacy function interface.

    Thin wrapper: builds a fresh HARUPLoader and delegates to its
    ``create_sliding_windows`` method.

    Args:
        harup_data: List of dataframes containing HAR-UP data
        harup_names: List of names of the HAR-UP dataframes
        window_size: Size of the sliding window
        step_size: Step size for the sliding window

    Returns:
        List of dictionaries containing sliding windows for each DataFrame
    """
    return HARUPLoader().create_sliding_windows(
        harup_data, harup_names, window_size, step_size
    )

Legacy function for creating sliding windows from HAR-UP data.

Args: harup_data: List of dataframes containing HAR-UP data harup_names: List of names of the HAR-UP dataframes window_size: Size of the sliding window step_size: Step size for the sliding window

Returns: List of dictionaries containing sliding windows for each DataFrame

def extract_harup_features(windows_data, time_domain=True, freq_domain=True):
def extract_harup_features(windows_data, time_domain=True, freq_domain=True):
    """
    Extract features from HAR-UP windows through the legacy function interface.

    Thin wrapper: builds a fresh HARUPLoader and delegates to its
    ``extract_features`` method.

    Args:
        windows_data: List of dictionaries containing sliding windows
        time_domain: Whether to extract time domain features
        freq_domain: Whether to extract frequency domain features

    Returns:
        List of dictionaries containing extracted features
    """
    return HARUPLoader().extract_features(windows_data, time_domain, freq_domain)

Legacy function for extracting features from HAR-UP windows.

Args: windows_data: List of dictionaries containing sliding windows time_domain: Whether to extract time domain features freq_domain: Whether to extract frequency domain features

Returns: List of dictionaries containing extracted features

def load_urfall_data( data_dir: str, data_types: Optional[List[str]] = None, sequences: Optional[List[str]] = None, use_falls: bool = True, use_adls: bool = True):
def load_urfall_data(data_dir: str, data_types: Optional[List[str]] = None,
                     sequences: Optional[List[str]] = None,
                     use_falls: bool = True, use_adls: bool = True):
    """
    Load the UrFall dataset through the legacy function interface.

    Thin wrapper: builds a fresh UrFallLoader and forwards the selection
    options to its ``load_data`` method as keyword arguments.

    Args:
        data_dir: Directory containing the dataset
        data_types: List of data types to load
        sequences: List of specific sequences to load
        use_falls: Whether to load fall sequences
        use_adls: Whether to load ADL sequences

    Returns:
        Tuple of (data_list, names_list)
    """
    selection = dict(data_types=data_types, sequences=sequences,
                     use_falls=use_falls, use_adls=use_adls)
    return UrFallLoader().load_data(data_dir, **selection)

Load UrFall dataset using the legacy function interface.

Args: data_dir: Directory containing the dataset data_types: List of data types to load sequences: List of specific sequences to load use_falls: Whether to load fall sequences use_adls: Whether to load ADL sequences

Returns: Tuple of (data_list, names_list)

def create_urfall_windows(urfall_data, urfall_names, window_size=30, step_size=15):
def create_urfall_windows(urfall_data, urfall_names, window_size=30, step_size=15):
    """
    Build sliding windows over UrFall data through the legacy function interface.

    Thin wrapper: builds a fresh UrFallLoader and delegates to its
    ``create_sliding_windows`` method.

    Args:
        urfall_data: List of DataFrames
        urfall_names: List of names
        window_size: Size of sliding window
        step_size: Step size for sliding window

    Returns:
        List of dictionaries containing windowed data
    """
    return UrFallLoader().create_sliding_windows(
        urfall_data, urfall_names, window_size, step_size
    )

Create sliding windows from UrFall data using the legacy function interface.

Args: urfall_data: List of DataFrames urfall_names: List of names window_size: Size of sliding window step_size: Step size for sliding window

Returns: List of dictionaries containing windowed data

def download_dataset(dataset_name, data_dir):
def download_dataset(dataset_name, data_dir):
    """
    Download the named dataset into ``data_dir``.

    Dispatches to the dataset-specific download helper. "physionet" is
    accepted but does nothing here.

    Args:
        dataset_name: One of "daphnet", "mobifall", "arduous", "harup",
            "urfall" or "physionet"
        data_dir: Directory passed to the download helper

    Raises:
        ValueError: If dataset_name is not one of the supported names
    """
    if dataset_name == "daphnet":
        download_daphnet_data(data_dir)
        return
    if dataset_name == "mobifall":
        download_mobifall_data(data_dir)
        return
    if dataset_name == "arduous":
        download_arduous_data(data_dir)
        return
    if dataset_name == "harup":
        download_harup_data(data_dir)
        return
    if dataset_name == "urfall":
        download_urfall_data(data_dir)
        return
    if dataset_name == "physionet":
        # PhysioNet dataset is handled by the PhysioNetLoader itself
        return
    raise ValueError(f"Dataset {dataset_name} not supported.")

Download the dataset.

def extract_dataset(dataset_name, data_dir):
def extract_dataset(dataset_name, data_dir):
    """
    Extract the named dataset inside ``data_dir``.

    Dispatches to the dataset-specific extraction helper. "physionet" is
    accepted but does nothing here.

    Args:
        dataset_name: One of "daphnet", "mobifall", "arduous", "harup",
            "urfall" or "physionet"
        data_dir: Directory passed to the extraction helper

    Raises:
        ValueError: If dataset_name is not one of the supported names
    """
    if dataset_name == "daphnet":
        extract_daphnet_data(data_dir)
        return
    if dataset_name == "mobifall":
        extract_mobifall_data(data_dir)
        return
    if dataset_name == "arduous":
        extract_arduous_data(data_dir)
        return
    if dataset_name == "harup":
        extract_harup_data(data_dir)
        return
    if dataset_name == "urfall":
        extract_urfall_data(data_dir)
        return
    if dataset_name == "physionet":
        # PhysioNet dataset is handled by the PhysioNetLoader itself
        return
    raise ValueError(f"Dataset {dataset_name} not supported.")

Extract the dataset.

def sliding_window(data, window_size, step_size):
def sliding_window(data, window_size, step_size):
    """
    Split a sequence into fixed-length, possibly overlapping windows.

    Window k is the slice ``data[k * step_size : k * step_size + window_size]``.
    Only full-length windows are produced, so a trailing remainder shorter
    than ``window_size`` is dropped, and input shorter than ``window_size``
    yields an empty list.

    Args:
        data: Any sliceable sequence supporting ``len``
        window_size: Number of elements per window
        step_size: Offset between the starts of consecutive windows

    Returns:
        List of slices of ``data``, in order
    """
    count = (len(data) - window_size) // step_size + 1
    return [
        data[k * step_size:k * step_size + window_size]
        for k in range(count)
    ]
def get_dataset_manager():
def get_dataset_manager():
    """
    Return the DatasetManager instance.

    Simply calls ``DatasetManager()``; the manager is presumably implemented
    as a singleton, so repeated calls are expected to yield the same object.
    """
    manager = DatasetManager()
    return manager

Get the singleton DatasetManager instance.

def get_available_datasets():
def get_available_datasets():
    """
    Get list of available dataset names.

    Returns whatever ``DatasetManager().get_available_components()`` reports.
    """
    manager = DatasetManager()
    return manager.get_available_components()

Get list of available dataset names.

def load_dataset(name: str, data_dir: str, **kwargs):
def load_dataset(name: str, data_dir: str, **kwargs):
    """
    Load a dataset by name via the DatasetManager.

    Delegates to ``DatasetManager().load_dataset``, forwarding every
    argument unchanged.

    Args:
        name: Name of the dataset loader
        data_dir: Directory containing the dataset
        **kwargs: Additional arguments for the loader

    Returns:
        Dataset loader instance with loaded data
    """
    manager = DatasetManager()
    return manager.load_dataset(name, data_dir, **kwargs)

Load a dataset using the DatasetManager.

Args: name: Name of the dataset loader data_dir: Directory containing the dataset **kwargs: Additional arguments for the loader

Returns: Dataset loader instance with loaded data