'''
gaitsetpy.dataset.harup

HAR-UP Dataset Loader and Utils.
Maintainer: @aharshit123456

This file contains the HAR-UP dataset loader class that inherits from BaseDatasetLoader.
HAR-UP is a multimodal dataset for human activity recognition and fall detection.

Reference:
- Website: https://sites.google.com/up.edu.mx/har-up/
- GitHub: https://github.com/jpnm561/HAR-UP
'''
 12
 13import os
 14import pandas as pd
 15import numpy as np
 16from typing import List, Dict, Tuple, Optional
 17from glob import glob
 18import datetime
 19from tqdm import tqdm
 20from ..core.base_classes import BaseDatasetLoader
 21from .utils import download_dataset, extract_dataset, sliding_window
 22from ..features.harup_features import HARUPFeatureExtractor
 23
 24
 25class HARUPLoader(BaseDatasetLoader):
 26    """
 27    HAR-UP dataset loader class.
 28    
 29    This class handles loading and processing of the HAR-UP dataset for human activity recognition
 30    and fall detection analysis.
 31    """
 32    
 33    def __init__(self, max_workers: int = 8):
 34        """
 35        Initialize HAR-UP loader with concurrent download support.
 36        
 37        Args:
 38            max_workers: Maximum number of concurrent download threads (default: 8)
 39        """
 40        super().__init__(
 41            name="harup",
 42            description="HAR-UP Dataset - Multimodal System for Fall Detection and Human Activity Recognition",
 43            max_workers=max_workers
 44        )
 45        self.metadata = {
 46            'sensors': [
 47                'AnkleAccelerometer', 'AnkleAngularVelocity', 'AnkleLuminosity',
 48                'RightPocketAccelerometer', 'RightPocketAngularVelocity', 'RightPocketLuminosity',
 49                'BeltAccelerometer', 'BeltAngularVelocity', 'BeltLuminosity',
 50                'NeckAccelerometer', 'NeckAngularVelocity', 'NeckLuminosity',
 51                'WristAccelerometer', 'WristAngularVelocity', 'WristLuminosity',
 52                'BrainSensor', 'Infrared'
 53            ],
 54            'components': {
 55                'Accelerometer': ['x', 'y', 'z'],
 56                'AngularVelocity': ['x', 'y', 'z'],
 57                'Luminosity': ['illuminance'],
 58                'BrainSensor': ['value'],
 59                'Infrared': ['value']
 60            },
 61            'sampling_frequency': 100,  # Hz
 62            'activities': {
 63                1: 'Walking',
 64                2: 'Walking upstairs',
 65                3: 'Walking downstairs',
 66                4: 'Sitting',
 67                5: 'Standing',
 68                6: 'Lying',
 69                7: 'Falling forward using hands',
 70                8: 'Falling forward using knees',
 71                9: 'Falling backwards',
 72                10: 'Falling sideward',
 73                11: 'Falling sitting in empty chair'
 74            }
 75        }
 76        
 77        # Features used in HAR-UP
 78        self.features = [
 79            'Mean', 'StandardDeviation', 'RootMeanSquare', 'MaximalAmplitude',
 80            'MinimalAmplitude', 'Median', 'Number of zero-crossing', 'Skewness',
 81            'Kurtosis', 'First Quartile', 'Third Quartile', 'Autocorrelation',
 82            'Energy'
 83        ]
 84    
 85    def download_harup_data(self, data_dir: str) -> Optional[str]:
 86        """
 87        Download HAR-UP dataset if not already present.
 88        
 89        Args:
 90            data_dir: Directory to store the dataset
 91            
 92        Returns:
 93            Path to the extracted dataset or None if not found
 94        """
 95        # Use the utility function to download and extract the dataset
 96        download_dataset("harup", data_dir)
 97        extract_dataset("harup", data_dir)
 98        
 99        # Check if dataset exists after download attempt
100        dataset_path = os.path.join(data_dir, "DataSet")
101        if not os.path.exists(dataset_path):
102            print("HAR-UP dataset not found after download attempt.")
103            print("Please ensure the dataset is organized in the following structure:")
104            print("DataSet/Subject{i}/Activity{j}/Trial{k}/Subject{i}Activity{j}Trial{k}.csv")
105            return None
106        
107        return dataset_path
108    
109    def load_data(self, data_dir: str, subjects: Optional[List[int]] = None, 
110                activities: Optional[List[int]] = None, trials: Optional[List[int]] = None,
111                **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
112        """
113        Load HAR-UP dataset from the specified directory.
114        Args:
115            data_dir: Directory containing the dataset
116            subjects: List of subject IDs to load (default: all subjects)
117            activities: List of activity IDs to load (default: all activities)
118            trials: List of trial IDs to load (default: all trials)
119            **kwargs: Additional arguments
120        Returns:
121            Tuple of (data_list, names_list)
122        """
123        import re
124        import os
125        # Set default values if not provided (HAR-UP: 4 subjects, 11 activities, 3 trials)
126        if subjects is None:
127            subjects = list(range(1, 5))  # 4 subjects
128        if activities is None:
129            activities = list(range(1, 12))  # 11 activities
130        if trials is None:
131            trials = list(range(1, 4))  # 3 trials
132
133        # Column names as per official HAR-UP documentation
134        columns = [
135            "Timestamp",
136            "EEG_NeuroSky",
137            "Belt_Acc_X", "Belt_Acc_Y", "Belt_Acc_Z",
138            "Belt_Gyro_X", "Belt_Gyro_Y", "Belt_Gyro_Z",
139            "Belt_Luminosity",
140            "Neck_Acc_X", "Neck_Acc_Y", "Neck_Acc_Z",
141            "Neck_Gyro_X", "Neck_Gyro_Y", "Neck_Gyro_Z",
142            "Neck_Luminosity",
143            "Pocket_Acc_X", "Pocket_Acc_Y", "Pocket_Acc_Z",
144            "Pocket_Gyro_X", "Pocket_Gyro_Y", "Pocket_Gyro_Z",
145            "Pocket_Luminosity",
146            "Wrist_Acc_X", "Wrist_Acc_Y", "Wrist_Acc_Z",
147            "Wrist_Gyro_X", "Wrist_Gyro_Y", "Wrist_Gyro_Z",
148            "Wrist_Luminosity",
149            "Infrared_1", "Infrared_2", "Infrared_3", "Infrared_4"
150        ]
151
152        # If data_dir does not exist, trigger interactive download
153        if not os.path.exists(data_dir):
154            print(f"Directory {data_dir} does not exist. Attempting to download HAR-UP dataset...")
155            self.download_harup_data(data_dir)
156        # If still doesn't exist, error out
157        if not os.path.exists(data_dir):
158            print(f"Failed to create or download dataset directory: {data_dir}")
159            return [], []
160
161        # Find the UP_Fall_Detection_Dataset directory
162        dataset_path = None
163        for entry in os.listdir(data_dir):
164            entry_path = os.path.join(data_dir, entry)
165            if os.path.isdir(entry_path) and entry.startswith("UP_Fall_Detection_Dataset"):
166                dataset_path = entry_path
167                break
168        if dataset_path is None:
169            print("UP_Fall_Detection_Dataset directory not found in", data_dir)
170            print("No data loaded. Please make sure you've downloaded the HAR-UP dataset.")
171            print("Visit https://sites.google.com/up.edu.mx/har-up/ to download the dataset.")
172            return [], []
173
174        harup_data = []
175        harup_names = []
176
177        # Iterate over subjects
178        for subject_id in subjects:
179            subject_folder = f"Subject_{subject_id:02d}"
180            subject_path = os.path.join(dataset_path, subject_folder)
181            if not os.path.isdir(subject_path):
182                continue
183            
184            # Initialize empty DataFrame for this subject
185            subject_df = pd.DataFrame()
186            
187            # Iterate over activities in order
188            for activity_id in sorted(activities):
189                activity_folder = f"A{activity_id:02d}"
190                activity_path = os.path.join(subject_path, activity_folder)
191                if not os.path.isdir(activity_path):
192                    continue
193                
194                # Iterate over trials in order
195                for trial_id in sorted(trials):
196                    file_name = f"S{subject_id:02d}_A{activity_id:02d}_T{trial_id:02d}.csv"
197                    file_path = os.path.join(activity_path, file_name)
198                    name = f"{subject_folder}_{activity_folder}_T{trial_id:02d}"
199                    
200                    try:
201                        df = pd.read_csv(file_path, header=0)
202                        print(f"[HARUP] Loaded columns for {file_name}: {list(df.columns)}")
203                        df['subject_id'] = subject_id
204                        df['activity_id'] = activity_id 
205                        df['trial_id'] = trial_id
206                        df['activity_label'] = self.metadata['activities'].get(activity_id, f"A{activity_id:02d}")
207                        
208                        # Concatenate to subject's DataFrame
209                        subject_df = pd.concat([subject_df, df], ignore_index=True)
210                        harup_names.append(name)
211                        
212                    except Exception as e:
213                        print(f"Error loading {file_path}: {e}")
214            
215            # Add complete subject DataFrame to data list
216            if not subject_df.empty:
217                harup_data.append(subject_df)
218                
219        self.data = harup_data
220        self.names = harup_names
221
222        return harup_data, harup_names
223    
224    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
225                             window_size: int = 100, step_size: int = 50) -> List[Dict]:
226        """
227        Create sliding windows from the HAR-UP dataset.
228        
229        Args:
230            data: List of DataFrames containing HAR-UP data
231            names: List of names corresponding to the data
232            window_size: Size of the sliding window (default: 100 = 1 second at 100Hz)
233            step_size: Step size for the sliding window (default: 50 = 0.5 seconds at 100Hz)
234            
235        Returns:
236            List of dictionaries containing sliding windows for each DataFrame
237        """
238        windows_data = []
239        
240        for idx, df in enumerate(data):
241            if df.empty:
242                continue
243                
244            windows = []
245            processed_columns = set()
246            
247            # Only use numeric columns (skip TIME and any non-numeric)
248            sensor_columns = [col for col in df.columns if col not in 
249                             ['subject_id', 'activity_id', 'trial_id', 'activity_label', 'TIME']
250                             and pd.api.types.is_numeric_dtype(df[col])]
251            
252
253            # Process each sensor column
254            for col in sensor_columns:
255                if col not in processed_columns:
256                    
257                    window_data = sliding_window(df[col], window_size, step_size)
258                    windows.append({"name": col, "data": window_data})
259                    processed_columns.add(col)
260            
261            # Include activity ID for each window
262            activity_windows = sliding_window(df["activity_id"], window_size, step_size)
263            windows.append({"name": "activity_id", "data": activity_windows})
264            
265            # For each window, take the most common activity ID as the label
266            labels = []
267            for window in activity_windows:
268                # Get most common activity in this window
269                unique_vals, counts = np.unique(window, return_counts=True)
270                most_common_idx = np.argmax(counts)
271                labels.append(unique_vals[most_common_idx])
272            
273            windows.append({"name": "labels", "data": np.array(labels)})
274            
275            windows_data.append({"name": names[idx], "windows": windows})
276        
277        return windows_data
278    
279    def extract_features(self, windows_data: List[Dict], time_domain_features: bool = True,
280                       freq_domain_features: bool = True) -> List[Dict]:
281        """
282        Extract features from sliding windows using HAR-UP feature extraction methods.
283        Args:
284            windows_data: List of dictionaries containing sliding windows
285            time_domain_features: Whether to extract time domain features
286            freq_domain_features: Whether to extract frequency domain features
287        Returns:
288            List of dictionaries containing extracted features
289        """
290        # Mapping from original sensor names to actual CSV column names
291        sensor_map = {
292            'BeltAccelerometer: x-axis (g)': 'BELT_ACC_X',
293            'BeltAccelerometer: y-axis (g)': 'BELT_ACC_Y',
294            'BeltAccelerometer: z-axis (g)': 'BELT_ACC_Z',
295            'BeltAngularVelocity: x-axis (deg/s)': 'BELT_ANG_X',
296            'BeltAngularVelocity: y-axis (deg/s)': 'BELT_ANG_Y',
297            'BeltAngularVelocity: z-axis (deg/s)': 'BELT_ANG_Z',
298            'BeltLuminosity: illuminance (lx)': 'BELT_LUMINOSITY',
299            'NeckAccelerometer: x-axis (g)': 'NECK_ACC_X',
300            'NeckAccelerometer: y-axis (g)': 'NECK_ACC_Y',
301            'NeckAccelerometer: z-axis (g)': 'NECK_ACC_Z',
302            'NeckAngularVelocity: x-axis (deg/s)': 'NECK_ANG_X',
303            'NeckAngularVelocity: y-axis (deg/s)': 'NECK_ANG_Y',
304            'NeckAngularVelocity: z-axis (deg/s)': 'NECK_ANG_Z',
305            'NeckLuminosity: illuminance (lx)': 'NECK_LUMINOSITY',
306            'PocketAccelerometer: x-axis (g)': 'PCKT_ACC_X',
307            'PocketAccelerometer: y-axis (g)': 'PCKT_ACC_Y',
308            'PocketAccelerometer: z-axis (g)': 'PCKT_ACC_Z',
309            'PocketAngularVelocity: x-axis (deg/s)': 'PCKT_ANG_X',
310            'PocketAngularVelocity: y-axis (deg/s)': 'PCKT_ANG_Y',
311            'PocketAngularVelocity: z-axis (deg/s)': 'PCKT_ANG_Z',
312            'PocketLuminosity: illuminance (lx)': 'PCKT_LUMINOSITY',
313            'WristAccelerometer: x-axis (g)': 'WRST_ACC_X',
314            'WristAccelerometer: y-axis (g)': 'WRST_ACC_Y',
315            'WristAccelerometer: z-axis (g)': 'WRST_ACC_Z',
316            'WristAngularVelocity: x-axis (deg/s)': 'WRST_ANG_X',
317            'WristAngularVelocity: y-axis (deg/s)': 'WRST_ANG_Y',
318            'WristAngularVelocity: z-axis (deg/s)': 'WRST_ANG_Z',
319            'WristLuminosity: illuminance (lx)': 'WRST_LUMINOSITY',
320            'BrainSensor': 'HELMET_RAW',
321            'Infrared1': 'IR_1',
322            'Infrared2': 'IR_2',
323            'Infrared3': 'IR_3',
324            'Infrared4': 'IR_4',
325        }
326        extractor = HARUPFeatureExtractor(verbose=True)
327        extractor.config['time_domain'] = time_domain_features
328        extractor.config['frequency_domain'] = freq_domain_features
329        all_features = []
330        for window_dict in windows_data:
331            name = window_dict["name"]
332            windows = window_dict["windows"]
333            labels = None
334            for window in windows:
335                if window["name"] == "labels":
336                    labels = window["data"]
337                    break
338            if labels is None:
339                print(f"No labels found for {name}, skipping feature extraction")
340                continue
341            filtered_windows = []
342            missing = []
343            for orig_sensor, csv_col in sensor_map.items():
344                found = False
345                for window in windows:
346                    if window["name"] == csv_col:
347                        filtered_windows.append(window)
348                        found = True
349                        break
350                if not found:
351                    missing.append((orig_sensor, csv_col))
352            if missing:
353                print(f"[HARUP] Missing columns for {name}: {[m[1] for m in missing]}")
354            for window in windows:
355                if window["name"] == "activity_id" or window["name"] == "labels":
356                    filtered_windows.append(window)
357            features = extractor.extract_features(filtered_windows, fs=self.metadata['sampling_frequency'])
358            for i, feature in enumerate(features):
359                window_idx = i // (len(filtered_windows) - 2)  # Subtract 2 for labels and activity_id
360                if window_idx < len(labels):
361                    feature["label"] = labels[window_idx]
362            all_features.append({"name": name, "features": features})
363        return all_features
364    
365    def get_supported_formats(self) -> List[str]:
366        """
367        Get list of supported file formats for HAR-UP dataset.
368        
369        Returns:
370            List of supported file extensions
371        """
372        return ['.csv']
373    
374    def get_sensor_info(self) -> Dict[str, List[str]]:
375        """
376        Get information about sensors in the dataset.
377        
378        Returns:
379            Dictionary containing sensor information
380        """
381        return {
382            'sensors': self.metadata['sensors'],
383            'components': self.metadata['components'],
384            'sampling_frequency': self.metadata['sampling_frequency']
385        }
386    
387    def get_activity_info(self) -> Dict[int, str]:
388        """
389        Get information about activities in the dataset.
390        
391        Returns:
392            Dictionary mapping activity IDs to descriptions
393        """
394        return self.metadata['activities']
395
396
397# Legacy function wrappers for backward compatibility
def load_harup_data(data_dir: str, subjects=None, activities=None, trials=None):
    """
    Backward-compatible wrapper around HARUPLoader.load_data.

    Args:
        data_dir: Directory containing the dataset
        subjects: Subject IDs to load; None loads all subjects
        activities: Activity IDs to load; None loads all activities
        trials: Trial IDs to load; None loads all trials

    Returns:
        Tuple of (data_list, names_list)
    """
    return HARUPLoader().load_data(
        data_dir,
        subjects=subjects,
        activities=activities,
        trials=trials,
    )
413
414
def create_harup_windows(harup_data, harup_names, window_size=100, step_size=50):
    """
    Backward-compatible wrapper around HARUPLoader.create_sliding_windows.

    Args:
        harup_data: List of dataframes containing HAR-UP data
        harup_names: Names corresponding to each dataframe
        window_size: Sliding window length in samples
        step_size: Stride between consecutive windows in samples

    Returns:
        List of dictionaries containing sliding windows for each DataFrame
    """
    return HARUPLoader().create_sliding_windows(
        harup_data,
        harup_names,
        window_size=window_size,
        step_size=step_size,
    )
430
431
def extract_harup_features(windows_data, time_domain=True, freq_domain=True):
    """
    Backward-compatible wrapper around HARUPLoader.extract_features.

    Args:
        windows_data: List of dictionaries containing sliding windows
        time_domain: Whether to extract time domain features
        freq_domain: Whether to extract frequency domain features

    Returns:
        List of dictionaries containing extracted features
    """
    return HARUPLoader().extract_features(
        windows_data,
        time_domain_features=time_domain,
        freq_domain_features=freq_domain,
    )
class HARUPLoader(gaitsetpy.core.base_classes.BaseDatasetLoader):
 26class HARUPLoader(BaseDatasetLoader):
 27    """
 28    HAR-UP dataset loader class.
 29    
 30    This class handles loading and processing of the HAR-UP dataset for human activity recognition
 31    and fall detection analysis.
 32    """
 33    
 34    def __init__(self, max_workers: int = 8):
 35        """
 36        Initialize HAR-UP loader with concurrent download support.
 37        
 38        Args:
 39            max_workers: Maximum number of concurrent download threads (default: 8)
 40        """
 41        super().__init__(
 42            name="harup",
 43            description="HAR-UP Dataset - Multimodal System for Fall Detection and Human Activity Recognition",
 44            max_workers=max_workers
 45        )
 46        self.metadata = {
 47            'sensors': [
 48                'AnkleAccelerometer', 'AnkleAngularVelocity', 'AnkleLuminosity',
 49                'RightPocketAccelerometer', 'RightPocketAngularVelocity', 'RightPocketLuminosity',
 50                'BeltAccelerometer', 'BeltAngularVelocity', 'BeltLuminosity',
 51                'NeckAccelerometer', 'NeckAngularVelocity', 'NeckLuminosity',
 52                'WristAccelerometer', 'WristAngularVelocity', 'WristLuminosity',
 53                'BrainSensor', 'Infrared'
 54            ],
 55            'components': {
 56                'Accelerometer': ['x', 'y', 'z'],
 57                'AngularVelocity': ['x', 'y', 'z'],
 58                'Luminosity': ['illuminance'],
 59                'BrainSensor': ['value'],
 60                'Infrared': ['value']
 61            },
 62            'sampling_frequency': 100,  # Hz
 63            'activities': {
 64                1: 'Walking',
 65                2: 'Walking upstairs',
 66                3: 'Walking downstairs',
 67                4: 'Sitting',
 68                5: 'Standing',
 69                6: 'Lying',
 70                7: 'Falling forward using hands',
 71                8: 'Falling forward using knees',
 72                9: 'Falling backwards',
 73                10: 'Falling sideward',
 74                11: 'Falling sitting in empty chair'
 75            }
 76        }
 77        
 78        # Features used in HAR-UP
 79        self.features = [
 80            'Mean', 'StandardDeviation', 'RootMeanSquare', 'MaximalAmplitude',
 81            'MinimalAmplitude', 'Median', 'Number of zero-crossing', 'Skewness',
 82            'Kurtosis', 'First Quartile', 'Third Quartile', 'Autocorrelation',
 83            'Energy'
 84        ]
 85    
 86    def download_harup_data(self, data_dir: str) -> Optional[str]:
 87        """
 88        Download HAR-UP dataset if not already present.
 89        
 90        Args:
 91            data_dir: Directory to store the dataset
 92            
 93        Returns:
 94            Path to the extracted dataset or None if not found
 95        """
 96        # Use the utility function to download and extract the dataset
 97        download_dataset("harup", data_dir)
 98        extract_dataset("harup", data_dir)
 99        
100        # Check if dataset exists after download attempt
101        dataset_path = os.path.join(data_dir, "DataSet")
102        if not os.path.exists(dataset_path):
103            print("HAR-UP dataset not found after download attempt.")
104            print("Please ensure the dataset is organized in the following structure:")
105            print("DataSet/Subject{i}/Activity{j}/Trial{k}/Subject{i}Activity{j}Trial{k}.csv")
106            return None
107        
108        return dataset_path
109    
110    def load_data(self, data_dir: str, subjects: Optional[List[int]] = None, 
111                activities: Optional[List[int]] = None, trials: Optional[List[int]] = None,
112                **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
113        """
114        Load HAR-UP dataset from the specified directory.
115        Args:
116            data_dir: Directory containing the dataset
117            subjects: List of subject IDs to load (default: all subjects)
118            activities: List of activity IDs to load (default: all activities)
119            trials: List of trial IDs to load (default: all trials)
120            **kwargs: Additional arguments
121        Returns:
122            Tuple of (data_list, names_list)
123        """
124        import re
125        import os
126        # Set default values if not provided (HAR-UP: 4 subjects, 11 activities, 3 trials)
127        if subjects is None:
128            subjects = list(range(1, 5))  # 4 subjects
129        if activities is None:
130            activities = list(range(1, 12))  # 11 activities
131        if trials is None:
132            trials = list(range(1, 4))  # 3 trials
133
134        # Column names as per official HAR-UP documentation
135        columns = [
136            "Timestamp",
137            "EEG_NeuroSky",
138            "Belt_Acc_X", "Belt_Acc_Y", "Belt_Acc_Z",
139            "Belt_Gyro_X", "Belt_Gyro_Y", "Belt_Gyro_Z",
140            "Belt_Luminosity",
141            "Neck_Acc_X", "Neck_Acc_Y", "Neck_Acc_Z",
142            "Neck_Gyro_X", "Neck_Gyro_Y", "Neck_Gyro_Z",
143            "Neck_Luminosity",
144            "Pocket_Acc_X", "Pocket_Acc_Y", "Pocket_Acc_Z",
145            "Pocket_Gyro_X", "Pocket_Gyro_Y", "Pocket_Gyro_Z",
146            "Pocket_Luminosity",
147            "Wrist_Acc_X", "Wrist_Acc_Y", "Wrist_Acc_Z",
148            "Wrist_Gyro_X", "Wrist_Gyro_Y", "Wrist_Gyro_Z",
149            "Wrist_Luminosity",
150            "Infrared_1", "Infrared_2", "Infrared_3", "Infrared_4"
151        ]
152
153        # If data_dir does not exist, trigger interactive download
154        if not os.path.exists(data_dir):
155            print(f"Directory {data_dir} does not exist. Attempting to download HAR-UP dataset...")
156            self.download_harup_data(data_dir)
157        # If still doesn't exist, error out
158        if not os.path.exists(data_dir):
159            print(f"Failed to create or download dataset directory: {data_dir}")
160            return [], []
161
162        # Find the UP_Fall_Detection_Dataset directory
163        dataset_path = None
164        for entry in os.listdir(data_dir):
165            entry_path = os.path.join(data_dir, entry)
166            if os.path.isdir(entry_path) and entry.startswith("UP_Fall_Detection_Dataset"):
167                dataset_path = entry_path
168                break
169        if dataset_path is None:
170            print("UP_Fall_Detection_Dataset directory not found in", data_dir)
171            print("No data loaded. Please make sure you've downloaded the HAR-UP dataset.")
172            print("Visit https://sites.google.com/up.edu.mx/har-up/ to download the dataset.")
173            return [], []
174
175        harup_data = []
176        harup_names = []
177
178        # Iterate over subjects
179        for subject_id in subjects:
180            subject_folder = f"Subject_{subject_id:02d}"
181            subject_path = os.path.join(dataset_path, subject_folder)
182            if not os.path.isdir(subject_path):
183                continue
184            
185            # Initialize empty DataFrame for this subject
186            subject_df = pd.DataFrame()
187            
188            # Iterate over activities in order
189            for activity_id in sorted(activities):
190                activity_folder = f"A{activity_id:02d}"
191                activity_path = os.path.join(subject_path, activity_folder)
192                if not os.path.isdir(activity_path):
193                    continue
194                
195                # Iterate over trials in order
196                for trial_id in sorted(trials):
197                    file_name = f"S{subject_id:02d}_A{activity_id:02d}_T{trial_id:02d}.csv"
198                    file_path = os.path.join(activity_path, file_name)
199                    name = f"{subject_folder}_{activity_folder}_T{trial_id:02d}"
200                    
201                    try:
202                        df = pd.read_csv(file_path, header=0)
203                        print(f"[HARUP] Loaded columns for {file_name}: {list(df.columns)}")
204                        df['subject_id'] = subject_id
205                        df['activity_id'] = activity_id 
206                        df['trial_id'] = trial_id
207                        df['activity_label'] = self.metadata['activities'].get(activity_id, f"A{activity_id:02d}")
208                        
209                        # Concatenate to subject's DataFrame
210                        subject_df = pd.concat([subject_df, df], ignore_index=True)
211                        harup_names.append(name)
212                        
213                    except Exception as e:
214                        print(f"Error loading {file_path}: {e}")
215            
216            # Add complete subject DataFrame to data list
217            if not subject_df.empty:
218                harup_data.append(subject_df)
219                
220        self.data = harup_data
221        self.names = harup_names
222
223        return harup_data, harup_names
224    
225    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
226                             window_size: int = 100, step_size: int = 50) -> List[Dict]:
227        """
228        Create sliding windows from the HAR-UP dataset.
229        
230        Args:
231            data: List of DataFrames containing HAR-UP data
232            names: List of names corresponding to the data
233            window_size: Size of the sliding window (default: 100 = 1 second at 100Hz)
234            step_size: Step size for the sliding window (default: 50 = 0.5 seconds at 100Hz)
235            
236        Returns:
237            List of dictionaries containing sliding windows for each DataFrame
238        """
239        windows_data = []
240        
241        for idx, df in enumerate(data):
242            if df.empty:
243                continue
244                
245            windows = []
246            processed_columns = set()
247            
248            # Only use numeric columns (skip TIME and any non-numeric)
249            sensor_columns = [col for col in df.columns if col not in 
250                             ['subject_id', 'activity_id', 'trial_id', 'activity_label', 'TIME']
251                             and pd.api.types.is_numeric_dtype(df[col])]
252            
253
254            # Process each sensor column
255            for col in sensor_columns:
256                if col not in processed_columns:
257                    
258                    window_data = sliding_window(df[col], window_size, step_size)
259                    windows.append({"name": col, "data": window_data})
260                    processed_columns.add(col)
261            
262            # Include activity ID for each window
263            activity_windows = sliding_window(df["activity_id"], window_size, step_size)
264            windows.append({"name": "activity_id", "data": activity_windows})
265            
266            # For each window, take the most common activity ID as the label
267            labels = []
268            for window in activity_windows:
269                # Get most common activity in this window
270                unique_vals, counts = np.unique(window, return_counts=True)
271                most_common_idx = np.argmax(counts)
272                labels.append(unique_vals[most_common_idx])
273            
274            windows.append({"name": "labels", "data": np.array(labels)})
275            
276            windows_data.append({"name": names[idx], "windows": windows})
277        
278        return windows_data
279    
    def extract_features(self, windows_data: List[Dict], time_domain_features: bool = True,
                       freq_domain_features: bool = True) -> List[Dict]:
        """
        Extract features from sliding windows using HAR-UP feature extraction methods.

        Args:
            windows_data: List of dicts as produced by create_sliding_windows; each has a
                "name" and a "windows" list of {"name": column, "data": windows} entries,
                including special "activity_id" and "labels" entries.
            time_domain_features: Whether to extract time domain features
            freq_domain_features: Whether to extract frequency domain features

        Returns:
            List of {"name": ..., "features": [...]} dictionaries; each per-window
            feature dict is tagged with a "label" key taken from the majority
            activity label of its window.
        """
        # Mapping from original sensor names to actual CSV column names
        sensor_map = {
            'BeltAccelerometer: x-axis (g)': 'BELT_ACC_X',
            'BeltAccelerometer: y-axis (g)': 'BELT_ACC_Y',
            'BeltAccelerometer: z-axis (g)': 'BELT_ACC_Z',
            'BeltAngularVelocity: x-axis (deg/s)': 'BELT_ANG_X',
            'BeltAngularVelocity: y-axis (deg/s)': 'BELT_ANG_Y',
            'BeltAngularVelocity: z-axis (deg/s)': 'BELT_ANG_Z',
            'BeltLuminosity: illuminance (lx)': 'BELT_LUMINOSITY',
            'NeckAccelerometer: x-axis (g)': 'NECK_ACC_X',
            'NeckAccelerometer: y-axis (g)': 'NECK_ACC_Y',
            'NeckAccelerometer: z-axis (g)': 'NECK_ACC_Z',
            'NeckAngularVelocity: x-axis (deg/s)': 'NECK_ANG_X',
            'NeckAngularVelocity: y-axis (deg/s)': 'NECK_ANG_Y',
            'NeckAngularVelocity: z-axis (deg/s)': 'NECK_ANG_Z',
            'NeckLuminosity: illuminance (lx)': 'NECK_LUMINOSITY',
            'PocketAccelerometer: x-axis (g)': 'PCKT_ACC_X',
            'PocketAccelerometer: y-axis (g)': 'PCKT_ACC_Y',
            'PocketAccelerometer: z-axis (g)': 'PCKT_ACC_Z',
            'PocketAngularVelocity: x-axis (deg/s)': 'PCKT_ANG_X',
            'PocketAngularVelocity: y-axis (deg/s)': 'PCKT_ANG_Y',
            'PocketAngularVelocity: z-axis (deg/s)': 'PCKT_ANG_Z',
            'PocketLuminosity: illuminance (lx)': 'PCKT_LUMINOSITY',
            'WristAccelerometer: x-axis (g)': 'WRST_ACC_X',
            'WristAccelerometer: y-axis (g)': 'WRST_ACC_Y',
            'WristAccelerometer: z-axis (g)': 'WRST_ACC_Z',
            'WristAngularVelocity: x-axis (deg/s)': 'WRST_ANG_X',
            'WristAngularVelocity: y-axis (deg/s)': 'WRST_ANG_Y',
            'WristAngularVelocity: z-axis (deg/s)': 'WRST_ANG_Z',
            'WristLuminosity: illuminance (lx)': 'WRST_LUMINOSITY',
            'BrainSensor': 'HELMET_RAW',
            'Infrared1': 'IR_1',
            'Infrared2': 'IR_2',
            'Infrared3': 'IR_3',
            'Infrared4': 'IR_4',
        }
        # Configure the extractor's feature families from the arguments.
        extractor = HARUPFeatureExtractor(verbose=True)
        extractor.config['time_domain'] = time_domain_features
        extractor.config['frequency_domain'] = freq_domain_features
        all_features = []
        for window_dict in windows_data:
            name = window_dict["name"]
            windows = window_dict["windows"]
            # Locate the per-window majority labels prepared by create_sliding_windows.
            labels = None
            for window in windows:
                if window["name"] == "labels":
                    labels = window["data"]
                    break
            if labels is None:
                print(f"No labels found for {name}, skipping feature extraction")
                continue
            # Keep only the sensor columns the extractor knows about, in
            # sensor_map order; report any expected columns that are absent.
            filtered_windows = []
            missing = []
            for orig_sensor, csv_col in sensor_map.items():
                found = False
                for window in windows:
                    if window["name"] == csv_col:
                        filtered_windows.append(window)
                        found = True
                        break
                if not found:
                    missing.append((orig_sensor, csv_col))
            if missing:
                print(f"[HARUP] Missing columns for {name}: {[m[1] for m in missing]}")
            # Carry the bookkeeping entries through so the extractor sees them too.
            for window in windows:
                if window["name"] == "activity_id" or window["name"] == "labels":
                    filtered_windows.append(window)
            features = extractor.extract_features(filtered_windows, fs=self.metadata['sampling_frequency'])
            # NOTE(review): the index math below assumes the extractor returns
            # features window-major (all sensor columns of window 0 first), so that
            # i // num_sensor_columns recovers the window index — confirm against
            # HARUPFeatureExtractor.extract_features. If no sensor column matched,
            # len(filtered_windows) - 2 is 0 and this raises ZeroDivisionError.
            for i, feature in enumerate(features):
                window_idx = i // (len(filtered_windows) - 2)  # Subtract 2 for labels and activity_id
                if window_idx < len(labels):
                    feature["label"] = labels[window_idx]
            all_features.append({"name": name, "features": features})
        return all_features
365    
366    def get_supported_formats(self) -> List[str]:
367        """
368        Get list of supported file formats for HAR-UP dataset.
369        
370        Returns:
371            List of supported file extensions
372        """
373        return ['.csv']
374    
375    def get_sensor_info(self) -> Dict[str, List[str]]:
376        """
377        Get information about sensors in the dataset.
378        
379        Returns:
380            Dictionary containing sensor information
381        """
382        return {
383            'sensors': self.metadata['sensors'],
384            'components': self.metadata['components'],
385            'sampling_frequency': self.metadata['sampling_frequency']
386        }
387    
388    def get_activity_info(self) -> Dict[int, str]:
389        """
390        Get information about activities in the dataset.
391        
392        Returns:
393            Dictionary mapping activity IDs to descriptions
394        """
395        return self.metadata['activities']

HAR-UP dataset loader class.

This class handles loading and processing of the HAR-UP dataset for human activity recognition and fall detection analysis.

HARUPLoader(max_workers: int = 8)
34    def __init__(self, max_workers: int = 8):
35        """
36        Initialize HAR-UP loader with concurrent download support.
37        
38        Args:
39            max_workers: Maximum number of concurrent download threads (default: 8)
40        """
41        super().__init__(
42            name="harup",
43            description="HAR-UP Dataset - Multimodal System for Fall Detection and Human Activity Recognition",
44            max_workers=max_workers
45        )
46        self.metadata = {
47            'sensors': [
48                'AnkleAccelerometer', 'AnkleAngularVelocity', 'AnkleLuminosity',
49                'RightPocketAccelerometer', 'RightPocketAngularVelocity', 'RightPocketLuminosity',
50                'BeltAccelerometer', 'BeltAngularVelocity', 'BeltLuminosity',
51                'NeckAccelerometer', 'NeckAngularVelocity', 'NeckLuminosity',
52                'WristAccelerometer', 'WristAngularVelocity', 'WristLuminosity',
53                'BrainSensor', 'Infrared'
54            ],
55            'components': {
56                'Accelerometer': ['x', 'y', 'z'],
57                'AngularVelocity': ['x', 'y', 'z'],
58                'Luminosity': ['illuminance'],
59                'BrainSensor': ['value'],
60                'Infrared': ['value']
61            },
62            'sampling_frequency': 100,  # Hz
63            'activities': {
64                1: 'Walking',
65                2: 'Walking upstairs',
66                3: 'Walking downstairs',
67                4: 'Sitting',
68                5: 'Standing',
69                6: 'Lying',
70                7: 'Falling forward using hands',
71                8: 'Falling forward using knees',
72                9: 'Falling backwards',
73                10: 'Falling sideward',
74                11: 'Falling sitting in empty chair'
75            }
76        }
77        
78        # Features used in HAR-UP
79        self.features = [
80            'Mean', 'StandardDeviation', 'RootMeanSquare', 'MaximalAmplitude',
81            'MinimalAmplitude', 'Median', 'Number of zero-crossing', 'Skewness',
82            'Kurtosis', 'First Quartile', 'Third Quartile', 'Autocorrelation',
83            'Energy'
84        ]

Initialize HAR-UP loader with concurrent download support.

Args: max_workers: Maximum number of concurrent download threads (default: 8)

metadata
features
def download_harup_data(self, data_dir: str) -> Optional[str]:
 86    def download_harup_data(self, data_dir: str) -> Optional[str]:
 87        """
 88        Download HAR-UP dataset if not already present.
 89        
 90        Args:
 91            data_dir: Directory to store the dataset
 92            
 93        Returns:
 94            Path to the extracted dataset or None if not found
 95        """
 96        # Use the utility function to download and extract the dataset
 97        download_dataset("harup", data_dir)
 98        extract_dataset("harup", data_dir)
 99        
100        # Check if dataset exists after download attempt
101        dataset_path = os.path.join(data_dir, "DataSet")
102        if not os.path.exists(dataset_path):
103            print("HAR-UP dataset not found after download attempt.")
104            print("Please ensure the dataset is organized in the following structure:")
105            print("DataSet/Subject{i}/Activity{j}/Trial{k}/Subject{i}Activity{j}Trial{k}.csv")
106            return None
107        
108        return dataset_path

Download HAR-UP dataset if not already present.

Args: data_dir: Directory to store the dataset

Returns: Path to the extracted dataset or None if not found

def load_data( self, data_dir: str, subjects: Optional[List[int]] = None, activities: Optional[List[int]] = None, trials: Optional[List[int]] = None, **kwargs) -> Tuple[List[pandas.core.frame.DataFrame], List[str]]:
110    def load_data(self, data_dir: str, subjects: Optional[List[int]] = None, 
111                activities: Optional[List[int]] = None, trials: Optional[List[int]] = None,
112                **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
113        """
114        Load HAR-UP dataset from the specified directory.
115        Args:
116            data_dir: Directory containing the dataset
117            subjects: List of subject IDs to load (default: all subjects)
118            activities: List of activity IDs to load (default: all activities)
119            trials: List of trial IDs to load (default: all trials)
120            **kwargs: Additional arguments
121        Returns:
122            Tuple of (data_list, names_list)
123        """
124        import re
125        import os
126        # Set default values if not provided (HAR-UP: 4 subjects, 11 activities, 3 trials)
127        if subjects is None:
128            subjects = list(range(1, 5))  # 4 subjects
129        if activities is None:
130            activities = list(range(1, 12))  # 11 activities
131        if trials is None:
132            trials = list(range(1, 4))  # 3 trials
133
134        # Column names as per official HAR-UP documentation
135        columns = [
136            "Timestamp",
137            "EEG_NeuroSky",
138            "Belt_Acc_X", "Belt_Acc_Y", "Belt_Acc_Z",
139            "Belt_Gyro_X", "Belt_Gyro_Y", "Belt_Gyro_Z",
140            "Belt_Luminosity",
141            "Neck_Acc_X", "Neck_Acc_Y", "Neck_Acc_Z",
142            "Neck_Gyro_X", "Neck_Gyro_Y", "Neck_Gyro_Z",
143            "Neck_Luminosity",
144            "Pocket_Acc_X", "Pocket_Acc_Y", "Pocket_Acc_Z",
145            "Pocket_Gyro_X", "Pocket_Gyro_Y", "Pocket_Gyro_Z",
146            "Pocket_Luminosity",
147            "Wrist_Acc_X", "Wrist_Acc_Y", "Wrist_Acc_Z",
148            "Wrist_Gyro_X", "Wrist_Gyro_Y", "Wrist_Gyro_Z",
149            "Wrist_Luminosity",
150            "Infrared_1", "Infrared_2", "Infrared_3", "Infrared_4"
151        ]
152
153        # If data_dir does not exist, trigger interactive download
154        if not os.path.exists(data_dir):
155            print(f"Directory {data_dir} does not exist. Attempting to download HAR-UP dataset...")
156            self.download_harup_data(data_dir)
157        # If still doesn't exist, error out
158        if not os.path.exists(data_dir):
159            print(f"Failed to create or download dataset directory: {data_dir}")
160            return [], []
161
162        # Find the UP_Fall_Detection_Dataset directory
163        dataset_path = None
164        for entry in os.listdir(data_dir):
165            entry_path = os.path.join(data_dir, entry)
166            if os.path.isdir(entry_path) and entry.startswith("UP_Fall_Detection_Dataset"):
167                dataset_path = entry_path
168                break
169        if dataset_path is None:
170            print("UP_Fall_Detection_Dataset directory not found in", data_dir)
171            print("No data loaded. Please make sure you've downloaded the HAR-UP dataset.")
172            print("Visit https://sites.google.com/up.edu.mx/har-up/ to download the dataset.")
173            return [], []
174
175        harup_data = []
176        harup_names = []
177
178        # Iterate over subjects
179        for subject_id in subjects:
180            subject_folder = f"Subject_{subject_id:02d}"
181            subject_path = os.path.join(dataset_path, subject_folder)
182            if not os.path.isdir(subject_path):
183                continue
184            
185            # Initialize empty DataFrame for this subject
186            subject_df = pd.DataFrame()
187            
188            # Iterate over activities in order
189            for activity_id in sorted(activities):
190                activity_folder = f"A{activity_id:02d}"
191                activity_path = os.path.join(subject_path, activity_folder)
192                if not os.path.isdir(activity_path):
193                    continue
194                
195                # Iterate over trials in order
196                for trial_id in sorted(trials):
197                    file_name = f"S{subject_id:02d}_A{activity_id:02d}_T{trial_id:02d}.csv"
198                    file_path = os.path.join(activity_path, file_name)
199                    name = f"{subject_folder}_{activity_folder}_T{trial_id:02d}"
200                    
201                    try:
202                        df = pd.read_csv(file_path, header=0)
203                        print(f"[HARUP] Loaded columns for {file_name}: {list(df.columns)}")
204                        df['subject_id'] = subject_id
205                        df['activity_id'] = activity_id 
206                        df['trial_id'] = trial_id
207                        df['activity_label'] = self.metadata['activities'].get(activity_id, f"A{activity_id:02d}")
208                        
209                        # Concatenate to subject's DataFrame
210                        subject_df = pd.concat([subject_df, df], ignore_index=True)
211                        harup_names.append(name)
212                        
213                    except Exception as e:
214                        print(f"Error loading {file_path}: {e}")
215            
216            # Add complete subject DataFrame to data list
217            if not subject_df.empty:
218                harup_data.append(subject_df)
219                
220        self.data = harup_data
221        self.names = harup_names
222
223        return harup_data, harup_names

Load the HAR-UP dataset from the specified directory. By default all subjects, activities and trials are loaded; pass `subjects`, `activities` or `trials` to restrict the selection. Returns a tuple of (data_list, names_list).

def create_sliding_windows( self, data: List[pandas.core.frame.DataFrame], names: List[str], window_size: int = 100, step_size: int = 50) -> List[Dict]:
225    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
226                             window_size: int = 100, step_size: int = 50) -> List[Dict]:
227        """
228        Create sliding windows from the HAR-UP dataset.
229        
230        Args:
231            data: List of DataFrames containing HAR-UP data
232            names: List of names corresponding to the data
233            window_size: Size of the sliding window (default: 100 = 1 second at 100Hz)
234            step_size: Step size for the sliding window (default: 50 = 0.5 seconds at 100Hz)
235            
236        Returns:
237            List of dictionaries containing sliding windows for each DataFrame
238        """
239        windows_data = []
240        
241        for idx, df in enumerate(data):
242            if df.empty:
243                continue
244                
245            windows = []
246            processed_columns = set()
247            
248            # Only use numeric columns (skip TIME and any non-numeric)
249            sensor_columns = [col for col in df.columns if col not in 
250                             ['subject_id', 'activity_id', 'trial_id', 'activity_label', 'TIME']
251                             and pd.api.types.is_numeric_dtype(df[col])]
252            
253
254            # Process each sensor column
255            for col in sensor_columns:
256                if col not in processed_columns:
257                    
258                    window_data = sliding_window(df[col], window_size, step_size)
259                    windows.append({"name": col, "data": window_data})
260                    processed_columns.add(col)
261            
262            # Include activity ID for each window
263            activity_windows = sliding_window(df["activity_id"], window_size, step_size)
264            windows.append({"name": "activity_id", "data": activity_windows})
265            
266            # For each window, take the most common activity ID as the label
267            labels = []
268            for window in activity_windows:
269                # Get most common activity in this window
270                unique_vals, counts = np.unique(window, return_counts=True)
271                most_common_idx = np.argmax(counts)
272                labels.append(unique_vals[most_common_idx])
273            
274            windows.append({"name": "labels", "data": np.array(labels)})
275            
276            windows_data.append({"name": names[idx], "windows": windows})
277        
278        return windows_data

Create sliding windows from the HAR-UP dataset.

Args: data: List of DataFrames containing HAR-UP data names: List of names corresponding to the data window_size: Size of the sliding window (default: 100 = 1 second at 100Hz) step_size: Step size for the sliding window (default: 50 = 0.5 seconds at 100Hz)

Returns: List of dictionaries containing sliding windows for each DataFrame

def extract_features( self, windows_data: List[Dict], time_domain_features: bool = True, freq_domain_features: bool = True) -> List[Dict]:
    def extract_features(self, windows_data: List[Dict], time_domain_features: bool = True,
                       freq_domain_features: bool = True) -> List[Dict]:
        """
        Extract features from sliding windows using HAR-UP feature extraction methods.

        Args:
            windows_data: List of dicts as produced by create_sliding_windows; each has a
                "name" and a "windows" list of {"name": column, "data": windows} entries,
                including special "activity_id" and "labels" entries.
            time_domain_features: Whether to extract time domain features
            freq_domain_features: Whether to extract frequency domain features

        Returns:
            List of {"name": ..., "features": [...]} dictionaries; each per-window
            feature dict is tagged with a "label" key taken from the majority
            activity label of its window.
        """
        # Mapping from original sensor names to actual CSV column names
        sensor_map = {
            'BeltAccelerometer: x-axis (g)': 'BELT_ACC_X',
            'BeltAccelerometer: y-axis (g)': 'BELT_ACC_Y',
            'BeltAccelerometer: z-axis (g)': 'BELT_ACC_Z',
            'BeltAngularVelocity: x-axis (deg/s)': 'BELT_ANG_X',
            'BeltAngularVelocity: y-axis (deg/s)': 'BELT_ANG_Y',
            'BeltAngularVelocity: z-axis (deg/s)': 'BELT_ANG_Z',
            'BeltLuminosity: illuminance (lx)': 'BELT_LUMINOSITY',
            'NeckAccelerometer: x-axis (g)': 'NECK_ACC_X',
            'NeckAccelerometer: y-axis (g)': 'NECK_ACC_Y',
            'NeckAccelerometer: z-axis (g)': 'NECK_ACC_Z',
            'NeckAngularVelocity: x-axis (deg/s)': 'NECK_ANG_X',
            'NeckAngularVelocity: y-axis (deg/s)': 'NECK_ANG_Y',
            'NeckAngularVelocity: z-axis (deg/s)': 'NECK_ANG_Z',
            'NeckLuminosity: illuminance (lx)': 'NECK_LUMINOSITY',
            'PocketAccelerometer: x-axis (g)': 'PCKT_ACC_X',
            'PocketAccelerometer: y-axis (g)': 'PCKT_ACC_Y',
            'PocketAccelerometer: z-axis (g)': 'PCKT_ACC_Z',
            'PocketAngularVelocity: x-axis (deg/s)': 'PCKT_ANG_X',
            'PocketAngularVelocity: y-axis (deg/s)': 'PCKT_ANG_Y',
            'PocketAngularVelocity: z-axis (deg/s)': 'PCKT_ANG_Z',
            'PocketLuminosity: illuminance (lx)': 'PCKT_LUMINOSITY',
            'WristAccelerometer: x-axis (g)': 'WRST_ACC_X',
            'WristAccelerometer: y-axis (g)': 'WRST_ACC_Y',
            'WristAccelerometer: z-axis (g)': 'WRST_ACC_Z',
            'WristAngularVelocity: x-axis (deg/s)': 'WRST_ANG_X',
            'WristAngularVelocity: y-axis (deg/s)': 'WRST_ANG_Y',
            'WristAngularVelocity: z-axis (deg/s)': 'WRST_ANG_Z',
            'WristLuminosity: illuminance (lx)': 'WRST_LUMINOSITY',
            'BrainSensor': 'HELMET_RAW',
            'Infrared1': 'IR_1',
            'Infrared2': 'IR_2',
            'Infrared3': 'IR_3',
            'Infrared4': 'IR_4',
        }
        # Configure the extractor's feature families from the arguments.
        extractor = HARUPFeatureExtractor(verbose=True)
        extractor.config['time_domain'] = time_domain_features
        extractor.config['frequency_domain'] = freq_domain_features
        all_features = []
        for window_dict in windows_data:
            name = window_dict["name"]
            windows = window_dict["windows"]
            # Locate the per-window majority labels prepared by create_sliding_windows.
            labels = None
            for window in windows:
                if window["name"] == "labels":
                    labels = window["data"]
                    break
            if labels is None:
                print(f"No labels found for {name}, skipping feature extraction")
                continue
            # Keep only the sensor columns the extractor knows about, in
            # sensor_map order; report any expected columns that are absent.
            filtered_windows = []
            missing = []
            for orig_sensor, csv_col in sensor_map.items():
                found = False
                for window in windows:
                    if window["name"] == csv_col:
                        filtered_windows.append(window)
                        found = True
                        break
                if not found:
                    missing.append((orig_sensor, csv_col))
            if missing:
                print(f"[HARUP] Missing columns for {name}: {[m[1] for m in missing]}")
            # Carry the bookkeeping entries through so the extractor sees them too.
            for window in windows:
                if window["name"] == "activity_id" or window["name"] == "labels":
                    filtered_windows.append(window)
            features = extractor.extract_features(filtered_windows, fs=self.metadata['sampling_frequency'])
            # NOTE(review): the index math below assumes the extractor returns
            # features window-major (all sensor columns of window 0 first), so that
            # i // num_sensor_columns recovers the window index — confirm against
            # HARUPFeatureExtractor.extract_features. If no sensor column matched,
            # len(filtered_windows) - 2 is 0 and this raises ZeroDivisionError.
            for i, feature in enumerate(features):
                window_idx = i // (len(filtered_windows) - 2)  # Subtract 2 for labels and activity_id
                if window_idx < len(labels):
                    feature["label"] = labels[window_idx]
            all_features.append({"name": name, "features": features})
        return all_features

Extract features from sliding windows using HAR-UP feature extraction methods. Args: windows_data: List of dictionaries containing sliding windows time_domain_features: Whether to extract time domain features freq_domain_features: Whether to extract frequency domain features Returns: List of dictionaries containing extracted features

def get_supported_formats(self) -> List[str]:
366    def get_supported_formats(self) -> List[str]:
367        """
368        Get list of supported file formats for HAR-UP dataset.
369        
370        Returns:
371            List of supported file extensions
372        """
373        return ['.csv']

Get list of supported file formats for HAR-UP dataset.

Returns: List of supported file extensions

def get_sensor_info(self) -> Dict[str, List[str]]:
375    def get_sensor_info(self) -> Dict[str, List[str]]:
376        """
377        Get information about sensors in the dataset.
378        
379        Returns:
380            Dictionary containing sensor information
381        """
382        return {
383            'sensors': self.metadata['sensors'],
384            'components': self.metadata['components'],
385            'sampling_frequency': self.metadata['sampling_frequency']
386        }

Get information about sensors in the dataset.

Returns: Dictionary containing sensor information

def get_activity_info(self) -> Dict[int, str]:
388    def get_activity_info(self) -> Dict[int, str]:
389        """
390        Get information about activities in the dataset.
391        
392        Returns:
393            Dictionary mapping activity IDs to descriptions
394        """
395        return self.metadata['activities']

Get information about activities in the dataset.

Returns: Dictionary mapping activity IDs to descriptions

def load_harup_data(data_dir: str, subjects=None, activities=None, trials=None):
def load_harup_data(data_dir: str, subjects=None, activities=None, trials=None):
    """
    Legacy function for loading HAR-UP data.

    Thin wrapper kept for backward compatibility; delegates to
    HARUPLoader.load_data.

    Args:
        data_dir: Directory containing the dataset
        subjects: List of subject IDs to load (default: all subjects)
        activities: List of activity IDs to load (default: all activities)
        trials: List of trial IDs to load (default: all trials)

    Returns:
        Tuple of (data_list, names_list)
    """
    return HARUPLoader().load_data(data_dir, subjects, activities, trials)

Legacy function for loading HAR-UP data.

Args: data_dir: Directory containing the dataset subjects: List of subject IDs to load (default: all subjects) activities: List of activity IDs to load (default: all activities) trials: List of trial IDs to load (default: all trials)

Returns: Tuple of (data_list, names_list)

def create_harup_windows(harup_data, harup_names, window_size=100, step_size=50):
def create_harup_windows(harup_data, harup_names, window_size=100, step_size=50):
    """
    Legacy function for creating sliding windows from HAR-UP data.

    Thin wrapper kept for backward compatibility; delegates to
    HARUPLoader.create_sliding_windows.

    Args:
        harup_data: List of dataframes containing HAR-UP data
        harup_names: List of names of the HAR-UP dataframes
        window_size: Size of the sliding window
        step_size: Step size for the sliding window

    Returns:
        List of dictionaries containing sliding windows for each DataFrame
    """
    return HARUPLoader().create_sliding_windows(harup_data, harup_names, window_size, step_size)

Legacy function for creating sliding windows from HAR-UP data.

Args: harup_data: List of dataframes containing HAR-UP data harup_names: List of names of the HAR-UP dataframes window_size: Size of the sliding window step_size: Step size for the sliding window

Returns: List of dictionaries containing sliding windows for each DataFrame

def extract_harup_features(windows_data, time_domain=True, freq_domain=True):
def extract_harup_features(windows_data, time_domain=True, freq_domain=True):
    """
    Legacy function for extracting features from HAR-UP windows.

    Thin wrapper kept for backward compatibility; delegates to
    HARUPLoader.extract_features.

    Args:
        windows_data: List of dictionaries containing sliding windows
        time_domain: Whether to extract time domain features
        freq_domain: Whether to extract frequency domain features

    Returns:
        List of dictionaries containing extracted features
    """
    return HARUPLoader().extract_features(windows_data, time_domain, freq_domain)

Legacy function for extracting features from HAR-UP windows.

Args: windows_data: List of dictionaries containing sliding windows time_domain: Whether to extract time domain features freq_domain: Whether to extract frequency domain features

Returns: List of dictionaries containing extracted features