gaitsetpy.features.gait_features
Gait Feature Extractor Class. Maintainer: @aharshit123456
This module contains the GaitFeatureExtractor class that inherits from BaseFeatureExtractor and provides comprehensive gait feature extraction functionality.
'''
Gait Feature Extractor Class
Maintainer: @aharshit123456

This module contains the GaitFeatureExtractor class that inherits from BaseFeatureExtractor
and provides comprehensive gait feature extraction functionality.
'''

from typing import List, Dict, Any
import numpy as np
import logging
from tqdm import tqdm
from ..core.base_classes import BaseFeatureExtractor
from .utils import (
    calculate_mean,
    calculate_standard_deviation,
    calculate_variance,
    calculate_skewness,
    calculate_kurtosis,
    calculate_root_mean_square,
    calculate_range,
    calculate_median,
    calculate_mode,
    calculate_mean_absolute_value,
    calculate_median_absolute_deviation,
    calculate_peak_height,
    calculate_stride_times,
    calculate_step_time,
    calculate_cadence,
    calculate_freezing_index,
    calculate_dominant_frequency,
    calculate_peak_frequency,
    calculate_power_spectral_entropy,
    calculate_principal_harmonic_frequency,
    calculate_entropy,
    calculate_interquartile_range,
    calculate_correlation,
    calculate_auto_regression_coefficients,
    calculate_zero_crossing_rate,
    calculate_energy,
)

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class GaitFeatureExtractor(BaseFeatureExtractor):
    """
    Comprehensive gait feature extractor class.

    This class extracts various time-domain, frequency-domain, and statistical features
    from gait data sliding windows.
    """

    def __init__(self, verbose: bool = True):
        """
        Initialize the extractor with a default feature configuration.

        Args:
            verbose: Whether to print status messages and show progress bars.
        """
        super().__init__(
            name="gait_features",
            description="Comprehensive gait feature extractor for time-domain, frequency-domain, and statistical features"
        )
        self.verbose = verbose
        # Defaults only; each flag can be overridden per call via
        # extract_features(**kwargs).
        self.config = {
            'time_domain': True,
            'frequency_domain': True,
            'statistical': True,
            'ar_order': 3  # Order for auto-regression coefficients
        }

        if self.verbose:
            print("🚀 GaitFeatureExtractor initialized successfully!")
            print(f"📊 Default configuration: {self.config}")

    def extract_features(self, windows: List[Dict], fs: int, **kwargs) -> List[Dict]:
        """
        Extract gait features from sliding windows.

        Args:
            windows: List of sliding window dictionaries
            fs: Sampling frequency
            **kwargs: Additional arguments including time_domain, frequency_domain, statistical flags

        Returns:
            List of feature dictionaries for each sensor
        """
        # Update config with any passed arguments
        time_domain = kwargs.get('time_domain', self.config['time_domain'])
        frequency_domain = kwargs.get('frequency_domain', self.config['frequency_domain'])
        statistical = kwargs.get('statistical', self.config['statistical'])
        ar_order = kwargs.get('ar_order', self.config['ar_order'])

        if self.verbose:
            print("\n" + "="*60)
            print("🔍 STARTING GAIT FEATURE EXTRACTION")
            print("="*60)
            print(f"📈 Total sensors/windows to process: {len(windows)}")
            print(f"🔊 Sampling frequency: {fs} Hz")
            print(f"⏱️ Time domain features: {'✅' if time_domain else '❌'}")
            print(f"🌊 Frequency domain features: {'✅' if frequency_domain else '❌'}")
            print(f"📊 Statistical features: {'✅' if statistical else '❌'}")
            print(f"🔄 Auto-regression order: {ar_order}")
            print("-"*60)

        features = []

        # Main progress bar for processing all windows
        main_pbar = tqdm(
            windows,
            desc="🔍 Processing Sensors",
            unit="sensor",
            disable=not self.verbose
        )

        # NOTE: the original loop used enumerate() but never used the index;
        # the unused local has been removed.
        for window_dict in main_pbar:
            sensor_name = window_dict['name']
            window_data = window_dict['data']

            if self.verbose:
                main_pbar.set_postfix({
                    'Current': sensor_name,
                    'Windows': len(window_data) if isinstance(window_data, list) else 1
                })

            # Annotation windows carry labels, not signal data: reduce each
            # window to its most common label instead of extracting features.
            if sensor_name == 'annotations':
                if self.verbose:
                    logger.info(f"📝 Processing annotation data for {sensor_name}")

                features.append({
                    'name': sensor_name,
                    'features': {},
                    'annotations': [self._extract_annotation_labels(window) for window in window_data]
                })
                continue

            if self.verbose:
                logger.info(f"🎯 Processing sensor: {sensor_name}")
                logger.info(f"📦 Number of windows: {len(window_data)}")

            sensor_features = {'name': sensor_name, 'features': {}}

            # Time domain features
            if time_domain:
                if self.verbose:
                    print(f" ⏱️ Extracting time domain features for {sensor_name}...")

                time_features = self._extract_time_domain_features(window_data)
                sensor_features['features'].update(time_features)

                if self.verbose:
                    feature_count = sum(len(v) if isinstance(v, list) else 1 for v in time_features.values())
                    print(f" ✅ Time domain: {len(time_features)} feature types, {feature_count} total features")

            # Frequency domain features
            if frequency_domain:
                if self.verbose:
                    print(f" 🌊 Extracting frequency domain features for {sensor_name}...")

                freq_features = self._extract_frequency_domain_features(window_data, fs)
                sensor_features['features'].update(freq_features)

                if self.verbose:
                    feature_count = sum(len(v) if isinstance(v, list) else 1 for v in freq_features.values())
                    print(f" ✅ Frequency domain: {len(freq_features)} feature types, {feature_count} total features")

            # Statistical features
            if statistical:
                if self.verbose:
                    print(f" 📊 Extracting statistical features for {sensor_name}...")

                stat_features = self._extract_statistical_features(window_data)
                sensor_features['features'].update(stat_features)

                if self.verbose:
                    feature_count = sum(len(v) if isinstance(v, list) else 1 for v in stat_features.values())
                    print(f" ✅ Statistical: {len(stat_features)} feature types, {feature_count} total features")

            # Auto-regression coefficients (always extracted, independent of
            # the three boolean flags above).
            if self.verbose:
                print(f" 🔄 Extracting auto-regression coefficients for {sensor_name}...")

            ar_features = self._extract_ar_coefficients(window_data, ar_order)
            sensor_features['features'].update(ar_features)

            if self.verbose:
                feature_count = sum(len(v) if isinstance(v, list) else 1 for v in ar_features.values())
                print(f" ✅ Auto-regression: {len(ar_features)} feature types, {feature_count} total features")

            # Calculate total features for this sensor
            total_features = sum(
                len(v) if isinstance(v, list) else 1
                for v in sensor_features['features'].values()
            )

            if self.verbose:
                print(f" 🎯 Total features extracted for {sensor_name}: {total_features}")
                print(f" 📋 Feature types: {list(sensor_features['features'].keys())}")
                print("-"*40)

            features.append(sensor_features)

        if self.verbose:
            print("\n" + "="*60)
            print("🎉 FEATURE EXTRACTION COMPLETED!")
            print("="*60)
            print(f"📊 Total sensors processed: {len(features)}")

            # Calculate overall statistics
            total_feature_count = 0
            for feature_dict in features:
                if 'features' in feature_dict:
                    total_feature_count += sum(
                        len(v) if isinstance(v, list) else 1
                        for v in feature_dict['features'].values()
                    )

            print(f"🔢 Total features extracted: {total_feature_count}")
            # FIX: guard against empty input — dividing by len(features) when
            # no windows were supplied raised ZeroDivisionError.
            if features:
                print(f"📈 Average features per sensor: {total_feature_count / len(features):.1f}")
            print("="*60)

        return features

    def _extract_time_domain_features(self, windows: List) -> Dict[str, List]:
        """Extract time domain features from windows."""
        if self.verbose:
            print(" 🔍 Computing time domain features...")

        time_features = {}

        # Define time domain feature functions
        time_domain_funcs = {
            'mean': calculate_mean,
            'std': calculate_standard_deviation,
            'variance': calculate_variance,
            'rms': calculate_root_mean_square,
            'range': calculate_range,
            'median': calculate_median,
            'mode': calculate_mode,
            'mean_absolute_value': calculate_mean_absolute_value,
            'median_absolute_deviation': calculate_median_absolute_deviation,
            'peak_height': calculate_peak_height,
            'zero_crossing_rate': calculate_zero_crossing_rate,
            'energy': calculate_energy,
        }

        # Progress bar for time domain features
        feature_pbar = tqdm(
            time_domain_funcs.items(),
            desc=" ⏱️ Time features",
            unit="feature",
            leave=False,
            disable=not self.verbose
        )

        for feature_name, func in feature_pbar:
            if self.verbose:
                feature_pbar.set_postfix({'Computing': feature_name})

            time_features[feature_name] = [
                func(self._ensure_numpy_array(window)) for window in windows
            ]

        return time_features

    def _ensure_numpy_array(self, signal):
        """Convert pandas Series to numpy array if needed."""
        if hasattr(signal, 'values'):
            return signal.values
        return signal

    def _extract_frequency_domain_features(self, windows: List, fs: int) -> Dict[str, List]:
        """Extract frequency domain features from windows."""
        if self.verbose:
            print(" 🔍 Computing frequency domain features...")

        freq_features = {}

        # Define frequency domain feature functions; fs is captured in the
        # lambdas so all funcs share the unary signature used below.
        freq_domain_funcs = {
            'dominant_frequency': lambda w: calculate_dominant_frequency(w, fs),
            'peak_frequency': lambda w: calculate_peak_frequency(w, fs),
            'power_spectral_entropy': lambda w: calculate_power_spectral_entropy(w, fs),
            'principal_harmonic_frequency': lambda w: calculate_principal_harmonic_frequency(w, fs),
            'stride_times': lambda w: calculate_stride_times(w, fs),
            'step_time': lambda w: calculate_step_time(w, fs),
            'cadence': lambda w: calculate_cadence(w, fs),
            'freezing_index': lambda w: calculate_freezing_index(w, fs),
        }

        # Progress bar for frequency domain features
        feature_pbar = tqdm(
            freq_domain_funcs.items(),
            desc=" 🌊 Freq features",
            unit="feature",
            leave=False,
            disable=not self.verbose
        )

        for feature_name, func in feature_pbar:
            if self.verbose:
                feature_pbar.set_postfix({'Computing': feature_name})

            freq_features[feature_name] = [
                func(self._ensure_numpy_array(window)) for window in windows
            ]

        return freq_features

    def _extract_statistical_features(self, windows: List) -> Dict[str, List]:
        """Extract statistical features from windows."""
        if self.verbose:
            print(" 🔍 Computing statistical features...")

        stat_features = {}

        # Define statistical feature functions
        stat_funcs = {
            'skewness': calculate_skewness,
            'kurtosis': calculate_kurtosis,
            'entropy': calculate_entropy,
            'interquartile_range': calculate_interquartile_range,
        }

        # Progress bar for statistical features
        feature_pbar = tqdm(
            stat_funcs.items(),
            desc=" 📊 Stat features",
            unit="feature",
            leave=False,
            disable=not self.verbose
        )

        for feature_name, func in feature_pbar:
            if self.verbose:
                feature_pbar.set_postfix({'Computing': feature_name})

            stat_features[feature_name] = [
                func(self._ensure_numpy_array(window)) for window in windows
            ]

        # Handle correlation separately (needs two signals): lag-1
        # autocorrelation of the window against itself.
        if self.verbose:
            print(" 🔗 Computing correlation features...")

        stat_features['correlation'] = [
            calculate_correlation(
                self._ensure_numpy_array(window)[:-1],
                self._ensure_numpy_array(window)[1:]
            ) if len(window) > 1 else 0
            for window in windows
        ]

        return stat_features

    def _extract_ar_coefficients(self, windows: List, order: int) -> Dict[str, List]:
        """Extract auto-regression coefficients from windows."""
        if self.verbose:
            print(f" 🔍 Computing auto-regression coefficients (order={order})...")

        # Progress bar for AR coefficients
        ar_pbar = tqdm(
            windows,
            desc=" 🔄 AR coeffs",
            unit="window",
            leave=False,
            disable=not self.verbose
        )

        ar_coeffs = []
        for window in ar_pbar:
            coeffs = calculate_auto_regression_coefficients(
                self._ensure_numpy_array(window), order
            )
            ar_coeffs.append(coeffs)

        return {'ar_coefficients': ar_coeffs}

    def _extract_annotation_labels(self, window) -> int:
        """Extract the most common annotation label from a window.

        Returns 0 for an empty window, mirroring the pandas branch.
        """
        if hasattr(window, 'mode'):
            # FIX: compute mode() once instead of twice.
            modes = window.mode()
            return modes.iloc[0] if len(modes) > 0 else 0
        else:
            # For numpy arrays or other types
            arr = np.asarray(window)
            if arr.size == 0:
                # FIX: np.argmax on an empty array raised ValueError; fall
                # back to 0 like the pandas branch does.
                return 0
            unique, counts = np.unique(arr, return_counts=True)
            return unique[np.argmax(counts)]

    def get_feature_names(self) -> List[str]:
        """
        Get names of all features that can be extracted.

        Returns:
            List of feature names
        """
        time_domain_features = [
            'mean', 'std', 'variance', 'rms', 'range', 'median', 'mode',
            'mean_absolute_value', 'median_absolute_deviation', 'peak_height',
            'zero_crossing_rate', 'energy'
        ]

        frequency_domain_features = [
            'dominant_frequency', 'peak_frequency', 'power_spectral_entropy',
            'principal_harmonic_frequency', 'stride_times', 'step_time',
            'cadence', 'freezing_index'
        ]

        statistical_features = [
            'skewness', 'kurtosis', 'entropy', 'interquartile_range', 'correlation'
        ]

        other_features = ['ar_coefficients']

        return time_domain_features + frequency_domain_features + statistical_features + other_features

    def print_extraction_summary(self, features: List[Dict]) -> None:
        """
        Print a detailed summary of extracted features.

        Args:
            features: List of feature dictionaries returned by extract_features
        """
        print("\n" + "="*80)
        print("📊 FEATURE EXTRACTION SUMMARY")
        print("="*80)

        for i, feature_dict in enumerate(features):
            sensor_name = feature_dict['name']
            print(f"\n🎯 Sensor {i+1}: {sensor_name}")
            print("-" * 40)

        if 'features' in feature_dict and feature_dict['features']:
            for feature_type, feature_values in feature_dict['features'].items():
                if isinstance(feature_values, list):
                    print(f" 📈 {feature_type}: {len(feature_values)} values")
                    if feature_values:
                        sample_value = feature_values[0]
                        if isinstance(sample_value, (list, np.ndarray)):
                            print(f" └── Shape per window: {np.array(sample_value).shape}")
                        else:
                            # NOTE(review): assumes scalar features are
                            # numeric — ':.4f' would raise on strings.
                            print(f" └── Sample value: {sample_value:.4f}")
                else:
                    print(f" 📈 {feature_type}: {feature_values}")

        if 'annotations' in feature_dict:
            print(f" 📝 Annotations: {len(feature_dict['annotations'])} windows")

        print("\n" + "="*80)


# Legacy function wrappers for backward compatibility
def get_stride_times_for_windows(windows, fs):
    """Calculate stride times for all windows in the input."""
    return [calculate_stride_times(window, fs) for window in windows]

def get_zero_crossing_rates_for_windows(windows):
    """Calculate zero-crossing rates for all windows in the input."""
    return [calculate_zero_crossing_rate(window) for window in windows]

def get_freezing_indices_for_windows(windows, fs):
    """Calculate freezing indices for all windows in the input."""
    return [calculate_freezing_index(window, fs) for window in windows]

def get_standard_deviations_for_windows(windows):
    """Calculate standard deviations for all windows in the input."""
    return [calculate_standard_deviation(window) for window in windows]

def get_entropies_for_windows(windows):
    """Calculate entropies for all windows in the input."""
    return [calculate_entropy(window) for window in windows]

def get_energies_for_windows(windows):
    """Calculate energies for all windows in the input."""
    return [calculate_energy(window) for window in windows]

def get_variances_for_windows(windows):
    """Calculate variances for all windows in the input."""
    return [calculate_variance(window) for window in windows]

def get_kurtosis_for_windows(windows):
    """Calculate kurtosis values for all windows in the input."""
    return [calculate_kurtosis(window) for window in windows]

def get_step_times_for_windows(windows, fs):
    """Calculate step times for all windows in the input."""
    return [calculate_step_time(window, fs) for window in windows]

def get_mean_for_windows(windows):
    """Calculate means for all windows in the input."""
    return [calculate_mean(window) for window in windows]

def get_standard_deviation_for_windows(windows):
    """Calculate standard deviations for all windows in the input."""
    return [calculate_standard_deviation(window) for window in windows]

def get_variance_for_windows(windows):
    """Calculate variances for all windows in the input."""
    return [calculate_variance(window) for window in windows]

def get_skewness_for_windows(windows):
    """Calculate skewness values for all windows in the input."""
    return [calculate_skewness(window) for window in windows]

def get_root_mean_square_for_windows(windows):
    """Calculate root-mean-square values for all windows in the input."""
    return [calculate_root_mean_square(window) for window in windows]

def get_range_for_windows(windows):
    """Calculate ranges for all windows in the input."""
    return [calculate_range(window) for window in windows]

def get_median_for_windows(windows):
    """Calculate medians for all windows in the input."""
    return [calculate_median(window) for window in windows]

def get_mode_for_windows(windows):
    """Calculate modes for all windows in the input."""
    return [calculate_mode(window) for window in windows]

def get_mean_absolute_value_for_windows(windows):
    """Calculate mean absolute values for all windows in the input."""
    return [calculate_mean_absolute_value(window) for window in windows]

def get_median_absolute_deviation_for_windows(windows):
    """Calculate median absolute deviations for all windows in the input."""
    return [calculate_median_absolute_deviation(window) for window in windows]

def get_peak_height_for_windows(windows):
    """Calculate peak heights for all windows in the input."""
    return [calculate_peak_height(window) for window in windows]
def get_cadence_for_windows(windows, fs):
    """Calculate cadence for all windows in the input."""
    return [calculate_cadence(window, fs) for window in windows]

def get_freezing_index_for_windows(windows, fs):
    """Calculate freezing indices for all windows in the input."""
    return [calculate_freezing_index(window, fs) for window in windows]

def get_dominant_frequency_for_windows(windows, fs):
    """Calculate dominant frequencies for all windows in the input."""
    return [calculate_dominant_frequency(window, fs) for window in windows]

def get_peak_frequency_for_windows(windows, fs):
    """Calculate peak frequencies for all windows in the input."""
    return [calculate_peak_frequency(window, fs) for window in windows]

def get_power_spectral_entropy_for_windows(windows, fs):
    """Calculate power spectral entropies for all windows in the input."""
    return [calculate_power_spectral_entropy(window, fs) for window in windows]

def get_principal_harmonic_frequency_for_windows(windows, fs):
    """Calculate principal harmonic frequencies for all windows in the input."""
    return [calculate_principal_harmonic_frequency(window, fs) for window in windows]

def get_entropy_for_windows(windows):
    """Calculate entropies for all windows in the input."""
    return [calculate_entropy(window) for window in windows]

def get_interquartile_range_for_windows(windows):
    """Calculate interquartile ranges for all windows in the input."""
    return [calculate_interquartile_range(window) for window in windows]

def get_correlation_for_windows(windows):
    """Calculate lag-1 autocorrelation for all windows in the input."""
    # For correlation, we need to handle it differently since it needs two signals
    # We'll calculate autocorrelation for each window
    return [calculate_correlation(window[:-1], window[1:]) if len(window) > 1 else 0 for window in windows]

def get_auto_regression_coefficients_for_windows(windows, order=3):
    """Calculate auto-regression coefficients (default order 3) for all windows."""
    return [calculate_auto_regression_coefficients(window, order) for window in windows]

def extract_gait_features(daphnet_windows, fs, time_domain=True, frequency_domain=True, statistical=True, verbose=True):
    """
    Legacy function for extracting gait features.

    Args:
        daphnet_windows: List of sliding window dictionaries
        fs: Sampling frequency
        time_domain: Whether to extract time domain features
        frequency_domain: Whether to extract frequency domain features
        statistical: Whether to extract statistical features
        verbose: Whether to show verbose output and progress bars

    Returns:
        List of feature dictionaries
    """
    # Thin wrapper: delegates to the class-based extractor so old callers
    # keep working without touching the new API.
    extractor = GaitFeatureExtractor(verbose=verbose)
    return extractor.extract_features(
        daphnet_windows, fs,
        time_domain=time_domain,
        frequency_domain=frequency_domain,
        statistical=statistical
    )
# NOTE(review): this entire class definition is a verbatim duplicate of the
# GaitFeatureExtractor defined earlier in this file. Python binds the name to
# whichever definition executes last, so this later copy shadows the first —
# TODO deduplicate.
class GaitFeatureExtractor(BaseFeatureExtractor):
    """
    Comprehensive gait feature extractor class.

    This class extracts various time-domain, frequency-domain, and statistical features
    from gait data sliding windows.
    """

    def __init__(self, verbose: bool = True):
        # Register name/description with the base extractor.
        super().__init__(
            name="gait_features",
            description="Comprehensive gait feature extractor for time-domain, frequency-domain, and statistical features"
        )
        self.verbose = verbose
        # Default extraction flags; overridable per call via extract_features(**kwargs).
        self.config = {
            'time_domain': True,
            'frequency_domain': True,
            'statistical': True,
            'ar_order': 3  # Order for auto-regression coefficients
        }

        if self.verbose:
            print("🚀 GaitFeatureExtractor initialized successfully!")
            print(f"📊 Default configuration: {self.config}")

    def extract_features(self, windows: List[Dict], fs: int, **kwargs) -> List[Dict]:
        """
        Extract gait features from sliding windows.

        Args:
            windows: List of sliding window dictionaries
            fs: Sampling frequency
            **kwargs: Additional arguments including time_domain, frequency_domain, statistical flags

        Returns:
            List of feature dictionaries for each sensor
        """
        # Update config with any passed arguments
        time_domain = kwargs.get('time_domain', self.config['time_domain'])
        frequency_domain = kwargs.get('frequency_domain', self.config['frequency_domain'])
        statistical = kwargs.get('statistical', self.config['statistical'])
        ar_order = kwargs.get('ar_order', self.config['ar_order'])

        if self.verbose:
            print("\n" + "="*60)
            print("🔍 STARTING GAIT FEATURE EXTRACTION")
            print("="*60)
            print(f"📈 Total sensors/windows to process: {len(windows)}")
            print(f"🔊 Sampling frequency: {fs} Hz")
            print(f"⏱️ Time domain features: {'✅' if time_domain else '❌'}")
            print(f"🌊 Frequency domain features: {'✅' if frequency_domain else '❌'}")
            print(f"📊 Statistical features: {'✅' if statistical else '❌'}")
            print(f"🔄 Auto-regression order: {ar_order}")
            print("-"*60)

        features = []

        # Main progress bar for processing all windows
        main_pbar = tqdm(
            windows,
            desc="🔍 Processing Sensors",
            unit="sensor",
            disable=not self.verbose
        )

        # NOTE(review): the index `i` is never used in the loop body.
        for i, window_dict in enumerate(main_pbar):
            sensor_name = window_dict['name']
            window_data = window_dict['data']

            if self.verbose:
                main_pbar.set_postfix({
                    'Current': sensor_name,
                    'Windows': len(window_data) if isinstance(window_data, list) else 1
                })

            # Skip annotation windows
            if sensor_name == 'annotations':
                if self.verbose:
                    logger.info(f"📝 Processing annotation data for {sensor_name}")

                features.append({
                    'name': sensor_name,
                    'features': {},
                    'annotations': [self._extract_annotation_labels(window) for window in window_data]
                })
                continue

            if self.verbose:
                logger.info(f"🎯 Processing sensor: {sensor_name}")
                logger.info(f"📦 Number of windows: {len(window_data)}")

            sensor_features = {'name': sensor_name, 'features': {}}

            # Time domain features
            if time_domain:
                if self.verbose:
                    print(f" ⏱️ Extracting time domain features for {sensor_name}...")

                time_features = self._extract_time_domain_features(window_data)
                sensor_features['features'].update(time_features)

                if self.verbose:
                    feature_count = sum(len(v) if isinstance(v, list) else 1 for v in time_features.values())
                    print(f" ✅ Time domain: {len(time_features)} feature types, {feature_count} total features")

            # Frequency domain features
            if frequency_domain:
                if self.verbose:
                    print(f" 🌊 Extracting frequency domain features for {sensor_name}...")

                freq_features = self._extract_frequency_domain_features(window_data, fs)
                sensor_features['features'].update(freq_features)

                if self.verbose:
                    feature_count = sum(len(v) if isinstance(v, list) else 1 for v in freq_features.values())
                    print(f" ✅ Frequency domain: {len(freq_features)} feature types, {feature_count} total features")

            # Statistical features
            if statistical:
                if self.verbose:
                    print(f" 📊 Extracting statistical features for {sensor_name}...")

                stat_features = self._extract_statistical_features(window_data)
                sensor_features['features'].update(stat_features)

                if self.verbose:
                    feature_count = sum(len(v) if isinstance(v, list) else 1 for v in stat_features.values())
                    print(f" ✅ Statistical: {len(stat_features)} feature types, {feature_count} total features")

            # Auto-regression coefficients — note: extracted unconditionally,
            # regardless of the three boolean flags above.
            if self.verbose:
                print(f" 🔄 Extracting auto-regression coefficients for {sensor_name}...")

            ar_features = self._extract_ar_coefficients(window_data, ar_order)
            sensor_features['features'].update(ar_features)

            if self.verbose:
                feature_count = sum(len(v) if isinstance(v, list) else 1 for v in ar_features.values())
                print(f" ✅ Auto-regression: {len(ar_features)} feature types, {feature_count} total features")

            # Calculate total features for this sensor
            total_features = sum(
                len(v) if isinstance(v, list) else 1
                for v in sensor_features['features'].values()
            )

            if self.verbose:
                print(f" 🎯 Total features extracted for {sensor_name}: {total_features}")
                print(f" 📋 Feature types: {list(sensor_features['features'].keys())}")
                print("-"*40)

            features.append(sensor_features)

        if self.verbose:
            print("\n" + "="*60)
            print("🎉 FEATURE EXTRACTION COMPLETED!")
            print("="*60)
            print(f"📊 Total sensors processed: {len(features)}")

            # Calculate overall statistics
            total_feature_count = 0
            for feature_dict in features:
                if 'features' in feature_dict:
                    total_feature_count += sum(
                        len(v) if isinstance(v, list) else 1
                        for v in feature_dict['features'].values()
                    )

            print(f"🔢 Total features extracted: {total_feature_count}")
            # NOTE(review): raises ZeroDivisionError when `windows` (and hence
            # `features`) is empty — needs a guard.
            print(f"📈 Average features per sensor: {total_feature_count / len(features):.1f}")
            print("="*60)

        return features

    def _extract_time_domain_features(self, windows: List) -> Dict[str, List]:
        """Extract time domain features from windows."""
        if self.verbose:
            print(" 🔍 Computing time domain features...")

        time_features = {}

        # Define time domain feature functions (feature name -> unary callable)
        time_domain_funcs = {
            'mean': calculate_mean,
            'std': calculate_standard_deviation,
            'variance': calculate_variance,
            'rms': calculate_root_mean_square,
            'range': calculate_range,
            'median': calculate_median,
            'mode': calculate_mode,
            'mean_absolute_value': calculate_mean_absolute_value,
            'median_absolute_deviation': calculate_median_absolute_deviation,
            'peak_height': calculate_peak_height,
            'zero_crossing_rate': calculate_zero_crossing_rate,
            'energy': calculate_energy,
        }

        # Progress bar for time domain features
        feature_pbar = tqdm(
            time_domain_funcs.items(),
            desc=" ⏱️ Time features",
            unit="feature",
            leave=False,
            disable=not self.verbose
        )

        for feature_name, func in feature_pbar:
            if self.verbose:
                feature_pbar.set_postfix({'Computing': feature_name})

            time_features[feature_name] = [
                func(self._ensure_numpy_array(window)) for window in windows
            ]

        return time_features

    def _ensure_numpy_array(self, signal):
        """Convert pandas Series to numpy array if needed."""
        # Duck-typed: anything exposing `.values` (e.g. a pandas Series) is
        # unwrapped; everything else passes through unchanged.
        if hasattr(signal, 'values'):
            return signal.values
        return signal

    def _extract_frequency_domain_features(self, windows: List, fs: int) -> Dict[str, List]:
        """Extract frequency domain features from windows."""
        if self.verbose:
            print(" 🔍 Computing frequency domain features...")

        freq_features = {}

        # Define frequency domain feature functions; fs is closed over so each
        # entry is a unary callable like the time-domain ones.
        freq_domain_funcs = {
            'dominant_frequency': lambda w: calculate_dominant_frequency(w, fs),
            'peak_frequency': lambda w: calculate_peak_frequency(w, fs),
            'power_spectral_entropy': lambda w: calculate_power_spectral_entropy(w, fs),
            'principal_harmonic_frequency': lambda w: calculate_principal_harmonic_frequency(w, fs),
            'stride_times': lambda w: calculate_stride_times(w, fs),
            'step_time': lambda w: calculate_step_time(w, fs),
            'cadence': lambda w: calculate_cadence(w, fs),
            'freezing_index': lambda w: calculate_freezing_index(w, fs),
        }

        # Progress bar for frequency domain features
        feature_pbar = tqdm(
            freq_domain_funcs.items(),
            desc=" 🌊 Freq features",
            unit="feature",
            leave=False,
            disable=not self.verbose
        )

        for feature_name, func in feature_pbar:
            if self.verbose:
                feature_pbar.set_postfix({'Computing': feature_name})

            freq_features[feature_name] = [
                func(self._ensure_numpy_array(window)) for window in windows
            ]

        return freq_features

    def _extract_statistical_features(self, windows: List) -> Dict[str, List]:
        """Extract statistical features from windows."""
        if self.verbose:
            print(" 🔍 Computing statistical features...")

        stat_features = {}

        # Define statistical feature functions
        stat_funcs = {
            'skewness': calculate_skewness,
            'kurtosis': calculate_kurtosis,
            'entropy': calculate_entropy,
            'interquartile_range': calculate_interquartile_range,
        }

        # Progress bar for statistical features
        feature_pbar = tqdm(
            stat_funcs.items(),
            desc=" 📊 Stat features",
            unit="feature",
            leave=False,
            disable=not self.verbose
        )

        for feature_name, func in feature_pbar:
            if self.verbose:
                feature_pbar.set_postfix({'Computing': feature_name})

            stat_features[feature_name] = [
                func(self._ensure_numpy_array(window)) for window in windows
            ]

        # Handle correlation separately (needs two signals): correlates the
        # window against itself shifted by one sample (lag-1 autocorrelation).
        if self.verbose:
            print(" 🔗 Computing correlation features...")

        stat_features['correlation'] = [
            calculate_correlation(
                self._ensure_numpy_array(window)[:-1],
                self._ensure_numpy_array(window)[1:]
            ) if len(window) > 1 else 0
            for window in windows
        ]

        return stat_features

    def _extract_ar_coefficients(self, windows: List, order: int) -> Dict[str, List]:
        """Extract auto-regression coefficients from windows."""
        if self.verbose:
            print(f" 🔍 Computing auto-regression coefficients (order={order})...")

        # Progress bar for AR coefficients
        ar_pbar = tqdm(
            windows,
            desc=" 🔄 AR coeffs",
            unit="window",
            leave=False,
            disable=not self.verbose
        )

        ar_coeffs = []
        for window in ar_pbar:
            coeffs = calculate_auto_regression_coefficients(
                self._ensure_numpy_array(window), order
            )
            ar_coeffs.append(coeffs)

        return {'ar_coefficients': ar_coeffs}

    def _extract_annotation_labels(self, window) -> int:
        """Extract the most common annotation label from a window."""
        if hasattr(window, 'mode'):
            # NOTE(review): window.mode() is computed twice here.
            return window.mode().iloc[0] if len(window.mode()) > 0 else 0
        else:
            # For numpy arrays or other types
            # NOTE(review): np.argmax raises on an empty window — no 0
            # fallback like the pandas branch; confirm callers never pass one.
            unique, counts = np.unique(window, return_counts=True)
            return unique[np.argmax(counts)]

    def get_feature_names(self) -> List[str]:
        """
        Get names of all features that can be extracted.

        Returns:
            List of feature names
        """
        time_domain_features = [
            'mean', 'std', 'variance', 'rms', 'range', 'median', 'mode',
            'mean_absolute_value', 'median_absolute_deviation', 'peak_height',
            'zero_crossing_rate', 'energy'
        ]

        frequency_domain_features = [
            'dominant_frequency', 'peak_frequency', 'power_spectral_entropy',
            'principal_harmonic_frequency', 'stride_times', 'step_time',
            'cadence', 'freezing_index'
        ]

        statistical_features = [
            'skewness', 'kurtosis', 'entropy', 'interquartile_range', 'correlation'
        ]

        other_features = ['ar_coefficients']

        return time_domain_features + frequency_domain_features + statistical_features + other_features

    def print_extraction_summary(self, features: List[Dict]) -> None:
        """
        Print a detailed summary of extracted features.

        Args:
            features: List of feature dictionaries returned by extract_features
        """
        print("\n" + "="*80)
        print("📊 FEATURE EXTRACTION SUMMARY")
        print("="*80)

        for i, feature_dict in enumerate(features):
            sensor_name = feature_dict['name']
            print(f"\n🎯 Sensor {i+1}: {sensor_name}")
            print("-" * 40)

        if 'features' in feature_dict and feature_dict['features']:
            for feature_type, feature_values in feature_dict['features'].items():
                if isinstance(feature_values, list):
                    print(f" 📈 {feature_type}: {len(feature_values)} values")
                    if feature_values:
                        sample_value = feature_values[0]
                        if isinstance(sample_value, (list, np.ndarray)):
                            print(f" └── Shape per window: {np.array(sample_value).shape}")
                        else:
                            # NOTE(review): ':.4f' assumes a numeric sample —
                            # would raise on string feature values.
                            print(f" └── Sample value: {sample_value:.4f}")
                else:
                    print(f" 📈 {feature_type}: {feature_values}")

        if 'annotations' in feature_dict:
            print(f" 📝 Annotations: {len(feature_dict['annotations'])} windows")

        print("\n" + "="*80)
Comprehensive gait feature extractor class.
This class extracts various time-domain, frequency-domain, and statistical features from gait data sliding windows.
57 def __init__(self, verbose: bool = True): 58 super().__init__( 59 name="gait_features", 60 description="Comprehensive gait feature extractor for time-domain, frequency-domain, and statistical features" 61 ) 62 self.verbose = verbose 63 self.config = { 64 'time_domain': True, 65 'frequency_domain': True, 66 'statistical': True, 67 'ar_order': 3 # Order for auto-regression coefficients 68 } 69 70 if self.verbose: 71 print("🚀 GaitFeatureExtractor initialized successfully!") 72 print(f"📊 Default configuration: {self.config}")
Initialize the feature extractor.
Args:
    name: Name of the feature extractor
    description: Description of the feature extractor
def extract_features(self, windows: List[Dict], fs: int, **kwargs) -> List[Dict]:
    """
    Extract gait features from sliding windows.

    Args:
        windows: List of sliding window dictionaries; each entry has a 'name'
            key and a 'data' key holding that sensor's windows. An entry
            named 'annotations' is treated as label data, not sensor data.
        fs: Sampling frequency in Hz.
        **kwargs: Optional overrides for the stored configuration:
            time_domain, frequency_domain, statistical (bools) and
            ar_order (int). Defaults come from self.config.

    Returns:
        List of feature dictionaries, one per input entry. Sensor entries
        contain 'name' and 'features'; the annotations entry contains
        'name', an empty 'features' dict, and per-window 'annotations'.
    """
    # Resolve effective settings: kwargs override the stored defaults.
    time_domain = kwargs.get('time_domain', self.config['time_domain'])
    frequency_domain = kwargs.get('frequency_domain', self.config['frequency_domain'])
    statistical = kwargs.get('statistical', self.config['statistical'])
    ar_order = kwargs.get('ar_order', self.config['ar_order'])

    def _feature_count(feature_map: Dict[str, Any]) -> int:
        # List-valued entries contribute one feature per window; scalars count once.
        return sum(len(v) if isinstance(v, list) else 1 for v in feature_map.values())

    if self.verbose:
        print("\n" + "="*60)
        print("🔍 STARTING GAIT FEATURE EXTRACTION")
        print("="*60)
        print(f"📈 Total sensors/windows to process: {len(windows)}")
        print(f"🔊 Sampling frequency: {fs} Hz")
        print(f"⏱️ Time domain features: {'✅' if time_domain else '❌'}")
        print(f"🌊 Frequency domain features: {'✅' if frequency_domain else '❌'}")
        print(f"📊 Statistical features: {'✅' if statistical else '❌'}")
        print(f"🔄 Auto-regression order: {ar_order}")
        print("-"*60)

    features = []

    # Main progress bar for processing all sensors
    main_pbar = tqdm(
        windows,
        desc="🔍 Processing Sensors",
        unit="sensor",
        disable=not self.verbose
    )

    for window_dict in main_pbar:
        sensor_name = window_dict['name']
        window_data = window_dict['data']

        if self.verbose:
            main_pbar.set_postfix({
                'Current': sensor_name,
                'Windows': len(window_data) if isinstance(window_data, list) else 1
            })

        # Annotation entries get majority-vote labels instead of features.
        if sensor_name == 'annotations':
            if self.verbose:
                logger.info(f"📝 Processing annotation data for {sensor_name}")
            features.append({
                'name': sensor_name,
                'features': {},
                'annotations': [self._extract_annotation_labels(window)
                                for window in window_data]
            })
            continue

        if self.verbose:
            logger.info(f"🎯 Processing sensor: {sensor_name}")
            logger.info(f"📦 Number of windows: {len(window_data)}")

        sensor_features = {'name': sensor_name, 'features': {}}

        # Time domain features
        if time_domain:
            if self.verbose:
                print(f" ⏱️ Extracting time domain features for {sensor_name}...")
            time_features = self._extract_time_domain_features(window_data)
            sensor_features['features'].update(time_features)
            if self.verbose:
                print(f" ✅ Time domain: {len(time_features)} feature types, {_feature_count(time_features)} total features")

        # Frequency domain features
        if frequency_domain:
            if self.verbose:
                print(f" 🌊 Extracting frequency domain features for {sensor_name}...")
            freq_features = self._extract_frequency_domain_features(window_data, fs)
            sensor_features['features'].update(freq_features)
            if self.verbose:
                print(f" ✅ Frequency domain: {len(freq_features)} feature types, {_feature_count(freq_features)} total features")

        # Statistical features
        if statistical:
            if self.verbose:
                print(f" 📊 Extracting statistical features for {sensor_name}...")
            stat_features = self._extract_statistical_features(window_data)
            sensor_features['features'].update(stat_features)
            if self.verbose:
                print(f" ✅ Statistical: {len(stat_features)} feature types, {_feature_count(stat_features)} total features")

        # Auto-regression coefficients are always extracted (no toggle).
        if self.verbose:
            print(f" 🔄 Extracting auto-regression coefficients for {sensor_name}...")
        ar_features = self._extract_ar_coefficients(window_data, ar_order)
        sensor_features['features'].update(ar_features)
        if self.verbose:
            print(f" ✅ Auto-regression: {len(ar_features)} feature types, {_feature_count(ar_features)} total features")

        if self.verbose:
            total_features = _feature_count(sensor_features['features'])
            print(f" 🎯 Total features extracted for {sensor_name}: {total_features}")
            print(f" 📋 Feature types: {list(sensor_features['features'].keys())}")
            print("-"*40)

        features.append(sensor_features)

    if self.verbose:
        print("\n" + "="*60)
        print("🎉 FEATURE EXTRACTION COMPLETED!")
        print("="*60)
        print(f"📊 Total sensors processed: {len(features)}")

        total_feature_count = sum(
            _feature_count(fd['features']) for fd in features if 'features' in fd
        )
        print(f"🔢 Total features extracted: {total_feature_count}")
        # BUG FIX: the original divided by len(features) unconditionally and
        # raised ZeroDivisionError when called with an empty windows list.
        if features:
            print(f"📈 Average features per sensor: {total_feature_count / len(features):.1f}")
        print("="*60)

    return features
Extract gait features from sliding windows.
Args:
    windows: List of sliding window dictionaries
    fs: Sampling frequency
    **kwargs: Additional arguments including time_domain, frequency_domain, statistical flags
Returns: List of feature dictionaries for each sensor
387 def get_feature_names(self) -> List[str]: 388 """ 389 Get names of all features that can be extracted. 390 391 Returns: 392 List of feature names 393 """ 394 time_domain_features = [ 395 'mean', 'std', 'variance', 'rms', 'range', 'median', 'mode', 396 'mean_absolute_value', 'median_absolute_deviation', 'peak_height', 397 'zero_crossing_rate', 'energy' 398 ] 399 400 frequency_domain_features = [ 401 'dominant_frequency', 'peak_frequency', 'power_spectral_entropy', 402 'principal_harmonic_frequency', 'stride_times', 'step_time', 403 'cadence', 'freezing_index' 404 ] 405 406 statistical_features = [ 407 'skewness', 'kurtosis', 'entropy', 'interquartile_range', 'correlation' 408 ] 409 410 other_features = ['ar_coefficients'] 411 412 return time_domain_features + frequency_domain_features + statistical_features + other_features
Get names of all features that can be extracted.
Returns: List of feature names
414 def print_extraction_summary(self, features: List[Dict]) -> None: 415 """ 416 Print a detailed summary of extracted features. 417 418 Args: 419 features: List of feature dictionaries returned by extract_features 420 """ 421 print("\n" + "="*80) 422 print("📊 FEATURE EXTRACTION SUMMARY") 423 print("="*80) 424 425 for i, feature_dict in enumerate(features): 426 sensor_name = feature_dict['name'] 427 print(f"\n🎯 Sensor {i+1}: {sensor_name}") 428 print("-" * 40) 429 430 if 'features' in feature_dict and feature_dict['features']: 431 for feature_type, feature_values in feature_dict['features'].items(): 432 if isinstance(feature_values, list): 433 print(f" 📈 {feature_type}: {len(feature_values)} values") 434 if feature_values: 435 sample_value = feature_values[0] 436 if isinstance(sample_value, (list, np.ndarray)): 437 print(f" └── Shape per window: {np.array(sample_value).shape}") 438 else: 439 print(f" └── Sample value: {sample_value:.4f}") 440 else: 441 print(f" 📈 {feature_type}: {feature_values}") 442 443 if 'annotations' in feature_dict: 444 print(f" 📝 Annotations: {len(feature_dict['annotations'])} windows") 445 446 print("\n" + "="*80)
Print a detailed summary of extracted features.
Args: features: List of feature dictionaries returned by extract_features
Inherited Members
450def get_stride_times_for_windows(windows, fs): 451 """Calculate stride times for all windows in the input.""" 452 return [calculate_stride_times(window, fs) for window in windows]
Calculate stride times for all windows in the input.
454def get_zero_crossing_rates_for_windows(windows): 455 """Calculate zero-crossing rates for all windows in the input.""" 456 return [calculate_zero_crossing_rate(window) for window in windows]
Calculate zero-crossing rates for all windows in the input.
458def get_freezing_indices_for_windows(windows, fs): 459 """Calculate freezing indices for all windows in the input.""" 460 return [calculate_freezing_index(window, fs) for window in windows]
Calculate freezing indices for all windows in the input.
462def get_standard_deviations_for_windows(windows): 463 """Calculate standard deviations for all windows in the input.""" 464 return [calculate_standard_deviation(window) for window in windows]
Calculate standard deviations for all windows in the input.
466def get_entropies_for_windows(windows): 467 """Calculate entropies for all windows in the input.""" 468 return [calculate_entropy(window) for window in windows]
Calculate entropies for all windows in the input.
470def get_energies_for_windows(windows): 471 """Calculate energies for all windows in the input.""" 472 return [calculate_energy(window) for window in windows]
Calculate energies for all windows in the input.
474def get_variances_for_windows(windows): 475 """Calculate variances for all windows in the input.""" 476 return [calculate_variance(window) for window in windows]
Calculate variances for all windows in the input.
478def get_kurtosis_for_windows(windows): 479 """Calculate kurtosis values for all windows in the input.""" 480 return [calculate_kurtosis(window) for window in windows]
Calculate kurtosis values for all windows in the input.
482def get_step_times_for_windows(windows, fs): 483 """Calculate step times for all windows in the input.""" 484 return [calculate_step_time(window, fs) for window in windows]
Calculate step times for all windows in the input.
551def extract_gait_features(daphnet_windows, fs, time_domain=True, frequency_domain=True, statistical=True, verbose=True): 552 """ 553 Legacy function for extracting gait features. 554 555 Args: 556 daphnet_windows: List of sliding window dictionaries 557 fs: Sampling frequency 558 time_domain: Whether to extract time domain features 559 frequency_domain: Whether to extract frequency domain features 560 statistical: Whether to extract statistical features 561 verbose: Whether to show verbose output and progress bars 562 563 Returns: 564 List of feature dictionaries 565 """ 566 extractor = GaitFeatureExtractor(verbose=verbose) 567 return extractor.extract_features( 568 daphnet_windows, fs, 569 time_domain=time_domain, 570 frequency_domain=frequency_domain, 571 statistical=statistical 572 )
Legacy function for extracting gait features.
Args:
    daphnet_windows: List of sliding window dictionaries
    fs: Sampling frequency
    time_domain: Whether to extract time domain features
    frequency_domain: Whether to extract frequency domain features
    statistical: Whether to extract statistical features
    verbose: Whether to show verbose output and progress bars
Returns: List of feature dictionaries