gaitsetpy.eda.analyzers
EDA analyzer classes for gait data analysis.
This module contains individual EDA analyzer classes that inherit from BaseEDAAnalyzer and provide specific analysis and visualization functionality.
Maintainer: @aharshit123456
'''
EDA analyzer classes for gait data analysis.

This module contains individual EDA analyzer classes that inherit from BaseEDAAnalyzer
and provide specific analysis and visualization functionality.

Maintainer: @aharshit123456
'''

from typing import Dict, List, Any, Union, Optional
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from ..core.base_classes import BaseEDAAnalyzer


class DaphnetVisualizationAnalyzer(BaseEDAAnalyzer):
    """
    EDA analyzer for Daphnet dataset visualization.

    This analyzer provides comprehensive visualization capabilities for the
    Daphnet dataset including thigh, shank, and trunk sensor data.  Freeze
    (annotation value 2) and no-freeze (annotation value 1) samples are
    overlaid on the plots whenever an ``annotations`` column is present.
    """

    def __init__(self):
        super().__init__(
            name="daphnet_visualization",
            description="Comprehensive visualization analyzer for Daphnet dataset sensor data"
        )
        # Plot configuration shared by all plotting helpers.
        self.config = {
            'figsize': (20, 16),
            'colors': {
                'no_freeze': 'orange',  # annotation value 1
                'freeze': 'purple'      # annotation value 2
            },
            'alpha': 0.6
        }

    def analyze(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs) -> Dict[str, Any]:
        """
        Analyze the data and return statistical summaries.

        Args:
            data: A single DataFrame or a list of DataFrames to analyze.
            **kwargs: Additional arguments (currently unused).

        Returns:
            Dictionary containing analysis results.  For a list input the
            per-dataset results are keyed 'dataset_0', 'dataset_1', ...
        """
        if isinstance(data, list):
            # Multiple datasets: analyze each one independently.
            return {f'dataset_{i}': self._analyze_single_dataset(df)
                    for i, df in enumerate(data)}
        # Single dataset
        return self._analyze_single_dataset(data)

    def _analyze_single_dataset(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Return basic and sensor-specific statistics for one DataFrame."""
        numeric = df.select_dtypes(include=[np.number])
        stats = {
            'shape': df.shape,
            'columns': df.columns.tolist(),
            'annotation_distribution': (df['annotations'].value_counts().to_dict()
                                        if 'annotations' in df.columns else {}),
            'missing_values': df.isnull().sum().to_dict(),
            'data_range': {
                'min': numeric.min().to_dict(),
                'max': numeric.max().to_dict()
            }
        }

        # Summary statistics for the overall-magnitude column of each sensor.
        sensor_stats = {}
        for sensor in ['thigh', 'shank', 'trunk']:
            if sensor in df.columns:
                sensor_stats[sensor] = {
                    'mean': df[sensor].mean(),
                    'std': df[sensor].std(),
                    'min': df[sensor].min(),
                    'max': df[sensor].max()
                }

        stats['sensor_statistics'] = sensor_stats
        return stats

    def visualize(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs):
        """
        Create visualizations of the data.

        Args:
            data: A single DataFrame or a list of DataFrames to visualize.
            **kwargs: Additional arguments:
                sensor_type: 'all', 'thigh', 'shank' or 'trunk' (default 'all').
                dataset_index: which dataset to plot when data is a list.
                names: optional display names for the datasets.
        """
        sensor_type = kwargs.get('sensor_type', 'all')
        dataset_index = kwargs.get('dataset_index', 0)
        names = kwargs.get('names', [])

        if isinstance(data, list):
            if dataset_index >= len(data):
                print(f"Dataset index {dataset_index} out of range")
                return
            df = data[dataset_index]
            dataset_name = (names[dataset_index] if dataset_index < len(names)
                            else f"Dataset {dataset_index}")
        else:
            df = data
            dataset_name = names[0] if names else "Dataset"

        if sensor_type == 'all':
            self._plot_all_sensors(df, dataset_name)
        elif sensor_type in ('thigh', 'shank', 'trunk'):
            self._plot_sensor_components(df, dataset_name, sensor_type)
        else:
            print(f"Unknown sensor type: {sensor_type}")

    def _plot_sensor_components(self, df: pd.DataFrame, dataset_name: str, sensor: str):
        """
        Plot the four channels of one sensor (horizontal-forward, vertical,
        horizontal-lateral, overall) with freeze/no-freeze annotations overlaid.

        Shared implementation behind _plot_thigh_data, _plot_shank_data and
        _plot_trunk_data, which previously duplicated this code verbatim.

        Args:
            df: Input DataFrame.
            dataset_name: Display name used in the figure title.
            sensor: One of 'thigh', 'shank', 'trunk'.
        """
        print(f"Plotting {sensor} data for {dataset_name}")

        # Keep only annotated samples (annotation 0 = outside the experiment).
        df_filtered = df[df.annotations > 0] if 'annotations' in df.columns else df

        if df_filtered.empty:
            print("No valid data to plot")
            return

        # Create figure
        fig, axes = plt.subplots(4, 1, sharex=True, figsize=self.config['figsize'])
        fig.suptitle(f"{sensor.capitalize()} Data from {dataset_name}")

        # Separate freeze and no-freeze data
        if 'annotations' in df.columns:
            neg = df_filtered[df_filtered.annotations == 1]  # No freeze
            pos = df_filtered[df_filtered.annotations == 2]  # Freeze
        else:
            neg = df_filtered
            pos = pd.DataFrame()

        # Column naming convention assumed from the Daphnet loaders:
        # '<sensor>_h_fd', '<sensor>_v', '<sensor>_h_l', '<sensor>'.
        components = [f'{sensor}_h_fd', f'{sensor}_v', f'{sensor}_h_l', sensor]
        labels = ['Horizontal Forward', 'Vertical', 'Horizontal Lateral', 'Overall']

        for i, (component, label) in enumerate(zip(components, labels)):
            if component not in df_filtered.columns:
                continue
            # Plot main signal
            axes[i].plot(df_filtered.index, df_filtered[component])
            axes[i].set_ylabel(f"{label} {sensor.capitalize()} Acceleration")

            # Plot annotations if available
            if not neg.empty:
                axes[i].scatter(neg.index, neg[component],
                                c=self.config['colors']['no_freeze'],
                                label="no freeze", alpha=self.config['alpha'])
            if not pos.empty:
                axes[i].scatter(pos.index, pos[component],
                                c=self.config['colors']['freeze'],
                                label="freeze", alpha=self.config['alpha'])

            axes[i].legend()

        plt.xlabel("Time")
        plt.tight_layout()
        plt.show()

    def _plot_thigh_data(self, df: pd.DataFrame, dataset_name: str):
        """Plot thigh sensor data."""
        self._plot_sensor_components(df, dataset_name, 'thigh')

    def _plot_shank_data(self, df: pd.DataFrame, dataset_name: str):
        """Plot shank sensor data."""
        self._plot_sensor_components(df, dataset_name, 'shank')

    def _plot_trunk_data(self, df: pd.DataFrame, dataset_name: str):
        """Plot trunk sensor data."""
        self._plot_sensor_components(df, dataset_name, 'trunk')

    def _plot_all_sensors(self, df: pd.DataFrame, dataset_name: str):
        """Plot all sensor data in a combined view."""
        print(f"Plotting all sensor data for {dataset_name}")

        # Filter BEFORE creating the figure so an empty dataset does not
        # leak an unused figure (the original created the figure first).
        df_filtered = df[df.annotations > 0] if 'annotations' in df.columns else df

        if df_filtered.empty:
            print("No valid data to plot")
            return

        # Create figure with subplots for each sensor
        fig, axes = plt.subplots(3, 1, sharex=True, figsize=self.config['figsize'])
        fig.suptitle(f"All Sensor Data from {dataset_name}")

        for i, sensor in enumerate(['thigh', 'shank', 'trunk']):
            if sensor not in df_filtered.columns:
                continue
            axes[i].plot(df_filtered.index, df_filtered[sensor])
            axes[i].set_ylabel(f"{sensor.capitalize()} Acceleration")

            # Add annotations if available
            if 'annotations' in df_filtered.columns:
                neg = df_filtered[df_filtered.annotations == 1]
                pos = df_filtered[df_filtered.annotations == 2]

                if not neg.empty:
                    axes[i].scatter(neg.index, neg[sensor],
                                    c=self.config['colors']['no_freeze'],
                                    label="no freeze", alpha=self.config['alpha'])
                if not pos.empty:
                    axes[i].scatter(pos.index, pos[sensor],
                                    c=self.config['colors']['freeze'],
                                    label="freeze", alpha=self.config['alpha'])

            axes[i].legend()

        plt.xlabel("Time")
        plt.tight_layout()
        plt.show()


class SensorStatisticsAnalyzer(BaseEDAAnalyzer):
    """
    EDA analyzer for sensor data statistics and feature visualization.

    This analyzer provides statistical analysis and feature visualization
    capabilities for sensor data, including sliding windows and extracted
    features.
    """

    def __init__(self):
        super().__init__(
            name="sensor_statistics",
            description="Statistical analysis and feature visualization for sensor data"
        )
        # Marker shapes used when overlaying each statistical feature.
        self.config = {
            'figsize': (20, 10),
            'feature_markers': {
                'mean': 'x',
                'rms': 'o',
                'peak_height': 'v',
                'mode': '<',
                'median': '^'
            }
        }

    def analyze(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs) -> Dict[str, Any]:
        """
        Analyze sensor data and return statistical summaries.

        Args:
            data: A single DataFrame or a list of DataFrames to analyze.
            **kwargs: Additional arguments (currently unused).

        Returns:
            Dictionary containing analysis results.  For a list input the
            per-dataset results are keyed 'dataset_0', 'dataset_1', ...
        """
        if isinstance(data, list):
            # Multiple datasets: compute statistics for each one.
            return {f'dataset_{i}': self._compute_statistics(df)
                    for i, df in enumerate(data)}
        # Single dataset
        return self._compute_statistics(data)

    def _compute_statistics(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Compute comprehensive statistics for a dataset."""
        # Restrict moment/correlation computations to numeric columns:
        # calling corr/skew/kurtosis on a mixed-dtype frame raises a
        # TypeError on pandas >= 2.0.
        numeric = df.select_dtypes(include=[np.number])
        stats = {
            'basic_stats': df.describe().to_dict(),
            'correlation_matrix': numeric.corr().to_dict() if len(numeric.columns) > 1 else {},
            'skewness': numeric.skew().to_dict(),
            'kurtosis': numeric.kurtosis().to_dict()
        }

        # Add sensor-specific statistics
        sensor_stats = {}
        for sensor in ['thigh', 'shank', 'trunk']:
            if sensor in df.columns:
                sensor_data = df[sensor].dropna()
                q25 = sensor_data.quantile(0.25)
                q75 = sensor_data.quantile(0.75)
                sensor_stats[sensor] = {
                    'mean': sensor_data.mean(),
                    'std': sensor_data.std(),
                    'variance': sensor_data.var(),
                    'min': sensor_data.min(),
                    'max': sensor_data.max(),
                    'range': sensor_data.max() - sensor_data.min(),
                    'median': sensor_data.median(),
                    'q25': q25,
                    'q75': q75,
                    'iqr': q75 - q25
                }

        stats['sensor_statistics'] = sensor_stats
        return stats

    def visualize(self, sliding_windows: List[Dict], features: List[Dict], **kwargs):
        """
        Create visualizations of sensor data with overlaid features.

        Args:
            sliding_windows: List of sliding window dictionaries (each with
                'name' and 'data' keys).
            features: List of feature dictionaries (each with 'name' and
                'features' keys).
            **kwargs: Additional arguments:
                sensor_name (default 'shank'), start_idx (default 0),
                end_idx (default 1000), num_windows (default 10),
                save (default False).
        """
        sensor_name = kwargs.get('sensor_name', 'shank')
        start_idx = kwargs.get('start_idx', 0)
        end_idx = kwargs.get('end_idx', 1000)
        num_windows = kwargs.get('num_windows', 10)
        save = kwargs.get('save', False)

        self._plot_sensor_with_features(sliding_windows, features, start_idx, end_idx,
                                        sensor_name, num_windows, save)

    def _plot_sensor_with_features(self, sliding_windows: List[Dict], features: List[Dict],
                                   start_idx: int, end_idx: int, sensor_name: str = "shank",
                                   num_windows: int = 10, save: bool = False):
        """
        Plot sliding windows of sensor data with overlaid statistical features.

        Args:
            sliding_windows: List of sliding window dictionaries.
            features: List of feature dictionaries.
            start_idx: Start index of the time window.
            end_idx: End index of the time window.
            sensor_name: Name of the sensor to plot.
            num_windows: Number of sliding windows to plot.
            save: Whether to save the plot (prompts for a path via input()).
        """
        fig, axes = plt.subplots(2, 1, figsize=self.config['figsize'],
                                 gridspec_kw={'height_ratios': [3, 1]})

        # Extract sensor windows
        sensor_windows = next((sw['data'] for sw in sliding_windows if sw['name'] == sensor_name), None)
        if sensor_windows is None:
            print(f"Sensor '{sensor_name}' not found in sliding_windows.")
            return

        # Extract corresponding features
        sensor_features = next((feat['features'] for feat in features if feat['name'] == sensor_name), None)
        if sensor_features is None:
            print(f"Sensor '{sensor_name}' not found in features.")
            return

        # Keep only windows fully inside [start_idx, end_idx].
        filtered_windows = [series for series in sensor_windows
                            if start_idx <= series.index[0] and series.index[-1] <= end_idx]

        if not filtered_windows:
            print(f"No windows found in the specified index range ({start_idx} - {end_idx}).")
            return

        # Store entropy & frequency features for separate plotting
        entropy_values = []
        dominant_frequencies = []

        # Plot first num_windows windows
        for i in range(min(num_windows, len(filtered_windows))):
            series = filtered_windows[i]

            # Extract time and signal values
            time_values = series.index.to_numpy()
            signal_values = series.values

            # Plot time series data
            axes[0].plot(time_values, signal_values, alpha=0.6)

            # Mark start and end of each window with vertical dotted lines
            axes[0].axvline(x=time_values[0], color='black', linestyle='dotted', alpha=0.7)
            axes[0].axvline(x=time_values[-1], color='black', linestyle='dotted', alpha=0.7)

            # Overlay statistical features at the sample closest in value,
            # labelling each marker only once (on the first window).
            for feature_name, marker in self.config['feature_markers'].items():
                if feature_name in sensor_features and len(sensor_features[feature_name]) > i:
                    feature_value = sensor_features[feature_name][i]
                    if feature_value != 0:  # Skip zero values
                        closest_index = np.argmin(np.abs(signal_values - feature_value))
                        closest_time = time_values[closest_index]
                        axes[0].scatter(closest_time, feature_value, color='red',
                                        marker=marker, s=100, label=feature_name if i == 0 else "")

            # Store entropy & frequency features for separate plotting
            if 'entropy' in sensor_features and len(sensor_features['entropy']) > i:
                entropy_values.append(sensor_features['entropy'][i])
            if 'dominant_frequency' in sensor_features and len(sensor_features['dominant_frequency']) > i:
                dominant_frequencies.append(sensor_features['dominant_frequency'][i])

        # Labels and title for time-series plot
        axes[0].set_xlabel('Time')
        axes[0].set_ylabel(f'{sensor_name} Signal')
        axes[0].set_title(f'First {num_windows} windows of {sensor_name} in range {start_idx}-{end_idx} with Features')
        axes[0].legend()

        # Frequency-domain & entropy plot.  Each series derives its own x
        # positions: the original only defined window_indices inside the
        # dominant-frequency branch, raising a NameError when entropy values
        # were present without dominant frequencies.
        if dominant_frequencies:
            axes[1].plot(range(len(dominant_frequencies)), dominant_frequencies,
                         label="Dominant Frequency", marker="o", linestyle="dashed", color="blue")

        if entropy_values:
            axes[1].bar(range(len(entropy_values)), entropy_values,
                        alpha=0.6, label="Entropy", color="green")

        axes[1].set_xlabel("Window Index")
        axes[1].set_ylabel("Feature Value")
        axes[1].set_title("Frequency & Entropy Features")
        axes[1].legend()

        plt.tight_layout()

        # Save or show plot.  NOTE(review): input() blocks and makes this
        # unusable in non-interactive runs; consider accepting a path kwarg.
        if save:
            file_path = input("Enter the file path to save the plot (e.g., 'plot.png'): ")
            plt.savefig(file_path, dpi=300)
            print(f"Plot saved at {file_path}")
        else:
            plt.show()


def harup_basic_stats(harup_df):
    """
    Print and return basic statistics for each sensor column in a HAR-UP DataFrame.

    Args:
        harup_df (pd.DataFrame): DataFrame containing HAR-UP data.

    Returns:
        pd.DataFrame: DataFrame of statistics, one row per column.
    """
    # pandas is already imported at module level; the former local
    # "import pandas as pd" was redundant and has been removed.
    stats = harup_df.describe().T
    print(stats)
    return stats


def harup_missing_data_report(harup_df):
    """
    Print and return missing value counts for each column in a HAR-UP DataFrame.

    Args:
        harup_df (pd.DataFrame): DataFrame containing HAR-UP data.

    Returns:
        pd.Series: Series of missing value counts.
    """
    missing = harup_df.isnull().sum()
    print(missing)
    return missing


def harup_activity_stats(harup_df):
    """
    Print and return counts for each activity label in a HAR-UP DataFrame.

    Args:
        harup_df (pd.DataFrame): DataFrame containing HAR-UP data.

    Returns:
        pd.Series: Series of activity label counts, sorted by label,
        or None when no 'activity_label' column is present.
    """
    if 'activity_label' not in harup_df.columns:
        print("No 'activity_label' column found.")
        return None
    counts = harup_df['activity_label'].value_counts().sort_index()
    print(counts)
    return counts
class DaphnetVisualizationAnalyzer(BaseEDAAnalyzer):
    """
    EDA analyzer for Daphnet dataset visualization.

    Offers plotting of the thigh, shank and trunk accelerometer channels of
    the Daphnet dataset, overlaying freeze / no-freeze annotations whenever
    an 'annotations' column is present.
    """

    def __init__(self):
        super().__init__(
            name="daphnet_visualization",
            description="Comprehensive visualization analyzer for Daphnet dataset sensor data"
        )
        # Shared plot configuration (annotation colors, figure size, opacity).
        palette = {'no_freeze': 'orange', 'freeze': 'purple'}
        self.config = {'figsize': (20, 16), 'colors': palette, 'alpha': 0.6}

    def analyze(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs) -> Dict[str, Any]:
        """
        Analyze the data and return statistical summaries.

        Args:
            data: Input data to analyze (DataFrame or list of DataFrames).
            **kwargs: Additional arguments.

        Returns:
            Dictionary containing analysis results; list inputs yield one
            entry per dataset, keyed 'dataset_<i>'.
        """
        if not isinstance(data, list):
            return self._analyze_single_dataset(data)
        return {f'dataset_{idx}': self._analyze_single_dataset(frame)
                for idx, frame in enumerate(data)}

    def _analyze_single_dataset(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Analyze a single dataset."""
        numeric_cols = df.select_dtypes(include=[np.number])
        annotation_counts = (df['annotations'].value_counts().to_dict()
                             if 'annotations' in df.columns else {})
        report = {
            'shape': df.shape,
            'columns': df.columns.tolist(),
            'annotation_distribution': annotation_counts,
            'missing_values': df.isnull().sum().to_dict(),
            'data_range': {
                'min': numeric_cols.min().to_dict(),
                'max': numeric_cols.max().to_dict()
            }
        }

        # Per-sensor summaries for the overall-magnitude columns.
        per_sensor = {}
        for channel in ('thigh', 'shank', 'trunk'):
            if channel in df.columns:
                col = df[channel]
                per_sensor[channel] = {
                    'mean': col.mean(),
                    'std': col.std(),
                    'min': col.min(),
                    'max': col.max()
                }
        report['sensor_statistics'] = per_sensor
        return report

    def visualize(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs):
        """
        Create visualizations of the data.

        Args:
            data: Input data to visualize.
            **kwargs: Additional arguments including sensor_type,
                dataset_index, names.
        """
        sensor_type = kwargs.get('sensor_type', 'all')
        dataset_index = kwargs.get('dataset_index', 0)
        names = kwargs.get('names', [])

        if isinstance(data, list):
            if dataset_index >= len(data):
                print(f"Dataset index {dataset_index} out of range")
                return
            df = data[dataset_index]
            dataset_name = (names[dataset_index] if dataset_index < len(names)
                            else f"Dataset {dataset_index}")
        else:
            df = data
            dataset_name = names[0] if names else "Dataset"

        # Dispatch table replaces the original if/elif chain.
        handlers = {
            'all': self._plot_all_sensors,
            'thigh': self._plot_thigh_data,
            'shank': self._plot_shank_data,
            'trunk': self._plot_trunk_data,
        }
        handler = handlers.get(sensor_type)
        if handler is None:
            print(f"Unknown sensor type: {sensor_type}")
        else:
            handler(df, dataset_name)

    def _plot_component_figure(self, df: pd.DataFrame, dataset_name: str, sensor: str):
        """Common 4-panel component plot shared by the thigh/shank/trunk methods."""
        print(f"Plotting {sensor} data for {dataset_name}")

        # Drop un-annotated samples when annotations are present.
        subset = df[df.annotations > 0] if 'annotations' in df.columns else df
        if subset.empty:
            print("No valid data to plot")
            return

        fig, panels = plt.subplots(4, 1, sharex=True, figsize=self.config['figsize'])
        fig.suptitle(f"{sensor.capitalize()} Data from {dataset_name}")

        # Split into no-freeze (1) and freeze (2) groups.
        if 'annotations' in df.columns:
            no_freeze = subset[subset.annotations == 1]
            freeze = subset[subset.annotations == 2]
        else:
            no_freeze = subset
            freeze = pd.DataFrame()

        channel_names = [f'{sensor}_h_fd', f'{sensor}_v', f'{sensor}_h_l', sensor]
        channel_labels = ['Horizontal Forward', 'Vertical', 'Horizontal Lateral', 'Overall']

        for panel, channel, label in zip(panels, channel_names, channel_labels):
            if channel not in subset.columns:
                continue
            panel.plot(subset.index, subset[channel])
            panel.set_ylabel(f"{label} {sensor.capitalize()} Acceleration")
            if not no_freeze.empty:
                panel.scatter(no_freeze.index, no_freeze[channel],
                              c=self.config['colors']['no_freeze'],
                              label="no freeze", alpha=self.config['alpha'])
            if not freeze.empty:
                panel.scatter(freeze.index, freeze[channel],
                              c=self.config['colors']['freeze'],
                              label="freeze", alpha=self.config['alpha'])
            panel.legend()

        plt.xlabel("Time")
        plt.tight_layout()
        plt.show()

    def _plot_thigh_data(self, df: pd.DataFrame, dataset_name: str):
        """Plot thigh sensor data."""
        self._plot_component_figure(df, dataset_name, 'thigh')

    def _plot_shank_data(self, df: pd.DataFrame, dataset_name: str):
        """Plot shank sensor data."""
        self._plot_component_figure(df, dataset_name, 'shank')

    def _plot_trunk_data(self, df: pd.DataFrame, dataset_name: str):
        """Plot trunk sensor data."""
        self._plot_component_figure(df, dataset_name, 'trunk')

    def _plot_all_sensors(self, df: pd.DataFrame, dataset_name: str):
        """Plot all sensor data in a combined view."""
        print(f"Plotting all sensor data for {dataset_name}")

        # NOTE: figure is created before the empty check, mirroring the
        # original control flow exactly.
        fig, panels = plt.subplots(3, 1, sharex=True, figsize=self.config['figsize'])
        fig.suptitle(f"All Sensor Data from {dataset_name}")

        subset = df[df.annotations > 0] if 'annotations' in df.columns else df
        if subset.empty:
            print("No valid data to plot")
            return

        for panel, channel in zip(panels, ('thigh', 'shank', 'trunk')):
            if channel not in subset.columns:
                continue
            panel.plot(subset.index, subset[channel])
            panel.set_ylabel(f"{channel.capitalize()} Acceleration")

            if 'annotations' in subset.columns:
                no_freeze = subset[subset.annotations == 1]
                freeze = subset[subset.annotations == 2]
                if not no_freeze.empty:
                    panel.scatter(no_freeze.index, no_freeze[channel],
                                  c=self.config['colors']['no_freeze'],
                                  label="no freeze", alpha=self.config['alpha'])
                if not freeze.empty:
                    panel.scatter(freeze.index, freeze[channel],
                                  c=self.config['colors']['freeze'],
                                  label="freeze", alpha=self.config['alpha'])
            panel.legend()

        plt.xlabel("Time")
        plt.tight_layout()
        plt.show()
EDA analyzer for Daphnet dataset visualization.
This analyzer provides comprehensive visualization capabilities for Daphnet dataset including thigh, shank, and trunk sensor data.
def __init__(self):
    """Register the analyzer with its fixed name/description and set plot defaults."""
    super().__init__(
        name="daphnet_visualization",
        description="Comprehensive visualization analyzer for Daphnet dataset sensor data"
    )
    # Annotation colors: orange = no freeze (1), purple = freeze (2).
    palette = {'no_freeze': 'orange', 'freeze': 'purple'}
    self.config = {'figsize': (20, 16), 'colors': palette, 'alpha': 0.6}
Initialize the EDA analyzer.
Args: name — the analyzer's name; description — a short description of the analyzer.
def analyze(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs) -> Dict[str, Any]:
    """
    Analyze the data and return statistical summaries.

    Args:
        data: Input data to analyze (DataFrame or list of DataFrames).
        **kwargs: Additional arguments.

    Returns:
        Dictionary containing analysis results; for list input, one
        entry per dataset keyed 'dataset_<i>'.
    """
    if not isinstance(data, list):
        # Single dataset
        return self._analyze_single_dataset(data)
    # Multiple datasets: one result per frame.
    return {f'dataset_{idx}': self._analyze_single_dataset(frame)
            for idx, frame in enumerate(data)}
Analyze the data and return statistical summaries.
Args: data: Input data to analyze **kwargs: Additional arguments
Returns: Dictionary containing analysis results
def visualize(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs):
    """
    Create visualizations of the data.

    Args:
        data: Input data to visualize.
        **kwargs: Additional arguments including sensor_type, dataset_index, names.
    """
    sensor_type = kwargs.get('sensor_type', 'all')
    dataset_index = kwargs.get('dataset_index', 0)
    names = kwargs.get('names', [])

    # Resolve which frame to plot and its display name.
    if isinstance(data, list):
        if dataset_index >= len(data):
            print(f"Dataset index {dataset_index} out of range")
            return
        df = data[dataset_index]
        dataset_name = (names[dataset_index] if dataset_index < len(names)
                        else f"Dataset {dataset_index}")
    else:
        df = data
        dataset_name = names[0] if names else "Dataset"

    # Dispatch table in place of the original if/elif chain.
    handlers = {
        'all': self._plot_all_sensors,
        'thigh': self._plot_thigh_data,
        'shank': self._plot_shank_data,
        'trunk': self._plot_trunk_data,
    }
    handler = handlers.get(sensor_type)
    if handler is None:
        print(f"Unknown sensor type: {sensor_type}")
    else:
        handler(df, dataset_name)
Create visualizations of the data.
Args: data — the input data to visualize; **kwargs — additional arguments, including sensor_type, dataset_index, and names.
Inherited Members
class SensorStatisticsAnalyzer(BaseEDAAnalyzer):
    """
    EDA analyzer for sensor data statistics and feature visualization.

    This analyzer provides statistical analysis and feature visualization capabilities
    for sensor data including sliding windows and extracted features.
    """

    def __init__(self):
        super().__init__(
            name="sensor_statistics",
            description="Statistical analysis and feature visualization for sensor data"
        )
        # Plot configuration: figure size and the scatter marker used for
        # each statistical feature overlaid on the time-series plot.
        self.config = {
            'figsize': (20, 10),
            'feature_markers': {
                'mean': 'x',
                'rms': 'o',
                'peak_height': 'v',
                'mode': '<',
                'median': '^'
            }
        }

    def analyze(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs) -> Dict[str, Any]:
        """
        Analyze sensor data and return statistical summaries.

        Args:
            data: Input data to analyze (a single DataFrame or a list of them).
            **kwargs: Additional arguments (unused).

        Returns:
            Dictionary containing analysis results. For a list input, results
            are keyed 'dataset_0', 'dataset_1', ...
        """
        if isinstance(data, list):
            # Multiple datasets: analyze each one under a stable key.
            return {f'dataset_{i}': self._compute_statistics(df) for i, df in enumerate(data)}
        # Single dataset
        return self._compute_statistics(data)

    def _compute_statistics(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Compute comprehensive statistics (describe/corr/skew/kurtosis plus per-sensor summaries) for a dataset."""
        numeric_cols = df.select_dtypes(include=[np.number]).columns
        stats = {
            'basic_stats': df.describe().to_dict(),
            # numeric_only=True: pandas >= 2.0 raises TypeError on non-numeric
            # columns for corr/skew/kurtosis; restrict to numeric data explicitly.
            'correlation_matrix': df.corr(numeric_only=True).to_dict() if len(numeric_cols) > 1 else {},
            'skewness': df.skew(numeric_only=True).to_dict(),
            'kurtosis': df.kurtosis(numeric_only=True).to_dict()
        }

        # Add sensor-specific statistics for the known Daphnet sensor columns.
        sensor_stats = {}
        for sensor in ['thigh', 'shank', 'trunk']:
            if sensor in df.columns:
                sensor_data = df[sensor].dropna()
                sensor_stats[sensor] = {
                    'mean': sensor_data.mean(),
                    'std': sensor_data.std(),
                    'variance': sensor_data.var(),
                    'min': sensor_data.min(),
                    'max': sensor_data.max(),
                    'range': sensor_data.max() - sensor_data.min(),
                    'median': sensor_data.median(),
                    'q25': sensor_data.quantile(0.25),
                    'q75': sensor_data.quantile(0.75),
                    'iqr': sensor_data.quantile(0.75) - sensor_data.quantile(0.25)
                }

        stats['sensor_statistics'] = sensor_stats
        return stats

    def visualize(self, sliding_windows: List[Dict], features: List[Dict], **kwargs):
        """
        Create visualizations of sensor data with overlaid features.

        Args:
            sliding_windows: List of sliding window dictionaries
            features: List of feature dictionaries
            **kwargs: Additional arguments including sensor_name, start_idx, end_idx,
                num_windows, save, and save_path (file path used when save=True;
                if omitted, the path is requested interactively).
        """
        sensor_name = kwargs.get('sensor_name', 'shank')
        start_idx = kwargs.get('start_idx', 0)
        end_idx = kwargs.get('end_idx', 1000)
        num_windows = kwargs.get('num_windows', 10)
        save = kwargs.get('save', False)
        save_path = kwargs.get('save_path', None)

        self._plot_sensor_with_features(sliding_windows, features, start_idx, end_idx,
                                        sensor_name, num_windows, save, save_path=save_path)

    def _plot_sensor_with_features(self, sliding_windows: List[Dict], features: List[Dict],
                                   start_idx: int, end_idx: int, sensor_name: str = "shank",
                                   num_windows: int = 10, save: bool = False,
                                   save_path: Optional[str] = None):
        """
        Plot sliding windows of sensor data with overlaid statistical features.

        Args:
            sliding_windows: List of sliding window dictionaries
            features: List of feature dictionaries
            start_idx: Start index of the time window
            end_idx: End index of the time window
            sensor_name: Name of the sensor to plot
            num_windows: Number of sliding windows to plot
            save: Whether to save the plot
            save_path: Destination file for the saved plot; when None and
                save=True, the path is requested via input() (legacy behavior).
        """
        fig, axes = plt.subplots(2, 1, figsize=self.config['figsize'],
                                 gridspec_kw={'height_ratios': [3, 1]})

        # Extract sensor windows
        sensor_windows = next((sw['data'] for sw in sliding_windows if sw['name'] == sensor_name), None)
        if sensor_windows is None:
            print(f"Sensor '{sensor_name}' not found in sliding_windows.")
            return

        # Extract corresponding features
        sensor_features = next((feat['features'] for feat in features if feat['name'] == sensor_name), None)
        if sensor_features is None:
            print(f"Sensor '{sensor_name}' not found in features.")
            return

        # Filter windows whose index range lies entirely inside [start_idx, end_idx].
        filtered_windows = [series for series in sensor_windows
                            if start_idx <= series.index[0] and series.index[-1] <= end_idx]

        if not filtered_windows:
            print(f"No windows found in the specified index range ({start_idx} - {end_idx}).")
            return

        # Store entropy & frequency features for separate plotting
        entropy_values = []
        dominant_frequencies = []

        # Plot first num_windows windows
        for i in range(min(num_windows, len(filtered_windows))):
            series = filtered_windows[i]

            # Extract time and signal values
            time_values = series.index.to_numpy()
            signal_values = series.values

            # Determine actual start and end indices for this window
            window_start, window_end = time_values[0], time_values[-1]

            # Plot time series data
            axes[0].plot(time_values, signal_values, alpha=0.6)

            # Mark start and end of each window with vertical dotted lines
            axes[0].axvline(x=window_start, color='black', linestyle='dotted', alpha=0.7)
            axes[0].axvline(x=window_end, color='black', linestyle='dotted', alpha=0.7)

            # Overlay statistical features at the sample closest to the feature value.
            for feature_name, marker in self.config['feature_markers'].items():
                if feature_name in sensor_features and len(sensor_features[feature_name]) > i:
                    feature_value = sensor_features[feature_name][i]
                    if feature_value != 0:  # Skip zero values
                        closest_index = np.argmin(np.abs(signal_values - feature_value))
                        closest_time = time_values[closest_index]
                        axes[0].scatter(closest_time, feature_value, color='red',
                                        marker=marker, s=100, label=feature_name if i == 0 else "")

            # Store entropy & frequency features for separate plotting
            if 'entropy' in sensor_features and len(sensor_features['entropy']) > i:
                entropy_values.append(sensor_features['entropy'][i])
            if 'dominant_frequency' in sensor_features and len(sensor_features['dominant_frequency']) > i:
                dominant_frequencies.append(sensor_features['dominant_frequency'][i])

        # Labels and title for time-series plot
        axes[0].set_xlabel('Time')
        axes[0].set_ylabel(f'{sensor_name} Signal')
        axes[0].set_title(f'First {num_windows} windows of {sensor_name} in range {start_idx}-{end_idx} with Features')
        axes[0].legend()

        # Frequency-domain & entropy plot
        if dominant_frequencies:
            axes[1].plot(range(len(dominant_frequencies)), dominant_frequencies,
                         label="Dominant Frequency", marker="o", linestyle="dashed", color="blue")

        if entropy_values:
            # BUG FIX: the original reused window_indices defined only in the
            # dominant_frequencies branch, raising NameError when entropy
            # features exist without dominant-frequency features. Compute the
            # x positions from entropy_values itself.
            axes[1].bar(range(len(entropy_values)), entropy_values, alpha=0.6, label="Entropy", color="green")

        axes[1].set_xlabel("Window Index")
        axes[1].set_ylabel("Feature Value")
        axes[1].set_title("Frequency & Entropy Features")
        axes[1].legend()

        plt.tight_layout()

        # Save or show plot
        if save:
            if save_path is None:
                # Legacy interactive fallback when no path was supplied.
                save_path = input("Enter the file path to save the plot (e.g., 'plot.png'): ")
            plt.savefig(save_path, dpi=300)
            print(f"Plot saved at {save_path}")
        else:
            plt.show()
EDA analyzer for sensor data statistics and feature visualization.
This analyzer provides statistical analysis and feature visualization capabilities for sensor data including sliding windows and extracted features.
320 def __init__(self): 321 super().__init__( 322 name="sensor_statistics", 323 description="Statistical analysis and feature visualization for sensor data" 324 ) 325 self.config = { 326 'figsize': (20, 10), 327 'feature_markers': { 328 'mean': 'x', 329 'rms': 'o', 330 'peak_height': 'v', 331 'mode': '<', 332 'median': '^' 333 } 334 }
Initialize the EDA analyzer.
Args: name: Name of the EDA analyzer description: Description of the EDA analyzer
336 def analyze(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs) -> Dict[str, Any]: 337 """ 338 Analyze sensor data and return statistical summaries. 339 340 Args: 341 data: Input data to analyze 342 **kwargs: Additional arguments 343 344 Returns: 345 Dictionary containing analysis results 346 """ 347 if isinstance(data, list): 348 # Multiple datasets 349 results = {} 350 for i, df in enumerate(data): 351 results[f'dataset_{i}'] = self._compute_statistics(df) 352 return results 353 else: 354 # Single dataset 355 return self._compute_statistics(data)
Analyze sensor data and return statistical summaries.
Args: data: Input data to analyze **kwargs: Additional arguments
Returns: Dictionary containing analysis results
387 def visualize(self, sliding_windows: List[Dict], features: List[Dict], **kwargs): 388 """ 389 Create visualizations of sensor data with overlaid features. 390 391 Args: 392 sliding_windows: List of sliding window dictionaries 393 features: List of feature dictionaries 394 **kwargs: Additional arguments including sensor_name, start_idx, end_idx, num_windows 395 """ 396 sensor_name = kwargs.get('sensor_name', 'shank') 397 start_idx = kwargs.get('start_idx', 0) 398 end_idx = kwargs.get('end_idx', 1000) 399 num_windows = kwargs.get('num_windows', 10) 400 save = kwargs.get('save', False) 401 402 self._plot_sensor_with_features(sliding_windows, features, start_idx, end_idx, 403 sensor_name, num_windows, save)
Create visualizations of sensor data with overlaid features.
Args: sliding_windows: List of sliding window dictionaries features: List of feature dictionaries **kwargs: Additional arguments including sensor_name, start_idx, end_idx, num_windows
Inherited Members
512def harup_basic_stats(harup_df): 513 """ 514 Print and return basic statistics for each sensor column in a HAR-UP DataFrame. 515 Args: 516 harup_df (pd.DataFrame): DataFrame containing HAR-UP data. 517 Returns: 518 pd.DataFrame: DataFrame of statistics. 519 """ 520 import pandas as pd 521 stats = harup_df.describe().T 522 print(stats) 523 return stats
Print and return basic statistics for each sensor column in a HAR-UP DataFrame. Args: harup_df (pd.DataFrame): DataFrame containing HAR-UP data. Returns: pd.DataFrame: DataFrame of statistics.
525def harup_missing_data_report(harup_df): 526 """ 527 Print and return missing value counts for each column in a HAR-UP DataFrame. 528 Args: 529 harup_df (pd.DataFrame): DataFrame containing HAR-UP data. 530 Returns: 531 pd.Series: Series of missing value counts. 532 """ 533 missing = harup_df.isnull().sum() 534 print(missing) 535 return missing
Print and return missing value counts for each column in a HAR-UP DataFrame. Args: harup_df (pd.DataFrame): DataFrame containing HAR-UP data. Returns: pd.Series: Series of missing value counts.
537def harup_activity_stats(harup_df): 538 """ 539 Print and return counts for each activity label in a HAR-UP DataFrame. 540 Args: 541 harup_df (pd.DataFrame): DataFrame containing HAR-UP data. 542 Returns: 543 pd.Series: Series of activity label counts. 544 """ 545 if 'activity_label' not in harup_df.columns: 546 print("No 'activity_label' column found.") 547 return None 548 counts = harup_df['activity_label'].value_counts().sort_index() 549 print(counts) 550 return counts
Print and return counts for each activity label in a HAR-UP DataFrame. Args: harup_df (pd.DataFrame): DataFrame containing HAR-UP data. Returns: pd.Series: Series of activity label counts.