# gaitsetpy.eda.analyzers
#
# EDA analyzer classes for gait data analysis.
#
# This module contains individual EDA analyzer classes that inherit from
# BaseEDAAnalyzer and provide specific analysis and visualization functionality.
#
# Maintainer: @aharshit123456
'''
EDA analyzer classes for gait data analysis.

This module contains individual EDA analyzer classes that inherit from BaseEDAAnalyzer
and provide specific analysis and visualization functionality.

Maintainer: @aharshit123456
'''

from typing import Dict, List, Any, Union, Optional
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from ..core.base_classes import BaseEDAAnalyzer


 17class DaphnetVisualizationAnalyzer(BaseEDAAnalyzer):
 18    """
 19    EDA analyzer for Daphnet dataset visualization.
 20    
 21    This analyzer provides comprehensive visualization capabilities for Daphnet dataset
 22    including thigh, shank, and trunk sensor data.
 23    """
 24    
 25    def __init__(self):
 26        super().__init__(
 27            name="daphnet_visualization",
 28            description="Comprehensive visualization analyzer for Daphnet dataset sensor data"
 29        )
 30        self.config = {
 31            'figsize': (20, 16),
 32            'colors': {
 33                'no_freeze': 'orange',
 34                'freeze': 'purple'
 35            },
 36            'alpha': 0.6
 37        }
 38    
 39    def analyze(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs) -> Dict[str, Any]:
 40        """
 41        Analyze the data and return statistical summaries.
 42        
 43        Args:
 44            data: Input data to analyze
 45            **kwargs: Additional arguments
 46            
 47        Returns:
 48            Dictionary containing analysis results
 49        """
 50        if isinstance(data, list):
 51            # Multiple datasets
 52            results = {}
 53            for i, df in enumerate(data):
 54                results[f'dataset_{i}'] = self._analyze_single_dataset(df)
 55            return results
 56        else:
 57            # Single dataset
 58            return self._analyze_single_dataset(data)
 59    
 60    def _analyze_single_dataset(self, df: pd.DataFrame) -> Dict[str, Any]:
 61        """Analyze a single dataset."""
 62        # Basic statistics
 63        stats = {
 64            'shape': df.shape,
 65            'columns': df.columns.tolist(),
 66            'annotation_distribution': df['annotations'].value_counts().to_dict() if 'annotations' in df.columns else {},
 67            'missing_values': df.isnull().sum().to_dict(),
 68            'data_range': {
 69                'min': df.select_dtypes(include=[np.number]).min().to_dict(),
 70                'max': df.select_dtypes(include=[np.number]).max().to_dict()
 71            }
 72        }
 73        
 74        # Sensor-specific statistics
 75        sensor_stats = {}
 76        for sensor in ['thigh', 'shank', 'trunk']:
 77            if sensor in df.columns:
 78                sensor_stats[sensor] = {
 79                    'mean': df[sensor].mean(),
 80                    'std': df[sensor].std(),
 81                    'min': df[sensor].min(),
 82                    'max': df[sensor].max()
 83                }
 84        
 85        stats['sensor_statistics'] = sensor_stats
 86        return stats
 87    
 88    def visualize(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs):
 89        """
 90        Create visualizations of the data.
 91        
 92        Args:
 93            data: Input data to visualize
 94            **kwargs: Additional arguments including sensor_type, dataset_index, names
 95        """
 96        sensor_type = kwargs.get('sensor_type', 'all')
 97        dataset_index = kwargs.get('dataset_index', 0)
 98        names = kwargs.get('names', [])
 99        
100        if isinstance(data, list):
101            if dataset_index < len(data):
102                df = data[dataset_index]
103                dataset_name = names[dataset_index] if dataset_index < len(names) else f"Dataset {dataset_index}"
104            else:
105                print(f"Dataset index {dataset_index} out of range")
106                return
107        else:
108            df = data
109            dataset_name = names[0] if names else "Dataset"
110        
111        if sensor_type == 'all':
112            self._plot_all_sensors(df, dataset_name)
113        elif sensor_type == 'thigh':
114            self._plot_thigh_data(df, dataset_name)
115        elif sensor_type == 'shank':
116            self._plot_shank_data(df, dataset_name)
117        elif sensor_type == 'trunk':
118            self._plot_trunk_data(df, dataset_name)
119        else:
120            print(f"Unknown sensor type: {sensor_type}")
121    
122    def _plot_thigh_data(self, df: pd.DataFrame, dataset_name: str):
123        """Plot thigh sensor data."""
124        print(f"Plotting thigh data for {dataset_name}")
125        
126        # Filter data
127        df_filtered = df[df.annotations > 0] if 'annotations' in df.columns else df
128        
129        if df_filtered.empty:
130            print("No valid data to plot")
131            return
132        
133        # Create figure
134        fig, axes = plt.subplots(4, 1, sharex=True, figsize=self.config['figsize'])
135        fig.suptitle(f"Thigh Data from {dataset_name}")
136        
137        # Separate freeze and no-freeze data
138        if 'annotations' in df.columns:
139            neg = df_filtered[df_filtered.annotations == 1]  # No freeze
140            pos = df_filtered[df_filtered.annotations == 2]  # Freeze
141        else:
142            neg = df_filtered
143            pos = pd.DataFrame()
144        
145        # Plot each component
146        components = ['thigh_h_fd', 'thigh_v', 'thigh_h_l', 'thigh']
147        labels = ['Horizontal Forward', 'Vertical', 'Horizontal Lateral', 'Overall']
148        
149        for i, (component, label) in enumerate(zip(components, labels)):
150            if component in df_filtered.columns:
151                # Plot main signal
152                axes[i].plot(df_filtered.index, df_filtered[component])
153                axes[i].set_ylabel(f"{label} Thigh Acceleration")
154                
155                # Plot annotations if available
156                if not neg.empty:
157                    axes[i].scatter(neg.index, neg[component], 
158                                  c=self.config['colors']['no_freeze'], 
159                                  label="no freeze", alpha=self.config['alpha'])
160                if not pos.empty:
161                    axes[i].scatter(pos.index, pos[component], 
162                                  c=self.config['colors']['freeze'], 
163                                  label="freeze", alpha=self.config['alpha'])
164                
165                axes[i].legend()
166        
167        plt.xlabel("Time")
168        plt.tight_layout()
169        plt.show()
170    
171    def _plot_shank_data(self, df: pd.DataFrame, dataset_name: str):
172        """Plot shank sensor data."""
173        print(f"Plotting shank data for {dataset_name}")
174        
175        # Filter data
176        df_filtered = df[df.annotations > 0] if 'annotations' in df.columns else df
177        
178        if df_filtered.empty:
179            print("No valid data to plot")
180            return
181        
182        # Create figure
183        fig, axes = plt.subplots(4, 1, sharex=True, figsize=self.config['figsize'])
184        fig.suptitle(f"Shank Data from {dataset_name}")
185        
186        # Separate freeze and no-freeze data
187        if 'annotations' in df.columns:
188            neg = df_filtered[df_filtered.annotations == 1]  # No freeze
189            pos = df_filtered[df_filtered.annotations == 2]  # Freeze
190        else:
191            neg = df_filtered
192            pos = pd.DataFrame()
193        
194        # Plot each component
195        components = ['shank_h_fd', 'shank_v', 'shank_h_l', 'shank']
196        labels = ['Horizontal Forward', 'Vertical', 'Horizontal Lateral', 'Overall']
197        
198        for i, (component, label) in enumerate(zip(components, labels)):
199            if component in df_filtered.columns:
200                # Plot main signal
201                axes[i].plot(df_filtered.index, df_filtered[component])
202                axes[i].set_ylabel(f"{label} Shank Acceleration")
203                
204                # Plot annotations if available
205                if not neg.empty:
206                    axes[i].scatter(neg.index, neg[component], 
207                                  c=self.config['colors']['no_freeze'], 
208                                  label="no freeze", alpha=self.config['alpha'])
209                if not pos.empty:
210                    axes[i].scatter(pos.index, pos[component], 
211                                  c=self.config['colors']['freeze'], 
212                                  label="freeze", alpha=self.config['alpha'])
213                
214                axes[i].legend()
215        
216        plt.xlabel("Time")
217        plt.tight_layout()
218        plt.show()
219    
220    def _plot_trunk_data(self, df: pd.DataFrame, dataset_name: str):
221        """Plot trunk sensor data."""
222        print(f"Plotting trunk data for {dataset_name}")
223        
224        # Filter data
225        df_filtered = df[df.annotations > 0] if 'annotations' in df.columns else df
226        
227        if df_filtered.empty:
228            print("No valid data to plot")
229            return
230        
231        # Create figure
232        fig, axes = plt.subplots(4, 1, sharex=True, figsize=self.config['figsize'])
233        fig.suptitle(f"Trunk Data from {dataset_name}")
234        
235        # Separate freeze and no-freeze data
236        if 'annotations' in df.columns:
237            neg = df_filtered[df_filtered.annotations == 1]  # No freeze
238            pos = df_filtered[df_filtered.annotations == 2]  # Freeze
239        else:
240            neg = df_filtered
241            pos = pd.DataFrame()
242        
243        # Plot each component
244        components = ['trunk_h_fd', 'trunk_v', 'trunk_h_l', 'trunk']
245        labels = ['Horizontal Forward', 'Vertical', 'Horizontal Lateral', 'Overall']
246        
247        for i, (component, label) in enumerate(zip(components, labels)):
248            if component in df_filtered.columns:
249                # Plot main signal
250                axes[i].plot(df_filtered.index, df_filtered[component])
251                axes[i].set_ylabel(f"{label} Trunk Acceleration")
252                
253                # Plot annotations if available
254                if not neg.empty:
255                    axes[i].scatter(neg.index, neg[component], 
256                                  c=self.config['colors']['no_freeze'], 
257                                  label="no freeze", alpha=self.config['alpha'])
258                if not pos.empty:
259                    axes[i].scatter(pos.index, pos[component], 
260                                  c=self.config['colors']['freeze'], 
261                                  label="freeze", alpha=self.config['alpha'])
262                
263                axes[i].legend()
264        
265        plt.xlabel("Time")
266        plt.tight_layout()
267        plt.show()
268    
269    def _plot_all_sensors(self, df: pd.DataFrame, dataset_name: str):
270        """Plot all sensor data in a combined view."""
271        print(f"Plotting all sensor data for {dataset_name}")
272        
273        # Create figure with subplots for each sensor
274        fig, axes = plt.subplots(3, 1, sharex=True, figsize=self.config['figsize'])
275        fig.suptitle(f"All Sensor Data from {dataset_name}")
276        
277        # Filter data
278        df_filtered = df[df.annotations > 0] if 'annotations' in df.columns else df
279        
280        if df_filtered.empty:
281            print("No valid data to plot")
282            return
283        
284        sensors = ['thigh', 'shank', 'trunk']
285        for i, sensor in enumerate(sensors):
286            if sensor in df_filtered.columns:
287                axes[i].plot(df_filtered.index, df_filtered[sensor])
288                axes[i].set_ylabel(f"{sensor.capitalize()} Acceleration")
289                
290                # Add annotations if available
291                if 'annotations' in df_filtered.columns:
292                    neg = df_filtered[df_filtered.annotations == 1]
293                    pos = df_filtered[df_filtered.annotations == 2]
294                    
295                    if not neg.empty:
296                        axes[i].scatter(neg.index, neg[sensor], 
297                                      c=self.config['colors']['no_freeze'], 
298                                      label="no freeze", alpha=self.config['alpha'])
299                    if not pos.empty:
300                        axes[i].scatter(pos.index, pos[sensor], 
301                                      c=self.config['colors']['freeze'], 
302                                      label="freeze", alpha=self.config['alpha'])
303                    
304                    axes[i].legend()
305        
306        plt.xlabel("Time")
307        plt.tight_layout()
308        plt.show()
309
310
class SensorStatisticsAnalyzer(BaseEDAAnalyzer):
    """
    EDA analyzer for sensor data statistics and feature visualization.

    This analyzer provides statistical analysis and feature visualization
    capabilities for sensor data including sliding windows and extracted
    features.
    """

    def __init__(self):
        super().__init__(
            name="sensor_statistics",
            description="Statistical analysis and feature visualization for sensor data"
        )
        # Marker shapes used when overlaying per-window features on the signal.
        self.config = {
            'figsize': (20, 10),
            'feature_markers': {
                'mean': 'x',
                'rms': 'o',
                'peak_height': 'v',
                'mode': '<',
                'median': '^'
            }
        }

    def analyze(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs) -> Dict[str, Any]:
        """
        Analyze sensor data and return statistical summaries.

        Args:
            data: A single DataFrame or a list of DataFrames to analyze.
            **kwargs: Unused; accepted for interface compatibility.

        Returns:
            For a single DataFrame, a dict of statistics; for a list, a dict
            keyed by ``'dataset_<i>'`` mapping to each dataset's statistics.
        """
        if isinstance(data, list):
            return {f'dataset_{i}': self._compute_statistics(df)
                    for i, df in enumerate(data)}
        return self._compute_statistics(data)

    def _compute_statistics(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Compute descriptive, correlation, moment, and per-sensor statistics."""
        # Restrict moment/correlation computations to numeric columns; in
        # pandas >= 2 calling corr/skew/kurtosis on mixed-dtype frames raises
        # instead of silently dropping non-numeric columns.
        numeric = df.select_dtypes(include=[np.number])
        stats = {
            'basic_stats': df.describe().to_dict(),
            'correlation_matrix': numeric.corr().to_dict() if len(numeric.columns) > 1 else {},
            'skewness': numeric.skew().to_dict(),
            'kurtosis': numeric.kurtosis().to_dict()
        }

        # Robust per-sensor summaries (NaNs dropped before computing).
        sensor_stats = {}
        for sensor in ('thigh', 'shank', 'trunk'):
            if sensor in df.columns:
                s = df[sensor].dropna()
                q25, q75 = s.quantile(0.25), s.quantile(0.75)
                sensor_stats[sensor] = {
                    'mean': s.mean(),
                    'std': s.std(),
                    'variance': s.var(),
                    'min': s.min(),
                    'max': s.max(),
                    'range': s.max() - s.min(),
                    'median': s.median(),
                    'q25': q25,
                    'q75': q75,
                    'iqr': q75 - q25
                }
        stats['sensor_statistics'] = sensor_stats
        return stats

    def visualize(self, sliding_windows: List[Dict], features: List[Dict], **kwargs):
        """
        Create visualizations of sensor data with overlaid features.

        Args:
            sliding_windows: List of dicts with 'name' and 'data' (windows).
            features: List of dicts with 'name' and 'features'.
            **kwargs:
                sensor_name: Sensor to plot (default 'shank').
                start_idx / end_idx: Index range to include (default 0 / 1000).
                num_windows: Number of windows to plot (default 10).
                save: Save instead of showing the plot (default False).
                file_path: Output path when saving; prompts if omitted.
        """
        sensor_name = kwargs.get('sensor_name', 'shank')
        start_idx = kwargs.get('start_idx', 0)
        end_idx = kwargs.get('end_idx', 1000)
        num_windows = kwargs.get('num_windows', 10)
        save = kwargs.get('save', False)
        file_path = kwargs.get('file_path')

        self._plot_sensor_with_features(sliding_windows, features, start_idx, end_idx,
                                        sensor_name, num_windows, save, file_path)

    def _plot_sensor_with_features(self, sliding_windows: List[Dict], features: List[Dict],
                                   start_idx: int, end_idx: int, sensor_name: str = "shank",
                                   num_windows: int = 10, save: bool = False,
                                   file_path: Optional[str] = None):
        """
        Plot sliding windows of sensor data with overlaid statistical features.

        Args:
            sliding_windows: List of sliding window dictionaries.
            features: List of feature dictionaries.
            start_idx: Start index of the time window.
            end_idx: End index of the time window.
            sensor_name: Name of the sensor to plot.
            num_windows: Number of sliding windows to plot.
            save: Whether to save the plot.
            file_path: Destination path when saving; if None and save is True,
                the user is prompted (backward-compatible default).
        """
        fig, axes = plt.subplots(2, 1, figsize=self.config['figsize'],
                                 gridspec_kw={'height_ratios': [3, 1]})

        # Locate the windows and features for the requested sensor.
        sensor_windows = next((sw['data'] for sw in sliding_windows
                               if sw['name'] == sensor_name), None)
        if sensor_windows is None:
            print(f"Sensor '{sensor_name}' not found in sliding_windows.")
            return

        sensor_features = next((feat['features'] for feat in features
                                if feat['name'] == sensor_name), None)
        if sensor_features is None:
            print(f"Sensor '{sensor_name}' not found in features.")
            return

        # Keep only windows that fall entirely inside [start_idx, end_idx].
        filtered_windows = [series for series in sensor_windows
                            if start_idx <= series.index[0] and series.index[-1] <= end_idx]
        if not filtered_windows:
            print(f"No windows found in the specified index range ({start_idx} - {end_idx}).")
            return

        # Entropy & dominant-frequency values are collected for the lower panel.
        entropy_values = []
        dominant_frequencies = []

        for i in range(min(num_windows, len(filtered_windows))):
            series = filtered_windows[i]
            time_values = series.index.to_numpy()
            signal_values = series.values
            window_start, window_end = time_values[0], time_values[-1]

            axes[0].plot(time_values, signal_values, alpha=0.6)
            # Dotted verticals mark each window's boundaries.
            axes[0].axvline(x=window_start, color='black', linestyle='dotted', alpha=0.7)
            axes[0].axvline(x=window_end, color='black', linestyle='dotted', alpha=0.7)

            # Overlay scalar features at the sample whose value is closest.
            for feature_name, marker in self.config['feature_markers'].items():
                if feature_name in sensor_features and len(sensor_features[feature_name]) > i:
                    feature_value = sensor_features[feature_name][i]
                    if feature_value != 0:  # Zero is treated as "feature absent"
                        closest_index = np.argmin(np.abs(signal_values - feature_value))
                        axes[0].scatter(time_values[closest_index], feature_value, color='red',
                                        marker=marker, s=100,
                                        label=feature_name if i == 0 else "")

            if 'entropy' in sensor_features and len(sensor_features['entropy']) > i:
                entropy_values.append(sensor_features['entropy'][i])
            if 'dominant_frequency' in sensor_features and len(sensor_features['dominant_frequency']) > i:
                dominant_frequencies.append(sensor_features['dominant_frequency'][i])

        axes[0].set_xlabel('Time')
        axes[0].set_ylabel(f'{sensor_name} Signal')
        axes[0].set_title(f'First {num_windows} windows of {sensor_name} in range {start_idx}-{end_idx} with Features')
        axes[0].legend()

        # BUG FIX: window_indices used to be defined only when
        # dominant_frequencies was non-empty, so plotting a sensor that had
        # entropy features but no dominant-frequency features raised NameError.
        # Each series now derives its own x positions.
        if dominant_frequencies:
            axes[1].plot(range(len(dominant_frequencies)), dominant_frequencies,
                         label="Dominant Frequency", marker="o", linestyle="dashed", color="blue")
        if entropy_values:
            axes[1].bar(range(len(entropy_values)), entropy_values, alpha=0.6,
                        label="Entropy", color="green")

        axes[1].set_xlabel("Window Index")
        axes[1].set_ylabel("Feature Value")
        axes[1].set_title("Frequency & Entropy Features")
        axes[1].legend()

        plt.tight_layout()

        if save:
            if file_path is None:
                # Backward-compatible interactive fallback when no path given.
                file_path = input("Enter the file path to save the plot (e.g., 'plot.png'): ")
            plt.savefig(file_path, dpi=300)
            print(f"Plot saved at {file_path}")
        else:
            plt.show()
509
510
def harup_basic_stats(harup_df):
    """
    Print and return basic statistics for each sensor column in a HAR-UP DataFrame.

    Args:
        harup_df (pd.DataFrame): DataFrame containing HAR-UP data.

    Returns:
        pd.DataFrame: Transposed ``describe()`` output — one row per column
        with count/mean/std/min/quartiles/max statistics.
    """
    # NOTE: a redundant function-local `import pandas as pd` was removed; the
    # module-level import already provides pandas and this function never
    # references `pd` directly.
    stats = harup_df.describe().T
    print(stats)
    return stats
523
def harup_missing_data_report(harup_df):
    """
    Print and return missing value counts for each column in a HAR-UP DataFrame.

    Args:
        harup_df (pd.DataFrame): DataFrame containing HAR-UP data.

    Returns:
        pd.Series: Count of missing (NaN/None) values per column, indexed by
        column name.
    """
    missing = harup_df.isnull().sum()
    print(missing)
    return missing
535
def harup_activity_stats(harup_df):
    """
    Print and return counts for each activity label in a HAR-UP DataFrame.

    Args:
        harup_df (pd.DataFrame): DataFrame containing HAR-UP data.

    Returns:
        pd.Series or None: Occurrence counts indexed by activity label and
        sorted by label value, or None when no 'activity_label' column exists.
    """
    if 'activity_label' not in harup_df.columns:
        print("No 'activity_label' column found.")
        return None
    counts = harup_df['activity_label'].value_counts().sort_index()
    print(counts)
    return counts
class DaphnetVisualizationAnalyzer(gaitsetpy.core.base_classes.BaseEDAAnalyzer):
 18class DaphnetVisualizationAnalyzer(BaseEDAAnalyzer):
 19    """
 20    EDA analyzer for Daphnet dataset visualization.
 21    
 22    This analyzer provides comprehensive visualization capabilities for Daphnet dataset
 23    including thigh, shank, and trunk sensor data.
 24    """
 25    
 26    def __init__(self):
 27        super().__init__(
 28            name="daphnet_visualization",
 29            description="Comprehensive visualization analyzer for Daphnet dataset sensor data"
 30        )
 31        self.config = {
 32            'figsize': (20, 16),
 33            'colors': {
 34                'no_freeze': 'orange',
 35                'freeze': 'purple'
 36            },
 37            'alpha': 0.6
 38        }
 39    
 40    def analyze(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs) -> Dict[str, Any]:
 41        """
 42        Analyze the data and return statistical summaries.
 43        
 44        Args:
 45            data: Input data to analyze
 46            **kwargs: Additional arguments
 47            
 48        Returns:
 49            Dictionary containing analysis results
 50        """
 51        if isinstance(data, list):
 52            # Multiple datasets
 53            results = {}
 54            for i, df in enumerate(data):
 55                results[f'dataset_{i}'] = self._analyze_single_dataset(df)
 56            return results
 57        else:
 58            # Single dataset
 59            return self._analyze_single_dataset(data)
 60    
 61    def _analyze_single_dataset(self, df: pd.DataFrame) -> Dict[str, Any]:
 62        """Analyze a single dataset."""
 63        # Basic statistics
 64        stats = {
 65            'shape': df.shape,
 66            'columns': df.columns.tolist(),
 67            'annotation_distribution': df['annotations'].value_counts().to_dict() if 'annotations' in df.columns else {},
 68            'missing_values': df.isnull().sum().to_dict(),
 69            'data_range': {
 70                'min': df.select_dtypes(include=[np.number]).min().to_dict(),
 71                'max': df.select_dtypes(include=[np.number]).max().to_dict()
 72            }
 73        }
 74        
 75        # Sensor-specific statistics
 76        sensor_stats = {}
 77        for sensor in ['thigh', 'shank', 'trunk']:
 78            if sensor in df.columns:
 79                sensor_stats[sensor] = {
 80                    'mean': df[sensor].mean(),
 81                    'std': df[sensor].std(),
 82                    'min': df[sensor].min(),
 83                    'max': df[sensor].max()
 84                }
 85        
 86        stats['sensor_statistics'] = sensor_stats
 87        return stats
 88    
 89    def visualize(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs):
 90        """
 91        Create visualizations of the data.
 92        
 93        Args:
 94            data: Input data to visualize
 95            **kwargs: Additional arguments including sensor_type, dataset_index, names
 96        """
 97        sensor_type = kwargs.get('sensor_type', 'all')
 98        dataset_index = kwargs.get('dataset_index', 0)
 99        names = kwargs.get('names', [])
100        
101        if isinstance(data, list):
102            if dataset_index < len(data):
103                df = data[dataset_index]
104                dataset_name = names[dataset_index] if dataset_index < len(names) else f"Dataset {dataset_index}"
105            else:
106                print(f"Dataset index {dataset_index} out of range")
107                return
108        else:
109            df = data
110            dataset_name = names[0] if names else "Dataset"
111        
112        if sensor_type == 'all':
113            self._plot_all_sensors(df, dataset_name)
114        elif sensor_type == 'thigh':
115            self._plot_thigh_data(df, dataset_name)
116        elif sensor_type == 'shank':
117            self._plot_shank_data(df, dataset_name)
118        elif sensor_type == 'trunk':
119            self._plot_trunk_data(df, dataset_name)
120        else:
121            print(f"Unknown sensor type: {sensor_type}")
122    
123    def _plot_thigh_data(self, df: pd.DataFrame, dataset_name: str):
124        """Plot thigh sensor data."""
125        print(f"Plotting thigh data for {dataset_name}")
126        
127        # Filter data
128        df_filtered = df[df.annotations > 0] if 'annotations' in df.columns else df
129        
130        if df_filtered.empty:
131            print("No valid data to plot")
132            return
133        
134        # Create figure
135        fig, axes = plt.subplots(4, 1, sharex=True, figsize=self.config['figsize'])
136        fig.suptitle(f"Thigh Data from {dataset_name}")
137        
138        # Separate freeze and no-freeze data
139        if 'annotations' in df.columns:
140            neg = df_filtered[df_filtered.annotations == 1]  # No freeze
141            pos = df_filtered[df_filtered.annotations == 2]  # Freeze
142        else:
143            neg = df_filtered
144            pos = pd.DataFrame()
145        
146        # Plot each component
147        components = ['thigh_h_fd', 'thigh_v', 'thigh_h_l', 'thigh']
148        labels = ['Horizontal Forward', 'Vertical', 'Horizontal Lateral', 'Overall']
149        
150        for i, (component, label) in enumerate(zip(components, labels)):
151            if component in df_filtered.columns:
152                # Plot main signal
153                axes[i].plot(df_filtered.index, df_filtered[component])
154                axes[i].set_ylabel(f"{label} Thigh Acceleration")
155                
156                # Plot annotations if available
157                if not neg.empty:
158                    axes[i].scatter(neg.index, neg[component], 
159                                  c=self.config['colors']['no_freeze'], 
160                                  label="no freeze", alpha=self.config['alpha'])
161                if not pos.empty:
162                    axes[i].scatter(pos.index, pos[component], 
163                                  c=self.config['colors']['freeze'], 
164                                  label="freeze", alpha=self.config['alpha'])
165                
166                axes[i].legend()
167        
168        plt.xlabel("Time")
169        plt.tight_layout()
170        plt.show()
171    
172    def _plot_shank_data(self, df: pd.DataFrame, dataset_name: str):
173        """Plot shank sensor data."""
174        print(f"Plotting shank data for {dataset_name}")
175        
176        # Filter data
177        df_filtered = df[df.annotations > 0] if 'annotations' in df.columns else df
178        
179        if df_filtered.empty:
180            print("No valid data to plot")
181            return
182        
183        # Create figure
184        fig, axes = plt.subplots(4, 1, sharex=True, figsize=self.config['figsize'])
185        fig.suptitle(f"Shank Data from {dataset_name}")
186        
187        # Separate freeze and no-freeze data
188        if 'annotations' in df.columns:
189            neg = df_filtered[df_filtered.annotations == 1]  # No freeze
190            pos = df_filtered[df_filtered.annotations == 2]  # Freeze
191        else:
192            neg = df_filtered
193            pos = pd.DataFrame()
194        
195        # Plot each component
196        components = ['shank_h_fd', 'shank_v', 'shank_h_l', 'shank']
197        labels = ['Horizontal Forward', 'Vertical', 'Horizontal Lateral', 'Overall']
198        
199        for i, (component, label) in enumerate(zip(components, labels)):
200            if component in df_filtered.columns:
201                # Plot main signal
202                axes[i].plot(df_filtered.index, df_filtered[component])
203                axes[i].set_ylabel(f"{label} Shank Acceleration")
204                
205                # Plot annotations if available
206                if not neg.empty:
207                    axes[i].scatter(neg.index, neg[component], 
208                                  c=self.config['colors']['no_freeze'], 
209                                  label="no freeze", alpha=self.config['alpha'])
210                if not pos.empty:
211                    axes[i].scatter(pos.index, pos[component], 
212                                  c=self.config['colors']['freeze'], 
213                                  label="freeze", alpha=self.config['alpha'])
214                
215                axes[i].legend()
216        
217        plt.xlabel("Time")
218        plt.tight_layout()
219        plt.show()
220    
221    def _plot_trunk_data(self, df: pd.DataFrame, dataset_name: str):
222        """Plot trunk sensor data."""
223        print(f"Plotting trunk data for {dataset_name}")
224        
225        # Filter data
226        df_filtered = df[df.annotations > 0] if 'annotations' in df.columns else df
227        
228        if df_filtered.empty:
229            print("No valid data to plot")
230            return
231        
232        # Create figure
233        fig, axes = plt.subplots(4, 1, sharex=True, figsize=self.config['figsize'])
234        fig.suptitle(f"Trunk Data from {dataset_name}")
235        
236        # Separate freeze and no-freeze data
237        if 'annotations' in df.columns:
238            neg = df_filtered[df_filtered.annotations == 1]  # No freeze
239            pos = df_filtered[df_filtered.annotations == 2]  # Freeze
240        else:
241            neg = df_filtered
242            pos = pd.DataFrame()
243        
244        # Plot each component
245        components = ['trunk_h_fd', 'trunk_v', 'trunk_h_l', 'trunk']
246        labels = ['Horizontal Forward', 'Vertical', 'Horizontal Lateral', 'Overall']
247        
248        for i, (component, label) in enumerate(zip(components, labels)):
249            if component in df_filtered.columns:
250                # Plot main signal
251                axes[i].plot(df_filtered.index, df_filtered[component])
252                axes[i].set_ylabel(f"{label} Trunk Acceleration")
253                
254                # Plot annotations if available
255                if not neg.empty:
256                    axes[i].scatter(neg.index, neg[component], 
257                                  c=self.config['colors']['no_freeze'], 
258                                  label="no freeze", alpha=self.config['alpha'])
259                if not pos.empty:
260                    axes[i].scatter(pos.index, pos[component], 
261                                  c=self.config['colors']['freeze'], 
262                                  label="freeze", alpha=self.config['alpha'])
263                
264                axes[i].legend()
265        
266        plt.xlabel("Time")
267        plt.tight_layout()
268        plt.show()
269    
270    def _plot_all_sensors(self, df: pd.DataFrame, dataset_name: str):
271        """Plot all sensor data in a combined view."""
272        print(f"Plotting all sensor data for {dataset_name}")
273        
274        # Create figure with subplots for each sensor
275        fig, axes = plt.subplots(3, 1, sharex=True, figsize=self.config['figsize'])
276        fig.suptitle(f"All Sensor Data from {dataset_name}")
277        
278        # Filter data
279        df_filtered = df[df.annotations > 0] if 'annotations' in df.columns else df
280        
281        if df_filtered.empty:
282            print("No valid data to plot")
283            return
284        
285        sensors = ['thigh', 'shank', 'trunk']
286        for i, sensor in enumerate(sensors):
287            if sensor in df_filtered.columns:
288                axes[i].plot(df_filtered.index, df_filtered[sensor])
289                axes[i].set_ylabel(f"{sensor.capitalize()} Acceleration")
290                
291                # Add annotations if available
292                if 'annotations' in df_filtered.columns:
293                    neg = df_filtered[df_filtered.annotations == 1]
294                    pos = df_filtered[df_filtered.annotations == 2]
295                    
296                    if not neg.empty:
297                        axes[i].scatter(neg.index, neg[sensor], 
298                                      c=self.config['colors']['no_freeze'], 
299                                      label="no freeze", alpha=self.config['alpha'])
300                    if not pos.empty:
301                        axes[i].scatter(pos.index, pos[sensor], 
302                                      c=self.config['colors']['freeze'], 
303                                      label="freeze", alpha=self.config['alpha'])
304                    
305                    axes[i].legend()
306        
307        plt.xlabel("Time")
308        plt.tight_layout()
309        plt.show()

EDA analyzer for Daphnet dataset visualization.

This analyzer provides comprehensive visualization capabilities for Daphnet dataset including thigh, shank, and trunk sensor data.

DaphnetVisualizationAnalyzer()
26    def __init__(self):
27        super().__init__(
28            name="daphnet_visualization",
29            description="Comprehensive visualization analyzer for Daphnet dataset sensor data"
30        )
31        self.config = {
32            'figsize': (20, 16),
33            'colors': {
34                'no_freeze': 'orange',
35                'freeze': 'purple'
36            },
37            'alpha': 0.6
38        }

Initialize the EDA analyzer.

Args: name — Name of the EDA analyzer; description — Description of the EDA analyzer.

config
def analyze( self, data: Union[pandas.core.frame.DataFrame, List[pandas.core.frame.DataFrame]], **kwargs) -> Dict[str, Any]:
40    def analyze(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs) -> Dict[str, Any]:
41        """
42        Analyze the data and return statistical summaries.
43        
44        Args:
45            data: Input data to analyze
46            **kwargs: Additional arguments
47            
48        Returns:
49            Dictionary containing analysis results
50        """
51        if isinstance(data, list):
52            # Multiple datasets
53            results = {}
54            for i, df in enumerate(data):
55                results[f'dataset_{i}'] = self._analyze_single_dataset(df)
56            return results
57        else:
58            # Single dataset
59            return self._analyze_single_dataset(data)

Analyze the data and return statistical summaries.

Args: data: Input data to analyze **kwargs: Additional arguments

Returns: Dictionary containing analysis results

def visualize( self, data: Union[pandas.core.frame.DataFrame, List[pandas.core.frame.DataFrame]], **kwargs):
 89    def visualize(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs):
 90        """
 91        Create visualizations of the data.
 92        
 93        Args:
 94            data: Input data to visualize
 95            **kwargs: Additional arguments including sensor_type, dataset_index, names
 96        """
 97        sensor_type = kwargs.get('sensor_type', 'all')
 98        dataset_index = kwargs.get('dataset_index', 0)
 99        names = kwargs.get('names', [])
100        
101        if isinstance(data, list):
102            if dataset_index < len(data):
103                df = data[dataset_index]
104                dataset_name = names[dataset_index] if dataset_index < len(names) else f"Dataset {dataset_index}"
105            else:
106                print(f"Dataset index {dataset_index} out of range")
107                return
108        else:
109            df = data
110            dataset_name = names[0] if names else "Dataset"
111        
112        if sensor_type == 'all':
113            self._plot_all_sensors(df, dataset_name)
114        elif sensor_type == 'thigh':
115            self._plot_thigh_data(df, dataset_name)
116        elif sensor_type == 'shank':
117            self._plot_shank_data(df, dataset_name)
118        elif sensor_type == 'trunk':
119            self._plot_trunk_data(df, dataset_name)
120        else:
121            print(f"Unknown sensor type: {sensor_type}")

Create visualizations of the data.

Args: data: Input data to visualize **kwargs: Additional arguments including sensor_type, dataset_index, names

class SensorStatisticsAnalyzer(gaitsetpy.core.base_classes.BaseEDAAnalyzer):
class SensorStatisticsAnalyzer(BaseEDAAnalyzer):
    """
    EDA analyzer for sensor data statistics and feature visualization.

    This analyzer provides statistical analysis and feature visualization capabilities
    for sensor data including sliding windows and extracted features.
    """

    def __init__(self):
        super().__init__(
            name="sensor_statistics",
            description="Statistical analysis and feature visualization for sensor data"
        )
        # Plotting configuration; each marker identifies which statistical
        # feature is overlaid on the time-series plot.
        self.config = {
            'figsize': (20, 10),
            'feature_markers': {
                'mean': 'x',
                'rms': 'o',
                'peak_height': 'v',
                'mode': '<',
                'median': '^'
            }
        }

    def analyze(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs) -> Dict[str, Any]:
        """
        Analyze sensor data and return statistical summaries.

        Args:
            data: Input data to analyze (single DataFrame or list of DataFrames)
            **kwargs: Additional arguments (currently unused)

        Returns:
            Dictionary containing analysis results. For a list input, results
            are keyed 'dataset_0', 'dataset_1', ...
        """
        if isinstance(data, list):
            # Multiple datasets: analyze each one independently.
            return {f'dataset_{i}': self._compute_statistics(df) for i, df in enumerate(data)}
        # Single dataset
        return self._compute_statistics(data)

    def _compute_statistics(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Compute comprehensive statistics for a dataset.

        Returns a dict with describe() output, correlation matrix,
        skewness/kurtosis, and per-sensor summary statistics.
        """
        # Moment-based statistics (corr/skew/kurtosis) are only defined for
        # numeric columns; restricting to the numeric subset avoids
        # TypeErrors on mixed-dtype frames (pandas >= 2.0 no longer drops
        # non-numeric columns silently).
        numeric_df = df.select_dtypes(include=[np.number])
        stats = {
            'basic_stats': df.describe().to_dict(),
            'correlation_matrix': numeric_df.corr().to_dict() if len(numeric_df.columns) > 1 else {},
            'skewness': numeric_df.skew().to_dict(),
            'kurtosis': numeric_df.kurtosis().to_dict()
        }

        # Per-sensor summary statistics for the standard Daphnet sensors.
        sensor_stats = {}
        for sensor in ['thigh', 'shank', 'trunk']:
            if sensor in df.columns:
                sensor_data = df[sensor].dropna()
                q25 = sensor_data.quantile(0.25)
                q75 = sensor_data.quantile(0.75)
                sensor_stats[sensor] = {
                    'mean': sensor_data.mean(),
                    'std': sensor_data.std(),
                    'variance': sensor_data.var(),
                    'min': sensor_data.min(),
                    'max': sensor_data.max(),
                    'range': sensor_data.max() - sensor_data.min(),
                    'median': sensor_data.median(),
                    'q25': q25,
                    'q75': q75,
                    'iqr': q75 - q25
                }

        stats['sensor_statistics'] = sensor_stats
        return stats

    def visualize(self, sliding_windows: List[Dict], features: List[Dict], **kwargs):
        """
        Create visualizations of sensor data with overlaid features.

        Args:
            sliding_windows: List of sliding window dictionaries
            features: List of feature dictionaries
            **kwargs: Additional arguments including sensor_name, start_idx,
                end_idx, num_windows, save
        """
        sensor_name = kwargs.get('sensor_name', 'shank')
        start_idx = kwargs.get('start_idx', 0)
        end_idx = kwargs.get('end_idx', 1000)
        num_windows = kwargs.get('num_windows', 10)
        save = kwargs.get('save', False)

        self._plot_sensor_with_features(sliding_windows, features, start_idx, end_idx,
                                      sensor_name, num_windows, save)

    def _plot_sensor_with_features(self, sliding_windows: List[Dict], features: List[Dict],
                                 start_idx: int, end_idx: int, sensor_name: str = "shank",
                                 num_windows: int = 10, save: bool = False):
        """
        Plot sliding windows of sensor data with overlaid statistical features.

        Args:
            sliding_windows: List of sliding window dictionaries
            features: List of feature dictionaries
            start_idx: Start index of the time window
            end_idx: End index of the time window
            sensor_name: Name of the sensor to plot
            num_windows: Number of sliding windows to plot
            save: Whether to save the plot (prompts for a path on stdin)
        """
        fig, axes = plt.subplots(2, 1, figsize=self.config['figsize'],
                                gridspec_kw={'height_ratios': [3, 1]})

        # Locate the windows for the requested sensor.
        sensor_windows = next((sw['data'] for sw in sliding_windows if sw['name'] == sensor_name), None)
        if sensor_windows is None:
            print(f"Sensor '{sensor_name}' not found in sliding_windows.")
            return

        # Locate the corresponding feature dict.
        sensor_features = next((feat['features'] for feat in features if feat['name'] == sensor_name), None)
        if sensor_features is None:
            print(f"Sensor '{sensor_name}' not found in features.")
            return

        # Keep only windows that lie fully inside [start_idx, end_idx].
        filtered_windows = [series for series in sensor_windows
                           if start_idx <= series.index[0] and series.index[-1] <= end_idx]

        if not filtered_windows:
            print(f"No windows found in the specified index range ({start_idx} - {end_idx}).")
            return

        # Entropy & frequency features are collected for the secondary axis.
        entropy_values = []
        dominant_frequencies = []

        # Plot first num_windows windows
        for i in range(min(num_windows, len(filtered_windows))):
            series = filtered_windows[i]

            time_values = series.index.to_numpy()
            signal_values = series.values

            window_start, window_end = time_values[0], time_values[-1]

            # Time-series trace for this window.
            axes[0].plot(time_values, signal_values, alpha=0.6)

            # Mark window boundaries with vertical dotted lines.
            axes[0].axvline(x=window_start, color='black', linestyle='dotted', alpha=0.7)
            axes[0].axvline(x=window_end, color='black', linestyle='dotted', alpha=0.7)

            # Overlay statistical features, placed at the sample whose value
            # is closest to the feature value (best-effort placement).
            for feature_name, marker in self.config['feature_markers'].items():
                if feature_name in sensor_features and len(sensor_features[feature_name]) > i:
                    feature_value = sensor_features[feature_name][i]
                    if feature_value != 0:  # Skip zero values
                        closest_index = np.argmin(np.abs(signal_values - feature_value))
                        closest_time = time_values[closest_index]
                        axes[0].scatter(closest_time, feature_value, color='red',
                                      marker=marker, s=100, label=feature_name if i == 0 else "")

            # Store entropy & frequency features for separate plotting.
            if 'entropy' in sensor_features and len(sensor_features['entropy']) > i:
                entropy_values.append(sensor_features['entropy'][i])
            if 'dominant_frequency' in sensor_features and len(sensor_features['dominant_frequency']) > i:
                dominant_frequencies.append(sensor_features['dominant_frequency'][i])

        # Labels and title for time-series plot
        axes[0].set_xlabel('Time')
        axes[0].set_ylabel(f'{sensor_name} Signal')
        axes[0].set_title(f'First {num_windows} windows of {sensor_name} in range {start_idx}-{end_idx} with Features')
        axes[0].legend()

        # Frequency-domain & entropy plot. Each series uses its own index
        # range so an empty dominant-frequency list no longer leaves the
        # shared index variable undefined when entropy values exist
        # (previously a NameError).
        if dominant_frequencies:
            axes[1].plot(range(len(dominant_frequencies)), dominant_frequencies,
                        label="Dominant Frequency", marker="o", linestyle="dashed", color="blue")

        if entropy_values:
            axes[1].bar(range(len(entropy_values)), entropy_values, alpha=0.6, label="Entropy", color="green")

        axes[1].set_xlabel("Window Index")
        axes[1].set_ylabel("Feature Value")
        axes[1].set_title("Frequency & Entropy Features")
        axes[1].legend()

        plt.tight_layout()

        # Save or show plot
        if save:
            # NOTE(review): interactive prompt blocks on stdin; callers in
            # non-interactive contexts should pass save=False.
            file_path = input("Enter the file path to save the plot (e.g., 'plot.png'): ")
            plt.savefig(file_path, dpi=300)
            print(f"Plot saved at {file_path}")
        else:
            plt.show()

EDA analyzer for sensor data statistics and feature visualization.

This analyzer provides statistical analysis and feature visualization capabilities for sensor data including sliding windows and extracted features.

SensorStatisticsAnalyzer()
320    def __init__(self):
321        super().__init__(
322            name="sensor_statistics",
323            description="Statistical analysis and feature visualization for sensor data"
324        )
325        self.config = {
326            'figsize': (20, 10),
327            'feature_markers': {
328                'mean': 'x',
329                'rms': 'o',
330                'peak_height': 'v',
331                'mode': '<',
332                'median': '^'
333            }
334        }

Initialize the EDA analyzer.

Args: name: Name of the EDA analyzer description: Description of the EDA analyzer

config
def analyze( self, data: Union[pandas.core.frame.DataFrame, List[pandas.core.frame.DataFrame]], **kwargs) -> Dict[str, Any]:
336    def analyze(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs) -> Dict[str, Any]:
337        """
338        Analyze sensor data and return statistical summaries.
339        
340        Args:
341            data: Input data to analyze
342            **kwargs: Additional arguments
343            
344        Returns:
345            Dictionary containing analysis results
346        """
347        if isinstance(data, list):
348            # Multiple datasets
349            results = {}
350            for i, df in enumerate(data):
351                results[f'dataset_{i}'] = self._compute_statistics(df)
352            return results
353        else:
354            # Single dataset
355            return self._compute_statistics(data)

Analyze sensor data and return statistical summaries.

Args: data: Input data to analyze **kwargs: Additional arguments

Returns: Dictionary containing analysis results

def visualize(self, sliding_windows: List[Dict], features: List[Dict], **kwargs):
387    def visualize(self, sliding_windows: List[Dict], features: List[Dict], **kwargs):
388        """
389        Create visualizations of sensor data with overlaid features.
390        
391        Args:
392            sliding_windows: List of sliding window dictionaries
393            features: List of feature dictionaries
394            **kwargs: Additional arguments including sensor_name, start_idx, end_idx, num_windows
395        """
396        sensor_name = kwargs.get('sensor_name', 'shank')
397        start_idx = kwargs.get('start_idx', 0)
398        end_idx = kwargs.get('end_idx', 1000)
399        num_windows = kwargs.get('num_windows', 10)
400        save = kwargs.get('save', False)
401        
402        self._plot_sensor_with_features(sliding_windows, features, start_idx, end_idx, 
403                                      sensor_name, num_windows, save)

Create visualizations of sensor data with overlaid features.

Args: sliding_windows: List of sliding window dictionaries features: List of feature dictionaries **kwargs: Additional arguments including sensor_name, start_idx, end_idx, num_windows

def harup_basic_stats(harup_df):
def harup_basic_stats(harup_df):
    """
    Print and return basic statistics for each sensor column in a HAR-UP DataFrame.

    Args:
        harup_df (pd.DataFrame): DataFrame containing HAR-UP data.

    Returns:
        pd.DataFrame: Transposed describe() output (one row per column).
    """
    # Removed a dead function-local `import pandas as pd`: pandas is already
    # imported at module level and the name was unused in this body.
    # describe() covers count/mean/std/min/quartiles/max; transposing makes
    # each sensor column a row, which is easier to scan when printed.
    stats = harup_df.describe().T
    print(stats)
    return stats

Print and return basic statistics for each sensor column in a HAR-UP DataFrame. Args: harup_df (pd.DataFrame) — DataFrame containing HAR-UP data. Returns: pd.DataFrame — DataFrame of statistics.

def harup_missing_data_report(harup_df):
def harup_missing_data_report(harup_df):
    """
    Print and return missing value counts for each column in a HAR-UP DataFrame.

    Args:
        harup_df (pd.DataFrame): DataFrame containing HAR-UP data.

    Returns:
        pd.Series: Missing value count per column.
    """
    # isna() is the canonical pandas alias for isnull(); summing over it
    # yields the per-column NaN count.
    per_column_missing = harup_df.isna().sum()
    print(per_column_missing)
    return per_column_missing

Print and return missing value counts for each column in a HAR-UP DataFrame. Args: harup_df (pd.DataFrame) — DataFrame containing HAR-UP data. Returns: pd.Series — Series of missing value counts.

def harup_activity_stats(harup_df):
def harup_activity_stats(harup_df):
    """
    Print and return counts for each activity label in a HAR-UP DataFrame.

    Args:
        harup_df (pd.DataFrame): DataFrame containing HAR-UP data.

    Returns:
        pd.Series: Count of samples per activity label (sorted by label),
            or None when no 'activity_label' column exists.
    """
    # Guard clause inverted relative to a plain existence check: handle the
    # normal path first, fall through to the warning otherwise.
    if 'activity_label' in harup_df.columns:
        label_counts = harup_df['activity_label'].value_counts().sort_index()
        print(label_counts)
        return label_counts
    print("No 'activity_label' column found.")
    return None

Print and return counts for each activity label in a HAR-UP DataFrame. Args: harup_df (pd.DataFrame) — DataFrame containing HAR-UP data. Returns: pd.Series — Series of activity label counts, or None when the column is absent.