gaitsetpy.preprocessing

preprocessing: Preprocessing pipelines for gait data.

This module provides both the new class-based preprocessors and the legacy function-based API. All preprocessors inherit from BasePreprocessor and are registered with the PreprocessingManager.

Features:

  • Clipping and normalization
  • Noise removal (moving average, frequency filtering)
  • Outlier detection and removal
  • Baseline and drift correction
  • Artifact removal and trend removal
  • DC offset correction

Maintainer: @aharshit123456

  1"""
  2preprocessing: Preprocessing pipelines for gait data.
  3
  4This module provides both the new class-based preprocessors and legacy function-based API.
  5All preprocessors inherit from BasePreprocessor and are registered with the PreprocessingManager.
  6
  7Features:
  8- Clipping and normalization
  9- Noise removal (moving average, frequency filtering)
 10- Outlier detection and removal
 11- Baseline and drift correction
 12- Artifact removal and trend removal
 13- DC offset correction
 14
 15Maintainer: @aharshit123456
 16"""
 17
 18# Import the new class-based preprocessors
 19from .preprocessors import (
 20    ClippingPreprocessor,
 21    NoiseRemovalPreprocessor,
 22    OutlierRemovalPreprocessor,
 23    BaselineRemovalPreprocessor,
 24    DriftRemovalPreprocessor,
 25    HighFrequencyNoiseRemovalPreprocessor,
 26    LowFrequencyNoiseRemovalPreprocessor,
 27    ArtifactRemovalPreprocessor,
 28    TrendRemovalPreprocessor,
 29    DCOffsetRemovalPreprocessor
 30)
 31
 32# Import legacy functions for backward compatibility
 33from .pipeline import (
 34    clip_sliding_windows,
 35    remove_noise,
 36    remove_outliers,
 37    remove_baseline,
 38    remove_drift,
 39    remove_artifacts,
 40    remove_trend,
 41    remove_dc_offset,
 42    remove_high_frequency_noise,
 43    remove_low_frequency_noise
 44)
 45
 46# Import managers
 47from ..core.managers import PreprocessingManager
 48
 49# Register all preprocessors with the manager
 50def _register_preprocessors():
 51    """Register all available preprocessors with the PreprocessingManager."""
 52    manager = PreprocessingManager()
 53    manager.register_preprocessor("clipping", ClippingPreprocessor)
 54    manager.register_preprocessor("noise_removal", NoiseRemovalPreprocessor)
 55    manager.register_preprocessor("outlier_removal", OutlierRemovalPreprocessor)
 56    manager.register_preprocessor("baseline_removal", BaselineRemovalPreprocessor)
 57    manager.register_preprocessor("drift_removal", DriftRemovalPreprocessor)
 58    manager.register_preprocessor("high_frequency_noise_removal", HighFrequencyNoiseRemovalPreprocessor)
 59    manager.register_preprocessor("low_frequency_noise_removal", LowFrequencyNoiseRemovalPreprocessor)
 60    manager.register_preprocessor("artifact_removal", ArtifactRemovalPreprocessor)
 61    manager.register_preprocessor("trend_removal", TrendRemovalPreprocessor)
 62    manager.register_preprocessor("dc_offset_removal", DCOffsetRemovalPreprocessor)
 63
 64# Auto-register preprocessors when module is imported
 65_register_preprocessors()
 66
 67# Convenient access to the preprocessing manager
 68def get_preprocessing_manager():
 69    """Get the singleton PreprocessingManager instance."""
 70    return PreprocessingManager()
 71
 72# Helper function to get available preprocessors
 73def get_available_preprocessors():
 74    """Get list of available preprocessor names."""
 75    return PreprocessingManager().get_available_components()
 76
 77# Helper function to preprocess data using manager
 78def preprocess_data(preprocessor_name: str, data, **kwargs):
 79    """
 80    Preprocess data using the PreprocessingManager.
 81    
 82    Args:
 83        preprocessor_name: Name of the preprocessor
 84        data: Input data to preprocess
 85        **kwargs: Additional arguments for preprocessing
 86        
 87    Returns:
 88        Preprocessed data
 89    """
 90    return PreprocessingManager().preprocess_data(preprocessor_name, data, **kwargs)
 91
 92# Pipeline function for chaining multiple preprocessors
 93def create_preprocessing_pipeline(preprocessor_names: list, **kwargs):
 94    """
 95    Create a preprocessing pipeline with multiple preprocessors.
 96    
 97    Args:
 98        preprocessor_names: List of preprocessor names to chain
 99        **kwargs: Additional arguments for individual preprocessors
100        
101    Returns:
102        Function that applies all preprocessors in sequence
103    """
104    manager = PreprocessingManager()
105    
106    def pipeline(data):
107        processed_data = data
108        for name in preprocessor_names:
109            preprocessor = manager.get_cached_instance(name, name, f"{name} preprocessor")
110            processed_data = preprocessor.fit_transform(processed_data, **kwargs.get(name, {}))
111        return processed_data
112    
113    return pipeline
114
# Public API of the package: names exported by ``from ... import *``.
__all__ = [
    # Class-based preprocessors (imported from .preprocessors)
    'ClippingPreprocessor',
    'NoiseRemovalPreprocessor',
    'OutlierRemovalPreprocessor',
    'BaselineRemovalPreprocessor',
    'DriftRemovalPreprocessor',
    'HighFrequencyNoiseRemovalPreprocessor',
    'LowFrequencyNoiseRemovalPreprocessor',
    'ArtifactRemovalPreprocessor',
    'TrendRemovalPreprocessor',
    'DCOffsetRemovalPreprocessor',
    # Legacy function-based API (imported from .pipeline), kept for backward compatibility
    'clip_sliding_windows',
    'remove_noise',
    'remove_outliers',
    'remove_baseline',
    'remove_drift',
    'remove_artifacts',
    'remove_trend',
    'remove_dc_offset',
    'remove_high_frequency_noise',
    'remove_low_frequency_noise',
    # Manager helper functions defined in this module
    'get_preprocessing_manager',
    'get_available_preprocessors',
    'preprocess_data',
    'create_preprocessing_pipeline'
]
class ClippingPreprocessor(gaitsetpy.core.base_classes.BasePreprocessor):
class ClippingPreprocessor(BasePreprocessor):
    """
    Preprocessor that limits every value to the range [min_val, max_val].
    """

    def __init__(self, min_val: float = -1, max_val: float = 1):
        """Store the default clipping bounds in ``self.config``."""
        super().__init__(
            name="clipping",
            description="Clips values in the data to be within a specified range"
        )
        self.config = dict(min_val=min_val, max_val=max_val)

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Mark the preprocessor as fitted; clipping learns nothing from the data.

        Args:
            data: Input data (not inspected)
            **kwargs: ``min_val`` / ``max_val`` overrides that are stored in
                the config; any other keyword is ignored
        """
        for option, value in kwargs.items():
            if option in ('min_val', 'max_val'):
                self.config[option] = value
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Clip the data to the configured range.

        Args:
            data: Input data to transform
            **kwargs: Optional ``min_val`` / ``max_val`` that apply to this
                call only, without changing the stored config

        Returns:
            Clipped data, as returned by ``np.clip``
        """
        lower = kwargs.get('min_val', self.config['min_val'])
        upper = kwargs.get('max_val', self.config['max_val'])
        return np.clip(data, lower, upper)

Preprocessor for clipping values to a specified range.

ClippingPreprocessor(min_val: float = -1, max_val: float = 1)
23    def __init__(self, min_val: float = -1, max_val: float = 1):
24        super().__init__(
25            name="clipping",
26            description="Clips values in the data to be within a specified range"
27        )
28        self.config = {
29            'min_val': min_val,
30            'max_val': max_val
31        }

Initialize the preprocessor.

Args: name: Name of the preprocessor description: Description of the preprocessor

config
def fit( self, data: Union[pandas.core.frame.DataFrame, numpy.ndarray], **kwargs):
33    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
34        """
35        Fit the preprocessor (no fitting needed for clipping).
36        
37        Args:
38            data: Input data to fit on
39            **kwargs: Additional arguments
40        """
41        # Update config with any passed arguments
42        self.config.update({k: v for k, v in kwargs.items() if k in ['min_val', 'max_val']})
43        self.fitted = True

Fit the preprocessor (no fitting needed for clipping).

Args: data: Input data to fit on **kwargs: Additional arguments

def transform( self, data: Union[pandas.core.frame.DataFrame, numpy.ndarray], **kwargs) -> Union[pandas.core.frame.DataFrame, numpy.ndarray]:
45    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
46        """
47        Clip values in the data to be within the specified range.
48        
49        Args:
50            data: Input data to transform
51            **kwargs: Additional arguments
52            
53        Returns:
54            Clipped data
55        """
56        min_val = kwargs.get('min_val', self.config['min_val'])
57        max_val = kwargs.get('max_val', self.config['max_val'])
58        
59        return np.clip(data, min_val, max_val)

Clip values in the data to be within the specified range.

Args: data: Input data to transform **kwargs: Additional arguments

Returns: Clipped data

class NoiseRemovalPreprocessor(gaitsetpy.core.base_classes.BasePreprocessor):
 62class NoiseRemovalPreprocessor(BasePreprocessor):
 63    """
 64    Preprocessor for removing noise using moving average filter.
 65    """
 66    
 67    def __init__(self, window_size: int = 5):
 68        super().__init__(
 69            name="noise_removal",
 70            description="Applies a moving average filter to reduce noise"
 71        )
 72        self.config = {
 73            'window_size': window_size
 74        }
 75    
 76    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
 77        """
 78        Fit the preprocessor (no fitting needed for noise removal).
 79        
 80        Args:
 81            data: Input data to fit on
 82            **kwargs: Additional arguments
 83        """
 84        self.config.update({k: v for k, v in kwargs.items() if k in ['window_size']})
 85        self.fitted = True
 86    
 87    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
 88        """
 89        Apply a moving average filter to reduce noise.
 90        
 91        Args:
 92            data: Input data to transform
 93            **kwargs: Additional arguments
 94            
 95        Returns:
 96            Noise-reduced data
 97        """
 98        window_size = kwargs.get('window_size', self.config['window_size'])
 99        
100        if isinstance(data, pd.DataFrame):
101            return data.rolling(window=window_size, center=True).mean().bfill().ffill()
102        elif isinstance(data, pd.Series):
103            return data.rolling(window=window_size, center=True).mean().bfill().ffill()
104        else:
105            # For numpy arrays, use uniform filter
106            from scipy.ndimage import uniform_filter1d
107            return uniform_filter1d(data, size=window_size, mode='nearest')

Preprocessor for removing noise using moving average filter.

NoiseRemovalPreprocessor(window_size: int = 5)
67    def __init__(self, window_size: int = 5):
68        super().__init__(
69            name="noise_removal",
70            description="Applies a moving average filter to reduce noise"
71        )
72        self.config = {
73            'window_size': window_size
74        }

Initialize the preprocessor.

Args: name: Name of the preprocessor description: Description of the preprocessor

config
def fit( self, data: Union[pandas.core.frame.DataFrame, numpy.ndarray], **kwargs):
76    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
77        """
78        Fit the preprocessor (no fitting needed for noise removal).
79        
80        Args:
81            data: Input data to fit on
82            **kwargs: Additional arguments
83        """
84        self.config.update({k: v for k, v in kwargs.items() if k in ['window_size']})
85        self.fitted = True

Fit the preprocessor (no fitting needed for noise removal).

Args: data: Input data to fit on **kwargs: Additional arguments

def transform( self, data: Union[pandas.core.frame.DataFrame, numpy.ndarray], **kwargs) -> Union[pandas.core.frame.DataFrame, numpy.ndarray]:
 87    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
 88        """
 89        Apply a moving average filter to reduce noise.
 90        
 91        Args:
 92            data: Input data to transform
 93            **kwargs: Additional arguments
 94            
 95        Returns:
 96            Noise-reduced data
 97        """
 98        window_size = kwargs.get('window_size', self.config['window_size'])
 99        
100        if isinstance(data, pd.DataFrame):
101            return data.rolling(window=window_size, center=True).mean().bfill().ffill()
102        elif isinstance(data, pd.Series):
103            return data.rolling(window=window_size, center=True).mean().bfill().ffill()
104        else:
105            # For numpy arrays, use uniform filter
106            from scipy.ndimage import uniform_filter1d
107            return uniform_filter1d(data, size=window_size, mode='nearest')

Apply a moving average filter to reduce noise.

Args: data: Input data to transform **kwargs: Additional arguments

Returns: Noise-reduced data

class OutlierRemovalPreprocessor(gaitsetpy.core.base_classes.BasePreprocessor):
class OutlierRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor for removing outliers using Z-score method.
    """

    def __init__(self, threshold: float = 3):
        """Store the default Z-score threshold; statistics are unset until ``fit``."""
        super().__init__(
            name="outlier_removal",
            description="Removes outliers beyond a given threshold using the Z-score method"
        )
        self.config = {
            'threshold': threshold
        }
        self.mean_ = None
        self.std_ = None

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Fit the preprocessor by computing mean and standard deviation.

        Args:
            data: Input data to fit on
            **kwargs: A ``threshold`` override that is stored in the config;
                any other keyword is ignored
        """
        if 'threshold' in kwargs:
            self.config['threshold'] = kwargs['threshold']

        if isinstance(data, (pd.DataFrame, pd.Series)):
            # pandas statistics: per column for a DataFrame, sample std (ddof=1).
            self.mean_ = data.mean()
            self.std_ = data.std()
        else:
            # numpy statistics: over all elements, population std (ddof=0).
            self.mean_ = np.mean(data)
            self.std_ = np.std(data)

        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Remove outliers beyond the threshold using Z-score method.

        A value equal to the fitted mean always has a Z-score of 0, even when
        the fitted standard deviation is 0 or NaN, so constant data is kept
        instead of being rejected through a 0/0 division.

        Args:
            data: Input data to transform
            **kwargs: Optional ``threshold`` that applies to this call only

        Returns:
            Data with outliers removed. The result comes from boolean-mask
            indexing: a Series or 1-D array loses the rejected entries, a
            DataFrame keeps its shape with rejected cells set to NaN, and a
            multi-dimensional array is flattened to the kept values.
        """
        threshold = kwargs.get('threshold', self.config['threshold'])

        if isinstance(data, (pd.DataFrame, pd.Series)):
            deviation = (data - self.mean_).abs()
            z_scores = deviation / self.std_
            # Zero deviation means a Z-score of 0 regardless of the spread.
            z_scores = z_scores.where(deviation != 0, 0.0)
            return data[z_scores <= threshold]

        deviation = np.abs(data - self.mean_)
        # Division by a zero std is handled right below; silence the warning.
        with np.errstate(divide='ignore', invalid='ignore'):
            z_scores = deviation / self.std_
        z_scores = np.where(deviation == 0, 0.0, z_scores)
        return data[z_scores <= threshold]

Preprocessor for removing outliers using Z-score method.

OutlierRemovalPreprocessor(threshold: float = 3)
115    def __init__(self, threshold: float = 3):
116        super().__init__(
117            name="outlier_removal",
118            description="Removes outliers beyond a given threshold using the Z-score method"
119        )
120        self.config = {
121            'threshold': threshold
122        }
123        self.mean_ = None
124        self.std_ = None

Initialize the preprocessor.

Args: name: Name of the preprocessor description: Description of the preprocessor

config
mean_
std_
def fit( self, data: Union[pandas.core.frame.DataFrame, numpy.ndarray], **kwargs):
126    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
127        """
128        Fit the preprocessor by computing mean and standard deviation.
129        
130        Args:
131            data: Input data to fit on
132            **kwargs: Additional arguments
133        """
134        self.config.update({k: v for k, v in kwargs.items() if k in ['threshold']})
135        
136        if isinstance(data, (pd.DataFrame, pd.Series)):
137            self.mean_ = data.mean()
138            self.std_ = data.std()
139        else:
140            self.mean_ = np.mean(data)
141            self.std_ = np.std(data)
142        
143        self.fitted = True

Fit the preprocessor by computing mean and standard deviation.

Args: data: Input data to fit on **kwargs: Additional arguments

def transform( self, data: Union[pandas.core.frame.DataFrame, numpy.ndarray], **kwargs) -> Union[pandas.core.frame.DataFrame, numpy.ndarray]:
145    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
146        """
147        Remove outliers beyond the threshold using Z-score method.
148        
149        Args:
150            data: Input data to transform
151            **kwargs: Additional arguments
152            
153        Returns:
154            Data with outliers removed
155        """
156        threshold = kwargs.get('threshold', self.config['threshold'])
157        
158        if isinstance(data, (pd.DataFrame, pd.Series)):
159            z_scores = (data - self.mean_).abs() / self.std_
160            return data[z_scores <= threshold]
161        else:
162            z_scores = np.abs(data - self.mean_) / self.std_
163            return data[z_scores <= threshold]

Remove outliers beyond the threshold using Z-score method.

Args: data: Input data to transform **kwargs: Additional arguments

Returns: Data with outliers removed

class BaselineRemovalPreprocessor(gaitsetpy.core.base_classes.BasePreprocessor):
class BaselineRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor that removes the baseline by subtracting the fitted mean.
    """

    def __init__(self):
        """Set up the preprocessor; the mean is unset until ``fit`` is called."""
        super().__init__(
            name="baseline_removal",
            description="Removes baseline by subtracting the mean"
        )
        # Filled in by fit().
        self.mean_ = None

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Compute and store the mean of ``data``.

        Args:
            data: Input data to fit on
            **kwargs: Additional arguments (unused)
        """
        uses_pandas = isinstance(data, (pd.DataFrame, pd.Series))
        # pandas mean (per column for a DataFrame) vs. numpy mean over all elements
        self.mean_ = data.mean() if uses_pandas else np.mean(data)
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Subtract the mean stored by ``fit`` from ``data``.

        Args:
            data: Input data to transform
            **kwargs: Additional arguments (unused)

        Returns:
            Baseline-corrected data
        """
        baseline = self.mean_
        return data - baseline

Preprocessor for removing baseline by subtracting the mean.

BaselineRemovalPreprocessor()
171    def __init__(self):
172        super().__init__(
173            name="baseline_removal",
174            description="Removes baseline by subtracting the mean"
175        )
176        self.mean_ = None

Initialize the preprocessor.

Args: name: Name of the preprocessor description: Description of the preprocessor

mean_
def fit( self, data: Union[pandas.core.frame.DataFrame, numpy.ndarray], **kwargs):
178    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
179        """
180        Fit the preprocessor by computing the mean.
181        
182        Args:
183            data: Input data to fit on
184            **kwargs: Additional arguments
185        """
186        if isinstance(data, (pd.DataFrame, pd.Series)):
187            self.mean_ = data.mean()
188        else:
189            self.mean_ = np.mean(data)
190        
191        self.fitted = True

Fit the preprocessor by computing the mean.

Args: data: Input data to fit on **kwargs: Additional arguments

def transform( self, data: Union[pandas.core.frame.DataFrame, numpy.ndarray], **kwargs) -> Union[pandas.core.frame.DataFrame, numpy.ndarray]:
193    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
194        """
195        Remove baseline by subtracting the mean.
196        
197        Args:
198            data: Input data to transform
199            **kwargs: Additional arguments
200            
201        Returns:
202            Baseline-corrected data
203        """
204        return data - self.mean_

Remove baseline by subtracting the mean.

Args: data: Input data to transform **kwargs: Additional arguments

Returns: Baseline-corrected data

class DriftRemovalPreprocessor(gaitsetpy.core.base_classes.BasePreprocessor):
class DriftRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor for removing low-frequency drift using high-pass filter.
    """

    def __init__(self, cutoff: float = 0.01, fs: int = 100):
        """Store the default cutoff frequency and sampling rate in ``self.config``."""
        super().__init__(
            name="drift_removal",
            description="Removes low-frequency drift using a high-pass filter"
        )
        self.config = {
            'cutoff': cutoff,
            'fs': fs
        }

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Mark the preprocessor as fitted; the filter learns nothing from the data.

        Args:
            data: Input data (not inspected)
            **kwargs: ``cutoff`` / ``fs`` overrides that are stored in the
                config; any other keyword is ignored
        """
        for option, value in kwargs.items():
            if option in ('cutoff', 'fs'):
                self.config[option] = value
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Remove low-frequency drift using a high-pass filter.

        Args:
            data: Input data to transform
            **kwargs: Optional ``cutoff`` / ``fs`` that apply to this call only

        Returns:
            Drift-corrected data. A DataFrame is filtered column by column
            (along the rows) and returned as a DataFrame with the same index
            and columns; a Series is returned as a Series with the same index;
            other input is filtered along its last axis.
        """
        cutoff = kwargs.get('cutoff', self.config['cutoff'])
        fs = kwargs.get('fs', self.config['fs'])

        # First-order Butterworth; cutoff is normalized by the Nyquist rate.
        b, a = butter(1, cutoff / (fs / 2), btype='highpass')

        if isinstance(data, pd.DataFrame):
            # Filter along axis 0 so each column is treated as its own signal.
            # The default last-axis result cannot be wrapped in a Series.
            filtered = filtfilt(b, a, data.to_numpy(dtype=float), axis=0)
            return pd.DataFrame(filtered, index=data.index, columns=data.columns)
        if isinstance(data, pd.Series):
            return pd.Series(filtfilt(b, a, data), index=data.index)
        return filtfilt(b, a, data)

Preprocessor for removing low-frequency drift using high-pass filter.

DriftRemovalPreprocessor(cutoff: float = 0.01, fs: int = 100)
212    def __init__(self, cutoff: float = 0.01, fs: int = 100):
213        super().__init__(
214            name="drift_removal",
215            description="Removes low-frequency drift using a high-pass filter"
216        )
217        self.config = {
218            'cutoff': cutoff,
219            'fs': fs
220        }

Initialize the preprocessor.

Args: name: Name of the preprocessor description: Description of the preprocessor

config
def fit( self, data: Union[pandas.core.frame.DataFrame, numpy.ndarray], **kwargs):
222    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
223        """
224        Fit the preprocessor (no fitting needed for drift removal).
225        
226        Args:
227            data: Input data to fit on
228            **kwargs: Additional arguments
229        """
230        self.config.update({k: v for k, v in kwargs.items() if k in ['cutoff', 'fs']})
231        self.fitted = True

Fit the preprocessor (no fitting needed for drift removal).

Args: data: Input data to fit on **kwargs: Additional arguments

def transform( self, data: Union[pandas.core.frame.DataFrame, numpy.ndarray], **kwargs) -> Union[pandas.core.frame.DataFrame, numpy.ndarray]:
233    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
234        """
235        Remove low-frequency drift using a high-pass filter.
236        
237        Args:
238            data: Input data to transform
239            **kwargs: Additional arguments
240            
241        Returns:
242            Drift-corrected data
243        """
244        cutoff = kwargs.get('cutoff', self.config['cutoff'])
245        fs = kwargs.get('fs', self.config['fs'])
246        
247        b, a = butter(1, cutoff / (fs / 2), btype='highpass')
248        
249        if isinstance(data, (pd.DataFrame, pd.Series)):
250            return pd.Series(filtfilt(b, a, data), index=data.index)
251        else:
252            return filtfilt(b, a, data)

Remove low-frequency drift using a high-pass filter.

Args: data: Input data to transform **kwargs: Additional arguments

Returns: Drift-corrected data

class HighFrequencyNoiseRemovalPreprocessor(gaitsetpy.core.base_classes.BasePreprocessor):
class HighFrequencyNoiseRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor for removing high-frequency noise using low-pass filter.
    """

    def __init__(self, cutoff: float = 10, fs: int = 100):
        """Store the default cutoff frequency and sampling rate in ``self.config``."""
        super().__init__(
            name="high_frequency_noise_removal",
            description="Applies a low-pass filter to remove high-frequency noise"
        )
        self.config = {
            'cutoff': cutoff,
            'fs': fs
        }

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Mark the preprocessor as fitted; the filter learns nothing from the data.

        Args:
            data: Input data (not inspected)
            **kwargs: ``cutoff`` / ``fs`` overrides that are stored in the
                config; any other keyword is ignored
        """
        for option, value in kwargs.items():
            if option in ('cutoff', 'fs'):
                self.config[option] = value
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Apply a low-pass filter to remove high-frequency noise.

        Args:
            data: Input data to transform
            **kwargs: Optional ``cutoff`` / ``fs`` that apply to this call only

        Returns:
            Filtered data. A DataFrame is filtered column by column (along the
            rows) and returned as a DataFrame with the same index and columns;
            a Series is returned as a Series with the same index; other input
            is filtered along its last axis.
        """
        cutoff = kwargs.get('cutoff', self.config['cutoff'])
        fs = kwargs.get('fs', self.config['fs'])

        # First-order Butterworth; cutoff is normalized by the Nyquist rate.
        b, a = butter(1, cutoff / (fs / 2), btype='lowpass')

        if isinstance(data, pd.DataFrame):
            # Filter along axis 0 so each column is treated as its own signal.
            # The default last-axis result cannot be wrapped in a Series.
            filtered = filtfilt(b, a, data.to_numpy(dtype=float), axis=0)
            return pd.DataFrame(filtered, index=data.index, columns=data.columns)
        if isinstance(data, pd.Series):
            return pd.Series(filtfilt(b, a, data), index=data.index)
        return filtfilt(b, a, data)

Preprocessor for removing high-frequency noise using low-pass filter.

HighFrequencyNoiseRemovalPreprocessor(cutoff: float = 10, fs: int = 100)
260    def __init__(self, cutoff: float = 10, fs: int = 100):
261        super().__init__(
262            name="high_frequency_noise_removal",
263            description="Applies a low-pass filter to remove high-frequency noise"
264        )
265        self.config = {
266            'cutoff': cutoff,
267            'fs': fs
268        }

Initialize the preprocessor.

Args: name: Name of the preprocessor description: Description of the preprocessor

config
def fit( self, data: Union[pandas.core.frame.DataFrame, numpy.ndarray], **kwargs):
270    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
271        """
272        Fit the preprocessor (no fitting needed for filtering).
273        
274        Args:
275            data: Input data to fit on
276            **kwargs: Additional arguments
277        """
278        self.config.update({k: v for k, v in kwargs.items() if k in ['cutoff', 'fs']})
279        self.fitted = True

Fit the preprocessor (no fitting needed for filtering).

Args: data: Input data to fit on **kwargs: Additional arguments

def transform( self, data: Union[pandas.core.frame.DataFrame, numpy.ndarray], **kwargs) -> Union[pandas.core.frame.DataFrame, numpy.ndarray]:
281    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
282        """
283        Apply a low-pass filter to remove high-frequency noise.
284        
285        Args:
286            data: Input data to transform
287            **kwargs: Additional arguments
288            
289        Returns:
290            Filtered data
291        """
292        cutoff = kwargs.get('cutoff', self.config['cutoff'])
293        fs = kwargs.get('fs', self.config['fs'])
294        
295        b, a = butter(1, cutoff / (fs / 2), btype='lowpass')
296        
297        if isinstance(data, (pd.DataFrame, pd.Series)):
298            return pd.Series(filtfilt(b, a, data), index=data.index)
299        else:
300            return filtfilt(b, a, data)

Apply a low-pass filter to remove high-frequency noise.

Args: data: Input data to transform **kwargs: Additional arguments

Returns: Filtered data

class LowFrequencyNoiseRemovalPreprocessor(gaitsetpy.core.base_classes.BasePreprocessor):
class LowFrequencyNoiseRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor for removing low-frequency noise using high-pass filter.
    """

    def __init__(self, cutoff: float = 0.5, fs: int = 100):
        """Store the default cutoff frequency and sampling rate in ``self.config``."""
        super().__init__(
            name="low_frequency_noise_removal",
            description="Applies a high-pass filter to remove low-frequency noise"
        )
        self.config = {
            'cutoff': cutoff,
            'fs': fs
        }

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Mark the preprocessor as fitted; the filter learns nothing from the data.

        Args:
            data: Input data (not inspected)
            **kwargs: ``cutoff`` / ``fs`` overrides that are stored in the
                config; any other keyword is ignored
        """
        for option, value in kwargs.items():
            if option in ('cutoff', 'fs'):
                self.config[option] = value
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Apply a high-pass filter to remove low-frequency noise.

        Args:
            data: Input data to transform
            **kwargs: Optional ``cutoff`` / ``fs`` that apply to this call only

        Returns:
            Filtered data. A DataFrame is filtered column by column (along the
            rows) and returned as a DataFrame with the same index and columns;
            a Series is returned as a Series with the same index; other input
            is filtered along its last axis.
        """
        cutoff = kwargs.get('cutoff', self.config['cutoff'])
        fs = kwargs.get('fs', self.config['fs'])

        # First-order Butterworth; cutoff is normalized by the Nyquist rate.
        b, a = butter(1, cutoff / (fs / 2), btype='highpass')

        if isinstance(data, pd.DataFrame):
            # Filter along axis 0 so each column is treated as its own signal.
            # The default last-axis result cannot be wrapped in a Series.
            filtered = filtfilt(b, a, data.to_numpy(dtype=float), axis=0)
            return pd.DataFrame(filtered, index=data.index, columns=data.columns)
        if isinstance(data, pd.Series):
            return pd.Series(filtfilt(b, a, data), index=data.index)
        return filtfilt(b, a, data)

Preprocessor for removing low-frequency noise using high-pass filter.

LowFrequencyNoiseRemovalPreprocessor(cutoff: float = 0.5, fs: int = 100)
308    def __init__(self, cutoff: float = 0.5, fs: int = 100):
309        super().__init__(
310            name="low_frequency_noise_removal",
311            description="Applies a high-pass filter to remove low-frequency noise"
312        )
313        self.config = {
314            'cutoff': cutoff,
315            'fs': fs
316        }

Initialize the preprocessor.

Args: name: Name of the preprocessor description: Description of the preprocessor

config
def fit( self, data: Union[pandas.core.frame.DataFrame, numpy.ndarray], **kwargs):
318    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
319        """
320        Fit the preprocessor (no fitting needed for filtering).
321        
322        Args:
323            data: Input data to fit on
324            **kwargs: Additional arguments
325        """
326        self.config.update({k: v for k, v in kwargs.items() if k in ['cutoff', 'fs']})
327        self.fitted = True

Fit the preprocessor (no fitting needed for filtering).

Args: data: Input data to fit on **kwargs: Additional arguments

def transform( self, data: Union[pandas.core.frame.DataFrame, numpy.ndarray], **kwargs) -> Union[pandas.core.frame.DataFrame, numpy.ndarray]:
329    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
330        """
331        Apply a high-pass filter to remove low-frequency noise.
332        
333        Args:
334            data: Input data to transform
335            **kwargs: Additional arguments
336            
337        Returns:
338            Filtered data
339        """
340        cutoff = kwargs.get('cutoff', self.config['cutoff'])
341        fs = kwargs.get('fs', self.config['fs'])
342        
343        b, a = butter(1, cutoff / (fs / 2), btype='highpass')
344        
345        if isinstance(data, (pd.DataFrame, pd.Series)):
346            return pd.Series(filtfilt(b, a, data), index=data.index)
347        else:
348            return filtfilt(b, a, data)

Apply a high-pass filter to remove low-frequency noise.

Args: data: Input data to transform **kwargs: Additional arguments

Returns: Filtered data

class ArtifactRemovalPreprocessor(gaitsetpy.core.base_classes.BasePreprocessor):
class ArtifactRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor for removing artifacts by interpolating missing (NaN) values.
    """

    def __init__(self, method: str = "linear"):
        """
        Create the preprocessor and record its default interpolation method.

        Args:
            method: Interpolation method used for pandas input
        """
        super().__init__(
            name="artifact_removal",
            description="Removes artifacts by interpolating missing values"
        )
        self.config = {
            'method': method
        }

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Fit the preprocessor (no fitting needed for interpolation).

        Args:
            data: Input data (not inspected)
            **kwargs: An optional 'method' value is stored in the
                configuration; any other keyword is ignored
        """
        self.config.update({k: v for k, v in kwargs.items() if k in ['method']})
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Remove artifacts by interpolating missing values.

        Args:
            data: Input data to transform
            **kwargs: An optional 'method' overrides the configured
                interpolation method for this call (pandas input only)

        Returns:
            Data with NaN values filled. pandas input is interpolated and the
            remaining edge NaNs are back-/forward-filled. Array input is
            interpolated linearly over sample index, with linear
            extrapolation at the edges; an all-NaN array is returned
            unchanged.
        """
        method = kwargs.get('method', self.config['method'])

        if isinstance(data, (pd.DataFrame, pd.Series)):
            return data.interpolate(method=method).bfill().ffill()

        # Array input is always interpolated linearly; `method` is not consulted.
        from scipy.interpolate import interp1d
        x = np.arange(len(data))
        valid_mask = ~np.isnan(data)
        if not np.any(valid_mask):
            return data

        known_x = x[valid_mask]
        known_y = data[valid_mask]
        if known_x.size == 1:
            # interp1d needs at least two points; with a single valid sample
            # the only sensible fill is that value everywhere.
            return np.full(x.shape, known_y[0], dtype=float)

        f = interp1d(known_x, known_y, kind='linear', fill_value='extrapolate')
        return f(x)

Preprocessor for removing artifacts by interpolating missing values.

ArtifactRemovalPreprocessor(method: str = 'linear')
356    def __init__(self, method: str = "linear"):
357        super().__init__(
358            name="artifact_removal",
359            description="Removes artifacts by interpolating missing values"
360        )
361        self.config = {
362            'method': method
363        }

Initialize the preprocessor.

Args: name: Name of the preprocessor description: Description of the preprocessor

config
def fit( self, data: Union[pandas.core.frame.DataFrame, numpy.ndarray], **kwargs):
365    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
366        """
367        Fit the preprocessor (no fitting needed for interpolation).
368        
369        Args:
370            data: Input data to fit on
371            **kwargs: Additional arguments
372        """
373        self.config.update({k: v for k, v in kwargs.items() if k in ['method']})
374        self.fitted = True

Fit the preprocessor (no fitting needed for interpolation).

Args: data: Input data to fit on **kwargs: Additional arguments

def transform( self, data: Union[pandas.core.frame.DataFrame, numpy.ndarray], **kwargs) -> Union[pandas.core.frame.DataFrame, numpy.ndarray]:
376    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
377        """
378        Remove artifacts by interpolating missing values.
379        
380        Args:
381            data: Input data to transform
382            **kwargs: Additional arguments
383            
384        Returns:
385            Artifact-free data
386        """
387        method = kwargs.get('method', self.config['method'])
388        
389        if isinstance(data, (pd.DataFrame, pd.Series)):
390            return data.interpolate(method=method).bfill().ffill()
391        else:
392            # For numpy arrays, use linear interpolation
393            from scipy.interpolate import interp1d
394            x = np.arange(len(data))
395            valid_mask = ~np.isnan(data)
396            if np.any(valid_mask):
397                f = interp1d(x[valid_mask], data[valid_mask], kind='linear', fill_value='extrapolate')
398                return f(x)
399            else:
400                return data

Remove artifacts by interpolating missing values.

Args: data: Input data to transform **kwargs: Additional arguments

Returns: Artifact-free data

class TrendRemovalPreprocessor(gaitsetpy.core.base_classes.BasePreprocessor):
class TrendRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor for removing trends using polynomial fitting.
    """

    def __init__(self, order: int = 2):
        """
        Create the preprocessor and record its default polynomial order.

        Args:
            order: Degree of the polynomial fitted as the trend
        """
        super().__init__(
            name="trend_removal",
            description="Removes trends using polynomial fitting"
        )
        self.config = {
            'order': order
        }

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Fit the preprocessor (no fitting needed for detrending).

        Args:
            data: Input data (not inspected)
            **kwargs: An optional 'order' value is stored in the
                configuration; any other keyword is ignored
        """
        self.config.update({k: v for k, v in kwargs.items() if k in ['order']})
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Remove trends using polynomial fitting.

        A polynomial in the sample index is least-squares fitted to the data
        and subtracted from it. Two-dimensional input (a DataFrame or a 2-D
        array) is detrended column by column.

        Args:
            data: Input data to transform
            **kwargs: An optional 'order' overrides the configured polynomial
                degree for this call

        Returns:
            Detrended data, of the same container type as the input
        """
        order = kwargs.get('order', self.config['order'])

        # Float sample index: keeps the high powers built by vander() out of
        # integer overflow for long signals. polyfit converts to float anyway.
        x = np.arange(len(data), dtype=float)
        poly_coeffs = np.polyfit(x, data, order)
        if poly_coeffs.ndim == 1:
            trend = np.polyval(poly_coeffs, x)
        else:
            # 2-D input gives one coefficient column per data column, which
            # polyval cannot evaluate; build all trends with one matrix product.
            trend = np.vander(x, order + 1) @ poly_coeffs
        return data - trend

Preprocessor for removing trends using polynomial fitting.

TrendRemovalPreprocessor(order: int = 2)
408    def __init__(self, order: int = 2):
409        super().__init__(
410            name="trend_removal",
411            description="Removes trends using polynomial fitting"
412        )
413        self.config = {
414            'order': order
415        }

Initialize the preprocessor.

Args: name: Name of the preprocessor description: Description of the preprocessor

config
def fit( self, data: Union[pandas.core.frame.DataFrame, numpy.ndarray], **kwargs):
417    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
418        """
419        Fit the preprocessor (no fitting needed for detrending).
420        
421        Args:
422            data: Input data to fit on
423            **kwargs: Additional arguments
424        """
425        self.config.update({k: v for k, v in kwargs.items() if k in ['order']})
426        self.fitted = True

Fit the preprocessor (no fitting needed for detrending).

Args: data: Input data to fit on **kwargs: Additional arguments

def transform( self, data: Union[pandas.core.frame.DataFrame, numpy.ndarray], **kwargs) -> Union[pandas.core.frame.DataFrame, numpy.ndarray]:
428    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
429        """
430        Remove trends using polynomial fitting.
431        
432        Args:
433            data: Input data to transform
434            **kwargs: Additional arguments
435            
436        Returns:
437            Detrended data
438        """
439        order = kwargs.get('order', self.config['order'])
440        
441        if isinstance(data, (pd.DataFrame, pd.Series)):
442            x = np.arange(len(data))
443            poly_coeffs = np.polyfit(x, data, order)
444            trend = np.polyval(poly_coeffs, x)
445            return data - trend
446        else:
447            x = np.arange(len(data))
448            poly_coeffs = np.polyfit(x, data, order)
449            trend = np.polyval(poly_coeffs, x)
450            return data - trend

Remove trends using polynomial fitting.

Args: data: Input data to transform **kwargs: Additional arguments

Returns: Detrended data

class DCOffsetRemovalPreprocessor(gaitsetpy.core.base_classes.BasePreprocessor):
class DCOffsetRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor that removes a DC offset by subtracting a fitted mean.
    """

    def __init__(self):
        """Create the preprocessor with no mean fitted yet."""
        super().__init__(
            description="Removes DC offset by subtracting the mean",
            name="dc_offset_removal",
        )
        # Populated by fit(); transform() subtracts it from its input.
        self.mean_ = None

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Compute and store the mean of the data.

        Args:
            data: Data to take the mean of. pandas objects use their own
                ``mean()``; anything else goes through ``np.mean``.
            **kwargs: Unused
        """
        is_pandas = isinstance(data, (pd.DataFrame, pd.Series))
        self.mean_ = data.mean() if is_pandas else np.mean(data)
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Subtract the stored mean from the data.

        Args:
            data: Input data to transform
            **kwargs: Unused

        Returns:
            DC-corrected data
        """
        offset = self.mean_
        return data - offset

Preprocessor for removing DC offset by subtracting the mean.

DCOffsetRemovalPreprocessor()
458    def __init__(self):
459        super().__init__(
460            name="dc_offset_removal",
461            description="Removes DC offset by subtracting the mean"
462        )
463        self.mean_ = None

Initialize the preprocessor.

Args: name: Name of the preprocessor description: Description of the preprocessor

mean_
def fit( self, data: Union[pandas.core.frame.DataFrame, numpy.ndarray], **kwargs):
465    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
466        """
467        Fit the preprocessor by computing the mean.
468        
469        Args:
470            data: Input data to fit on
471            **kwargs: Additional arguments
472        """
473        if isinstance(data, (pd.DataFrame, pd.Series)):
474            self.mean_ = data.mean()
475        else:
476            self.mean_ = np.mean(data)
477        
478        self.fitted = True

Fit the preprocessor by computing the mean.

Args: data: Input data to fit on **kwargs: Additional arguments

def transform( self, data: Union[pandas.core.frame.DataFrame, numpy.ndarray], **kwargs) -> Union[pandas.core.frame.DataFrame, numpy.ndarray]:
480    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
481        """
482        Remove DC offset by subtracting the mean.
483        
484        Args:
485            data: Input data to transform
486            **kwargs: Additional arguments
487            
488        Returns:
489            DC-corrected data
490        """
491        return data - self.mean_ 

Remove DC offset by subtracting the mean.

Args: data: Input data to transform **kwargs: Additional arguments

Returns: DC-corrected data

def clip_sliding_windows(data, min_val=-1, max_val=1):
def clip_sliding_windows(data, min_val=-1, max_val=1):
    """
    Limit every value in the sliding windows to the range [min_val, max_val].
    """
    lower, upper = min_val, max_val
    clipped = np.clip(data, lower, upper)
    return clipped

Clip values in the sliding windows to be within a specified range.

def remove_noise(data, window_size=5):
def remove_noise(data, window_size=5):
    """
    Apply a centered moving-average filter to reduce noise.

    Args:
        data: pandas Series or DataFrame to smooth
        window_size: Number of samples in the rolling window

    Returns:
        Smoothed data. NaNs left where the window is incomplete are
        back-filled, then forward-filled.
    """
    smoothed = data.rolling(window=window_size, center=True).mean()
    # bfill()/ffill() replace fillna(method=...), which pandas has deprecated.
    return smoothed.bfill().ffill()

Apply a moving average filter to reduce noise.

def remove_outliers(data, threshold=3):
def remove_outliers(data, threshold=3):
    """
    Keep only the values lying within `threshold` standard deviations of the mean.
    """
    deviation = (data - data.mean()).abs()
    limit = threshold * data.std()
    keep = deviation <= limit
    return data[keep]

Remove outliers beyond a given threshold using the Z-score method.

def remove_baseline(data):
def remove_baseline(data):
    """
    Subtract the data's own mean so that the baseline is removed.
    """
    baseline = data.mean()
    return data - baseline

Remove baseline by subtracting the mean.

def remove_drift(data, cutoff=0.01, fs=100):
def remove_drift(data, cutoff=0.01, fs=100):
    """
    Remove low-frequency drift with a first-order Butterworth high-pass filter.

    Args:
        data: Signal to filter
        cutoff: Cutoff frequency, in the same units as ``fs``
        fs: Sampling frequency

    Returns:
        The filtered array produced by ``filtfilt``
    """
    nyquist = fs / 2
    numerator, denominator = butter(1, cutoff / nyquist, btype='highpass')
    filtered = filtfilt(numerator, denominator, data)
    return filtered

Remove low-frequency drift using a high-pass filter.

def remove_artifacts(data, method='interpolate'):
def remove_artifacts(data, method="interpolate"):
    """
    Remove artifacts by interpolating missing (NaN) values.

    Args:
        data: pandas Series or DataFrame containing NaN gaps
        method: Interpolation method passed to ``data.interpolate``. The
            legacy default "interpolate" is not a pandas method name and
            selects linear interpolation.

    Returns:
        Data with gaps interpolated; NaNs still left at the edges are
        back-filled, then forward-filled.
    """
    # Previously `method` was ignored and "linear" was hard-coded; honour it,
    # keeping the old default's behaviour.
    pandas_method = "linear" if method == "interpolate" else method
    # bfill()/ffill() replace fillna(method=...), which pandas has deprecated.
    return data.interpolate(method=pandas_method).bfill().ffill()

Remove artifacts by interpolating missing values.

def remove_trend(data, order=2):
def remove_trend(data, order=2):
    """
    Remove trends using polynomial fitting.

    A polynomial of degree `order` in the sample index is least-squares
    fitted to the data and subtracted from it. Two-dimensional input (a
    DataFrame or a 2-D array) is detrended column by column.

    Args:
        data: Signal(s) to detrend
        order: Degree of the fitted polynomial

    Returns:
        Detrended data, of the same container type as the input
    """
    # Float sample index: keeps the high powers built by vander() out of
    # integer overflow for long signals. polyfit converts to float anyway.
    x = np.arange(len(data), dtype=float)
    poly_coeffs = np.polyfit(x, data, order)
    if poly_coeffs.ndim == 1:
        trend = np.polyval(poly_coeffs, x)
    else:
        # 2-D input gives one coefficient column per data column, which
        # polyval cannot evaluate; build all trends with one matrix product.
        trend = np.vander(x, order + 1) @ poly_coeffs
    return data - trend

Remove trends using polynomial fitting.

def remove_dc_offset(data):
def remove_dc_offset(data):
    """
    Subtract the data's own mean so that the DC offset is removed.
    """
    offset = data.mean()
    return data - offset

Remove DC offset by subtracting the mean.

def remove_high_frequency_noise(data, cutoff=10, fs=100):
def remove_high_frequency_noise(data, cutoff=10, fs=100):
    """
    Remove high-frequency noise with a first-order Butterworth low-pass filter.

    Args:
        data: Signal to filter
        cutoff: Cutoff frequency, in the same units as ``fs``
        fs: Sampling frequency

    Returns:
        The filtered array produced by ``filtfilt``
    """
    nyquist = fs / 2
    numerator, denominator = butter(1, cutoff / nyquist, btype='lowpass')
    filtered = filtfilt(numerator, denominator, data)
    return filtered

Apply a low-pass filter to remove high-frequency noise.

def remove_low_frequency_noise(data, cutoff=0.5, fs=100):
def remove_low_frequency_noise(data, cutoff=0.5, fs=100):
    """
    Remove low-frequency noise with a first-order Butterworth high-pass filter.

    Args:
        data: Signal to filter
        cutoff: Cutoff frequency, in the same units as ``fs``
        fs: Sampling frequency

    Returns:
        The filtered array produced by ``filtfilt``
    """
    nyquist = fs / 2
    numerator, denominator = butter(1, cutoff / nyquist, btype='highpass')
    filtered = filtfilt(numerator, denominator, data)
    return filtered

Apply a high-pass filter to remove low-frequency noise.

def get_preprocessing_manager():
def get_preprocessing_manager():
    """Return the PreprocessingManager instance (described here as a singleton)."""
    manager = PreprocessingManager()
    return manager

Get the singleton PreprocessingManager instance.

def get_available_preprocessors():
def get_available_preprocessors():
    """Return the component names reported by the PreprocessingManager."""
    manager = PreprocessingManager()
    return manager.get_available_components()

Get list of available preprocessor names.

def preprocess_data(preprocessor_name: str, data, **kwargs):
def preprocess_data(preprocessor_name: str, data, **kwargs):
    """
    Run one named preprocessor on the data via the PreprocessingManager.

    Args:
        preprocessor_name: Name of the preprocessor
        data: Input data to preprocess
        **kwargs: Additional arguments, forwarded unchanged to the manager

    Returns:
        Whatever the manager's ``preprocess_data`` returns
    """
    manager = PreprocessingManager()
    result = manager.preprocess_data(preprocessor_name, data, **kwargs)
    return result

Preprocess data using the PreprocessingManager.

Args:
    preprocessor_name: Name of the preprocessor
    data: Input data to preprocess
    **kwargs: Additional arguments for preprocessing

Returns: Preprocessed data

def create_preprocessing_pipeline(preprocessor_names: list, **kwargs):
 94def create_preprocessing_pipeline(preprocessor_names: list, **kwargs):
 95    """
 96    Create a preprocessing pipeline with multiple preprocessors.
 97    
 98    Args:
 99        preprocessor_names: List of preprocessor names to chain
100        **kwargs: Additional arguments for individual preprocessors
101        
102    Returns:
103        Function that applies all preprocessors in sequence
104    """
105    manager = PreprocessingManager()
106    
107    def pipeline(data):
108        processed_data = data
109        for name in preprocessor_names:
110            preprocessor = manager.get_cached_instance(name, name, f"{name} preprocessor")
111            processed_data = preprocessor.fit_transform(processed_data, **kwargs.get(name, {}))
112        return processed_data
113    
114    return pipeline

Create a preprocessing pipeline with multiple preprocessors.

Args: preprocessor_names: List of preprocessor names to chain **kwargs: Additional arguments for individual preprocessors

Returns: Function that applies all preprocessors in sequence