gaitsetpy.preprocessing.preprocessors
Preprocessor classes for gait data preprocessing.
This module contains individual preprocessor classes that inherit from BasePreprocessor and provide specific preprocessing functionality.
Maintainer: @aharshit123456
'''
Preprocessor classes for gait data preprocessing.

This module contains individual preprocessor classes that inherit from BasePreprocessor
and provide specific preprocessing functionality.

Maintainer: @aharshit123456
'''

from typing import Union, Dict, Any
import numpy as np
import pandas as pd
from scipy.signal import butter, filtfilt
from ..core.base_classes import BasePreprocessor


class ClippingPreprocessor(BasePreprocessor):
    """
    Preprocessor for clipping values to a specified range.
    """

    def __init__(self, min_val: float = -1, max_val: float = 1):
        """
        Create a clipping preprocessor.

        Args:
            min_val: Lower bound of the clipping range
            max_val: Upper bound of the clipping range
        """
        super().__init__(
            name="clipping",
            description="Clips values in the data to be within a specified range"
        )
        self.config = {
            'min_val': min_val,
            'max_val': max_val
        }

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Fit the preprocessor (no fitting needed for clipping).

        Args:
            data: Input data to fit on (not inspected)
            **kwargs: Optional 'min_val' / 'max_val' overrides stored in the config
        """
        # Only the recognised keys are copied; anything else is ignored.
        self.config.update({k: v for k, v in kwargs.items() if k in ['min_val', 'max_val']})
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Clip values in the data to be within the specified range.

        Args:
            data: Input data to transform
            **kwargs: Optional per-call 'min_val' / 'max_val' overrides

        Returns:
            Clipped data
        """
        min_val = kwargs.get('min_val', self.config['min_val'])
        max_val = kwargs.get('max_val', self.config['max_val'])

        return np.clip(data, min_val, max_val)


class NoiseRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor for removing noise using moving average filter.
    """

    def __init__(self, window_size: int = 5):
        """
        Create a moving-average preprocessor.

        Args:
            window_size: Number of samples in the averaging window
        """
        super().__init__(
            name="noise_removal",
            description="Applies a moving average filter to reduce noise"
        )
        self.config = {
            'window_size': window_size
        }

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Fit the preprocessor (no fitting needed for noise removal).

        Args:
            data: Input data to fit on (not inspected)
            **kwargs: Optional 'window_size' override stored in the config
        """
        self.config.update({k: v for k, v in kwargs.items() if k in ['window_size']})
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Apply a moving average filter to reduce noise.

        Args:
            data: Input data to transform
            **kwargs: Optional per-call 'window_size' override

        Returns:
            Noise-reduced data
        """
        window_size = kwargs.get('window_size', self.config['window_size'])

        if isinstance(data, (pd.DataFrame, pd.Series)):
            smoothed = data.rolling(window=window_size, center=True).mean()
            # The centred window leaves NaN at both ends; fill them from the
            # nearest computed value.
            return smoothed.bfill().ffill()

        # For numpy arrays, use uniform filter
        from scipy.ndimage import uniform_filter1d
        values = np.asarray(data)
        # uniform_filter1d writes its result in the input dtype, so integer
        # input would have its averages truncated; promote to float first.
        if not np.issubdtype(values.dtype, np.inexact):
            values = values.astype(float)
        return uniform_filter1d(values, size=window_size, mode='nearest')


class OutlierRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor for removing outliers using Z-score method.
    """

    def __init__(self, threshold: float = 3):
        """
        Create a Z-score outlier preprocessor.

        Args:
            threshold: Maximum absolute Z-score a value may have to be kept
        """
        super().__init__(
            name="outlier_removal",
            description="Removes outliers beyond a given threshold using the Z-score method"
        )
        self.config = {
            'threshold': threshold
        }
        self.mean_ = None
        self.std_ = None

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Fit the preprocessor by computing mean and standard deviation.

        Args:
            data: Input data to fit on
            **kwargs: Optional 'threshold' override stored in the config
        """
        self.config.update({k: v for k, v in kwargs.items() if k in ['threshold']})

        if isinstance(data, (pd.DataFrame, pd.Series)):
            self.mean_ = data.mean()
            self.std_ = data.std()
        else:
            self.mean_ = np.mean(data)
            self.std_ = np.std(data)

        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Remove outliers beyond the threshold using Z-score method.

        Series and array inputs are filtered by a boolean mask; DataFrame
        inputs keep their shape and have outlying cells replaced by NaN.

        Args:
            data: Input data to transform
            **kwargs: Optional per-call 'threshold' override

        Returns:
            Data with outliers removed
        """
        threshold = kwargs.get('threshold', self.config['threshold'])

        # |x - mean| / std <= threshold is evaluated as
        # |x - mean| <= threshold * std so that a zero standard deviation does
        # not produce 0/0 = NaN and discard values that equal the mean.
        limit = threshold * self.std_

        if isinstance(data, pd.DataFrame):
            deviation = (data - self.mean_).abs()
            return data[deviation.le(limit, axis='columns')]
        if isinstance(data, pd.Series):
            return data[(data - self.mean_).abs().le(limit)]
        return data[np.abs(data - self.mean_) <= limit]


class BaselineRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor for removing baseline by subtracting the mean.
    """

    def __init__(self):
        """Create a baseline-removal preprocessor; the mean is learned in fit()."""
        super().__init__(
            name="baseline_removal",
            description="Removes baseline by subtracting the mean"
        )
        self.mean_ = None

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Fit the preprocessor by computing the mean.

        Args:
            data: Input data to fit on
            **kwargs: Additional arguments (unused)
        """
        if isinstance(data, (pd.DataFrame, pd.Series)):
            self.mean_ = data.mean()
        else:
            self.mean_ = np.mean(data)

        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Remove baseline by subtracting the mean.

        Args:
            data: Input data to transform
            **kwargs: Additional arguments (unused)

        Returns:
            Baseline-corrected data
        """
        return data - self.mean_


class DriftRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor for removing low-frequency drift using high-pass filter.
    """

    def __init__(self, cutoff: float = 0.01, fs: int = 100):
        """
        Create a drift-removal preprocessor.

        Args:
            cutoff: High-pass cutoff, in the same unit as fs
            fs: Sampling frequency
        """
        super().__init__(
            name="drift_removal",
            description="Removes low-frequency drift using a high-pass filter"
        )
        self.config = {
            'cutoff': cutoff,
            'fs': fs
        }

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Fit the preprocessor (no fitting needed for drift removal).

        Args:
            data: Input data to fit on (not inspected)
            **kwargs: Optional 'cutoff' / 'fs' overrides stored in the config
        """
        self.config.update({k: v for k, v in kwargs.items() if k in ['cutoff', 'fs']})
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Remove low-frequency drift using a high-pass filter.

        Args:
            data: Input data to transform
            **kwargs: Optional per-call 'cutoff' / 'fs' overrides

        Returns:
            Drift-corrected data (same container type as the input)
        """
        cutoff = kwargs.get('cutoff', self.config['cutoff'])
        fs = kwargs.get('fs', self.config['fs'])

        # First-order Butterworth; cutoff is normalised by the Nyquist frequency.
        b, a = butter(1, cutoff / (fs / 2), btype='highpass')

        if isinstance(data, pd.DataFrame):
            # Filter every column down the rows; wrapping the 2-D result in a
            # Series (as was done before) raises.
            filtered = filtfilt(b, a, data.to_numpy(dtype=float), axis=0)
            return pd.DataFrame(filtered, index=data.index, columns=data.columns)
        if isinstance(data, pd.Series):
            return pd.Series(filtfilt(b, a, data.to_numpy(dtype=float)),
                             index=data.index, name=data.name)
        return filtfilt(b, a, data)


class HighFrequencyNoiseRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor for removing high-frequency noise using low-pass filter.
    """

    def __init__(self, cutoff: float = 10, fs: int = 100):
        """
        Create a low-pass preprocessor.

        Args:
            cutoff: Low-pass cutoff, in the same unit as fs
            fs: Sampling frequency
        """
        super().__init__(
            name="high_frequency_noise_removal",
            description="Applies a low-pass filter to remove high-frequency noise"
        )
        self.config = {
            'cutoff': cutoff,
            'fs': fs
        }

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Fit the preprocessor (no fitting needed for filtering).

        Args:
            data: Input data to fit on (not inspected)
            **kwargs: Optional 'cutoff' / 'fs' overrides stored in the config
        """
        self.config.update({k: v for k, v in kwargs.items() if k in ['cutoff', 'fs']})
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Apply a low-pass filter to remove high-frequency noise.

        Args:
            data: Input data to transform
            **kwargs: Optional per-call 'cutoff' / 'fs' overrides

        Returns:
            Filtered data (same container type as the input)
        """
        cutoff = kwargs.get('cutoff', self.config['cutoff'])
        fs = kwargs.get('fs', self.config['fs'])

        # First-order Butterworth; cutoff is normalised by the Nyquist frequency.
        b, a = butter(1, cutoff / (fs / 2), btype='lowpass')

        if isinstance(data, pd.DataFrame):
            # Filter every column down the rows; wrapping the 2-D result in a
            # Series (as was done before) raises.
            filtered = filtfilt(b, a, data.to_numpy(dtype=float), axis=0)
            return pd.DataFrame(filtered, index=data.index, columns=data.columns)
        if isinstance(data, pd.Series):
            return pd.Series(filtfilt(b, a, data.to_numpy(dtype=float)),
                             index=data.index, name=data.name)
        return filtfilt(b, a, data)


class LowFrequencyNoiseRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor for removing low-frequency noise using high-pass filter.
    """

    def __init__(self, cutoff: float = 0.5, fs: int = 100):
        """
        Create a high-pass preprocessor.

        Args:
            cutoff: High-pass cutoff, in the same unit as fs
            fs: Sampling frequency
        """
        super().__init__(
            name="low_frequency_noise_removal",
            description="Applies a high-pass filter to remove low-frequency noise"
        )
        self.config = {
            'cutoff': cutoff,
            'fs': fs
        }

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Fit the preprocessor (no fitting needed for filtering).

        Args:
            data: Input data to fit on (not inspected)
            **kwargs: Optional 'cutoff' / 'fs' overrides stored in the config
        """
        self.config.update({k: v for k, v in kwargs.items() if k in ['cutoff', 'fs']})
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Apply a high-pass filter to remove low-frequency noise.

        Args:
            data: Input data to transform
            **kwargs: Optional per-call 'cutoff' / 'fs' overrides

        Returns:
            Filtered data (same container type as the input)
        """
        cutoff = kwargs.get('cutoff', self.config['cutoff'])
        fs = kwargs.get('fs', self.config['fs'])

        # First-order Butterworth; cutoff is normalised by the Nyquist frequency.
        b, a = butter(1, cutoff / (fs / 2), btype='highpass')

        if isinstance(data, pd.DataFrame):
            # Filter every column down the rows; wrapping the 2-D result in a
            # Series (as was done before) raises.
            filtered = filtfilt(b, a, data.to_numpy(dtype=float), axis=0)
            return pd.DataFrame(filtered, index=data.index, columns=data.columns)
        if isinstance(data, pd.Series):
            return pd.Series(filtfilt(b, a, data.to_numpy(dtype=float)),
                             index=data.index, name=data.name)
        return filtfilt(b, a, data)


class ArtifactRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor for removing artifacts by interpolating missing values.
    """

    def __init__(self, method: str = "linear"):
        """
        Create an interpolation preprocessor.

        Args:
            method: Interpolation method passed to pandas ``interpolate``
        """
        super().__init__(
            name="artifact_removal",
            description="Removes artifacts by interpolating missing values"
        )
        self.config = {
            'method': method
        }

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Fit the preprocessor (no fitting needed for interpolation).

        Args:
            data: Input data to fit on (not inspected)
            **kwargs: Optional 'method' override stored in the config
        """
        self.config.update({k: v for k, v in kwargs.items() if k in ['method']})
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Remove artifacts by interpolating missing values.

        Args:
            data: Input data to transform
            **kwargs: Optional per-call 'method' override (pandas inputs only;
                numpy arrays are always interpolated linearly)

        Returns:
            Artifact-free data
        """
        method = kwargs.get('method', self.config['method'])

        if isinstance(data, (pd.DataFrame, pd.Series)):
            return data.interpolate(method=method).bfill().ffill()

        # For numpy arrays, use linear interpolation
        from scipy.interpolate import interp1d
        x = np.arange(len(data))
        valid_mask = ~np.isnan(data)
        n_valid = np.count_nonzero(valid_mask)
        if n_valid == 0:
            # Nothing to interpolate from; hand the input back untouched.
            return data
        if n_valid == 1:
            # A line cannot be fitted through one point; repeat that value.
            return np.full(len(data), data[valid_mask][0], dtype=float)
        f = interp1d(x[valid_mask], data[valid_mask], kind='linear', fill_value='extrapolate')
        return f(x)


class TrendRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor for removing trends using polynomial fitting.
    """

    def __init__(self, order: int = 2):
        """
        Create a polynomial detrending preprocessor.

        Args:
            order: Degree of the polynomial fitted to the data
        """
        super().__init__(
            name="trend_removal",
            description="Removes trends using polynomial fitting"
        )
        self.config = {
            'order': order
        }

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Fit the preprocessor (no fitting needed for detrending).

        Args:
            data: Input data to fit on (not inspected)
            **kwargs: Optional 'order' override stored in the config
        """
        self.config.update({k: v for k, v in kwargs.items() if k in ['order']})
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Remove trends using polynomial fitting.

        Args:
            data: Input data to transform
            **kwargs: Optional per-call 'order' override

        Returns:
            Detrended data
        """
        order = kwargs.get('order', self.config['order'])

        x = np.arange(len(data), dtype=float)
        poly_coeffs = np.polyfit(x, data, order)
        # Evaluate through a Vandermonde matrix: unlike np.polyval this also
        # handles the 2-D coefficient array polyfit returns for multi-column data.
        trend = np.vander(x, order + 1) @ poly_coeffs
        return data - trend


class DCOffsetRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor for removing DC offset by subtracting the mean.
    """

    def __init__(self):
        """Create a DC-offset preprocessor; the mean is learned in fit()."""
        super().__init__(
            name="dc_offset_removal",
            description="Removes DC offset by subtracting the mean"
        )
        self.mean_ = None

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Fit the preprocessor by computing the mean.

        Args:
            data: Input data to fit on
            **kwargs: Additional arguments (unused)
        """
        if isinstance(data, (pd.DataFrame, pd.Series)):
            self.mean_ = data.mean()
        else:
            self.mean_ = np.mean(data)

        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Remove DC offset by subtracting the mean.

        Args:
            data: Input data to transform
            **kwargs: Additional arguments (unused)

        Returns:
            DC-corrected data
        """
        return data - self.mean_
class ClippingPreprocessor(BasePreprocessor):
    """
    Preprocessor that clamps every value into a fixed [min_val, max_val] range.
    """

    def __init__(self, min_val: float = -1, max_val: float = 1):
        """
        Create a clipping preprocessor.

        Args:
            min_val: Lower bound of the clipping range
            max_val: Upper bound of the clipping range
        """
        super().__init__(
            name="clipping",
            description="Clips values in the data to be within a specified range"
        )
        self.config = {'min_val': min_val, 'max_val': max_val}

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Mark the preprocessor as fitted; clipping learns nothing from data.

        Args:
            data: Input data to fit on (not inspected)
            **kwargs: Optional 'min_val' / 'max_val' overrides stored in the config
        """
        # Copy only the recognised settings; other keywords are ignored.
        for key, value in kwargs.items():
            if key in ('min_val', 'max_val'):
                self.config[key] = value
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Clamp the data into the configured range.

        Args:
            data: Input data to transform
            **kwargs: Optional per-call 'min_val' / 'max_val' overrides

        Returns:
            Clipped data
        """
        lower = kwargs.get('min_val', self.config['min_val'])
        upper = kwargs.get('max_val', self.config['max_val'])
        return np.clip(data, lower, upper)
Preprocessor for clipping values to a specified range.
def __init__(self, min_val: float = -1, max_val: float = 1):
    """
    Create a clipping preprocessor.

    Args:
        min_val: Lower bound of the clipping range
        max_val: Upper bound of the clipping range
    """
    super().__init__(
        name="clipping",
        description="Clips values in the data to be within a specified range"
    )
    # Defaults used by transform() unless overridden per call.
    self.config = {'min_val': min_val, 'max_val': max_val}
Initialize the preprocessor.
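Args: min_val: Lower bound of the clipping range (default -1). max_val: Upper bound of the clipping range (default 1). The name ("clipping") and description passed to the base class are fixed by this constructor.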
def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
    """
    Mark the preprocessor as fitted; clipping learns nothing from data.

    Args:
        data: Input data to fit on (not inspected)
        **kwargs: Optional 'min_val' / 'max_val' overrides stored in the config
    """
    # Copy only the recognised settings; other keywords are ignored.
    for key, value in kwargs.items():
        if key in ('min_val', 'max_val'):
            self.config[key] = value
    self.fitted = True
Fit the preprocessor (no fitting needed for clipping).
Args: data: Input data to fit on **kwargs: Additional arguments
def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
    """
    Clamp the data into the configured range.

    Args:
        data: Input data to transform
        **kwargs: Optional per-call 'min_val' / 'max_val' overrides

    Returns:
        Clipped data
    """
    lower = kwargs.get('min_val', self.config['min_val'])
    upper = kwargs.get('max_val', self.config['max_val'])
    return np.clip(data, lower, upper)
Clip values in the data to be within the specified range.
Args: data: Input data to transform **kwargs: Additional arguments
Returns: Clipped data
Inherited Members
class NoiseRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor for removing noise using moving average filter.
    """

    def __init__(self, window_size: int = 5):
        """
        Create a moving-average preprocessor.

        Args:
            window_size: Number of samples in the averaging window
        """
        super().__init__(
            name="noise_removal",
            description="Applies a moving average filter to reduce noise"
        )
        self.config = {
            'window_size': window_size
        }

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Fit the preprocessor (no fitting needed for noise removal).

        Args:
            data: Input data to fit on (not inspected)
            **kwargs: Optional 'window_size' override stored in the config
        """
        self.config.update({k: v for k, v in kwargs.items() if k in ['window_size']})
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Apply a moving average filter to reduce noise.

        Args:
            data: Input data to transform
            **kwargs: Optional per-call 'window_size' override

        Returns:
            Noise-reduced data
        """
        window_size = kwargs.get('window_size', self.config['window_size'])

        if isinstance(data, (pd.DataFrame, pd.Series)):
            smoothed = data.rolling(window=window_size, center=True).mean()
            # The centred window leaves NaN at both ends; fill them from the
            # nearest computed value.
            return smoothed.bfill().ffill()

        # For numpy arrays, use uniform filter
        from scipy.ndimage import uniform_filter1d
        values = np.asarray(data)
        # uniform_filter1d writes its result in the input dtype, so integer
        # input would have its averages truncated; promote to float first.
        if not np.issubdtype(values.dtype, np.inexact):
            values = values.astype(float)
        return uniform_filter1d(values, size=window_size, mode='nearest')
Preprocessor for removing noise using moving average filter.
def __init__(self, window_size: int = 5):
    """
    Create a moving-average preprocessor.

    Args:
        window_size: Number of samples in the averaging window
    """
    super().__init__(
        name="noise_removal",
        description="Applies a moving average filter to reduce noise"
    )
    # Default used by transform() unless overridden per call.
    self.config = {'window_size': window_size}
Initialize the preprocessor.
Args: window_size: Number of samples in the moving-average window (default 5). The name ("noise_removal") and description passed to the base class are fixed by this constructor.
def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
    """
    Mark the preprocessor as fitted; the moving average learns nothing from data.

    Args:
        data: Input data to fit on (not inspected)
        **kwargs: Optional 'window_size' override stored in the config
    """
    # Only 'window_size' is recognised; other keywords are ignored.
    if 'window_size' in kwargs:
        self.config['window_size'] = kwargs['window_size']
    self.fitted = True
Fit the preprocessor (no fitting needed for noise removal).
Args: data: Input data to fit on **kwargs: Additional arguments
def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
    """
    Apply a moving average filter to reduce noise.

    Args:
        data: Input data to transform
        **kwargs: Optional per-call 'window_size' override

    Returns:
        Noise-reduced data
    """
    window_size = kwargs.get('window_size', self.config['window_size'])

    if isinstance(data, (pd.DataFrame, pd.Series)):
        smoothed = data.rolling(window=window_size, center=True).mean()
        # The centred window leaves NaN at both ends; fill them from the
        # nearest computed value.
        return smoothed.bfill().ffill()

    # For numpy arrays, use uniform filter
    from scipy.ndimage import uniform_filter1d
    values = np.asarray(data)
    # uniform_filter1d writes its result in the input dtype, so integer
    # input would have its averages truncated; promote to float first.
    if not np.issubdtype(values.dtype, np.inexact):
        values = values.astype(float)
    return uniform_filter1d(values, size=window_size, mode='nearest')
Apply a moving average filter to reduce noise.
Args: data: Input data to transform **kwargs: Additional arguments
Returns: Noise-reduced data
Inherited Members
class OutlierRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor for removing outliers using Z-score method.
    """

    def __init__(self, threshold: float = 3):
        """
        Create a Z-score outlier preprocessor.

        Args:
            threshold: Maximum absolute Z-score a value may have to be kept
        """
        super().__init__(
            name="outlier_removal",
            description="Removes outliers beyond a given threshold using the Z-score method"
        )
        self.config = {
            'threshold': threshold
        }
        self.mean_ = None
        self.std_ = None

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Fit the preprocessor by computing mean and standard deviation.

        Args:
            data: Input data to fit on
            **kwargs: Optional 'threshold' override stored in the config
        """
        self.config.update({k: v for k, v in kwargs.items() if k in ['threshold']})

        if isinstance(data, (pd.DataFrame, pd.Series)):
            self.mean_ = data.mean()
            self.std_ = data.std()
        else:
            self.mean_ = np.mean(data)
            self.std_ = np.std(data)

        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Remove outliers beyond the threshold using Z-score method.

        Series and array inputs are filtered by a boolean mask; DataFrame
        inputs keep their shape and have outlying cells replaced by NaN.

        Args:
            data: Input data to transform
            **kwargs: Optional per-call 'threshold' override

        Returns:
            Data with outliers removed
        """
        threshold = kwargs.get('threshold', self.config['threshold'])

        # |x - mean| / std <= threshold is evaluated as
        # |x - mean| <= threshold * std so that a zero standard deviation does
        # not produce 0/0 = NaN and discard values that equal the mean.
        limit = threshold * self.std_

        if isinstance(data, pd.DataFrame):
            deviation = (data - self.mean_).abs()
            return data[deviation.le(limit, axis='columns')]
        if isinstance(data, pd.Series):
            return data[(data - self.mean_).abs().le(limit)]
        return data[np.abs(data - self.mean_) <= limit]
Preprocessor for removing outliers using Z-score method.
def __init__(self, threshold: float = 3):
    """
    Create a Z-score outlier preprocessor.

    Args:
        threshold: Maximum absolute Z-score a value may have to be kept
    """
    super().__init__(
        name="outlier_removal",
        description="Removes outliers beyond a given threshold using the Z-score method"
    )
    self.config = {'threshold': threshold}
    # Statistics are unknown until fit() has run.
    self.mean_ = self.std_ = None
Initialize the preprocessor.
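Args: threshold: Maximum absolute Z-score a value may have before it is treated as an outlier (default 3). The name ("outlier_removal") and description passed to the base class are fixed by this constructor.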
def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
    """
    Learn the mean and standard deviation used for Z-scoring.

    Args:
        data: Input data to fit on
        **kwargs: Optional 'threshold' override stored in the config
    """
    # Only 'threshold' is recognised; other keywords are ignored.
    if 'threshold' in kwargs:
        self.config['threshold'] = kwargs['threshold']

    if isinstance(data, (pd.DataFrame, pd.Series)):
        # pandas statistics (per column for a DataFrame).
        centre, spread = data.mean(), data.std()
    else:
        # NumPy statistics over all elements.
        centre, spread = np.mean(data), np.std(data)

    self.mean_ = centre
    self.std_ = spread
    self.fitted = True
Fit the preprocessor by computing mean and standard deviation.
Args: data: Input data to fit on **kwargs: Additional arguments
def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
    """
    Remove outliers beyond the threshold using Z-score method.

    Series and array inputs are filtered by a boolean mask; DataFrame
    inputs keep their shape and have outlying cells replaced by NaN.

    Args:
        data: Input data to transform
        **kwargs: Optional per-call 'threshold' override

    Returns:
        Data with outliers removed
    """
    threshold = kwargs.get('threshold', self.config['threshold'])

    # |x - mean| / std <= threshold is evaluated as
    # |x - mean| <= threshold * std so that a zero standard deviation does
    # not produce 0/0 = NaN and discard values that equal the mean.
    limit = threshold * self.std_

    if isinstance(data, pd.DataFrame):
        deviation = (data - self.mean_).abs()
        return data[deviation.le(limit, axis='columns')]
    if isinstance(data, pd.Series):
        return data[(data - self.mean_).abs().le(limit)]
    return data[np.abs(data - self.mean_) <= limit]
Remove outliers beyond the threshold using Z-score method.
Args: data: Input data to transform **kwargs: Additional arguments
Returns: Data with outliers removed
Inherited Members
class BaselineRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor that removes the baseline by subtracting the fitted mean.
    """

    def __init__(self):
        """Create a baseline-removal preprocessor; the mean is learned in fit()."""
        super().__init__(
            name="baseline_removal",
            description="Removes baseline by subtracting the mean"
        )
        # Unknown until fit() has run.
        self.mean_ = None

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Learn the mean that transform() will subtract.

        Args:
            data: Input data to fit on
            **kwargs: Additional arguments (unused)
        """
        is_pandas = isinstance(data, (pd.DataFrame, pd.Series))
        self.mean_ = data.mean() if is_pandas else np.mean(data)
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Subtract the fitted mean from the data.

        Args:
            data: Input data to transform
            **kwargs: Additional arguments (unused)

        Returns:
            Baseline-corrected data
        """
        baseline = self.mean_
        return data - baseline
Preprocessor for removing baseline by subtracting the mean.
def __init__(self):
    """Create a baseline-removal preprocessor; the mean is learned in fit()."""
    super().__init__(
        name="baseline_removal",
        description="Removes baseline by subtracting the mean"
    )
    # Unknown until fit() has run.
    self.mean_ = None
Initialize the preprocessor.
Args: none. The name ("baseline_removal") and description passed to the base class are fixed by this constructor.
def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
    """
    Learn the mean that transform() will subtract.

    Args:
        data: Input data to fit on
        **kwargs: Additional arguments (unused)
    """
    is_pandas = isinstance(data, (pd.DataFrame, pd.Series))
    # pandas objects use their own mean(); anything else goes through NumPy.
    self.mean_ = data.mean() if is_pandas else np.mean(data)
    self.fitted = True
Fit the preprocessor by computing the mean.
Args: data: Input data to fit on **kwargs: Additional arguments
def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
    """
    Subtract the fitted mean from the data.

    Args:
        data: Input data to transform
        **kwargs: Additional arguments (unused)

    Returns:
        Baseline-corrected data
    """
    baseline = self.mean_
    return data - baseline
Remove baseline by subtracting the mean.
Args: data: Input data to transform **kwargs: Additional arguments
Returns: Baseline-corrected data
class DriftRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor for removing low-frequency drift using high-pass filter.
    """

    def __init__(self, cutoff: float = 0.01, fs: int = 100):
        """
        Create a drift-removal preprocessor.

        Args:
            cutoff: High-pass cutoff, in the same unit as fs
            fs: Sampling frequency
        """
        super().__init__(
            name="drift_removal",
            description="Removes low-frequency drift using a high-pass filter"
        )
        self.config = {
            'cutoff': cutoff,
            'fs': fs
        }

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Fit the preprocessor (no fitting needed for drift removal).

        Args:
            data: Input data to fit on (not inspected)
            **kwargs: Optional 'cutoff' / 'fs' overrides stored in the config
        """
        self.config.update({k: v for k, v in kwargs.items() if k in ['cutoff', 'fs']})
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Remove low-frequency drift using a high-pass filter.

        Args:
            data: Input data to transform
            **kwargs: Optional per-call 'cutoff' / 'fs' overrides

        Returns:
            Drift-corrected data (same container type as the input)
        """
        cutoff = kwargs.get('cutoff', self.config['cutoff'])
        fs = kwargs.get('fs', self.config['fs'])

        # First-order Butterworth; cutoff is normalised by the Nyquist frequency.
        b, a = butter(1, cutoff / (fs / 2), btype='highpass')

        if isinstance(data, pd.DataFrame):
            # Filter every column down the rows; wrapping the 2-D result in a
            # Series (as was done before) raises.
            filtered = filtfilt(b, a, data.to_numpy(dtype=float), axis=0)
            return pd.DataFrame(filtered, index=data.index, columns=data.columns)
        if isinstance(data, pd.Series):
            return pd.Series(filtfilt(b, a, data.to_numpy(dtype=float)),
                             index=data.index, name=data.name)
        return filtfilt(b, a, data)
Preprocessor for removing low-frequency drift using high-pass filter.
def __init__(self, cutoff: float = 0.01, fs: int = 100):
    """
    Create a drift-removal preprocessor.

    Args:
        cutoff: High-pass cutoff, in the same unit as fs
        fs: Sampling frequency
    """
    super().__init__(
        name="drift_removal",
        description="Removes low-frequency drift using a high-pass filter"
    )
    # Defaults used by transform() unless overridden per call.
    self.config = {'cutoff': cutoff, 'fs': fs}
Initialize the preprocessor.
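Args: cutoff: High-pass cutoff frequency (default 0.01). fs: Sampling frequency (default 100). The name ("drift_removal") and description passed to the base class are fixed by this constructor.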
def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
    """
    Mark the preprocessor as fitted; the filter learns nothing from data.

    Args:
        data: Input data to fit on (not inspected)
        **kwargs: Optional 'cutoff' / 'fs' overrides stored in the config
    """
    # Copy only the recognised settings; other keywords are ignored.
    for key, value in kwargs.items():
        if key in ('cutoff', 'fs'):
            self.config[key] = value
    self.fitted = True
Fit the preprocessor (no fitting needed for drift removal).
Args: data: Input data to fit on **kwargs: Additional arguments
def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
    """
    Remove low-frequency drift using a high-pass filter.

    Args:
        data: Input data to transform
        **kwargs: Optional per-call 'cutoff' / 'fs' overrides

    Returns:
        Drift-corrected data (same container type as the input)
    """
    cutoff = kwargs.get('cutoff', self.config['cutoff'])
    fs = kwargs.get('fs', self.config['fs'])

    # First-order Butterworth; cutoff is normalised by the Nyquist frequency.
    b, a = butter(1, cutoff / (fs / 2), btype='highpass')

    if isinstance(data, pd.DataFrame):
        # Filter every column down the rows; wrapping the 2-D result in a
        # Series (as was done before) raises.
        filtered = filtfilt(b, a, data.to_numpy(dtype=float), axis=0)
        return pd.DataFrame(filtered, index=data.index, columns=data.columns)
    if isinstance(data, pd.Series):
        return pd.Series(filtfilt(b, a, data.to_numpy(dtype=float)),
                         index=data.index, name=data.name)
    return filtfilt(b, a, data)
Remove low-frequency drift using a high-pass filter.
Args: data: Input data to transform **kwargs: Additional arguments
Returns: Drift-corrected data
Inherited Members
class HighFrequencyNoiseRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor for removing high-frequency noise using low-pass filter.
    """

    def __init__(self, cutoff: float = 10, fs: int = 100):
        """
        Create a low-pass preprocessor.

        Args:
            cutoff: Low-pass cutoff, in the same unit as fs
            fs: Sampling frequency
        """
        super().__init__(
            name="high_frequency_noise_removal",
            description="Applies a low-pass filter to remove high-frequency noise"
        )
        self.config = {
            'cutoff': cutoff,
            'fs': fs
        }

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Fit the preprocessor (no fitting needed for filtering).

        Args:
            data: Input data to fit on (not inspected)
            **kwargs: Optional 'cutoff' / 'fs' overrides stored in the config
        """
        self.config.update({k: v for k, v in kwargs.items() if k in ['cutoff', 'fs']})
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Apply a low-pass filter to remove high-frequency noise.

        Args:
            data: Input data to transform
            **kwargs: Optional per-call 'cutoff' / 'fs' overrides

        Returns:
            Filtered data (same container type as the input)
        """
        cutoff = kwargs.get('cutoff', self.config['cutoff'])
        fs = kwargs.get('fs', self.config['fs'])

        # First-order Butterworth; cutoff is normalised by the Nyquist frequency.
        b, a = butter(1, cutoff / (fs / 2), btype='lowpass')

        if isinstance(data, pd.DataFrame):
            # Filter every column down the rows; wrapping the 2-D result in a
            # Series (as was done before) raises.
            filtered = filtfilt(b, a, data.to_numpy(dtype=float), axis=0)
            return pd.DataFrame(filtered, index=data.index, columns=data.columns)
        if isinstance(data, pd.Series):
            return pd.Series(filtfilt(b, a, data.to_numpy(dtype=float)),
                             index=data.index, name=data.name)
        return filtfilt(b, a, data)
Preprocessor for removing high-frequency noise using low-pass filter.
def __init__(self, cutoff: float = 10, fs: int = 100):
    """
    Create a low-pass preprocessor.

    Args:
        cutoff: Low-pass cutoff, in the same unit as fs
        fs: Sampling frequency
    """
    super().__init__(
        name="high_frequency_noise_removal",
        description="Applies a low-pass filter to remove high-frequency noise"
    )
    # Defaults used by transform() unless overridden per call.
    self.config = {'cutoff': cutoff, 'fs': fs}
Initialize the preprocessor.
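Args: cutoff: Low-pass cutoff frequency (default 10). fs: Sampling frequency (default 100). The name ("high_frequency_noise_removal") and description passed to the base class are fixed by this constructor.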
def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
    """
    Mark the preprocessor as fitted; the filter learns nothing from data.

    Args:
        data: Input data to fit on (not inspected)
        **kwargs: Optional 'cutoff' / 'fs' overrides stored in the config
    """
    # Copy only the recognised settings; other keywords are ignored.
    for key, value in kwargs.items():
        if key in ('cutoff', 'fs'):
            self.config[key] = value
    self.fitted = True
Fit the preprocessor (no fitting needed for filtering).
Args: data: Input data to fit on **kwargs: Additional arguments
def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
    """
    Apply a low-pass filter to remove high-frequency noise.

    Args:
        data: Input data to transform
        **kwargs: Optional per-call 'cutoff' / 'fs' overrides

    Returns:
        Filtered data (same container type as the input)
    """
    cutoff = kwargs.get('cutoff', self.config['cutoff'])
    fs = kwargs.get('fs', self.config['fs'])

    # First-order Butterworth; cutoff is normalised by the Nyquist frequency.
    b, a = butter(1, cutoff / (fs / 2), btype='lowpass')

    if isinstance(data, pd.DataFrame):
        # Filter every column down the rows; wrapping the 2-D result in a
        # Series (as was done before) raises.
        filtered = filtfilt(b, a, data.to_numpy(dtype=float), axis=0)
        return pd.DataFrame(filtered, index=data.index, columns=data.columns)
    if isinstance(data, pd.Series):
        return pd.Series(filtfilt(b, a, data.to_numpy(dtype=float)),
                         index=data.index, name=data.name)
    return filtfilt(b, a, data)
Apply a low-pass filter to remove high-frequency noise.
Args: data: Input data to transform **kwargs: Additional arguments
Returns: Filtered data
Inherited Members
class LowFrequencyNoiseRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor for removing low-frequency noise using high-pass filter.
    """

    def __init__(self, cutoff: float = 0.5, fs: int = 100):
        """
        Create a high-pass preprocessor.

        Args:
            cutoff: High-pass cutoff, in the same unit as fs
            fs: Sampling frequency
        """
        super().__init__(
            name="low_frequency_noise_removal",
            description="Applies a high-pass filter to remove low-frequency noise"
        )
        self.config = {
            'cutoff': cutoff,
            'fs': fs
        }

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Fit the preprocessor (no fitting needed for filtering).

        Args:
            data: Input data to fit on (not inspected)
            **kwargs: Optional 'cutoff' / 'fs' overrides stored in the config
        """
        self.config.update({k: v for k, v in kwargs.items() if k in ['cutoff', 'fs']})
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Apply a high-pass filter to remove low-frequency noise.

        Args:
            data: Input data to transform
            **kwargs: Optional per-call 'cutoff' / 'fs' overrides

        Returns:
            Filtered data (same container type as the input)
        """
        cutoff = kwargs.get('cutoff', self.config['cutoff'])
        fs = kwargs.get('fs', self.config['fs'])

        # First-order Butterworth; cutoff is normalised by the Nyquist frequency.
        b, a = butter(1, cutoff / (fs / 2), btype='highpass')

        if isinstance(data, pd.DataFrame):
            # Filter every column down the rows; wrapping the 2-D result in a
            # Series (as was done before) raises.
            filtered = filtfilt(b, a, data.to_numpy(dtype=float), axis=0)
            return pd.DataFrame(filtered, index=data.index, columns=data.columns)
        if isinstance(data, pd.Series):
            return pd.Series(filtfilt(b, a, data.to_numpy(dtype=float)),
                             index=data.index, name=data.name)
        return filtfilt(b, a, data)
Preprocessor for removing low-frequency noise using high-pass filter.
def __init__(self, cutoff: float = 0.5, fs: int = 100):
    """
    Set up the high-pass preprocessor and record its filter settings.

    Args:
        cutoff: Cutoff frequency stored under ``config['cutoff']``
        fs: Sampling frequency stored under ``config['fs']``
    """
    super().__init__(
        description="Applies a high-pass filter to remove low-frequency noise",
        name="low_frequency_noise_removal",
    )
    self.config = dict(cutoff=cutoff, fs=fs)
Initialize the preprocessor.
Args: cutoff: Cutoff frequency of the high-pass filter, in the same units as fs (default 0.5) fs: Sampling frequency of the data (default 100)
def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
    """
    Mark the preprocessor as fitted; filtering needs no learned state.

    Args:
        data: Input data to fit on (not inspected)
        **kwargs: ``cutoff`` and/or ``fs`` values to store in the
            configuration; every other key is ignored
    """
    accepted = ('cutoff', 'fs')
    for key, value in kwargs.items():
        if key in accepted:
            self.config[key] = value
    self.fitted = True
Fit the preprocessor (no fitting needed for filtering).
Args: data: Input data to fit on **kwargs: Additional arguments
def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
    """
    Apply a high-pass filter to remove low-frequency noise.

    A first-order Butterworth high-pass filter is designed from the
    configured cutoff and sampling frequency and applied with ``filtfilt``
    (forward and backward pass).

    Args:
        data: Input data to transform. A DataFrame is filtered column by
            column along its rows; a Series or NumPy array is passed to
            ``filtfilt`` unchanged (NumPy arrays use its default axis).
        **kwargs: Optional ``cutoff`` and ``fs`` values that override the
            configured ones for this call only.

    Returns:
        Filtered data: a DataFrame (same index and columns) for DataFrame
        input, a Series (same index) for Series input, otherwise an array.
    """
    cutoff = kwargs.get('cutoff', self.config['cutoff'])
    fs = kwargs.get('fs', self.config['fs'])

    # butter() expects the cutoff normalised by the Nyquist frequency (fs / 2).
    b, a = butter(1, cutoff / (fs / 2), btype='highpass')

    if isinstance(data, pd.DataFrame):
        # Filter along the sample axis (rows). pd.Series cannot wrap a 2-D
        # result, so the frame is rebuilt with its original labels.
        filtered = filtfilt(b, a, data.to_numpy(dtype=float), axis=0)
        return pd.DataFrame(filtered, index=data.index, columns=data.columns)
    if isinstance(data, pd.Series):
        return pd.Series(filtfilt(b, a, data), index=data.index)
    return filtfilt(b, a, data)
Apply a high-pass filter to remove low-frequency noise.
Args: data: Input data to transform **kwargs: Additional arguments
Returns: Filtered data
Inherited Members
class ArtifactRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor for removing artifacts by interpolating missing values.

    pandas input is interpolated with the configured method and any values
    still missing are back- then forward-filled. NumPy input always uses
    linear interpolation, with linear extrapolation at the edges.
    """

    def __init__(self, method: str = "linear"):
        """
        Initialize the preprocessor.

        Args:
            method: Interpolation method passed to pandas ``interpolate``
        """
        super().__init__(
            name="artifact_removal",
            description="Removes artifacts by interpolating missing values"
        )
        self.config = {
            'method': method
        }

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Fit the preprocessor (no fitting needed for interpolation).

        Args:
            data: Input data to fit on (not inspected)
            **kwargs: Optional ``method`` value stored in the configuration;
                any other keys are ignored
        """
        self.config.update({k: v for k, v in kwargs.items() if k in ['method']})
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Remove artifacts by interpolating missing values.

        Args:
            data: Input data to transform. NaN entries are treated as
                missing. 2-D NumPy arrays are processed column by column.
            **kwargs: Optional ``method`` override (used for pandas input
                only).

        Returns:
            Artifact-free data. An all-NaN 1-D array is returned unchanged.
        """
        method = kwargs.get('method', self.config['method'])

        if isinstance(data, (pd.DataFrame, pd.Series)):
            return data.interpolate(method=method).bfill().ffill()

        # For numpy arrays, use linear interpolation
        from scipy.interpolate import interp1d

        def _fill_gaps(column):
            # Fill the NaN gaps of one 1-D float array.
            valid_mask = ~np.isnan(column)
            n_valid = np.count_nonzero(valid_mask)
            if n_valid == 0:
                return column
            if n_valid == 1:
                # interp1d needs at least two points; hold the only value.
                return np.full(column.shape, column[valid_mask][0])
            x = np.arange(len(column))
            f = interp1d(x[valid_mask], column[valid_mask], kind='linear', fill_value='extrapolate')
            return f(x)

        values = np.asarray(data, dtype=float)
        if values.ndim == 2:
            # A 2-D mask cannot index the 1-D sample axis, so go per column.
            result = values.copy()
            for col in range(values.shape[1]):
                result[:, col] = _fill_gaps(values[:, col])
            return result
        if not np.any(~np.isnan(values)):
            return data
        return _fill_gaps(values)
Preprocessor for removing artifacts by interpolating missing values.
def __init__(self, method: str = "linear"):
    """
    Set up the artifact-removal preprocessor and record its method.

    Args:
        method: Interpolation method stored under ``config['method']``
    """
    super().__init__(
        description="Removes artifacts by interpolating missing values",
        name="artifact_removal",
    )
    self.config = dict(method=method)
Initialize the preprocessor.
Args: method: Interpolation method used to fill missing values in pandas input (default "linear"); NumPy input always uses linear interpolation
def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
    """
    Mark the preprocessor as fitted; interpolation needs no learned state.

    Args:
        data: Input data to fit on (not inspected)
        **kwargs: A ``method`` value to store in the configuration; every
            other key is ignored
    """
    if 'method' in kwargs:
        self.config['method'] = kwargs['method']
    self.fitted = True
Fit the preprocessor (no fitting needed for interpolation).
Args: data: Input data to fit on **kwargs: Additional arguments
def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
    """
    Remove artifacts by interpolating missing values.

    pandas input is interpolated with the configured method and any values
    still missing are back- then forward-filled. NumPy input always uses
    linear interpolation, with linear extrapolation at the edges.

    Args:
        data: Input data to transform. NaN entries are treated as missing.
            2-D NumPy arrays are processed column by column.
        **kwargs: Optional ``method`` override (used for pandas input only).

    Returns:
        Artifact-free data. An all-NaN 1-D array is returned unchanged.
    """
    method = kwargs.get('method', self.config['method'])

    if isinstance(data, (pd.DataFrame, pd.Series)):
        return data.interpolate(method=method).bfill().ffill()

    # For numpy arrays, use linear interpolation
    from scipy.interpolate import interp1d

    def _fill_gaps(column):
        # Fill the NaN gaps of one 1-D float array.
        valid_mask = ~np.isnan(column)
        n_valid = np.count_nonzero(valid_mask)
        if n_valid == 0:
            return column
        if n_valid == 1:
            # interp1d needs at least two points; hold the only value.
            return np.full(column.shape, column[valid_mask][0])
        x = np.arange(len(column))
        f = interp1d(x[valid_mask], column[valid_mask], kind='linear', fill_value='extrapolate')
        return f(x)

    values = np.asarray(data, dtype=float)
    if values.ndim == 2:
        # A 2-D mask cannot index the 1-D sample axis, so go per column.
        result = values.copy()
        for col in range(values.shape[1]):
            result[:, col] = _fill_gaps(values[:, col])
        return result
    if not np.any(~np.isnan(values)):
        return data
    return _fill_gaps(values)
Remove artifacts by interpolating missing values.
Args: data: Input data to transform **kwargs: Additional arguments
Returns: Artifact-free data
Inherited Members
class TrendRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor for removing trends using polynomial fitting.

    A polynomial of the configured order is least-squares fitted against the
    sample position (0, 1, 2, ...) and subtracted from the data.
    """

    def __init__(self, order: int = 2):
        """
        Initialize the preprocessor.

        Args:
            order: Order of the polynomial fitted as the trend
        """
        super().__init__(
            name="trend_removal",
            description="Removes trends using polynomial fitting"
        )
        self.config = {
            'order': order
        }

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Fit the preprocessor (no fitting needed for detrending).

        Args:
            data: Input data to fit on (not inspected)
            **kwargs: Optional ``order`` value stored in the configuration;
                any other keys are ignored
        """
        self.config.update({k: v for k, v in kwargs.items() if k in ['order']})
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Remove trends using polynomial fitting.

        Args:
            data: Input data to transform. For 2-D input (DataFrame or
                array) each column is detrended independently along its rows.
            **kwargs: Optional ``order`` override for this call only.

        Returns:
            Detrended data (``data`` minus the fitted polynomial trend)
        """
        order = kwargs.get('order', self.config['order'])

        x = np.arange(len(data), dtype=float)
        # polyfit accepts 2-D y and then returns one coefficient column per
        # data column.
        poly_coeffs = np.polyfit(x, np.asarray(data, dtype=float), order)

        if poly_coeffs.ndim == 1:
            trend = np.polyval(poly_coeffs, x)
        else:
            # polyval cannot broadcast per-column coefficients; the
            # Vandermonde product evaluates every column's polynomial at once.
            trend = np.vander(x, int(order) + 1) @ poly_coeffs
        return data - trend
Preprocessor for removing trends using polynomial fitting.
def __init__(self, order: int = 2):
    """
    Set up the trend-removal preprocessor and record the polynomial order.

    Args:
        order: Polynomial order stored under ``config['order']``
    """
    super().__init__(
        description="Removes trends using polynomial fitting",
        name="trend_removal",
    )
    self.config = dict(order=order)
Initialize the preprocessor.
Args: order: Order of the polynomial that is fitted to the data and subtracted as the trend (default 2)
def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
    """
    Mark the preprocessor as fitted; detrending needs no learned state.

    Args:
        data: Input data to fit on (not inspected)
        **kwargs: An ``order`` value to store in the configuration; every
            other key is ignored
    """
    if 'order' in kwargs:
        self.config['order'] = kwargs['order']
    self.fitted = True
Fit the preprocessor (no fitting needed for detrending).
Args: data: Input data to fit on **kwargs: Additional arguments
def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
    """
    Remove trends using polynomial fitting.

    A polynomial of the requested order is least-squares fitted against the
    sample position (0, 1, 2, ...) and subtracted from the data.

    Args:
        data: Input data to transform. For 2-D input (DataFrame or array)
            each column is detrended independently along its rows.
        **kwargs: Optional ``order`` override for this call only.

    Returns:
        Detrended data (``data`` minus the fitted polynomial trend)
    """
    order = kwargs.get('order', self.config['order'])

    x = np.arange(len(data), dtype=float)
    # polyfit accepts 2-D y and then returns one coefficient column per
    # data column.
    poly_coeffs = np.polyfit(x, np.asarray(data, dtype=float), order)

    if poly_coeffs.ndim == 1:
        trend = np.polyval(poly_coeffs, x)
    else:
        # polyval cannot broadcast per-column coefficients; the Vandermonde
        # product evaluates every column's polynomial at once.
        trend = np.vander(x, int(order) + 1) @ poly_coeffs
    return data - trend
Remove trends using polynomial fitting.
Args: data: Input data to transform **kwargs: Additional arguments
Returns: Detrended data
Inherited Members
class DCOffsetRemovalPreprocessor(BasePreprocessor):
    """
    Preprocessor that removes a DC offset by subtracting a stored mean.

    The mean is computed in ``fit`` and reused by every ``transform`` call.
    """

    def __init__(self):
        """Set up the preprocessor with no mean computed yet."""
        super().__init__(
            description="Removes DC offset by subtracting the mean",
            name="dc_offset_removal",
        )
        self.mean_ = None

    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Compute and store the mean of the data.

        Args:
            data: Input data to fit on. pandas objects use their own
                ``mean()``; anything else goes through ``np.mean``.
            **kwargs: Additional arguments (unused)
        """
        is_pandas = isinstance(data, (pd.DataFrame, pd.Series))
        self.mean_ = data.mean() if is_pandas else np.mean(data)
        self.fitted = True

    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Subtract the stored mean from the data.

        Args:
            data: Input data to transform
            **kwargs: Additional arguments (unused)

        Returns:
            DC-corrected data
        """
        offset = self.mean_
        return data - offset
Preprocessor for removing DC offset by subtracting the mean.
def __init__(self):
    """Set up the DC-offset preprocessor with no mean computed yet."""
    super().__init__(
        description="Removes DC offset by subtracting the mean",
        name="dc_offset_removal",
    )
    # Filled in by fit().
    self.mean_ = None
Initialize the preprocessor.
Takes no arguments; the mean to subtract is computed later by fit.
def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
    """
    Compute and store the mean of the data.

    Args:
        data: Input data to fit on. pandas objects use their own
            ``mean()``; anything else goes through ``np.mean``.
        **kwargs: Additional arguments (unused)
    """
    is_pandas = isinstance(data, (pd.DataFrame, pd.Series))
    self.mean_ = data.mean() if is_pandas else np.mean(data)
    self.fitted = True
Fit the preprocessor by computing the mean.
Args: data: Input data to fit on **kwargs: Additional arguments
def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
    """
    Subtract the stored mean from the data.

    Args:
        data: Input data to transform
        **kwargs: Additional arguments (unused)

    Returns:
        DC-corrected data
    """
    offset = self.mean_
    return data - offset
Remove DC offset by subtracting the mean.
Args: data: Input data to transform **kwargs: Additional arguments
Returns: DC-corrected data