gaitsetpy.preprocessing.pipeline
This file contains functions for preprocessing the data.
Clipping, Noise Removal etc.
We'll have the following functions: clip_sliding_windows, remove_noise, remove_outliers, remove_baseline, remove_drift, remove_artifacts, remove_trend, remove_dc_offset, remove_high_frequency_noise, remove_low_frequency_noise
'''
This file contains functions for preprocessing the data.

Clipping, Noise Removal etc.

We'll have the following functions:
clip_sliding_windows, remove_noise, remove_outliers, remove_baseline, remove_drift, remove_artifacts, remove_trend, remove_dc_offset, remove_high_frequency_noise, remove_low_frequency_noise
'''

import numpy as np
import pandas as pd
from scipy.signal import butter, filtfilt


def clip_sliding_windows(data, min_val=-1, max_val=1):
    """
    Clip values in the sliding windows to be within a specified range.

    Args:
        data: Array-like values accepted by ``np.clip``.
        min_val: Lower bound; smaller values are replaced by it.
        max_val: Upper bound; larger values are replaced by it.

    Returns:
        The result of ``np.clip(data, min_val, max_val)``.
    """
    return np.clip(data, min_val, max_val)


def remove_noise(data, window_size=5):
    """
    Apply a centered moving average filter to reduce noise.

    Args:
        data: Object exposing ``.rolling`` (the code relies on the pandas
            Series/DataFrame API).
        window_size: Number of samples in the moving-average window.

    Returns:
        The smoothed data. NaN positions produced by the rolling mean are
        back-filled first, then forward-filled.
    """
    smoothed = data.rolling(window=window_size, center=True).mean()
    # ``fillna(method=...)`` is deprecated in pandas; ``bfill``/``ffill`` are
    # the supported equivalents and give the same result.
    return smoothed.bfill().ffill()


def remove_outliers(data, threshold=3):
    """
    Remove outliers beyond a given threshold using the Z-score method.

    Args:
        data: Object supporting ``.mean()``, ``.std()`` and boolean-mask
            indexing (presumably a pandas Series/DataFrame -- confirm with
            callers).
        threshold: Maximum allowed distance from the mean, expressed in
            standard deviations.

    Returns:
        ``data`` indexed by the mask ``|data - mean| <= threshold * std``.
    """
    mean, std = data.mean(), data.std()
    return data[(data - mean).abs() <= threshold * std]


def remove_baseline(data):
    """
    Remove baseline by subtracting the mean.

    Args:
        data: Object supporting ``.mean()`` and subtraction.

    Returns:
        ``data`` minus ``data.mean()``.
    """
    return data - data.mean()


def remove_drift(data, cutoff=0.01, fs=100):
    """
    Remove low-frequency drift using a high-pass filter.

    Args:
        data: Signal passed to ``scipy.signal.filtfilt``.
        cutoff: Cutoff frequency, in the same units as ``fs``.
        fs: Sampling frequency; ``fs / 2`` is used to normalise ``cutoff``.

    Returns:
        The output of ``filtfilt`` for a first-order Butterworth high-pass.
    """
    # NOTE(review): filtfilt is called without ``axis``, so SciPy's default
    # axis applies -- confirm this is the time axis for multi-channel input.
    b, a = butter(1, cutoff / (fs / 2), btype='highpass')
    return filtfilt(b, a, data)


def remove_artifacts(data, method="interpolate"):
    """
    Remove artifacts by interpolating missing values.

    Args:
        data: Object exposing ``.interpolate`` (the code relies on the
            pandas Series/DataFrame API).
        method: Currently ignored; linear interpolation is always used.
            Kept so existing callers that pass it continue to work.

    Returns:
        The data after linear interpolation, with any remaining NaNs
        back-filled and then forward-filled.
    """
    filled = data.interpolate(method="linear")
    # ``fillna(method=...)`` is deprecated in pandas; ``bfill``/``ffill`` are
    # the supported equivalents and give the same result.
    return filled.bfill().ffill()


def remove_trend(data, order=2):
    """
    Remove trends using polynomial fitting.

    Args:
        data: Sequence of samples; fitted against ``0 .. len(data) - 1``.
        order: Degree of the polynomial passed to ``np.polyfit``.

    Returns:
        ``data`` minus the fitted polynomial evaluated at each sample index.
    """
    x = np.arange(len(data))
    poly_coeffs = np.polyfit(x, data, order)
    trend = np.polyval(poly_coeffs, x)
    return data - trend


def remove_dc_offset(data):
    """
    Remove DC offset by subtracting the mean.

    Args:
        data: Object supporting ``.mean()`` and subtraction.

    Returns:
        ``data`` minus ``data.mean()``.
    """
    return data - data.mean()


def remove_high_frequency_noise(data, cutoff=10, fs=100):
    """
    Apply a low-pass filter to remove high-frequency noise.

    Args:
        data: Signal passed to ``scipy.signal.filtfilt``.
        cutoff: Cutoff frequency, in the same units as ``fs``.
        fs: Sampling frequency; ``fs / 2`` is used to normalise ``cutoff``.

    Returns:
        The output of ``filtfilt`` for a first-order Butterworth low-pass.
    """
    b, a = butter(1, cutoff / (fs / 2), btype='lowpass')
    return filtfilt(b, a, data)


def remove_low_frequency_noise(data, cutoff=0.5, fs=100):
    """
    Apply a high-pass filter to remove low-frequency noise.

    Args:
        data: Signal passed to ``scipy.signal.filtfilt``.
        cutoff: Cutoff frequency, in the same units as ``fs``.
        fs: Sampling frequency; ``fs / 2`` is used to normalise ``cutoff``.

    Returns:
        The output of ``filtfilt`` for a first-order Butterworth high-pass.
    """
    b, a = butter(1, cutoff / (fs / 2), btype='highpass')
    return filtfilt(b, a, data)
def clip_sliding_windows(data, min_val=-1, max_val=1):
    """
    Limit every value in ``data`` to the closed range [min_val, max_val].

    Args:
        data: Array-like values accepted by ``np.clip``.
        min_val: Lower bound; smaller values are replaced by it.
        max_val: Upper bound; larger values are replaced by it.

    Returns:
        The clipped values as produced by ``np.clip``.
    """
    clipped = np.clip(data, min_val, max_val)
    return clipped
Clip values in the sliding windows to be within a specified range.
def remove_noise(data, window_size=5):
    """
    Apply a centered moving average filter to reduce noise.

    Args:
        data: Object exposing ``.rolling`` (the code relies on the pandas
            Series/DataFrame API).
        window_size: Number of samples in the moving-average window.

    Returns:
        The smoothed data. NaN positions produced by the rolling mean are
        back-filled first, then forward-filled.
    """
    smoothed = data.rolling(window=window_size, center=True).mean()
    # ``fillna(method=...)`` is deprecated in pandas; ``bfill``/``ffill`` are
    # the supported equivalents and give the same result.
    return smoothed.bfill().ffill()
Apply a moving average filter to reduce noise.
def remove_outliers(data, threshold=3):
    """
    Keep only the values lying within ``threshold`` standard deviations of
    the mean (Z-score filtering).

    Args:
        data: Object supporting ``.mean()``, ``.std()`` and boolean-mask
            indexing (presumably a pandas Series/DataFrame -- confirm with
            callers).
        threshold: Maximum allowed distance from the mean, expressed in
            standard deviations.

    Returns:
        ``data`` indexed by the mask ``|data - mean| <= threshold * std``.
    """
    center = data.mean()
    spread = data.std()
    deviation = (data - center).abs()
    within_limit = deviation <= threshold * spread
    return data[within_limit]
Remove outliers beyond a given threshold using the Z-score method.
def remove_baseline(data):
    """
    Center the data by subtracting its mean.

    Args:
        data: Object supporting ``.mean()`` and subtraction.

    Returns:
        ``data`` minus ``data.mean()``.
    """
    baseline = data.mean()
    return data - baseline
Remove baseline by subtracting the mean.
def remove_drift(data, cutoff=0.01, fs=100):
    """
    Suppress slow drift with a first-order Butterworth high-pass filter.

    Args:
        data: Signal passed to ``scipy.signal.filtfilt``.
        cutoff: Cutoff frequency, in the same units as ``fs``.
        fs: Sampling frequency; ``fs / 2`` is used to normalise ``cutoff``.

    Returns:
        The output of ``filtfilt`` applied to ``data``.
    """
    nyquist = fs / 2
    numerator, denominator = butter(1, cutoff / nyquist, btype='highpass')
    # NOTE(review): no ``axis`` is passed, so SciPy's default axis applies --
    # confirm this is the time axis for multi-channel input.
    return filtfilt(numerator, denominator, data)
Remove low-frequency drift using a high-pass filter.
def remove_artifacts(data, method="interpolate"):
    """
    Remove artifacts by interpolating missing values.

    Args:
        data: Object exposing ``.interpolate`` (the code relies on the
            pandas Series/DataFrame API).
        method: Currently ignored; linear interpolation is always used.
            Kept so existing callers that pass it continue to work.

    Returns:
        The data after linear interpolation, with any remaining NaNs
        back-filled and then forward-filled.
    """
    filled = data.interpolate(method="linear")
    # ``fillna(method=...)`` is deprecated in pandas; ``bfill``/``ffill`` are
    # the supported equivalents and give the same result.
    return filled.bfill().ffill()
Remove artifacts by interpolating missing values.
def remove_trend(data, order=2):
    """
    Subtract a least-squares polynomial trend from the data.

    Args:
        data: Sequence of samples; fitted against ``0 .. len(data) - 1``.
        order: Degree of the polynomial passed to ``np.polyfit``.

    Returns:
        ``data`` minus the fitted polynomial evaluated at each sample index.
    """
    sample_idx = np.arange(len(data))
    fitted = np.polyval(np.polyfit(sample_idx, data, order), sample_idx)
    return data - fitted
Remove trends using polynomial fitting.
def remove_dc_offset(data):
    """
    Eliminate the DC component by subtracting the mean of the data.

    Args:
        data: Object supporting ``.mean()`` and subtraction.

    Returns:
        ``data`` minus ``data.mean()``.
    """
    dc_level = data.mean()
    return data - dc_level
Remove DC offset by subtracting the mean.
def remove_high_frequency_noise(data, cutoff=10, fs=100):
    """
    Attenuate high-frequency noise with a first-order Butterworth low-pass
    filter.

    Args:
        data: Signal passed to ``scipy.signal.filtfilt``.
        cutoff: Cutoff frequency, in the same units as ``fs``.
        fs: Sampling frequency; ``fs / 2`` is used to normalise ``cutoff``.

    Returns:
        The output of ``filtfilt`` applied to ``data``.
    """
    nyquist = fs / 2
    numerator, denominator = butter(1, cutoff / nyquist, btype='lowpass')
    # NOTE(review): no ``axis`` is passed, so SciPy's default axis applies --
    # confirm this is the time axis for multi-channel input.
    return filtfilt(numerator, denominator, data)
Apply a low-pass filter to remove high-frequency noise.
def remove_low_frequency_noise(data, cutoff=0.5, fs=100):
    """
    Attenuate low-frequency noise with a first-order Butterworth high-pass
    filter.

    Args:
        data: Signal passed to ``scipy.signal.filtfilt``.
        cutoff: Cutoff frequency, in the same units as ``fs``.
        fs: Sampling frequency; ``fs / 2`` is used to normalise ``cutoff``.

    Returns:
        The output of ``filtfilt`` applied to ``data``.
    """
    nyquist = fs / 2
    numerator, denominator = butter(1, cutoff / nyquist, btype='highpass')
    # NOTE(review): no ``axis`` is passed, so SciPy's default axis applies --
    # confirm this is the time axis for multi-channel input.
    return filtfilt(numerator, denominator, data)
Apply a high-pass filter to remove low-frequency noise.