gaitsetpy.preprocessing.pipeline

This file contains functions for preprocessing the data.

It covers clipping, noise removal, and related signal-cleaning steps.

The module provides the following functions: clip_sliding_windows, remove_noise, remove_outliers, remove_baseline, remove_drift, remove_artifacts, remove_trend, remove_dc_offset, remove_high_frequency_noise, remove_low_frequency_noise

 1'''
 2This file contains functions for preprocessing the data.
 3
 4Clipping, Noise Removal etc.
 5
 6We'll have the following functions:
 7clip_sliding_windows, remove_noise, remove_outliers, remove_baseline, remove_drift, remove_artifacts, remove_trend, remove_dc_offset, remove_high_frequency_noise, remove_low_frequency_noise
 8'''
 9
10import numpy as np
11import pandas as pd
12from scipy.signal import butter, filtfilt
13
def clip_sliding_windows(data, min_val=-1, max_val=1):
    """
    Limit every value in ``data`` to the interval [min_val, max_val].

    Args:
        data: Numeric array-like (anything ``np.clip`` accepts).
        min_val: Lower bound; smaller values are replaced by it.
        max_val: Upper bound; larger values are replaced by it.

    Returns:
        The clipped data, as produced by ``np.clip``.
    """
    lower_bound, upper_bound = min_val, max_val
    clipped = np.clip(data, lower_bound, upper_bound)
    return clipped
19
def remove_noise(data, window_size=5):
    """
    Smooth the signal with a centered moving-average filter.

    The centered rolling mean is undefined near the edges, where the window
    does not fit; those positions are filled from the nearest computed value
    (backward fill first, then forward fill).

    Args:
        data: pandas Series or DataFrame (must provide ``rolling``).
        window_size: Number of samples in the averaging window.

    Returns:
        The smoothed pandas object, indexed like ``data``.
    """
    smoothed = data.rolling(window=window_size, center=True).mean()
    # ``fillna(method=...)`` is deprecated in recent pandas releases; the
    # dedicated ``bfill``/``ffill`` methods give the same result without
    # the FutureWarning.
    return smoothed.bfill().ffill()
25
def remove_outliers(data, threshold=3):
    """
    Drop samples that lie too far from the mean (Z-score rule).

    A sample is kept when its absolute deviation from the mean is at most
    ``threshold`` times the standard deviation.

    Args:
        data: pandas object supporting ``mean``, ``std``, ``abs`` and
            boolean-mask indexing.
        threshold: Maximum allowed deviation, in standard deviations.

    Returns:
        ``data`` indexed by the boolean keep-mask.
    """
    center = data.mean()
    spread = data.std()
    deviation = (data - center).abs()
    keep_mask = deviation <= threshold * spread
    return data[keep_mask]
32
def remove_baseline(data):
    """
    Center the signal by subtracting its mean value.

    Args:
        data: Object providing ``mean()`` and supporting subtraction
            (e.g. a NumPy array or pandas Series).

    Returns:
        ``data`` minus its mean.
    """
    baseline = data.mean()
    centered = data - baseline
    return centered
38
def remove_drift(data, cutoff=0.01, fs=100):
    """
    Suppress slow drift with a first-order Butterworth high-pass filter.

    The filter is applied forward and backward with ``filtfilt``.

    Args:
        data: Signal samples accepted by ``scipy.signal.filtfilt``.
        cutoff: High-pass cutoff, in the same units as ``fs``.
        fs: Sampling frequency; ``fs / 2`` is used to normalize ``cutoff``.

    Returns:
        The filtered signal as returned by ``filtfilt``.
    """
    nyquist = fs / 2
    normalized_cutoff = cutoff / nyquist
    numerator, denominator = butter(1, normalized_cutoff, btype='highpass')
    filtered = filtfilt(numerator, denominator, data)
    return filtered
45
def remove_artifacts(data, method="interpolate"):
    """
    Fill missing samples by linear interpolation.

    Gaps are interpolated linearly; any values still missing afterwards
    (e.g. at the start of the series) are filled from the nearest valid
    sample, backward fill first and then forward fill.

    Args:
        data: pandas Series or DataFrame that may contain NaN values.
        method: Accepted for backward compatibility but currently unused;
            linear interpolation is always applied.

    Returns:
        The pandas object with missing values filled.
    """
    interpolated = data.interpolate(method="linear")
    # ``fillna(method=...)`` is deprecated in recent pandas releases; the
    # dedicated ``bfill``/``ffill`` methods are the supported equivalent.
    return interpolated.bfill().ffill()
51
def remove_trend(data, order=2):
    """
    Detrend the signal by subtracting a least-squares polynomial fit.

    The polynomial is fitted against the sample positions
    ``0, 1, ..., len(data) - 1``.

    Args:
        data: Signal samples to detrend.
        order: Degree of the fitted polynomial.

    Returns:
        ``data`` minus the fitted polynomial evaluated at each sample.
    """
    sample_positions = np.arange(len(data))
    fitted_trend = np.polyval(
        np.polyfit(sample_positions, data, order), sample_positions
    )
    return data - fitted_trend
60
def remove_dc_offset(data):
    """
    Strip the constant (DC) component by subtracting the signal mean.

    Args:
        data: Object providing ``mean()`` and supporting subtraction
            (e.g. a NumPy array or pandas Series).

    Returns:
        ``data`` minus its mean.
    """
    dc_level = data.mean()
    without_offset = data - dc_level
    return without_offset
66
def remove_high_frequency_noise(data, cutoff=10, fs=100):
    """
    Attenuate high-frequency content with a first-order Butterworth low-pass.

    The filter is applied forward and backward with ``filtfilt``.

    Args:
        data: Signal samples accepted by ``scipy.signal.filtfilt``.
        cutoff: Low-pass cutoff, in the same units as ``fs``.
        fs: Sampling frequency; ``fs / 2`` is used to normalize ``cutoff``.

    Returns:
        The filtered signal as returned by ``filtfilt``.
    """
    nyquist = fs / 2
    normalized_cutoff = cutoff / nyquist
    numerator, denominator = butter(1, normalized_cutoff, btype='lowpass')
    filtered = filtfilt(numerator, denominator, data)
    return filtered
73
def remove_low_frequency_noise(data, cutoff=0.5, fs=100):
    """
    Attenuate low-frequency content with a first-order Butterworth high-pass.

    The filter is applied forward and backward with ``filtfilt``.

    Args:
        data: Signal samples accepted by ``scipy.signal.filtfilt``.
        cutoff: High-pass cutoff, in the same units as ``fs``.
        fs: Sampling frequency; ``fs / 2`` is used to normalize ``cutoff``.

    Returns:
        The filtered signal as returned by ``filtfilt``.
    """
    nyquist = fs / 2
    normalized_cutoff = cutoff / nyquist
    numerator, denominator = butter(1, normalized_cutoff, btype='highpass')
    filtered = filtfilt(numerator, denominator, data)
    return filtered
def clip_sliding_windows(data, min_val=-1, max_val=1):
def clip_sliding_windows(data, min_val=-1, max_val=1):
    """
    Limit every value in ``data`` to the interval [min_val, max_val].

    Args:
        data: Numeric array-like (anything ``np.clip`` accepts).
        min_val: Lower bound; smaller values are replaced by it.
        max_val: Upper bound; larger values are replaced by it.

    Returns:
        The clipped data, as produced by ``np.clip``.
    """
    lower_bound, upper_bound = min_val, max_val
    clipped = np.clip(data, lower_bound, upper_bound)
    return clipped

Clip values in the sliding windows to be within a specified range.

def remove_noise(data, window_size=5):
def remove_noise(data, window_size=5):
    """
    Smooth the signal with a centered moving-average filter.

    The centered rolling mean is undefined near the edges, where the window
    does not fit; those positions are filled from the nearest computed value
    (backward fill first, then forward fill).

    Args:
        data: pandas Series or DataFrame (must provide ``rolling``).
        window_size: Number of samples in the averaging window.

    Returns:
        The smoothed pandas object, indexed like ``data``.
    """
    smoothed = data.rolling(window=window_size, center=True).mean()
    # ``fillna(method=...)`` is deprecated in recent pandas releases; the
    # dedicated ``bfill``/``ffill`` methods give the same result without
    # the FutureWarning.
    return smoothed.bfill().ffill()

Apply a moving average filter to reduce noise.

def remove_outliers(data, threshold=3):
def remove_outliers(data, threshold=3):
    """
    Drop samples that lie too far from the mean (Z-score rule).

    A sample is kept when its absolute deviation from the mean is at most
    ``threshold`` times the standard deviation.

    Args:
        data: pandas object supporting ``mean``, ``std``, ``abs`` and
            boolean-mask indexing.
        threshold: Maximum allowed deviation, in standard deviations.

    Returns:
        ``data`` indexed by the boolean keep-mask.
    """
    center = data.mean()
    spread = data.std()
    deviation = (data - center).abs()
    keep_mask = deviation <= threshold * spread
    return data[keep_mask]

Remove outliers beyond a given threshold using the Z-score method.

def remove_baseline(data):
def remove_baseline(data):
    """
    Center the signal by subtracting its mean value.

    Args:
        data: Object providing ``mean()`` and supporting subtraction
            (e.g. a NumPy array or pandas Series).

    Returns:
        ``data`` minus its mean.
    """
    baseline = data.mean()
    centered = data - baseline
    return centered

Remove baseline by subtracting the mean.

def remove_drift(data, cutoff=0.01, fs=100):
def remove_drift(data, cutoff=0.01, fs=100):
    """
    Suppress slow drift with a first-order Butterworth high-pass filter.

    The filter is applied forward and backward with ``filtfilt``.

    Args:
        data: Signal samples accepted by ``scipy.signal.filtfilt``.
        cutoff: High-pass cutoff, in the same units as ``fs``.
        fs: Sampling frequency; ``fs / 2`` is used to normalize ``cutoff``.

    Returns:
        The filtered signal as returned by ``filtfilt``.
    """
    nyquist = fs / 2
    normalized_cutoff = cutoff / nyquist
    numerator, denominator = butter(1, normalized_cutoff, btype='highpass')
    filtered = filtfilt(numerator, denominator, data)
    return filtered

Remove low-frequency drift using a high-pass filter.

def remove_artifacts(data, method='interpolate'):
def remove_artifacts(data, method="interpolate"):
    """
    Fill missing samples by linear interpolation.

    Gaps are interpolated linearly; any values still missing afterwards
    (e.g. at the start of the series) are filled from the nearest valid
    sample, backward fill first and then forward fill.

    Args:
        data: pandas Series or DataFrame that may contain NaN values.
        method: Accepted for backward compatibility but currently unused;
            linear interpolation is always applied.

    Returns:
        The pandas object with missing values filled.
    """
    interpolated = data.interpolate(method="linear")
    # ``fillna(method=...)`` is deprecated in recent pandas releases; the
    # dedicated ``bfill``/``ffill`` methods are the supported equivalent.
    return interpolated.bfill().ffill()

Remove artifacts by interpolating missing values.

def remove_trend(data, order=2):
def remove_trend(data, order=2):
    """
    Detrend the signal by subtracting a least-squares polynomial fit.

    The polynomial is fitted against the sample positions
    ``0, 1, ..., len(data) - 1``.

    Args:
        data: Signal samples to detrend.
        order: Degree of the fitted polynomial.

    Returns:
        ``data`` minus the fitted polynomial evaluated at each sample.
    """
    sample_positions = np.arange(len(data))
    fitted_trend = np.polyval(
        np.polyfit(sample_positions, data, order), sample_positions
    )
    return data - fitted_trend

Remove trends using polynomial fitting.

def remove_dc_offset(data):
def remove_dc_offset(data):
    """
    Strip the constant (DC) component by subtracting the signal mean.

    Args:
        data: Object providing ``mean()`` and supporting subtraction
            (e.g. a NumPy array or pandas Series).

    Returns:
        ``data`` minus its mean.
    """
    dc_level = data.mean()
    without_offset = data - dc_level
    return without_offset

Remove DC offset by subtracting the mean.

def remove_high_frequency_noise(data, cutoff=10, fs=100):
def remove_high_frequency_noise(data, cutoff=10, fs=100):
    """
    Attenuate high-frequency content with a first-order Butterworth low-pass.

    The filter is applied forward and backward with ``filtfilt``.

    Args:
        data: Signal samples accepted by ``scipy.signal.filtfilt``.
        cutoff: Low-pass cutoff, in the same units as ``fs``.
        fs: Sampling frequency; ``fs / 2`` is used to normalize ``cutoff``.

    Returns:
        The filtered signal as returned by ``filtfilt``.
    """
    nyquist = fs / 2
    normalized_cutoff = cutoff / nyquist
    numerator, denominator = butter(1, normalized_cutoff, btype='lowpass')
    filtered = filtfilt(numerator, denominator, data)
    return filtered

Apply a low-pass filter to remove high-frequency noise.

def remove_low_frequency_noise(data, cutoff=0.5, fs=100):
def remove_low_frequency_noise(data, cutoff=0.5, fs=100):
    """
    Attenuate low-frequency content with a first-order Butterworth high-pass.

    The filter is applied forward and backward with ``filtfilt``.

    Args:
        data: Signal samples accepted by ``scipy.signal.filtfilt``.
        cutoff: High-pass cutoff, in the same units as ``fs``.
        fs: Sampling frequency; ``fs / 2`` is used to normalize ``cutoff``.

    Returns:
        The filtered signal as returned by ``filtfilt``.
    """
    nyquist = fs / 2
    normalized_cutoff = cutoff / nyquist
    numerator, denominator = butter(1, normalized_cutoff, btype='highpass')
    filtered = filtfilt(numerator, denominator, data)
    return filtered

Apply a high-pass filter to remove low-frequency noise.