gaitsetpy.dataset.daphnet

Daphnet Dataset Loader and Utils. Maintainer: @aharshit123456

This file contains the Daphnet dataset loader class that inherits from BaseDatasetLoader.

  1'''
  2Daphnet Dataset Loader and Utils.
  3Maintainer: @aharshit123456
  4
  5This file contains the Daphnet dataset loader class that inherits from BaseDatasetLoader.
  6'''
  7
  8import os
  9import pandas as pd
 10import numpy as np
 11from typing import List, Dict, Tuple
 12from glob import glob
 13from ..core.base_classes import BaseDatasetLoader
 14from .utils import download_dataset, extract_dataset, sliding_window
 15
 16
 17class DaphnetLoader(BaseDatasetLoader):
 18    """
 19    Daphnet dataset loader class.
 20    
 21    This class handles loading and processing of the Daphnet dataset for gait analysis.
 22    """
 23    
 24    def __init__(self, max_workers: int = 8):
 25        """
 26        Initialize Daphnet loader with concurrent download support.
 27        
 28        Args:
 29            max_workers: Maximum number of concurrent download threads (default: 8)
 30        """
 31        super().__init__(
 32            name="daphnet",
 33            description="Daphnet Freezing of Gait Dataset - Contains accelerometer data from subjects with Parkinson's disease",
 34            max_workers=max_workers
 35        )
 36        self.metadata = {
 37            'sensors': ['shank', 'thigh', 'trunk'],
 38            'components': ['h_fd', 'v', 'h_l'],  # horizontal forward, vertical, horizontal lateral
 39            'sampling_frequency': 64,
 40            'annotations': {
 41                0: 'not_valid',
 42                1: 'no_freeze',
 43                2: 'freeze'
 44            }
 45        }
 46    
 47    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
 48        """
 49        Load Daphnet dataset from the specified directory.
 50        
 51        Args:
 52            data_dir: Directory to store/find the dataset
 53            **kwargs: Additional arguments (unused for Daphnet)
 54            
 55        Returns:
 56            Tuple of (data_list, names_list)
 57        """
 58        # Download and extract if needed
 59        download_dataset("daphnet", data_dir)
 60        extract_dataset("daphnet", data_dir)
 61        
 62        file_path = os.path.join(data_dir, "dataset_fog_release/dataset")
 63        daphnet_data = []
 64        daphnet_names = []
 65        
 66        # Load all subject files
 67        for file in sorted(glob(os.path.join(file_path, "S*.txt"))):
 68            # Extract filename from path
 69            filename = os.path.basename(file)
 70            daphnet_names.append(filename)
 71            
 72            # Load CSV with proper column names
 73            column_names = [
 74                "time", "shank_h_fd", "shank_v", "shank_h_l", 
 75                "thigh_h_fd", "thigh_v", "thigh_h_l", 
 76                "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"
 77            ]
 78            
 79            df = pd.read_csv(file, sep=" ", names=column_names)
 80            
 81            # Set time as index
 82            df = df.set_index("time")
 83            
 84            # Calculate magnitude for each sensor
 85            df["thigh"] = np.sqrt(df["thigh_h_l"]**2 + df["thigh_v"]**2 + df["thigh_h_fd"]**2)
 86            df["shank"] = np.sqrt(df["shank_h_l"]**2 + df["shank_v"]**2 + df["shank_h_fd"]**2)
 87            df["trunk"] = np.sqrt(df["trunk_h_l"]**2 + df["trunk_v"]**2 + df["trunk_h_fd"]**2)
 88            
 89            # Reorder columns for consistency
 90            df = df[["shank", "shank_h_fd", "shank_v", "shank_h_l", 
 91                    "thigh", "thigh_h_fd", "thigh_v", "thigh_h_l", 
 92                    "trunk", "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"]]
 93            
 94            daphnet_data.append(df)
 95        
 96        # Store loaded data
 97        self.data = daphnet_data
 98        self.names = daphnet_names
 99        
100        return daphnet_data, daphnet_names
101    
102    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
103                             window_size: int = 192, step_size: int = 32) -> List[Dict]:
104        """
105        Create sliding windows from the Daphnet dataset.
106        
107        Args:
108            data: List of DataFrames containing Daphnet data
109            names: List of names corresponding to the data
110            window_size: Size of the sliding window (default: 192)
111            step_size: Step size for the sliding window (default: 32)
112            
113        Returns:
114            List of dictionaries containing sliding windows for each DataFrame
115        """
116        windows_data = []
117        
118        for idx, df in enumerate(data):
119            # Filter out invalid data (annotations == 0)
120            df_filtered = df[df.annotations > 0]
121            
122            if df_filtered.empty:
123                continue
124                
125            windows = []
126            processed_columns = set()
127            
128            # Process each sensor column
129            for col in df_filtered.columns:
130                if col != "annotations" and col not in processed_columns:
131                    window_data = sliding_window(df_filtered[col], window_size, step_size)
132                    windows.append({"name": col, "data": window_data})
133                    processed_columns.add(col)
134            
135            # Include annotations separately
136            annotations_window = sliding_window(df_filtered["annotations"], window_size, step_size)
137            windows.append({"name": "annotations", "data": annotations_window})
138            
139            windows_data.append({"name": names[idx], "windows": windows})
140        
141        return windows_data
142    
143    def get_supported_formats(self) -> List[str]:
144        """
145        Get list of supported file formats for Daphnet dataset.
146        
147        Returns:
148            List of supported file extensions
149        """
150        return ['.txt']
151    
152    def get_sensor_info(self) -> Dict[str, List[str]]:
153        """
154        Get information about sensors in the dataset.
155        
156        Returns:
157            Dictionary containing sensor information
158        """
159        return {
160            'sensors': self.metadata['sensors'],
161            'components': self.metadata['components'],
162            'sampling_frequency': self.metadata['sampling_frequency']
163        }
164    
165    def get_annotation_info(self) -> Dict[int, str]:
166        """
167        Get information about annotations in the dataset.
168        
169        Returns:
170            Dictionary mapping annotation values to descriptions
171        """
172        return self.metadata['annotations']
173
174
175# Legacy function wrappers for backward compatibility
def load_daphnet_data(data_dir: str):
    """
    Legacy wrapper for loading Daphnet data.

    Kept for backward compatibility; new code should use
    DaphnetLoader.load_data directly.

    Args:
        data_dir: Directory to store the dataset

    Returns:
        Tuple of (data_list, names_list)
    """
    return DaphnetLoader().load_data(data_dir)
188
189
def create_sliding_windows(daphnet, daphnet_names, window_size=192, step_size=32):
    """
    Legacy wrapper for creating sliding windows.

    Kept for backward compatibility; new code should use
    DaphnetLoader.create_sliding_windows directly.

    Args:
        daphnet: List of dataframes containing Daphnet data
        daphnet_names: List of names of the Daphnet dataframes
        window_size: Size of the sliding window
        step_size: Step size for the sliding window

    Returns:
        List of dictionaries containing sliding windows for each DataFrame
    """
    return DaphnetLoader().create_sliding_windows(
        daphnet, daphnet_names, window_size, step_size
    )
205
206
def plot_dataset_sample():
    """Placeholder for dataset sample plotting."""
    # Not yet implemented; kept so the module's public API stays stable.
    return None
210
211
def plot_sliding_window():
    """Placeholder for sliding window plotting."""
    # Not yet implemented; kept so the module's public API stays stable.
    return None
class DaphnetLoader(gaitsetpy.core.base_classes.BaseDatasetLoader):
 18class DaphnetLoader(BaseDatasetLoader):
 19    """
 20    Daphnet dataset loader class.
 21    
 22    This class handles loading and processing of the Daphnet dataset for gait analysis.
 23    """
 24    
 25    def __init__(self, max_workers: int = 8):
 26        """
 27        Initialize Daphnet loader with concurrent download support.
 28        
 29        Args:
 30            max_workers: Maximum number of concurrent download threads (default: 8)
 31        """
 32        super().__init__(
 33            name="daphnet",
 34            description="Daphnet Freezing of Gait Dataset - Contains accelerometer data from subjects with Parkinson's disease",
 35            max_workers=max_workers
 36        )
 37        self.metadata = {
 38            'sensors': ['shank', 'thigh', 'trunk'],
 39            'components': ['h_fd', 'v', 'h_l'],  # horizontal forward, vertical, horizontal lateral
 40            'sampling_frequency': 64,
 41            'annotations': {
 42                0: 'not_valid',
 43                1: 'no_freeze',
 44                2: 'freeze'
 45            }
 46        }
 47    
 48    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
 49        """
 50        Load Daphnet dataset from the specified directory.
 51        
 52        Args:
 53            data_dir: Directory to store/find the dataset
 54            **kwargs: Additional arguments (unused for Daphnet)
 55            
 56        Returns:
 57            Tuple of (data_list, names_list)
 58        """
 59        # Download and extract if needed
 60        download_dataset("daphnet", data_dir)
 61        extract_dataset("daphnet", data_dir)
 62        
 63        file_path = os.path.join(data_dir, "dataset_fog_release/dataset")
 64        daphnet_data = []
 65        daphnet_names = []
 66        
 67        # Load all subject files
 68        for file in sorted(glob(os.path.join(file_path, "S*.txt"))):
 69            # Extract filename from path
 70            filename = os.path.basename(file)
 71            daphnet_names.append(filename)
 72            
 73            # Load CSV with proper column names
 74            column_names = [
 75                "time", "shank_h_fd", "shank_v", "shank_h_l", 
 76                "thigh_h_fd", "thigh_v", "thigh_h_l", 
 77                "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"
 78            ]
 79            
 80            df = pd.read_csv(file, sep=" ", names=column_names)
 81            
 82            # Set time as index
 83            df = df.set_index("time")
 84            
 85            # Calculate magnitude for each sensor
 86            df["thigh"] = np.sqrt(df["thigh_h_l"]**2 + df["thigh_v"]**2 + df["thigh_h_fd"]**2)
 87            df["shank"] = np.sqrt(df["shank_h_l"]**2 + df["shank_v"]**2 + df["shank_h_fd"]**2)
 88            df["trunk"] = np.sqrt(df["trunk_h_l"]**2 + df["trunk_v"]**2 + df["trunk_h_fd"]**2)
 89            
 90            # Reorder columns for consistency
 91            df = df[["shank", "shank_h_fd", "shank_v", "shank_h_l", 
 92                    "thigh", "thigh_h_fd", "thigh_v", "thigh_h_l", 
 93                    "trunk", "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"]]
 94            
 95            daphnet_data.append(df)
 96        
 97        # Store loaded data
 98        self.data = daphnet_data
 99        self.names = daphnet_names
100        
101        return daphnet_data, daphnet_names
102    
103    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
104                             window_size: int = 192, step_size: int = 32) -> List[Dict]:
105        """
106        Create sliding windows from the Daphnet dataset.
107        
108        Args:
109            data: List of DataFrames containing Daphnet data
110            names: List of names corresponding to the data
111            window_size: Size of the sliding window (default: 192)
112            step_size: Step size for the sliding window (default: 32)
113            
114        Returns:
115            List of dictionaries containing sliding windows for each DataFrame
116        """
117        windows_data = []
118        
119        for idx, df in enumerate(data):
120            # Filter out invalid data (annotations == 0)
121            df_filtered = df[df.annotations > 0]
122            
123            if df_filtered.empty:
124                continue
125                
126            windows = []
127            processed_columns = set()
128            
129            # Process each sensor column
130            for col in df_filtered.columns:
131                if col != "annotations" and col not in processed_columns:
132                    window_data = sliding_window(df_filtered[col], window_size, step_size)
133                    windows.append({"name": col, "data": window_data})
134                    processed_columns.add(col)
135            
136            # Include annotations separately
137            annotations_window = sliding_window(df_filtered["annotations"], window_size, step_size)
138            windows.append({"name": "annotations", "data": annotations_window})
139            
140            windows_data.append({"name": names[idx], "windows": windows})
141        
142        return windows_data
143    
144    def get_supported_formats(self) -> List[str]:
145        """
146        Get list of supported file formats for Daphnet dataset.
147        
148        Returns:
149            List of supported file extensions
150        """
151        return ['.txt']
152    
153    def get_sensor_info(self) -> Dict[str, List[str]]:
154        """
155        Get information about sensors in the dataset.
156        
157        Returns:
158            Dictionary containing sensor information
159        """
160        return {
161            'sensors': self.metadata['sensors'],
162            'components': self.metadata['components'],
163            'sampling_frequency': self.metadata['sampling_frequency']
164        }
165    
166    def get_annotation_info(self) -> Dict[int, str]:
167        """
168        Get information about annotations in the dataset.
169        
170        Returns:
171            Dictionary mapping annotation values to descriptions
172        """
173        return self.metadata['annotations']

Daphnet dataset loader class.

This class handles loading and processing of the Daphnet dataset for gait analysis.

DaphnetLoader(max_workers: int = 8)
25    def __init__(self, max_workers: int = 8):
26        """
27        Initialize Daphnet loader with concurrent download support.
28        
29        Args:
30            max_workers: Maximum number of concurrent download threads (default: 8)
31        """
32        super().__init__(
33            name="daphnet",
34            description="Daphnet Freezing of Gait Dataset - Contains accelerometer data from subjects with Parkinson's disease",
35            max_workers=max_workers
36        )
37        self.metadata = {
38            'sensors': ['shank', 'thigh', 'trunk'],
39            'components': ['h_fd', 'v', 'h_l'],  # horizontal forward, vertical, horizontal lateral
40            'sampling_frequency': 64,
41            'annotations': {
42                0: 'not_valid',
43                1: 'no_freeze',
44                2: 'freeze'
45            }
46        }

Initialize Daphnet loader with concurrent download support.

Args: max_workers: Maximum number of concurrent download threads (default: 8)

metadata
def load_data( self, data_dir: str, **kwargs) -> Tuple[List[pandas.core.frame.DataFrame], List[str]]:
 48    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
 49        """
 50        Load Daphnet dataset from the specified directory.
 51        
 52        Args:
 53            data_dir: Directory to store/find the dataset
 54            **kwargs: Additional arguments (unused for Daphnet)
 55            
 56        Returns:
 57            Tuple of (data_list, names_list)
 58        """
 59        # Download and extract if needed
 60        download_dataset("daphnet", data_dir)
 61        extract_dataset("daphnet", data_dir)
 62        
 63        file_path = os.path.join(data_dir, "dataset_fog_release/dataset")
 64        daphnet_data = []
 65        daphnet_names = []
 66        
 67        # Load all subject files
 68        for file in sorted(glob(os.path.join(file_path, "S*.txt"))):
 69            # Extract filename from path
 70            filename = os.path.basename(file)
 71            daphnet_names.append(filename)
 72            
 73            # Load CSV with proper column names
 74            column_names = [
 75                "time", "shank_h_fd", "shank_v", "shank_h_l", 
 76                "thigh_h_fd", "thigh_v", "thigh_h_l", 
 77                "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"
 78            ]
 79            
 80            df = pd.read_csv(file, sep=" ", names=column_names)
 81            
 82            # Set time as index
 83            df = df.set_index("time")
 84            
 85            # Calculate magnitude for each sensor
 86            df["thigh"] = np.sqrt(df["thigh_h_l"]**2 + df["thigh_v"]**2 + df["thigh_h_fd"]**2)
 87            df["shank"] = np.sqrt(df["shank_h_l"]**2 + df["shank_v"]**2 + df["shank_h_fd"]**2)
 88            df["trunk"] = np.sqrt(df["trunk_h_l"]**2 + df["trunk_v"]**2 + df["trunk_h_fd"]**2)
 89            
 90            # Reorder columns for consistency
 91            df = df[["shank", "shank_h_fd", "shank_v", "shank_h_l", 
 92                    "thigh", "thigh_h_fd", "thigh_v", "thigh_h_l", 
 93                    "trunk", "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"]]
 94            
 95            daphnet_data.append(df)
 96        
 97        # Store loaded data
 98        self.data = daphnet_data
 99        self.names = daphnet_names
100        
101        return daphnet_data, daphnet_names

Load Daphnet dataset from the specified directory.

Args: data_dir: Directory to store/find the dataset **kwargs: Additional arguments (unused for Daphnet)

Returns: Tuple of (data_list, names_list)

def create_sliding_windows( self, data: List[pandas.core.frame.DataFrame], names: List[str], window_size: int = 192, step_size: int = 32) -> List[Dict]:
103    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
104                             window_size: int = 192, step_size: int = 32) -> List[Dict]:
105        """
106        Create sliding windows from the Daphnet dataset.
107        
108        Args:
109            data: List of DataFrames containing Daphnet data
110            names: List of names corresponding to the data
111            window_size: Size of the sliding window (default: 192)
112            step_size: Step size for the sliding window (default: 32)
113            
114        Returns:
115            List of dictionaries containing sliding windows for each DataFrame
116        """
117        windows_data = []
118        
119        for idx, df in enumerate(data):
120            # Filter out invalid data (annotations == 0)
121            df_filtered = df[df.annotations > 0]
122            
123            if df_filtered.empty:
124                continue
125                
126            windows = []
127            processed_columns = set()
128            
129            # Process each sensor column
130            for col in df_filtered.columns:
131                if col != "annotations" and col not in processed_columns:
132                    window_data = sliding_window(df_filtered[col], window_size, step_size)
133                    windows.append({"name": col, "data": window_data})
134                    processed_columns.add(col)
135            
136            # Include annotations separately
137            annotations_window = sliding_window(df_filtered["annotations"], window_size, step_size)
138            windows.append({"name": "annotations", "data": annotations_window})
139            
140            windows_data.append({"name": names[idx], "windows": windows})
141        
142        return windows_data

Create sliding windows from the Daphnet dataset.

Args: data: List of DataFrames containing Daphnet data names: List of names corresponding to the data window_size: Size of the sliding window (default: 192) step_size: Step size for the sliding window (default: 32)

Returns: List of dictionaries containing sliding windows for each DataFrame

def get_supported_formats(self) -> List[str]:
144    def get_supported_formats(self) -> List[str]:
145        """
146        Get list of supported file formats for Daphnet dataset.
147        
148        Returns:
149            List of supported file extensions
150        """
151        return ['.txt']

Get list of supported file formats for Daphnet dataset.

Returns: List of supported file extensions

def get_sensor_info(self) -> Dict[str, List[str]]:
153    def get_sensor_info(self) -> Dict[str, List[str]]:
154        """
155        Get information about sensors in the dataset.
156        
157        Returns:
158            Dictionary containing sensor information
159        """
160        return {
161            'sensors': self.metadata['sensors'],
162            'components': self.metadata['components'],
163            'sampling_frequency': self.metadata['sampling_frequency']
164        }

Get information about sensors in the dataset.

Returns: Dictionary containing sensor information

def get_annotation_info(self) -> Dict[int, str]:
166    def get_annotation_info(self) -> Dict[int, str]:
167        """
168        Get information about annotations in the dataset.
169        
170        Returns:
171            Dictionary mapping annotation values to descriptions
172        """
173        return self.metadata['annotations']

Get information about annotations in the dataset.

Returns: Dictionary mapping annotation values to descriptions

def load_daphnet_data(data_dir: str):
177def load_daphnet_data(data_dir: str):
178    """
179    Legacy function for loading Daphnet data.
180    
181    Args:
182        data_dir: Directory to store the dataset
183        
184    Returns:
185        Tuple of (data_list, names_list)
186    """
187    loader = DaphnetLoader()
188    return loader.load_data(data_dir)

Legacy function for loading Daphnet data.

Args: data_dir: Directory to store the dataset

Returns: Tuple of (data_list, names_list)

def create_sliding_windows(daphnet, daphnet_names, window_size=192, step_size=32):
191def create_sliding_windows(daphnet, daphnet_names, window_size=192, step_size=32):
192    """
193    Legacy function for creating sliding windows.
194    
195    Args:
196        daphnet: List of dataframes containing Daphnet data
197        daphnet_names: List of names of the Daphnet dataframes
198        window_size: Size of the sliding window
199        step_size: Step size for the sliding window
200        
201    Returns:
202        List of dictionaries containing sliding windows for each DataFrame
203    """
204    loader = DaphnetLoader()
205    return loader.create_sliding_windows(daphnet, daphnet_names, window_size, step_size)

Legacy function for creating sliding windows.

Args: daphnet: List of dataframes containing Daphnet data daphnet_names: List of names of the Daphnet dataframes window_size: Size of the sliding window step_size: Step size for the sliding window

Returns: List of dictionaries containing sliding windows for each DataFrame

def plot_dataset_sample():
208def plot_dataset_sample():
209    """Placeholder for dataset sample plotting."""
210    pass

Placeholder for dataset sample plotting.

def plot_sliding_window():
213def plot_sliding_window():
214    """Placeholder for sliding window plotting."""
215    pass

Placeholder for sliding window plotting.