"""
gaitsetpy.core.base_classes

Base classes for GaitSetPy components.

This module defines abstract base classes that all components should inherit from.
Each base class defines the interface and common functionality for its respective component type.

Maintainer: @aharshit123456
"""
  9
from abc import ABC, abstractmethod
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Any, Dict, List, Optional, Tuple, Union, Callable
import os
import threading

import numpy as np
import pandas as pd
import requests
from tqdm import tqdm
 18
 19
 20class BaseDatasetLoader(ABC):
 21    """
 22    Base class for all dataset loaders.
 23    
 24    All dataset loaders should inherit from this class and implement the required methods.
 25    This class provides thread-safe concurrent downloading capabilities for efficient data retrieval.
 26    """
 27    
 28    def __init__(self, name: str, description: str = "", max_workers: int = 8):
 29        """
 30        Initialize the dataset loader.
 31        
 32        Args:
 33            name: Name of the dataset
 34            description: Description of the dataset
 35            max_workers: Maximum number of concurrent download threads (default: 8)
 36        """
 37        self.name = name
 38        self.description = description
 39        self.data = None
 40        self.metadata = {}
 41        self.max_workers = max_workers
 42        self._download_stats = {'success': 0, 'failed': 0, 'skipped': 0}
 43    
 44    @abstractmethod
 45    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
 46        """
 47        Load dataset from the specified directory.
 48        
 49        Args:
 50            data_dir: Directory containing the dataset
 51            **kwargs: Additional arguments specific to the dataset
 52            
 53        Returns:
 54            Tuple of (data_list, names_list)
 55        """
 56        pass
 57    
 58    @abstractmethod
 59    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
 60                             window_size: int = 192, step_size: int = 32) -> List[Dict]:
 61        """
 62        Create sliding windows from the loaded data.
 63        
 64        Args:
 65            data: List of DataFrames
 66            names: List of names corresponding to the data
 67            window_size: Size of each sliding window
 68            step_size: Step size for sliding windows
 69            
 70        Returns:
 71            List of dictionaries containing sliding windows
 72        """
 73        pass
 74    
 75    @abstractmethod
 76    def get_supported_formats(self) -> List[str]:
 77        """
 78        Get list of supported file formats.
 79        
 80        Returns:
 81            List of supported file extensions
 82        """
 83        pass
 84    
 85    def _download_file(self, url: str, dest_path: str, 
 86                      chunk_size: int = 8192, timeout: int = 30) -> Tuple[bool, str]:
 87        """
 88        Download a single file from URL to destination path.
 89        
 90        This method is thread-safe and can be called concurrently.
 91        
 92        Args:
 93            url: URL to download from
 94            dest_path: Destination file path
 95            chunk_size: Size of chunks to download (default: 8192 bytes)
 96            timeout: Request timeout in seconds (default: 30)
 97            
 98        Returns:
 99            Tuple of (success: bool, message: str)
100        """
101        try:
102            # Check if file already exists
103            if os.path.exists(dest_path):
104                self._download_stats['skipped'] += 1
105                return True, f"File already exists: {dest_path}"
106            
107            # Make the request
108            response = requests.get(url, stream=True, timeout=timeout)
109            
110            if response.status_code == 200:
111                # Ensure parent directory exists
112                os.makedirs(os.path.dirname(dest_path) if os.path.dirname(dest_path) else '.', exist_ok=True)
113                
114                # Write file in chunks
115                with open(dest_path, 'wb') as f:
116                    for chunk in response.iter_content(chunk_size=chunk_size):
117                        if chunk:
118                            f.write(chunk)
119                
120                self._download_stats['success'] += 1
121                return True, f"Successfully downloaded: {dest_path}"
122            else:
123                self._download_stats['failed'] += 1
124                return False, f"HTTP {response.status_code}: {url}"
125                
126        except requests.exceptions.Timeout:
127            self._download_stats['failed'] += 1
128            return False, f"Timeout downloading: {url}"
129        except requests.exceptions.RequestException as e:
130            self._download_stats['failed'] += 1
131            return False, f"Request error for {url}: {str(e)}"
132        except IOError as e:
133            self._download_stats['failed'] += 1
134            return False, f"IO error for {dest_path}: {str(e)}"
135        except Exception as e:
136            self._download_stats['failed'] += 1
137            return False, f"Unexpected error for {url}: {str(e)}"
138    
139    def download_files_concurrent(self, 
140                                  download_tasks: List[Dict[str, str]], 
141                                  show_progress: bool = True,
142                                  desc: str = "Downloading files") -> Dict[str, Any]:
143        """
144        Download multiple files concurrently using a thread pool.
145        
146        Args:
147            download_tasks: List of dicts with 'url' and 'dest_path' keys
148            show_progress: Whether to show progress bar (default: True)
149            desc: Description for progress bar
150            
151        Returns:
152            Dictionary with download statistics and results
153            
154        Example:
155            tasks = [
156                {'url': 'http://example.com/file1.txt', 'dest_path': '/path/to/file1.txt'},
157                {'url': 'http://example.com/file2.txt', 'dest_path': '/path/to/file2.txt'}
158            ]
159            results = loader.download_files_concurrent(tasks)
160        """
161        # Reset stats
162        self._download_stats = {'success': 0, 'failed': 0, 'skipped': 0}
163        
164        results = []
165        failed_downloads = []
166        
167        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
168            # Submit all download tasks
169            future_to_task = {
170                executor.submit(self._download_file, task['url'], task['dest_path']): task
171                for task in download_tasks
172            }
173            
174            # Process completed tasks with optional progress bar
175            if show_progress:
176                futures = tqdm(as_completed(future_to_task), 
177                             total=len(download_tasks), 
178                             desc=desc)
179            else:
180                futures = as_completed(future_to_task)
181            
182            for future in futures:
183                task = future_to_task[future]
184                try:
185                    success, message = future.result()
186                    results.append({
187                        'url': task['url'],
188                        'dest_path': task['dest_path'],
189                        'success': success,
190                        'message': message
191                    })
192                    
193                    if not success:
194                        failed_downloads.append({
195                            'url': task['url'],
196                            'dest_path': task['dest_path'],
197                            'error': message
198                        })
199                        
200                except Exception as e:
201                    error_msg = f"Exception during download: {str(e)}"
202                    results.append({
203                        'url': task['url'],
204                        'dest_path': task['dest_path'],
205                        'success': False,
206                        'message': error_msg
207                    })
208                    failed_downloads.append({
209                        'url': task['url'],
210                        'dest_path': task['dest_path'],
211                        'error': error_msg
212                    })
213        
214        # Return comprehensive results
215        return {
216            'total': len(download_tasks),
217            'success': self._download_stats['success'],
218            'failed': self._download_stats['failed'],
219            'skipped': self._download_stats['skipped'],
220            'failed_downloads': failed_downloads,
221            'all_results': results
222        }
223    
224    def set_max_workers(self, max_workers: int):
225        """
226        Set the maximum number of concurrent download threads.
227        
228        Args:
229            max_workers: Maximum number of threads (must be positive)
230        """
231        if max_workers < 1:
232            raise ValueError("max_workers must be at least 1")
233        self.max_workers = max_workers
234    
235    def get_download_stats(self) -> Dict[str, int]:
236        """
237        Get statistics from the last download operation.
238        
239        Returns:
240            Dictionary with success, failed, and skipped counts
241        """
242        return self._download_stats.copy()
243    
244    def get_info(self) -> Dict[str, Any]:
245        """
246        Get information about the dataset.
247        
248        Returns:
249            Dictionary containing dataset information
250        """
251        return {
252            'name': self.name,
253            'description': self.description,
254            'metadata': self.metadata,
255            'supported_formats': self.get_supported_formats(),
256            'max_workers': self.max_workers
257        }
258
259
class BaseFeatureExtractor(ABC):
    """
    Abstract base for feature extractors.

    Concrete extractors implement feature computation over sliding windows
    and report the names of the features they produce; this base keeps
    identity, configuration, and an introspection helper.
    """

    def __init__(self, name: str, description: str = ""):
        """
        Initialize the feature extractor.

        Args:
            name: Name of the feature extractor
            description: Description of the feature extractor
        """
        self.name = name
        self.description = description
        self.config = {}

    @abstractmethod
    def extract_features(self, windows: List[Dict], fs: int, **kwargs) -> List[Dict]:
        """
        Extract features from sliding windows.

        Args:
            windows: List of sliding window dictionaries
            fs: Sampling frequency
            **kwargs: Additional arguments for feature extraction

        Returns:
            List of feature dictionaries
        """
        pass

    @abstractmethod
    def get_feature_names(self) -> List[str]:
        """
        Get names of features extracted by this extractor.

        Returns:
            List of feature names
        """
        pass

    def configure(self, config: Dict[str, Any]):
        """
        Merge *config* into the extractor's current configuration.

        Args:
            config: Configuration dictionary
        """
        self.config.update(config)

    def get_info(self) -> Dict[str, Any]:
        """
        Describe this feature extractor.

        Returns:
            Dictionary with name, description, config, and feature names.
        """
        summary = {
            'name': self.name,
            'description': self.description,
            'config': self.config,
            'feature_names': self.get_feature_names(),
        }
        return summary
326
327
class BasePreprocessor(ABC):
    """
    Abstract base for data preprocessors.

    Subclasses implement ``fit`` and ``transform``; this base supplies the
    combined ``fit_transform`` convenience, configuration handling, and an
    introspection helper.
    """

    def __init__(self, name: str, description: str = ""):
        """
        Initialize the preprocessor.

        Args:
            name: Name of the preprocessor
            description: Description of the preprocessor
        """
        self.name = name
        self.description = description
        self.config = {}
        # Subclasses are expected to flip this to True once fit() succeeds.
        self.fitted = False

    @abstractmethod
    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Fit the preprocessor to the data.

        Args:
            data: Input data to fit on
            **kwargs: Additional arguments for fitting
        """
        pass

    @abstractmethod
    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Transform the data using the fitted preprocessor.

        Args:
            data: Input data to transform
            **kwargs: Additional arguments for transformation

        Returns:
            Transformed data
        """
        pass

    def fit_transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Fit on *data*, then return its transformation.

        Args:
            data: Input data to fit and transform
            **kwargs: Forwarded to both fit() and transform()

        Returns:
            Transformed data
        """
        self.fit(data, **kwargs)
        transformed = self.transform(data, **kwargs)
        return transformed

    def configure(self, config: Dict[str, Any]):
        """
        Merge *config* into the preprocessor's current configuration.

        Args:
            config: Configuration dictionary
        """
        self.config.update(config)

    def get_info(self) -> Dict[str, Any]:
        """
        Describe this preprocessor.

        Returns:
            Dictionary with name, description, config, and fitted state.
        """
        summary = {
            'name': self.name,
            'description': self.description,
            'config': self.config,
            'fitted': self.fitted,
        }
        return summary
409
410
class BaseEDAAnalyzer(ABC):
    """
    Abstract base for exploratory data analysis (EDA) components.

    Subclasses provide the actual ``analyze`` and ``visualize`` logic; this
    base holds identity, configuration, and an introspection helper.
    """

    def __init__(self, name: str, description: str = ""):
        """
        Initialize the EDA analyzer.

        Args:
            name: Name of the EDA analyzer
            description: Description of the EDA analyzer
        """
        self.name = name
        self.description = description
        self.config = {}

    @abstractmethod
    def analyze(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs) -> Dict[str, Any]:
        """
        Perform analysis on the data.

        Args:
            data: Input data to analyze
            **kwargs: Additional arguments for analysis

        Returns:
            Dictionary containing analysis results
        """
        pass

    @abstractmethod
    def visualize(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs):
        """
        Create visualizations of the data.

        Args:
            data: Input data to visualize
            **kwargs: Additional arguments for visualization
        """
        pass

    def configure(self, config: Dict[str, Any]):
        """
        Merge *config* into the analyzer's current configuration.

        Args:
            config: Configuration dictionary
        """
        self.config.update(config)

    def get_info(self) -> Dict[str, Any]:
        """
        Describe this analyzer.

        Returns:
            Dictionary with the analyzer's name, description, and config.
        """
        summary = {
            'name': self.name,
            'description': self.description,
            'config': self.config,
        }
        return summary
476
477
class BaseClassificationModel(ABC):
    """
    Abstract base for classification models.

    Concrete models implement training, prediction, evaluation, and
    (de)serialization; this base keeps identity, configuration, and the
    trained flag.
    """

    def __init__(self, name: str, description: str = ""):
        """
        Initialize the classification model.

        Args:
            name: Name of the classification model
            description: Description of the classification model
        """
        self.name = name
        self.description = description
        self.model = None       # underlying estimator, set by subclasses
        self.config = {}
        self.trained = False    # subclasses flip this after a successful train()

    @abstractmethod
    def train(self, features: List[Dict], **kwargs):
        """
        Train the classification model.

        Args:
            features: List of feature dictionaries
            **kwargs: Additional arguments for training
        """
        pass

    @abstractmethod
    def predict(self, features: List[Dict], **kwargs) -> np.ndarray:
        """
        Make predictions using the trained model.

        Args:
            features: List of feature dictionaries
            **kwargs: Additional arguments for prediction

        Returns:
            Array of predictions
        """
        pass

    @abstractmethod
    def evaluate(self, features: List[Dict], **kwargs) -> Dict[str, float]:
        """
        Evaluate the model performance.

        Args:
            features: List of feature dictionaries
            **kwargs: Additional arguments for evaluation

        Returns:
            Dictionary containing evaluation metrics
        """
        pass

    @abstractmethod
    def save_model(self, filepath: str):
        """
        Save the trained model to a file.

        Args:
            filepath: Path to save the model
        """
        pass

    @abstractmethod
    def load_model(self, filepath: str):
        """
        Load a trained model from a file.

        Args:
            filepath: Path to the saved model
        """
        pass

    def configure(self, config: Dict[str, Any]):
        """
        Merge *config* into the model's current configuration.

        Args:
            config: Configuration dictionary
        """
        self.config.update(config)

    def get_info(self) -> Dict[str, Any]:
        """
        Describe this model.

        Returns:
            Dictionary with the model's name, description, config, and
            trained state.
        """
        summary = {
            'name': self.name,
            'description': self.description,
            'config': self.config,
            'trained': self.trained,
        }
        return summary
# NOTE(review): this is a second, duplicated definition of BaseDatasetLoader
# (it shadows the one defined earlier in this file) and its original header
# referenced `abc.ABC` even though only `from abc import ABC` is in scope
# (a NameError at import time). The header is fixed and the body kept in
# sync with the primary definition; consider deleting this duplicate.
class BaseDatasetLoader(ABC):
    """
    Base class for all dataset loaders.
    
    All dataset loaders should inherit from this class and implement the required methods.
    This class provides thread-safe concurrent downloading capabilities for efficient data retrieval.
    """
    
    def __init__(self, name: str, description: str = "", max_workers: int = 8):
        """
        Initialize the dataset loader.
        
        Args:
            name: Name of the dataset
            description: Description of the dataset
            max_workers: Maximum number of concurrent download threads (default: 8)
        """
        self.name = name
        self.description = description
        self.data = None        # populated by subclasses after load_data()
        self.metadata = {}      # free-form dataset metadata reported by get_info()
        self.max_workers = max_workers
        # Guard the shared counters: _download_file runs on worker threads and
        # unsynchronized `dict[key] += 1` can lose increments.
        self._stats_lock = threading.Lock()
        self._download_stats = {'success': 0, 'failed': 0, 'skipped': 0}
    
    @abstractmethod
    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load dataset from the specified directory.
        
        Args:
            data_dir: Directory containing the dataset
            **kwargs: Additional arguments specific to the dataset
            
        Returns:
            Tuple of (data_list, names_list)
        """
        pass
    
    @abstractmethod
    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
                             window_size: int = 192, step_size: int = 32) -> List[Dict]:
        """
        Create sliding windows from the loaded data.
        
        Args:
            data: List of DataFrames
            names: List of names corresponding to the data
            window_size: Size of each sliding window
            step_size: Step size for sliding windows
            
        Returns:
            List of dictionaries containing sliding windows
        """
        pass
    
    @abstractmethod
    def get_supported_formats(self) -> List[str]:
        """
        Get list of supported file formats.
        
        Returns:
            List of supported file extensions
        """
        pass
    
    def _record(self, outcome: str) -> None:
        """Atomically increment the counter for *outcome* ('success', 'failed' or 'skipped')."""
        with self._stats_lock:
            self._download_stats[outcome] += 1
    
    def _download_file(self, url: str, dest_path: str, 
                      chunk_size: int = 8192, timeout: int = 30) -> Tuple[bool, str]:
        """
        Download a single file from URL to destination path.
        
        This method is thread-safe and can be called concurrently.
        
        Args:
            url: URL to download from
            dest_path: Destination file path
            chunk_size: Size of chunks to download (default: 8192 bytes)
            timeout: Request timeout in seconds (default: 30)
            
        Returns:
            Tuple of (success: bool, message: str)
        """
        try:
            # Existing files are never re-downloaded; they count as 'skipped'.
            if os.path.exists(dest_path):
                self._record('skipped')
                return True, f"File already exists: {dest_path}"
            
            response = requests.get(url, stream=True, timeout=timeout)
            
            if response.status_code == 200:
                # Ensure parent directory exists (an empty dirname means cwd).
                os.makedirs(os.path.dirname(dest_path) or '.', exist_ok=True)
                
                # Stream to disk in chunks to avoid holding the file in memory.
                with open(dest_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=chunk_size):
                        if chunk:
                            f.write(chunk)
                
                self._record('success')
                return True, f"Successfully downloaded: {dest_path}"
            else:
                self._record('failed')
                return False, f"HTTP {response.status_code}: {url}"
                
        except requests.exceptions.Timeout:
            self._record('failed')
            return False, f"Timeout downloading: {url}"
        except requests.exceptions.RequestException as e:
            self._record('failed')
            return False, f"Request error for {url}: {str(e)}"
        except IOError as e:
            self._record('failed')
            return False, f"IO error for {dest_path}: {str(e)}"
        except Exception as e:
            self._record('failed')
            return False, f"Unexpected error for {url}: {str(e)}"
    
    def download_files_concurrent(self, 
                                  download_tasks: List[Dict[str, str]], 
                                  show_progress: bool = True,
                                  desc: str = "Downloading files") -> Dict[str, Any]:
        """
        Download multiple files concurrently using a thread pool.
        
        Args:
            download_tasks: List of dicts with 'url' and 'dest_path' keys
            show_progress: Whether to show progress bar (default: True)
            desc: Description for progress bar
            
        Returns:
            Dictionary with download statistics and results
            
        Example:
            tasks = [
                {'url': 'http://example.com/file1.txt', 'dest_path': '/path/to/file1.txt'},
                {'url': 'http://example.com/file2.txt', 'dest_path': '/path/to/file2.txt'}
            ]
            results = loader.download_files_concurrent(tasks)
        """
        # Reset stats for this batch.
        with self._stats_lock:
            self._download_stats = {'success': 0, 'failed': 0, 'skipped': 0}
        
        results = []
        failed_downloads = []
        
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            # Submit all download tasks up front.
            future_to_task = {
                executor.submit(self._download_file, task['url'], task['dest_path']): task
                for task in download_tasks
            }
            
            # Consume completions as they arrive, optionally behind a progress bar.
            if show_progress:
                futures = tqdm(as_completed(future_to_task), 
                             total=len(download_tasks), 
                             desc=desc)
            else:
                futures = as_completed(future_to_task)
            
            for future in futures:
                task = future_to_task[future]
                try:
                    success, message = future.result()
                    results.append({
                        'url': task['url'],
                        'dest_path': task['dest_path'],
                        'success': success,
                        'message': message
                    })
                    
                    if not success:
                        failed_downloads.append({
                            'url': task['url'],
                            'dest_path': task['dest_path'],
                            'error': message
                        })
                        
                except Exception as e:
                    # _download_file already catches broadly, so this only fires
                    # for failures raised outside it.
                    error_msg = f"Exception during download: {str(e)}"
                    results.append({
                        'url': task['url'],
                        'dest_path': task['dest_path'],
                        'success': False,
                        'message': error_msg
                    })
                    failed_downloads.append({
                        'url': task['url'],
                        'dest_path': task['dest_path'],
                        'error': error_msg
                    })
        
        # All workers have finished (executor context exited); read a
        # consistent snapshot of the counters.
        stats = self.get_download_stats()
        return {
            'total': len(download_tasks),
            'success': stats['success'],
            'failed': stats['failed'],
            'skipped': stats['skipped'],
            'failed_downloads': failed_downloads,
            'all_results': results
        }
    
    def set_max_workers(self, max_workers: int):
        """
        Set the maximum number of concurrent download threads.
        
        Args:
            max_workers: Maximum number of threads (must be positive)
        
        Raises:
            ValueError: If max_workers is less than 1.
        """
        if max_workers < 1:
            raise ValueError("max_workers must be at least 1")
        self.max_workers = max_workers
    
    def get_download_stats(self) -> Dict[str, int]:
        """
        Get statistics from the last download operation.
        
        Returns:
            Dictionary with success, failed, and skipped counts (a copy,
            so callers cannot mutate the live counters).
        """
        with self._stats_lock:
            return self._download_stats.copy()
    
    def get_info(self) -> Dict[str, Any]:
        """
        Get information about the dataset.
        
        Returns:
            Dictionary containing dataset information
        """
        return {
            'name': self.name,
            'description': self.description,
            'metadata': self.metadata,
            'supported_formats': self.get_supported_formats(),
            'max_workers': self.max_workers
        }

Base class for all dataset loaders.

All dataset loaders should inherit from this class and implement the required methods. This class provides thread-safe concurrent downloading capabilities for efficient data retrieval.

BaseDatasetLoader(name: str, description: str = '', max_workers: int = 8)
29    def __init__(self, name: str, description: str = "", max_workers: int = 8):
30        """
31        Initialize the dataset loader.
32        
33        Args:
34            name: Name of the dataset
35            description: Description of the dataset
36            max_workers: Maximum number of concurrent download threads (default: 8)
37        """
38        self.name = name
39        self.description = description
40        self.data = None
41        self.metadata = {}
42        self.max_workers = max_workers
43        self._download_stats = {'success': 0, 'failed': 0, 'skipped': 0}

Initialize the dataset loader.

Args:
    name: Name of the dataset
    description: Description of the dataset
    max_workers: Maximum number of concurrent download threads (default: 8)

name
description
data
metadata
max_workers
@abstractmethod
def load_data( self, data_dir: str, **kwargs) -> Tuple[List[pandas.core.frame.DataFrame], List[str]]:
45    @abstractmethod
46    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
47        """
48        Load dataset from the specified directory.
49        
50        Args:
51            data_dir: Directory containing the dataset
52            **kwargs: Additional arguments specific to the dataset
53            
54        Returns:
55            Tuple of (data_list, names_list)
56        """
57        pass

Load dataset from the specified directory.

Args:
    data_dir: Directory containing the dataset
    **kwargs: Additional arguments specific to the dataset

Returns:
    Tuple of (data_list, names_list)

@abstractmethod
def create_sliding_windows( self, data: List[pandas.core.frame.DataFrame], names: List[str], window_size: int = 192, step_size: int = 32) -> List[Dict]:
59    @abstractmethod
60    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
61                             window_size: int = 192, step_size: int = 32) -> List[Dict]:
62        """
63        Create sliding windows from the loaded data.
64        
65        Args:
66            data: List of DataFrames
67            names: List of names corresponding to the data
68            window_size: Size of each sliding window
69            step_size: Step size for sliding windows
70            
71        Returns:
72            List of dictionaries containing sliding windows
73        """
74        pass

Create sliding windows from the loaded data.

Args:
    data: List of DataFrames.
    names: List of names corresponding to the data.
    window_size: Size of each sliding window.
    step_size: Step size for sliding windows.

Returns: List of dictionaries containing sliding windows

@abstractmethod
def get_supported_formats(self) -> List[str]:
76    @abstractmethod
77    def get_supported_formats(self) -> List[str]:
78        """
79        Get list of supported file formats.
80        
81        Returns:
82            List of supported file extensions
83        """
84        pass

Get list of supported file formats.

Returns: List of supported file extensions

def download_files_concurrent( self, download_tasks: List[Dict[str, str]], show_progress: bool = True, desc: str = 'Downloading files') -> Dict[str, Any]:
140    def download_files_concurrent(self, 
141                                  download_tasks: List[Dict[str, str]], 
142                                  show_progress: bool = True,
143                                  desc: str = "Downloading files") -> Dict[str, Any]:
144        """
145        Download multiple files concurrently using a thread pool.
146        
147        Args:
148            download_tasks: List of dicts with 'url' and 'dest_path' keys
149            show_progress: Whether to show progress bar (default: True)
150            desc: Description for progress bar
151            
152        Returns:
153            Dictionary with download statistics and results
154            
155        Example:
156            tasks = [
157                {'url': 'http://example.com/file1.txt', 'dest_path': '/path/to/file1.txt'},
158                {'url': 'http://example.com/file2.txt', 'dest_path': '/path/to/file2.txt'}
159            ]
160            results = loader.download_files_concurrent(tasks)
161        """
162        # Reset stats
163        self._download_stats = {'success': 0, 'failed': 0, 'skipped': 0}
164        
165        results = []
166        failed_downloads = []
167        
168        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
169            # Submit all download tasks
170            future_to_task = {
171                executor.submit(self._download_file, task['url'], task['dest_path']): task
172                for task in download_tasks
173            }
174            
175            # Process completed tasks with optional progress bar
176            if show_progress:
177                futures = tqdm(as_completed(future_to_task), 
178                             total=len(download_tasks), 
179                             desc=desc)
180            else:
181                futures = as_completed(future_to_task)
182            
183            for future in futures:
184                task = future_to_task[future]
185                try:
186                    success, message = future.result()
187                    results.append({
188                        'url': task['url'],
189                        'dest_path': task['dest_path'],
190                        'success': success,
191                        'message': message
192                    })
193                    
194                    if not success:
195                        failed_downloads.append({
196                            'url': task['url'],
197                            'dest_path': task['dest_path'],
198                            'error': message
199                        })
200                        
201                except Exception as e:
202                    error_msg = f"Exception during download: {str(e)}"
203                    results.append({
204                        'url': task['url'],
205                        'dest_path': task['dest_path'],
206                        'success': False,
207                        'message': error_msg
208                    })
209                    failed_downloads.append({
210                        'url': task['url'],
211                        'dest_path': task['dest_path'],
212                        'error': error_msg
213                    })
214        
215        # Return comprehensive results
216        return {
217            'total': len(download_tasks),
218            'success': self._download_stats['success'],
219            'failed': self._download_stats['failed'],
220            'skipped': self._download_stats['skipped'],
221            'failed_downloads': failed_downloads,
222            'all_results': results
223        }

Download multiple files concurrently using a thread pool.

Args:
    download_tasks: List of dicts with 'url' and 'dest_path' keys.
    show_progress: Whether to show progress bar (default: True).
    desc: Description for progress bar.

Returns: Dictionary with download statistics and results

Example:
    tasks = [
        {'url': 'http://example.com/file1.txt', 'dest_path': '/path/to/file1.txt'},
        {'url': 'http://example.com/file2.txt', 'dest_path': '/path/to/file2.txt'}
    ]
    results = loader.download_files_concurrent(tasks)

def set_max_workers(self, max_workers: int):
225    def set_max_workers(self, max_workers: int):
226        """
227        Set the maximum number of concurrent download threads.
228        
229        Args:
230            max_workers: Maximum number of threads (must be positive)
231        """
232        if max_workers < 1:
233            raise ValueError("max_workers must be at least 1")
234        self.max_workers = max_workers

Set the maximum number of concurrent download threads.

Args: max_workers: Maximum number of threads (must be positive)

def get_download_stats(self) -> Dict[str, int]:
236    def get_download_stats(self) -> Dict[str, int]:
237        """
238        Get statistics from the last download operation.
239        
240        Returns:
241            Dictionary with success, failed, and skipped counts
242        """
243        return self._download_stats.copy()

Get statistics from the last download operation.

Returns: Dictionary with success, failed, and skipped counts

def get_info(self) -> Dict[str, Any]:
245    def get_info(self) -> Dict[str, Any]:
246        """
247        Get information about the dataset.
248        
249        Returns:
250            Dictionary containing dataset information
251        """
252        return {
253            'name': self.name,
254            'description': self.description,
255            'metadata': self.metadata,
256            'supported_formats': self.get_supported_formats(),
257            'max_workers': self.max_workers
258        }

Get information about the dataset.

Returns: Dictionary containing dataset information

class BaseFeatureExtractor(abc.ABC):
261class BaseFeatureExtractor(ABC):
262    """
263    Base class for all feature extractors.
264    
265    All feature extractors should inherit from this class and implement the required methods.
266    """
267    
268    def __init__(self, name: str, description: str = ""):
269        """
270        Initialize the feature extractor.
271        
272        Args:
273            name: Name of the feature extractor
274            description: Description of the feature extractor
275        """
276        self.name = name
277        self.description = description
278        self.config = {}
279    
280    @abstractmethod
281    def extract_features(self, windows: List[Dict], fs: int, **kwargs) -> List[Dict]:
282        """
283        Extract features from sliding windows.
284        
285        Args:
286            windows: List of sliding window dictionaries
287            fs: Sampling frequency
288            **kwargs: Additional arguments for feature extraction
289            
290        Returns:
291            List of feature dictionaries
292        """
293        pass
294    
295    @abstractmethod
296    def get_feature_names(self) -> List[str]:
297        """
298        Get names of features extracted by this extractor.
299        
300        Returns:
301            List of feature names
302        """
303        pass
304    
305    def configure(self, config: Dict[str, Any]):
306        """
307        Configure the feature extractor.
308        
309        Args:
310            config: Configuration dictionary
311        """
312        self.config.update(config)
313    
314    def get_info(self) -> Dict[str, Any]:
315        """
316        Get information about the feature extractor.
317        
318        Returns:
319            Dictionary containing feature extractor information
320        """
321        return {
322            'name': self.name,
323            'description': self.description,
324            'config': self.config,
325            'feature_names': self.get_feature_names()
326        }

Base class for all feature extractors.

All feature extractors should inherit from this class and implement the required methods.

BaseFeatureExtractor(name: str, description: str = '')
268    def __init__(self, name: str, description: str = ""):
269        """
270        Initialize the feature extractor.
271        
272        Args:
273            name: Name of the feature extractor
274            description: Description of the feature extractor
275        """
276        self.name = name
277        self.description = description
278        self.config = {}

Initialize the feature extractor.

Args:
    name: Name of the feature extractor.
    description: Description of the feature extractor.

name
description
config
@abstractmethod
def extract_features(self, windows: List[Dict], fs: int, **kwargs) -> List[Dict]:
280    @abstractmethod
281    def extract_features(self, windows: List[Dict], fs: int, **kwargs) -> List[Dict]:
282        """
283        Extract features from sliding windows.
284        
285        Args:
286            windows: List of sliding window dictionaries
287            fs: Sampling frequency
288            **kwargs: Additional arguments for feature extraction
289            
290        Returns:
291            List of feature dictionaries
292        """
293        pass

Extract features from sliding windows.

Args:
    windows: List of sliding window dictionaries.
    fs: Sampling frequency.
    **kwargs: Additional arguments for feature extraction.

Returns: List of feature dictionaries

@abstractmethod
def get_feature_names(self) -> List[str]:
295    @abstractmethod
296    def get_feature_names(self) -> List[str]:
297        """
298        Get names of features extracted by this extractor.
299        
300        Returns:
301            List of feature names
302        """
303        pass

Get names of features extracted by this extractor.

Returns: List of feature names

def configure(self, config: Dict[str, Any]):
305    def configure(self, config: Dict[str, Any]):
306        """
307        Configure the feature extractor.
308        
309        Args:
310            config: Configuration dictionary
311        """
312        self.config.update(config)

Configure the feature extractor.

Args: config: Configuration dictionary

def get_info(self) -> Dict[str, Any]:
314    def get_info(self) -> Dict[str, Any]:
315        """
316        Get information about the feature extractor.
317        
318        Returns:
319            Dictionary containing feature extractor information
320        """
321        return {
322            'name': self.name,
323            'description': self.description,
324            'config': self.config,
325            'feature_names': self.get_feature_names()
326        }

Get information about the feature extractor.

Returns: Dictionary containing feature extractor information

class BasePreprocessor(abc.ABC):
329class BasePreprocessor(ABC):
330    """
331    Base class for all preprocessors.
332    
333    All preprocessors should inherit from this class and implement the required methods.
334    """
335    
336    def __init__(self, name: str, description: str = ""):
337        """
338        Initialize the preprocessor.
339        
340        Args:
341            name: Name of the preprocessor
342            description: Description of the preprocessor
343        """
344        self.name = name
345        self.description = description
346        self.config = {}
347        self.fitted = False
348    
349    @abstractmethod
350    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
351        """
352        Fit the preprocessor to the data.
353        
354        Args:
355            data: Input data to fit on
356            **kwargs: Additional arguments for fitting
357        """
358        pass
359    
360    @abstractmethod
361    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
362        """
363        Transform the data using the fitted preprocessor.
364        
365        Args:
366            data: Input data to transform
367            **kwargs: Additional arguments for transformation
368            
369        Returns:
370            Transformed data
371        """
372        pass
373    
374    def fit_transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
375        """
376        Fit the preprocessor and transform the data.
377        
378        Args:
379            data: Input data to fit and transform
380            **kwargs: Additional arguments
381            
382        Returns:
383            Transformed data
384        """
385        self.fit(data, **kwargs)
386        return self.transform(data, **kwargs)
387    
388    def configure(self, config: Dict[str, Any]):
389        """
390        Configure the preprocessor.
391        
392        Args:
393            config: Configuration dictionary
394        """
395        self.config.update(config)
396    
397    def get_info(self) -> Dict[str, Any]:
398        """
399        Get information about the preprocessor.
400        
401        Returns:
402            Dictionary containing preprocessor information
403        """
404        return {
405            'name': self.name,
406            'description': self.description,
407            'config': self.config,
408            'fitted': self.fitted
409        }

Base class for all preprocessors.

All preprocessors should inherit from this class and implement the required methods.

BasePreprocessor(name: str, description: str = '')
336    def __init__(self, name: str, description: str = ""):
337        """
338        Initialize the preprocessor.
339        
340        Args:
341            name: Name of the preprocessor
342            description: Description of the preprocessor
343        """
344        self.name = name
345        self.description = description
346        self.config = {}
347        self.fitted = False

Initialize the preprocessor.

Args:
    name: Name of the preprocessor.
    description: Description of the preprocessor.

name
description
config
fitted
@abstractmethod
def fit( self, data: Union[pandas.core.frame.DataFrame, numpy.ndarray], **kwargs):
349    @abstractmethod
350    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
351        """
352        Fit the preprocessor to the data.
353        
354        Args:
355            data: Input data to fit on
356            **kwargs: Additional arguments for fitting
357        """
358        pass

Fit the preprocessor to the data.

Args:
    data: Input data to fit on.
    **kwargs: Additional arguments for fitting.

@abstractmethod
def transform( self, data: Union[pandas.core.frame.DataFrame, numpy.ndarray], **kwargs) -> Union[pandas.core.frame.DataFrame, numpy.ndarray]:
360    @abstractmethod
361    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
362        """
363        Transform the data using the fitted preprocessor.
364        
365        Args:
366            data: Input data to transform
367            **kwargs: Additional arguments for transformation
368            
369        Returns:
370            Transformed data
371        """
372        pass

Transform the data using the fitted preprocessor.

Args:
    data: Input data to transform.
    **kwargs: Additional arguments for transformation.

Returns: Transformed data

def fit_transform( self, data: Union[pandas.core.frame.DataFrame, numpy.ndarray], **kwargs) -> Union[pandas.core.frame.DataFrame, numpy.ndarray]:
374    def fit_transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
375        """
376        Fit the preprocessor and transform the data.
377        
378        Args:
379            data: Input data to fit and transform
380            **kwargs: Additional arguments
381            
382        Returns:
383            Transformed data
384        """
385        self.fit(data, **kwargs)
386        return self.transform(data, **kwargs)

Fit the preprocessor and transform the data.

Args:
    data: Input data to fit and transform.
    **kwargs: Additional arguments.

Returns: Transformed data

def configure(self, config: Dict[str, Any]):
388    def configure(self, config: Dict[str, Any]):
389        """
390        Configure the preprocessor.
391        
392        Args:
393            config: Configuration dictionary
394        """
395        self.config.update(config)

Configure the preprocessor.

Args: config: Configuration dictionary

def get_info(self) -> Dict[str, Any]:
397    def get_info(self) -> Dict[str, Any]:
398        """
399        Get information about the preprocessor.
400        
401        Returns:
402            Dictionary containing preprocessor information
403        """
404        return {
405            'name': self.name,
406            'description': self.description,
407            'config': self.config,
408            'fitted': self.fitted
409        }

Get information about the preprocessor.

Returns: Dictionary containing preprocessor information

class BaseEDAAnalyzer(abc.ABC):
412class BaseEDAAnalyzer(ABC):
413    """
414    Base class for all EDA analyzers.
415    
416    All EDA analyzers should inherit from this class and implement the required methods.
417    """
418    
419    def __init__(self, name: str, description: str = ""):
420        """
421        Initialize the EDA analyzer.
422        
423        Args:
424            name: Name of the EDA analyzer
425            description: Description of the EDA analyzer
426        """
427        self.name = name
428        self.description = description
429        self.config = {}
430    
431    @abstractmethod
432    def analyze(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs) -> Dict[str, Any]:
433        """
434        Perform analysis on the data.
435        
436        Args:
437            data: Input data to analyze
438            **kwargs: Additional arguments for analysis
439            
440        Returns:
441            Dictionary containing analysis results
442        """
443        pass
444    
445    @abstractmethod
446    def visualize(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs):
447        """
448        Create visualizations of the data.
449        
450        Args:
451            data: Input data to visualize
452            **kwargs: Additional arguments for visualization
453        """
454        pass
455    
456    def configure(self, config: Dict[str, Any]):
457        """
458        Configure the EDA analyzer.
459        
460        Args:
461            config: Configuration dictionary
462        """
463        self.config.update(config)
464    
465    def get_info(self) -> Dict[str, Any]:
466        """
467        Get information about the EDA analyzer.
468        
469        Returns:
470            Dictionary containing EDA analyzer information
471        """
472        return {
473            'name': self.name,
474            'description': self.description,
475            'config': self.config
476        }

Base class for all EDA analyzers.

All EDA analyzers should inherit from this class and implement the required methods.

BaseEDAAnalyzer(name: str, description: str = '')
419    def __init__(self, name: str, description: str = ""):
420        """
421        Initialize the EDA analyzer.
422        
423        Args:
424            name: Name of the EDA analyzer
425            description: Description of the EDA analyzer
426        """
427        self.name = name
428        self.description = description
429        self.config = {}

Initialize the EDA analyzer.

Args:
    name: Name of the EDA analyzer.
    description: Description of the EDA analyzer.

name
description
config
@abstractmethod
def analyze( self, data: Union[pandas.core.frame.DataFrame, List[pandas.core.frame.DataFrame]], **kwargs) -> Dict[str, Any]:
431    @abstractmethod
432    def analyze(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs) -> Dict[str, Any]:
433        """
434        Perform analysis on the data.
435        
436        Args:
437            data: Input data to analyze
438            **kwargs: Additional arguments for analysis
439            
440        Returns:
441            Dictionary containing analysis results
442        """
443        pass

Perform analysis on the data.

Args:
    data: Input data to analyze.
    **kwargs: Additional arguments for analysis.

Returns: Dictionary containing analysis results

@abstractmethod
def visualize( self, data: Union[pandas.core.frame.DataFrame, List[pandas.core.frame.DataFrame]], **kwargs):
445    @abstractmethod
446    def visualize(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs):
447        """
448        Create visualizations of the data.
449        
450        Args:
451            data: Input data to visualize
452            **kwargs: Additional arguments for visualization
453        """
454        pass

Create visualizations of the data.

Args:
    data: Input data to visualize.
    **kwargs: Additional arguments for visualization.

def configure(self, config: Dict[str, Any]):
456    def configure(self, config: Dict[str, Any]):
457        """
458        Configure the EDA analyzer.
459        
460        Args:
461            config: Configuration dictionary
462        """
463        self.config.update(config)

Configure the EDA analyzer.

Args: config: Configuration dictionary

def get_info(self) -> Dict[str, Any]:
465    def get_info(self) -> Dict[str, Any]:
466        """
467        Get information about the EDA analyzer.
468        
469        Returns:
470            Dictionary containing EDA analyzer information
471        """
472        return {
473            'name': self.name,
474            'description': self.description,
475            'config': self.config
476        }

Get information about the EDA analyzer.

Returns: Dictionary containing EDA analyzer information

class BaseClassificationModel(abc.ABC):
479class BaseClassificationModel(ABC):
480    """
481    Base class for all classification models.
482    
483    All classification models should inherit from this class and implement the required methods.
484    """
485    
486    def __init__(self, name: str, description: str = ""):
487        """
488        Initialize the classification model.
489        
490        Args:
491            name: Name of the classification model
492            description: Description of the classification model
493        """
494        self.name = name
495        self.description = description
496        self.model = None
497        self.config = {}
498        self.trained = False
499    
500    @abstractmethod
501    def train(self, features: List[Dict], **kwargs):
502        """
503        Train the classification model.
504        
505        Args:
506            features: List of feature dictionaries
507            **kwargs: Additional arguments for training
508        """
509        pass
510    
511    @abstractmethod
512    def predict(self, features: List[Dict], **kwargs) -> np.ndarray:
513        """
514        Make predictions using the trained model.
515        
516        Args:
517            features: List of feature dictionaries
518            **kwargs: Additional arguments for prediction
519            
520        Returns:
521            Array of predictions
522        """
523        pass
524    
525    @abstractmethod
526    def evaluate(self, features: List[Dict], **kwargs) -> Dict[str, float]:
527        """
528        Evaluate the model performance.
529        
530        Args:
531            features: List of feature dictionaries
532            **kwargs: Additional arguments for evaluation
533            
534        Returns:
535            Dictionary containing evaluation metrics
536        """
537        pass
538    
539    @abstractmethod
540    def save_model(self, filepath: str):
541        """
542        Save the trained model to a file.
543        
544        Args:
545            filepath: Path to save the model
546        """
547        pass
548    
549    @abstractmethod
550    def load_model(self, filepath: str):
551        """
552        Load a trained model from a file.
553        
554        Args:
555            filepath: Path to the saved model
556        """
557        pass
558    
559    def configure(self, config: Dict[str, Any]):
560        """
561        Configure the classification model.
562        
563        Args:
564            config: Configuration dictionary
565        """
566        self.config.update(config)
567    
568    def get_info(self) -> Dict[str, Any]:
569        """
570        Get information about the classification model.
571        
572        Returns:
573            Dictionary containing model information
574        """
575        return {
576            'name': self.name,
577            'description': self.description,
578            'config': self.config,
579            'trained': self.trained
580        } 

Base class for all classification models.

All classification models should inherit from this class and implement the required methods.

BaseClassificationModel(name: str, description: str = '')
486    def __init__(self, name: str, description: str = ""):
487        """
488        Initialize the classification model.
489        
490        Args:
491            name: Name of the classification model
492            description: Description of the classification model
493        """
494        self.name = name
495        self.description = description
496        self.model = None
497        self.config = {}
498        self.trained = False

Initialize the classification model.

Args:
    name: Name of the classification model.
    description: Description of the classification model.

name
description
model
config
trained
@abstractmethod
def train(self, features: List[Dict], **kwargs):
500    @abstractmethod
501    def train(self, features: List[Dict], **kwargs):
502        """
503        Train the classification model.
504        
505        Args:
506            features: List of feature dictionaries
507            **kwargs: Additional arguments for training
508        """
509        pass

Train the classification model.

Args:
    features: List of feature dictionaries.
    **kwargs: Additional arguments for training.

@abstractmethod
def predict(self, features: List[Dict], **kwargs) -> numpy.ndarray:
511    @abstractmethod
512    def predict(self, features: List[Dict], **kwargs) -> np.ndarray:
513        """
514        Make predictions using the trained model.
515        
516        Args:
517            features: List of feature dictionaries
518            **kwargs: Additional arguments for prediction
519            
520        Returns:
521            Array of predictions
522        """
523        pass

Make predictions using the trained model.

Args:
    features: List of feature dictionaries.
    **kwargs: Additional arguments for prediction.

Returns: Array of predictions

@abstractmethod
def evaluate(self, features: List[Dict], **kwargs) -> Dict[str, float]:
525    @abstractmethod
526    def evaluate(self, features: List[Dict], **kwargs) -> Dict[str, float]:
527        """
528        Evaluate the model performance.
529        
530        Args:
531            features: List of feature dictionaries
532            **kwargs: Additional arguments for evaluation
533            
534        Returns:
535            Dictionary containing evaluation metrics
536        """
537        pass

Evaluate the model performance.

Args:
    features: List of feature dictionaries.
    **kwargs: Additional arguments for evaluation.

Returns: Dictionary containing evaluation metrics

@abstractmethod
def save_model(self, filepath: str):
539    @abstractmethod
540    def save_model(self, filepath: str):
541        """
542        Save the trained model to a file.
543        
544        Args:
545            filepath: Path to save the model
546        """
547        pass

Save the trained model to a file.

Args: filepath: Path to save the model

@abstractmethod
def load_model(self, filepath: str):
549    @abstractmethod
550    def load_model(self, filepath: str):
551        """
552        Load a trained model from a file.
553        
554        Args:
555            filepath: Path to the saved model
556        """
557        pass

Load a trained model from a file.

Args: filepath: Path to the saved model

def configure(self, config: Dict[str, Any]):
559    def configure(self, config: Dict[str, Any]):
560        """
561        Configure the classification model.
562        
563        Args:
564            config: Configuration dictionary
565        """
566        self.config.update(config)

Configure the classification model.

Args: config: Configuration dictionary

def get_info(self) -> Dict[str, Any]:
568    def get_info(self) -> Dict[str, Any]:
569        """
570        Get information about the classification model.
571        
572        Returns:
573            Dictionary containing model information
574        """
575        return {
576            'name': self.name,
577            'description': self.description,
578            'config': self.config,
579            'trained': self.trained
580        } 

Get information about the classification model.

Returns: Dictionary containing model information