gaitsetpy.core.base_classes
Base classes for GaitSetPy components.
This module defines abstract base classes that all components should inherit from. Each base class defines the interface and common functionality for its respective component type.
Maintainer: @aharshit123456
1""" 2Base classes for GaitSetPy components. 3 4This module defines abstract base classes that all components should inherit from. 5Each base class defines the interface and common functionality for its respective component type. 6 7Maintainer: @aharshit123456 8""" 9 10from abc import ABC, abstractmethod 11from typing import Any, Dict, List, Optional, Tuple, Union, Callable 12import pandas as pd 13import numpy as np 14import os 15import requests 16from concurrent.futures import ThreadPoolExecutor, as_completed 17from tqdm import tqdm 18 19 20class BaseDatasetLoader(ABC): 21 """ 22 Base class for all dataset loaders. 23 24 All dataset loaders should inherit from this class and implement the required methods. 25 This class provides thread-safe concurrent downloading capabilities for efficient data retrieval. 26 """ 27 28 def __init__(self, name: str, description: str = "", max_workers: int = 8): 29 """ 30 Initialize the dataset loader. 31 32 Args: 33 name: Name of the dataset 34 description: Description of the dataset 35 max_workers: Maximum number of concurrent download threads (default: 8) 36 """ 37 self.name = name 38 self.description = description 39 self.data = None 40 self.metadata = {} 41 self.max_workers = max_workers 42 self._download_stats = {'success': 0, 'failed': 0, 'skipped': 0} 43 44 @abstractmethod 45 def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]: 46 """ 47 Load dataset from the specified directory. 48 49 Args: 50 data_dir: Directory containing the dataset 51 **kwargs: Additional arguments specific to the dataset 52 53 Returns: 54 Tuple of (data_list, names_list) 55 """ 56 pass 57 58 @abstractmethod 59 def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 60 window_size: int = 192, step_size: int = 32) -> List[Dict]: 61 """ 62 Create sliding windows from the loaded data. 
63 64 Args: 65 data: List of DataFrames 66 names: List of names corresponding to the data 67 window_size: Size of each sliding window 68 step_size: Step size for sliding windows 69 70 Returns: 71 List of dictionaries containing sliding windows 72 """ 73 pass 74 75 @abstractmethod 76 def get_supported_formats(self) -> List[str]: 77 """ 78 Get list of supported file formats. 79 80 Returns: 81 List of supported file extensions 82 """ 83 pass 84 85 def _download_file(self, url: str, dest_path: str, 86 chunk_size: int = 8192, timeout: int = 30) -> Tuple[bool, str]: 87 """ 88 Download a single file from URL to destination path. 89 90 This method is thread-safe and can be called concurrently. 91 92 Args: 93 url: URL to download from 94 dest_path: Destination file path 95 chunk_size: Size of chunks to download (default: 8192 bytes) 96 timeout: Request timeout in seconds (default: 30) 97 98 Returns: 99 Tuple of (success: bool, message: str) 100 """ 101 try: 102 # Check if file already exists 103 if os.path.exists(dest_path): 104 self._download_stats['skipped'] += 1 105 return True, f"File already exists: {dest_path}" 106 107 # Make the request 108 response = requests.get(url, stream=True, timeout=timeout) 109 110 if response.status_code == 200: 111 # Ensure parent directory exists 112 os.makedirs(os.path.dirname(dest_path) if os.path.dirname(dest_path) else '.', exist_ok=True) 113 114 # Write file in chunks 115 with open(dest_path, 'wb') as f: 116 for chunk in response.iter_content(chunk_size=chunk_size): 117 if chunk: 118 f.write(chunk) 119 120 self._download_stats['success'] += 1 121 return True, f"Successfully downloaded: {dest_path}" 122 else: 123 self._download_stats['failed'] += 1 124 return False, f"HTTP {response.status_code}: {url}" 125 126 except requests.exceptions.Timeout: 127 self._download_stats['failed'] += 1 128 return False, f"Timeout downloading: {url}" 129 except requests.exceptions.RequestException as e: 130 self._download_stats['failed'] += 1 131 return 
False, f"Request error for {url}: {str(e)}" 132 except IOError as e: 133 self._download_stats['failed'] += 1 134 return False, f"IO error for {dest_path}: {str(e)}" 135 except Exception as e: 136 self._download_stats['failed'] += 1 137 return False, f"Unexpected error for {url}: {str(e)}" 138 139 def download_files_concurrent(self, 140 download_tasks: List[Dict[str, str]], 141 show_progress: bool = True, 142 desc: str = "Downloading files") -> Dict[str, Any]: 143 """ 144 Download multiple files concurrently using a thread pool. 145 146 Args: 147 download_tasks: List of dicts with 'url' and 'dest_path' keys 148 show_progress: Whether to show progress bar (default: True) 149 desc: Description for progress bar 150 151 Returns: 152 Dictionary with download statistics and results 153 154 Example: 155 tasks = [ 156 {'url': 'http://example.com/file1.txt', 'dest_path': '/path/to/file1.txt'}, 157 {'url': 'http://example.com/file2.txt', 'dest_path': '/path/to/file2.txt'} 158 ] 159 results = loader.download_files_concurrent(tasks) 160 """ 161 # Reset stats 162 self._download_stats = {'success': 0, 'failed': 0, 'skipped': 0} 163 164 results = [] 165 failed_downloads = [] 166 167 with ThreadPoolExecutor(max_workers=self.max_workers) as executor: 168 # Submit all download tasks 169 future_to_task = { 170 executor.submit(self._download_file, task['url'], task['dest_path']): task 171 for task in download_tasks 172 } 173 174 # Process completed tasks with optional progress bar 175 if show_progress: 176 futures = tqdm(as_completed(future_to_task), 177 total=len(download_tasks), 178 desc=desc) 179 else: 180 futures = as_completed(future_to_task) 181 182 for future in futures: 183 task = future_to_task[future] 184 try: 185 success, message = future.result() 186 results.append({ 187 'url': task['url'], 188 'dest_path': task['dest_path'], 189 'success': success, 190 'message': message 191 }) 192 193 if not success: 194 failed_downloads.append({ 195 'url': task['url'], 196 'dest_path': 
task['dest_path'], 197 'error': message 198 }) 199 200 except Exception as e: 201 error_msg = f"Exception during download: {str(e)}" 202 results.append({ 203 'url': task['url'], 204 'dest_path': task['dest_path'], 205 'success': False, 206 'message': error_msg 207 }) 208 failed_downloads.append({ 209 'url': task['url'], 210 'dest_path': task['dest_path'], 211 'error': error_msg 212 }) 213 214 # Return comprehensive results 215 return { 216 'total': len(download_tasks), 217 'success': self._download_stats['success'], 218 'failed': self._download_stats['failed'], 219 'skipped': self._download_stats['skipped'], 220 'failed_downloads': failed_downloads, 221 'all_results': results 222 } 223 224 def set_max_workers(self, max_workers: int): 225 """ 226 Set the maximum number of concurrent download threads. 227 228 Args: 229 max_workers: Maximum number of threads (must be positive) 230 """ 231 if max_workers < 1: 232 raise ValueError("max_workers must be at least 1") 233 self.max_workers = max_workers 234 235 def get_download_stats(self) -> Dict[str, int]: 236 """ 237 Get statistics from the last download operation. 238 239 Returns: 240 Dictionary with success, failed, and skipped counts 241 """ 242 return self._download_stats.copy() 243 244 def get_info(self) -> Dict[str, Any]: 245 """ 246 Get information about the dataset. 247 248 Returns: 249 Dictionary containing dataset information 250 """ 251 return { 252 'name': self.name, 253 'description': self.description, 254 'metadata': self.metadata, 255 'supported_formats': self.get_supported_formats(), 256 'max_workers': self.max_workers 257 } 258 259 260class BaseFeatureExtractor(ABC): 261 """ 262 Base class for all feature extractors. 263 264 All feature extractors should inherit from this class and implement the required methods. 265 """ 266 267 def __init__(self, name: str, description: str = ""): 268 """ 269 Initialize the feature extractor. 
270 271 Args: 272 name: Name of the feature extractor 273 description: Description of the feature extractor 274 """ 275 self.name = name 276 self.description = description 277 self.config = {} 278 279 @abstractmethod 280 def extract_features(self, windows: List[Dict], fs: int, **kwargs) -> List[Dict]: 281 """ 282 Extract features from sliding windows. 283 284 Args: 285 windows: List of sliding window dictionaries 286 fs: Sampling frequency 287 **kwargs: Additional arguments for feature extraction 288 289 Returns: 290 List of feature dictionaries 291 """ 292 pass 293 294 @abstractmethod 295 def get_feature_names(self) -> List[str]: 296 """ 297 Get names of features extracted by this extractor. 298 299 Returns: 300 List of feature names 301 """ 302 pass 303 304 def configure(self, config: Dict[str, Any]): 305 """ 306 Configure the feature extractor. 307 308 Args: 309 config: Configuration dictionary 310 """ 311 self.config.update(config) 312 313 def get_info(self) -> Dict[str, Any]: 314 """ 315 Get information about the feature extractor. 316 317 Returns: 318 Dictionary containing feature extractor information 319 """ 320 return { 321 'name': self.name, 322 'description': self.description, 323 'config': self.config, 324 'feature_names': self.get_feature_names() 325 } 326 327 328class BasePreprocessor(ABC): 329 """ 330 Base class for all preprocessors. 331 332 All preprocessors should inherit from this class and implement the required methods. 333 """ 334 335 def __init__(self, name: str, description: str = ""): 336 """ 337 Initialize the preprocessor. 338 339 Args: 340 name: Name of the preprocessor 341 description: Description of the preprocessor 342 """ 343 self.name = name 344 self.description = description 345 self.config = {} 346 self.fitted = False 347 348 @abstractmethod 349 def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs): 350 """ 351 Fit the preprocessor to the data. 
352 353 Args: 354 data: Input data to fit on 355 **kwargs: Additional arguments for fitting 356 """ 357 pass 358 359 @abstractmethod 360 def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]: 361 """ 362 Transform the data using the fitted preprocessor. 363 364 Args: 365 data: Input data to transform 366 **kwargs: Additional arguments for transformation 367 368 Returns: 369 Transformed data 370 """ 371 pass 372 373 def fit_transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]: 374 """ 375 Fit the preprocessor and transform the data. 376 377 Args: 378 data: Input data to fit and transform 379 **kwargs: Additional arguments 380 381 Returns: 382 Transformed data 383 """ 384 self.fit(data, **kwargs) 385 return self.transform(data, **kwargs) 386 387 def configure(self, config: Dict[str, Any]): 388 """ 389 Configure the preprocessor. 390 391 Args: 392 config: Configuration dictionary 393 """ 394 self.config.update(config) 395 396 def get_info(self) -> Dict[str, Any]: 397 """ 398 Get information about the preprocessor. 399 400 Returns: 401 Dictionary containing preprocessor information 402 """ 403 return { 404 'name': self.name, 405 'description': self.description, 406 'config': self.config, 407 'fitted': self.fitted 408 } 409 410 411class BaseEDAAnalyzer(ABC): 412 """ 413 Base class for all EDA analyzers. 414 415 All EDA analyzers should inherit from this class and implement the required methods. 416 """ 417 418 def __init__(self, name: str, description: str = ""): 419 """ 420 Initialize the EDA analyzer. 421 422 Args: 423 name: Name of the EDA analyzer 424 description: Description of the EDA analyzer 425 """ 426 self.name = name 427 self.description = description 428 self.config = {} 429 430 @abstractmethod 431 def analyze(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs) -> Dict[str, Any]: 432 """ 433 Perform analysis on the data. 
434 435 Args: 436 data: Input data to analyze 437 **kwargs: Additional arguments for analysis 438 439 Returns: 440 Dictionary containing analysis results 441 """ 442 pass 443 444 @abstractmethod 445 def visualize(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs): 446 """ 447 Create visualizations of the data. 448 449 Args: 450 data: Input data to visualize 451 **kwargs: Additional arguments for visualization 452 """ 453 pass 454 455 def configure(self, config: Dict[str, Any]): 456 """ 457 Configure the EDA analyzer. 458 459 Args: 460 config: Configuration dictionary 461 """ 462 self.config.update(config) 463 464 def get_info(self) -> Dict[str, Any]: 465 """ 466 Get information about the EDA analyzer. 467 468 Returns: 469 Dictionary containing EDA analyzer information 470 """ 471 return { 472 'name': self.name, 473 'description': self.description, 474 'config': self.config 475 } 476 477 478class BaseClassificationModel(ABC): 479 """ 480 Base class for all classification models. 481 482 All classification models should inherit from this class and implement the required methods. 483 """ 484 485 def __init__(self, name: str, description: str = ""): 486 """ 487 Initialize the classification model. 488 489 Args: 490 name: Name of the classification model 491 description: Description of the classification model 492 """ 493 self.name = name 494 self.description = description 495 self.model = None 496 self.config = {} 497 self.trained = False 498 499 @abstractmethod 500 def train(self, features: List[Dict], **kwargs): 501 """ 502 Train the classification model. 503 504 Args: 505 features: List of feature dictionaries 506 **kwargs: Additional arguments for training 507 """ 508 pass 509 510 @abstractmethod 511 def predict(self, features: List[Dict], **kwargs) -> np.ndarray: 512 """ 513 Make predictions using the trained model. 
514 515 Args: 516 features: List of feature dictionaries 517 **kwargs: Additional arguments for prediction 518 519 Returns: 520 Array of predictions 521 """ 522 pass 523 524 @abstractmethod 525 def evaluate(self, features: List[Dict], **kwargs) -> Dict[str, float]: 526 """ 527 Evaluate the model performance. 528 529 Args: 530 features: List of feature dictionaries 531 **kwargs: Additional arguments for evaluation 532 533 Returns: 534 Dictionary containing evaluation metrics 535 """ 536 pass 537 538 @abstractmethod 539 def save_model(self, filepath: str): 540 """ 541 Save the trained model to a file. 542 543 Args: 544 filepath: Path to save the model 545 """ 546 pass 547 548 @abstractmethod 549 def load_model(self, filepath: str): 550 """ 551 Load a trained model from a file. 552 553 Args: 554 filepath: Path to the saved model 555 """ 556 pass 557 558 def configure(self, config: Dict[str, Any]): 559 """ 560 Configure the classification model. 561 562 Args: 563 config: Configuration dictionary 564 """ 565 self.config.update(config) 566 567 def get_info(self) -> Dict[str, Any]: 568 """ 569 Get information about the classification model. 570 571 Returns: 572 Dictionary containing model information 573 """ 574 return { 575 'name': self.name, 576 'description': self.description, 577 'config': self.config, 578 'trained': self.trained 579 }
class BaseDatasetLoader(ABC):
    """
    Base class for all dataset loaders.

    Subclasses implement loading, windowing and format discovery; this base
    class supplies a thread-pooled concurrent download helper plus shared
    bookkeeping (stats, configuration, metadata).
    """

    def __init__(self, name: str, description: str = "", max_workers: int = 8):
        """
        Initialize the dataset loader.

        Args:
            name: Name of the dataset
            description: Description of the dataset
            max_workers: Maximum number of concurrent download threads (default: 8)
        """
        self.name = name
        self.description = description
        self.max_workers = max_workers
        self.data = None       # filled in by concrete load_data() implementations
        self.metadata = {}
        # Per-run counters maintained by the download helpers.
        self._download_stats = {'success': 0, 'failed': 0, 'skipped': 0}

    @abstractmethod
    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load the dataset found under ``data_dir``.

        Args:
            data_dir: Directory containing the dataset
            **kwargs: Additional arguments specific to the dataset

        Returns:
            Tuple of (data_list, names_list)
        """
        ...

    @abstractmethod
    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                               window_size: int = 192, step_size: int = 32) -> List[Dict]:
        """
        Cut the loaded recordings into fixed-size sliding windows.

        Args:
            data: List of DataFrames
            names: List of names corresponding to the data
            window_size: Size of each sliding window
            step_size: Step size for sliding windows

        Returns:
            List of dictionaries containing sliding windows
        """
        ...

    @abstractmethod
    def get_supported_formats(self) -> List[str]:
        """
        Report the file extensions this loader understands.

        Returns:
            List of supported file extensions
        """
        ...

    def _download_file(self, url: str, dest_path: str,
                       chunk_size: int = 8192, timeout: int = 30) -> Tuple[bool, str]:
        """
        Fetch a single file from ``url`` into ``dest_path``.

        Safe to invoke from multiple worker threads at once.

        Args:
            url: URL to download from
            dest_path: Destination file path
            chunk_size: Size of chunks to download (default: 8192 bytes)
            timeout: Request timeout in seconds (default: 30)

        Returns:
            Tuple of (success: bool, message: str)
        """
        try:
            # An existing file is treated as already downloaded.
            if os.path.exists(dest_path):
                self._download_stats['skipped'] += 1
                return True, f"File already exists: {dest_path}"

            resp = requests.get(url, stream=True, timeout=timeout)
            if resp.status_code != 200:
                self._download_stats['failed'] += 1
                return False, f"HTTP {resp.status_code}: {url}"

            # Create the parent directory ('' means the current directory).
            parent = os.path.dirname(dest_path) or '.'
            os.makedirs(parent, exist_ok=True)

            # Stream to disk chunk by chunk to keep memory bounded.
            with open(dest_path, 'wb') as fh:
                for piece in resp.iter_content(chunk_size=chunk_size):
                    if piece:
                        fh.write(piece)

            self._download_stats['success'] += 1
            return True, f"Successfully downloaded: {dest_path}"

        except requests.exceptions.Timeout:
            self._download_stats['failed'] += 1
            return False, f"Timeout downloading: {url}"
        except requests.exceptions.RequestException as e:
            self._download_stats['failed'] += 1
            return False, f"Request error for {url}: {str(e)}"
        except IOError as e:
            self._download_stats['failed'] += 1
            return False, f"IO error for {dest_path}: {str(e)}"
        except Exception as e:
            self._download_stats['failed'] += 1
            return False, f"Unexpected error for {url}: {str(e)}"

    def download_files_concurrent(self,
                                  download_tasks: List[Dict[str, str]],
                                  show_progress: bool = True,
                                  desc: str = "Downloading files") -> Dict[str, Any]:
        """
        Download many files at once via a thread pool.

        Args:
            download_tasks: List of dicts with 'url' and 'dest_path' keys
            show_progress: Whether to show progress bar (default: True)
            desc: Description for progress bar

        Returns:
            Dictionary with download statistics and results

        Example:
            tasks = [
                {'url': 'http://example.com/file1.txt', 'dest_path': '/path/to/file1.txt'},
                {'url': 'http://example.com/file2.txt', 'dest_path': '/path/to/file2.txt'}
            ]
            results = loader.download_files_concurrent(tasks)
        """
        # Fresh counters for this batch.
        self._download_stats = {'success': 0, 'failed': 0, 'skipped': 0}

        all_results = []
        failures = []

        with ThreadPoolExecutor(max_workers=self.max_workers) as pool:
            pending = {
                pool.submit(self._download_file, job['url'], job['dest_path']): job
                for job in download_tasks
            }

            iterator = as_completed(pending)
            if show_progress:
                iterator = tqdm(iterator, total=len(download_tasks), desc=desc)

            for fut in iterator:
                job = pending[fut]
                try:
                    ok, msg = fut.result()
                except Exception as e:
                    # The worker itself blew up (not a handled download error).
                    msg = f"Exception during download: {str(e)}"
                    all_results.append({
                        'url': job['url'],
                        'dest_path': job['dest_path'],
                        'success': False,
                        'message': msg
                    })
                    failures.append({
                        'url': job['url'],
                        'dest_path': job['dest_path'],
                        'error': msg
                    })
                else:
                    all_results.append({
                        'url': job['url'],
                        'dest_path': job['dest_path'],
                        'success': ok,
                        'message': msg
                    })
                    if not ok:
                        failures.append({
                            'url': job['url'],
                            'dest_path': job['dest_path'],
                            'error': msg
                        })

        return {
            'total': len(download_tasks),
            'success': self._download_stats['success'],
            'failed': self._download_stats['failed'],
            'skipped': self._download_stats['skipped'],
            'failed_downloads': failures,
            'all_results': all_results
        }

    def set_max_workers(self, max_workers: int):
        """
        Change the size of the download thread pool.

        Args:
            max_workers: Maximum number of threads (must be positive)
        """
        if max_workers >= 1:
            self.max_workers = max_workers
        else:
            raise ValueError("max_workers must be at least 1")

    def get_download_stats(self) -> Dict[str, int]:
        """
        Return a copy of the counters from the last download batch.

        Returns:
            Dictionary with success, failed, and skipped counts
        """
        return dict(self._download_stats)

    def get_info(self) -> Dict[str, Any]:
        """
        Summarize this loader's identity and configuration.

        Returns:
            Dictionary containing dataset information
        """
        info = {
            'name': self.name,
            'description': self.description,
            'metadata': self.metadata,
            'supported_formats': self.get_supported_formats(),
            'max_workers': self.max_workers,
        }
        return info
Base class for all dataset loaders.
All dataset loaders should inherit from this class and implement the required methods. This class provides thread-safe concurrent downloading capabilities for efficient data retrieval.
def __init__(self, name: str, description: str = "", max_workers: int = 8):
    """
    Initialize the dataset loader.

    Args:
        name: Name of the dataset
        description: Description of the dataset
        max_workers: Maximum number of concurrent download threads (default: 8)
    """
    # Identity of the loader.
    self.name = name
    self.description = description
    # Concurrency setting for download_files_concurrent().
    self.max_workers = max_workers
    # Loaded content and its metadata; filled in by load_data() implementations.
    self.data = None
    self.metadata = {}
    # Counters maintained by the download helpers, reset per batch.
    self._download_stats = {'success': 0, 'failed': 0, 'skipped': 0}
Initialize the dataset loader.
Args: name: Name of the dataset description: Description of the dataset max_workers: Maximum number of concurrent download threads (default: 8)
@abstractmethod
def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
    """
    Load the dataset found under ``data_dir``.

    Concrete loaders decide how files are discovered and parsed.

    Args:
        data_dir: Directory containing the dataset
        **kwargs: Additional arguments specific to the dataset

    Returns:
        Tuple of (data_list, names_list)
    """
    ...
Load dataset from the specified directory.
Args: data_dir: Directory containing the dataset **kwargs: Additional arguments specific to the dataset
Returns: Tuple of (data_list, names_list)
@abstractmethod
def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                           window_size: int = 192, step_size: int = 32) -> List[Dict]:
    """
    Cut the loaded recordings into fixed-size sliding windows.

    Args:
        data: List of DataFrames
        names: List of names corresponding to the data
        window_size: Size of each sliding window
        step_size: Step size for sliding windows

    Returns:
        List of dictionaries containing sliding windows
    """
    ...
Create sliding windows from the loaded data.
Args: data: List of DataFrames names: List of names corresponding to the data window_size: Size of each sliding window step_size: Step size for sliding windows
Returns: List of dictionaries containing sliding windows
@abstractmethod
def get_supported_formats(self) -> List[str]:
    """
    Report the file extensions this loader understands.

    Returns:
        List of supported file extensions
    """
    ...
Get list of supported file formats.
Returns: List of supported file extensions
def download_files_concurrent(self,
                              download_tasks: List[Dict[str, str]],
                              show_progress: bool = True,
                              desc: str = "Downloading files") -> Dict[str, Any]:
    """
    Download many files at once via a thread pool.

    Args:
        download_tasks: List of dicts with 'url' and 'dest_path' keys
        show_progress: Whether to show progress bar (default: True)
        desc: Description for progress bar

    Returns:
        Dictionary with download statistics and results

    Example:
        tasks = [
            {'url': 'http://example.com/file1.txt', 'dest_path': '/path/to/file1.txt'},
            {'url': 'http://example.com/file2.txt', 'dest_path': '/path/to/file2.txt'}
        ]
        results = loader.download_files_concurrent(tasks)
    """
    # Fresh counters for this batch.
    self._download_stats = {'success': 0, 'failed': 0, 'skipped': 0}

    all_results = []
    failures = []

    with ThreadPoolExecutor(max_workers=self.max_workers) as pool:
        # Fan out every task to the pool, remembering which future maps to which job.
        pending = {
            pool.submit(self._download_file, job['url'], job['dest_path']): job
            for job in download_tasks
        }

        iterator = as_completed(pending)
        if show_progress:
            iterator = tqdm(iterator, total=len(download_tasks), desc=desc)

        for fut in iterator:
            job = pending[fut]
            try:
                ok, msg = fut.result()
            except Exception as e:
                # The worker itself raised (not a handled download failure).
                msg = f"Exception during download: {str(e)}"
                all_results.append({
                    'url': job['url'],
                    'dest_path': job['dest_path'],
                    'success': False,
                    'message': msg
                })
                failures.append({
                    'url': job['url'],
                    'dest_path': job['dest_path'],
                    'error': msg
                })
            else:
                all_results.append({
                    'url': job['url'],
                    'dest_path': job['dest_path'],
                    'success': ok,
                    'message': msg
                })
                if not ok:
                    failures.append({
                        'url': job['url'],
                        'dest_path': job['dest_path'],
                        'error': msg
                    })

    return {
        'total': len(download_tasks),
        'success': self._download_stats['success'],
        'failed': self._download_stats['failed'],
        'skipped': self._download_stats['skipped'],
        'failed_downloads': failures,
        'all_results': all_results
    }
Download multiple files concurrently using a thread pool.
Args: download_tasks: List of dicts with 'url' and 'dest_path' keys show_progress: Whether to show progress bar (default: True) desc: Description for progress bar
Returns: Dictionary with download statistics and results
Example: tasks = [ {'url': 'http://example.com/file1.txt', 'dest_path': '/path/to/file1.txt'}, {'url': 'http://example.com/file2.txt', 'dest_path': '/path/to/file2.txt'} ] results = loader.download_files_concurrent(tasks)
def set_max_workers(self, max_workers: int):
    """
    Change the size of the download thread pool.

    Args:
        max_workers: Maximum number of threads (must be positive)

    Raises:
        ValueError: If max_workers is less than 1.
    """
    if max_workers >= 1:
        self.max_workers = max_workers
    else:
        raise ValueError("max_workers must be at least 1")
Set the maximum number of concurrent download threads.
Args: max_workers: Maximum number of threads (must be positive)
def get_download_stats(self) -> Dict[str, int]:
    """
    Return the counters from the last download batch.

    A shallow copy is handed out so callers cannot mutate the live counters.

    Returns:
        Dictionary with success, failed, and skipped counts
    """
    return dict(self._download_stats)
Get statistics from the last download operation.
Returns: Dictionary with success, failed, and skipped counts
def get_info(self) -> Dict[str, Any]:
    """
    Summarize this loader's identity and configuration.

    Returns:
        Dictionary containing dataset information
    """
    info = {
        'name': self.name,
        'description': self.description,
        'metadata': self.metadata,
        'supported_formats': self.get_supported_formats(),
        'max_workers': self.max_workers,
    }
    return info
Get information about the dataset.
Returns: Dictionary containing dataset information
class BaseFeatureExtractor(ABC):
    """
    Abstract interface for feature extractors.

    Concrete extractors implement extract_features() and get_feature_names();
    configure() and get_info() are shared conveniences.
    """

    def __init__(self, name: str, description: str = ""):
        """
        Initialize the feature extractor.

        Args:
            name: Name of the feature extractor
            description: Description of the feature extractor
        """
        self.name = name
        self.description = description
        self.config = {}  # runtime settings, merged via configure()

    @abstractmethod
    def extract_features(self, windows: List[Dict], fs: int, **kwargs) -> List[Dict]:
        """
        Compute features over a set of sliding windows.

        Args:
            windows: List of sliding window dictionaries
            fs: Sampling frequency
            **kwargs: Additional arguments for feature extraction

        Returns:
            List of feature dictionaries
        """
        ...

    @abstractmethod
    def get_feature_names(self) -> List[str]:
        """
        Name every feature this extractor produces.

        Returns:
            List of feature names
        """
        ...

    def configure(self, config: Dict[str, Any]):
        """
        Merge ``config`` into the current configuration.

        Args:
            config: Configuration dictionary
        """
        self.config.update(config)

    def get_info(self) -> Dict[str, Any]:
        """
        Summarize this extractor's identity and configuration.

        Returns:
            Dictionary containing feature extractor information
        """
        return {
            'name': self.name,
            'description': self.description,
            'config': self.config,
            'feature_names': self.get_feature_names(),
        }
Base class for all feature extractors.
All feature extractors should inherit from this class and implement the required methods.
def __init__(self, name: str, description: str = ""):
    """
    Initialize the feature extractor.

    Args:
        name: Name of the feature extractor
        description: Description of the feature extractor
    """
    # Per-extractor settings, merged via configure().
    self.config = {}
    # Identity of the extractor.
    self.name = name
    self.description = description
Initialize the feature extractor.
Args: name: Name of the feature extractor description: Description of the feature extractor
@abstractmethod
def extract_features(self, windows: List[Dict], fs: int, **kwargs) -> List[Dict]:
    """
    Compute features over a set of sliding windows.

    Args:
        windows: List of sliding window dictionaries
        fs: Sampling frequency
        **kwargs: Additional arguments for feature extraction

    Returns:
        List of feature dictionaries
    """
    ...
Extract features from sliding windows.
Args: windows: List of sliding window dictionaries fs: Sampling frequency **kwargs: Additional arguments for feature extraction
Returns: List of feature dictionaries
@abstractmethod
def get_feature_names(self) -> List[str]:
    """
    Name every feature this extractor produces.

    Returns:
        List of feature names
    """
    ...
Get names of features extracted by this extractor.
Returns: List of feature names
def configure(self, config: Dict[str, Any]):
    """
    Merge ``config`` into the current configuration.

    Existing keys are overwritten; keys absent from ``config`` are kept.

    Args:
        config: Configuration dictionary
    """
    self.config.update(config)
Configure the feature extractor.
Args: config: Configuration dictionary
def get_info(self) -> Dict[str, Any]:
    """
    Summarize this extractor's identity and configuration.

    Returns:
        Dictionary containing feature extractor information
    """
    summary = {
        'name': self.name,
        'description': self.description,
        'config': self.config,
        'feature_names': self.get_feature_names(),
    }
    return summary
Get information about the feature extractor.
Returns: Dictionary containing feature extractor information
class BasePreprocessor(ABC):
    """
    Abstract interface for data preprocessors.

    Subclasses implement fit() and transform(); fit_transform() composes
    the two for convenience.
    """

    def __init__(self, name: str, description: str = ""):
        """
        Set up the preprocessor with identifying metadata.

        Args:
            name: Name of the preprocessor
            description: Description of the preprocessor
        """
        self.name = name
        self.description = description
        self.config = {}
        # Tracks fit state; presumably concrete fit() implementations set
        # this to True — the base class never does (TODO confirm in subclasses).
        self.fitted = False

    @abstractmethod
    def fit(self, data: Union[pd.DataFrame, np.ndarray], **kwargs):
        """
        Learn any parameters required for transformation from the data.

        Args:
            data: Input data to fit on
            **kwargs: Additional arguments for fitting
        """
        pass

    @abstractmethod
    def transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Apply the fitted transformation to the data.

        Args:
            data: Input data to transform
            **kwargs: Additional arguments for transformation

        Returns:
            Transformed data
        """
        pass

    def fit_transform(self, data: Union[pd.DataFrame, np.ndarray], **kwargs) -> Union[pd.DataFrame, np.ndarray]:
        """
        Fit on the data, then transform it in one call.

        Args:
            data: Input data to fit and transform
            **kwargs: Additional arguments (forwarded to both fit and transform)

        Returns:
            Transformed data
        """
        self.fit(data, **kwargs)
        return self.transform(data, **kwargs)

    def configure(self, config: Dict[str, Any]):
        """
        Merge the given options into the current configuration.

        Args:
            config: Configuration dictionary
        """
        for key, value in config.items():
            self.config[key] = value

    def get_info(self) -> Dict[str, Any]:
        """
        Summarize this preprocessor as a plain dictionary.

        Returns:
            Dictionary containing preprocessor information
        """
        return dict(
            name=self.name,
            description=self.description,
            config=self.config,
            fitted=self.fitted,
        )
class BaseEDAAnalyzer(ABC):
    """
    Abstract interface for exploratory data analysis (EDA) components.

    Subclasses implement analyze() for computed summaries and
    visualize() for plotting.
    """

    def __init__(self, name: str, description: str = ""):
        """
        Set up the analyzer with identifying metadata.

        Args:
            name: Name of the EDA analyzer
            description: Description of the EDA analyzer
        """
        self.name = name
        self.description = description
        self.config = {}

    @abstractmethod
    def analyze(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs) -> Dict[str, Any]:
        """
        Run the analysis over the supplied data.

        Args:
            data: Input data to analyze
            **kwargs: Additional arguments for analysis

        Returns:
            Dictionary containing analysis results
        """
        pass

    @abstractmethod
    def visualize(self, data: Union[pd.DataFrame, List[pd.DataFrame]], **kwargs):
        """
        Produce visualizations of the supplied data.

        Args:
            data: Input data to visualize
            **kwargs: Additional arguments for visualization
        """
        pass

    def configure(self, config: Dict[str, Any]):
        """
        Merge the given options into the current configuration.

        Args:
            config: Configuration dictionary
        """
        self.config.update(config)

    def get_info(self) -> Dict[str, Any]:
        """
        Summarize this analyzer as a plain dictionary.

        Returns:
            Dictionary containing EDA analyzer information
        """
        return dict(
            name=self.name,
            description=self.description,
            config=self.config,
        )
class BaseClassificationModel(ABC):
    """
    Abstract interface for classification models.

    Subclasses implement the full train/predict/evaluate cycle plus
    persistence via save_model() and load_model().
    """

    def __init__(self, name: str, description: str = ""):
        """
        Set up the model wrapper with identifying metadata.

        Args:
            name: Name of the classification model
            description: Description of the classification model
        """
        self.name = name
        self.description = description
        # Underlying estimator; left None here, assigned by subclasses.
        self.model = None
        self.config = {}
        # Training state flag; presumably concrete train() implementations
        # set this to True — the base class never does (TODO confirm).
        self.trained = False

    @abstractmethod
    def train(self, features: List[Dict], **kwargs):
        """
        Fit the model on the given features.

        Args:
            features: List of feature dictionaries
            **kwargs: Additional arguments for training
        """
        pass

    @abstractmethod
    def predict(self, features: List[Dict], **kwargs) -> np.ndarray:
        """
        Predict labels for the given features with the trained model.

        Args:
            features: List of feature dictionaries
            **kwargs: Additional arguments for prediction

        Returns:
            Array of predictions
        """
        pass

    @abstractmethod
    def evaluate(self, features: List[Dict], **kwargs) -> Dict[str, float]:
        """
        Score the model on the given features.

        Args:
            features: List of feature dictionaries
            **kwargs: Additional arguments for evaluation

        Returns:
            Dictionary containing evaluation metrics
        """
        pass

    @abstractmethod
    def save_model(self, filepath: str):
        """
        Persist the trained model to disk.

        Args:
            filepath: Path to save the model
        """
        pass

    @abstractmethod
    def load_model(self, filepath: str):
        """
        Restore a previously saved model from disk.

        Args:
            filepath: Path to the saved model
        """
        pass

    def configure(self, config: Dict[str, Any]):
        """
        Merge the given options into the current configuration.

        Args:
            config: Configuration dictionary
        """
        self.config.update(config)

    def get_info(self) -> Dict[str, Any]:
        """
        Summarize this model as a plain dictionary.

        Returns:
            Dictionary containing model information
        """
        info = dict(name=self.name, description=self.description)
        info['config'] = self.config
        info['trained'] = self.trained
        return info