gaitsetpy.dataset.utils
This file contains the utility functions to download and extract the datasets. Supported datasets: - Daphnet
Maintainer: @aharshit123456
"""
gaitsetpy.dataset.utils

This file contains the utility functions to download and extract the datasets.
Supported datasets:
    - Daphnet

Maintainer: @aharshit123456
"""

## imports
import os
import requests
import zipfile
import tarfile
import json
import pandas as pd
import numpy as np
from glob import glob
from concurrent.futures import ThreadPoolExecutor, as_completed

#################################################################################
############################## DATASET DOWNLOAD #################################
#################################################################################

def download_dataset(dataset_name, data_dir):
    """Download the dataset.

    Dispatches to the dataset-specific download helper.

    Args:
        dataset_name (str): One of 'daphnet', 'mobifall', 'arduous', 'harup',
            'urfall', 'physionet'.
        data_dir (str): Directory where the dataset will be downloaded.

    Raises:
        ValueError: If the dataset name is not supported.
    """
    if dataset_name == "daphnet":
        download_daphnet_data(data_dir)
    elif dataset_name == "mobifall":
        download_mobifall_data(data_dir)
    elif dataset_name == "arduous":
        download_arduous_data(data_dir)
    elif dataset_name == "harup":
        download_harup_data(data_dir)
    elif dataset_name == "urfall":
        download_urfall_data(data_dir)
    elif dataset_name == "physionet":
        # PhysioNet dataset is handled by the PhysioNetLoader itself
        pass
    else:
        raise ValueError(f"Dataset {dataset_name} not supported.")


def download_daphnet_data(data_dir):
    """Download the Daphnet dataset.

    This function downloads the Daphnet Freezing of Gait dataset from the UCI
    Machine Learning Repository. It shows a progress bar during download and
    handles various potential errors. If the file already exists in the
    specified directory, it skips the download.

    Args:
        data_dir (str): Directory where the dataset will be downloaded

    Returns:
        str: Path to the downloaded file

    Raises:
        ConnectionError: If unable to connect to the download URL
        IOError: If unable to create or write to the download directory/file
        Exception: For other unexpected errors during download
    """
    from tqdm import tqdm

    url = "https://archive.ics.uci.edu/static/public/245/daphnet+freezing+of+gait.zip"
    file_path = os.path.join(data_dir, "daphnet.zip")

    # Check if file already exists (non-empty) and skip the download if so.
    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
        print(f"Dataset already exists at: {file_path}")
        return file_path

    try:
        # Create directory if it doesn't exist
        os.makedirs(data_dir, exist_ok=True)
        print(f"Downloading Daphnet dataset to: {file_path}")

        # Start the streaming download; the response headers already carry
        # content-length, so a separate HEAD request is unnecessary.
        # timeout guards against an unresponsive server hanging the call.
        response = requests.get(url, stream=True, timeout=60)
        response.raise_for_status()  # Raise an exception for bad status codes
        total_size = int(response.headers.get('content-length', 0))

        # Initialize progress bar
        progress_bar = tqdm(
            total=total_size,
            unit='iB',
            unit_scale=True,
            desc='Download Progress'
        )

        # Write the file with progress updates
        with open(file_path, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    size = file.write(chunk)
                    progress_bar.update(size)

        progress_bar.close()

        # Verify download completed successfully
        if os.path.getsize(file_path) > 0:
            print(f"Download completed successfully! File saved to: {file_path}")
            return file_path
        else:
            raise IOError("Downloaded file is empty")

    except requests.exceptions.RequestException as e:
        print(f"Error connecting to download URL: {e}")
        if os.path.exists(file_path):
            os.remove(file_path)  # Clean up partial download
        raise ConnectionError(f"Failed to download dataset: {e}")

    except IOError as e:
        print(f"Error writing download file: {e}")
        if os.path.exists(file_path):
            os.remove(file_path)  # Clean up partial download
        raise IOError(f"Failed to save dataset: {e}")

    except Exception as e:
        print(f"Unexpected error during download: {e}")
        if os.path.exists(file_path):
            os.remove(file_path)  # Clean up partial download
        raise Exception(f"Download failed: {e}")


def download_mobifall_data(data_dir):
    """Download the MobiFall dataset."""
    # TODO: not yet implemented
    pass


def download_arduous_data(data_dir):
    """Download the Arduous dataset."""
    # TODO: not yet implemented
    pass


def download_urfall_data(data_dir, sequences=None, data_types=None, use_falls=True, use_adls=True, max_workers: int = 8):
    """
    Download the UrFall dataset files.

    Args:
        data_dir: Directory where the dataset will be downloaded
        sequences: List of specific sequences to download (e.g., ['fall-01', 'adl-01'])
                   If None, downloads based on use_falls and use_adls
        data_types: List of data types to download. Options: 'depth', 'rgb', 'accelerometer',
                    'synchronization', 'video', 'features' (default: ['features'])
        use_falls: Whether to download fall sequences (default: True)
        use_adls: Whether to download ADL sequences (default: True)
        max_workers: Max concurrent download workers (default: 8)

    Returns:
        str: Path to the data directory
    """
    base_url = "http://fenix.univ.rzeszow.pl/~mkepski/ds/data/"

    # Default to downloading pre-extracted features
    if data_types is None:
        data_types = ['features']

    # Create directory if it doesn't exist
    os.makedirs(data_dir, exist_ok=True)

    # Determine which sequences to download.
    # The published dataset has 30 fall sequences and 20 ADL sequences.
    seq_list = []
    if sequences is not None:
        seq_list = sequences
    else:
        if use_falls:
            seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
        if use_adls:
            seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])

    # Prepare feature files (pre-extracted feature CSVs for cam0)
    feature_tasks = []
    if 'features' in data_types:
        if use_falls:
            feature_tasks.append("urfall-cam0-falls.csv")
        if use_adls:
            feature_tasks.append("urfall-cam0-adls.csv")

    # Prepare raw file tasks; maps each data type to its filename suffix.
    file_extension_map = {
        'depth': '-cam0-d.zip',
        'rgb': '-cam0-rgb.zip',
        'accelerometer': '-acc.csv',
        'synchronization': '-data.csv',
        'video': '-cam0.mp4'
    }
    raw_tasks = []
    for seq in seq_list:
        for dtype in data_types:
            if dtype == 'features':
                continue
            if dtype not in file_extension_map:
                continue
            raw_tasks.append(seq + file_extension_map[dtype])

    # Build list of (url, dest_path, desc); skip files already present.
    download_jobs = []
    for filename in feature_tasks:
        dest = os.path.join(data_dir, filename)
        if not os.path.exists(dest):
            download_jobs.append((base_url + filename, dest, filename))
    for filename in raw_tasks:
        dest = os.path.join(data_dir, filename)
        if not os.path.exists(dest):
            download_jobs.append((base_url + filename, dest, filename))

    if not download_jobs:
        print("All requested UrFall files already present.")
        return data_dir

    print(f"Starting concurrent downloads: {len(download_jobs)} file(s) with up to {max_workers} workers...")
    successes = 0
    failures = []

    # Fan out the downloads across a thread pool; each worker returns
    # (ok, info) from _download_file.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_job = {executor.submit(_download_file, url, dest, desc): (url, dest) for url, dest, desc in download_jobs}
        for future in as_completed(future_to_job):
            (url, dest) = future_to_job[future]
            ok, info = future.result()
            if ok:
                successes += 1
            else:
                failures.append((url, info))

    print(f"Completed downloads: {successes} succeeded, {len(failures)} failed.")
    if failures:
        # Only show the first few failures to keep output readable.
        for url, err in failures[:10]:
            print(f"  - Failed: {url} -> {err}")
        if len(failures) > 10:
            print(f"  ... and {len(failures) - 10} more failures")

    return data_dir


#################################################################################
############################## EXTRACT DOWNLOAD #################################
#################################################################################

def extract_dataset(dataset_name, data_dir):
    """Extract the dataset.

    Dispatches to the dataset-specific extraction helper.

    Args:
        dataset_name (str): One of 'daphnet', 'mobifall', 'arduous', 'harup',
            'urfall', 'physionet'.
        data_dir (str): Directory containing the downloaded dataset.

    Raises:
        ValueError: If the dataset name is not supported.
    """
    if dataset_name == "daphnet":
        extract_daphnet_data(data_dir)
    elif dataset_name == "mobifall":
        extract_mobifall_data(data_dir)
    elif dataset_name == "arduous":
        extract_arduous_data(data_dir)
    elif dataset_name == "harup":
        extract_harup_data(data_dir)
    elif dataset_name == "urfall":
        extract_urfall_data(data_dir)
    elif dataset_name == "physionet":
        # PhysioNet dataset is handled by the PhysioNetLoader itself
        pass
    else:
        raise ValueError(f"Dataset {dataset_name} not supported.")


def extract_daphnet_data(data_dir):
    """Extract the Daphnet dataset zip archive into data_dir."""
    file_path = os.path.join(data_dir, "daphnet.zip")
    with zipfile.ZipFile(file_path, "r") as zip_ref:
        zip_ref.extractall(data_dir)


def extract_mobifall_data(data_dir):
    """Extract the MobiFall dataset."""
    # TODO: not yet implemented
    pass


def extract_arduous_data(data_dir):
    """Extract the Arduous dataset."""
    # TODO: not yet implemented
    pass


def extract_urfall_data(data_dir, sequences=None, use_falls=True, use_adls=True):
    """
    Extract the UrFall dataset zip files (depth and RGB data).

    Args:
        data_dir: Directory containing the dataset
        sequences: List of specific sequences to extract
        use_falls: Whether to extract fall sequences
        use_adls: Whether to extract ADL sequences
    """
    # Determine which sequences to extract
    seq_list = []
    if sequences is not None:
        seq_list = sequences
    else:
        if use_falls:
            seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
        if use_adls:
            seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])

    # Extract depth and RGB zip files. The target directory name reuses the
    # first letter of the data type ('d' for depth, 'r' for rgb), matching the
    # archive naming convention.
    for seq in seq_list:
        for data_type, ext in [('depth', '-cam0-d.zip'), ('rgb', '-cam0-rgb.zip')]:
            zip_file = os.path.join(data_dir, seq + ext)
            if os.path.exists(zip_file):
                extract_dir = os.path.join(data_dir, seq + f"-cam0-{data_type[0]}")
                if os.path.exists(extract_dir):
                    print(f"Already extracted: {extract_dir}")
                    continue

                try:
                    print(f"Extracting {zip_file}...")
                    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
                        zip_ref.extractall(extract_dir)
                    print(f"Extracted to: {extract_dir}")
                except Exception as e:
                    # Best-effort: report and continue with remaining archives.
                    print(f"Failed to extract {zip_file}: {e}")


#################################################################################
############################ OTHER UTILS DOWNLOAD ###############################
#################################################################################


def sliding_window(data, window_size, step_size):
    """Split a sequence into fixed-size overlapping windows.

    Args:
        data: Sliceable sequence (list, numpy array, DataFrame, ...).
        window_size (int): Number of samples per window.
        step_size (int): Stride between consecutive window starts.

    Returns:
        list: Windows of length window_size; empty if data is shorter than
        window_size.
    """
    # Only full windows are produced; a trailing partial window is dropped.
    num_windows = (len(data) - window_size) // step_size + 1
    windows = []
    for i in range(num_windows):
        start = i * step_size
        end = start + window_size
        windows.append(data[start:end])
    return windows


def _download_file(url: str, dest_path: str, desc: str = None):
    """Download a single file to dest_path with a simple progress indicator.

    Args:
        url: Source URL.
        dest_path: Destination file path.
        desc: Progress-bar label (defaults to the destination file name).

    Returns:
        tuple: (True, dest_path) on success, (False, error_message) on failure.
        Partial downloads are removed on failure.
    """
    from tqdm import tqdm
    try:
        response = requests.get(url, stream=True, timeout=60)
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))
        progress_bar = tqdm(total=total_size, unit='iB', unit_scale=True, desc=desc or os.path.basename(dest_path))
        with open(dest_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    written = f.write(chunk)
                    progress_bar.update(written)
        progress_bar.close()
        # Guard against truncated transfers when the server reported a size.
        if total_size != 0 and os.path.getsize(dest_path) < total_size:
            raise IOError(f"Incomplete download for {dest_path}")
        return True, dest_path
    except Exception as e:
        # Clean up any partial file; ignore cleanup errors so the original
        # failure is what gets reported.
        try:
            if os.path.exists(dest_path):
                os.remove(dest_path)
        except Exception:
            pass
        return False, f"{dest_path}: {e}"


def download_harup_data(data_dir):
    """
    Download the HAR-UP dataset.

    This function provides instructions for downloading the HAR-UP dataset and offers
    an option to download it directly from Google Drive as a ZIP file.

    Args:
        data_dir (str): Directory where the dataset will be downloaded

    Returns:
        str: Path to the extracted dataset directory or None if not performed
    """
    from tqdm import tqdm
    import webbrowser

    # Create directory if it doesn't exist
    os.makedirs(data_dir, exist_ok=True)

    # Define file paths
    zip_filename = "HAR-UP_Dataset.zip"
    zip_path = os.path.join(data_dir, zip_filename)
    dataset_dir = os.path.join(data_dir, "DataSet")

    # Check if dataset directory already exists
    if os.path.exists(dataset_dir):
        print(f"HAR-UP dataset directory already exists at: {dataset_dir}")
        return dataset_dir

    # Direct download URL from Google Drive (update if needed)
    url = "https://drive.usercontent.google.com/download?id=1Y2MSUijPcB7--PcGoAKhGeqI8GxKK0Pm&export=download&authuser=0"
    print("\n" + "="*80)
    print("HAR-UP DATASET DOWNLOAD")
    print("="*80)
    print("The HAR-UP dataset can be downloaded automatically or manually.")
    print("\nOptions:")
    print("1. Automatic download (recommended)")
    print("2. Manual download")
    print("3. Skip download (if you already have the dataset elsewhere)")

    choice = input("\nEnter your choice (1-3): ")

    if choice == "1":
        try:
            print(f"\nDownloading HAR-UP dataset ZIP to: {zip_path}")
            print("This may take some time depending on your internet connection...")
            # timeout guards the connection; streaming keeps memory bounded.
            response = requests.get(url, stream=True, timeout=60)
            response.raise_for_status()  # Raise an exception for bad status codes
            total_size = int(response.headers.get('content-length', 0))
            progress_bar = tqdm(total=total_size, unit='iB', unit_scale=True, desc='Download Progress')
            with open(zip_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        size = file.write(chunk)
                        progress_bar.update(size)
            progress_bar.close()
            if os.path.getsize(zip_path) > 0:
                print(f"Download completed successfully! File saved to: {zip_path}")
                print("\nExtracting the downloaded ZIP file...")
                with zipfile.ZipFile(zip_path, "r") as zip_ref:
                    zip_ref.extractall(data_dir)
                # Check for DataSet folder
                if not os.path.exists(dataset_dir):
                    # Sometimes the zip may contain a top-level folder, e.g., HAR-UP_Dataset/DataSet/...
                    for entry in os.listdir(data_dir):
                        entry_path = os.path.join(data_dir, entry)
                        if os.path.isdir(entry_path) and os.path.exists(os.path.join(entry_path, "DataSet")):
                            import shutil
                            shutil.move(os.path.join(entry_path, "DataSet"), dataset_dir)
                            break
                if os.path.exists(dataset_dir):
                    print(f"Extraction complete. DataSet directory at: {dataset_dir}")
                    return dataset_dir
                else:
                    print("Extraction failed: DataSet directory not found after extraction.")
                    return None
            else:
                raise IOError("Downloaded file is empty")
        except Exception as e:
            print(f"\nError during download: {e}")
            print("\nPlease try the manual download option instead.")
            if os.path.exists(zip_path):
                os.remove(zip_path)  # Clean up partial download
            return None

    elif choice == "2":
        print("\nOpening the HAR-UP dataset download page in your browser...")
        print("Please download the ZIP file and save it to the following location:")
        print(f"  {zip_path}")
        webbrowser.open("https://sites.google.com/up.edu.mx/har-up/download")
        print("\nAfter downloading, please ensure the ZIP file is named 'HAR-UP_Dataset.zip' and placed in your data directory.")
        print("Then, rerun this function or choose option 1 to extract.")
        return None

    elif choice == "3":
        print("\nSkipping download. Please ensure the dataset is available at:")
        print(f"  {os.path.join(data_dir, 'DataSet')}")
        return None

    else:
        print("\nInvalid choice. Please run again and select a valid option.")
        return None


def extract_harup_data(data_dir):
    """
    Extract the HAR-UP dataset zip file if not already extracted.
    """
    dataset_dir = os.path.join(data_dir, "DataSet")
    if os.path.exists(dataset_dir):
        print(f"HAR-UP dataset already extracted at: {dataset_dir}")
        return
    zip_path = os.path.join(data_dir, "HAR-UP_Dataset.zip")
    if not os.path.exists(zip_path):
        print(f"HAR-UP zip file not found at: {zip_path}")
        print("Please run download_harup_data first.")
        return
    print(f"Extracting HAR-UP dataset zip to: {data_dir}")
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(data_dir)
    print(f"Extraction complete.")
25def download_dataset(dataset_name, data_dir): 26 """Download the dataset.""" 27 if dataset_name == "daphnet": 28 download_daphnet_data(data_dir) 29 elif dataset_name == "mobifall": 30 download_mobifall_data(data_dir) 31 elif dataset_name == "arduous": 32 download_arduous_data(data_dir) 33 elif dataset_name == "harup": 34 download_harup_data(data_dir) 35 elif dataset_name == "urfall": 36 download_urfall_data(data_dir) 37 elif dataset_name == "physionet": 38 # PhysioNet dataset is handled by the PhysioNetLoader itself 39 pass 40 else: 41 raise ValueError(f"Dataset {dataset_name} not supported.")
Download the dataset.
44def download_daphnet_data(data_dir): 45 """Download the Daphnet dataset. 46 47 This function downloads the Daphnet Freezing of Gait dataset from the UCI Machine Learning Repository. 48 It shows a progress bar during download and handles various potential errors. 49 If the file already exists in the specified directory, it skips the download. 50 51 Args: 52 data_dir (str): Directory where the dataset will be downloaded 53 54 Returns: 55 str: Path to the downloaded file 56 57 Raises: 58 ConnectionError: If unable to connect to the download URL 59 IOError: If unable to create or write to the download directory/file 60 Exception: For other unexpected errors during download 61 """ 62 import os 63 import requests 64 from tqdm import tqdm 65 66 url = "https://archive.ics.uci.edu/static/public/245/daphnet+freezing+of+gait.zip" 67 file_path = os.path.join(data_dir, "daphnet.zip") 68 69 # Check if file already exists 70 if os.path.exists(file_path) and os.path.getsize(file_path) > 0: 71 print(f"Dataset already exists at: {file_path}") 72 return file_path 73 74 try: 75 # Create directory if it doesn't exist 76 os.makedirs(data_dir, exist_ok=True) 77 print(f"Downloading Daphnet dataset to: {file_path}") 78 79 # Send a HEAD request first to get the file size 80 response = requests.head(url) 81 total_size = int(response.headers.get('content-length', 0)) 82 83 # Start the download with progress bar 84 response = requests.get(url, stream=True) 85 response.raise_for_status() # Raise an exception for bad status codes 86 87 # Initialize progress bar 88 progress_bar = tqdm( 89 total=total_size, 90 unit='iB', 91 unit_scale=True, 92 desc='Download Progress' 93 ) 94 95 # Write the file with progress updates 96 with open(file_path, "wb") as file: 97 for chunk in response.iter_content(chunk_size=8192): 98 if chunk: 99 size = file.write(chunk) 100 progress_bar.update(size) 101 102 progress_bar.close() 103 104 # Verify download completed successfully 105 if os.path.getsize(file_path) > 0: 
106 print(f"Download completed successfully! File saved to: {file_path}") 107 return file_path 108 else: 109 raise IOError("Downloaded file is empty") 110 111 except requests.exceptions.RequestException as e: 112 print(f"Error connecting to download URL: {e}") 113 if os.path.exists(file_path): 114 os.remove(file_path) # Clean up partial download 115 raise ConnectionError(f"Failed to download dataset: {e}") 116 117 except IOError as e: 118 print(f"Error writing download file: {e}") 119 if os.path.exists(file_path): 120 os.remove(file_path) # Clean up partial download 121 raise IOError(f"Failed to save dataset: {e}") 122 123 except Exception as e: 124 print(f"Unexpected error during download: {e}") 125 if os.path.exists(file_path): 126 os.remove(file_path) # Clean up partial download 127 raise Exception(f"Download failed: {e}")
Download the Daphnet dataset.
This function downloads the Daphnet Freezing of Gait dataset from the UCI Machine Learning Repository. It shows a progress bar during download and handles various potential errors. If the file already exists in the specified directory, it skips the download.
Args: data_dir (str): Directory where the dataset will be downloaded
Returns: str: Path to the downloaded file
Raises: ConnectionError: If unable to connect to the download URL IOError: If unable to create or write to the download directory/file Exception: For other unexpected errors during download
Download the MobiFall dataset.
Download the Arduous dataset.
137def download_urfall_data(data_dir, sequences=None, data_types=None, use_falls=True, use_adls=True, max_workers: int = 8): 138 """ 139 Download the UrFall dataset files. 140 141 Args: 142 data_dir: Directory where the dataset will be downloaded 143 sequences: List of specific sequences to download (e.g., ['fall-01', 'adl-01']) 144 If None, downloads based on use_falls and use_adls 145 data_types: List of data types to download. Options: 'depth', 'rgb', 'accelerometer', 146 'synchronization', 'video', 'features' (default: ['features']) 147 use_falls: Whether to download fall sequences (default: True) 148 use_adls: Whether to download ADL sequences (default: True) 149 max_workers: Max concurrent download workers (default: 8) 150 151 Returns: 152 str: Path to the data directory 153 """ 154 from tqdm import tqdm 155 156 base_url = "http://fenix.univ.rzeszow.pl/~mkepski/ds/data/" 157 158 # Default to downloading pre-extracted features 159 if data_types is None: 160 data_types = ['features'] 161 162 # Create directory if it doesn't exist 163 os.makedirs(data_dir, exist_ok=True) 164 165 # Determine which sequences to download 166 seq_list = [] 167 if sequences is not None: 168 seq_list = sequences 169 else: 170 if use_falls: 171 seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)]) 172 if use_adls: 173 seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)]) 174 175 # Prepare feature files 176 feature_tasks = [] 177 if 'features' in data_types: 178 if use_falls: 179 feature_tasks.append("urfall-cam0-falls.csv") 180 if use_adls: 181 feature_tasks.append("urfall-cam0-adls.csv") 182 183 # Prepare raw file tasks 184 file_extension_map = { 185 'depth': '-cam0-d.zip', 186 'rgb': '-cam0-rgb.zip', 187 'accelerometer': '-acc.csv', 188 'synchronization': '-data.csv', 189 'video': '-cam0.mp4' 190 } 191 raw_tasks = [] 192 for seq in seq_list: 193 for dtype in data_types: 194 if dtype == 'features': 195 continue 196 if dtype not in file_extension_map: 197 continue 198 
raw_tasks.append(seq + file_extension_map[dtype]) 199 200 # Build list of (url, dest_path, desc) 201 download_jobs = [] 202 for filename in feature_tasks: 203 dest = os.path.join(data_dir, filename) 204 if not os.path.exists(dest): 205 download_jobs.append((base_url + filename, dest, filename)) 206 for filename in raw_tasks: 207 dest = os.path.join(data_dir, filename) 208 if not os.path.exists(dest): 209 download_jobs.append((base_url + filename, dest, filename)) 210 211 if not download_jobs: 212 print("All requested UrFall files already present.") 213 return data_dir 214 215 print(f"Starting concurrent downloads: {len(download_jobs)} file(s) with up to {max_workers} workers...") 216 successes = 0 217 failures = [] 218 219 with ThreadPoolExecutor(max_workers=max_workers) as executor: 220 future_to_job = {executor.submit(_download_file, url, dest, desc): (url, dest) for url, dest, desc in download_jobs} 221 for future in as_completed(future_to_job): 222 (url, dest) = future_to_job[future] 223 ok, info = future.result() 224 if ok: 225 successes += 1 226 else: 227 failures.append((url, info)) 228 229 print(f"Completed downloads: {successes} succeeded, {len(failures)} failed.") 230 if failures: 231 for url, err in failures[:10]: 232 print(f" - Failed: {url} -> {err}") 233 if len(failures) > 10: 234 print(f" ... and {len(failures) - 10} more failures") 235 236 return data_dir
Download the UrFall dataset files.
Args: data_dir: Directory where the dataset will be downloaded sequences: List of specific sequences to download (e.g., ['fall-01', 'adl-01']) If None, downloads based on use_falls and use_adls data_types: List of data types to download. Options: 'depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features' (default: ['features']) use_falls: Whether to download fall sequences (default: True) use_adls: Whether to download ADL sequences (default: True) max_workers: Max concurrent download workers (default: 8)
Returns: str: Path to the data directory
243def extract_dataset(dataset_name, data_dir): 244 """Extract the dataset.""" 245 if dataset_name == "daphnet": 246 extract_daphnet_data(data_dir) 247 elif dataset_name == "mobifall": 248 extract_mobifall_data(data_dir) 249 elif dataset_name == "arduous": 250 extract_arduous_data(data_dir) 251 elif dataset_name == "harup": 252 extract_harup_data(data_dir) 253 elif dataset_name == "urfall": 254 extract_urfall_data(data_dir) 255 elif dataset_name == "physionet": 256 # PhysioNet dataset is handled by the PhysioNetLoader itself 257 pass 258 else: 259 raise ValueError(f"Dataset {dataset_name} not supported.")
Extract the dataset.
262def extract_daphnet_data(data_dir): 263 """Extract the Daphnet dataset.""" 264 file_path = os.path.join(data_dir, "daphnet.zip") 265 with zipfile.ZipFile(file_path, "r") as zip_ref: 266 zip_ref.extractall(data_dir)
Extract the Daphnet dataset.
Extract the MobiFall dataset.
Extract the Arduous dataset.
276def extract_urfall_data(data_dir, sequences=None, use_falls=True, use_adls=True): 277 """ 278 Extract the UrFall dataset zip files (depth and RGB data). 279 280 Args: 281 data_dir: Directory containing the dataset 282 sequences: List of specific sequences to extract 283 use_falls: Whether to extract fall sequences 284 use_adls: Whether to extract ADL sequences 285 """ 286 # Determine which sequences to extract 287 seq_list = [] 288 if sequences is not None: 289 seq_list = sequences 290 else: 291 if use_falls: 292 seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)]) 293 if use_adls: 294 seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)]) 295 296 # Extract depth and RGB zip files 297 for seq in seq_list: 298 for data_type, ext in [('depth', '-cam0-d.zip'), ('rgb', '-cam0-rgb.zip')]: 299 zip_file = os.path.join(data_dir, seq + ext) 300 if os.path.exists(zip_file): 301 extract_dir = os.path.join(data_dir, seq + f"-cam0-{data_type[0]}") 302 if os.path.exists(extract_dir): 303 print(f"Already extracted: {extract_dir}") 304 continue 305 306 try: 307 print(f"Extracting {zip_file}...") 308 with zipfile.ZipFile(zip_file, 'r') as zip_ref: 309 zip_ref.extractall(extract_dir) 310 print(f"Extracted to: {extract_dir}") 311 except Exception as e: 312 print(f"Failed to extract {zip_file}: {e}")
Extract the UrFall dataset zip files (depth and RGB data).
Args: data_dir: Directory containing the dataset sequences: List of specific sequences to extract use_falls: Whether to extract fall sequences use_adls: Whether to extract ADL sequences
354def download_harup_data(data_dir): 355 """ 356 Download the HAR-UP dataset. 357 358 This function provides instructions for downloading the HAR-UP dataset and offers 359 an option to download it directly from Google Drive as a ZIP file. 360 361 Args: 362 data_dir (str): Directory where the dataset will be downloaded 363 364 Returns: 365 str: Path to the extracted dataset directory or None if not performed 366 """ 367 import os 368 import requests 369 from tqdm import tqdm 370 import webbrowser 371 import zipfile 372 373 # Create directory if it doesn't exist 374 os.makedirs(data_dir, exist_ok=True) 375 376 # Define file paths 377 zip_filename = "HAR-UP_Dataset.zip" 378 zip_path = os.path.join(data_dir, zip_filename) 379 dataset_dir = os.path.join(data_dir, "DataSet") 380 381 # Check if dataset directory already exists 382 if os.path.exists(dataset_dir): 383 print(f"HAR-UP dataset directory already exists at: {dataset_dir}") 384 return dataset_dir 385 386 # Direct download URL from Google Drive (update if needed) 387 url = "https://drive.usercontent.google.com/download?id=1Y2MSUijPcB7--PcGoAKhGeqI8GxKK0Pm&export=download&authuser=0" 388 print("\n" + "="*80) 389 print("HAR-UP DATASET DOWNLOAD") 390 print("="*80) 391 print("The HAR-UP dataset can be downloaded automatically or manually.") 392 print("\nOptions:") 393 print("1. Automatic download (recommended)") 394 print("2. Manual download") 395 print("3. 
Skip download (if you already have the dataset elsewhere)") 396 397 choice = input("\nEnter your choice (1-3): ") 398 399 if choice == "1": 400 try: 401 print(f"\nDownloading HAR-UP dataset ZIP to: {zip_path}") 402 print("This may take some time depending on your internet connection...") 403 response = requests.get(url, stream=True) 404 response.raise_for_status() # Raise an exception for bad status codes 405 total_size = int(response.headers.get('content-length', 0)) 406 progress_bar = tqdm(total=total_size, unit='iB', unit_scale=True, desc='Download Progress') 407 with open(zip_path, "wb") as file: 408 for chunk in response.iter_content(chunk_size=8192): 409 if chunk: 410 size = file.write(chunk) 411 progress_bar.update(size) 412 progress_bar.close() 413 if os.path.getsize(zip_path) > 0: 414 print(f"Download completed successfully! File saved to: {zip_path}") 415 print("\nExtracting the downloaded ZIP file...") 416 with zipfile.ZipFile(zip_path, "r") as zip_ref: 417 zip_ref.extractall(data_dir) 418 # Check for DataSet folder 419 if not os.path.exists(dataset_dir): 420 # Sometimes the zip may contain a top-level folder, e.g., HAR-UP_Dataset/DataSet/... 421 for entry in os.listdir(data_dir): 422 entry_path = os.path.join(data_dir, entry) 423 if os.path.isdir(entry_path) and os.path.exists(os.path.join(entry_path, "DataSet")): 424 import shutil 425 shutil.move(os.path.join(entry_path, "DataSet"), dataset_dir) 426 break 427 if os.path.exists(dataset_dir): 428 print(f"Extraction complete. 
DataSet directory at: {dataset_dir}") 429 return dataset_dir 430 else: 431 print("Extraction failed: DataSet directory not found after extraction.") 432 return None 433 else: 434 raise IOError("Downloaded file is empty") 435 except Exception as e: 436 print(f"\nError during download: {e}") 437 print("\nPlease try the manual download option instead.") 438 if os.path.exists(zip_path): 439 os.remove(zip_path) # Clean up partial download 440 return None 441 442 elif choice == "2": 443 print("\nOpening the HAR-UP dataset download page in your browser...") 444 print("Please download the ZIP file and save it to the following location:") 445 print(f" {zip_path}") 446 webbrowser.open("https://sites.google.com/up.edu.mx/har-up/download") 447 print("\nAfter downloading, please ensure the ZIP file is named 'HAR-UP_Dataset.zip' and placed in your data directory.") 448 print("Then, rerun this function or choose option 1 to extract.") 449 return None 450 451 elif choice == "3": 452 print("\nSkipping download. Please ensure the dataset is available at:") 453 print(f" {os.path.join(data_dir, 'DataSet')}") 454 return None 455 456 else: 457 print("\nInvalid choice. Please run again and select a valid option.") 458 return None
Download the HAR-UP dataset.
This function provides instructions for downloading the HAR-UP dataset and offers an option to download it directly from Google Drive as a ZIP file.
Args: data_dir (str): Directory where the dataset will be downloaded
Returns: str: Path to the extracted dataset directory or None if not performed
461def extract_harup_data(data_dir): 462 """ 463 Extract the HAR-UP dataset zip file if not already extracted. 464 """ 465 dataset_dir = os.path.join(data_dir, "DataSet") 466 if os.path.exists(dataset_dir): 467 print(f"HAR-UP dataset already extracted at: {dataset_dir}") 468 return 469 zip_path = os.path.join(data_dir, "HAR-UP_Dataset.zip") 470 if not os.path.exists(zip_path): 471 print(f"HAR-UP zip file not found at: {zip_path}") 472 print("Please run download_harup_data first.") 473 return 474 import zipfile 475 print(f"Extracting HAR-UP dataset zip to: {data_dir}") 476 with zipfile.ZipFile(zip_path, "r") as zip_ref: 477 zip_ref.extractall(data_dir) 478 print(f"Extraction complete.")
Extract the HAR-UP dataset zip file if not already extracted.