gaitsetpy.dataset.utils

This file contains the utility functions to download and extract the datasets. Supported datasets: Daphnet.

Maintainer: @aharshit123456

  1'''
  2    This file contains the utility functions to download and extract the datasets.
  3    Supported datasets:
  4    - Daphnet
  5    
  6Maintainer: @aharshit123456
  7'''
  8
  9## imports
 10import os
 11import requests
 12import zipfile
 13import tarfile
 14import json
 15import pandas as pd
 16import numpy as np
 17from glob import glob
 18from concurrent.futures import ThreadPoolExecutor, as_completed
 19
 20#################################################################################
 21############################## DATASET DOWNLOAD #################################
 22#################################################################################
 23
 24def download_dataset(dataset_name, data_dir):
 25    """Download the dataset."""
 26    if dataset_name == "daphnet":
 27        download_daphnet_data(data_dir)
 28    elif dataset_name == "mobifall":
 29        download_mobifall_data(data_dir)
 30    elif dataset_name == "arduous":
 31        download_arduous_data(data_dir)
 32    elif dataset_name == "harup":
 33        download_harup_data(data_dir)
 34    elif dataset_name == "urfall":
 35        download_urfall_data(data_dir)
 36    elif dataset_name == "physionet":
 37        # PhysioNet dataset is handled by the PhysioNetLoader itself
 38        pass
 39    else:
 40        raise ValueError(f"Dataset {dataset_name} not supported.")
 41    
 42
 43def download_daphnet_data(data_dir):
 44    """Download the Daphnet dataset.
 45    
 46    This function downloads the Daphnet Freezing of Gait dataset from the UCI Machine Learning Repository.
 47    It shows a progress bar during download and handles various potential errors.
 48    If the file already exists in the specified directory, it skips the download.
 49    
 50    Args:
 51        data_dir (str): Directory where the dataset will be downloaded
 52        
 53    Returns:
 54        str: Path to the downloaded file
 55        
 56    Raises:
 57        ConnectionError: If unable to connect to the download URL
 58        IOError: If unable to create or write to the download directory/file
 59        Exception: For other unexpected errors during download
 60    """
 61    import os
 62    import requests
 63    from tqdm import tqdm
 64    
 65    url = "https://archive.ics.uci.edu/static/public/245/daphnet+freezing+of+gait.zip"
 66    file_path = os.path.join(data_dir, "daphnet.zip")
 67    
 68    # Check if file already exists
 69    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
 70        print(f"Dataset already exists at: {file_path}")
 71        return file_path
 72    
 73    try:
 74        # Create directory if it doesn't exist
 75        os.makedirs(data_dir, exist_ok=True)
 76        print(f"Downloading Daphnet dataset to: {file_path}")
 77        
 78        # Send a HEAD request first to get the file size
 79        response = requests.head(url)
 80        total_size = int(response.headers.get('content-length', 0))
 81        
 82        # Start the download with progress bar
 83        response = requests.get(url, stream=True)
 84        response.raise_for_status()  # Raise an exception for bad status codes
 85        
 86        # Initialize progress bar
 87        progress_bar = tqdm(
 88            total=total_size,
 89            unit='iB',
 90            unit_scale=True,
 91            desc='Download Progress'
 92        )
 93        
 94        # Write the file with progress updates
 95        with open(file_path, "wb") as file:
 96            for chunk in response.iter_content(chunk_size=8192):
 97                if chunk:
 98                    size = file.write(chunk)
 99                    progress_bar.update(size)
100        
101        progress_bar.close()
102        
103        # Verify download completed successfully
104        if os.path.getsize(file_path) > 0:
105            print(f"Download completed successfully! File saved to: {file_path}")
106            return file_path
107        else:
108            raise IOError("Downloaded file is empty")
109            
110    except requests.exceptions.RequestException as e:
111        print(f"Error connecting to download URL: {e}")
112        if os.path.exists(file_path):
113            os.remove(file_path)  # Clean up partial download
114        raise ConnectionError(f"Failed to download dataset: {e}")
115        
116    except IOError as e:
117        print(f"Error writing download file: {e}")
118        if os.path.exists(file_path):
119            os.remove(file_path)  # Clean up partial download
120        raise IOError(f"Failed to save dataset: {e}")
121        
122    except Exception as e:
123        print(f"Unexpected error during download: {e}")
124        if os.path.exists(file_path):
125            os.remove(file_path)  # Clean up partial download
126        raise Exception(f"Download failed: {e}")
127
def download_mobifall_data(data_dir):
    """Download the MobiFall dataset.

    Args:
        data_dir (str): Directory where the dataset would be downloaded.

    Note:
        Not yet implemented; this placeholder keeps
        download_dataset("mobifall", ...) working as a no-op.
    """
    pass
131
def download_arduous_data(data_dir):
    """Download the Arduous dataset.

    Args:
        data_dir (str): Directory where the dataset would be downloaded.

    Note:
        Not yet implemented; this placeholder keeps
        download_dataset("arduous", ...) working as a no-op.
    """
    pass
135
def download_urfall_data(data_dir, sequences=None, data_types=None, use_falls=True, use_adls=True, max_workers: int = 8):
    """
    Download the UrFall dataset files.

    Args:
        data_dir: Directory where the dataset will be downloaded
        sequences: List of specific sequences to download (e.g., ['fall-01', 'adl-01'])
                  If None, downloads based on use_falls and use_adls
        data_types: List of data types to download. Options: 'depth', 'rgb', 'accelerometer',
                   'synchronization', 'video', 'features' (default: ['features'])
        use_falls: Whether to download fall sequences (default: True)
        use_adls: Whether to download ADL sequences (default: True)
        max_workers: Max concurrent download workers (default: 8)

    Returns:
        str: Path to the data directory
    """
    base_url = "http://fenix.univ.rzeszow.pl/~mkepski/ds/data/"

    # Default to downloading pre-extracted features
    if data_types is None:
        data_types = ['features']

    # Create directory if it doesn't exist
    os.makedirs(data_dir, exist_ok=True)

    # Determine which sequences to download
    if sequences is not None:
        seq_list = sequences
    else:
        seq_list = []
        if use_falls:
            seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
        if use_adls:
            seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])

    # Collect every remote filename to fetch: the pre-extracted feature CSVs
    # first, then per-sequence raw files named by the server's suffix scheme.
    filenames = []
    if 'features' in data_types:
        if use_falls:
            filenames.append("urfall-cam0-falls.csv")
        if use_adls:
            filenames.append("urfall-cam0-adls.csv")

    file_extension_map = {
        'depth': '-cam0-d.zip',
        'rgb': '-cam0-rgb.zip',
        'accelerometer': '-acc.csv',
        'synchronization': '-data.csv',
        'video': '-cam0.mp4'
    }
    for seq in seq_list:
        for dtype in data_types:
            # 'features' has no per-sequence file; unknown types are ignored.
            if dtype == 'features' or dtype not in file_extension_map:
                continue
            filenames.append(seq + file_extension_map[dtype])

    # Build list of (url, dest_path, desc), skipping files already on disk.
    # (Previously two identical loops did this for features and raw files.)
    download_jobs = []
    for filename in filenames:
        dest = os.path.join(data_dir, filename)
        if not os.path.exists(dest):
            download_jobs.append((base_url + filename, dest, filename))

    if not download_jobs:
        print("All requested UrFall files already present.")
        return data_dir

    print(f"Starting concurrent downloads: {len(download_jobs)} file(s) with up to {max_workers} workers...")
    successes = 0
    failures = []

    # _download_file never raises: it returns (ok, info), so future.result()
    # is safe to call without a try block here.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_job = {executor.submit(_download_file, url, dest, desc): (url, dest) for url, dest, desc in download_jobs}
        for future in as_completed(future_to_job):
            (url, dest) = future_to_job[future]
            ok, info = future.result()
            if ok:
                successes += 1
            else:
                failures.append((url, info))

    print(f"Completed downloads: {successes} succeeded, {len(failures)} failed.")
    if failures:
        for url, err in failures[:10]:
            print(f" - Failed: {url} -> {err}")
        if len(failures) > 10:
            print(f" ... and {len(failures) - 10} more failures")

    return data_dir
236
237
238#################################################################################
239############################## EXTRACT DOWNLOAD #################################
240#################################################################################
241
def extract_dataset(dataset_name, data_dir):
    """Extract a previously downloaded dataset archive.

    Args:
        dataset_name (str): One of "daphnet", "mobifall", "arduous",
            "harup", "urfall", or "physionet".
        data_dir (str): Directory containing the downloaded archive.

    Raises:
        ValueError: If dataset_name is not one of the supported datasets.
    """
    # Lambdas defer name resolution; only the chosen extractor is invoked.
    extractors = {
        "daphnet": lambda: extract_daphnet_data(data_dir),
        "mobifall": lambda: extract_mobifall_data(data_dir),
        "arduous": lambda: extract_arduous_data(data_dir),
        "harup": lambda: extract_harup_data(data_dir),
        "urfall": lambda: extract_urfall_data(data_dir),
        # PhysioNet dataset is handled by the PhysioNetLoader itself
        "physionet": lambda: None,
    }
    extract = extractors.get(dataset_name)
    if extract is None:
        raise ValueError(f"Dataset {dataset_name} not supported.")
    extract()
259    
260
def extract_daphnet_data(data_dir):
    """Unpack the previously downloaded daphnet.zip archive into data_dir."""
    archive = os.path.join(data_dir, "daphnet.zip")
    with zipfile.ZipFile(archive, "r") as archive_ref:
        archive_ref.extractall(path=data_dir)
266
def extract_mobifall_data(data_dir):
    """Extract the MobiFall dataset.

    Args:
        data_dir (str): Directory that would contain the archive.

    Note:
        Not yet implemented; this placeholder keeps
        extract_dataset("mobifall", ...) working as a no-op.
    """
    pass
270
def extract_arduous_data(data_dir):
    """Extract the Arduous dataset.

    Args:
        data_dir (str): Directory that would contain the archive.

    Note:
        Not yet implemented; this placeholder keeps
        extract_dataset("arduous", ...) working as a no-op.
    """
    pass
274
def extract_urfall_data(data_dir, sequences=None, use_falls=True, use_adls=True):
    """
    Extract the UrFall depth/RGB zip archives for the selected sequences.

    Args:
        data_dir: Directory containing the downloaded dataset files
        sequences: Explicit list of sequence names to extract; when None the
            list is derived from use_falls / use_adls
        use_falls: Whether to extract fall sequences (fall-01 .. fall-30)
        use_adls: Whether to extract ADL sequences (adl-01 .. adl-20)
    """
    # Derive the sequence list when the caller did not give one explicitly.
    if sequences is None:
        sequences = []
        if use_falls:
            sequences += [f"fall-{i:02d}" for i in range(1, 31)]
        if use_adls:
            sequences += [f"adl-{i:02d}" for i in range(1, 21)]

    # Only the zipped camera streams (depth / RGB) need extraction; CSVs are
    # downloaded as-is. Each archive unpacks into a directory named after the
    # first letter of its data type (e.g. fall-01-cam0-d).
    archive_kinds = (('depth', '-cam0-d.zip'), ('rgb', '-cam0-rgb.zip'))
    for seq in sequences:
        for kind, suffix in archive_kinds:
            zip_file = os.path.join(data_dir, seq + suffix)
            if not os.path.exists(zip_file):
                continue
            extract_dir = os.path.join(data_dir, seq + f"-cam0-{kind[0]}")
            if os.path.exists(extract_dir):
                print(f"Already extracted: {extract_dir}")
                continue
            try:
                print(f"Extracting {zip_file}...")
                with zipfile.ZipFile(zip_file, 'r') as zip_ref:
                    zip_ref.extractall(extract_dir)
                print(f"Extracted to: {extract_dir}")
            except Exception as e:
                print(f"Failed to extract {zip_file}: {e}")
312
313
314#################################################################################
315############################ OTHER UTILS DOWNLOAD ###############################
316#################################################################################
317
318
def sliding_window(data, window_size, step_size):
    """Return the list of fixed-size windows taken from data.

    Consecutive windows start step_size apart; a trailing remainder shorter
    than window_size is dropped, and a sequence shorter than window_size
    yields an empty list.
    """
    count = (len(data) - window_size) // step_size + 1
    return [data[k * step_size : k * step_size + window_size]
            for k in range(count)]
327
def _download_file(url: str, dest_path: str, desc: str = None):
    """Fetch url into dest_path, streaming chunks through a tqdm bar.

    Returns (True, dest_path) on success, or (False, "<path>: <error>") on
    any failure; a partially written file is removed before returning.
    """
    from tqdm import tqdm
    try:
        resp = requests.get(url, stream=True, timeout=60)
        resp.raise_for_status()
        expected = int(resp.headers.get('content-length', 0))
        bar = tqdm(total=expected, unit='iB', unit_scale=True, desc=desc or os.path.basename(dest_path))
        with open(dest_path, 'wb') as out:
            for piece in resp.iter_content(chunk_size=8192):
                if piece:
                    bar.update(out.write(piece))
        bar.close()
        # A shorter-than-advertised file means the stream was cut off.
        if expected != 0 and os.path.getsize(dest_path) < expected:
            raise IOError(f"Incomplete download for {dest_path}")
        return True, dest_path
    except Exception as e:
        # Best-effort cleanup of a partial file; never let cleanup mask
        # the original failure.
        try:
            if os.path.exists(dest_path):
                os.remove(dest_path)
        except Exception:
            pass
        return False, f"{dest_path}: {e}"
352
def download_harup_data(data_dir):
    """
    Download the HAR-UP dataset.

    This function provides instructions for downloading the HAR-UP dataset and offers
    an option to download it directly from Google Drive as a ZIP file. It is
    interactive: the user picks automatic download, manual download, or skip
    via a stdin prompt.

    Args:
        data_dir (str): Directory where the dataset will be downloaded

    Returns:
        str: Path to the extracted dataset directory or None if not performed
    """
    import os
    import requests
    from tqdm import tqdm
    import webbrowser
    import zipfile

    # Create directory if it doesn't exist
    os.makedirs(data_dir, exist_ok=True)

    # Define file paths
    zip_filename = "HAR-UP_Dataset.zip"
    zip_path = os.path.join(data_dir, zip_filename)
    dataset_dir = os.path.join(data_dir, "DataSet")

    # Check if dataset directory already exists; nothing to do in that case.
    if os.path.exists(dataset_dir):
        print(f"HAR-UP dataset directory already exists at: {dataset_dir}")
        return dataset_dir

    # Direct download URL from Google Drive (update if needed)
    url = "https://drive.usercontent.google.com/download?id=1Y2MSUijPcB7--PcGoAKhGeqI8GxKK0Pm&export=download&authuser=0"
    print("\n" + "="*80)
    print("HAR-UP DATASET DOWNLOAD")
    print("="*80)
    print("The HAR-UP dataset can be downloaded automatically or manually.")
    print("\nOptions:")
    print("1. Automatic download (recommended)")
    print("2. Manual download")
    print("3. Skip download (if you already have the dataset elsewhere)")

    # NOTE(review): interactive prompt — this blocks when run non-interactively.
    choice = input("\nEnter your choice (1-3): ")

    if choice == "1":
        try:
            print(f"\nDownloading HAR-UP dataset ZIP to: {zip_path}")
            print("This may take some time depending on your internet connection...")
            response = requests.get(url, stream=True)
            response.raise_for_status()  # Raise an exception for bad status codes
            total_size = int(response.headers.get('content-length', 0))
            progress_bar = tqdm(total=total_size, unit='iB', unit_scale=True, desc='Download Progress')
            # Stream the ZIP to disk in 8 KiB chunks, updating the bar as we go.
            with open(zip_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        size = file.write(chunk)
                        progress_bar.update(size)
            progress_bar.close()
            if os.path.getsize(zip_path) > 0:
                print(f"Download completed successfully! File saved to: {zip_path}")
                print("\nExtracting the downloaded ZIP file...")
                with zipfile.ZipFile(zip_path, "r") as zip_ref:
                    zip_ref.extractall(data_dir)
                # Check for DataSet folder
                if not os.path.exists(dataset_dir):
                    # Sometimes the zip may contain a top-level folder, e.g., HAR-UP_Dataset/DataSet/...
                    # In that case, move the nested DataSet up to the expected location.
                    for entry in os.listdir(data_dir):
                        entry_path = os.path.join(data_dir, entry)
                        if os.path.isdir(entry_path) and os.path.exists(os.path.join(entry_path, "DataSet")):
                            import shutil
                            shutil.move(os.path.join(entry_path, "DataSet"), dataset_dir)
                            break
                if os.path.exists(dataset_dir):
                    print(f"Extraction complete. DataSet directory at: {dataset_dir}")
                    return dataset_dir
                else:
                    print("Extraction failed: DataSet directory not found after extraction.")
                    return None
            else:
                raise IOError("Downloaded file is empty")
        except Exception as e:
            print(f"\nError during download: {e}")
            print("\nPlease try the manual download option instead.")
            if os.path.exists(zip_path):
                os.remove(zip_path)  # Clean up partial download
            return None

    elif choice == "2":
        # Manual path: open the official download page and tell the user where
        # to place the archive so a later run can extract it.
        print("\nOpening the HAR-UP dataset download page in your browser...")
        print("Please download the ZIP file and save it to the following location:")
        print(f"  {zip_path}")
        webbrowser.open("https://sites.google.com/up.edu.mx/har-up/download")
        print("\nAfter downloading, please ensure the ZIP file is named 'HAR-UP_Dataset.zip' and placed in your data directory.")
        print("Then, rerun this function or choose option 1 to extract.")
        return None

    elif choice == "3":
        print("\nSkipping download. Please ensure the dataset is available at:")
        print(f"  {os.path.join(data_dir, 'DataSet')}")
        return None

    else:
        print("\nInvalid choice. Please run again and select a valid option.")
        return None
458
459
def extract_harup_data(data_dir):
    """
    Unpack HAR-UP_Dataset.zip into data_dir unless DataSet already exists.
    """
    import zipfile

    target = os.path.join(data_dir, "DataSet")
    if os.path.exists(target):
        print(f"HAR-UP dataset already extracted at: {target}")
        return

    archive = os.path.join(data_dir, "HAR-UP_Dataset.zip")
    if not os.path.exists(archive):
        print(f"HAR-UP zip file not found at: {archive}")
        print("Please run download_harup_data first.")
        return

    print(f"Extracting HAR-UP dataset zip to: {data_dir}")
    with zipfile.ZipFile(archive, "r") as bundle:
        bundle.extractall(data_dir)
    print("Extraction complete.")
def download_dataset(dataset_name, data_dir):
25def download_dataset(dataset_name, data_dir):
26    """Download the dataset."""
27    if dataset_name == "daphnet":
28        download_daphnet_data(data_dir)
29    elif dataset_name == "mobifall":
30        download_mobifall_data(data_dir)
31    elif dataset_name == "arduous":
32        download_arduous_data(data_dir)
33    elif dataset_name == "harup":
34        download_harup_data(data_dir)
35    elif dataset_name == "urfall":
36        download_urfall_data(data_dir)
37    elif dataset_name == "physionet":
38        # PhysioNet dataset is handled by the PhysioNetLoader itself
39        pass
40    else:
41        raise ValueError(f"Dataset {dataset_name} not supported.")

Download the dataset.

def download_daphnet_data(data_dir):
 44def download_daphnet_data(data_dir):
 45    """Download the Daphnet dataset.
 46    
 47    This function downloads the Daphnet Freezing of Gait dataset from the UCI Machine Learning Repository.
 48    It shows a progress bar during download and handles various potential errors.
 49    If the file already exists in the specified directory, it skips the download.
 50    
 51    Args:
 52        data_dir (str): Directory where the dataset will be downloaded
 53        
 54    Returns:
 55        str: Path to the downloaded file
 56        
 57    Raises:
 58        ConnectionError: If unable to connect to the download URL
 59        IOError: If unable to create or write to the download directory/file
 60        Exception: For other unexpected errors during download
 61    """
 62    import os
 63    import requests
 64    from tqdm import tqdm
 65    
 66    url = "https://archive.ics.uci.edu/static/public/245/daphnet+freezing+of+gait.zip"
 67    file_path = os.path.join(data_dir, "daphnet.zip")
 68    
 69    # Check if file already exists
 70    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
 71        print(f"Dataset already exists at: {file_path}")
 72        return file_path
 73    
 74    try:
 75        # Create directory if it doesn't exist
 76        os.makedirs(data_dir, exist_ok=True)
 77        print(f"Downloading Daphnet dataset to: {file_path}")
 78        
 79        # Send a HEAD request first to get the file size
 80        response = requests.head(url)
 81        total_size = int(response.headers.get('content-length', 0))
 82        
 83        # Start the download with progress bar
 84        response = requests.get(url, stream=True)
 85        response.raise_for_status()  # Raise an exception for bad status codes
 86        
 87        # Initialize progress bar
 88        progress_bar = tqdm(
 89            total=total_size,
 90            unit='iB',
 91            unit_scale=True,
 92            desc='Download Progress'
 93        )
 94        
 95        # Write the file with progress updates
 96        with open(file_path, "wb") as file:
 97            for chunk in response.iter_content(chunk_size=8192):
 98                if chunk:
 99                    size = file.write(chunk)
100                    progress_bar.update(size)
101        
102        progress_bar.close()
103        
104        # Verify download completed successfully
105        if os.path.getsize(file_path) > 0:
106            print(f"Download completed successfully! File saved to: {file_path}")
107            return file_path
108        else:
109            raise IOError("Downloaded file is empty")
110            
111    except requests.exceptions.RequestException as e:
112        print(f"Error connecting to download URL: {e}")
113        if os.path.exists(file_path):
114            os.remove(file_path)  # Clean up partial download
115        raise ConnectionError(f"Failed to download dataset: {e}")
116        
117    except IOError as e:
118        print(f"Error writing download file: {e}")
119        if os.path.exists(file_path):
120            os.remove(file_path)  # Clean up partial download
121        raise IOError(f"Failed to save dataset: {e}")
122        
123    except Exception as e:
124        print(f"Unexpected error during download: {e}")
125        if os.path.exists(file_path):
126            os.remove(file_path)  # Clean up partial download
127        raise Exception(f"Download failed: {e}")

Download the Daphnet dataset.

This function downloads the Daphnet Freezing of Gait dataset from the UCI Machine Learning Repository. It shows a progress bar during download and handles various potential errors. If the file already exists in the specified directory, it skips the download.

Args: data_dir (str): Directory where the dataset will be downloaded

Returns: str: Path to the downloaded file

Raises: ConnectionError: If unable to connect to the download URL IOError: If unable to create or write to the download directory/file Exception: For other unexpected errors during download

def download_mobifall_data(data_dir):
129def download_mobifall_data(data_dir):
130    """Download the MobiFall dataset."""
131    pass

Download the MobiFall dataset.

def download_arduous_data(data_dir):
133def download_arduous_data(data_dir):
134    """Download the Arduous dataset."""
135    pass

Download the Arduous dataset.

def download_urfall_data( data_dir, sequences=None, data_types=None, use_falls=True, use_adls=True, max_workers: int = 8):
137def download_urfall_data(data_dir, sequences=None, data_types=None, use_falls=True, use_adls=True, max_workers: int = 8):
138    """
139    Download the UrFall dataset files.
140    
141    Args:
142        data_dir: Directory where the dataset will be downloaded
143        sequences: List of specific sequences to download (e.g., ['fall-01', 'adl-01'])
144                  If None, downloads based on use_falls and use_adls
145        data_types: List of data types to download. Options: 'depth', 'rgb', 'accelerometer',
146                   'synchronization', 'video', 'features' (default: ['features'])
147        use_falls: Whether to download fall sequences (default: True)
148        use_adls: Whether to download ADL sequences (default: True)
149        max_workers: Max concurrent download workers (default: 8)
150        
151    Returns:
152        str: Path to the data directory
153    """
154    from tqdm import tqdm
155    
156    base_url = "http://fenix.univ.rzeszow.pl/~mkepski/ds/data/"
157    
158    # Default to downloading pre-extracted features
159    if data_types is None:
160        data_types = ['features']
161    
162    # Create directory if it doesn't exist
163    os.makedirs(data_dir, exist_ok=True)
164    
165    # Determine which sequences to download
166    seq_list = []
167    if sequences is not None:
168        seq_list = sequences
169    else:
170        if use_falls:
171            seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
172        if use_adls:
173            seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])
174    
175    # Prepare feature files
176    feature_tasks = []
177    if 'features' in data_types:
178        if use_falls:
179            feature_tasks.append("urfall-cam0-falls.csv")
180        if use_adls:
181            feature_tasks.append("urfall-cam0-adls.csv")
182    
183    # Prepare raw file tasks
184    file_extension_map = {
185        'depth': '-cam0-d.zip',
186        'rgb': '-cam0-rgb.zip',
187        'accelerometer': '-acc.csv',
188        'synchronization': '-data.csv',
189        'video': '-cam0.mp4'
190    }
191    raw_tasks = []
192    for seq in seq_list:
193        for dtype in data_types:
194            if dtype == 'features':
195                continue
196            if dtype not in file_extension_map:
197                continue
198            raw_tasks.append(seq + file_extension_map[dtype])
199    
200    # Build list of (url, dest_path, desc)
201    download_jobs = []
202    for filename in feature_tasks:
203        dest = os.path.join(data_dir, filename)
204        if not os.path.exists(dest):
205            download_jobs.append((base_url + filename, dest, filename))
206    for filename in raw_tasks:
207        dest = os.path.join(data_dir, filename)
208        if not os.path.exists(dest):
209            download_jobs.append((base_url + filename, dest, filename))
210    
211    if not download_jobs:
212        print("All requested UrFall files already present.")
213        return data_dir
214    
215    print(f"Starting concurrent downloads: {len(download_jobs)} file(s) with up to {max_workers} workers...")
216    successes = 0
217    failures = []
218    
219    with ThreadPoolExecutor(max_workers=max_workers) as executor:
220        future_to_job = {executor.submit(_download_file, url, dest, desc): (url, dest) for url, dest, desc in download_jobs}
221        for future in as_completed(future_to_job):
222            (url, dest) = future_to_job[future]
223            ok, info = future.result()
224            if ok:
225                successes += 1
226            else:
227                failures.append((url, info))
228    
229    print(f"Completed downloads: {successes} succeeded, {len(failures)} failed.")
230    if failures:
231        for url, err in failures[:10]:
232            print(f" - Failed: {url} -> {err}")
233        if len(failures) > 10:
234            print(f" ... and {len(failures) - 10} more failures")
235    
236    return data_dir

Download the UrFall dataset files.

Args: data_dir: Directory where the dataset will be downloaded sequences: List of specific sequences to download (e.g., ['fall-01', 'adl-01']) If None, downloads based on use_falls and use_adls data_types: List of data types to download. Options: 'depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features' (default: ['features']) use_falls: Whether to download fall sequences (default: True) use_adls: Whether to download ADL sequences (default: True) max_workers: Max concurrent download workers (default: 8)

Returns: str: Path to the data directory

def extract_dataset(dataset_name, data_dir):
243def extract_dataset(dataset_name, data_dir):
244    """Extract the dataset."""
245    if dataset_name == "daphnet":
246        extract_daphnet_data(data_dir)
247    elif dataset_name == "mobifall":
248        extract_mobifall_data(data_dir)
249    elif dataset_name == "arduous":
250        extract_arduous_data(data_dir)
251    elif dataset_name == "harup":
252        extract_harup_data(data_dir)
253    elif dataset_name == "urfall":
254        extract_urfall_data(data_dir)
255    elif dataset_name == "physionet":
256        # PhysioNet dataset is handled by the PhysioNetLoader itself
257        pass
258    else:
259        raise ValueError(f"Dataset {dataset_name} not supported.")

Extract the dataset.

def extract_daphnet_data(data_dir):
262def extract_daphnet_data(data_dir):
263    """Extract the Daphnet dataset."""
264    file_path = os.path.join(data_dir, "daphnet.zip")
265    with zipfile.ZipFile(file_path, "r") as zip_ref:
266        zip_ref.extractall(data_dir)

Extract the Daphnet dataset.

def extract_mobifall_data(data_dir):
268def extract_mobifall_data(data_dir):
269    """Extract the MobiFall dataset."""
270    pass

Extract the MobiFall dataset.

def extract_arduous_data(data_dir):
272def extract_arduous_data(data_dir):
273    """Extract the Arduous dataset."""
274    pass

Extract the Arduous dataset.

def extract_urfall_data(data_dir, sequences=None, use_falls=True, use_adls=True):
    """
    Unpack the UrFall depth/RGB zip archives found in ``data_dir``.

    Args:
        data_dir: Directory containing the downloaded archives.
        sequences: Explicit list of sequence names (e.g. ['fall-01']).
            When None, the list is built from use_falls/use_adls.
        use_falls: Include fall-01..fall-30 when ``sequences`` is None.
        use_adls: Include adl-01..adl-20 when ``sequences`` is None.
    """
    # Build the target sequence list.
    if sequences is not None:
        targets = sequences
    else:
        targets = []
        if use_falls:
            targets += [f"fall-{n:02d}" for n in range(1, 31)]
        if use_adls:
            targets += [f"adl-{n:02d}" for n in range(1, 21)]

    archive_kinds = [('depth', '-cam0-d.zip'), ('rgb', '-cam0-rgb.zip')]
    for name in targets:
        for kind, suffix in archive_kinds:
            zip_file = os.path.join(data_dir, name + suffix)
            if not os.path.exists(zip_file):
                continue
            # NOTE(review): only the first letter of the kind is used, so the
            # rgb archive lands in "<seq>-cam0-r", not "<seq>-cam0-rgb" —
            # confirm downstream loaders expect that directory name.
            extract_dir = os.path.join(data_dir, name + f"-cam0-{kind[0]}")
            if os.path.exists(extract_dir):
                print(f"Already extracted: {extract_dir}")
                continue
            try:
                print(f"Extracting {zip_file}...")
                with zipfile.ZipFile(zip_file) as zf:
                    zf.extractall(extract_dir)
                print(f"Extracted to: {extract_dir}")
            except Exception as e:
                # Best-effort: report and keep going with other archives.
                print(f"Failed to extract {zip_file}: {e}")

Extract the UrFall dataset zip files (depth and RGB data).

Args:
    data_dir: Directory containing the dataset.
    sequences: List of specific sequences to extract.
    use_falls: Whether to extract fall sequences.
    use_adls: Whether to extract ADL sequences.

def sliding_window(data, window_size, step_size):
    """Return all full length-``window_size`` slices of ``data``, advancing
    ``step_size`` elements between consecutive windows.

    A trailing partial window is dropped; an input shorter than
    ``window_size`` yields an empty list.
    """
    count = (len(data) - window_size) // step_size + 1
    return [data[offset * step_size : offset * step_size + window_size]
            for offset in range(count)]
def download_harup_data(data_dir):
def download_harup_data(data_dir):
    """
    Download the HAR-UP dataset.

    Interactive helper: prompts the user to pick automatic Google Drive
    download, manual browser download, or skip. On the automatic path the
    ZIP is streamed to disk with a progress bar, extracted, and the inner
    ``DataSet`` folder is located even when the archive wraps it in a
    top-level directory.

    Args:
        data_dir (str): Directory where the dataset will be downloaded

    Returns:
        str: Path to the extracted dataset directory or None if not performed
            (manual/skip/invalid choice, or any error on the automatic path)
    """
    # Imported locally: tqdm and webbrowser are only needed by this
    # interactive flow.
    import os
    import requests
    from tqdm import tqdm
    import webbrowser
    import zipfile

    # Create directory if it doesn't exist
    os.makedirs(data_dir, exist_ok=True)

    # Define file paths
    zip_filename = "HAR-UP_Dataset.zip"
    zip_path = os.path.join(data_dir, zip_filename)
    dataset_dir = os.path.join(data_dir, "DataSet")

    # Check if dataset directory already exists; nothing to do in that case.
    if os.path.exists(dataset_dir):
        print(f"HAR-UP dataset directory already exists at: {dataset_dir}")
        return dataset_dir

    # Direct download URL from Google Drive (update if needed)
    url = "https://drive.usercontent.google.com/download?id=1Y2MSUijPcB7--PcGoAKhGeqI8GxKK0Pm&export=download&authuser=0"
    print("\n" + "="*80)
    print("HAR-UP DATASET DOWNLOAD")
    print("="*80)
    print("The HAR-UP dataset can be downloaded automatically or manually.")
    print("\nOptions:")
    print("1. Automatic download (recommended)")
    print("2. Manual download")
    print("3. Skip download (if you already have the dataset elsewhere)")

    choice = input("\nEnter your choice (1-3): ")

    if choice == "1":
        try:
            print(f"\nDownloading HAR-UP dataset ZIP to: {zip_path}")
            print("This may take some time depending on your internet connection...")
            # Stream the response so the whole ZIP is never held in memory.
            response = requests.get(url, stream=True)
            response.raise_for_status()  # Raise an exception for bad status codes
            # Content-Length may be absent; the bar total is then 0.
            total_size = int(response.headers.get('content-length', 0))
            progress_bar = tqdm(total=total_size, unit='iB', unit_scale=True, desc='Download Progress')
            with open(zip_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        # file.write returns the byte count, which advances the bar.
                        size = file.write(chunk)
                        progress_bar.update(size)
            progress_bar.close()
            if os.path.getsize(zip_path) > 0:
                print(f"Download completed successfully! File saved to: {zip_path}")
                print("\nExtracting the downloaded ZIP file...")
                with zipfile.ZipFile(zip_path, "r") as zip_ref:
                    zip_ref.extractall(data_dir)
                # Check for DataSet folder
                if not os.path.exists(dataset_dir):
                    # Sometimes the zip may contain a top-level folder, e.g., HAR-UP_Dataset/DataSet/...
                    # If so, move the nested DataSet folder up to data_dir.
                    for entry in os.listdir(data_dir):
                        entry_path = os.path.join(data_dir, entry)
                        if os.path.isdir(entry_path) and os.path.exists(os.path.join(entry_path, "DataSet")):
                            import shutil
                            shutil.move(os.path.join(entry_path, "DataSet"), dataset_dir)
                            break
                if os.path.exists(dataset_dir):
                    print(f"Extraction complete. DataSet directory at: {dataset_dir}")
                    return dataset_dir
                else:
                    print("Extraction failed: DataSet directory not found after extraction.")
                    return None
            else:
                raise IOError("Downloaded file is empty")
        except Exception as e:
            # Broad catch: any failure on the automatic path is reported and
            # the user is pointed at the manual option instead.
            print(f"\nError during download: {e}")
            print("\nPlease try the manual download option instead.")
            if os.path.exists(zip_path):
                os.remove(zip_path)  # Clean up partial download
            return None

    elif choice == "2":
        print("\nOpening the HAR-UP dataset download page in your browser...")
        print("Please download the ZIP file and save it to the following location:")
        print(f"  {zip_path}")
        webbrowser.open("https://sites.google.com/up.edu.mx/har-up/download")
        print("\nAfter downloading, please ensure the ZIP file is named 'HAR-UP_Dataset.zip' and placed in your data directory.")
        print("Then, rerun this function or choose option 1 to extract.")
        return None

    elif choice == "3":
        print("\nSkipping download. Please ensure the dataset is available at:")
        print(f"  {os.path.join(data_dir, 'DataSet')}")
        return None

    else:
        print("\nInvalid choice. Please run again and select a valid option.")
        return None

Download the HAR-UP dataset.

This function provides instructions for downloading the HAR-UP dataset and offers an option to download it directly from Google Drive as a ZIP file.

Args: data_dir (str): Directory where the dataset will be downloaded

Returns: str: Path to the extracted dataset directory or None if not performed

def extract_harup_data(data_dir):
    """Unpack ``HAR-UP_Dataset.zip`` into ``data_dir`` unless a ``DataSet``
    directory already exists there.

    Prints status messages; silently returns when the archive is missing
    or the dataset has already been extracted.
    """
    import zipfile

    extracted = os.path.join(data_dir, "DataSet")
    if os.path.exists(extracted):
        print(f"HAR-UP dataset already extracted at: {extracted}")
        return

    archive = os.path.join(data_dir, "HAR-UP_Dataset.zip")
    if not os.path.exists(archive):
        print(f"HAR-UP zip file not found at: {archive}")
        print("Please run download_harup_data first.")
        return

    print(f"Extracting HAR-UP dataset zip to: {data_dir}")
    with zipfile.ZipFile(archive, "r") as zf:
        zf.extractall(data_dir)
    print("Extraction complete.")

Extract the HAR-UP dataset zip file if not already extracted.