Source code for iceprod.core.gridftp

"""
gridftp interface
"""

from __future__ import absolute_import, division, print_function

import os
import logging
from threading import Thread, Event
from functools import partial
from collections import namedtuple
from datetime import datetime
import time
import tempfile
import shutil
import subprocess

logger = logging.getLogger('gridftp')

def _cmd(cmd, timeout=1200):
    """
    Run a command and wait for it to finish, discarding nothing.

    The child inherits this process's stdout/stderr.

    Args:
        cmd (list): command and arguments, passed to :class:`subprocess.Popen`
        timeout (float): maximum wall-clock time to wait, in seconds

    Raises:
        Exception: 'Request failed' if the command exits non-zero,
            'Request timed out' if it is still running after `timeout`
    """
    # Measure real elapsed time instead of summing nominal sleep intervals.
    # time.monotonic is unavailable on Python 2, so fall back to time.time.
    clock = getattr(time, 'monotonic', time.time)
    p = subprocess.Popen(cmd)
    deadline = clock() + timeout
    while True:
        time.sleep(0.01)
        ret = p.poll()
        if ret is not None:
            if ret:
                raise Exception('Request failed')
            return
        if clock() >= deadline:
            p.kill()
            p.wait()  # reap the killed child so it does not linger as a zombie
            raise Exception('Request timed out')

def _cmd_output(cmd, timeout=1200):
    """
    Run a command and capture its combined stdout/stderr.

    Args:
        cmd (list): command and arguments, passed to :class:`subprocess.Popen`
        timeout (float): maximum wall-clock time to wait, in seconds

    Returns:
        tuple: (return code, output decoded as utf-8)

    Raises:
        Exception: 'Request timed out' if the command is still running
            after `timeout`
    """
    # Measure real elapsed time instead of summing nominal sleep intervals.
    # time.monotonic is unavailable on Python 2, so fall back to time.time.
    clock = getattr(time, 'monotonic', time.time)
    # Spool output to a temporary file rather than a pipe: a pipe that is
    # only read after the child exits fills up on large output, the child
    # blocks on write, and the request would always "time out".
    with tempfile.TemporaryFile() as out:
        p = subprocess.Popen(cmd, stdout=out, stderr=subprocess.STDOUT)
        deadline = clock() + timeout
        while True:
            time.sleep(0.01)
            ret = p.poll()
            if ret is not None:
                out.seek(0)
                return (ret, out.read().decode('utf-8'))
            if clock() >= deadline:
                p.kill()
                p.wait()  # reap the killed child so it does not linger as a zombie
                raise Exception('Request timed out')


def listify(lines, details=False, dotfiles=False):
    """
    Turn ls output into a list of file names or NamedTuples.

    The name of each entry is the last whitespace-separated token of
    its line.

    Args:
        lines (str): newline-separated listing; with `details` each line
            must be in long (`ls -l` style) format
        details (bool): return `File` NamedTuples
            (directory, perms, subfiles, owner, group, size, date, name)
            instead of plain names
        dotfiles (bool): include entries whose name starts with '.'

    Returns:
        list: names (str), or `File` NamedTuples if `details` is set
    """
    out = []
    if details:
        File = namedtuple('File', ['directory', 'perms', 'subfiles',
                                   'owner', 'group', 'size', 'date',
                                   'name'])
        months = {'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
                  'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11,
                  'dec': 12}
        now = datetime.now()
        for x in lines.split('\n'):
            if not x.strip():
                continue
            pieces = x.split()
            name = pieces[-1]
            if name.startswith('.') and not dotfiles:
                continue
            d = x[0] == 'd'
            perms = pieces[0][1:]
            month = months[pieces[5].lower()]
            day = int(pieces[6])
            if ':' in pieces[7]:
                # "Mon DD HH:MM" carries no year; start from the current one.
                hour, minute = pieces[7].split(':')
                dt = datetime(now.year, month, day, int(hour), int(minute))
                # A listing only omits the year for recent files, so a date
                # noticeably in the future really belongs to last year
                # (e.g. a December file listed in January).  One day of
                # slack tolerates clock/timezone differences.
                if (dt - now).days >= 1:
                    try:
                        dt = dt.replace(year=now.year - 1)
                    except ValueError:
                        # Feb 29 has no counterpart last year; keep as-is
                        pass
            else:
                # "Mon DD YYYY": the year is explicit, time of day unknown
                dt = datetime(int(pieces[7]), month, day)
            out.append(File(d, perms, int(pieces[1]), pieces[2], pieces[3],
                            int(pieces[4]), dt, name))
    else:
        for x in lines.split('\n'):
            if not x.strip():
                continue
            f = x.split()[-1]
            if dotfiles or not f.startswith('.'):
                out.append(f)
    return out
class GridFTP(object):
    """
    GridFTP interface to command line client.

    All operations are classmethods that shell out to the
    `globus-url-copy` or `uberftp` command line tools.

    Example:

        GridFTP.get('gsiftp://data.icecube.wisc.edu/file',
                    filename='/path/to/file')
    """

    _timeout = 1200  # 20 min default timeout

    @classmethod
    def _check_address(cls, address, label='address'):
        """Raise an Exception if `address` is not a supported url."""
        if not cls.supported_address(address):
            raise Exception('address type not supported for %s %s'
                            % (label, str(address)))

    @classmethod
    def _resolve_timeout(cls, request_timeout):
        """Return `request_timeout`, or the class default if it is None."""
        if request_timeout is None:
            return cls._timeout
        return request_timeout

    @classmethod
    def _parent_address(cls, address):
        """Return the url of the parent directory, or None at the top level."""
        server, path = cls.address_split(address)
        parent = path.rstrip('/').rpartition('/')[0]
        if not parent:
            return None
        return server + parent

    @classmethod
    def supported_address(cls, address):
        """Return False for address types that are not supported"""
        if '://' not in address:
            return False
        return address.split(':')[0] in ('gsiftp', 'ftp')

    @classmethod
    def address_split(cls, address):
        """Split an address into server/path parts"""
        pieces = address.split('://', 1)
        if '/' in pieces[1]:
            host, path = pieces[1].split('/', 1)
            return (pieces[0] + '://' + host, '/' + path)
        return (address, '/')

    @classmethod
    def get(cls, address, filename=None, request_timeout=None):
        """
        Do a GridFTP get request.

        Either data is returned directly or filename must be defined.

        Args:
            address (str): url to get from
            filename (str): filename to write data to
            request_timeout (float): timeout in seconds

        Returns:
            str: data, if filename is not defined

        Raises:
            Exception for failure
        """
        cls._check_address(address)
        timeout = cls._resolve_timeout(request_timeout)
        tmpdir = None
        try:
            if filename is None:
                # download into a scratch directory and read it back
                tmpdir = tempfile.mkdtemp(dir=os.getcwd())
                local_path = os.path.join(tmpdir, 'get_tmp_file')
            else:
                local_path = filename
            cmd = ['globus-url-copy', address, 'file:' + local_path]
            _cmd(cmd, timeout=timeout)
            if filename is None:
                with open(local_path) as f:
                    return f.read()
        finally:
            if tmpdir:
                shutil.rmtree(tmpdir, ignore_errors=True)

    @classmethod
    def put(cls, address, data=None, filename=None, request_timeout=None):
        """
        Do a GridFTP put request.

        Either data or filename must be defined.

        Args:
            address (str): url to put to
            data (str): the data to put
            filename (str): filename for data to put
            request_timeout (float): timeout in seconds

        Raises:
            Exception for failure
        """
        cls._check_address(address)
        if data is None and filename is None:
            raise Exception('Neither data or filename is defined')
        timeout = cls._resolve_timeout(request_timeout)
        tmpdir = None
        try:
            if data is not None:
                # stage the data in a scratch file; the write happens inside
                # the try so the directory is removed even if it fails
                tmpdir = tempfile.mkdtemp(dir=os.getcwd())
                local_path = os.path.join(tmpdir, 'put_tmp_file')
                mode = 'w' if isinstance(data, str) else 'wb'
                with open(local_path, mode) as f:
                    f.write(data)
            else:
                local_path = filename
            cmd = ['globus-url-copy', '-cd', 'file:' + local_path, address]
            _cmd(cmd, timeout=timeout)
        finally:
            if tmpdir:
                shutil.rmtree(tmpdir, ignore_errors=True)

    @classmethod
    def list(cls, address, request_timeout=None, details=False,
             dotfiles=False):
        """
        Do a GridFTP list request.

        Args:
            address (str): url to list
            request_timeout (float): timeout in seconds
            details (bool): result is a list of NamedTuples
            dotfiles (bool): result includes '.', '..', and other '.' files

        Returns:
            list: a list of files

        Raises:
            Exception on error
        """
        cls._check_address(address)
        cmd = ['uberftp', '-retry', '5', '-ls', address]
        ret = _cmd_output(cmd, timeout=cls._resolve_timeout(request_timeout))
        if ret[0]:
            raise Exception('Error getting listing')
        return listify(ret[1], details=details, dotfiles=dotfiles)

    @classmethod
    def mkdir(cls, address, request_timeout=None, parents=False):
        """
        Make a directory on the ftp server.

        Args:
            address (str): url to directory
            request_timeout (float): timeout in seconds
            parents (bool): make parent directories as needed

        Raises:
            Exception on error
        """
        cls._check_address(address)
        if parents:
            # recursively make the parent directories first
            parent = cls._parent_address(address)
            if parent is not None:
                try:
                    cls.mkdir(parent, request_timeout=request_timeout,
                              parents=True)
                except Exception:
                    # Best effort: the parent usually exists already.  If it
                    # truly could not be made, the mkdir below will fail.
                    pass
        cmd = ['uberftp', '-retry', '5', '-mkdir', address]
        _cmd(cmd, timeout=cls._resolve_timeout(request_timeout))

    @classmethod
    def rmdir(cls, address, request_timeout=None):
        """
        Remove a directory on the ftp server.

        This fails if the directory is not empty. Use :py:func:`rmtree`
        for recursive removal.

        Args:
            address (str): url to directory
            request_timeout (float): timeout in seconds

        Raises:
            Exception on error
        """
        cls._check_address(address)
        cmd = ['uberftp', '-retry', '5', '-rmdir', address]
        ret = _cmd_output(cmd, timeout=cls._resolve_timeout(request_timeout))
        # a target that is already gone is not treated as an error
        if ret[0] and 'No match for' not in ret[1]:
            raise Exception('Error removing dir')

    @classmethod
    def delete(cls, address, request_timeout=None):
        """
        Delete a file on the ftp server.

        Args:
            address (str): url to file
            request_timeout (float): timeout in seconds

        Raises:
            Exception on error
        """
        cls._check_address(address)
        cmd = ['uberftp', '-retry', '5', '-rm', address]
        ret = _cmd_output(cmd, timeout=cls._resolve_timeout(request_timeout))
        # a target that is already gone is not treated as an error
        if ret[0] and 'No match for' not in ret[1]:
            raise Exception('Error removing file')

    @classmethod
    def rmtree(cls, address, request_timeout=None):
        """
        Delete a file or directory on the ftp server.

        This is recursive, like `rm -rf`.

        Args:
            address (str): url to file or directory
            request_timeout (float): timeout in seconds

        Raises:
            Exception on error
        """
        cls._check_address(address)
        cmd = ['uberftp', '-retry', '5', '-rm', '-r', address]
        ret = _cmd_output(cmd, timeout=cls._resolve_timeout(request_timeout))
        # a target that is already gone is not treated as an error
        if ret[0] and 'No match for' not in ret[1]:
            raise Exception('Error removing dir')

    @classmethod
    def move(cls, src, dest, request_timeout=None):
        """
        Move a file on the ftp server.

        Args:
            src (str): url to source file
            dest (str): url to destination file
            request_timeout (float): timeout in seconds

        Raises:
            Exception on error
        """
        cls._check_address(src, 'src')
        cls._check_address(dest, 'dest')
        # only the path part of dest is passed as the rename target
        cmd = ['uberftp', '-retry', '5', '-rename', src,
               cls.address_split(dest)[-1]]
        _cmd(cmd, timeout=cls._resolve_timeout(request_timeout))

    @classmethod
    def exists(cls, address, request_timeout=None):
        """
        Check if a file exists on the ftp server.

        Args:
            address (str): url to file
            request_timeout (float): timeout in seconds

        Returns:
            bool: True if the size request for the file succeeds

        Raises:
            Exception on error
        """
        cls._check_address(address)
        cmd = ['uberftp', '-retry', '5', '-size', address]
        ret = _cmd_output(cmd, timeout=cls._resolve_timeout(request_timeout))
        return (not ret[0])

    @classmethod
    def chmod(cls, address, mode, request_timeout=None):
        """
        Chmod a file on the ftp server.

        Args:
            address (str): url to file
            mode (str): mode of file
            request_timeout (float): timeout in seconds

        Raises:
            Exception on error
        """
        cls._check_address(address)
        cmd = ['uberftp', '-retry', '5', '-chmod', mode, address]
        _cmd(cmd, timeout=cls._resolve_timeout(request_timeout))

    @classmethod
    def size(cls, address, request_timeout=None):
        """
        Get the size of a file on the ftp server.

        Args:
            address (str): url to file
            request_timeout (float): timeout in seconds

        Returns:
            int: size of file in bytes

        Raises:
            Exception on error
        """
        cls._check_address(address)
        cmd = ['uberftp', '-retry', '5', '-size', address]
        ret = _cmd_output(cmd, timeout=cls._resolve_timeout(request_timeout))
        if ret[0]:
            raise Exception('failed to get size')
        return int(ret[1])

    @classmethod
    def _chksum(cls, type, address, request_timeout=None):
        """Chksum is faked by redownloading the file and checksumming that"""
        from iceprod.core.functions import cksm
        cls._check_address(address)
        if type.endswith('sum'):
            # 'md5sum' -> 'md5', etc.
            type = type[:-3]
        timeout = cls._resolve_timeout(request_timeout)
        tmpdir = tempfile.mkdtemp(dir=os.getcwd())
        try:
            local_path = os.path.join(tmpdir, 'dest')
            cmd = ['globus-url-copy', address, 'file:' + local_path]
            _cmd(cmd, timeout=timeout)
            if not os.path.exists(local_path):
                raise Exception('failed to redownload')
            return cksm(local_path, type)
        finally:
            shutil.rmtree(tmpdir, ignore_errors=True)

    ### Some helper functions for different checksum types ###

    @classmethod
    def md5sum(cls, address, request_timeout=None):
        """
        Get the md5sum of a file on an ftp server.

        Args:
            address (str): url to file
            request_timeout (float): timeout in seconds

        Returns:
            str: the md5sum

        Raises:
            Exception on error
        """
        return cls._chksum('md5sum', address, request_timeout=request_timeout)

    @classmethod
    def sha1sum(cls, address, request_timeout=None):
        """
        Get the sha1sum of a file on an ftp server.

        Args:
            address (str): url to file
            request_timeout (float): timeout in seconds

        Returns:
            str: the sha1sum

        Raises:
            Exception on error
        """
        return cls._chksum('sha1sum', address, request_timeout=request_timeout)

    @classmethod
    def sha256sum(cls, address, request_timeout=None):
        """
        Get the sha256sum of a file on an ftp server.

        Args:
            address (str): url to file
            request_timeout (float): timeout in seconds

        Returns:
            str: the sha256sum

        Raises:
            Exception on error
        """
        return cls._chksum('sha256sum', address,
                           request_timeout=request_timeout)

    @classmethod
    def sha512sum(cls, address, request_timeout=None):
        """
        Get the sha512sum of a file on an ftp server.

        Args:
            address (str): url to file
            request_timeout (float): timeout in seconds

        Returns:
            str: the sha512sum

        Raises:
            Exception on error
        """
        return cls._chksum('sha512sum', address,
                           request_timeout=request_timeout)