# Source code for iceprod.core.functions

"""
Common functions
"""

from __future__ import absolute_import, division, print_function

import sys
import os
import re
import shutil
import time
import logging
import socket
import subprocess
import tarfile
import urllib
import tempfile
import hashlib
from functools import partial
from contextlib import contextmanager

try:
    import cPickle as pickle
except ImportError:
    import pickle

try:
    import psutil
except ImportError:
    psutil = None

import requests
from requests.packages.urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
from requests_toolbelt.multipart.encoder import MultipartEncoder

from iceprod.core import util
from iceprod.core.gridftp import GridFTP
from iceprod.core.jsonUtil import json_encode,json_decode

logger = logging.getLogger('functions')


### Compression Functions ###

# suffixes recognized as single-file compression formats
_compress_suffixes = ('.tgz','.gz','.tbz2','.tbz','.bz2','.bz',
                     '.lzma2','.lzma','.lz','.xz')
# suffixes recognized as (possibly compressed) tar archives
_tar_suffixes = ('.tar', '.tar.gz', '.tgz', '.tar.bz2', '.tbz2', '.tbz',
                '.tar.lzma', '.tar.xz', '.tlz', '.txz')

def uncompress(infile, out_dir=None):
    """Uncompress a file, if possible.

    Tar archives are extracted (skipped if members already exist on
    disk); plain compressed files are decompressed next to the source.
    Returns the single extracted filename, or a list if more than one.
    """
    extracted = []
    orig_dir = os.getcwd()
    try:
        if out_dir:
            os.chdir(out_dir)
        logger.info('uncompressing %s', infile)
        if istarred(infile):
            # list archive members; ignore blank entries and directories
            listing = subprocess.check_output(['tar', '-atf', infile]).decode('utf-8')
            extracted = [name for name in listing.split('\n')
                         if name.strip() and not name.endswith('/')]
            if not extracted:
                raise Exception('no files inside tarfile')
            # only extract when none of the members already exist
            if not any(os.path.exists(name) for name in extracted):
                subprocess.call(['tar', '-axf', infile])
        else:
            # pick the decompressor from the file suffix
            for suffixes, tool in ((('.gz',), 'gzip'),
                                   (('.bz', '.bz2'), 'bzip2'),
                                   (('.xz', '.lzma'), 'xz')):
                if infile.endswith(suffixes):
                    break
            else:
                logger.info('unknown format: %s', infile)
                raise Exception('unknown format')
            subprocess.call([tool, '-kdf', infile])
            extracted.append(infile.rsplit('.', 1)[0])
    finally:
        os.chdir(orig_dir)
    logger.info('files: %r', extracted)
    return extracted[0] if len(extracted) == 1 else extracted
def compress(infile, compression='lzma'):
    """Compress a file or directory.

    The compression argument is used as the new file extension"""
    # a directory with a non-tar compression type gets tarred first
    if os.path.isdir(infile) and not istarred('.'+compression):
        outfile = infile+'.tar.'+compression
    else:
        outfile = infile+'.'+compression
    if istarred(outfile):
        parent, base = os.path.split(infile)
        subprocess.call(['tar', '-acf', outfile, '-C', parent, base])
        return outfile
    # plain single-file compression: choose the tool from the suffix
    if outfile.endswith('.gz'):
        tool = ['gzip']
    elif outfile.endswith(('.bz', '.bz2')):
        tool = ['bzip2']
    elif outfile.endswith('.xz'):
        tool = ['xz']
    elif outfile.endswith('.lzma'):
        tool = ['xz', '-F', 'lzma']
    else:
        logger.info('unknown format: %s', infile)
        raise Exception('unknown format')
    subprocess.call(tool + ['-kf', infile])
    return outfile
def iscompressed(infile):
    """Check if a file is a compressed file, based on file name"""
    # str.endswith accepts a tuple of suffixes directly
    return infile.endswith(('.tgz', '.gz', '.tbz2', '.tbz', '.bz2', '.bz',
                            '.lzma2', '.lzma', '.lz', '.xz'))
def istarred(infile):
    """Check if a file is a tarred file, based on file name"""
    # str.endswith accepts a tuple of suffixes directly
    return infile.endswith(('.tar', '.tar.gz', '.tgz', '.tar.bz2', '.tbz2',
                            '.tbz', '.tar.lzma', '.tar.xz', '.tlz', '.txz'))
def cksm(filename, type, buffersize=16384, file=True):
    """Return checksum of data using the algorithm specified.

    Args:
        filename: path to a file, or the raw data itself when ``file``
            is False (or the path does not exist)
        type: hash algorithm name: 'md5', 'sha1', 'sha256', or 'sha512'
        buffersize: chunk size in bytes for reading file contents
        file: if True and ``filename`` exists, checksum the file's
            contents; otherwise checksum ``filename`` itself as data

    Returns:
        hex digest string

    Raises:
        Exception: if the algorithm type is unsupported
    """
    if type not in ('md5', 'sha1', 'sha256', 'sha512'):
        # fix: original passed type as an Exception arg instead of
        # %-formatting it, producing a tuple-shaped message
        raise Exception('cannot get checksum for type %r' % type)
    digest = getattr(hashlib, type)()
    if file and os.path.exists(filename):
        # checksum file contents in fixed-size chunks
        with open(filename, 'rb') as filed:
            for buffer in iter(partial(filed.read, buffersize), b''):
                digest.update(buffer)
    else:
        # checksum the first argument directly; hashlib requires bytes
        # in py3, so encode str input (original raised TypeError here)
        if isinstance(filename, str):
            filename = filename.encode('utf-8')
        digest.update(filename)
    return digest.hexdigest()
def md5sum(filename, buffersize=16384):
    """Return the md5 digest of a file."""
    return cksm(filename, 'md5', buffersize=buffersize)
def sha1sum(filename, buffersize=16384):
    """Return the sha1 digest of a file."""
    return cksm(filename, 'sha1', buffersize=buffersize)
def sha256sum(filename, buffersize=16384):
    """Return the sha256 digest of a file."""
    return cksm(filename, 'sha256', buffersize=buffersize)
def sha512sum(filename, buffersize=16384):
    """Return the sha512 digest of a file."""
    return cksm(filename, 'sha512', buffersize=buffersize)
def load_cksm(sumfile, base_filename):
    """Load a checksum from a checksum file.

    Args:
        sumfile: path to a checksum file with "<hexdigest> <name>" lines
        base_filename: the file name (a path is reduced to its basename)
            whose checksum should be looked up

    Returns:
        the checksum string from the first matching line

    Raises:
        Exception: if no line mentions the file
    """
    target = os.path.basename(base_filename)
    # fix: use a context manager so the file handle is always closed
    # (original left the handle open)
    with open(sumfile, 'r') as f:
        for line in f:
            if target in line:
                sum_cksm, _name = line.split()
                return sum_cksm
    raise Exception('could not find checksum in file')
def check_cksm(file, type, sum):
    """Check a checksum of a file"""
    if not os.path.exists(file):
        return False
    # checksum of the actual file contents
    actual = cksm(file, type)
    # the expected value is either given directly or read from a sum file
    expected = load_cksm(sum, file) if os.path.isfile(sum) else sum
    logger.debug('file_cksm: %r', actual)
    logger.debug('sum_cksm: %r', expected)
    return actual == expected
def check_md5sum(file, sum):
    """Verify the md5 checksum of a file."""
    return check_cksm(file, 'md5', sum)
def check_sha1sum(file, sum):
    """Verify the sha1 checksum of a file."""
    return check_cksm(file, 'sha1', sum)
def check_sha256sum(file, sum):
    """Verify the sha256 checksum of a file."""
    return check_cksm(file, 'sha256', sum)
def check_sha512sum(file, sum):
    """Verify the sha512 checksum of a file."""
    return check_cksm(file, 'sha512', sum)
### File and Directory Manipulation Functions ###
def removedirs(path):
    """Best-effort removal of a path: directory tree or single file."""
    try:
        if not os.path.isdir(path):
            os.remove(path)
        else:
            shutil.rmtree(path, True)
    except Exception:
        # deliberately best-effort: missing paths / permission errors
        # are silently ignored
        pass
def copy(src, dest):
    """Copy a file or directory to dest, creating parent dirs as needed."""
    parent_dir = os.path.dirname(dest)
    if not os.path.exists(parent_dir):
        logger.info('attempting to make parent dest dir %s', parent_dir)
        try:
            os.makedirs(parent_dir)
        except Exception:
            logger.error('failed to make dest directory for copy', exc_info=True)
            raise
    if not os.path.isdir(src):
        logger.info('filecopy: %s to %s', src, dest)
        shutil.copy2(src, dest)
    else:
        logger.info('dircopy: %s to %s', src, dest)
        shutil.copytree(src, dest, symlinks=True)
### Network Functions ###
def getInterfaces():
    """
    Get the available network interfaces. Requires `psutil`.

    Returns:
        dict of {nic_name: {type: address}}
    """
    interfaces = {}
    for nic_name, snics in psutil.net_if_addrs().items():
        entry = {}
        for snic in snics:
            if not snic.address:
                continue
            # classify each address by its socket family
            if snic.family == socket.AF_INET:
                entry['ipv4'] = snic.address
            elif snic.family == socket.AF_INET6:
                entry['ipv6'] = snic.address
            elif snic.family == psutil.AF_LINK:
                entry['mac'] = snic.address
        interfaces[nic_name] = entry
    return interfaces
[docs]def get_local_ip_address(): """Get the local (loopback) ip address""" try: return socket.gethostbyname('localhost') except Exception: return socket.gethostbyname( socket.getfqdn() )
def gethostname():
    """Get hostname of this computer."""
    hostname = socket.getfqdn()
    try:
        resp = requests.get('http://simprod.icecube.wisc.edu/downloads/getip.php')
        resp.raise_for_status()
        logger.info('getip: %r', resp.text)
        # last whitespace-separated token of the reply is the external name
        external = resp.text.split(' ')[-1]
        ext_parts = external.split('.')
        if len(ext_parts) > 1:
            # keep our short host name but adopt the external domain
            hostname = '.'.join(hostname.split('.')[:1] + ext_parts[1:])
    except Exception:
        logger.info('error getting global ip', exc_info=True)
    return hostname
@contextmanager
def _http_helper(options=None):
    """Set up an http session using requests.

    Args:
        options: optional dict; recognized keys are 'username'/'password'
            (basic auth), 'sslcert'/'sslkey' (client certificate), and
            'cacert' (CA bundle used for server verification)

    Yields:
        a configured requests.Session with retrying adapters mounted
    """
    # fix: avoid the mutable-default-argument pitfall; behavior is
    # unchanged for callers passing a dict or nothing
    if options is None:
        options = {}
    with requests.Session() as s:
        if 'username' in options and 'password' in options:
            s.auth = (options['username'], options['password'])
        if 'sslcert' in options:
            if 'sslkey' in options:
                s.cert = (options['sslcert'], options['sslkey'])
            else:
                s.cert = options['sslcert']
        if 'cacert' in options:
            s.verify = options['cacert']
        # retry transient failures with exponential backoff
        retries = Retry(total=5, backoff_factor=0.5,
                        status_forcelist=[408, 500, 502, 503, 504])
        s.mount('http://', HTTPAdapter(max_retries=retries))
        s.mount('https://', HTTPAdapter(max_retries=retries))
        yield s
def download(url, local, options={}):
    """Download a URL to a local file.

    Supports http(s):, file:, ftp:, and gsiftp: URLs; a bare existing
    path is treated as a file: URL.  NOTE(review): despite the original
    summary ("checksumming if possible"), no checksum verification is
    performed here.

    Args:
        url: source URL (env vars and ~ are expanded)
        local: destination path, or a directory (the basename of the
            URL is then appended); env vars and ~ are expanded
        options: optional dict forwarded to the http session helper
            (auth / ssl settings)

    Returns:
        the local filename written

    Raises:
        Exception: on unsupported protocol or failed download
    """
    local = os.path.expanduser(os.path.expandvars(local))
    url = os.path.expanduser(os.path.expandvars(url))
    if not isurl(url):
        if os.path.exists(url):
            # promote a bare existing path to a file: URL
            url = 'file:'+url
        else:
            raise Exception("unsupported protocol %s" % url)

    # strip off query params (or fragment) to derive a clean basename
    if '?' in url:
        clean_url = url[:url.find('?')]
    elif '#' in url:
        clean_url = url[:url.find('#')]
    else:
        clean_url = url

    # fix local to be a filename to write to
    if local.startswith('file:'):
        local = local[5:]
    if os.path.isdir(local):
        local = os.path.join(local, os.path.basename(clean_url))
    logger.warning('wget(): src: %s, local: %s', url, local)

    # actually download the file
    try:
        if url.startswith('http'):
            logger.info('http from %s to %s', url, local)
            with _http_helper(options) as s:
                r = s.get(url, stream=True, timeout=300)
                with open(local, 'wb') as f:
                    for chunk in r.iter_content(65536):
                        f.write(chunk)
                # NOTE(review): status is checked after writing, so an
                # http error body may hit disk first; the cleanup below
                # removes the partial file when this raises
                r.raise_for_status()
        elif url.startswith('file:'):
            url = url[5:]
            logger.info('copy from %s to %s', url, local)
            if os.path.exists(url):
                copy(url, local)
        elif url.startswith('gsiftp:') or url.startswith('ftp:'):
            logger.info('gsiftp from %s to %s', url, local)
            GridFTP.get(url, filename=local)
        else:
            raise Exception("unsupported protocol %s" % url)
        if not os.path.exists(local):
            raise Exception('download failed - file does not exist')
    except Exception:
        # remove any partial download before propagating the error
        removedirs(local)
        raise
    return local
def upload(local, url, options={}):
    """Upload a local file to a URL, verifying by sha512 checksum.

    Directories are tarred before upload.  Supports http(s):, file:,
    ftp:, and gsiftp: destinations; a bare absolute path is treated as
    a file: URL.

    Args:
        local: source path (env vars expanded); a directory is tarred
        url: destination URL (env vars expanded)
        options: optional dict forwarded to the http session helper

    Raises:
        Exception: on unsupported protocol, missing local file, or
            checksum mismatch after upload
    """
    local = os.path.expandvars(local)
    url = os.path.expandvars(url)
    if not isurl(url):
        if url.startswith('/'):
            url = 'file:'+url
        else:
            raise Exception("unsupported protocol %s" % url)
    if local.startswith('file:'):
        local = local[5:]
    if os.path.isdir(local):
        # tar directories so a single object is uploaded
        compress(local, 'tar')
        local += '.tar'
    logger.warning('wput(): local: %s, url: %s', local, url)
    if not os.path.exists(local):
        logger.warning('upload: local path, %s, does not exist', local)
        raise Exception('local file does not exist')
    # checksum the source so the upload can be verified afterwards
    chksum = sha512sum(local)
    chksum_type = 'sha512'  # NOTE(review): currently unused

    # actually upload the file
    if url.startswith('http'):
        logger.info('http from %s to %s', local, url)
        with _http_helper(options) as s:
            with open(local, 'rb') as f:
                # stream the file as a multipart form field
                m = MultipartEncoder(
                    fields={'field0': ('filename', f, 'text/plain')}
                )
                r = s.post(url, timeout=300, data=m,
                           headers={'Content-Type': m.content_type})
                r.raise_for_status()
            # get checksum: download the object back and compare digests
            r = s.get(url, stream=True, timeout=300)
            try:
                with open(local+'.tmp', 'wb') as f:
                    for chunk in r.iter_content(65536):
                        f.write(chunk)
                r.raise_for_status()
                if sha512sum(local+'.tmp') != chksum:
                    raise Exception('http checksum error')
            finally:
                removedirs(local+'.tmp')
    elif url.startswith('file:'):
        # use copy command
        url = url[5:]
        if os.path.exists(url):
            logger.warning('put: file already exists. overwriting!')
            removedirs(url)
        copy(local, url)
        if sha512sum(url) != chksum:
            raise Exception('file checksum error')
    elif url.startswith('gsiftp:') or url.startswith('ftp:'):
        try:
            GridFTP.put(url, filename=local)
        except Exception:
            # because d-cache doesn't allow overwriting, try deletion
            GridFTP.delete(url)
            GridFTP.put(url, filename=local)
        ret = GridFTP.sha512sum(url)
        if ret != chksum:
            raise Exception('gridftp checksum error')
    else:
        raise Exception("unsupported protocol %s" % url)
def delete(url, options={}):
    """Delete a url or file"""
    url = os.path.expandvars(url)
    # promote a bare existing path to a file: URL
    if os.path.exists(url) and not isurl(url):
        url = 'file:'+url
    if url.startswith('http'):
        logger.info('delete http: %s', url)
        with _http_helper(options) as session:
            resp = session.delete(url, timeout=300)
            resp.raise_for_status()
    elif url.startswith('file:'):
        path = url[5:]
        logger.info('delete file: %r', path)
        if os.path.exists(path):
            removedirs(path)
    elif url.startswith(('gsiftp:', 'ftp:')):
        logger.info('delete gsiftp: %r', url)
        GridFTP.rmtree(url)
    else:
        raise Exception("unsupported protocol %s" % url)
def isurl(url):
    """Determine if this is a supported protocol.

    Args:
        url: candidate URL; non-string values return False

    Returns:
        True if the URL starts with a supported scheme prefix
    """
    prefixes = ('file:', 'http:', 'https:', 'ftp:', 'ftps:', 'gsiftp:')
    # fix: the original py2 fallback called the builtin `reduce`, which
    # does not exist in Python 3, so it always returned False via the
    # nested except; str.startswith handles a tuple directly.
    try:
        return url.startswith(prefixes)
    except (AttributeError, TypeError):
        return False