# Source code for iceprod.core.i3exec

"""
The task runner.

Run it with `python -m iceprod.core.i3exec`.

optional arguments:
  -h, --help            show this help message and exit
  -f CFGFILE, --cfgfile CFGFILE
                        Specify config file
  -u URL, --url URL     URL of the iceprod server
  -p PASSKEY, --passkey PASSKEY
                        passkey for communication with iceprod server
  --pilot_id PILOTID    ID of the pilot (if this is a pilot)
  -d, --debug           Enable debug actions and logging
  --offline             Enable offline mode (don't talk with server)
  --logfile LOGFILE     Specify the logfile to use
  --job JOB             Index of the job to run
  --task TASK           Name of the task to run
"""

from __future__ import absolute_import, division, print_function

import os
import sys
import logging
import logging.config
import time
import signal
from functools import partial
import tempfile
import shutil
import threading

from iceprod.core import to_file, constants
import iceprod.core.dataclasses
import iceprod.core.serialization
import iceprod.core.exe
from iceprod.core.exe_json import ServerComms
import iceprod.core.pilot
import iceprod.core.resources

import iceprod.core.logger

def handler(signum, frame):
    """Signal handler. Exit on SIGQUIT or SIGINT.

    Args:
        signum (int): the delivered signal number
        frame: the current stack frame (unused)
    """
    # lazy %-args: logging interpolates only if the record is actually emitted
    logging.warning('Signal handler called with signal %s', signum)
    logging.warning('Exiting...')
    # hard exit: os._exit() skips atexit/cleanup handlers, which is the
    # safe way to terminate immediately from inside a signal handler
    os._exit(0)
def load_config(cfgfile):
    """Load a config from file, serialized string, dictionary, etc"""
    logger = logging.getLogger('i3exec')

    # string: either a path to a JSON file or a serialized JSON blob
    if isinstance(cfgfile, str):
        try:
            serializer = iceprod.core.serialization.serialize_json
            if os.path.exists(cfgfile):
                loaded = serializer.load(cfgfile)
            else:
                loaded = serializer.loads(cfgfile)
            if not loaded:
                raise Exception('Config not found')
        except Exception as e:
            logger.critical('Error loading configuration: %s' % str(e))
            raise
        return loaded

    # already a Job dataclass: pass through unchanged
    if isinstance(cfgfile, iceprod.core.dataclasses.Job):
        return cfgfile

    # plain dict: convert to the dataclass representation
    if isinstance(cfgfile, dict):
        return iceprod.core.serialization.dict_to_dataclasses(cfgfile)

    # anything else is unsupported
    logger.warning('cfgfile: %r', cfgfile)
    raise Exception('cfgfile is not a str or a Job')
def main(cfgfile=None, logfile=None, url=None, debug=False, passkey='',
         pilot_id=None, offline=False, offline_transfer=False):
    """Main task runner for iceprod.

    Sets up logging and signal handlers, loads the configuration, then
    dispatches: offline mode runs the config directly; a config with
    tasks runs a single task through the server; otherwise a pilot is
    started to fetch many tasks from the server.

    Args:
        cfgfile (str or dict or Job): config file path, serialized config,
            or an already-loaded config object
        logfile (str): (optional) path for the logfile
        url (str): URL of the iceprod server (required unless offline)
        debug (bool): (optional) enable debug logging
        passkey (str): passkey for server communication
        pilot_id (str): pilot id (required in pilot mode)
        offline (bool): (optional) run without talking to a server
        offline_transfer (bool): (optional) enable offline data transfers

    Raises:
        Exception: missing cfgfile; missing url (online mode);
            missing gridspec or pilot_id (pilot mode)
    """
    # set up logger: INFO when debugging, WARNING otherwise
    if debug:
        logl = 'INFO'
    else:
        logl = 'WARNING'
    if logfile:
        logf = os.path.abspath(os.path.expandvars(logfile))
    else:
        # default log location comes from the iceprod constants table
        logf = os.path.abspath(os.path.expandvars(constants['stdlog']))
    # 67108864 bytes = 64 MB max log size, keep 1 rotated file
    iceprod.core.logger.set_logger(loglevel=logl, logfile=logf, logsize=67108864, lognum=1)
    logger = logging.getLogger('i3exec')
    logger.warning('starting...%s ' % logger.name)

    # SIGQUIT/SIGINT hard-exit the process via handler() (os._exit)
    signal.signal(signal.SIGQUIT, handler)
    signal.signal(signal.SIGINT, handler)

    # accept a path/serialized string, or an already-built config object
    if cfgfile is None:
        logger.critical('There is no cfgfile')
        raise Exception('missing cfgfile')
    elif isinstance(cfgfile, str):
        config = load_config(cfgfile)
    else:
        config = cfgfile
    logger.info('config: %r',config)

    if offline:
        # run in offline mode: no server, run the config and return
        runner(config, url, debug=debug, offline=offline, offline_transfer=offline_transfer)
        return

    # if we are not in offline mode, we need a url
    if not url:
        logger.critical('url missing')
        raise Exception('url missing')

    # setup jsonRPC; optional credentials / ssl settings come from config options
    kwargs = {}
    if 'username' in config['options']:
        kwargs['username'] = config['options']['username']
    if 'password' in config['options']:
        kwargs['password'] = config['options']['password']
    if 'ssl' in config['options'] and config['options']['ssl']:
        # presumably a dict of ssl options merged into the comms kwargs -- TODO confirm
        kwargs.update(config['options']['ssl'])
    rpc = ServerComms(url+'/jsonrpc', passkey, None, **kwargs)

    if 'tasks' in config and config['tasks']:
        logger.info('default configuration - a single task')
        if not offline:
            # tell the server that we are processing this task
            try:
                if 'task_id' not in config['options']:
                    raise Exception('config["options"]["task_id"] not specified, '
                                    'so cannot update status')
                rpc.processing(config['options']['task_id'])
            except Exception:
                # best-effort status update: log and continue
                logger.error('json error', exc_info=True)
        # set up stdout and stderr: redirect to files so they can be uploaded
        stdout = partial(to_file,sys.stdout,constants['stdout'])
        stderr = partial(to_file,sys.stderr,constants['stderr'])
        with stdout(), stderr():
            runner(config, url, rpc=rpc, debug=debug)
    else:
        # no tasks in config: act as a pilot that pulls tasks from the server
        logger.info('pilot mode - get many tasks from server')
        if 'gridspec' not in config['options']:
            logger.critical('gridspec missing')
            raise Exception('gridspec missing')
        if not pilot_id:
            logger.critical('pilot_id missing')
            raise Exception('pilot_id missing')
        pilot_kwargs = {}
        if 'run_timeout' in config['options']:
            pilot_kwargs['run_timeout'] = config['options']['run_timeout']
        # the Pilot drives runner() itself for each task it receives
        iceprod.core.pilot.Pilot(config, rpc=rpc, debug=debug,
                                 runner=partial(runner, rpc=rpc, url=url, debug=debug),
                                 pilot_id=pilot_id, **pilot_kwargs)

    logger.warning('finished running normally; exiting...')
def runner(config, url, rpc=None, debug=False, offline=False, offline_transfer=False):
    """Run a config.

    #. Set some default options if not set in configuration.
    #. Set up global env based on the configuration.
    #. Run tasks

       * If a task is specified in the configuration options:

         If the task is specified by name or number, run only that task.
         If there is a problem finding the task specified, raise a
         critical error.

       * Otherwise, run all tasks in the configuration in the order
         they were written.

    #. Destroy the global env, uploading and deleting files as needed.
    #. Upload the log, error, and output files if specified in options.

    Args:
        config (`iceprod.core.dataclasses.Job`): Dataset configuration
        url (str): URL to server
        rpc (:py:class:`iceprod.core.exe_json.ServerComms`): RPC object
        debug (bool): (optional) turn on debug logging
        offline (bool): (optional) enable offline mode
        offline_transfer (bool): (optional) enable/disable offline data transfers
    """
    logger = logging.getLogger('i3exec_runner')

    # set logging verbosity: debug flag implies INFO unless loglevel was set
    if 'debug' not in config['options']:
        config['options']['debug'] = debug
    if ('debug' in config['options'] and config['options']['debug']
            and 'loglevel' not in config['options']):
        config['options']['loglevel'] = 'INFO'
    if ('loglevel' in config['options'] and
            config['options']['loglevel'].upper() in iceprod.core.logger.setlevel):
        try:
            iceprod.core.logger.set_log_level(config['options']['loglevel'])
        except Exception:
            # non-fatal: keep running at the previous level
            logger.warning('failed to set a new log level', exc_info=True)

    # make sure some basic options are set (defaults for anything missing)
    if 'job' not in config['options']:
        config['options']['job'] = 0
    if 'jobs_submitted' not in config['options']:
        config['options']['jobs_submitted'] = 1
    if 'resource_url' not in config['options']:
        config['options']['resource_url'] = str(url)+'/download'
    if 'offline' not in config['options']:
        config['options']['offline'] = offline
    if 'offline_transfer' not in config['options']:
        config['options']['offline_transfer'] = offline_transfer
    if 'data_url' not in config['options']:
        config['options']['data_url'] = 'gsiftp://gridftp.icecube.wisc.edu/'
    if 'svn_repository' not in config['options']:
        config['options']['svn_repository'] = 'http://code.icecube.wisc.edu/svn/'
    if 'site_temp' not in config['options']:
        config['options']['site_temp'] = 'gsiftp://gridftp-scratch.icecube.wisc.edu/local/simprod/'
    if 'dataset_temp' not in config['options']:
        # $(dataset) / $(job) placeholders are expanded later by the exe env
        config['options']['dataset_temp'] = os.path.join(config['options']['site_temp'],'$(dataset)')
    if 'job_temp' not in config['options']:
        config['options']['job_temp'] = os.path.join(config['options']['dataset_temp'],'$(job)')
    if 'task_temp' not in config['options']:
        config['options']['task_temp'] = 'file:'+os.path.join(os.getcwd(),'task_temp')
    if 'tray_temp' not in config['options']:
        config['options']['tray_temp'] = 'file:'+os.path.join(os.getcwd(),'tray_temp')
    if 'local_temp' not in config['options']:
        config['options']['local_temp'] = os.path.join(os.getcwd(),'local_temp')
    if 'stillrunninginterval' not in config['options']:
        config['options']['stillrunninginterval'] = 60
    if 'upload' not in config['options']:
        config['options']['upload'] = 'logging'
    if not config['steering']:
        # make sure steering exists in the config
        config['steering'] = iceprod.core.dataclasses.Steering()

    if offline:
        try:
            import psutil
        except ImportError:
            # no psutil: skip resource tracking entirely
            resources = None
        else:
            # track resource usage in separate thread; resource_stop is the
            # shutdown flag read by the tracker loop, set in the finally below
            resource_stop = False
            resources = iceprod.core.resources.Resources(debug=debug)
            resources.claim('a')
            resources.register_process('a',psutil.Process(), os.getcwd())
            def track():
                while not resource_stop:
                    resources.check_claims()
                    time.sleep(1)
            resource_thread = threading.Thread(target=track)
            resource_thread.start()

    # make exe Config
    cfg = iceprod.core.exe.Config(config=config,rpc=rpc)
    if rpc:
        rpc.cfg = cfg

    # set up global env, based on config['options'] and config.steering
    env_opts = cfg.parseObject(config['options'], {})
    stats = {}  # NOTE(review): appears unused; env['stats'] is what gets reported
    try:
        try:
            # keep track of the start time
            start_time = time.time()
            with iceprod.core.exe.setupenv(cfg, config['steering'], {'options':env_opts}) as env:
                logger.warning("config options: %r",config['options'])
                # find tasks to run
                if 'task' in config['options']:
                    logger.warning('task specified: %r',config['options']['task'])
                    # run only this task name or number
                    name = config['options']['task']
                    # numeric strings are treated as task indexes
                    if isinstance(name, iceprod.core.dataclasses.String) and name.isdigit():
                        name = int(name)
                    if isinstance(name, iceprod.core.dataclasses.String):
                        # find task by name
                        for task in config['tasks']:
                            if task['name'] == name:
                                iceprod.core.exe.runtask(cfg, env, task)
                                break
                        else:
                            logger.critical('cannot find task named %r', name)
                            raise Exception('cannot find specified task')
                    elif isinstance(name, int):
                        # find task by index
                        if (name >= 0 and name < len(config['tasks'])):
                            iceprod.core.exe.runtask(cfg, env, config['tasks'][name])
                        else:
                            logger.critical('cannot find task index %d', name)
                            raise Exception('cannot find specified task')
                    else:
                        logger.critical('task specified in options is %r, but no task found', name)
                        raise Exception('cannot find specified task')
                    # finish task: report stats back to the server
                    if not offline:
                        rpc.finish_task(env['stats'], start_time=start_time)
                elif offline:
                    # run all tasks in order
                    for task in config['tasks']:
                        iceprod.core.exe.runtask(cfg, env, task)
                else:
                    raise Exception('task to run not specified')
        except Exception as e:
            logger.error('task failed, exiting without running completion steps.', exc_info=True)
            # set task status on server
            # NOTE(review): if setupenv itself failed, env is unbound here and
            # env['stats'] would raise NameError -- caught by the inner except
            if not offline:
                try:
                    rpc.task_error(stats=env['stats'], start_time=start_time, reason=str(e))
                except Exception as e:
                    logger.error(e)
            # forcibly turn on logging, so we can see the error
            config['options']['upload'] = 'logging'
            raise
    finally:
        # check resources: stop the tracker thread and print a summary
        if offline and resources:
            resource_stop = True
            resource_thread.join()
            print('Resources:')
            r = resources.get_final('a')
            if not r:
                print(' None')
            else:
                for k in r:
                    print(' {}: {:.2f}'.format(k,r[k]))
        # upload log files to server (runs on both success and failure paths)
        try:
            if (not offline) and 'upload' in config['options']:
                # upload may be a '|'-separated string or a list/tuple of keywords
                if isinstance(config['options']['upload'], iceprod.core.dataclasses.String):
                    upload = config['options']['upload'].lower().split('|')
                elif isinstance(config['options']['upload'],(tuple,list)):
                    upload = [x.lower() for x in config['options']['upload']]
                else:
                    raise Exception('upload config is not a valid type')
                for up in upload:
                    if up.startswith('logging'):
                        # upload err,log,out files
                        rpc.uploadLog()
                        rpc.uploadErr()
                        rpc.uploadOut()
                        break
                    elif up.startswith('log'):
                        # upload log files
                        rpc.uploadLog()
                    elif up.startswith('err'):
                        # upload err files
                        rpc.uploadErr()
                    elif up.startswith('out'):
                        # upload out files
                        rpc.uploadOut()
        except Exception as e:
            # best-effort upload: never mask the task's own outcome
            logger.error('failed when uploading logging info',exc_info=True)

    logger.warning('finished without error')
if __name__ == '__main__':
    # get arguments
    import argparse

    def _str2bool(value):
        """argparse type: parse a 'true'/'false'-style string into a bool.

        Fixes the `type=bool` pitfall: bool('False') is True because any
        non-empty string is truthy, so `--offline_transfer False` would
        silently enable transfers.
        """
        if value.lower() in ('1', 'true', 't', 'yes', 'y', 'on'):
            return True
        if value.lower() in ('0', 'false', 'f', 'no', 'n', 'off'):
            return False
        raise argparse.ArgumentTypeError('boolean value expected, got %r' % value)

    parser = argparse.ArgumentParser(description='IceProd Core')
    parser.add_argument('-f', '--cfgfile', type=str,
                        help='Specify config file')
    parser.add_argument('-u', '--url', type=str,
                        help='URL of the iceprod server')
    parser.add_argument('-p', '--passkey', type=str,
                        help='passkey for communication with iceprod server')
    parser.add_argument('--pilot_id', type=str, default=None,
                        help='ID of the pilot (if this is a pilot)')
    parser.add_argument('-d', '--debug', action='store_true', default=False,
                        help='Enable debug actions and logging')
    parser.add_argument('--offline', action='store_true', default=False,
                        help='Enable offline mode (don\'t talk with server)')
    parser.add_argument('--offline_transfer', type=_str2bool, default=False,
                        help='Enable/disable file transfer during offline mode')
    parser.add_argument('--logfile', type=str, default=None,
                        help='Specify the logfile to use')
    parser.add_argument('--job', type=int, default=None,
                        help='Index of the job to run')
    parser.add_argument('--jobs_submitted', type=int, default=None,
                        help='Total number of jobs in this dataset')
    parser.add_argument('--task', type=str, default=None,
                        help='Name of the task to run')
    args = vars(parser.parse_args())
    print(args)  # NOTE(review): echoes the passkey to stdout - consider redacting

    # check cfgfile: fall back to a cwd-relative path, else treat as missing
    if args['cfgfile'] is not None and not os.path.isfile(args['cfgfile']):
        if os.path.isfile(os.path.join(os.getcwd(), args['cfgfile'])):
            args['cfgfile'] = os.path.join(os.getcwd(), args['cfgfile'])
        else:
            args['cfgfile'] = None

    # job/task options are merged into the config's options rather than
    # passed to main() directly
    options = {k: args.pop(k) for k in ('job', 'jobs_submitted', 'task')}
    if not options['jobs_submitted'] and options['job']:
        options['jobs_submitted'] = options['job'] + 1
    options['debug'] = args['debug']

    if args['cfgfile']:
        cfgfile = load_config(args['cfgfile'])
        # command-line options only fill gaps; existing config options win
        for k in options:
            if options[k] is not None and k not in cfgfile['options']:
                cfgfile['options'][k] = options[k]
        args['cfgfile'] = cfgfile

    # start iceprod
    main(**args)