Source code for iceprod.server.dbmethods.web

"""
Website database methods
"""

import os
import logging
from datetime import datetime,timedelta
from functools import partial,reduce
import operator
from collections import OrderedDict, Iterable, defaultdict
import math
import uuid
import shutil
from io import BytesIO

import tornado.gen

import iceprod.core.functions
from iceprod.core.jsonUtil import json_encode,json_decode,json_compressor
from iceprod.server import GlobalID
from iceprod.server.dbmethods import _Methods_Base,datetime2str,str2datetime, filtered_input

logger = logging.getLogger('dbmethods.web')

class web(_Methods_Base):
    """
    The website DB methods.
    """
    @tornado.gen.coroutine
    def web_get_tasks_by_status(self, gridspec=None, dataset_id=None):
        """
        Get the number of tasks in each status on this site and plugin.

        Args:
            gridspec (str): (optional) grid and plugin id
            dataset_id (str): (optional) dataset id

        Returns:
            dict: {status: num}
        """
        query = 'select search.task_status, count(*) as num from search '
        args = []
        if dataset_id:
            query += ' where search.dataset_id = ? '
            args.append(dataset_id)
        if gridspec:
            # pick the right connector depending on whether a condition
            # has already been added
            query += ' where ' if 'where' not in query else ' and '
            query += ' search.gridspec like ? '
            args.append('%'+gridspec+'%')
        query += ' group by search.task_status '
        rows = yield self.parent.db.query(query, tuple(args))
        # each row is a (task_status, count) pair
        raise tornado.gen.Return(dict(rows))
@tornado.gen.coroutine
[docs] def web_get_datasets(self, gridspec=None, groups=None, **filters): """ Get the number of datasets in each state on this site and plugin. Filters are specified as key=['match1','match2'] Args: gridspec (str): grid and plugin id groups (iterable): Fields to group by **filters (dict): (optional) filters for the query Returns: list: [{dataset}] """ sql = 'select ' if groups: groups = list(map(filtered_input,groups)) sql += ','.join(groups) + ', count(*) as num ' else: sql += ' * ' sql += ' from dataset ' bindings = [] if gridspec or any(filters.values()): sql += ' where ' if gridspec: sql += ' gridspec like ? ' bindings.append('%'+gridspec+'%') for f in filters: if filters[f]: sql += ' '+filtered_input(f)+' in (' sql += ','.join('?' for _ in range(len(filters[f]))) sql += ') ' bindings.extend(filters[f]) if groups: sql += ' group by ' + ','.join(groups) ret = yield self.parent.db.query(sql, bindings) def grouper(data, groups, val): if len(groups) == 1: data[groups[0]] = val else: if groups[0] not in data: data[groups[0]] = {} grouper(data[groups[0]], groups[1:], val) if groups: dataset_groups = {} for row in ret: grouper(dataset_groups,row[:-1],row[-1]) ret = dataset_groups else: ret = [self._list_to_dict('dataset',x) for x in ret] ret.sort(key=lambda x:x['start_date'], reverse=True) raise tornado.gen.Return(ret)
@tornado.gen.coroutine
[docs] def web_get_datasets_details(self, dataset_id=None, status=None, gridspec=None): """ Get the number of datasets in each state on this site and plugin. Args: dataset_id (str): dataset id status (str): dataset status gridspec (str): grid and plugin id Returns: dict: {status:num} """ sql = 'select * from dataset ' bindings = tuple() if dataset_id: sql += ' where dataset.dataset_id = ? ' bindings += (dataset_id,) if status: if 'where' not in sql: sql += ' where ' else: sql += ' and ' sql += ' dataset.status = ? ' bindings += (status,) if gridspec: if 'where' not in sql: sql += ' where ' else: sql += ' and ' sql += ' dataset.gridspec like ? ' bindings += ('%'+gridspec+'%',) ret = yield self.parent.db.query(sql, bindings) datasets = {} for row in ret: tmp = self._list_to_dict('dataset',row) datasets[tmp['dataset_id']] = tmp raise tornado.gen.Return(datasets)
@tornado.gen.coroutine
[docs] def web_get_tasks_details(self, task_id=None, status=None, gridspec=None, dataset_id=None): """ Get the number of tasks in each state on this site and plugin. Args: task_id (str): task id status (str): task status gridspec (str): grid and plugin id dataset_id (str): dataset id Returns: dict: {status:num} """ sql = 'select * from search ' bindings = tuple() if task_id: sql += ' where task_id = ? ' bindings += (task_id,) if status: if 'where' not in sql: sql += ' where ' sql += ' task_status = ? ' bindings += (status,) if dataset_id: if 'where' not in sql: sql += ' where ' else: sql += ' and ' sql += ' dataset_id = ? ' bindings += (dataset_id,) if gridspec: if 'where' not in sql: sql += ' where ' else: sql += ' and ' sql += ' gridspec like ? ' bindings += ('%'+gridspec+'%',) ret = yield self.parent.db.query(sql, bindings) tasks = {} job_ids = defaultdict(list) for row in ret: tmp = self._list_to_dict('search',row) tasks[tmp['task_id']] = tmp job_ids[tmp['job_id']].append(tmp['task_id']) sql = 'select * from task where task_id in (%s)' for f in self._bulk_select(sql, tasks): for row in (yield f): tmp = self._list_to_dict('task',row) tasks[tmp['task_id']].update(tmp) sql = 'select job_id, job_index from job where job_id in (%s)' for f in self._bulk_select(sql, job_ids): for job_id,job_index in (yield f): for task_id in job_ids[job_id]: tasks[task_id]['job_index'] = job_index raise tornado.gen.Return(tasks)
@tornado.gen.coroutine
[docs] def web_get_logs(self, task_id, lines=None): """ Get the logs for a task. Args: task_id (str): task id lines (int): tail this number of lines (default: all lines) Returns: dict: {log_name:text} """ sql = 'select * from task_log where task_id = ?' bindings = (task_id,) ret = yield self.parent.db.query(sql, bindings) logs = {} for row in ret: tmp = self._list_to_dict('task_log',row) if tmp['name'] and tmp['data']: data = json_compressor.uncompress(tmp['data']) if lines and isinstance(lines,int): data = '\n'.join(data.rsplit('\n',lines+1)[-1*lines:]) logs[tmp['name']] = data def sort_key(k): log_order = ['stdout','stderr','stdlog'] if k in log_order: return '_'+str(log_order.index(k)) else: return k ret = OrderedDict((k,logs[k]) for k in sorted(logs,key=sort_key)) raise tornado.gen.Return(ret)
@tornado.gen.coroutine
[docs] def web_get_gridspec(self): """ Get the possible gridspecs that we know about. Returns: dict: {gridspecs} """ sql = 'select site_id,queues from site' bindings = tuple() ret = yield self.parent.db.query(sql, bindings) gridspecs = {} for site_id,queues in ret: try: gridspecs.update(json_decode(queues)) except Exception: pass raise tornado.gen.Return(gridspecs)
[docs] def web_get_sites(self, **kwargs): """Get sites matching kwargs""" # TODO: finish this raise NotImplementedException()
@tornado.gen.coroutine
[docs] def web_get_dataset_by_name(self, name, callback=None): """ Get a dataset by its name. Args: name (str): dataset name Returns: str: dataset id """ sql = 'select dataset_id from dataset where name = ?' bindings = (name,) ret = yield self.parent.db.query(sql, bindings) if len(ret) == 1: raise tornado.gen.Return(ret[0][0]) else: raise Exception('name not found')
    @tornado.gen.coroutine
    def web_get_task_completion_stats(self, dataset_id):
        """
        Get the task completion stats for a dataset.

        Columns: task_name task_type num_queued num_running num_completions
                 avg_runtime max_runtime min_runtime error_count efficiency

        Args:
            dataset_id (str): dataset id

        Returns:
            dict: {task_rel_id: {column: num}}, ordered by task_index
        """
        # get task name/type for each task_rel row of the dataset
        logger.info('get task name/type')
        task_rel = {}        # task_rel_id -> ('GPU'|'CPU', name)
        task_rel_index = {}  # task_index -> task_rel_id
        sql = 'select task_rel_id, task_index, name, requirements '
        sql += ' from task_rel where dataset_id = ?'
        bindings = (dataset_id,)
        ret = yield self.parent.db.query(sql, bindings)
        for trid, index, name, req in ret:
            # classify as GPU if the requirements text mentions 'gpu'
            task_rel[trid] = ('GPU' if 'gpu' in req.lower() else 'CPU', name)
            task_rel_index[index] = trid
        # get sorted order for task_rel_ids (by task_index)
        task_rel_ids = [task_rel_index[x] for x in sorted(task_rel_index)]
        # get status numbers, aggregated per (task_rel_id, status)
        logger.info('get status numbers')
        sql = 'select count(*), status, sum(walltime), sum(walltime_err), '
        sql += 'sum(walltime_err_n), max(walltime), min(walltime), task_rel_id '
        sql += 'from task where task_rel_id in (%s) group by task_rel_id,status'
        # positional accumulator per task_rel_id:
        # [0]=waiting [1]=queued [2]=processing [3]=error [4]=complete
        # [5]=sum walltime [6]=sum walltime+err [7]=err count
        # [8]=max walltime [9]=min walltime
        task_groups = {trid: [0,0,0,0,0,0.0,0.0,0,0,0] for trid in task_rel}
        for f in self._bulk_select(sql, task_rel_ids):
            ret = yield f
            for n,status,wall,wall_e,wall_en,wall_max,wall_min,trid in ret:
                if status == 'waiting':
                    task_groups[trid][0] += n
                elif status == 'queued':
                    task_groups[trid][1] += n
                elif status == 'processing':
                    task_groups[trid][2] += n
                elif status in ('resume','reset','failed'):
                    task_groups[trid][3] += n
                elif status == 'complete':
                    # NOTE(review): walltime accumulators are updated only for
                    # completed tasks — confirm against upstream source; the
                    # scraped formatting does not show the branch nesting.
                    task_groups[trid][4] += n
                    task_groups[trid][5] += wall
                    task_groups[trid][6] += wall+wall_e
                    task_groups[trid][7] += wall_en
                    # track overall max/min; 0 means "unset" so far
                    if ((not task_groups[trid][8]) or
                            task_groups[trid][8] < wall_max):
                        task_groups[trid][8] = wall_max
                    if ((not task_groups[trid][9]) or
                            task_groups[trid][9] > wall_min):
                        task_groups[trid][9] = wall_min
        logger.info('make stats')
        stats = OrderedDict()
        for trid in task_rel_ids:
            if task_groups[trid][6]:
                # avg over completions; efficiency = walltime/(walltime+err)
                avg = task_groups[trid][5]/task_groups[trid][4]
                eff = task_groups[trid][5]/task_groups[trid][6]
            else:
                avg = 0
                eff = 0
            stats[trid] = {
                'task_name': task_rel[trid][1],
                'task_type': task_rel[trid][0],
                'num_waiting': task_groups[trid][0],
                'num_queued': task_groups[trid][1],
                'num_running': task_groups[trid][2],
                'num_error': task_groups[trid][3],
                'num_completions': task_groups[trid][4],
                'avg_runtime': avg,
                'max_runtime': task_groups[trid][8],
                'min_runtime': task_groups[trid][9],
                'error_count': task_groups[trid][7],
                'efficiency': eff,
            }
        raise tornado.gen.Return(stats)
@tornado.gen.coroutine
[docs] def web_get_job_counts_by_status(self, status=None, dataset_id=None): """ Get count of jobs by status. Args: status (str): status to restrict by dataset_id (str): dataset id Returns: dict: {status: count} """ where_query = {} if dataset_id: where_query['dataset_id'] = dataset_id if status: where_query['status'] = status sql = 'select status,count(*) from job' if where_query: sql += ' where ' sql += ' and '.join(w+' = ?' for w in where_query) sql += ' group by status' bindings = tuple(where_query.values()) ret = yield self.parent.db.query(sql, bindings) jobs = {status:num for status,num in ret} raise tornado.gen.Return(jobs)
@tornado.gen.coroutine
[docs] def web_get_jobs_by_status(self, status=None, dataset_id=None): """ Get basic job info. Args: status (str): status to restrict by dataset_id (str): dataset id Returns: dict: [job_info] """ where_query = {} if dataset_id: where_query['dataset_id'] = dataset_id if status: where_query['status'] = status sql = 'select * from job' if where_query: sql += ' where ' sql += ' and '.join(w+' = ?' for w in where_query) bindings = tuple(where_query.values()) ret = yield self.parent.db.query(sql, bindings) jobs = [self._list_to_dict(['job'],row) for row in ret] jobs.sort(key=lambda j:j['job_index']) raise tornado.gen.Return(jobs)
    @tornado.gen.coroutine
    def web_get_jobs_details(self, job_id):
        """
        Get job details for a job_id, including its tasks with merged
        requirements from the dataset's task_rel rows.

        Args:
            job_id (str): job_id

        Returns:
            dict: job dict with 'dataset_id' and 'tasks' keys added
        """
        # base job row
        sql = 'select * from job where job_id = ?'
        bindings = (job_id,)
        ret = yield self.parent.db.query(sql, bindings)
        job = {}
        for row in ret:
            job.update(self._list_to_dict(['job'], row))
        # all search+task rows for this job
        sql = 'select search.*,task.* from search '
        sql += ' join task on search.task_id = task.task_id '
        sql += ' where job_id = ?'
        bindings = (job_id,)
        ret = yield self.parent.db.query(sql, bindings)
        tasks = []
        for row in ret:
            tmp = self._list_to_dict(['search','task'], row)
            # hoist dataset_id onto the job; every task of a job shares it
            job['dataset_id'] = tmp.pop('dataset_id')
            if tmp['requirements']:
                tmp['requirements'] = json_decode(tmp['requirements'])
            tasks.append(tmp)
        if 'dataset_id' in job:
            # per-task default requirements and ordering come from task_rel
            sql = 'select task_rel_id,task_index,requirements '
            sql += ' from task_rel where dataset_id = ?'
            bindings = (job['dataset_id'],)
            ret = yield self.parent.db.query(sql, bindings)
            task_rels = {}
            for task_rel_id,task_index,requirements in ret:
                reqs = {}
                if requirements:
                    # best-effort decode; undecodable requirements -> {}
                    try:
                        reqs = json_decode(requirements)
                    except Exception:
                        pass
                task_rels[task_rel_id] = {
                    'index': task_index,
                    'reqs': reqs,
                }
            for t in tasks:
                # task-level requirements override the task_rel defaults
                tmp = task_rels[t['task_rel_id']]['reqs'].copy()
                if t['requirements']:
                    tmp.update(t['requirements'])
                t['requirements'] = tmp
                t['index'] = task_rels[t['task_rel_id']]['index']
            tasks.sort(key=lambda t: t['index'])
        job['tasks'] = tasks
        raise tornado.gen.Return(job)