Source code for dammit.handler

# Copyright (C) 2015-2018 Camille Scott
# All rights reserved.
#
# This software may be modified and distributed under the terms
# of the BSD license.  See the LICENSE file for details.

from collections import OrderedDict
from os import mkdir, path

from doit.cmd_base import TaskLoader
from doit.doit_cmd import DoitMain
from doit.dependency import Dependency, SqliteDB

from dammit.profile import StartProfiler
from dammit.utils import Move
from dammit import ui


class TaskHandler(TaskLoader):

    def __init__(self, directory, logger, files=None,
                 profile=False, db=None, n_threads=1, **doit_config_kwds):
        '''Stores tasks and the files they operate on, along with
        doit config and other metadata. This is the core of the pipelines:
        it passes its tasks along to doit for execution, and can check task
        and pipeline completion status.

        Args:
            directory (str): The directory in which to run the tasks. Will be
                created if it doesn't exist.
            logger (logging.Logger): Logger to record to.
            files (dict): Files used by the handler. Starts empty if omitted.
            profile (bool): If True, profile task execution.
            db (str): Name of the doit database. If omitted the database file
                is `doit.db`, otherwise `<db>.doit.db`; either way it lives
                inside `directory`.
            n_threads (int): Number of processes requested from doit (via its
                `-n` option) when `run` is called without explicit arguments.
            **doit_config_kwds: Keyword arguments passed to doit.

        Attributes:
            files (dict): Files used by the tasks.
            directory (str): Working directory for execution.
            tasks (OrderedDict): The tasks to execute.
            dep_file (str): Path of the doit database.
            doit_config (dict): The doit configuration given to the task runner.
            doit_dep_mgr (doit.dependency.Dependency): Doit object to track task
                status.
            n_threads (int): Number of processes to request from doit.
            profile (bool): Whether to run the profiler on tasks.
            logger (logging.Logger): Logger to use.

        Raises:
            TypeError: If `files` is given but is not a dict.
            OSError: If `directory` could not be created and does not already
                exist as a directory.
        '''

        super(TaskHandler, self).__init__()

        if files is None:
            self.files = {}
        elif not isinstance(files, dict):
            raise TypeError('files must be of type dict')
        else:
            self.files = files

        self.tasks = OrderedDict()

        self.directory = directory
        try:
            mkdir(directory)
        except OSError:
            # An already-existing directory is the expected, harmless case.
            # Anything else (permissions, missing parent, a regular file in
            # the way) would only fail later with a less helpful error.
            if not path.isdir(directory):
                raise

        if db is None:
            dep_file = path.join(self.directory, 'doit.db')
        else:
            dep_file = path.join(self.directory, '{0}.doit.db'.format(db))
        self.dep_file = dep_file
        logger.debug('Dependency Database File: {0}'.format(dep_file))
        self.doit_config = dict(dep_file=self.dep_file,
                                reporter=ui.GithubMarkdownReporter,
                                **doit_config_kwds)
        self.doit_dep_mgr = Dependency(SqliteDB, dep_file)
        self.n_threads = n_threads
        self.profile = profile
        self.logger = logger

    def register_task(self, name, task, files=None):
        '''Register a new task and its files with the handler.

        It may seem redundant or confusing to give the tasks a name different
        than their internal doit name. I do this because doit tasks need to have
        names as unique as possible, so that they can be reused in different
        projects. A particular TaskHandler instance is only used for one
        pipeline run, and allowing different names makes it easier to reference
        tasks from elsewhere.

        Registering a name that is already present replaces the earlier task.

        Args:
            name (str): Name of the task. Does not have to correspond to doit's
                internal task name.
            task (:obj:): Either a dictionary or Task object.
            files (dict): Dictionary of files used. Merged into the handler's
                `files`; treated as empty if omitted.

        Raises:
            TypeError: If `files` is given but is not a dict.
        '''

        if files is None:
            files = {}
        if not isinstance(files, dict):
            raise TypeError('files must be of type dict')

        self.tasks[name] = task
        self.files.update(files)
        self.logger.debug('registered task {0}: {1}\n'
                          '  with files {2}'.format(name, task, files))

    def clear_tasks(self):
        '''Empty the task dictionary.

        The registered files are left untouched.
        '''

        self.logger.debug('Clearing {0} tasks'.format(len(self.tasks)))
        # Keep the same container type that __init__ sets up.
        self.tasks = OrderedDict()

    def _query_status(self, task):
        '''Ask the doit dependency manager for the status object of `task`.'''

        return self.doit_dep_mgr.get_status(task, self.tasks.values(),
                                            get_log=True)

    def get_status(self, task, move=False):
        '''Get the up-to-date status of a single task.

        Args:
            task (str): The task name to look up. A task object may also be
                given directly, in which case no lookup is done.
            move (bool): If True, move to the handler's directory before
                checking. Whether this is necessary depends mostly on whether
                the task uses relative or absolute paths.
        Returns:
            str: The string representation of the status as reported by the
            doit dependency manager. `check_uptodate` treats "up-to-date" as
            complete and any other value (such as "run") as out of date.
        Raises:
            KeyError: If `task` is a name that has not been registered.
        '''

        if isinstance(task, str):
            try:
                task = self.tasks[task]
            except KeyError:
                self.logger.error('Task not found:{0}'.format(task))
                raise
        self.logger.debug('Getting status for task {0}'.format(task.name))
        if move:
            with Move(self.directory):
                status = self._query_status(task)
        else:
            status = self._query_status(task)
        self.logger.debug('Task {0} had status {1}'.format(task, status.status))
        # Not every status object carries a `reasons` attribute.
        try:
            reasons = status.reasons
        except AttributeError:
            pass
        else:
            self.logger.debug('Task {0} had reasons {1}'.format(task, reasons))

        return status.status

    def print_statuses(self, uptodate_msg='All tasks up-to-date!',
                             outofdate_msg='Some tasks out of date!'):
        '''Print the up-to-date status of all tasks.

        When some tasks are out of date, the up-to-date and out-of-date task
        names are additionally listed separately.

        Args:
            uptodate_msg (str): The message to print if all tasks are up to
                date.
            outofdate_msg (str): The message to print if at least one task is
                out of date.
        Returns:
            tuple: A bool (True if all up to date) and a dictionary of statuses.
        '''

        uptodate, statuses = self.check_uptodate()
        if uptodate:
            print(ui.paragraph(uptodate_msg))
        else:
            print(ui.paragraph(outofdate_msg))
            uptodate_list = [t for t, s in statuses.items() if s]
            outofdate_list = [t for t, s in statuses.items() if not s]
            if uptodate_list:
                print('\nUp-to-date tasks:')
                print(ui.listing(uptodate_list))
            if outofdate_list:
                print('\nOut-of-date tasks:')
                print(ui.listing(outofdate_list))
        return uptodate, statuses

    def check_uptodate(self):
        '''Check if all tasks are up-to-date, ie if the pipeline is complete.
        Note that this moves to the handler's directory to lessen issues with
        relative versus absolute paths.

        Returns:
            tuple: A bool (True if all tasks are up to date; also True when no
            tasks are registered) and a dictionary mapping each registered
            task name to a bool telling whether that task is up to date.
        '''

        with Move(self.directory):
            statuses = {}
            for task_name, task in self.tasks.items():
                statuses[task_name] = self.get_status(task) == 'up-to-date'
            return all(statuses.values()), statuses

    def load_tasks(self, cmd, opt_values, pos_args):
        '''Internal to doit -- triggered by the TaskLoader.

        Args:
            cmd: Unused here; supplied by doit.
            opt_values: Unused here; supplied by doit.
            pos_args: Unused here; supplied by doit.
        Returns:
            tuple: The list of registered tasks (in registration order) and
            the doit configuration dictionary.
        '''

        self.logger.debug('loading {0} tasks'.format(len(self.tasks)))
        # Hand doit a real list rather than a live dictionary view.
        return list(self.tasks.values()), self.doit_config

    def run(self, doit_args=None, verbose=True):
        '''Run the pipeline. Moves to the directory, loads the tasks into doit,
        and executes the tasks that are not up-to-date.

        Args:
            doit_args (list): Args that would be passed to the doit shell
                command. By default, just run; in that case `-n <n_threads>`
                is appended when the handler has more than one thread. An
                explicitly given list is passed through unchanged.
            verbose (bool): If True, print UI stuff.
        Returns:
            int: Exit status of the doit command.
        '''
        if verbose:
            print(ui.header('Run Tasks', level=4))
        if doit_args is None:
            doit_args = ['run']
            if self.n_threads > 1:
                doit_args.extend(['-n', str(self.n_threads)])

        runner = DoitMain(self)

        with Move(self.directory):
            if self.profile is True:
                # Profile output is written as profile.csv under the handler's
                # directory path.
                profile_fn = path.join(self.directory, 'profile.csv')
                with StartProfiler(filename=profile_fn):
                    return runner.run(doit_args)
            else:
                return runner.run(doit_args)