import logging
import argparse
import os
from surround import Surround
from .stages import ValidateData, TutorialData

# Data Science projects should use logging over print statements:
#
#    import logging
#    logging.info("print to output")
#
# This is so that in a production environment the output can be routed
# to log files for debugging rather than to standard out where the
# output is lost.
# The call below configures the root logger so that messages at INFO
# level and above are emitted. Note that basicConfig() does nothing if
# the root logger already has handlers configured.
logging.basicConfig(level=logging.INFO)

def main():
    """Run the tutorial Surround pipeline and write its output to disk.

    Builds a Surround instance containing a single ValidateData stage,
    passes a TutorialData object through it, writes the resulting
    ``output_data`` to ``output.txt`` under the configured
    ``output_path``, and finally logs any errors, warnings and the
    output itself.
    """

    # All Surround projects have a Surround class which is responsible
    # for configuring and running a list of stages. In this case
    # Surround has a single stage to run, the ValidateData stage.
    # The __name__ enables Surround to automatically load the config.yaml
    # file and to configure the following paths: `output_path`,
    # `data_path` and `model_path`.
    surround = Surround([ValidateData()], __name__)

    # Config is a dictionary with some utility functions for reading
    # values in from yaml files. Any value read in from a yaml file
    # can be overridden with an environment variable with a SURROUND_
    # prefix and '_' to mark nesting. A yaml file with the following
    # content:
    #
    # output:
    #   mode: True
    #
    # Can be overridden with an environment variable:
    # SURROUND_OUTPUT_MODE
    config = surround.config

    # Surround operates on an instance of SurroundData. In this case
    # the input data is a string 'data'. See stages.py for more
    # details. In most projects the input for Surround will be read
    # from a file stored in the directory input_dir.
    data = TutorialData("data")

    # Start running each stage of Surround by passing the data to
    # each stage in order. The data variable will be updated with the
    # output of each stage so there is no return value.
    surround.process(data)

    # Write the output to file
    with open(os.path.abspath(os.path.join(config["output_path"], "output.txt")), 'w') as f:
        f.write(data.output_data)

    # Check and handle any errors
    if data.errors:
        logging.error("Processing error...")

    # Log all warnings from running the pipeline.
    # logging.warning is used because the logging.warn alias is
    # deprecated and no longer exists in Python 3.13+.
    for warn in data.warnings:
        logging.warning(warn)

    # Log the result to screen
    logging.info(data.output_data)

# Run the pipeline only when this module is executed as the program entry
# point; merely importing the module does not call main().
if __name__ == "__main__":
    main()
