"""Utils"""

import os
import re
from shlex import split as shlex_split
from subprocess import PIPE, Popen
from itertools import chain
from collections import defaultdict
from typing import Dict, Union
from collections.abc import Iterable, Callable
from warnings import warn
from functools import partial

# A string meant to be usable as a python name. This is only an alias of ``str``:
# nothing in the alias itself enforces ``str.isidentifier``.
Identifier = str  # but satisfying str.isidentifier
# A dict whose keys are identifiers and whose values are (command) strings.
IdentifierCommandDict = dict[Identifier, str]
# The accepted ways of specifying a collection of identified commands: an iterable
# of identifiers, an identifier->command dict, or a callable returning such a dict.
# NOTE(review): presumably consumed by code outside this section -- confirm usage.
IdentifiedCommands = Union[
    Iterable[Identifier], IdentifierCommandDict, Callable[..., IdentifierCommandDict]
]


class ProcessError(OSError):
    """To be raised when running a command yields an error"""


def raise_process_error(stderr):
    """
    Raise a ``ProcessError`` whose message is the given stderr content.

    :param stderr: What the command wrote to its standard error. Either ``bytes``
        (what a pipe yields by default) or ``str`` (what a pipe yields when the
        process was opened in text mode).
    :raises ProcessError: Always.
    """
    if isinstance(stderr, (bytes, bytearray)):
        # errors="replace": undecodable bytes in the error output must not hide
        # the actual failure behind a UnicodeDecodeError.
        stderr = stderr.decode(errors="replace")
    raise ProcessError(stderr)


def simple_run_command(cmd, *, strip_output=True):
    """
    Run ``cmd`` through ``os.popen`` and return what it wrote to standard output.

    :param cmd: The command line to run, as a single string.
    :param strip_output: If True (default), leading and trailing whitespace is
        removed from the output before it is returned.
    :return: The command's standard output, as a ``str``.
    """
    with os.popen(cmd) as pipe:
        text = pipe.read()
    return text.strip() if strip_output else text


def strip(x):
    """Return ``x.strip()``; works for anything with a ``strip`` method (``str``, ``bytes``...)."""
    stripped = x.strip()
    return stripped


def run(
    *args,
    on_error=raise_process_error,
    egress=strip,
    stdout=PIPE,
    stderr=PIPE,
    **kwargs,
):
    """
    Run a shell command, with hooks to post-process its output and handle its errors.

    :param args: The command to run. Either the single string you would type in
        a console, or that command spread over several strings. Every string is
        tokenized with ``shlex.split`` and the resulting tokens are concatenated.
    :param on_error: Function called on the captured stderr when that stderr is
        not empty. Only the stderr content is looked at to decide this.
    :param egress: Function called on the captured stdout when stderr is empty.
        What it returns is what ``run`` returns.
    :param stdout, stderr, kwargs: Passed on to ``subprocess.Popen``.
    :return: ``egress(stdout)``, or ``on_error(stderr)`` if something was written
        to stderr (and ``on_error`` returns instead of raising).

    This is similar in spirit to the `subprocess.run
    <https://docs.python.org/3/library/subprocess.html#subprocess.run>`_ function,
    but the defaults differ, and there are two extra arguments: `on_error` and
    `egress`.

    >>> output = run('pwd')
    >>> os.path.isdir(output)  # verify that output is indeed a valid directory path
    True

    Another important difference with ``subprocess.run``: the command is not given
    as a LIST of tokens. Give the full command as one string, or cut it into
    several strings, as you please:

    >>> assert run('echo hello world') == run('echo', 'hello', 'world') == b'hello world'

    By default ``run`` returns the output as ``bytes``, stripped of leading and
    trailing whitespace. The stripping is what the default ``egress`` does.
    Specify another ``egress`` to do something else with the output.
    For example, to cast the output to a ``str``, strip it, then print it:

    >>> run('echo hello world', egress=lambda x: print(x.decode().strip()))
    hello world

    ``run`` is meant to be curried: with ``functools.partial`` you can make your
    own specialized functions that have shell commands as their backend.

    >>> from functools import partial
    >>> stripped_str = lambda x: x.decode().strip()
    >>> pwd = partial(run, 'pwd', egress=stripped_str)
    >>> ls_la = partial(run, 'ls', '-la', egress=lambda x: print(stripped_str(x)))
    >>> current_dir = pwd()
    >>> os.path.isdir(current_dir)
    True
    >>> ls_la(current_dir)  # doctest: +SKIP
    total 56
    drwxr-xr-x@  7 Thor.Whalen  staff   224 Sep 23 12:12 .
    drwxr-xr-x@ 11 Thor.Whalen  staff   352 Sep 23 11:33 ..
    -rw-r--r--@  1 Thor.Whalen  staff    48 Sep 22 12:47 __init__.py
    -rw-r--r--@  1 Thor.Whalen  staff  4649 Sep 23 11:33 base.py
    -rw-r--r--@  1 Thor.Whalen  staff   348 Sep 22 12:38 raw.py
    -rw-r--r--@  1 Thor.Whalen  staff  8980 Sep 23 12:12 util.py

    """
    # Flatten: each positional argument may itself hold several tokens.
    tokens = [token for arg in args for token in shlex_split(arg)]
    proc = Popen(tokens, stdout=stdout, stderr=stderr, **kwargs)
    out, err = proc.communicate()
    # Anything on stderr is considered an error.
    return on_error(err) if err else egress(out)


def str_if_bytes(x, encoding="utf-8", errors="strict"):
    """
    Decode ``x`` if it is a ``bytes`` instance; return anything else untouched.

    :param x: The object to (possibly) decode.
    :param encoding: Encoding given to ``bytes.decode``.
    :param errors: Error handling scheme given to ``bytes.decode``.
    :return: The decoded ``str`` if ``x`` was ``bytes``, otherwise ``x`` itself.
    """
    return x.decode(encoding, errors) if isinstance(x, bytes) else x


def print_text_egress(
    output, *, encoding="utf-8", errors="strict", end="\n", file=None
):
    """
    Print ``output``, decoding it first if it is ``bytes``.

    Meant to be used as the ``egress`` argument of the ``run`` function.

    :param output: What to print (``bytes`` are decoded, anything else is printed as is).
    :param encoding, errors: How to decode ``output`` when it is ``bytes``.
    :param end, file: Passed on to ``print``.
    :return: What ``print`` returns (``None``).
    """
    text = str_if_bytes(output, encoding, errors)
    return print(text, end=end, file=file)


# ``os.access`` with the mode fixed to ``os.X_OK``.
# Note that directories are also "executable" in that sense, hence the stricter
# ``is_executable_file`` below.
is_executable_path = partial(os.access, mode=os.X_OK)


def is_executable_file(path):
    """
    Say whether ``path`` is a file for which ``os.access`` grants execute permission.

    :param path: The path to check.
    :return: True if and only if ``os.path.isfile(path)`` and
        ``is_executable_path(path)`` both hold.
    """
    return os.path.isfile(path) and is_executable_path(path)


def is_executable_according_to_which(string: str):
    """
    Says if a string is an executable command according to the (linux) which command.

    That is, it will try finding the executable file with a ``which COMMAND``
    command, deciding that ``COMMAND`` is indeed an executable if, and only if,
    ``which`` comes back with something.

    ``string`` is handed to ``which`` as one literal argument: it is shell-quoted,
    so spaces or shell metacharacters in it are neither split nor interpreted
    (and no ``~`` or ``$VAR`` expansion happens).

    See: https://linuxize.com/post/linux-which-command

    :param string: The command name (or path) to look up.
    :return: True if ``which`` printed something for ``string``, False otherwise.
    """
    # Local import: the module only imports ``split`` from ``shlex`` at the top.
    from shlex import quote as shlex_quote

    # Quote the argument: the command line is run through a shell, so an unquoted
    # ``string`` such as "ls; some-other-command" would execute the extra command.
    return bool(simple_run_command(f"which {shlex_quote(string)}", strip_output=True))


# TODO: Generalize to DOS
# See options for getting available commands here:
# https://stackoverflow.com/questions/948008/linux-command-to-list-all-available-commands-and-aliases
def local_commands(verbose=False):
    """
    Get a list of available commands (strings).

    The function will look at all folders listed in the PATH environment variable,
    and gather all filenames of files therein (in first level of folder only) that
    are executable.

    Essentially do what the command:
    ``ls $(echo $PATH | tr ':' ' ') | grep -v '/' | grep . | sort``
    would, with deduplication.

    :param verbose: If True, warn about the PATH entries that are not existing
        directories.
    :return: The sorted list of (unique) command names.
    """

    def _keep_only_existing_paths(dirpaths, verbose=False):
        """Sorted list of the distinct, non-empty ``dirpaths`` that are directories."""
        dirpaths = set(filter(None, dirpaths))  # also drops empty PATH entries
        existing_dirpaths = set(filter(os.path.isdir, dirpaths))
        non_existing_dirs = dirpaths - existing_dirpaths
        if verbose and non_existing_dirs:
            # sorted, so that the message is the same from one call to the next
            _non_existing_dirs = "\n\t" + "\n\t".join(sorted(non_existing_dirs))
            warn(
                "These paths were in your PATH environment variable, but were not "
                f"found as directories:{_non_existing_dirs}"
            )
        return sorted(existing_dirpaths)

    def _executables_of_dir(dirpath):
        """Yield the names of the executable files directly under ``dirpath``."""
        try:
            filenames = os.listdir(dirpath)
        except OSError:
            # A PATH directory that cannot be listed (no read permission, removed
            # since the isdir check...) contributes no commands; it should not
            # make the whole listing fail.
            return
        for filename in filenames:
            filepath = os.path.join(dirpath, filename)
            # isfile is needed on top of X_OK: directories are "executable" too.
            if os.path.isfile(filepath) and os.access(filepath, os.X_OK):
                yield filename

    # os.pathsep rather than a hard-coded ":" (the separator is ";" on Windows)
    dirpaths = os.environ.get("PATH", "").split(os.pathsep)
    dirpaths = _keep_only_existing_paths(dirpaths, verbose)

    return sorted(
        {command for dirpath in dirpaths for command in _executables_of_dir(dirpath)}
    )


def str_to_identifier(string: str) -> Identifier:
    """
    Make an identifier out of a string.

    Every character that is not a "word" character (in the regular expression
    sense of ``\\w``) is replaced with an underscore, and an underscore is prepended
    if the result starts with a digit.

    >>> str_to_identifier("a-string$with@non*identifier(characters)")
    'a_string_with_non_identifier_characters_'
    >>> str_to_identifier("123go")
    '_123go'

    :param string: The string to transform.
    :return: The transformed string.
    :raises ValueError: If ``string`` is empty.
    """
    identifier = re.sub(r"\W", "_", string)
    if not identifier:
        raise ValueError("string was empty")
    # An identifier cannot start with a digit
    if re.match(r"\d", identifier):
        identifier = "_" + identifier
    return identifier


def _gather_duplicates(values, value_to_group_key):
    """
    Group ``values`` by ``value_to_group_key(value)``, keeping only the groups that
    have more than one member.

    >>> _gather_duplicates(['this', 'or', 'that'], len)
    {4: ['this', 'that']}

    :param values: The values to group.
    :param value_to_group_key: Function computing the (hashable) group key of a value.
    :return: A ``{group_key: [value, ...], ...}`` dict of the groups of size > 1.
    """
    groups = defaultdict(list)
    for item in values:
        key = value_to_group_key(item)
        groups[key].append(item)

    duplicates = {}
    for key, members in groups.items():
        if len(members) >= 2:
            duplicates[key] = members
    return duplicates


# TODO: Could resolve collisions (e.g. suffixing with _1, _2, etc.) instead of raising
def identifier_mapping(
    strings: Iterable[str], str_to_id=str_to_identifier
) -> dict[Identifier, str]:
    """
    Maps strings to identifiers, returning a map from identifiers to the strings.

    :param strings: The strings to map. A string appearing several times is
        considered only once.
    :param str_to_id: The function computing the identifier of a string.
    :return: A ``{identifier: string, ...}`` dict.
    :raises ValueError: If there are collisions, that is, when two distinct strings
        map to the same identifier. The message lists the colliding strings.
    """
    # De-duplicate (keeping the order): the same string given twice is not a
    # collision between two distinct strings, so must not trigger the error below.
    strings = list(dict.fromkeys(strings))
    str_of_id = {str_to_id(string): string for string in strings}
    if len(str_of_id) != len(strings):
        duplicates = _gather_duplicates(strings, str_to_id)
        raise ValueError(f"Some commands mapped to the same identifier: {duplicates}")
    return str_of_id


def local_identifier_command_dict(
    str_to_id=str_to_identifier, verbose=False
) -> dict[Identifier, str]:
    """
    A dict of ``{identifier: command, ...}`` for all commands found in the local system.

    ``identifier`` is a python-valid name that uniquely identifies ``command``.
    When ``command`` is itself a valid identifier (as defined by
    ``str.isidentifier``), ``identifier`` is simply ``command``. When it is not
    (for example because it contains dots or dashes, or anything else that is not
    alphanumeric or an underscore), ``identifier`` is what provides a valid python
    function name for the command.

    :param str_to_id: The function computing the identifier of a command.
    :param verbose: Passed on to ``local_commands``.
    :return: The ``{identifier: command, ...}`` dict.
    """
    commands = local_commands(verbose)
    return identifier_mapping(commands, str_to_id)
