#!/usr/bin/env python3

"""Merge translations from one or many po files into one or many others."""

import argparse
import json
import os
import sys
import textwrap
from collections.abc import Iterable
from contextlib import suppress
from difflib import SequenceMatcher as SM
from typing import cast

import polib
from tqdm import tqdm


def find_best_match(possibilities: Iterable[str], to_find: str) -> str | None:
    """Return the msgid from possibilities which is close to `to_find`."""
    best_match: tuple[float, str | None] = (0, None)
    for possibility in [
        possib for possib in possibilities if possib[:5] == to_find[:5]
    ]:
        match = SM(None, possibility, to_find).ratio()
        if match > best_match[0]:
            best_match = (match, possibility)
    if best_match[0] > 0.9:
        return best_match[1]
    return None


def find_translations(files: list[str]) -> dict[str, str]:
    """Read all translations from the given files.

    Returns a mapping of msgid to msgstr. Entries flagged as fuzzy or
    having an empty msgstr are ignored. When a msgid is found several
    times, the last translation read wins.

    Files for which polib raises OSError are reported on stderr and
    skipped.
    """
    known_translations: dict[str, str] = {}
    # Aggregate all known translations
    for po_file_name in tqdm(files, desc="Searching translations"):
        try:
            po_file = polib.pofile(po_file_name)
        except OSError as err:
            # `file=` is required here: passed positionally, sys.stderr
            # would just be printed as a second value on stdout.
            print(f"Skipping {po_file_name}: {err}", file=sys.stderr)
            continue
        for entry in po_file:
            if "fuzzy" not in entry.flags and entry.msgstr != "":
                known_translations[entry.msgid] = entry.msgstr
    return known_translations


def read_memory(memory_file: str) -> dict[str, str]:
    """Load the translation memory stored in `memory_file`.

    The memory keeps translations from one run to another, typically:

        pomerge --from *.po
        pomerge --to *.po

    An empty dict is returned when `memory_file` does not exist.
    """
    with suppress(FileNotFoundError), open(
        memory_file, encoding="UTF-8"
    ) as stored:
        return cast(dict[str, str], json.load(stored))
    # Only reached when the memory file is missing.
    return {}


def write_memory(translations: dict[str, str], memory_file: str) -> None:
    """Store `translations` as JSON in `memory_file`, replacing its content.

    The memory keeps translations from one run to another, typically:

        pomerge --from *.po
        pomerge --to *.po
    """
    with open(memory_file, mode="w", encoding="UTF-8") as sink:
        json.dump(translations, fp=sink)


def write_translations(
    translations: dict[str, str],
    files: list[str],
    fuzzy_matching: bool = False,
    mark_as_fuzzy: bool = False,
    overwrite: bool = True,
) -> None:
    """Write `translations` to msgstrs of file in `files`.

    Each file is loaded with polib, updated in place and saved back.

    - `fuzzy_matching`: for msgids absent from `translations`, reuse the
      translation of a nearly identical msgid (see `find_best_match`)
      and flag the entry as fuzzy.
    - `mark_as_fuzzy`: flag as fuzzy every entry updated from an exact
      msgid match.
    - `overwrite`: when false, entries already having a non-fuzzy
      translation are left untouched.
    """
    for po_file_name in tqdm(files, desc="Updating translations"):
        po_file = polib.pofile(po_file_name)
        for entry in po_file:
            if not overwrite and entry.msgstr and "fuzzy" not in entry.flags:
                continue
            if entry.msgid in translations:
                known = translations[entry.msgid]
                if entry.msgstr != known:
                    entry.msgstr = known
                    if "fuzzy" in entry.flags:
                        entry.flags.remove("fuzzy")
                    if mark_as_fuzzy:
                        entry.flags.append("fuzzy")
                # The exact msgid is known: never fall through to fuzzy
                # matching, which would match the msgid with itself and
                # wrongly flag an already correct entry as fuzzy.
                continue
            if not fuzzy_matching:
                continue
            # Iterating the dict yields its keys; no need to copy them
            # into a list for every entry.
            candidate = find_best_match(translations, entry.msgid)
            if candidate:
                entry.msgstr = translations[candidate]
                # Avoid piling up duplicate flags on already fuzzy entries.
                if "fuzzy" not in entry.flags:
                    entry.flags.append("fuzzy")
        po_file.save()


def merge_po_files(
    from_files: list[str],
    to_files: list[str],
    fuzzy_matching: bool = False,
    mark_as_fuzzy: bool = False,
    overwrite: bool = True,
) -> None:
    """Replicate translations from `from_files` to `to_files`.

    - `to_files` may be empty: in this case this function only "learns",
      storing what it found in the translation memory file.
    - `from_files` may be empty: in this case this function
      only writes what it has previously "learned".
    """
    memory_file = os.path.expanduser("~/.pomerge.json")
    # Source of translations: the given files, else the memory file.
    translations = (
        find_translations(from_files)
        if from_files
        else read_memory(memory_file)
    )
    if not to_files:
        # Nothing to update: remember the translations for a later run.
        write_memory(translations, memory_file)
        return
    write_translations(
        translations, to_files, fuzzy_matching, mark_as_fuzzy, overwrite
    )


def parse_args() -> argparse.Namespace:
    """Parse the command line arguments found in sys.argv.

    When none of --from, --to or --clear is given, the help is printed
    and the process exits with status 1.
    """
    usage_text = textwrap.dedent(
        """\
Replicate known translations between sets of po files.
To propagate known translation in a single set of po files,
give it as a source and a destination, like:

    pomerge --from *.po --to *.po

Translations already existing in the destinations po files will be
updated by translations from the source po files.

To find po files recursively, use the globstar option of bash, or your
shell equivalent, like:

    shopt -s globstar
    pomerge --from **/*.po --to **/*.po

Giving only --from or only --to works by using a temporary file, so:

    pomerge --from a/*.po --to b/*.po

is equivalent to:

    pomerge --from a/*.po
    pomerge --to b/*.po
"""
    )
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=usage_text,
    )

    # Boolean switches, registered in the order they appear in the help.
    switches = (
        (
            ("--fuzzy-matching",),
            "Also replicate nearly identical strings, "
            "but when doing so, add a fuzzy flag.",
        ),
        (
            ("--mark-as-fuzzy",),
            "Mark all new translations as fuzzy.",
        ),
        (
            ("-n", "--no-overwrite"),
            "When applying translation, "
            "do not overwrite existing translations (apply only to untranslated or fuzzy ones).",
        ),
    )
    for flags, help_text in switches:
        cli.add_argument(*flags, action="store_true", help=help_text)

    # Options taking one or more po file names.
    file_lists = (
        (
            ("-f", "--from", "--from-files"),
            "from_files",
            "File in which known translations are searched",
        ),
        (
            ("-t", "--to", "--to-files"),
            "to_files",
            "File in which translations will be added or updated",
        ),
    )
    for flags, destination, help_text in file_lists:
        cli.add_argument(*flags, dest=destination, nargs="+", help=help_text)

    cli.add_argument(
        "--clear",
        action="store_true",
        help="Empty the translation memory (can be used in conjunction with "
        "--from-files and --to-files, cleaning is happening at the end).",
    )

    args = cli.parse_args()
    # Without any of those there is nothing to do: show how to use the tool.
    if not (args.from_files or args.to_files or args.clear):
        cli.print_help()
        sys.exit(1)
    return args


def main() -> None:
    """Run pomerge from the command line.

    Parses the arguments, merges the po files accordingly, then removes
    the translation memory file if --clear was given.
    """
    options = parse_args()
    merge_po_files(
        from_files=options.from_files,
        to_files=options.to_files,
        fuzzy_matching=options.fuzzy_matching,
        mark_as_fuzzy=options.mark_as_fuzzy,
        overwrite=not options.no_overwrite,
    )
    if options.to_files:
        print("Successfully merged, you probably want to run `powrap -m` now.")
    if not options.clear:
        return
    # Clearing happens last; a missing memory file is already "cleared".
    try:
        os.unlink(os.path.expanduser("~/.pomerge.json"))
    except FileNotFoundError:
        pass


# Run the command line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
