# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/google/GDoc.ipynb.

# %% auto 0
__all__ = ['get_document_by_id', 'GDoc_StyleEnum', 'GDoc_Text_Style', 'GDoc_Element_TextRun', 'GDoc_Element',
           'GDoc_Paragraph_Style', 'GDoc_Paragraph', 'GDoc_SectionBreak_Style', 'GDoc_SectionBreak', 'GDoc_Content',
           'GDoc', 'DocState', 'generate_delete_json', 'generate_insert_json', 'generate_update_style',
           'generate_insert_hyperlink', 'GDoc_Tab', 'GDocSplitter']

# %% ../../nbs/google/GDoc.ipynb 2
from bs4 import BeautifulSoup
from dataclasses import dataclass, field
from enum import Enum
from typing import List, Any, Optional, Callable

from googleapiclient.discovery import Resource

import domolibrary_extensions.google.GDrive as gd


import domolibrary_extensions.google.GAuth as ga

from markdownify import markdownify as md
import domolibrary_extensions.utils.files as defi
from domolibrary_extensions.utils.web import (
    clean_bs4,
    extract_canonical_url_to_html,
    processing_fn,
)

# %% ../../nbs/google/GDoc.ipynb 6
async def get_document_by_id(
    service: Resource, document_id, include_tabs_content: bool = True
):
    """Request a document from the Docs service and return the response.

    Args:
        service: Client exposing ``documents().get(...)``.
        document_id: Sent as ``documentId``.
        include_tabs_content: Sent as ``includeTabsContent``.

    Returns:
        Whatever ``execute()`` returns for the built request.
    """
    request = service.documents().get(
        documentId=document_id,
        includeTabsContent=include_tabs_content,
    )

    # NOTE: execute() is invoked directly here; it is not awaited.
    return request.execute()

# %% ../../nbs/google/GDoc.ipynb 11
class GDoc_StyleEnum(Enum):
    """Named paragraph styles; every member's value equals its own name.

    ``generate_update_style`` sends ``style_type.value`` as the
    ``namedStyleType`` of the paragraph style it applies.
    """

    HEADING_1 = "HEADING_1"
    HEADING_2 = "HEADING_2"

# %% ../../nbs/google/GDoc.ipynb 12
@dataclass
class GDoc_Text_Style:
    """Placeholder for a text style; it declares no fields."""


@dataclass
class GDoc_Element_TextRun:
    """Text run of a paragraph element, built from its JSON dict."""

    # Text exactly as found under the "content" key (None when absent).
    content: Optional[str]

    # NOTE: _from_json stores the "textStyle" value as-is; it is not
    # converted into a GDoc_Text_Style instance.
    textStyle: GDoc_Text_Style

    # ``content`` with leading/trailing whitespace removed (None when absent).
    text: Optional[str]

    @classmethod
    def _from_json(cls, obj):
        """Build a text run from ``obj``.

        Args:
            obj: Mapping that may carry "content" and "textStyle" keys.

        Returns:
            A new instance; ``content`` and ``text`` are None when ``obj``
            has no "content" entry.
        """
        content = obj.get("content")

        return cls(
            content=content,
            textStyle=obj.get("textStyle"),
            # Guard the strip: a missing "content" used to raise
            # AttributeError on None.
            text=content.strip() if content is not None else None,
        )


@dataclass
class GDoc_Element:
    """One element of a paragraph, with its index range and optional text run."""

    startIndex: int
    endIndex: int

    # None when the element's JSON has no (or an empty) "textRun" entry.
    textRun: Optional[GDoc_Element_TextRun]

    @classmethod
    def _from_json(cls, obj):
        """Build an element from ``obj``.

        Args:
            obj: Mapping that may carry "startIndex", "endIndex" and
                "textRun" keys.

        Returns:
            A new instance; ``textRun`` is None when ``obj`` has no text run.
        """
        text_run_json = obj.get("textRun")

        return cls(
            startIndex=obj.get("startIndex"),
            endIndex=obj.get("endIndex"),
            # Elements without a text run used to crash here because the
            # text-run parser was handed None.
            textRun=(
                GDoc_Element_TextRun._from_json(text_run_json)
                if text_run_json
                else None
            ),
        )


@dataclass
class GDoc_Paragraph_Style:
    """Paragraph style values read from a paragraph-style JSON dict."""

    headingId: Optional[str]  # None when the key is absent
    namedStyleType: str
    direction: str

    @classmethod
    def _from_json(cls, obj):
        """Build a style from ``obj``.

        "headingId" is optional; "namedStyleType" and "direction" are read
        with subscripting, so a missing key raises ``KeyError``.
        """
        heading_id = obj.get("headingId")
        named_style_type = obj["namedStyleType"]
        direction = obj["direction"]

        return cls(
            headingId=heading_id,
            namedStyleType=named_style_type,
            direction=direction,
        )


@dataclass
class GDoc_Paragraph:
    """A paragraph: its parsed elements and, when present, its style."""

    elements: List[GDoc_Element]

    # None when the paragraph JSON has no (or an empty) "paragraphStyle".
    paragraphStyle: Optional[GDoc_Paragraph_Style]

    @classmethod
    def _from_json(cls, obj):
        """Build a paragraph from ``obj``.

        Args:
            obj: Mapping that may carry "elements" and "paragraphStyle" keys.

        Returns:
            A new instance; ``elements`` is empty and ``paragraphStyle`` is
            None when the corresponding key is missing.
        """
        style_json = obj.get("paragraphStyle")

        return cls(
            # ``or []`` keeps a missing "elements" from raising on None.
            elements=[
                GDoc_Element._from_json(element)
                for element in obj.get("elements") or []
            ],
            # The field is Optional, so honour that instead of passing None
            # to the style parser (which subscripts its argument).
            paragraphStyle=(
                GDoc_Paragraph_Style._from_json(style_json) if style_json else None
            ),
        )


@dataclass
class GDoc_SectionBreak_Style:
    """Three string-typed style settings of a section break."""

    # Field order is part of the generated __init__ signature.
    columnSeparatorStyle: str
    contentDirection: str
    sectionType: str


@dataclass
class GDoc_SectionBreak:
    """A section break, described solely by its style."""

    sectionStyle: GDoc_SectionBreak_Style


@dataclass
class GDoc_Content:
    """One entry of a document body: index range plus paragraph or section break."""

    startIndex: Optional[int]
    endIndex: Optional[int]

    # Parsed paragraph, or None when the entry has no truthy "paragraph".
    paragraph: Optional[GDoc_Paragraph]

    # NOTE: _from_json stores the "sectionBreak" value exactly as received.
    sectionBreak: Optional[GDoc_SectionBreak]

    @classmethod
    def _from_json(cls, obj):
        """Build a content entry from ``obj``, parsing "paragraph" when present."""
        paragraph_json = obj.get("paragraph")

        if paragraph_json:
            parsed_paragraph = GDoc_Paragraph._from_json(paragraph_json)
        else:
            parsed_paragraph = None

        return cls(
            startIndex=obj.get("startIndex"),
            endIndex=obj.get("endIndex"),
            sectionBreak=obj.get("sectionBreak"),
            paragraph=parsed_paragraph,
        )

# %% ../../nbs/google/GDoc.ipynb 13
@dataclass
class GDoc(gd.GDrive_File):
    """A Drive file extended with the Docs API payload and its parsed body."""

    # Docs API response this instance was built from; left out of the repr.
    raw: Any = field(repr=False, default=None)

    # One GDoc_Content per entry of the response's body "content" list.
    gdoc_body: List[GDoc_Content] = field(default=None)

    def __post_init__(self):
        """Ensure a service is set, then run the parent's ``_parent_init``."""
        if not self.service:
            # The original referenced a bare ``auth`` name that does not exist
            # in this scope, so this branch always raised NameError.
            # NOTE(review): assumes the parent dataclass stores ``auth``
            # (get_by_id passes auth= to _from_json) and that the auth object
            # offers generate_gdocs_service(), as used in get_by_id — confirm.
            self.service = self.auth.generate_gdocs_service()

        self._parent_init()

    @classmethod
    async def get_by_id(
        cls,
        document_id,
        auth: ga.GoogleAuth = None,
        service=None,
        include_tabs_content: bool = False,
        return_raw: bool = False,
    ):
        """Fetch a document and build a ``GDoc`` from it.

        Args:
            document_id: Id of the document; also used as the Drive file id.
            auth: Builds the Docs service when ``service`` is not given and
                always builds the Drive service for the metadata lookup, so
                it is required unless ``return_raw`` is True and ``service``
                is supplied.
            service: Docs service to use instead of building one from ``auth``.
            include_tabs_content: Forwarded to ``get_document_by_id``.
            return_raw: When True, return the Docs response untouched.

        Returns:
            The raw Docs response when ``return_raw`` is True, otherwise the
            result of ``cls._from_json``.
        """
        service = service or auth.generate_gdocs_service()

        res_doc = await get_document_by_id(
            service=service,
            document_id=document_id,
            include_tabs_content=include_tabs_content,
        )

        if return_raw:
            return res_doc

        gdrive_service = auth.generate_gdrive_service()

        gdrive_obj = await cls._super_get_by_id(
            id=document_id, auth=auth, service=gdrive_service, return_raw=True
        )

        # A response may carry no top-level body content (for instance when
        # the content is returned under tabs); iterate an empty list in that
        # case instead of raising TypeError on None.
        body_content = (res_doc.get("body") or {}).get("content") or []

        gdoc_body = [GDoc_Content._from_json(content) for content in body_content]

        return cls._from_json(
            gdrive_obj=gdrive_obj,
            auth=auth,
            service=service,
            raw=res_doc,
            gdoc_body=gdoc_body,
        )

# %% ../../nbs/google/GDoc.ipynb 15
@dataclass
class DocState:
    """Mutable state shared by the ``generate_*`` command builders.

    The builders append command dicts to ``cmds`` and record the index, text
    and url they last worked with so that a following builder can reuse them.
    """

    cmds: List[dict] = field(default_factory=list)
    start_index: int = None
    end_index: int = None
    text: str = None

    # Callables invoked by run(); each receives this state as its only argument.
    function_ls: List[Callable] = field(default_factory=list)

    # Read and written by generate_insert_hyperlink. Declared last so that
    # existing positional construction keeps working.
    url: str = None

    def run(self):
        """Call every function in ``function_ls`` with this state, in order.

        Returns:
            The ``cmds`` list (the same object, not a copy).
        """
        for fn in self.function_ls:
            fn(self)

        return self.cmds


def generate_delete_json(
    state: DocState, start_index: int = None, end_index: int = None
):
    """Append a ``deleteContentRange`` command to ``state.cmds``.

    Args:
        state: Receives the command and is updated with the indexes used.
        start_index: Range start; falls back to ``state.start_index`` when None.
        end_index: Range end; falls back to ``state.end_index`` when None.
    """
    # Compare against None rather than using ``or`` so that an explicit
    # index of 0 is honoured instead of being replaced by the state value.
    if start_index is None:
        start_index = state.start_index
    if end_index is None:
        end_index = state.end_index

    state.cmds.append(
        {
            "deleteContentRange": {
                "range": {"startIndex": start_index, "endIndex": end_index}
            }
        }
    )
    state.start_index = start_index
    state.end_index = end_index


def generate_insert_json(
    state: DocState,
    start_index=None,
    text: str = None,
    is_suppress_add_new_line: bool = False,
):
    """Append an ``insertText`` command to ``state.cmds``.

    Args:
        state: Receives the command and is updated with the values used.
        start_index: Insert position; falls back to ``state.start_index``
            when None.
        text: Text to insert; falls back to ``state.text`` when None.
        is_suppress_add_new_line: When False, a trailing ``"\\n"`` is appended
            to the text unless it already ends with one.

    Raises:
        ValueError: If no text is passed and ``state.text`` is None.
    """
    # Resolve the fallbacks first: the newline check used to run before the
    # state fallback, so calling with the default text=None always crashed.
    # ``is None`` (not ``or``) keeps an explicit index of 0.
    if start_index is None:
        start_index = state.start_index
    if text is None:
        text = state.text
    if text is None:
        raise ValueError("generate_insert_json - text must be passed or set on state")

    if not is_suppress_add_new_line and not text.endswith("\n"):
        text += "\n"

    state.cmds.append(
        {
            "insertText": {
                "location": {"index": start_index},
                "text": text,
            }
        }
    )

    state.start_index = start_index
    state.text = text
    # Record where the inserted text ends (start + length) so a following
    # builder that reads state.end_index addresses the inserted range; the
    # bare length is kept only when no start index is known.
    state.end_index = len(text) if start_index is None else start_index + len(text)


def generate_update_style(
    state: DocState,
    style_type: GDoc_StyleEnum = None,
    start_index=None,
    text=None,
):
    """Append an ``updateParagraphStyle`` command to ``state.cmds``.

    The styled range starts at ``start_index`` and spans ``len(text)``.

    Args:
        state: Receives the command and is updated with the values used.
        style_type: Enum member whose ``.value`` is sent as ``namedStyleType``;
            a plain style-name string is accepted as well.
        start_index: Range start; falls back to ``state.start_index`` when None.
        text: Text whose length sets the range; falls back to ``state.text``
            when empty or None.

    Raises:
        ValueError: If ``style_type`` is None, or if no start index or text
            is available from the arguments or the state.
    """
    if style_type is None:
        raise ValueError("generate_update_style - style_type is required")

    # ``is None`` (not ``or``) keeps an explicit start index of 0.
    if start_index is None:
        start_index = state.start_index
    text = text or state.text

    if start_index is None or text is None:
        raise ValueError(
            "generate_update_style - start_index and text must be passed or set on state"
        )

    end_index = start_index + len(text)

    # Enum members expose .value; anything else is sent through unchanged.
    named_style_type = getattr(style_type, "value", style_type)

    state.cmds.append(
        {
            "updateParagraphStyle": {
                "range": {"startIndex": start_index, "endIndex": end_index},
                "paragraphStyle": {"namedStyleType": named_style_type},
                "fields": "namedStyleType",
            }
        }
    )

    state.start_index = start_index
    state.text = text
    # Store the end of the range that was just styled, not the text length.
    state.end_index = end_index


def generate_insert_hyperlink(
    state: DocState,
    start_index: int = None,
    text: str = None,
    url: str = None,
):
    """Append an ``updateTextStyle`` command that sets a link on a text range.

    The range starts at ``start_index`` and spans ``len(text)``.

    Args:
        state: Receives the command and is updated with the values used.
        start_index: Range start; falls back to ``state.start_index`` when None.
        text: Text whose length sets the range; falls back to ``state.text``
            when empty or None.
        url: Link target; falls back to ``state.url`` when the state has one.

    Raises:
        ValueError: If no start index or text is available from the
            arguments or the state.
    """
    # ``is None`` (not ``or``) keeps an explicit start index of 0.
    if start_index is None:
        start_index = state.start_index
    text = text or state.text

    if start_index is None or text is None:
        raise ValueError(
            "generate_insert_hyperlink - start_index and text must be passed or set on state"
        )

    end_index = start_index + len(text)

    # The state may not have a ``url`` attribute yet; reading it directly
    # raised AttributeError whenever no url argument was passed.
    url = url or getattr(state, "url", None)

    state.cmds.append(
        {
            "updateTextStyle": {
                "textStyle": {"link": {"url": url}},
                "range": {"startIndex": start_index, "endIndex": end_index},
                "fields": "link",
            }
        }
    )

    state.start_index = start_index
    state.text = text
    # Store the end of the linked range, not the text length.
    state.end_index = end_index
    state.url = url

# %% ../../nbs/google/GDoc.ipynb 19
from ..client import ExecutionError
from pathlib import Path
import domolibrary_extensions.utils.files as defi
from domolibrary_extensions.utils.web import (
    clean_bs4,
    extract_canonical_url_to_html,
    generate_file_path_from_doc_title,
)
from markdownify import markdownify as md
from typing import Callable, List
from dataclasses import dataclass, field
import os


@dataclass
class GDoc_Tab:
    """Represents one split section (the index or a tab)."""

    title: str
    soup: BeautifulSoup

    export_path: str = None  # set by export() once the file is written
    is_index: bool = False
    md_str: str = None  # last Markdown produced by generate_markdown()

    def __post_init__(self):
        """Remove the first element with class ``title`` from the soup, if any."""
        try:
            self.soup.find(class_="title").decompose()
        except (AttributeError, TypeError):
            # Best effort: there is no soup, it has no such element, or it
            # does not support this lookup. Other errors now propagate
            # instead of being swallowed by a bare except.
            pass

    @classmethod
    def from_soup(
        cls,
        soup,
        title="index",
        is_index: bool = False,
    ):
        """Build a tab from ``soup``.

        Args:
            soup: Parsed HTML of the section.
            title: Used as-is for the index. For any other tab it is only a
                fallback: the text of the soup's first child takes precedence.
            is_index: Marks the section as the index.

        Returns:
            A new ``GDoc_Tab``.
        """
        if not is_index:
            # NOTE(review): a caller-supplied title is overridden by the first
            # child's text whenever the soup has children — confirm intended.
            # An empty soup used to raise StopIteration here.
            first_child = next(iter(soup.children), None)
            if first_child is not None:
                title = str(first_child.text)

        return cls(
            title=title,
            soup=soup,
            is_index=is_index,
        )

    def generate_markdown(self, export_path=None):
        """Convert the soup to Markdown, store it in ``md_str`` and return it.

        Args:
            export_path: Passed to the converter as ``file_path``; falls back
                to ``self.export_path``.

        Raises:
            ExecutionError: If the soup or the export path is missing.
        """
        export_path = export_path or self.export_path

        if not self.soup:
            raise ExecutionError("generate_markdown - must have soup")

        if not export_path:
            raise ExecutionError("generate_markdown - missing export path")

        self.md_str = md(
            html=str(self.soup),
            keep_inline_images_in=["td", "span"],
            file_path=export_path,
        )

        return self.md_str

    def export(self, export_folder="./EXPORT/"):
        """Write the tab as a Markdown file and return a status message.

        Args:
            export_folder: Folder the file is placed in; when falsy the bare
                file name is used as the path.

        Returns:
            A message naming the tab title and the path written.
        """
        export_path = f"{generate_file_path_from_doc_title(self.title)}.md"

        if export_folder:
            export_path = os.path.join(export_folder, export_path)

        # Named markdown_text so the imported ``md`` converter is not shadowed.
        markdown_text = self.generate_markdown(export_path)

        defi.upsert_file(export_path)

        with open(export_path, "w+", encoding="utf-8") as f:
            f.write(markdown_text)

        self.export_path = export_path
        return f"exported {self.title} to {self.export_path}"


@dataclass
class GDocSplitter:
    """
    Reads a single GDoc-exported HTML file,
    splits it on <p class="title">, processes each chunk,
    and writes each out as Markdown.
    """

    source_path: Path

    raw_html: str = None
    source_url: str = None

    split_selector: str = (
        '<p class="title"'  # html string that identifies tabs in a doc downloaded as html
    )

    # Called as processing_fn(soup, url=...) on every section before it is
    # wrapped in a GDoc_Tab.
    processing_fn: Callable[[BeautifulSoup], BeautifulSoup] = processing_fn

    tabs: List[GDoc_Tab] = field(default_factory=list)

    @classmethod
    def from_path(cls, source_path: str):
        """Build a splitter for an existing file.

        Raises:
            FileNotFoundError: If ``source_path`` does not exist.
        """
        # An explicit raise instead of ``assert``: asserts are stripped when
        # Python runs with -O, which would skip this validation.
        if not os.path.exists(source_path):
            raise FileNotFoundError(
                f"GDocSplitter - source file not found: {source_path}"
            )

        return cls(source_path=Path(source_path))

    def __post_init__(self):
        """Normalise ``source_path`` and parse the tabs right away."""
        # Accept plain strings too; load_raw_html needs Path.read_text.
        self.source_path = Path(self.source_path)

        # parse_tabs loads the HTML itself, so no separate load is needed.
        self.parse_tabs(processing_fn=self.processing_fn)

    def load_raw_html(self) -> str:
        """Read the source file, refresh ``raw_html`` and ``source_url``, and return the HTML."""
        self.raw_html = self.source_path.read_text(encoding="utf-8")

        self.source_url = extract_canonical_url_to_html(self.source_path)

        return self.raw_html

    def parse_tabs(self, processing_fn: Callable = None) -> List[GDoc_Tab]:
        """Splits the HTML into tabs based on the split_selector.

        ``self.tabs`` is rebuilt on every call, so calling this again (as
        ``run`` does after construction) no longer duplicates the tabs.

        Args:
            processing_fn: Override for ``self.processing_fn``.

        Returns:
            The new ``self.tabs`` list: a single index tab when the selector
            does not occur in the HTML, otherwise one tab per chunk that
            follows an occurrence (the text before the first occurrence is
            not turned into a tab).
        """
        processing_fn = processing_fn or self.processing_fn

        raw_html = self.load_raw_html()

        full_soup = clean_bs4(html=raw_html, is_body=False)

        tabs_delimiter = self.split_selector
        parts = str(full_soup).split(tabs_delimiter)

        if len(parts) == 1:
            # No delimiter found: the whole document is the index.
            self.tabs = [
                GDoc_Tab.from_soup(
                    title="index",
                    soup=processing_fn(full_soup, url=self.source_url),
                    is_index=True,
                )
            ]
            return self.tabs

        # Drop the leading part and put back the delimiter that split() removed.
        chunks = [
            tabs_delimiter + part
            for part in parts[1:]
            if not part.startswith(tabs_delimiter)
        ]

        # Collect into a local list and publish it once every chunk parsed.
        parsed_tabs = []

        for chunk in chunks:
            tab_soup = clean_bs4(html=chunk, is_body=False)

            title_ele = tab_soup.find("p", class_="title")

            tab_url = f"{self.source_url}#heading={title_ele['id']}"

            parsed_tabs.append(
                GDoc_Tab.from_soup(
                    title=title_ele.text,
                    soup=processing_fn(tab_soup, url=tab_url),
                )
            )

        self.tabs = parsed_tabs
        return self.tabs

    def export_tabs(self) -> None:
        """Export every tab, unless there is at most one (nothing was split)."""
        if len(self.tabs) <= 1:
            print("No tabs found to split; nothing to export.")
            return

        for tab in self.tabs:
            tab.export()

    def run(self) -> None:
        """Re-parse the source file and export the resulting tabs."""
        self.parse_tabs()
        self.export_tabs()
