import csv
import json
import logging
import os
import typing
from base64 import b64decode, b64encode
from pathlib import Path
from time import sleep

import requests
from openpyxl import load_workbook
from retry import retry

logger = logging.getLogger(__name__)


# Fallback location: a hidden folder inside the current user's home directory
WARN_USER_DIR = Path(os.path.expanduser("~"))
WARN_DEFAULT_OUTPUT_DIR = WARN_USER_DIR.joinpath(".warn-scraper")

# Root output directory; the WARN_OUTPUT_DIR environment variable wins when set
WARN_OUTPUT_DIR = Path(os.getenv("WARN_OUTPUT_DIR", WARN_DEFAULT_OUTPUT_DIR))

# Subdirectories of the root output directory
WARN_CACHE_DIR = WARN_OUTPUT_DIR.joinpath("cache")
WARN_DATA_DIR = WARN_OUTPUT_DIR.joinpath("exports")
WARN_LOG_DIR = WARN_OUTPUT_DIR.joinpath("logs")


def create_directory(path: Path, is_file: bool = False):
    """Create the filesystem directories for the provided path.

    Args:
        path (Path): The file or directory path to create directories for.
            A plain string is also accepted and converted to a Path.
        is_file (bool): Whether or not the path leads to a file (default: False)
    """
    # Coerce so callers holding a plain string filename work as well
    path = Path(path)

    # For a file, the directory to create is its parent; otherwise the path itself
    directory = path.parent if is_file else path

    # If the path already exists, we're good
    if directory.exists():
        return

    # If not, let's make it
    logger.debug(f"Creating directory at {directory}")
    # exist_ok avoids a FileExistsError when something else creates the
    # directory between the exists() check above and this call
    directory.mkdir(parents=True, exist_ok=True)


def fetch_if_not_cached(filename, url, **kwargs):
    """Retrieve a URL and store it on disk, unless the file is already there.

    Args:
        filename: The full filename for the file
        url: The URL from which the file may be downloaded.
        **kwargs: Extra keyword arguments handed straight to requests.get.

    Notes: Should this even be in utils vs. cache? Should it exist?
    """
    # Make sure the folder that will hold the file is present
    create_directory(Path(filename), is_file=True)

    # Nothing to do when a copy is already on disk
    if os.path.exists(filename):
        return

    logger.debug(f"Fetching {filename} from {url}")
    reply = requests.get(url, **kwargs)
    if reply.ok:
        with open(filename, "wb") as destination:
            destination.write(reply.content)
    else:
        logger.error(f"Failed to fetch {url} to {filename}")

    # Pause between requests, whether or not the download succeeded
    sleep(2)


def save_if_good_url(filename, url, **kwargs):
    """Download a URL and write it to disk when the server answers successfully.

    Args:
        filename: The full filename for the file
        url: The URL from which the file may be downloaded.
        **kwargs: Extra keyword arguments handed straight to requests.get.

    Returns:
        A (success_flag, content) tuple: (True, <response bytes>) when the
        file was saved, (False, False) when the request was not successful.

    Notes: Should this even be in utils vs. cache? Should it exist?
    """
    # Make sure the folder that will hold the file is present
    create_directory(Path(filename), is_file=True)

    reply = requests.get(url, **kwargs)
    if reply.ok:
        with open(filename, "wb") as destination:
            destination.write(reply.content)
        outcome = (True, reply.content)
    else:
        logger.error(f"URL {url} fetch failed with {reply.status_code}")
        logger.error(f"Not saving to {filename}. Is a new year's URL not started?")
        outcome = (False, False)

    # Pause between requests
    sleep(2)
    return outcome


def get_with_zyte(url):
    """Fetch a URL through the Zyte extract API, used as a proxy server.

    Args:
        url (str): URL to retrieve
    Returns:
        returnbin (bin): raw binary representation of returned data object
        returntext (str): utf-8 conversion of returned data object, e.g., HTML
    Failures:
        Returns (None, None) and logs an error when the API key is missing or
        the Zyte API answers with an unsuccessful status.
    Requires:
        ZYTE_API_KEY to be set in environment
    """
    logger.debug(f"Seeking to fetch {url} with Zyte")

    # Without credentials there is nothing we can do
    zyte_api_key = os.environ.get("ZYTE_API_KEY")
    if zyte_api_key is None:
        logger.error(
            "No ZYTE_API_KEY variable found in environment. Please get an API key from Zyte and export it."
        )
        return (None, None)

    # Ask Zyte to fetch the page and hand back the raw response body
    extract_request = {
        "url": url,
        "httpResponseBody": True,
        "followRedirect": True,
    }
    api_response = requests.post(
        "https://api.zyte.com/v1/extract",
        auth=(zyte_api_key, ""),
        json=extract_request,
    )

    if api_response.ok:
        # The body arrives base64-encoded inside the JSON envelope
        returnbin: bytes = b64decode(api_response.json()["httpResponseBody"])
        # Undecodable bytes are kept visible as backslash escapes
        returntext: str = returnbin.decode("utf-8", errors="backslashreplace")
        logger.debug(f"Fetched {url}")
        return (returnbin, returntext)

    logger.error(
        f"Error downloading {url} with get_with_zyte. Repsonse code: {api_response.status_code}"
    )
    return (None, None)


def post_with_zyte(url, payload):
    """Use Zyte as a proxy server to POST to a URL not reachable without it.

    Args:
        url (str): URL to retrieve
        payload: (dict, str or binary): POST body.
            If type dict: serialized to JSON text, then handled as str.
            If type str: utf-8 encoded, then base64-encoded.
            If binary: passed through as utf-8 text without further encoding.
    Returns:
        returnbin (bin): raw binary representation of returned data object
        returntext (str): utf-8 conversion of returned data object, e.g., HTML
    Failures:
        Returns (None, None) if it encounters a problem and logs an error.
    Requires:
        ZYTE_API_KEY to be set in environment
    """
    logger.debug(f"Seeking to fetch {url} with Zyte")
    try:
        zyte_api_key = os.environ["ZYTE_API_KEY"]
    except KeyError:
        logger.error(
            "No ZYTE_API_KEY variable found in environment. Please get an API key from Zyte and export it."
        )
        return (None, None)

    if isinstance(payload, dict):
        payload = json.dumps(payload)

    if isinstance(payload, str):
        payload = b64encode(payload.encode("utf-8"))

    # b64encode returns bytes, and the standard-library JSON encoder cannot
    # serialize bytes, so the request body below needs text.
    # NOTE(review): caller-supplied bytes are passed through without
    # base64-encoding, as before — presumably they are expected to be
    # base64 already; confirm against callers.
    if isinstance(payload, (bytes, bytearray)):
        payload = bytes(payload).decode("utf-8")

    api_response = requests.post(
        "https://api.zyte.com/v1/extract",
        auth=(zyte_api_key, ""),
        json={
            "url": url,
            "httpRequestMethod": "POST",
            "httpRequestBody": payload,
            "httpResponseBody": True,
            "followRedirect": True,
        },
    )

    if not api_response.ok:
        # An error body is not guaranteed to be JSON; fall back to the raw
        # text so logging the failure cannot itself raise.
        try:
            detail = api_response.json()
        except ValueError:
            detail = api_response.text
        logger.error(
            f"Error downloading {url} with post_with_zyte. Repsonse code: {api_response.status_code}. Reponse: {detail}"
        )
        return (None, None)
    # The body arrives base64-encoded inside the JSON envelope
    returnbin: bytes = b64decode(api_response.json()["httpResponseBody"])
    # Undecodable bytes are kept visible as backslash escapes
    returntext: str = returnbin.decode("utf-8", errors="backslashreplace")
    logger.debug(f"Fetched {url}")
    return (returnbin, returntext)


def write_rows_to_csv(output_path: Path, rows: list, mode="w"):
    """Save a list of rows to a UTF-8 encoded comma-separated values file.

    Args:
        rows (list): the list to be saved
        output_path (Path): the Path where the result will be saved
        mode (str): the mode to be used when opening the file (default 'w')
    """
    # The parent folder has to exist before the file can be opened
    create_directory(output_path, is_file=True)

    logger.debug(f"Writing {len(rows)} rows to {output_path}")
    with open(output_path, mode, newline="", encoding="utf-8") as handle:
        csv_writer = csv.writer(handle)
        for record in rows:
            csv_writer.writerow(record)


def write_dict_rows_to_csv(output_path, headers, rows, mode="w", extrasaction="raise"):
    """Write the provided list of dictionaries to the provided path as comma-separated values.

    The file is written as UTF-8, matching write_rows_to_csv, so the output
    does not depend on the platform's default encoding.

    Args:
        output_path (Path): the Path where the result will be saved
        headers (list): a list of the headers for the output file
        rows (list): the list of dictionaries to be saved
        mode (str): the mode to be used when opening the file (default 'w')
        extrasaction (str): what to do if a field isn't in the headers (default 'raise')
    """
    create_directory(output_path, is_file=True)
    logger.debug(f"Writing {len(rows)} rows to {output_path}")
    with open(output_path, mode, newline="", encoding="utf-8") as f:
        # Create the writer object
        writer = csv.DictWriter(f, fieldnames=headers, extrasaction=extrasaction)
        # Only a freshly written file gets a header row; other modes
        # (e.g. append) write the data rows alone
        if mode == "w":
            writer.writeheader()
        # Write all the dicts in one go
        writer.writerows(rows)


def write_disparate_dict_rows_to_csv(output_path, rows, mode="w"):
    """Write the provided list of dictionaries to the provided path as comma-separated values, while determining a header.

    The header is the sorted union of every key found in the rows. A row
    lacking one of those keys gets an empty cell in that column. The file is
    written as UTF-8, matching write_rows_to_csv.

    Args:
        output_path (Path): the Path where the result will be saved
        rows (list): the list of dictionaries to be saved; can have disparate dict keys
        mode (str): the mode to be used when opening the file (default 'w')
    """
    create_directory(output_path, is_file=True)

    # Collect every key used by any row
    header_names: set = set()
    for row in rows:
        header_names.update(row)
    headers: list = sorted(header_names)

    logger.debug(f"Found {len(headers):,} header entries in list of dicts.")
    logger.debug(f"Writing {len(rows)} rows to {output_path}")
    with open(output_path, mode, newline="", encoding="utf-8") as outfile:
        # Create the writer object
        writer = csv.writer(outfile)
        # Only a freshly written file gets a header row; other modes
        # (e.g. append) write the data rows alone
        if mode == "w":
            writer.writerow(headers)
        # Emit each dict in header order; missing keys become None,
        # which the csv writer renders as an empty cell
        for row in rows:
            writer.writerow([row.get(name) for name in headers])
    return


def get_all_scrapers():
    """Get all the states and territories that have scrapers.

    Looks in the "scrapers" directory next to this file and collects the
    name (without extension) of every ".py" module there, leaving out the
    package's own "__init__.py".

    Returns: Sorted list of module names (lower-case post abbreviations).
    """
    this_dir = Path(__file__).parent
    scrapers_dir = this_dir / "scrapers"
    # Compare the file name exactly: a substring test on the full path would
    # also drop modules (or whole directories) that merely contain that text
    return sorted(
        p.stem for p in scrapers_dir.glob("*.py") if p.name != "__init__.py"
    )


@retry(tries=3, delay=15, backoff=2)
def get_url(
    url, user_agent="Big Local News (biglocalnews.org)", session=None, **kwargs
):
    """Request the provided URL and return a response object.

    The call is attempted up to three times by the retry decorator when it
    raises.

    Args:
        url (str): the url to be requested
        user_agent (str): the user-agent header passed with the request (default: biglocalnews.org)
        session: a session object to use when making the request. optional
        **kwargs: extra keyword arguments passed to the underlying get call

    Returns: The response object for a successful request.

    Raises:
        AssertionError: if the response status is not a successful one.
    """
    logger.debug(f"Requesting {url}")

    # Set the headers on a copy, so the caller's dict is never modified
    # (and a missing or None "headers" argument is handled as well)
    supplied_headers = kwargs.get("headers")
    headers = supplied_headers.copy() if supplied_headers else {}
    headers["User-Agent"] = user_agent
    kwargs["headers"] = headers

    # Go get it
    if session is not None:
        logger.debug(f"Requesting with session {session}")
        response = session.get(url, **kwargs)
    else:
        response = requests.get(url, **kwargs)
    logger.debug(f"Response code: {response.status_code}")

    # Verify that the response is OK. Raised explicitly, rather than with an
    # assert statement, so the check still runs (and still triggers a retry)
    # when Python is started with -O; the exception type is unchanged.
    if not response.ok:
        raise AssertionError(
            f"Request to {url} failed with status code {response.status_code}"
        )

    # Return the response
    return response


def parse_excel(excel_path: Path, keep_header: bool = True) -> typing.List[typing.List]:
    """Read the first worksheet of an Excel workbook into a list of rows.

    Args:
        excel_path (Path): The path to an XLSX file
        keep_header (bool): Whether or not to return the header row. Default True.

    Returns: List of rows, each a list of cell values, ready to write.
        Rows in which every cell is empty (falsy) are left out.
    """
    # Load the workbook and grab its first sheet
    first_sheet = load_workbook(filename=excel_path).worksheets[0]

    # Pull the raw values out of every row, lazily
    all_rows = ([cell.value for cell in sheet_row] for sheet_row in first_sheet.rows)

    # Keep a row when it is not an unwanted header (the very first row)
    # and holds at least one non-empty cell
    return [
        values
        for position, values in enumerate(all_rows)
        if (keep_header or position != 0) and any(values)
    ]
