# quantvn/crypto/data/utils.py
from __future__ import annotations
from typing import Optional, Literal
import io
import pandas as pd
import boto3, requests
from botocore.exceptions import ClientError, NoCredentialsError
from botocore.config import Config
from botocore import UNSIGNED

__all__ = ["get_crypto"]

def get_crypto(
    symbol: str,
    days: Optional[int] = None,
    *,
    bucket: str = "qco-market",
    prefix: str = "data-csv",
    session: Optional[boto3.Session] = None,
    auth: Literal["auto", "signed", "unsigned"] = "auto",
) -> pd.DataFrame:
    """Read ``s3://{bucket}/{prefix}/{symbol}.csv`` and optionally keep the last N days.

    Args:
        symbol: Base name of the object; ``.csv`` is appended to build the key.
        days: When not ``None``, keep only rows whose ``datetime`` is at or
            after "now (UTC) minus ``days`` days".
        bucket: Name of the S3 bucket.
        prefix: Key prefix. Trailing slashes are stripped; an empty prefix
            places the object at the bucket root.
        session: boto3 session used to create S3 clients. A default
            ``boto3.Session()`` is created when omitted.
        auth: Access strategy.

            * ``"auto"``: try a signed request; when no credentials are
              available fall back to an unsigned boto request, then to
              plain HTTP.
            * ``"signed"``: use a signed request; a missing credential is
              re-raised instead of falling back to unsigned access.
            * ``"unsigned"``: anonymous access from the start (only works
              for public objects), with plain HTTP as the fallback.

    Returns:
        The CSV as a DataFrame. The ``datetime`` column is parsed as
        timezone-aware UTC (unparseable values become ``NaT``) and
        ``df.attrs["s3_key"]`` holds the object key that was read.

    Raises:
        KeyError: The CSV has no ``datetime`` column.
        NoCredentialsError: ``auth="signed"`` and no credentials were found.
        PermissionError: Every URL tried by the HTTP fallback returned an
            error status.
    """
    key = f"{prefix.rstrip('/')}/{symbol}.csv" if prefix else f"{symbol}.csv"
    session = session or boto3.Session()

    def _post(df: pd.DataFrame) -> pd.DataFrame:
        # Normalise the timestamp column, apply the day filter, tag the source key.
        if "datetime" not in df.columns:
            raise KeyError("CSV phải có cột 'datetime'.")
        df["datetime"] = pd.to_datetime(df["datetime"], errors="coerce", utc=True)
        if days is not None:
            # Timestamp.utcnow() is deprecated in recent pandas; now(tz="UTC")
            # yields the same timezone-aware value.
            cutoff = pd.Timestamp.now(tz="UTC") - pd.Timedelta(days=days)
            df = df[df["datetime"] >= cutoff].copy()
        df.attrs["s3_key"] = key
        return df

    def _read_via_boto(client) -> pd.DataFrame:
        # Fetch the object with the given S3 client and parse it as CSV.
        body = client.get_object(Bucket=bucket, Key=key)["Body"]
        try:
            return _post(pd.read_csv(body))
        finally:
            # Release the underlying stream even when parsing fails midway.
            body.close()

    def _bucket_region(err) -> Optional[str]:
        # Region S3 reported for the bucket in the error response headers, if any.
        headers = err.response.get("ResponseMetadata", {}).get("HTTPHeaders", {})
        return headers.get("x-amz-bucket-region")

    def _http_fallback(possible_region: Optional[str] = None) -> pd.DataFrame:
        # Anonymous GET on virtual-hosted-style URLs. The regional endpoint is
        # tried first when the region is known; the global endpoint is always
        # tried (requests follows redirects by default).
        urls = []
        if possible_region:
            urls.append(f"https://{bucket}.s3.{possible_region}.amazonaws.com/{key}")
        urls.append(f"https://{bucket}.s3.amazonaws.com/{key}")
        for url in urls:
            r = requests.get(url, timeout=30)
            if r.ok:
                return _post(pd.read_csv(io.BytesIO(r.content)))
        raise PermissionError(
            "AccessDenied khi dùng unsigned/HTTP. Hãy bật public-read cho prefix hoặc cung cấp credential/presigned URL."
        )

    def _read_unsigned() -> pd.DataFrame:
        # Anonymous boto request; on a client error fall back to plain HTTP.
        try:
            anon = session.client("s3", config=Config(signature_version=UNSIGNED))
            return _read_via_boto(anon)
        except ClientError as e:
            return _http_fallback(_bucket_region(e))

    # 1) Unsigned access forced from the start.
    if auth == "unsigned":
        return _read_unsigned()

    # 2) "signed" or "auto" (signed is attempted first).
    try:
        return _read_via_boto(session.client("s3"))
    except NoCredentialsError:
        if auth == "signed":
            raise
        # auto -> fall back to unsigned access.
        return _read_unsigned()
    except ClientError as e:
        # Possibly the wrong region: retry signed against the region S3
        # reported; if that fails too, use HTTP.
        # NOTE(review): this path is also taken when auth="signed", so a signed
        # failure can end in an anonymous HTTP request — confirm this is intended.
        region = _bucket_region(e)
        if region:
            try:
                return _read_via_boto(session.client("s3", region_name=region))
            except Exception:
                # Deliberately best-effort: any failure of the regional retry
                # falls through to the HTTP fallback below.
                pass
        # Last resort: HTTP.
        return _http_fallback(region)
