"""Video I/O utilities for extracting frames and metadata."""

import logging
import subprocess
from pathlib import Path
from typing import List, Tuple

import cv2
import ffmpeg
import numpy as np


def video_fps(video_path: str) -> float:
    """
    Get the frame rate of a video file.

    The rate is read from the ``r_frame_rate`` field of the first video
    stream reported by ffprobe, which is a rational string such as
    ``"30000/1001"``.

    Args:
        video_path: Path to video file

    Returns:
        Frame rate in fps (always a positive number)

    Raises:
        RuntimeError: If video cannot be read, has no video stream, or its
            frame-rate metadata is missing, malformed, or non-positive
    """
    try:
        probe = ffmpeg.probe(str(video_path))
    except ffmpeg.Error as e:
        raise RuntimeError(f"Cannot read video file: {video_path}") from e

    # Find video stream; use .get so an incomplete stream entry is simply
    # skipped instead of raising KeyError.
    video_streams = [
        s for s in probe.get("streams", []) if s.get("codec_type") == "video"
    ]
    if not video_streams:
        raise RuntimeError(f"No video stream found in: {video_path}")

    video_stream = video_streams[0]

    # Parse fps from r_frame_rate (e.g., "30000/1001" for 29.97). A bare
    # integer such as "30" is accepted and treated as "30/1".
    fps_str = str(video_stream.get("r_frame_rate", "0/1"))
    num_str, _, den_str = fps_str.partition("/")
    try:
        num = int(num_str)
        den = int(den_str) if den_str else 1
    except ValueError as e:
        # Keep the documented contract: malformed metadata is a RuntimeError,
        # not a stray ValueError from int().
        raise RuntimeError(f"Invalid frame rate in video: {video_path}") from e

    if den == 0:
        raise RuntimeError(f"Invalid frame rate in video: {video_path}")

    fps = num / den
    if fps <= 0:
        # Covers the "0/1" default used when r_frame_rate is absent.
        raise RuntimeError(f"Invalid frame rate in video: {video_path}")

    return fps


def get_video_duration(video_path: str) -> float:
    """
    Get the duration of a video file in seconds.

    The container-level (``format``) duration is preferred; if it is absent
    or cannot be parsed as a number, the duration of the first video stream
    is used instead.

    Args:
        video_path: Path to video file

    Returns:
        Duration in seconds

    Raises:
        RuntimeError: If video cannot be read or no usable duration is found
            in its metadata
    """
    try:
        probe = ffmpeg.probe(str(video_path))
    except ffmpeg.Error as e:
        raise RuntimeError(f"Cannot read video file: {video_path}") from e

    # Candidate values in priority order: container first, then video stream.
    candidates = [(probe.get("format") or {}).get("duration")]

    video_streams = [
        s for s in probe.get("streams", []) if s.get("codec_type") == "video"
    ]
    if video_streams:
        candidates.append(video_streams[0].get("duration"))

    for raw_duration in candidates:
        if raw_duration is None:
            continue
        try:
            return float(raw_duration)
        except (TypeError, ValueError):
            # Unparsable value (e.g. "N/A"): fall through to the next source
            # rather than leaking a ValueError to the caller.
            continue

    raise RuntimeError(f"Cannot determine duration for: {video_path}")


def _sample_timestamps(duration: float, fps: float) -> List[float]:
    """Return the timestamps ``i / fps`` (i = 0, 1, ...) that lie before ``duration``."""
    # Each timestamp is derived from its index rather than by repeatedly
    # adding 1/fps: accumulation drifts and can produce a spurious extra
    # sample just below ``duration`` (e.g. ten additions of 0.1 give
    # 0.9999999999999999, which is still < 1.0).
    timestamps = []
    index = 0
    while index / fps < duration:
        timestamps.append(index / fps)
        index += 1
    return timestamps


def _probe_frame_size(video_path: str) -> Tuple[int, int]:
    """Return ``(width, height)`` of the first video stream as reported by ffprobe."""
    try:
        probe = ffmpeg.probe(video_path)
    except ffmpeg.Error as e:
        raise RuntimeError(f"Failed to read video metadata: {video_path}") from e

    video_streams = [
        s for s in probe.get("streams", []) if s.get("codec_type") == "video"
    ]
    if not video_streams:
        raise RuntimeError(f"No video stream found in: {video_path}")

    # NOTE(review): these are the dimensions stored in the stream metadata.
    # If ffmpeg rotates the decoded output for sources carrying rotation
    # metadata, width and height may need swapping - confirm with a rotated
    # (e.g. phone-recorded) clip.
    try:
        width = int(video_streams[0]["width"])
        height = int(video_streams[0]["height"])
    except (KeyError, TypeError, ValueError) as e:
        raise RuntimeError(f"Failed to read video metadata: {video_path}") from e

    if width <= 0 or height <= 0:
        raise RuntimeError(f"Failed to read video metadata: {video_path}")
    return width, height


def _grab_raw_frame(video_path: str, timestamp: float) -> bytes:
    """Run ffmpeg once and return the raw BGR24 bytes of the frame at ``timestamp``."""
    # IMPORTANT: -ss is passed as an OUTPUT option (after the input) so ffmpeg
    # decodes up to the exact timestamp, rather than as an input option.
    out, _ = (
        ffmpeg
        .input(video_path)
        .output("pipe:", ss=timestamp, vframes=1, format="rawvideo", pix_fmt="bgr24")
        .run(capture_stdout=True, capture_stderr=True, quiet=True)
    )
    return out


def extract_keyframes(
    video_path: str, fps: float
) -> List[Tuple[float, np.ndarray]]:
    """
    Extract keyframes from video at specified frame rate.

    Uses timestamp-based extraction to handle variable frame rate (VFR) videos.
    Sample ``i`` is taken at exactly ``i / fps`` seconds for every such
    timestamp that lies before the video duration, so sampling is
    deterministic. One ffmpeg process is launched per sample.

    Samples for which ffmpeg fails or produces no complete frame (this can
    happen at the very end of the video) are skipped and logged; the call
    only fails if no frame at all could be extracted.

    Args:
        video_path: Path to video file
        fps: Desired keyframe extraction rate (e.g., 3.0 for 3 fps)

    Returns:
        List of (timestamp_seconds, frame_bgr) tuples, where frame_bgr is a numpy
        array in BGR format (OpenCV convention) of shape (height, width, 3).
        The arrays are read-only views over the bytes returned by ffmpeg;
        copy them before modifying in place.

    Raises:
        RuntimeError: If video cannot be read or is empty
        ValueError: If fps is invalid (non-positive, NaN, or infinite)
    """
    if not fps > 0:  # written this way so NaN is rejected as well
        raise ValueError(f"FPS must be positive, got: {fps}")
    if fps == float("inf"):
        # An infinite rate would give a zero sampling interval and never end.
        raise ValueError(f"FPS must be finite, got: {fps}")

    video_path = str(video_path)
    path = Path(video_path)
    if not path.exists():
        raise RuntimeError(f"Video file not found: {video_path}")

    if path.stat().st_size == 0:
        raise RuntimeError(f"Video file is empty: {video_path}")

    # Get video duration
    try:
        duration = get_video_duration(video_path)
    except Exception as e:
        raise RuntimeError(f"Failed to read video metadata: {video_path}") from e

    # Rejects zero, negative, NaN and infinite durations in one comparison.
    if not 0 < duration < float("inf"):
        raise RuntimeError(f"Video has invalid duration: {duration}")

    # duration > 0 guarantees at least the t=0 sample.
    timestamps = _sample_timestamps(duration, fps)

    # Frame geometry is the same for every sample, so probe it once up front.
    width, height = _probe_frame_size(video_path)
    expected_bytes = width * height * 3

    log = logging.getLogger(__name__)
    interval = 1.0 / fps

    keyframes = []
    for timestamp in timestamps:
        # A miss on the final sample is expected (the timestamp may lie past
        # the last decodable frame); anything earlier deserves a warning.
        level = logging.DEBUG if timestamp > duration - interval else logging.WARNING

        try:
            raw = _grab_raw_frame(video_path, timestamp)
        except ffmpeg.Error as e:
            stderr = e.stderr.decode(errors="replace") if e.stderr else ""
            log.log(
                level,
                "ffmpeg failed at t=%.3fs in %s: %s",
                timestamp,
                video_path,
                stderr.strip(),
            )
            continue
        except OSError as e:
            # The ffmpeg executable itself could not be started; retrying for
            # every remaining timestamp would fail the same way.
            raise RuntimeError(f"Failed to extract any frames from: {video_path}") from e

        if len(raw) != expected_bytes:
            log.log(
                level,
                "No complete frame at t=%.3fs in %s (got %d bytes, expected %d)",
                timestamp,
                video_path,
                len(raw),
                expected_bytes,
            )
            continue

        # Convert bytes to numpy array
        frame = np.frombuffer(raw, np.uint8).reshape([height, width, 3])
        keyframes.append((timestamp, frame))

    if not keyframes:
        raise RuntimeError(f"Failed to extract any frames from: {video_path}")

    return keyframes
