# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from typing import List, Literal, Optional

from pydantic import BaseModel

from .completions import CompletionUsage
from .function_call import ChatCompletionMessageToolCall, ChoiceDeltaToolCall

# Closed set of string values accepted for a choice's `finish_reason` field.
FINISH_REASONS = Literal[
    "stop",
    "length",
    "tool_calls",
    "content_filter",
    "function_call",
]


# ==================== Shared Models ====================
class ChatCompletionMessageCore(BaseModel):
    # Message fields common to the non-streaming message and the streaming
    # delta; `ChatCompletionMessage` and `ChoiceDelta` both derive from this.

    content: Optional[str] = None
    """Text content of the message, if any."""

    refusal: Optional[str] = None
    """Refusal message generated by the model, if any."""

    role: Literal["assistant"] = "assistant"
    """Role of the message author; the only accepted value is `assistant`."""


class ChoiceCore(BaseModel):
    # Choice fields common to `NonStreamChoice` and `StreamChoice`.

    finish_reason: Optional[FINISH_REASONS] = None
    """Why the model stopped generating tokens.

    - `stop`: the model hit a natural stop point or a provided stop sequence.
    - `length`: the maximum number of tokens specified in the request was reached.
    - `content_filter`: content was omitted due to a flag from our content filters.
    - `tool_calls`: the model called a tool.
    - `function_call` (deprecated): the model called a function.
    """

    index: int
    """Position of this choice within the list of choices."""


class ChatCompletionCore(BaseModel):
    # Top-level fields common to `ChatCompletion` and `ChatCompletionChunk`.

    id: str
    """Unique identifier of the chat completion."""

    created: int
    """Creation time of the chat completion, as a Unix timestamp in seconds."""

    model: str
    """Model used for the chat completion."""

    service_tier: Optional[Literal["auto", "default", "flex"]] = None
    """Latency tier used for processing the request.

    Relevant for customers subscribed to the scale tier service:

    - 'auto', Project is Scale tier enabled: the system utilizes scale tier
      credits until they are exhausted.
    - 'auto', Project is not Scale tier enabled: the request is processed using
      the default service tier with a lower uptime SLA and no latency guarantee.
    - 'default': the request is processed using the default service tier with a
      lower uptime SLA and no latency guarantee.
    - 'flex': the request is processed with the Flex Processing service tier.
      [Learn more](https://platform.openai.com/docs/guides/flex-processing).
    - Not set: the default behavior is 'auto'.

    When this parameter is set, the response body includes the `service_tier`
    that was utilized.
    """

    system_fingerprint: Optional[str] = None
    """Fingerprint of the backend configuration that the model runs with.

    Together with the `seed` request parameter, it can be used to understand when
    backend changes have been made that might impact determinism.
    """

    usage: Optional[CompletionUsage] = None
    """Usage statistics for the completion request."""


# ==================== Non-Streaming Response ====================


class ChatCompletionMessage(ChatCompletionMessageCore):
    # Message carried by a non-streaming choice: the shared message fields plus
    # an optional list of tool calls.

    tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None
    """Tool calls (such as function calls) generated by the model."""


class NonStreamChoice(ChoiceCore):
    # One choice of a non-streaming chat completion. `finish_reason` is
    # re-declared here so that its default is "stop" instead of None.

    finish_reason: Optional[FINISH_REASONS] = "stop"
    """Why the model stopped generating tokens.

    - `stop`: the model hit a natural stop point or a provided stop sequence.
    - `length`: the maximum number of tokens specified in the request was reached.
    - `content_filter`: content was omitted due to a flag from our content filters.
    - `tool_calls`: the model called a tool.
    - `function_call` (deprecated): the model called a function.
    """

    message: ChatCompletionMessage
    """The chat completion message generated by the model."""


class ChatCompletion(ChatCompletionCore):
    # Non-streaming chat completion: the shared top-level fields plus the list
    # of choices and a fixed `object` tag.

    choices: List[NonStreamChoice]
    """The chat completion choices.

    There can be more than one when `n` is greater than 1.
    """

    object: Literal["chat.completion"] = "chat.completion"
    """Object type; always `chat.completion`."""


# ==================== Streaming Response ====================


class ChoiceDelta(ChatCompletionMessageCore):
    # Delta carried by a streaming choice: the shared message fields plus an
    # optional list of tool-call entries.

    tool_calls: Optional[List[ChoiceDeltaToolCall]] = None
    """Tool-call entries included in this delta, if any."""


class StreamChoice(ChoiceCore):
    # One choice of a streamed chunk: the shared choice fields plus the delta.

    delta: ChoiceDelta
    """The chat completion delta generated by streamed model responses."""


class ChatCompletionChunk(ChatCompletionCore):
    # One chunk of a streamed chat completion: the shared top-level fields plus
    # the list of streaming choices and a fixed `object` tag.

    choices: List[StreamChoice]
    """The chat completion choices in this chunk.

    Can contain more than one element if `n` is greater than 1. Can also be empty
    for the last chunk if you set `stream_options: {"include_usage": true}`.
    """

    object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
    """Object type; always `chat.completion.chunk`."""
