# coding=utf-8
# *** WARNING: this file was generated by pulumi-language-python. ***
# *** Do not edit by hand unless you're certain you know what you are doing! ***

import builtins as _builtins
import warnings
import sys
import pulumi
import pulumi.runtime
from typing import Any, Mapping, Optional, Sequence, Union, overload
if sys.version_info >= (3, 11):
    from typing import NotRequired, TypedDict, TypeAlias
else:
    from typing_extensions import NotRequired, TypedDict, TypeAlias
from . import _utilities
from . import outputs
from ._inputs import *

__all__ = ['ModelServingArgs', 'ModelServing']

@pulumi.input_type
class ModelServingArgs:
    def __init__(__self__, *,
                 ai_gateway: Optional[pulumi.Input['ModelServingAiGatewayArgs']] = None,
                 budget_policy_id: Optional[pulumi.Input[_builtins.str]] = None,
                 config: Optional[pulumi.Input['ModelServingConfigArgs']] = None,
                 description: Optional[pulumi.Input[_builtins.str]] = None,
                 email_notifications: Optional[pulumi.Input['ModelServingEmailNotificationsArgs']] = None,
                 name: Optional[pulumi.Input[_builtins.str]] = None,
                 rate_limits: Optional[pulumi.Input[Sequence[pulumi.Input['ModelServingRateLimitArgs']]]] = None,
                 route_optimized: Optional[pulumi.Input[_builtins.bool]] = None,
                 tags: Optional[pulumi.Input[Sequence[pulumi.Input['ModelServingTagArgs']]]] = None):
        """
        Arguments accepted when constructing a ModelServing resource.

        Every argument is optional; only the ones that are not ``None`` are stored on the instance.
        Supplying ``rate_limits`` additionally emits a deprecation warning.

        :param pulumi.Input['ModelServingAiGatewayArgs'] ai_gateway: AI Gateway configuration block for the serving endpoint. *Note: only external model endpoints are supported as of now.*
        :param pulumi.Input[_builtins.str] budget_policy_id: Budget Policy ID set for this serving endpoint.
        :param pulumi.Input['ModelServingConfigArgs'] config: Configuration of the model serving endpoint. Optional; it can be added and modified after creation. If `config` was provided in a previous apply but is omitted from the current one, the endpoint is left unchanged. To get an endpoint without the `config` block, the endpoint must be destroyed and recreated.
        :param pulumi.Input[_builtins.str] description: Description of the model serving endpoint.
        :param pulumi.Input['ModelServingEmailNotificationsArgs'] email_notifications: Email notification settings block.
        :param pulumi.Input[_builtins.str] name: Name of the model serving endpoint. This field is required and must be unique across a workspace. It can consist of alphanumeric characters, dashes, and underscores. NOTE: changing this name deletes the existing endpoint and creates a new one with the updated name.
        :param pulumi.Input[Sequence[pulumi.Input['ModelServingRateLimitArgs']]] rate_limits: Deprecated (use AI Gateway to manage rate limits). List of rate limit blocks applied to the serving endpoint. *Note: only external and foundation model endpoints are supported as of now.*
        :param pulumi.Input[_builtins.bool] route_optimized: Whether route optimization is enabled for the endpoint. *Note: only available for custom models.*
        :param pulumi.Input[Sequence[pulumi.Input['ModelServingTagArgs']]] tags: Tags attached to the serving endpoint and automatically propagated to billing logs.
        """
        # (property name, supplied value) pairs, in the order they are stored.
        supplied = (
            ("ai_gateway", ai_gateway),
            ("budget_policy_id", budget_policy_id),
            ("config", config),
            ("description", description),
            ("email_notifications", email_notifications),
            ("name", name),
            ("rate_limits", rate_limits),
            ("route_optimized", route_optimized),
            ("tags", tags),
        )
        for prop, prop_value in supplied:
            if prop_value is None:
                continue
            if prop == "rate_limits":
                # Deprecated input: warn through both the Python warnings
                # machinery and the Pulumi log before storing the value.
                warnings.warn("Please use AI Gateway to manage rate limits.", DeprecationWarning)
                pulumi.log.warn("rate_limits is deprecated: Please use AI Gateway to manage rate limits.")
            pulumi.set(__self__, prop, prop_value)

    @_builtins.property
    @pulumi.getter(name="aiGateway")
    def ai_gateway(self) -> Optional[pulumi.Input['ModelServingAiGatewayArgs']]:
        """
        AI Gateway configuration block for the serving endpoint. *Note: only external model endpoints are supported as of now.*
        """
        return pulumi.get(self, "ai_gateway")

    @ai_gateway.setter
    def ai_gateway(self, value: Optional[pulumi.Input['ModelServingAiGatewayArgs']]):
        pulumi.set(self, "ai_gateway", value)

    @_builtins.property
    @pulumi.getter(name="budgetPolicyId")
    def budget_policy_id(self) -> Optional[pulumi.Input[_builtins.str]]:
        """
        Budget Policy ID set for this serving endpoint.
        """
        return pulumi.get(self, "budget_policy_id")

    @budget_policy_id.setter
    def budget_policy_id(self, value: Optional[pulumi.Input[_builtins.str]]):
        pulumi.set(self, "budget_policy_id", value)

    @_builtins.property
    @pulumi.getter
    def config(self) -> Optional[pulumi.Input['ModelServingConfigArgs']]:
        """
        Configuration of the model serving endpoint. Optional; it can be added and modified after creation. If `config` was provided in a previous apply but is omitted from the current one, the endpoint is left unchanged. To get an endpoint without the `config` block, the endpoint must be destroyed and recreated.
        """
        return pulumi.get(self, "config")

    @config.setter
    def config(self, value: Optional[pulumi.Input['ModelServingConfigArgs']]):
        pulumi.set(self, "config", value)

    @_builtins.property
    @pulumi.getter
    def description(self) -> Optional[pulumi.Input[_builtins.str]]:
        """
        Description of the model serving endpoint.
        """
        return pulumi.get(self, "description")

    @description.setter
    def description(self, value: Optional[pulumi.Input[_builtins.str]]):
        pulumi.set(self, "description", value)

    @_builtins.property
    @pulumi.getter(name="emailNotifications")
    def email_notifications(self) -> Optional[pulumi.Input['ModelServingEmailNotificationsArgs']]:
        """
        Email notification settings block.
        """
        return pulumi.get(self, "email_notifications")

    @email_notifications.setter
    def email_notifications(self, value: Optional[pulumi.Input['ModelServingEmailNotificationsArgs']]):
        pulumi.set(self, "email_notifications", value)

    @_builtins.property
    @pulumi.getter
    def name(self) -> Optional[pulumi.Input[_builtins.str]]:
        """
        Name of the model serving endpoint. This field is required and must be unique across a workspace. It can consist of alphanumeric characters, dashes, and underscores. NOTE: changing this name deletes the existing endpoint and creates a new one with the updated name.
        """
        return pulumi.get(self, "name")

    @name.setter
    def name(self, value: Optional[pulumi.Input[_builtins.str]]):
        pulumi.set(self, "name", value)

    @_builtins.property
    @pulumi.getter(name="rateLimits")
    @_utilities.deprecated("""Please use AI Gateway to manage rate limits.""")
    def rate_limits(self) -> Optional[pulumi.Input[Sequence[pulumi.Input['ModelServingRateLimitArgs']]]]:
        """
        Deprecated (use AI Gateway to manage rate limits). List of rate limit blocks applied to the serving endpoint. *Note: only external and foundation model endpoints are supported as of now.*
        """
        return pulumi.get(self, "rate_limits")

    @rate_limits.setter
    def rate_limits(self, value: Optional[pulumi.Input[Sequence[pulumi.Input['ModelServingRateLimitArgs']]]]):
        pulumi.set(self, "rate_limits", value)

    @_builtins.property
    @pulumi.getter(name="routeOptimized")
    def route_optimized(self) -> Optional[pulumi.Input[_builtins.bool]]:
        """
        Whether route optimization is enabled for the endpoint. *Note: only available for custom models.*
        """
        return pulumi.get(self, "route_optimized")

    @route_optimized.setter
    def route_optimized(self, value: Optional[pulumi.Input[_builtins.bool]]):
        pulumi.set(self, "route_optimized", value)

    @_builtins.property
    @pulumi.getter
    def tags(self) -> Optional[pulumi.Input[Sequence[pulumi.Input['ModelServingTagArgs']]]]:
        """
        Tags attached to the serving endpoint and automatically propagated to billing logs.
        """
        return pulumi.get(self, "tags")

    @tags.setter
    def tags(self, value: Optional[pulumi.Input[Sequence[pulumi.Input['ModelServingTagArgs']]]]):
        pulumi.set(self, "tags", value)


@pulumi.input_type
class _ModelServingState:
    def __init__(__self__, *,
                 ai_gateway: Optional[pulumi.Input['ModelServingAiGatewayArgs']] = None,
                 budget_policy_id: Optional[pulumi.Input[_builtins.str]] = None,
                 config: Optional[pulumi.Input['ModelServingConfigArgs']] = None,
                 description: Optional[pulumi.Input[_builtins.str]] = None,
                 email_notifications: Optional[pulumi.Input['ModelServingEmailNotificationsArgs']] = None,
                 endpoint_url: Optional[pulumi.Input[_builtins.str]] = None,
                 name: Optional[pulumi.Input[_builtins.str]] = None,
                 rate_limits: Optional[pulumi.Input[Sequence[pulumi.Input['ModelServingRateLimitArgs']]]] = None,
                 route_optimized: Optional[pulumi.Input[_builtins.bool]] = None,
                 serving_endpoint_id: Optional[pulumi.Input[_builtins.str]] = None,
                 tags: Optional[pulumi.Input[Sequence[pulumi.Input['ModelServingTagArgs']]]] = None):
        """
        State properties used to look up and filter ModelServing resources.

        Every argument is optional; only the ones that are not ``None`` are stored on the instance.
        Supplying ``rate_limits`` additionally emits a deprecation warning.

        :param pulumi.Input['ModelServingAiGatewayArgs'] ai_gateway: AI Gateway configuration block for the serving endpoint. *Note: only external model endpoints are supported as of now.*
        :param pulumi.Input[_builtins.str] budget_policy_id: Budget Policy ID set for this serving endpoint.
        :param pulumi.Input['ModelServingConfigArgs'] config: Configuration of the model serving endpoint. Optional; it can be added and modified after creation. If `config` was provided in a previous apply but is omitted from the current one, the endpoint is left unchanged. To get an endpoint without the `config` block, the endpoint must be destroyed and recreated.
        :param pulumi.Input[_builtins.str] description: Description of the model serving endpoint.
        :param pulumi.Input['ModelServingEmailNotificationsArgs'] email_notifications: Email notification settings block.
        :param pulumi.Input[_builtins.str] endpoint_url: Invocation URL of the endpoint.
        :param pulumi.Input[_builtins.str] name: Name of the model serving endpoint. This field is required and must be unique across a workspace. It can consist of alphanumeric characters, dashes, and underscores. NOTE: changing this name deletes the existing endpoint and creates a new one with the updated name.
        :param pulumi.Input[Sequence[pulumi.Input['ModelServingRateLimitArgs']]] rate_limits: Deprecated (use AI Gateway to manage rate limits). List of rate limit blocks applied to the serving endpoint. *Note: only external and foundation model endpoints are supported as of now.*
        :param pulumi.Input[_builtins.bool] route_optimized: Whether route optimization is enabled for the endpoint. *Note: only available for custom models.*
        :param pulumi.Input[_builtins.str] serving_endpoint_id: Unique identifier of the serving endpoint, primarily used to set permissions and to refer to this instance in other operations.
        :param pulumi.Input[Sequence[pulumi.Input['ModelServingTagArgs']]] tags: Tags attached to the serving endpoint and automatically propagated to billing logs.
        """
        # (property name, supplied value) pairs, in the order they are stored.
        supplied = (
            ("ai_gateway", ai_gateway),
            ("budget_policy_id", budget_policy_id),
            ("config", config),
            ("description", description),
            ("email_notifications", email_notifications),
            ("endpoint_url", endpoint_url),
            ("name", name),
            ("rate_limits", rate_limits),
            ("route_optimized", route_optimized),
            ("serving_endpoint_id", serving_endpoint_id),
            ("tags", tags),
        )
        for prop, prop_value in supplied:
            if prop_value is None:
                continue
            if prop == "rate_limits":
                # Deprecated input: warn through both the Python warnings
                # machinery and the Pulumi log before storing the value.
                warnings.warn("Please use AI Gateway to manage rate limits.", DeprecationWarning)
                pulumi.log.warn("rate_limits is deprecated: Please use AI Gateway to manage rate limits.")
            pulumi.set(__self__, prop, prop_value)

    @_builtins.property
    @pulumi.getter(name="aiGateway")
    def ai_gateway(self) -> Optional[pulumi.Input['ModelServingAiGatewayArgs']]:
        """
        AI Gateway configuration block for the serving endpoint. *Note: only external model endpoints are supported as of now.*
        """
        return pulumi.get(self, "ai_gateway")

    @ai_gateway.setter
    def ai_gateway(self, value: Optional[pulumi.Input['ModelServingAiGatewayArgs']]):
        pulumi.set(self, "ai_gateway", value)

    @_builtins.property
    @pulumi.getter(name="budgetPolicyId")
    def budget_policy_id(self) -> Optional[pulumi.Input[_builtins.str]]:
        """
        Budget Policy ID set for this serving endpoint.
        """
        return pulumi.get(self, "budget_policy_id")

    @budget_policy_id.setter
    def budget_policy_id(self, value: Optional[pulumi.Input[_builtins.str]]):
        pulumi.set(self, "budget_policy_id", value)

    @_builtins.property
    @pulumi.getter
    def config(self) -> Optional[pulumi.Input['ModelServingConfigArgs']]:
        """
        Configuration of the model serving endpoint. Optional; it can be added and modified after creation. If `config` was provided in a previous apply but is omitted from the current one, the endpoint is left unchanged. To get an endpoint without the `config` block, the endpoint must be destroyed and recreated.
        """
        return pulumi.get(self, "config")

    @config.setter
    def config(self, value: Optional[pulumi.Input['ModelServingConfigArgs']]):
        pulumi.set(self, "config", value)

    @_builtins.property
    @pulumi.getter
    def description(self) -> Optional[pulumi.Input[_builtins.str]]:
        """
        Description of the model serving endpoint.
        """
        return pulumi.get(self, "description")

    @description.setter
    def description(self, value: Optional[pulumi.Input[_builtins.str]]):
        pulumi.set(self, "description", value)

    @_builtins.property
    @pulumi.getter(name="emailNotifications")
    def email_notifications(self) -> Optional[pulumi.Input['ModelServingEmailNotificationsArgs']]:
        """
        Email notification settings block.
        """
        return pulumi.get(self, "email_notifications")

    @email_notifications.setter
    def email_notifications(self, value: Optional[pulumi.Input['ModelServingEmailNotificationsArgs']]):
        pulumi.set(self, "email_notifications", value)

    @_builtins.property
    @pulumi.getter(name="endpointUrl")
    def endpoint_url(self) -> Optional[pulumi.Input[_builtins.str]]:
        """
        Invocation URL of the endpoint.
        """
        return pulumi.get(self, "endpoint_url")

    @endpoint_url.setter
    def endpoint_url(self, value: Optional[pulumi.Input[_builtins.str]]):
        pulumi.set(self, "endpoint_url", value)

    @_builtins.property
    @pulumi.getter
    def name(self) -> Optional[pulumi.Input[_builtins.str]]:
        """
        Name of the model serving endpoint. This field is required and must be unique across a workspace. It can consist of alphanumeric characters, dashes, and underscores. NOTE: changing this name deletes the existing endpoint and creates a new one with the updated name.
        """
        return pulumi.get(self, "name")

    @name.setter
    def name(self, value: Optional[pulumi.Input[_builtins.str]]):
        pulumi.set(self, "name", value)

    @_builtins.property
    @pulumi.getter(name="rateLimits")
    @_utilities.deprecated("""Please use AI Gateway to manage rate limits.""")
    def rate_limits(self) -> Optional[pulumi.Input[Sequence[pulumi.Input['ModelServingRateLimitArgs']]]]:
        """
        Deprecated (use AI Gateway to manage rate limits). List of rate limit blocks applied to the serving endpoint. *Note: only external and foundation model endpoints are supported as of now.*
        """
        return pulumi.get(self, "rate_limits")

    @rate_limits.setter
    def rate_limits(self, value: Optional[pulumi.Input[Sequence[pulumi.Input['ModelServingRateLimitArgs']]]]):
        pulumi.set(self, "rate_limits", value)

    @_builtins.property
    @pulumi.getter(name="routeOptimized")
    def route_optimized(self) -> Optional[pulumi.Input[_builtins.bool]]:
        """
        Whether route optimization is enabled for the endpoint. *Note: only available for custom models.*
        """
        return pulumi.get(self, "route_optimized")

    @route_optimized.setter
    def route_optimized(self, value: Optional[pulumi.Input[_builtins.bool]]):
        pulumi.set(self, "route_optimized", value)

    @_builtins.property
    @pulumi.getter(name="servingEndpointId")
    def serving_endpoint_id(self) -> Optional[pulumi.Input[_builtins.str]]:
        """
        Unique identifier of the serving endpoint, primarily used to set permissions and to refer to this instance in other operations.
        """
        return pulumi.get(self, "serving_endpoint_id")

    @serving_endpoint_id.setter
    def serving_endpoint_id(self, value: Optional[pulumi.Input[_builtins.str]]):
        pulumi.set(self, "serving_endpoint_id", value)

    @_builtins.property
    @pulumi.getter
    def tags(self) -> Optional[pulumi.Input[Sequence[pulumi.Input['ModelServingTagArgs']]]]:
        """
        Tags attached to the serving endpoint and automatically propagated to billing logs.
        """
        return pulumi.get(self, "tags")

    @tags.setter
    def tags(self, value: Optional[pulumi.Input[Sequence[pulumi.Input['ModelServingTagArgs']]]]):
        pulumi.set(self, "tags", value)


@pulumi.type_token("databricks:index/modelServing:ModelServing")
class ModelServing(pulumi.CustomResource):
    @overload
    def __init__(__self__,
                 resource_name: str,
                 opts: Optional[pulumi.ResourceOptions] = None,
                 ai_gateway: Optional[pulumi.Input[Union['ModelServingAiGatewayArgs', 'ModelServingAiGatewayArgsDict']]] = None,
                 budget_policy_id: Optional[pulumi.Input[_builtins.str]] = None,
                 config: Optional[pulumi.Input[Union['ModelServingConfigArgs', 'ModelServingConfigArgsDict']]] = None,
                 description: Optional[pulumi.Input[_builtins.str]] = None,
                 email_notifications: Optional[pulumi.Input[Union['ModelServingEmailNotificationsArgs', 'ModelServingEmailNotificationsArgsDict']]] = None,
                 name: Optional[pulumi.Input[_builtins.str]] = None,
                 rate_limits: Optional[pulumi.Input[Sequence[pulumi.Input[Union['ModelServingRateLimitArgs', 'ModelServingRateLimitArgsDict']]]]] = None,
                 route_optimized: Optional[pulumi.Input[_builtins.bool]] = None,
                 tags: Optional[pulumi.Input[Sequence[pulumi.Input[Union['ModelServingTagArgs', 'ModelServingTagArgsDict']]]]] = None,
                 __props__=None):
        """
        This resource allows you to manage [Model Serving](https://docs.databricks.com/machine-learning/model-serving/index.html) endpoints in Databricks, including custom models, external models, and foundation models. For newer foundation models, including Llama 4, please use the ModelServingProvisionedThroughput resource.

        > This resource can only be used with a workspace-level provider!

        > If you replace `served_models` with `served_entities` in an existing serving endpoint, the serving endpoint will briefly go into an update state (~30 seconds) and increment the config version.

        ## Example Usage

        Creating a CPU serving endpoint

        ```python
        import pulumi
        import pulumi_databricks as databricks

        this = databricks.ModelServing("this",
            name="ads-serving-endpoint",
            config={
                "served_entities": [
                    {
                        "name": "prod_model",
                        "entity_name": "ads-model",
                        "entity_version": "2",
                        "workload_size": "Small",
                        "scale_to_zero_enabled": True,
                    },
                    {
                        "name": "candidate_model",
                        "entity_name": "ads-model",
                        "entity_version": "4",
                        "workload_size": "Small",
                        "scale_to_zero_enabled": False,
                    },
                ],
                "traffic_config": {
                    "routes": [
                        {
                            "served_model_name": "prod_model",
                            "traffic_percentage": 90,
                        },
                        {
                            "served_model_name": "candidate_model",
                            "traffic_percentage": 10,
                        },
                    ],
                },
            })
        ```

        Creating a Foundation Model endpoint

        ```python
        import pulumi
        import pulumi_databricks as databricks

        llama = databricks.ModelServing("llama",
            name="llama_3_2_3b_instruct",
            ai_gateway={
                "usage_tracking_config": {
                    "enabled": True,
                },
            },
            config={
                "served_entities": [{
                    "name": "meta_llama_v3_2_3b_instruct-3",
                    "entity_name": "system.ai.llama_v3_2_3b_instruct",
                    "entity_version": "2",
                    "scale_to_zero_enabled": True,
                    "max_provisioned_throughput": 44000,
                }],
            })
        ```

        Creating an External Model endpoint

        ```python
        import pulumi
        import pulumi_databricks as databricks

        gpt4o = databricks.ModelServing("gpt_4o",
            name="gpt-4o-mini",
            ai_gateway={
                "usage_tracking_config": {
                    "enabled": True,
                },
                "rate_limits": [{
                    "calls": 10,
                    "key": "endpoint",
                    "renewal_period": "minute",
                }],
                "inference_table_config": {
                    "enabled": True,
                    "table_name_prefix": "gpt-4o-mini",
                    "catalog_name": "ml",
                    "schema_name": "ai_gateway",
                },
                "guardrails": {
                    "input": {
                        "invalid_keywords": ["SuperSecretProject"],
                        "pii": {
                            "behavior": "BLOCK",
                        },
                    },
                    "output": {
                        "pii": {
                            "behavior": "BLOCK",
                        },
                    },
                },
            },
            config={
                "served_entities": [{
                    "name": "gpt-4o-mini",
                    "external_model": {
                        "name": "gpt-4o-mini",
                        "provider": "openai",
                        "task": "llm/v1/chat",
                        "openai_config": {
                            "openai_api_key": "{{secrets/llm_scope/openai_api_key}}",
                        },
                    },
                }],
            })
        ```

        ## Access Control

        * Permissions can control which groups or individual users can *Manage*, *Query* or *View* individual serving endpoints.

        ## Related Resources

        The following resources are often used in the same context:

        * ModelServingProvisionedThroughput to create [Foundation Model provisioned throughput](https://docs.databricks.com/aws/en/machine-learning/foundation-model-apis/deploy-prov-throughput-foundation-model-apis) endpoints in Databricks.
        * RegisteredModel to create [Models in Unity Catalog](https://docs.databricks.com/en/mlflow/models-in-uc.html) in Databricks.
        * End to end workspace management guide.
        * Directory to manage directories in [Databricks Workspace](https://docs.databricks.com/workspace/workspace-objects.html).
        * MlflowModel to create models in the [workspace model registry](https://docs.databricks.com/en/mlflow/model-registry.html) in Databricks.
        * Notebook to manage [Databricks Notebooks](https://docs.databricks.com/notebooks/index.html).
        * Notebook data to export a notebook from Databricks Workspace.
        * Repo to manage [Databricks Repos](https://docs.databricks.com/repos.html).

        ## Import

        The model serving resource can be imported using the name of the endpoint.

        ```hcl
        import {
          to = databricks_model_serving.this
          id = "<model-serving-endpoint-name>"
        }
        ```

        Alternatively, when using `terraform` version 1.4 or earlier, import using the `pulumi import` command:

        bash

        ```sh
        $ pulumi import databricks:index/modelServing:ModelServing this <model-serving-endpoint-name>
        ```

        :param str resource_name: The name of the resource.
        :param pulumi.ResourceOptions opts: Options for the resource.
        :param pulumi.Input[Union['ModelServingAiGatewayArgs', 'ModelServingAiGatewayArgsDict']] ai_gateway: A block with AI Gateway configuration for the serving endpoint. *Note: only external model endpoints are supported as of now.*
        :param pulumi.Input[_builtins.str] budget_policy_id: The Budget Policy ID set for this serving endpoint.
        :param pulumi.Input[Union['ModelServingConfigArgs', 'ModelServingConfigArgsDict']] config: The model serving endpoint configuration. This is optional and can be added and modified after creation. If `config` was provided in a previous apply but is not provided in the current apply, no change to the model serving endpoint will occur. To recreate the model serving endpoint without the `config` block, the model serving endpoint must be destroyed and recreated.
        :param pulumi.Input[_builtins.str] description: The description of the model serving endpoint.
        :param pulumi.Input[Union['ModelServingEmailNotificationsArgs', 'ModelServingEmailNotificationsArgsDict']] email_notifications: A block with Email notification setting.
        :param pulumi.Input[_builtins.str] name: The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
        :param pulumi.Input[Sequence[pulumi.Input[Union['ModelServingRateLimitArgs', 'ModelServingRateLimitArgsDict']]]] rate_limits: A list of rate limit blocks to be applied to the serving endpoint. *Note: only external and foundation model endpoints are supported as of now.*
        :param pulumi.Input[_builtins.bool] route_optimized: A boolean enabling route optimization for the endpoint. *Note: only available for custom models.*
        :param pulumi.Input[Sequence[pulumi.Input[Union['ModelServingTagArgs', 'ModelServingTagArgsDict']]]] tags: Tags to be attached to the serving endpoint and automatically propagated to billing logs.
        """
        ...
    @overload
    def __init__(__self__,
                 resource_name: str,
                 args: Optional[ModelServingArgs] = None,
                 opts: Optional[pulumi.ResourceOptions] = None):
        """
        This resource allows you to manage [Model Serving](https://docs.databricks.com/machine-learning/model-serving/index.html) endpoints in Databricks, including custom models, external models, and foundation models. For newer foundation models, including Llama 4, please use the ModelServingProvisionedThroughput resource.

        > This resource can only be used with a workspace-level provider!

        > If you replace `served_models` with `served_entities` in an existing serving endpoint, the serving endpoint will briefly go into an update state (~30 seconds) and increment the config version.

        ## Example Usage

        Creating a CPU serving endpoint

        ```python
        import pulumi
        import pulumi_databricks as databricks

        this = databricks.ModelServing("this",
            name="ads-serving-endpoint",
            config={
                "served_entities": [
                    {
                        "name": "prod_model",
                        "entity_name": "ads-model",
                        "entity_version": "2",
                        "workload_size": "Small",
                        "scale_to_zero_enabled": True,
                    },
                    {
                        "name": "candidate_model",
                        "entity_name": "ads-model",
                        "entity_version": "4",
                        "workload_size": "Small",
                        "scale_to_zero_enabled": False,
                    },
                ],
                "traffic_config": {
                    "routes": [
                        {
                            "served_model_name": "prod_model",
                            "traffic_percentage": 90,
                        },
                        {
                            "served_model_name": "candidate_model",
                            "traffic_percentage": 10,
                        },
                    ],
                },
            })
        ```

        Creating a Foundation Model endpoint

        ```python
        import pulumi
        import pulumi_databricks as databricks

        llama = databricks.ModelServing("llama",
            name="llama_3_2_3b_instruct",
            ai_gateway={
                "usage_tracking_config": {
                    "enabled": True,
                },
            },
            config={
                "served_entities": [{
                    "name": "meta_llama_v3_2_3b_instruct-3",
                    "entity_name": "system.ai.llama_v3_2_3b_instruct",
                    "entity_version": "2",
                    "scale_to_zero_enabled": True,
                    "max_provisioned_throughput": 44000,
                }],
            })
        ```

        Creating an External Model endpoint

        ```python
        import pulumi
        import pulumi_databricks as databricks

        gpt4o = databricks.ModelServing("gpt_4o",
            name="gpt-4o-mini",
            ai_gateway={
                "usage_tracking_config": {
                    "enabled": True,
                },
                "rate_limits": [{
                    "calls": 10,
                    "key": "endpoint",
                    "renewal_period": "minute",
                }],
                "inference_table_config": {
                    "enabled": True,
                    "table_name_prefix": "gpt-4o-mini",
                    "catalog_name": "ml",
                    "schema_name": "ai_gateway",
                },
                "guardrails": {
                    "input": {
                        "invalid_keywords": ["SuperSecretProject"],
                        "pii": {
                            "behavior": "BLOCK",
                        },
                    },
                    "output": {
                        "pii": {
                            "behavior": "BLOCK",
                        },
                    },
                },
            },
            config={
                "served_entities": [{
                    "name": "gpt-4o-mini",
                    "external_model": {
                        "name": "gpt-4o-mini",
                        "provider": "openai",
                        "task": "llm/v1/chat",
                        "openai_config": {
                            "openai_api_key": "{{secrets/llm_scope/openai_api_key}}",
                        },
                    },
                }],
            })
        ```

        ## Access Control

        * Permissions can control which groups or individual users can *Manage*, *Query* or *View* individual serving endpoints.

        ## Related Resources

        The following resources are often used in the same context:

        * ModelServingProvisionedThroughput to create [Foundation Model provisioned throughput](https://docs.databricks.com/aws/en/machine-learning/foundation-model-apis/deploy-prov-throughput-foundation-model-apis) endpoints in Databricks.
        * RegisteredModel to create [Models in Unity Catalog](https://docs.databricks.com/en/mlflow/models-in-uc.html) in Databricks.
        * End to end workspace management guide.
        * Directory to manage directories in [Databricks Workspace](https://docs.databricks.com/workspace/workspace-objects.html).
        * MlflowModel to create models in the [workspace model registry](https://docs.databricks.com/en/mlflow/model-registry.html) in Databricks.
        * Notebook to manage [Databricks Notebooks](https://docs.databricks.com/notebooks/index.html).
        * Notebook data to export a notebook from Databricks Workspace.
        * Repo to manage [Databricks Repos](https://docs.databricks.com/repos.html).

        ## Import

        The model serving resource can be imported using the name of the endpoint.

        hcl

        import {

          to = databricks_model_serving.this

          id = "<model-serving-endpoint-name>"

        }

        Alternatively, when using `terraform` version 1.4 or earlier, import using the `pulumi import` command:

        bash

        ```sh
        $ pulumi import databricks:index/modelServing:ModelServing this <model-serving-endpoint-name>
        ```

        :param str resource_name: The name of the resource.
        :param ModelServingArgs args: The arguments to use to populate this resource's properties.
        :param pulumi.ResourceOptions opts: Options for the resource.
        """
        ...
    def __init__(__self__, resource_name: str, *args, **kwargs):
        # Runtime implementation behind the typed overloads: work out whether the
        # caller passed a ModelServingArgs bundle or individual keyword properties.
        bundled_args, bundled_opts = _utilities.get_resource_args_opts(
            ModelServingArgs, pulumi.ResourceOptions, *args, **kwargs)
        if bundled_args is None:
            # No args object was recognised: forward the call unchanged.
            __self__._internal_init(resource_name, *args, **kwargs)
            return
        # An args object was recognised: expand its fields into keyword properties.
        __self__._internal_init(resource_name, bundled_opts, **bundled_args.__dict__)

    def _internal_init(__self__,
                 resource_name: str,
                 opts: Optional[pulumi.ResourceOptions] = None,
                 ai_gateway: Optional[pulumi.Input[Union['ModelServingAiGatewayArgs', 'ModelServingAiGatewayArgsDict']]] = None,
                 budget_policy_id: Optional[pulumi.Input[_builtins.str]] = None,
                 config: Optional[pulumi.Input[Union['ModelServingConfigArgs', 'ModelServingConfigArgsDict']]] = None,
                 description: Optional[pulumi.Input[_builtins.str]] = None,
                 email_notifications: Optional[pulumi.Input[Union['ModelServingEmailNotificationsArgs', 'ModelServingEmailNotificationsArgsDict']]] = None,
                 name: Optional[pulumi.Input[_builtins.str]] = None,
                 rate_limits: Optional[pulumi.Input[Sequence[pulumi.Input[Union['ModelServingRateLimitArgs', 'ModelServingRateLimitArgsDict']]]]] = None,
                 route_optimized: Optional[pulumi.Input[_builtins.bool]] = None,
                 tags: Optional[pulumi.Input[Sequence[pulumi.Input[Union['ModelServingTagArgs', 'ModelServingTagArgsDict']]]]] = None,
                 __props__=None):
        # Layer the caller's options on top of the package-wide defaults.
        opts = pulumi.ResourceOptions.merge(_utilities.get_resource_opts_defaults(), opts)
        if not isinstance(opts, pulumi.ResourceOptions):
            raise TypeError('Expected resource options to be a ResourceOptions instance')

        # Without an id in the options, the property bag is built here from the
        # keyword arguments; a caller-supplied bag is only accepted alongside an id.
        building_props = opts.id is None
        if building_props and __props__ is not None:
            raise TypeError('__props__ is only valid when passed in combination with a valid opts.id to get an existing resource')
        if building_props:
            __props__ = ModelServingArgs.__new__(ModelServingArgs)
            # Populate the instance dict directly, in the same key order as the
            # declared properties.
            __props__.__dict__.update(
                ai_gateway=ai_gateway,
                budget_policy_id=budget_policy_id,
                config=config,
                description=description,
                email_notifications=email_notifications,
                name=name,
                rate_limits=rate_limits,
                route_optimized=route_optimized,
                tags=tags,
                # Not accepted as inputs here; presumably filled in by the provider.
                endpoint_url=None,
                serving_endpoint_id=None,
            )

        super(ModelServing, __self__).__init__(
            'databricks:index/modelServing:ModelServing',
            resource_name,
            __props__,
            opts)

    @staticmethod
    def get(resource_name: str,
            id: pulumi.Input[str],
            opts: Optional[pulumi.ResourceOptions] = None,
            ai_gateway: Optional[pulumi.Input[Union['ModelServingAiGatewayArgs', 'ModelServingAiGatewayArgsDict']]] = None,
            budget_policy_id: Optional[pulumi.Input[_builtins.str]] = None,
            config: Optional[pulumi.Input[Union['ModelServingConfigArgs', 'ModelServingConfigArgsDict']]] = None,
            description: Optional[pulumi.Input[_builtins.str]] = None,
            email_notifications: Optional[pulumi.Input[Union['ModelServingEmailNotificationsArgs', 'ModelServingEmailNotificationsArgsDict']]] = None,
            endpoint_url: Optional[pulumi.Input[_builtins.str]] = None,
            name: Optional[pulumi.Input[_builtins.str]] = None,
            rate_limits: Optional[pulumi.Input[Sequence[pulumi.Input[Union['ModelServingRateLimitArgs', 'ModelServingRateLimitArgsDict']]]]] = None,
            route_optimized: Optional[pulumi.Input[_builtins.bool]] = None,
            serving_endpoint_id: Optional[pulumi.Input[_builtins.str]] = None,
            tags: Optional[pulumi.Input[Sequence[pulumi.Input[Union['ModelServingTagArgs', 'ModelServingTagArgsDict']]]]] = None) -> 'ModelServing':
        """
        Look up an existing ModelServing resource by name and provider id, optionally
        supplying extra state properties that qualify the lookup.

        :param str resource_name: The unique name of the resulting resource.
        :param pulumi.Input[str] id: The unique provider ID of the resource to look up.
        :param pulumi.ResourceOptions opts: Options for the resource; the given `id` is merged into them.
        :param pulumi.Input[Union['ModelServingAiGatewayArgs', 'ModelServingAiGatewayArgsDict']] ai_gateway: AI Gateway configuration block for the serving endpoint. *Note: only external model endpoints are supported as of now.*
        :param pulumi.Input[_builtins.str] budget_policy_id: The Budget Policy ID set for this serving endpoint.
        :param pulumi.Input[Union['ModelServingConfigArgs', 'ModelServingConfigArgsDict']] config: The model serving endpoint configuration. It is optional and can be added and modified after creation. If `config` was provided in a previous apply but is omitted in the current one, the endpoint is left unchanged; to get an endpoint without the `config` block it must be destroyed and recreated.
        :param pulumi.Input[_builtins.str] description: The description of the model serving endpoint.
        :param pulumi.Input[Union['ModelServingEmailNotificationsArgs', 'ModelServingEmailNotificationsArgsDict']] email_notifications: Email notification settings block.
        :param pulumi.Input[_builtins.str] endpoint_url: Invocation URL of the endpoint.
        :param pulumi.Input[_builtins.str] name: The name of the model serving endpoint. Required and unique across a workspace; may consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name deletes the existing endpoint and creates a new one with the updated name.
        :param pulumi.Input[Sequence[pulumi.Input[Union['ModelServingRateLimitArgs', 'ModelServingRateLimitArgsDict']]]] rate_limits: Rate limit blocks to apply to the serving endpoint. *Note: only external and foundation model endpoints are supported as of now.*
        :param pulumi.Input[_builtins.bool] route_optimized: Whether route optimization is enabled for the endpoint. *Note: only available for custom models.*
        :param pulumi.Input[_builtins.str] serving_endpoint_id: Unique identifier of the serving endpoint, primarily used to set permissions and to refer to this instance in other operations.
        :param pulumi.Input[Sequence[pulumi.Input[Union['ModelServingTagArgs', 'ModelServingTagArgsDict']]]] tags: Tags attached to the serving endpoint and automatically propagated to billing logs.
        """
        # Carry the requested id inside the resource options.
        lookup_opts = pulumi.ResourceOptions.merge(opts, pulumi.ResourceOptions(id=id))

        # Build the state bag without running its constructor, filling the
        # instance dict directly in the declared property order.
        state = _ModelServingState.__new__(_ModelServingState)
        state.__dict__.update(
            ai_gateway=ai_gateway,
            budget_policy_id=budget_policy_id,
            config=config,
            description=description,
            email_notifications=email_notifications,
            endpoint_url=endpoint_url,
            name=name,
            rate_limits=rate_limits,
            route_optimized=route_optimized,
            serving_endpoint_id=serving_endpoint_id,
            tags=tags,
        )
        return ModelServing(resource_name, opts=lookup_opts, __props__=state)

    @_builtins.property
    @pulumi.getter(name="aiGateway")
    def ai_gateway(self) -> pulumi.Output[Optional['outputs.ModelServingAiGateway']]:
        """
        AI Gateway configuration block for the serving endpoint. *Note: only external model endpoints are supported as of now.*
        """
        gateway = pulumi.get(self, "ai_gateway")
        return gateway

    @_builtins.property
    @pulumi.getter(name="budgetPolicyId")
    def budget_policy_id(self) -> pulumi.Output[Optional[_builtins.str]]:
        """
        ID of the Budget Policy set for this serving endpoint.
        """
        policy_id = pulumi.get(self, "budget_policy_id")
        return policy_id

    @_builtins.property
    @pulumi.getter
    def config(self) -> pulumi.Output['outputs.ModelServingConfig']:
        """
        Configuration of the model serving endpoint. It is optional and can be added and modified after creation. If `config` was provided in a previous apply but is omitted in the current one, the endpoint is left unchanged; to get an endpoint without the `config` block it must be destroyed and recreated.
        """
        endpoint_config = pulumi.get(self, "config")
        return endpoint_config

    @_builtins.property
    @pulumi.getter
    def description(self) -> pulumi.Output[Optional[_builtins.str]]:
        """
        Description of the model serving endpoint.
        """
        text = pulumi.get(self, "description")
        return text

    @_builtins.property
    @pulumi.getter(name="emailNotifications")
    def email_notifications(self) -> pulumi.Output[Optional['outputs.ModelServingEmailNotifications']]:
        """
        Email notification settings block.
        """
        notifications = pulumi.get(self, "email_notifications")
        return notifications

    @_builtins.property
    @pulumi.getter(name="endpointUrl")
    def endpoint_url(self) -> pulumi.Output[_builtins.str]:
        """
        URL used to invoke the endpoint.
        """
        url = pulumi.get(self, "endpoint_url")
        return url

    @_builtins.property
    @pulumi.getter
    def name(self) -> pulumi.Output[_builtins.str]:
        """
        Name of the model serving endpoint. Required and unique across a workspace; may consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name deletes the existing endpoint and creates a new one with the updated name.
        """
        endpoint_name = pulumi.get(self, "name")
        return endpoint_name

    @_builtins.property
    @pulumi.getter(name="rateLimits")
    @_utilities.deprecated("""Please use AI Gateway to manage rate limits.""")
    def rate_limits(self) -> pulumi.Output[Optional[Sequence['outputs.ModelServingRateLimit']]]:
        """
        Rate limit blocks applied to the serving endpoint. *Note: only external and foundation model endpoints are supported as of now.*

        Deprecated: please use AI Gateway to manage rate limits.
        """
        limits = pulumi.get(self, "rate_limits")
        return limits

    @_builtins.property
    @pulumi.getter(name="routeOptimized")
    def route_optimized(self) -> pulumi.Output[Optional[_builtins.bool]]:
        """
        Whether route optimization is enabled for the endpoint. *Note: only available for custom models.*
        """
        optimized = pulumi.get(self, "route_optimized")
        return optimized

    @_builtins.property
    @pulumi.getter(name="servingEndpointId")
    def serving_endpoint_id(self) -> pulumi.Output[_builtins.str]:
        """
        Unique identifier of the serving endpoint, primarily used to set permissions and to refer to this instance in other operations.
        """
        endpoint_id = pulumi.get(self, "serving_endpoint_id")
        return endpoint_id

    @_builtins.property
    @pulumi.getter
    def tags(self) -> pulumi.Output[Optional[Sequence['outputs.ModelServingTag']]]:
        """
        Tags attached to the serving endpoint and automatically propagated to billing logs.
        """
        endpoint_tags = pulumi.get(self, "tags")
        return endpoint_tags

