# coding: utf-8
# Copyright (c) 2016, 2025, Oracle and/or its affiliates.  All rights reserved.
# This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
# NOTE: This class is auto generated by OracleSDKGenerator. DO NOT EDIT. API Version: 20231130

from __future__ import print_function
import click
import oci  # noqa: F401
import six  # noqa: F401
import sys  # noqa: F401
from oci_cli.cli_root import cli
from oci_cli import cli_constants  # noqa: F401
from oci_cli import cli_util
from oci_cli import json_skeleton_utils
from oci_cli import custom_types  # noqa: F401
from oci_cli.aliasing import CommandGroupWithAlias


@cli.command(
    cli_util.override('generative_ai_inference.generative_ai_inference_root_group.command_name', 'generative-ai-inference'),
    cls=CommandGroupWithAlias,
    help=cli_util.override('generative_ai_inference.generative_ai_inference_root_group.help', """OCI Generative AI is a fully managed service that provides a set of state-of-the-art, customizable large language models (LLMs) that cover a wide range of use cases for text generation, summarization, and text embeddings.

Use the Generative AI service inference API to access your custom model endpoints, or to try the out-of-the-box models to [chat], [generate text], [summarize], and [create text embeddings].

To use a Generative AI custom model for inference, you must first create an endpoint for that model. Use the [Generative AI service management API] to [create a custom model] by fine-tuning an out-of-the-box model, or a previous version of a custom model, using your own data. Fine-tune the custom model on a [fine-tuning dedicated AI cluster]. Then, create a [hosting dedicated AI cluster] with an [endpoint] to host your custom model. For resource management in the Generative AI service, use the [Generative AI service management API].

To learn more about the service, see the [Generative AI documentation]."""),
    short_help=cli_util.override('generative_ai_inference.generative_ai_inference_root_group.short_help', """Generative AI Service Inference API""")
)
@cli_util.help_option_group
def generative_ai_inference_root_group():
    """Callback of the service root command group registered on ``cli``; it performs no work itself."""


@click.command(
    cli_util.override('generative_ai_inference.generate_text_result_group.command_name', 'generate-text-result'),
    cls=CommandGroupWithAlias,
    help="""The generated text result to return."""
)
@cli_util.help_option_group
def generate_text_result_group():
    """Callback of the command group named ``generate-text-result`` by default; it performs no work itself."""


@click.command(
    cli_util.override('generative_ai_inference.apply_guardrails_result_group.command_name', 'apply-guardrails-result'),
    cls=CommandGroupWithAlias,
    help="""The result of applying guardrails to the input text."""
)
@cli_util.help_option_group
def apply_guardrails_result_group():
    """Callback of the command group named ``apply-guardrails-result`` by default; it performs no work itself."""


@click.command(
    cli_util.override('generative_ai_inference.embed_text_result_group.command_name', 'embed-text-result'),
    cls=CommandGroupWithAlias,
    help="""The generated embedded result to return."""
)
@cli_util.help_option_group
def embed_text_result_group():
    """Callback of the command group named ``embed-text-result`` by default; it performs no work itself."""


@click.command(
    cli_util.override('generative_ai_inference.summarize_text_result_group.command_name', 'summarize-text-result'),
    cls=CommandGroupWithAlias,
    help="""Summarize text result to return to caller."""
)
@cli_util.help_option_group
def summarize_text_result_group():
    """Callback of the command group named ``summarize-text-result`` by default; it performs no work itself."""


@click.command(
    cli_util.override('generative_ai_inference.rerank_text_result_group.command_name', 'rerank-text-result'),
    cls=CommandGroupWithAlias,
    help="""The rerank response to return to the caller."""
)
@cli_util.help_option_group
def rerank_text_result_group():
    """Callback of the command group named ``rerank-text-result`` by default; it performs no work itself."""


@click.command(
    cli_util.override('generative_ai_inference.chat_result_group.command_name', 'chat-result'),
    cls=CommandGroupWithAlias,
    help="""The response to the chat conversation."""
)
@cli_util.help_option_group
def chat_result_group():
    """Callback of the command group named ``chat-result`` by default; it performs no work itself."""


# Register every result group under the service root group, in the order listed.
for _result_group in (
    generate_text_result_group,
    apply_guardrails_result_group,
    embed_text_result_group,
    summarize_text_result_group,
    rerank_text_result_group,
    chat_result_group
):
    generative_ai_inference_root_group.add_command(_result_group)
# Drop the loop variable so no extra name is left in the module namespace.
del _result_group


@apply_guardrails_result_group.command(
    name=cli_util.override('generative_ai_inference.apply_guardrails.command_name', 'apply-guardrails'),
    help=u"""Applies guardrails to the input text, including content moderation, PII detection, and prompt injection protection. \n[Command Reference](applyGuardrails)"""
)
@cli_util.option('--input', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--guardrail-configs', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--compartment-id', required=True, help=u"""The OCID of the compartment to apply guardrails.""")
@json_skeleton_utils.get_cli_json_input_option({
    'input': {'module': 'generative_ai_inference', 'class': 'GuardrailsInput'},
    'guardrail-configs': {'module': 'generative_ai_inference', 'class': 'GuardrailConfigs'}
})
@cli_util.help_option
@click.pass_context
@json_skeleton_utils.json_skeleton_generation_handler(
    input_params_to_complex_types={
        'input': {'module': 'generative_ai_inference', 'class': 'GuardrailsInput'},
        'guardrail-configs': {'module': 'generative_ai_inference', 'class': 'GuardrailConfigs'}
    },
    output_type={'module': 'generative_ai_inference', 'class': 'ApplyGuardrailsResult'}
)
@cli_util.wrap_exceptions
def apply_guardrails(ctx, from_json, input, guardrail_configs, compartment_id):
    """Build an apply-guardrails request from the CLI options, send it, and render the response.

    ``input`` and ``guardrail_configs`` are passed through
    ``cli_util.parse_json_parameter``; ``compartment_id`` is forwarded as given.
    ``from_json`` is accepted but not read in this function body.
    """
    # Resolve the request id before anything else, as the first step of the command.
    request_id = cli_util.use_or_generate_request_id(ctx.obj['request_id'])

    payload = {
        'input': cli_util.parse_json_parameter("input", input),
        'guardrailConfigs': cli_util.parse_json_parameter("guardrail_configs", guardrail_configs),
        'compartmentId': compartment_id
    }

    inference_client = cli_util.build_client('generative_ai_inference', 'generative_ai_inference', ctx)
    response = inference_client.apply_guardrails(apply_guardrails_details=payload, opc_request_id=request_id)
    cli_util.render_response(response, ctx)


@apply_guardrails_result_group.command(
    name=cli_util.override('generative_ai_inference.apply_guardrails_guardrails_text_input.command_name', 'apply-guardrails-guardrails-text-input'),
    help=u"""Applies guardrails to the input text, including content moderation, PII detection, and prompt injection protection. \n[Command Reference](applyGuardrails)"""
)
@cli_util.option('--guardrail-configs', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--compartment-id', required=True, help=u"""The OCID of the compartment to apply guardrails.""")
@cli_util.option('--input-content', help=u"""The actual input data.""")
@cli_util.option('--input-language-code', help=u"""The language code of the input text. example - en | es | en-US | zh-CN""")
@json_skeleton_utils.get_cli_json_input_option({
    'guardrail-configs': {'module': 'generative_ai_inference', 'class': 'GuardrailConfigs'}
})
@cli_util.help_option
@click.pass_context
@json_skeleton_utils.json_skeleton_generation_handler(
    input_params_to_complex_types={
        'guardrail-configs': {'module': 'generative_ai_inference', 'class': 'GuardrailConfigs'}
    },
    output_type={'module': 'generative_ai_inference', 'class': 'ApplyGuardrailsResult'}
)
@cli_util.wrap_exceptions
def apply_guardrails_guardrails_text_input(ctx, from_json, guardrail_configs, compartment_id, input_content, input_language_code):
    """Apply guardrails with the ``input`` object assembled from flat options and tagged ``type='TEXT'``.

    ``input_content`` and ``input_language_code`` are only included when they are
    not ``None``. ``guardrail_configs`` is passed through
    ``cli_util.parse_json_parameter``. ``from_json`` is accepted but not read in
    this function body.
    """
    request_id = cli_util.use_or_generate_request_id(ctx.obj['request_id'])

    text_input = {}
    payload = {
        'input': text_input,
        'guardrailConfigs': cli_util.parse_json_parameter("guardrail_configs", guardrail_configs),
        'compartmentId': compartment_id
    }

    # Optional fields are omitted from the payload rather than sent as null.
    for field, value in (('content', input_content), ('languageCode', input_language_code)):
        if value is not None:
            text_input[field] = value

    # The discriminator is fixed by this command variant.
    text_input['type'] = 'TEXT'

    inference_client = cli_util.build_client('generative_ai_inference', 'generative_ai_inference', ctx)
    response = inference_client.apply_guardrails(apply_guardrails_details=payload, opc_request_id=request_id)
    cli_util.render_response(response, ctx)


@chat_result_group.command(
    name=cli_util.override('generative_ai_inference.chat.command_name', 'chat'),
    help=u"""Creates a response for the given conversation. \n[Command Reference](chat)"""
)
@cli_util.option('--compartment-id', required=True, help=u"""The OCID of compartment in which to call the Generative AI service to chat.""")
@cli_util.option('--serving-mode', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--chat-request', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@json_skeleton_utils.get_cli_json_input_option({
    'serving-mode': {'module': 'generative_ai_inference', 'class': 'ServingMode'},
    'chat-request': {'module': 'generative_ai_inference', 'class': 'BaseChatRequest'}
})
@cli_util.help_option
@click.pass_context
@json_skeleton_utils.json_skeleton_generation_handler(
    input_params_to_complex_types={
        'serving-mode': {'module': 'generative_ai_inference', 'class': 'ServingMode'},
        'chat-request': {'module': 'generative_ai_inference', 'class': 'BaseChatRequest'}
    },
    output_type={'module': 'generative_ai_inference', 'class': 'ChatResult'}
)
@cli_util.wrap_exceptions
def chat(ctx, from_json, compartment_id, serving_mode, chat_request):
    """Build a chat request from the CLI options, send it, and render the response.

    ``serving_mode`` and ``chat_request`` are passed through
    ``cli_util.parse_json_parameter`` (in that order); ``compartment_id`` is
    forwarded as given. ``from_json`` is accepted but not read in this function body.
    """
    request_id = cli_util.use_or_generate_request_id(ctx.obj['request_id'])

    payload = {
        'compartmentId': compartment_id,
        'servingMode': cli_util.parse_json_parameter("serving_mode", serving_mode),
        'chatRequest': cli_util.parse_json_parameter("chat_request", chat_request)
    }

    inference_client = cli_util.build_client('generative_ai_inference', 'generative_ai_inference', ctx)
    response = inference_client.chat(chat_details=payload, opc_request_id=request_id)
    cli_util.render_response(response, ctx)


@chat_result_group.command(
    name=cli_util.override('generative_ai_inference.chat_dedicated_serving_mode.command_name', 'chat-dedicated-serving-mode'),
    help=u"""Creates a response for the given conversation. \n[Command Reference](chat)"""
)
@cli_util.option('--compartment-id', required=True, help=u"""The OCID of compartment in which to call the Generative AI service to chat.""")
@cli_util.option('--chat-request', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--serving-mode-endpoint-id', required=True, help=u"""The OCID of the endpoint to use.""")
@json_skeleton_utils.get_cli_json_input_option({
    'chat-request': {'module': 'generative_ai_inference', 'class': 'BaseChatRequest'}
})
@cli_util.help_option
@click.pass_context
@json_skeleton_utils.json_skeleton_generation_handler(
    input_params_to_complex_types={
        'chat-request': {'module': 'generative_ai_inference', 'class': 'BaseChatRequest'}
    },
    output_type={'module': 'generative_ai_inference', 'class': 'ChatResult'}
)
@cli_util.wrap_exceptions
def chat_dedicated_serving_mode(ctx, from_json, compartment_id, chat_request, serving_mode_endpoint_id):
    """Send a chat request whose serving mode is fixed to ``servingType='DEDICATED'``.

    The serving mode object carries ``serving_mode_endpoint_id`` as ``endpointId``.
    ``chat_request`` is passed through ``cli_util.parse_json_parameter``.
    ``from_json`` is accepted but not read in this function body.
    """
    request_id = cli_util.use_or_generate_request_id(ctx.obj['request_id'])

    payload = {
        'servingMode': {
            'endpointId': serving_mode_endpoint_id,
            # The discriminator is fixed by this command variant.
            'servingType': 'DEDICATED'
        },
        'compartmentId': compartment_id,
        'chatRequest': cli_util.parse_json_parameter("chat_request", chat_request)
    }

    inference_client = cli_util.build_client('generative_ai_inference', 'generative_ai_inference', ctx)
    response = inference_client.chat(chat_details=payload, opc_request_id=request_id)
    cli_util.render_response(response, ctx)


@chat_result_group.command(
    name=cli_util.override('generative_ai_inference.chat_on_demand_serving_mode.command_name', 'chat-on-demand-serving-mode'),
    help=u"""Creates a response for the given conversation. \n[Command Reference](chat)"""
)
@cli_util.option('--compartment-id', required=True, help=u"""The OCID of compartment in which to call the Generative AI service to chat.""")
@cli_util.option('--chat-request', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--serving-mode-model-id', required=True, help=u"""The unique ID of a model to use. You can use the [ListModels] API to list the available models.""")
@json_skeleton_utils.get_cli_json_input_option({
    'chat-request': {'module': 'generative_ai_inference', 'class': 'BaseChatRequest'}
})
@cli_util.help_option
@click.pass_context
@json_skeleton_utils.json_skeleton_generation_handler(
    input_params_to_complex_types={
        'chat-request': {'module': 'generative_ai_inference', 'class': 'BaseChatRequest'}
    },
    output_type={'module': 'generative_ai_inference', 'class': 'ChatResult'}
)
@cli_util.wrap_exceptions
def chat_on_demand_serving_mode(ctx, from_json, compartment_id, chat_request, serving_mode_model_id):
    """Send a chat request whose serving mode is fixed to ``servingType='ON_DEMAND'``.

    The serving mode object carries ``serving_mode_model_id`` as ``modelId``.
    ``chat_request`` is passed through ``cli_util.parse_json_parameter``.
    ``from_json`` is accepted but not read in this function body.
    """
    request_id = cli_util.use_or_generate_request_id(ctx.obj['request_id'])

    payload = {
        'servingMode': {
            'modelId': serving_mode_model_id,
            # The discriminator is fixed by this command variant.
            'servingType': 'ON_DEMAND'
        },
        'compartmentId': compartment_id,
        'chatRequest': cli_util.parse_json_parameter("chat_request", chat_request)
    }

    inference_client = cli_util.build_client('generative_ai_inference', 'generative_ai_inference', ctx)
    response = inference_client.chat(chat_details=payload, opc_request_id=request_id)
    cli_util.render_response(response, ctx)


@chat_result_group.command(name=cli_util.override('generative_ai_inference.chat_generic_chat_request.command_name', 'chat-generic-chat-request'), help=u"""Creates a response for the given conversation. \n[Command Reference](chat)""")
@cli_util.option('--compartment-id', required=True, help=u"""The OCID of compartment in which to call the Generative AI service to chat.""")
@cli_util.option('--serving-mode', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--chat-request-messages', type=custom_types.CLI_COMPLEX_TYPE, help=u"""The series of messages in a chat request. Includes the previous messages in a conversation. Each message includes a role (`USER` or the `CHATBOT`) and content.

This option is a JSON list with items of type Message.  For documentation on Message please see our API reference: https://docs.cloud.oracle.com/api/#/en/generativeaiinference/20231130/datatypes/Message.""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--chat-request-reasoning-effort', type=custom_types.CliCaseInsensitiveChoice(["MINIMAL", "LOW", "MEDIUM", "HIGH"]), help=u"""Constrains effort on reasoning for reasoning models. Currently supported values are minimal, low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.""")
@cli_util.option('--chat-request-verbosity', type=custom_types.CliCaseInsensitiveChoice(["LOW", "MEDIUM", "HIGH"]), help=u"""Constrains the verbosity of the model's response. Lower values will result in more concise responses, while higher values will result in more verbose responses.""")
@cli_util.option('--chat-request-metadata', type=custom_types.CLI_COMPLEX_TYPE, help=u"""Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard.

Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters.""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--chat-request-is-stream', type=click.BOOL, help=u"""Whether to stream back partial progress. If set to true, as tokens become available, they are sent as data-only server-sent events.""")
@cli_util.option('--chat-request-stream-options', type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--chat-request-num-generations', type=click.INT, help=u"""The number of generated texts that will be returned.""")
@cli_util.option('--chat-request-seed', type=click.INT, help=u"""If specified, the backend will make a best effort to sample tokens deterministically, so that repeated requests with the same seed and parameters yield the same result. However, determinism cannot be fully guaranteed.""")
@cli_util.option('--chat-request-is-echo', type=click.BOOL, help=u"""Whether to include the user prompt in the response. Applies only to non-stream results.""")
@cli_util.option('--chat-request-top-k', type=click.INT, help=u"""An integer that sets up the model to use only the top k most likely tokens in the generated output. A higher k introduces more randomness into the output making the output text sound more natural. Default value is -1 which means to consider all tokens. Setting to 0 disables this method and considers all tokens.

If also using top p, then the model considers only the top tokens whose probabilities add up to p percent and ignores the rest of the k tokens. For example, if k is 20, but the probabilities of the top 10 add up to .75, then only the top 10 tokens are chosen.""")
@cli_util.option('--chat-request-top-p', type=click.FLOAT, help=u"""If set to a probability 0.0 < p < 1.0, it ensures that only the most likely tokens, with total probability mass of p, are considered for generation at each step.

To eliminate tokens with low likelihood, assign p a minimum percentage for the next token's likelihood. For example, when p is set to 0.75, the model eliminates the bottom 25 percent for the next token. Set to 1 to consider all tokens and set to 0 to disable. If both k and p are enabled, p acts after k.""")
@cli_util.option('--chat-request-temperature', type=click.FLOAT, help=u"""A number that sets the randomness of the generated output. A lower temperature means less random generations.

Use lower numbers for tasks with a correct answer such as question answering or summarizing. High temperatures can generate hallucinations or factually incorrect information. Start with temperatures lower than 1.0 and increase the temperature for more creative outputs, as you regenerate the prompts to refine the outputs.""")
@cli_util.option('--chat-request-frequency-penalty', type=click.FLOAT, help=u"""To reduce repetitiveness of generated tokens, this number penalizes new tokens based on their frequency in the generated text so far. Values > 0 encourage the model to use new tokens and values < 0 encourage the model to repeat tokens. Set to 0 to disable.""")
@cli_util.option('--chat-request-presence-penalty', type=click.FLOAT, help=u"""To reduce repetitiveness of generated tokens, this number penalizes new tokens based on whether they've appeared in the generated text so far. Values > 0 encourage the model to use new tokens and values < 0 encourage the model to repeat tokens.

Similar to frequency penalty, a penalty is applied to previously present tokens, except that this penalty is applied equally to all tokens that have already appeared, regardless of how many times they've appeared. Set to 0 to disable.""")
@cli_util.option('--chat-request-stop', type=custom_types.CLI_COMPLEX_TYPE, help=u"""List of strings that stop the generation if they are generated for the response text. The returned output will not contain the stop strings.""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--chat-request-log-probs', type=click.INT, help=u"""Includes the logarithmic probabilities for the most likely output tokens and the chosen tokens.

For example, if the log probability is 5, the API returns a list of the 5 most likely tokens. The API returns the log probability of the sampled token, so there might be up to logprobs+1 elements in the response.""")
@cli_util.option('--chat-request-max-tokens', type=click.INT, help=u"""The maximum number of tokens that can be generated per output sequence. The token count of your prompt plus maxTokens must not exceed the model's context length. For on-demand inferencing, the response length is capped at 4,000 tokens for each run.""")
@cli_util.option('--chat-request-max-completion-tokens', type=click.INT, help=u"""An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.""")
@cli_util.option('--chat-request-logit-bias', type=custom_types.CLI_COMPLEX_TYPE, help=u"""Modifies the likelihood of specified tokens that appear in the completion.

Example: '{\"6395\": 2, \"8134\": 1, \"21943\": 0.5, \"5923\": -100}'""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--chat-request-prediction', type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--chat-request-response-format', type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--chat-request-tool-choice', type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--chat-request-is-parallel-tool-calls', type=click.BOOL, help=u"""Whether to enable parallel function calling during tool use.""")
@cli_util.option('--chat-request-tools', type=custom_types.CLI_COMPLEX_TYPE, help=u"""A list of tools the model may call. Use this to provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported.

This option is a JSON list with items of type ToolDefinition.  For documentation on ToolDefinition please see our API reference: https://docs.cloud.oracle.com/api/#/en/generativeaiinference/20231130/datatypes/ToolDefinition.""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--chat-request-web-search-options', type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@json_skeleton_utils.get_cli_json_input_option({'serving-mode': {'module': 'generative_ai_inference', 'class': 'ServingMode'}, 'chat-request-messages': {'module': 'generative_ai_inference', 'class': 'list[Message]'}, 'chat-request-metadata': {'module': 'generative_ai_inference', 'class': 'object'}, 'chat-request-stream-options': {'module': 'generative_ai_inference', 'class': 'StreamOptions'}, 'chat-request-stop': {'module': 'generative_ai_inference', 'class': 'list[string]'}, 'chat-request-logit-bias': {'module': 'generative_ai_inference', 'class': 'object'}, 'chat-request-prediction': {'module': 'generative_ai_inference', 'class': 'Prediction'}, 'chat-request-response-format': {'module': 'generative_ai_inference', 'class': 'ResponseFormat'}, 'chat-request-tool-choice': {'module': 'generative_ai_inference', 'class': 'ToolChoice'}, 'chat-request-tools': {'module': 'generative_ai_inference', 'class': 'list[ToolDefinition]'}, 'chat-request-web-search-options': {'module': 'generative_ai_inference', 'class': 'WebSearchOptions'}})
@cli_util.help_option
@click.pass_context
@json_skeleton_utils.json_skeleton_generation_handler(input_params_to_complex_types={'serving-mode': {'module': 'generative_ai_inference', 'class': 'ServingMode'}, 'chat-request-messages': {'module': 'generative_ai_inference', 'class': 'list[Message]'}, 'chat-request-metadata': {'module': 'generative_ai_inference', 'class': 'object'}, 'chat-request-stream-options': {'module': 'generative_ai_inference', 'class': 'StreamOptions'}, 'chat-request-stop': {'module': 'generative_ai_inference', 'class': 'list[string]'}, 'chat-request-logit-bias': {'module': 'generative_ai_inference', 'class': 'object'}, 'chat-request-prediction': {'module': 'generative_ai_inference', 'class': 'Prediction'}, 'chat-request-response-format': {'module': 'generative_ai_inference', 'class': 'ResponseFormat'}, 'chat-request-tool-choice': {'module': 'generative_ai_inference', 'class': 'ToolChoice'}, 'chat-request-tools': {'module': 'generative_ai_inference', 'class': 'list[ToolDefinition]'}, 'chat-request-web-search-options': {'module': 'generative_ai_inference', 'class': 'WebSearchOptions'}}, output_type={'module': 'generative_ai_inference', 'class': 'ChatResult'})
@cli_util.wrap_exceptions
def chat_generic_chat_request(ctx, from_json, compartment_id, serving_mode, chat_request_messages, chat_request_reasoning_effort, chat_request_verbosity, chat_request_metadata, chat_request_is_stream, chat_request_stream_options, chat_request_num_generations, chat_request_seed, chat_request_is_echo, chat_request_top_k, chat_request_top_p, chat_request_temperature, chat_request_frequency_penalty, chat_request_presence_penalty, chat_request_stop, chat_request_log_probs, chat_request_max_tokens, chat_request_max_completion_tokens, chat_request_logit_bias, chat_request_prediction, chat_request_response_format, chat_request_tool_choice, chat_request_is_parallel_tool_calls, chat_request_tools, chat_request_web_search_options):
    """Send a chat request whose ``chatRequest`` object is assembled from flat options and tagged ``apiFormat='GENERIC'``.

    Every ``chat_request_*`` argument is optional: a value of ``None`` is left
    out of the payload. Complex-typed arguments are passed through
    ``cli_util.parse_json_parameter``; all others are forwarded as received from
    click. ``top-p``, ``temperature``, ``frequency-penalty`` and
    ``presence-penalty`` are declared as ``click.FLOAT`` so they reach the
    payload as numbers instead of raw strings. ``serving_mode`` is parsed as JSON
    and ``compartment_id`` is forwarded as given. ``from_json`` is accepted but
    not read in this function body.
    """
    kwargs = {}
    kwargs['opc_request_id'] = cli_util.use_or_generate_request_id(ctx.obj['request_id'])

    chat_request = {}
    _details = {
        'chatRequest': chat_request,
        'compartmentId': compartment_id,
        # Parsed before any chat-request field so a malformed --serving-mode is reported first.
        'servingMode': cli_util.parse_json_parameter("serving_mode", serving_mode)
    }

    # One row per optional field: (payload key, parameter name, value, needs JSON parsing).
    # The row order fixes both the payload key order and the order in which JSON is parsed.
    optional_fields = (
        ('messages', "chat_request_messages", chat_request_messages, True),
        ('reasoningEffort', "chat_request_reasoning_effort", chat_request_reasoning_effort, False),
        ('verbosity', "chat_request_verbosity", chat_request_verbosity, False),
        ('metadata', "chat_request_metadata", chat_request_metadata, True),
        ('isStream', "chat_request_is_stream", chat_request_is_stream, False),
        ('streamOptions', "chat_request_stream_options", chat_request_stream_options, True),
        ('numGenerations', "chat_request_num_generations", chat_request_num_generations, False),
        ('seed', "chat_request_seed", chat_request_seed, False),
        ('isEcho', "chat_request_is_echo", chat_request_is_echo, False),
        ('topK', "chat_request_top_k", chat_request_top_k, False),
        ('topP', "chat_request_top_p", chat_request_top_p, False),
        ('temperature', "chat_request_temperature", chat_request_temperature, False),
        ('frequencyPenalty', "chat_request_frequency_penalty", chat_request_frequency_penalty, False),
        ('presencePenalty', "chat_request_presence_penalty", chat_request_presence_penalty, False),
        ('stop', "chat_request_stop", chat_request_stop, True),
        ('logProbs', "chat_request_log_probs", chat_request_log_probs, False),
        ('maxTokens', "chat_request_max_tokens", chat_request_max_tokens, False),
        ('maxCompletionTokens', "chat_request_max_completion_tokens", chat_request_max_completion_tokens, False),
        ('logitBias', "chat_request_logit_bias", chat_request_logit_bias, True),
        ('prediction', "chat_request_prediction", chat_request_prediction, True),
        ('responseFormat', "chat_request_response_format", chat_request_response_format, True),
        ('toolChoice', "chat_request_tool_choice", chat_request_tool_choice, True),
        ('isParallelToolCalls', "chat_request_is_parallel_tool_calls", chat_request_is_parallel_tool_calls, False),
        ('tools', "chat_request_tools", chat_request_tools, True),
        ('webSearchOptions', "chat_request_web_search_options", chat_request_web_search_options, True)
    )
    for payload_key, param_name, value, needs_json_parsing in optional_fields:
        if value is None:
            # Unset options are omitted rather than sent as null.
            continue
        if needs_json_parsing:
            value = cli_util.parse_json_parameter(param_name, value)
        chat_request[payload_key] = value

    # The discriminator is fixed by this command variant.
    chat_request['apiFormat'] = 'GENERIC'

    client = cli_util.build_client('generative_ai_inference', 'generative_ai_inference', ctx)
    result = client.chat(
        chat_details=_details,
        **kwargs
    )
    cli_util.render_response(result, ctx)


@chat_result_group.command(name=cli_util.override('generative_ai_inference.chat_cohere_chat_request.command_name', 'chat-cohere-chat-request'), help=u"""Creates a response for the given conversation. \n[Command Reference](chat)""")
@cli_util.option('--compartment-id', required=True, help=u"""The OCID of compartment in which to call the Generative AI service to chat.""")
@cli_util.option('--serving-mode', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--chat-request-message', required=True, help=u"""The text that the user inputs for the model to respond to.""")
@cli_util.option('--chat-request-chat-history', type=custom_types.CLI_COMPLEX_TYPE, help=u"""The list of previous messages between the user and the model. The chat history gives the model context for responding to the user's inputs.

This option is a JSON list with items of type CohereMessage.  For documentation on CohereMessage please see our API reference: https://docs.cloud.oracle.com/api/#/en/generativeaiinference/20231130/datatypes/CohereMessage.""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--chat-request-documents', type=custom_types.CLI_COMPLEX_TYPE, help=u"""A list of relevant documents that the model can refer to for generating grounded responses to the user's requests. Some example keys that you can add to the dictionary are \"text\", \"author\", and \"date\". Keep the total word count of the strings in the dictionary to 300 words or less.

Example: `[   { \"title\": \"Tall penguins\", \"snippet\": \"Emperor penguins are the tallest.\" },   { \"title\": \"Penguin habitats\", \"snippet\": \"Emperor penguins only live in Antarctica.\" } ]`""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--chat-request-response-format', type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--chat-request-is-search-queries-only', type=click.BOOL, help=u"""When set to true, the response contains only a list of generated search queries without the search results and the model will not respond to the user's message.""")
@cli_util.option('--chat-request-preamble-override', help=u"""If specified, the default Cohere preamble is replaced with the provided preamble. A preamble is an initial guideline message that can change the model's overall chat behavior and conversation style. Default preambles vary for different models.

Example: `You are a travel advisor. Answer with a pirate tone.`""")
@cli_util.option('--chat-request-is-stream', type=click.BOOL, help=u"""Whether to stream the partial progress of the model's response. When set to true, as tokens become available, they are sent as data-only server-sent events.""")
@cli_util.option('--chat-request-stream-options', type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--chat-request-max-tokens', type=click.INT, help=u"""The maximum number of output tokens that the model will generate for the response. The token count of your prompt plus maxTokens must not exceed the model's context length. For on-demand inferencing, the response length is capped at 4,000 tokens for each run.""")
@cli_util.option('--chat-request-max-input-tokens', type=click.INT, help=u"""The maximum number of input tokens to send to the model. If not specified, max_input_tokens is the model's context length limit minus a small buffer.""")
@cli_util.option('--chat-request-temperature', help=u"""A number that sets the randomness of the generated output. A lower temperature means less random generations. Use lower numbers for tasks such as question answering or summarizing. High temperatures can generate hallucinations or factually incorrect information. Start with temperatures lower than 1.0 and increase the temperature for more creative outputs, as you regenerate the prompts to refine the outputs.""")
@cli_util.option('--chat-request-top-k', type=click.INT, help=u"""A sampling method in which the model chooses the next token randomly from the top k most likely tokens. A higher value for k generates more random output, which makes the output text sound more natural. The default value for k is 0 which disables this method and considers all tokens. To set a number for the likely tokens, choose an integer between 1 and 500.

If also using top p, then the model considers only the top tokens whose probabilities add up to p percent and ignores the rest of the k tokens. For example, if k is 20 but only the probabilities of the top 10 add up to the value of p, then only the top 10 tokens are chosen.""")
@cli_util.option('--chat-request-top-p', help=u"""If set to a probability 0.0 < p < 1.0, it ensures that only the most likely tokens, with total probability mass of p, are considered for generation at each step.

To eliminate tokens with low likelihood, assign p a minimum percentage for the next token's likelihood. For example, when p is set to 0.75, the model eliminates the bottom 25 percent for the next token. Set to 1.0 to consider all tokens and set to 0 to disable. If both k and p are enabled, p acts after k.""")
@cli_util.option('--chat-request-prompt-truncation', type=custom_types.CliCaseInsensitiveChoice(["OFF", "AUTO_PRESERVE_ORDER"]), help=u"""Defaults to OFF. Dictates how the prompt will be constructed. With `promptTruncation` set to AUTO_PRESERVE_ORDER, some elements from `chatHistory` and `documents` will be dropped to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved. With `prompt_truncation` set to OFF, no elements will be dropped.""")
@cli_util.option('--chat-request-frequency-penalty', help=u"""To reduce repetitiveness of generated tokens, this number penalizes new tokens based on their frequency in the generated text so far. Greater numbers encourage the model to use new tokens, while lower numbers encourage the model to repeat the tokens. Set to 0 to disable.""")
@cli_util.option('--chat-request-presence-penalty', help=u"""To reduce repetitiveness of generated tokens, this number penalizes new tokens based on whether they've appeared in the generated text so far. Greater numbers encourage the model to use new tokens, while lower numbers encourage the model to repeat the tokens.

Similar to frequency penalty, a penalty is applied to previously present tokens, except that this penalty is applied equally to all tokens that have already appeared, regardless of how many times they've appeared. Set to 0 to disable.""")
@cli_util.option('--chat-request-seed', type=click.INT, help=u"""If specified, the backend will make a best effort to sample tokens deterministically, so that repeated requests with the same seed and parameters yield the same result. However, determinism cannot be fully guaranteed.""")
@cli_util.option('--chat-request-is-echo', type=click.BOOL, help=u"""Returns the full prompt that was sent to the model when True.""")
@cli_util.option('--chat-request-tools', type=custom_types.CLI_COMPLEX_TYPE, help=u"""A list of available tools (functions) that the model may suggest invoking before producing a text response.

This option is a JSON list with items of type CohereTool.  For documentation on CohereTool please see our API reference: https://docs.cloud.oracle.com/api/#/en/generativeaiinference/20231130/datatypes/CohereTool.""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--chat-request-tool-results', type=custom_types.CLI_COMPLEX_TYPE, help=u"""A list of results from invoking tools recommended by the model in the previous chat turn.

This option is a JSON list with items of type CohereToolResult.  For documentation on CohereToolResult please see our API reference: https://docs.cloud.oracle.com/api/#/en/generativeaiinference/20231130/datatypes/CohereToolResult.""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--chat-request-is-force-single-step', type=click.BOOL, help=u"""When enabled, the model will issue (potentially multiple) tool calls in a single step, before it receives the tool responses and directly answers the user's original message.""")
@cli_util.option('--chat-request-stop-sequences', type=custom_types.CLI_COMPLEX_TYPE, help=u"""Stop the model generation when it reaches a stop sequence defined in this parameter.""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--chat-request-is-raw-prompting', type=click.BOOL, help=u"""When enabled, the user\u2019s `message` will be sent to the model without any preprocessing.""")
@cli_util.option('--chat-request-citation-quality', type=custom_types.CliCaseInsensitiveChoice(["ACCURATE", "FAST"]), help=u"""When FAST is selected, citations are generated at the same time as the text output and the request will be completed sooner. May result in less accurate citations.""")
@cli_util.option('--chat-request-safety-mode', type=custom_types.CliCaseInsensitiveChoice(["CONTEXTUAL", "STRICT", "OFF"]), help=u"""Safety mode: Adds a safety instruction for the model to use when generating responses. Contextual: (Default) Puts fewer constraints on the output. It maintains core protections by aiming to reject harmful or illegal suggestions, but it allows profanity and some toxic content, sexually explicit and violent content, and content that contains medical, financial, or legal information. Contextual mode is suited for entertainment, creative, or academic use. Strict: Aims to avoid sensitive topics, such as violent or sexual acts and profanity. This mode aims to provide a safer experience by prohibiting responses or recommendations that it finds inappropriate. Strict mode is suited for corporate use, such as for corporate communications and customer service. Off: No safety mode is applied. Note: This parameter is only compatible with models cohere.command-r-08-2024, cohere.command-r-plus-08-2024 and Cohere models released after these models. See [release dates].""")
@json_skeleton_utils.get_cli_json_input_option({'serving-mode': {'module': 'generative_ai_inference', 'class': 'ServingMode'}, 'chat-request-chat-history': {'module': 'generative_ai_inference', 'class': 'list[CohereMessage]'}, 'chat-request-documents': {'module': 'generative_ai_inference', 'class': 'list[object]'}, 'chat-request-response-format': {'module': 'generative_ai_inference', 'class': 'CohereResponseFormat'}, 'chat-request-stream-options': {'module': 'generative_ai_inference', 'class': 'StreamOptions'}, 'chat-request-tools': {'module': 'generative_ai_inference', 'class': 'list[CohereTool]'}, 'chat-request-tool-results': {'module': 'generative_ai_inference', 'class': 'list[CohereToolResult]'}, 'chat-request-stop-sequences': {'module': 'generative_ai_inference', 'class': 'list[string]'}})
@cli_util.help_option
@click.pass_context
@json_skeleton_utils.json_skeleton_generation_handler(input_params_to_complex_types={'serving-mode': {'module': 'generative_ai_inference', 'class': 'ServingMode'}, 'chat-request-chat-history': {'module': 'generative_ai_inference', 'class': 'list[CohereMessage]'}, 'chat-request-documents': {'module': 'generative_ai_inference', 'class': 'list[object]'}, 'chat-request-response-format': {'module': 'generative_ai_inference', 'class': 'CohereResponseFormat'}, 'chat-request-stream-options': {'module': 'generative_ai_inference', 'class': 'StreamOptions'}, 'chat-request-tools': {'module': 'generative_ai_inference', 'class': 'list[CohereTool]'}, 'chat-request-tool-results': {'module': 'generative_ai_inference', 'class': 'list[CohereToolResult]'}, 'chat-request-stop-sequences': {'module': 'generative_ai_inference', 'class': 'list[string]'}}, output_type={'module': 'generative_ai_inference', 'class': 'ChatResult'})
@cli_util.wrap_exceptions
def chat_cohere_chat_request(ctx, from_json, compartment_id, serving_mode, chat_request_message, chat_request_chat_history, chat_request_documents, chat_request_response_format, chat_request_is_search_queries_only, chat_request_preamble_override, chat_request_is_stream, chat_request_stream_options, chat_request_max_tokens, chat_request_max_input_tokens, chat_request_temperature, chat_request_top_k, chat_request_top_p, chat_request_prompt_truncation, chat_request_frequency_penalty, chat_request_presence_penalty, chat_request_seed, chat_request_is_echo, chat_request_tools, chat_request_tool_results, chat_request_is_force_single_step, chat_request_stop_sequences, chat_request_is_raw_prompting, chat_request_citation_quality, chat_request_safety_mode):
    # Builds the `chat_details` dict for a COHERE-format chat request from the
    # CLI options, sends it through `client.chat`, and renders the response.
    #
    # Required options (compartment id, serving mode, message) are always set;
    # every other option is copied into `chatRequest` only when it is not None.
    # `from_json` is not read in this body (presumably it is consumed by the
    # json-input decorators above; verify against json_skeleton_utils).
    request_id = cli_util.use_or_generate_request_id(ctx.obj['request_id'])

    cohere_request = {'message': chat_request_message}
    payload = {
        'chatRequest': cohere_request,
        'compartmentId': compartment_id,
        'servingMode': cli_util.parse_json_parameter("serving_mode", serving_mode),
    }

    # (wire key, option value, name handed to parse_json_parameter or None).
    # A None third element means the value is forwarded untouched. The table
    # order fixes both the key order and the order in which JSON is parsed.
    optional_fields = (
        ('chatHistory', chat_request_chat_history, "chat_request_chat_history"),
        ('documents', chat_request_documents, "chat_request_documents"),
        ('responseFormat', chat_request_response_format, "chat_request_response_format"),
        ('isSearchQueriesOnly', chat_request_is_search_queries_only, None),
        ('preambleOverride', chat_request_preamble_override, None),
        ('isStream', chat_request_is_stream, None),
        ('streamOptions', chat_request_stream_options, "chat_request_stream_options"),
        ('maxTokens', chat_request_max_tokens, None),
        ('maxInputTokens', chat_request_max_input_tokens, None),
        ('temperature', chat_request_temperature, None),
        ('topK', chat_request_top_k, None),
        ('topP', chat_request_top_p, None),
        ('promptTruncation', chat_request_prompt_truncation, None),
        ('frequencyPenalty', chat_request_frequency_penalty, None),
        ('presencePenalty', chat_request_presence_penalty, None),
        ('seed', chat_request_seed, None),
        ('isEcho', chat_request_is_echo, None),
        ('tools', chat_request_tools, "chat_request_tools"),
        ('toolResults', chat_request_tool_results, "chat_request_tool_results"),
        ('isForceSingleStep', chat_request_is_force_single_step, None),
        ('stopSequences', chat_request_stop_sequences, "chat_request_stop_sequences"),
        ('isRawPrompting', chat_request_is_raw_prompting, None),
        ('citationQuality', chat_request_citation_quality, None),
        ('safetyMode', chat_request_safety_mode, None),
    )
    for wire_key, option_value, json_param_name in optional_fields:
        if option_value is None:
            continue
        if json_param_name is None:
            cohere_request[wire_key] = option_value
        else:
            cohere_request[wire_key] = cli_util.parse_json_parameter(json_param_name, option_value)

    # This command is the Cohere-specific variant, so the format tag is fixed.
    cohere_request['apiFormat'] = 'COHERE'

    inference_client = cli_util.build_client('generative_ai_inference', 'generative_ai_inference', ctx)
    response = inference_client.chat(
        chat_details=payload,
        opc_request_id=request_id
    )
    cli_util.render_response(response, ctx)


@embed_text_result_group.command(name=cli_util.override('generative_ai_inference.embed_text.command_name', 'embed-text'), help=u"""Produces embeddings for the inputs.

An embedding is numeric representation of a piece of text. This text can be a phrase, a sentence, or one or more paragraphs. The Generative AI embedding model transforms each phrase, sentence, or paragraph that you input, into an array with 1024 numbers. You can use these embeddings for finding similarity in your input text such as finding phrases that are similar in context or category. Embeddings are mostly used for semantic searches where the search function focuses on the meaning of the text that it's searching through rather than finding results based on keywords. \n[Command Reference](embedText)""")
@cli_util.option('--inputs', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""Provide a list of strings or one base64 encoded image with `input_type` setting to `IMAGE`. If text embedding, each string can be words, a phrase, or a paragraph. The maximum length of each string entry in the list is 512 tokens.""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--serving-mode', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--compartment-id', required=True, help=u"""The OCID of compartment in which to call the Generative AI service to create text embeddings.""")
@cli_util.option('--is-echo', type=click.BOOL, help=u"""Whether or not to include the original inputs in the response. Results are index-based.""")
@cli_util.option('--truncate', type=custom_types.CliCaseInsensitiveChoice(["NONE", "START", "END"]), help=u"""For an input that's longer than the maximum token length, specifies which part of the input text will be truncated.""")
@cli_util.option('--input-type', type=custom_types.CliCaseInsensitiveChoice(["SEARCH_DOCUMENT", "SEARCH_QUERY", "CLASSIFICATION", "CLUSTERING", "IMAGE"]), help=u"""Specifies the input type.""")
@json_skeleton_utils.get_cli_json_input_option({'inputs': {'module': 'generative_ai_inference', 'class': 'list[string]'}, 'serving-mode': {'module': 'generative_ai_inference', 'class': 'ServingMode'}})
@cli_util.help_option
@click.pass_context
@json_skeleton_utils.json_skeleton_generation_handler(input_params_to_complex_types={'inputs': {'module': 'generative_ai_inference', 'class': 'list[string]'}, 'serving-mode': {'module': 'generative_ai_inference', 'class': 'ServingMode'}}, output_type={'module': 'generative_ai_inference', 'class': 'EmbedTextResult'})
@cli_util.wrap_exceptions
def embed_text(ctx, from_json, inputs, serving_mode, compartment_id, is_echo, truncate, input_type):
    # Builds the `embed_text_details` dict from the CLI options, calls
    # `client.embed_text`, and renders the response.
    # `inputs` and `serving_mode` go through parse_json_parameter; the three
    # optional options are included only when they are not None.
    # `from_json` is not read in this body.
    request_id = cli_util.use_or_generate_request_id(ctx.obj['request_id'])

    payload = {
        'inputs': cli_util.parse_json_parameter("inputs", inputs),
        'servingMode': cli_util.parse_json_parameter("serving_mode", serving_mode),
        'compartmentId': compartment_id,
    }

    optional_fields = (
        ('isEcho', is_echo),
        ('truncate', truncate),
        ('inputType', input_type),
    )
    for wire_key, option_value in optional_fields:
        if option_value is not None:
            payload[wire_key] = option_value

    inference_client = cli_util.build_client('generative_ai_inference', 'generative_ai_inference', ctx)
    response = inference_client.embed_text(
        embed_text_details=payload,
        opc_request_id=request_id
    )
    cli_util.render_response(response, ctx)


@embed_text_result_group.command(name=cli_util.override('generative_ai_inference.embed_text_dedicated_serving_mode.command_name', 'embed-text-dedicated-serving-mode'), help=u"""Produces embeddings for the inputs.

An embedding is numeric representation of a piece of text. This text can be a phrase, a sentence, or one or more paragraphs. The Generative AI embedding model transforms each phrase, sentence, or paragraph that you input, into an array with 1024 numbers. You can use these embeddings for finding similarity in your input text such as finding phrases that are similar in context or category. Embeddings are mostly used for semantic searches where the search function focuses on the meaning of the text that it's searching through rather than finding results based on keywords. \n[Command Reference](embedText)""")
@cli_util.option('--inputs', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""Provide a list of strings or one base64 encoded image with `input_type` setting to `IMAGE`. If text embedding, each string can be words, a phrase, or a paragraph. The maximum length of each string entry in the list is 512 tokens.""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--compartment-id', required=True, help=u"""The OCID of compartment in which to call the Generative AI service to create text embeddings.""")
@cli_util.option('--serving-mode-endpoint-id', required=True, help=u"""The OCID of the endpoint to use.""")
@cli_util.option('--is-echo', type=click.BOOL, help=u"""Whether or not to include the original inputs in the response. Results are index-based.""")
@cli_util.option('--truncate', type=custom_types.CliCaseInsensitiveChoice(["NONE", "START", "END"]), help=u"""For an input that's longer than the maximum token length, specifies which part of the input text will be truncated.""")
@cli_util.option('--input-type', type=custom_types.CliCaseInsensitiveChoice(["SEARCH_DOCUMENT", "SEARCH_QUERY", "CLASSIFICATION", "CLUSTERING", "IMAGE"]), help=u"""Specifies the input type.""")
@json_skeleton_utils.get_cli_json_input_option({'inputs': {'module': 'generative_ai_inference', 'class': 'list[string]'}})
@cli_util.help_option
@click.pass_context
@json_skeleton_utils.json_skeleton_generation_handler(input_params_to_complex_types={'inputs': {'module': 'generative_ai_inference', 'class': 'list[string]'}}, output_type={'module': 'generative_ai_inference', 'class': 'EmbedTextResult'})
@cli_util.wrap_exceptions
def embed_text_dedicated_serving_mode(ctx, from_json, inputs, compartment_id, serving_mode_endpoint_id, is_echo, truncate, input_type):
    # Variant of the embed-text command whose serving mode is fixed to
    # DEDICATED: the caller supplies only the endpoint id, and the nested
    # `servingMode` dict is assembled here instead of being parsed from JSON.
    # Calls `client.embed_text` and renders the response.
    # `from_json` is not read in this body.
    request_id = cli_util.use_or_generate_request_id(ctx.obj['request_id'])

    payload = {
        'servingMode': {
            'endpointId': serving_mode_endpoint_id,
            'servingType': 'DEDICATED',
        },
        'inputs': cli_util.parse_json_parameter("inputs", inputs),
        'compartmentId': compartment_id,
    }

    # Optional options are forwarded only when they are not None.
    optional_fields = (
        ('isEcho', is_echo),
        ('truncate', truncate),
        ('inputType', input_type),
    )
    for wire_key, option_value in optional_fields:
        if option_value is not None:
            payload[wire_key] = option_value

    inference_client = cli_util.build_client('generative_ai_inference', 'generative_ai_inference', ctx)
    response = inference_client.embed_text(
        embed_text_details=payload,
        opc_request_id=request_id
    )
    cli_util.render_response(response, ctx)


@embed_text_result_group.command(name=cli_util.override('generative_ai_inference.embed_text_on_demand_serving_mode.command_name', 'embed-text-on-demand-serving-mode'), help=u"""Produces embeddings for the inputs.

An embedding is numeric representation of a piece of text. This text can be a phrase, a sentence, or one or more paragraphs. The Generative AI embedding model transforms each phrase, sentence, or paragraph that you input, into an array with 1024 numbers. You can use these embeddings for finding similarity in your input text such as finding phrases that are similar in context or category. Embeddings are mostly used for semantic searches where the search function focuses on the meaning of the text that it's searching through rather than finding results based on keywords. \n[Command Reference](embedText)""")
@cli_util.option('--inputs', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""Provide a list of strings or one base64 encoded image with `input_type` setting to `IMAGE`. If text embedding, each string can be words, a phrase, or a paragraph. The maximum length of each string entry in the list is 512 tokens.""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--compartment-id', required=True, help=u"""The OCID of compartment in which to call the Generative AI service to create text embeddings.""")
@cli_util.option('--serving-mode-model-id', required=True, help=u"""The unique ID of a model to use. You can use the [ListModels] API to list the available models.""")
@cli_util.option('--is-echo', type=click.BOOL, help=u"""Whether or not to include the original inputs in the response. Results are index-based.""")
@cli_util.option('--truncate', type=custom_types.CliCaseInsensitiveChoice(["NONE", "START", "END"]), help=u"""For an input that's longer than the maximum token length, specifies which part of the input text will be truncated.""")
@cli_util.option('--input-type', type=custom_types.CliCaseInsensitiveChoice(["SEARCH_DOCUMENT", "SEARCH_QUERY", "CLASSIFICATION", "CLUSTERING", "IMAGE"]), help=u"""Specifies the input type.""")
@json_skeleton_utils.get_cli_json_input_option({'inputs': {'module': 'generative_ai_inference', 'class': 'list[string]'}})
@cli_util.help_option
@click.pass_context
@json_skeleton_utils.json_skeleton_generation_handler(input_params_to_complex_types={'inputs': {'module': 'generative_ai_inference', 'class': 'list[string]'}}, output_type={'module': 'generative_ai_inference', 'class': 'EmbedTextResult'})
@cli_util.wrap_exceptions
def embed_text_on_demand_serving_mode(ctx, from_json, inputs, compartment_id, serving_mode_model_id, is_echo, truncate, input_type):
    # Variant of the embed-text command whose serving mode is fixed to
    # ON_DEMAND: the caller supplies only the model id, and the nested
    # `servingMode` dict is assembled here instead of being parsed from JSON.
    # Calls `client.embed_text` and renders the response.
    # `from_json` is not read in this body.
    request_id = cli_util.use_or_generate_request_id(ctx.obj['request_id'])

    payload = {
        'servingMode': {
            'modelId': serving_mode_model_id,
            'servingType': 'ON_DEMAND',
        },
        'inputs': cli_util.parse_json_parameter("inputs", inputs),
        'compartmentId': compartment_id,
    }

    # Optional options are forwarded only when they are not None.
    optional_fields = (
        ('isEcho', is_echo),
        ('truncate', truncate),
        ('inputType', input_type),
    )
    for wire_key, option_value in optional_fields:
        if option_value is not None:
            payload[wire_key] = option_value

    inference_client = cli_util.build_client('generative_ai_inference', 'generative_ai_inference', ctx)
    response = inference_client.embed_text(
        embed_text_details=payload,
        opc_request_id=request_id
    )
    cli_util.render_response(response, ctx)


@generate_text_result_group.command(name=cli_util.override('generative_ai_inference.generate_text.command_name', 'generate-text'), help=u"""Generates a text response based on the user prompt. \n[Command Reference](generateText)""")
@cli_util.option('--compartment-id', required=True, help=u"""The OCID of compartment in which to call the Generative AI service to generate text.""")
@cli_util.option('--serving-mode', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--inference-request', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@json_skeleton_utils.get_cli_json_input_option({'serving-mode': {'module': 'generative_ai_inference', 'class': 'ServingMode'}, 'inference-request': {'module': 'generative_ai_inference', 'class': 'LlmInferenceRequest'}})
@cli_util.help_option
@click.pass_context
@json_skeleton_utils.json_skeleton_generation_handler(input_params_to_complex_types={'serving-mode': {'module': 'generative_ai_inference', 'class': 'ServingMode'}, 'inference-request': {'module': 'generative_ai_inference', 'class': 'LlmInferenceRequest'}}, output_type={'module': 'generative_ai_inference', 'class': 'GenerateTextResult'})
@cli_util.wrap_exceptions
def generate_text(ctx, from_json, compartment_id, serving_mode, inference_request):
    # Builds the `generate_text_details` dict (all three fields are required),
    # calls `client.generate_text`, and renders the response.
    # Both complex options are run through parse_json_parameter, serving mode
    # first. `from_json` is not read in this body.
    request_id = cli_util.use_or_generate_request_id(ctx.obj['request_id'])

    payload = {
        'compartmentId': compartment_id,
        'servingMode': cli_util.parse_json_parameter("serving_mode", serving_mode),
        'inferenceRequest': cli_util.parse_json_parameter("inference_request", inference_request),
    }

    inference_client = cli_util.build_client('generative_ai_inference', 'generative_ai_inference', ctx)
    response = inference_client.generate_text(
        generate_text_details=payload,
        opc_request_id=request_id
    )
    cli_util.render_response(response, ctx)


@generate_text_result_group.command(name=cli_util.override('generative_ai_inference.generate_text_dedicated_serving_mode.command_name', 'generate-text-dedicated-serving-mode'), help=u"""Generates a text response based on the user prompt. \n[Command Reference](generateText)""")
@cli_util.option('--compartment-id', required=True, help=u"""The OCID of compartment in which to call the Generative AI service to generate text.""")
@cli_util.option('--inference-request', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--serving-mode-endpoint-id', required=True, help=u"""The OCID of the endpoint to use.""")
@json_skeleton_utils.get_cli_json_input_option({'inference-request': {'module': 'generative_ai_inference', 'class': 'LlmInferenceRequest'}})
@cli_util.help_option
@click.pass_context
@json_skeleton_utils.json_skeleton_generation_handler(input_params_to_complex_types={'inference-request': {'module': 'generative_ai_inference', 'class': 'LlmInferenceRequest'}}, output_type={'module': 'generative_ai_inference', 'class': 'GenerateTextResult'})
@cli_util.wrap_exceptions
def generate_text_dedicated_serving_mode(ctx, from_json, compartment_id, inference_request, serving_mode_endpoint_id):
    # Variant of the generate-text command whose serving mode is fixed to
    # DEDICATED: the nested `servingMode` dict is assembled here from the
    # endpoint id rather than parsed from a JSON option.
    # Calls `client.generate_text` and renders the response.
    # `from_json` is not read in this body.
    request_id = cli_util.use_or_generate_request_id(ctx.obj['request_id'])

    payload = {
        'servingMode': {
            'endpointId': serving_mode_endpoint_id,
            'servingType': 'DEDICATED',
        },
        'compartmentId': compartment_id,
        'inferenceRequest': cli_util.parse_json_parameter("inference_request", inference_request),
    }

    inference_client = cli_util.build_client('generative_ai_inference', 'generative_ai_inference', ctx)
    response = inference_client.generate_text(
        generate_text_details=payload,
        opc_request_id=request_id
    )
    cli_util.render_response(response, ctx)


@generate_text_result_group.command(name=cli_util.override('generative_ai_inference.generate_text_on_demand_serving_mode.command_name', 'generate-text-on-demand-serving-mode'), help=u"""Generates a text response based on the user prompt. \n[Command Reference](generateText)""")
@cli_util.option('--compartment-id', required=True, help=u"""The OCID of compartment in which to call the Generative AI service to generate text.""")
@cli_util.option('--inference-request', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--serving-mode-model-id', required=True, help=u"""The unique ID of a model to use. You can use the [ListModels] API to list the available models.""")
@json_skeleton_utils.get_cli_json_input_option({'inference-request': {'module': 'generative_ai_inference', 'class': 'LlmInferenceRequest'}})
@cli_util.help_option
@click.pass_context
@json_skeleton_utils.json_skeleton_generation_handler(input_params_to_complex_types={'inference-request': {'module': 'generative_ai_inference', 'class': 'LlmInferenceRequest'}}, output_type={'module': 'generative_ai_inference', 'class': 'GenerateTextResult'})
@cli_util.wrap_exceptions
def generate_text_on_demand_serving_mode(ctx, from_json, compartment_id, inference_request, serving_mode_model_id):
    # Variant of the generate-text command whose serving mode is fixed to
    # ON_DEMAND: the nested `servingMode` dict is assembled here from the
    # model id rather than parsed from a JSON option.
    # Calls `client.generate_text` and renders the response.
    # `from_json` is not read in this body.
    request_id = cli_util.use_or_generate_request_id(ctx.obj['request_id'])

    payload = {
        'servingMode': {
            'modelId': serving_mode_model_id,
            'servingType': 'ON_DEMAND',
        },
        'compartmentId': compartment_id,
        'inferenceRequest': cli_util.parse_json_parameter("inference_request", inference_request),
    }

    inference_client = cli_util.build_client('generative_ai_inference', 'generative_ai_inference', ctx)
    response = inference_client.generate_text(
        generate_text_details=payload,
        opc_request_id=request_id
    )
    cli_util.render_response(response, ctx)


@generate_text_result_group.command(name=cli_util.override('generative_ai_inference.generate_text_llama_llm_inference_request.command_name', 'generate-text-llama-llm-inference-request'), help=u"""Generates a text response based on the user prompt. \n[Command Reference](generateText)""")
@cli_util.option('--compartment-id', required=True, help=u"""The OCID of compartment in which to call the Generative AI service to generate text.""")
@cli_util.option('--serving-mode', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--inference-request-prompt', help=u"""Represents the prompt to be completed. The trailing white spaces are trimmed before completion.""")
@cli_util.option('--inference-request-is-stream', type=click.BOOL, help=u"""Whether to stream back partial progress. If set, tokens are sent as data-only server-sent events as they become available.""")
@cli_util.option('--inference-request-num-generations', type=click.INT, help=u"""The number of generated texts that will be returned.""")
@cli_util.option('--inference-request-is-echo', type=click.BOOL, help=u"""Whether or not to return the user prompt in the response. Applies only to non-stream results.""")
@cli_util.option('--inference-request-top-k', type=click.INT, help=u"""An integer that sets up the model to use only the top k most likely tokens in the generated output. A higher k introduces more randomness into the output making the output text sound more natural. Default value is -1 which means to consider all tokens. Setting to 0 disables this method and considers all tokens.

If also using top p, then the model considers only the top tokens whose probabilities add up to p percent and ignores the rest of the k tokens. For example, if k is 20, but the probabilities of the top 10 add up to .75, then only the top 10 tokens are chosen.""")
@cli_util.option('--inference-request-top-p', help=u"""If set to a probability 0.0 < p < 1.0, it ensures that only the most likely tokens, with total probability mass of p, are considered for generation at each step.

To eliminate tokens with low likelihood, assign p a minimum percentage for the next token's likelihood. For example, when p is set to 0.75, the model eliminates the bottom 25 percent for the next token. Set to 1 to consider all tokens and set to 0 to disable. If both k and p are enabled, p acts after k.""")
@cli_util.option('--inference-request-temperature', help=u"""A number that sets the randomness of the generated output. A lower temperature means a less random generations.

Use lower numbers for tasks with a correct answer such as question answering or summarizing. High temperatures can generate hallucinations or factually incorrect information. Start with temperatures lower than 1.0 and increase the temperature for more creative outputs, as you regenerate the prompts to refine the outputs.""")
@cli_util.option('--inference-request-frequency-penalty', help=u"""To reduce repetitiveness of generated tokens, this number penalizes new tokens based on their frequency in the generated text so far. Values > 0 encourage the model to use new tokens and values < 0 encourage the model to repeat tokens. Set to 0 to disable.""")
@cli_util.option('--inference-request-presence-penalty', help=u"""To reduce repetitiveness of generated tokens, this number penalizes new tokens based on whether they've appeared in the generated text so far. Values > 0 encourage the model to use new tokens and values < 0 encourage the model to repeat tokens.

Similar to frequency penalty, a penalty is applied to previously present tokens, except that this penalty is applied equally to all tokens that have already appeared, regardless of how many times they've appeared. Set to 0 to disable.""")
@cli_util.option('--inference-request-stop', type=custom_types.CLI_COMPLEX_TYPE, help=u"""List of strings that stop the generation if they are generated for the response text. The returned output will not contain the stop strings.""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--inference-request-log-probs', type=click.INT, help=u"""Includes the logarithmic probabilities for the most likely output tokens and the chosen tokens.

For example, if the log probability is 5, the API returns a list of the 5 most likely tokens. The API returns the log probability of the sampled token, so there might be up to logprobs+1 elements in the response.""")
@cli_util.option('--inference-request-max-tokens', type=click.INT, help=u"""The maximum number of tokens that can be generated per output sequence. The token count of the prompt plus `maxTokens` cannot exceed the model's context length.""")
@json_skeleton_utils.get_cli_json_input_option({'serving-mode': {'module': 'generative_ai_inference', 'class': 'ServingMode'}, 'inference-request-stop': {'module': 'generative_ai_inference', 'class': 'list[string]'}})
@cli_util.help_option
@click.pass_context
@json_skeleton_utils.json_skeleton_generation_handler(input_params_to_complex_types={'serving-mode': {'module': 'generative_ai_inference', 'class': 'ServingMode'}, 'inference-request-stop': {'module': 'generative_ai_inference', 'class': 'list[string]'}}, output_type={'module': 'generative_ai_inference', 'class': 'GenerateTextResult'})
@cli_util.wrap_exceptions
def generate_text_llama_llm_inference_request(ctx, from_json, compartment_id, serving_mode, inference_request_prompt, inference_request_is_stream, inference_request_num_generations, inference_request_is_echo, inference_request_top_k, inference_request_top_p, inference_request_temperature, inference_request_frequency_penalty, inference_request_presence_penalty, inference_request_stop, inference_request_log_probs, inference_request_max_tokens):
    # Sends a generate-text request whose inference request is tagged with
    # runtimeType 'LLAMA', then renders the service response.
    # Every --inference-request-* option is optional: a value of None is
    # left out of the request body entirely. `from_json` is not read here.

    kwargs = {'opc_request_id': cli_util.use_or_generate_request_id(ctx.obj['request_id'])}

    inference_request = {}
    _details = {
        'inferenceRequest': inference_request,
        'compartmentId': compartment_id,
        'servingMode': cli_util.parse_json_parameter("serving_mode", serving_mode),
    }

    # (request field, CLI value) pairs, in the order they are written to the body.
    optional_fields = (
        ('prompt', inference_request_prompt),
        ('isStream', inference_request_is_stream),
        ('numGenerations', inference_request_num_generations),
        ('isEcho', inference_request_is_echo),
        ('topK', inference_request_top_k),
        ('topP', inference_request_top_p),
        ('temperature', inference_request_temperature),
        ('frequencyPenalty', inference_request_frequency_penalty),
        ('presencePenalty', inference_request_presence_penalty),
        ('stop', inference_request_stop),
        ('logProbs', inference_request_log_probs),
        ('maxTokens', inference_request_max_tokens),
    )
    for field_name, value in optional_fields:
        if value is None:
            continue
        if field_name == 'stop':
            # The stop list is a complex-type option; parse it only once we
            # know it was supplied, and store whatever the parser returns.
            value = cli_util.parse_json_parameter("inference_request_stop", value)
        inference_request[field_name] = value

    inference_request['runtimeType'] = 'LLAMA'

    client = cli_util.build_client('generative_ai_inference', 'generative_ai_inference', ctx)
    result = client.generate_text(
        generate_text_details=_details,
        **kwargs
    )
    cli_util.render_response(result, ctx)


@generate_text_result_group.command(name=cli_util.override('generative_ai_inference.generate_text_cohere_llm_inference_request.command_name', 'generate-text-cohere-llm-inference-request'), help=u"""Generates a text response based on the user prompt. \n[Command Reference](generateText)""")
@cli_util.option('--compartment-id', required=True, help=u"""The OCID of compartment in which to call the Generative AI service to generate text.""")
@cli_util.option('--serving-mode', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--inference-request-prompt', required=True, help=u"""Represents the prompt to be completed. The trailing white spaces are trimmed before completion.""")
@cli_util.option('--inference-request-is-stream', type=click.BOOL, help=u"""Whether to stream back partial progress. If set, tokens are sent as data-only server-sent events as they become available.""")
@cli_util.option('--inference-request-num-generations', type=click.INT, help=u"""The number of generated texts that will be returned.""")
@cli_util.option('--inference-request-is-echo', type=click.BOOL, help=u"""Whether or not to return the user prompt in the response. This option only applies to non-stream results.""")
@cli_util.option('--inference-request-max-tokens', type=click.INT, help=u"""The maximum number of tokens to predict for each response. Includes input plus output tokens.""")
@cli_util.option('--inference-request-temperature', help=u"""A number that sets the randomness of the generated output. A lower temperature means a less random generations.

Use lower numbers for tasks with a correct answer such as question answering or summarizing. High temperatures can generate hallucinations or factually incorrect information. Start with temperatures lower than 1.0 and increase the temperature for more creative outputs, as you regenerate the prompts to refine the outputs.""")
@cli_util.option('--inference-request-top-k', type=click.INT, help=u"""An integer that sets up the model to use only the top k most likely tokens in the generated output. A higher k introduces more randomness into the output making the output text sound more natural. Default value is 0 which disables this method and considers all tokens. To set a number for the likely tokens, choose an integer between 1 and 500.

If also using top p, then the model considers only the top tokens whose probabilities add up to p percent and ignores the rest of the k tokens. For example, if k is 20, but the probabilities of the top 10 add up to .75, then only the top 10 tokens are chosen.""")
@cli_util.option('--inference-request-top-p', help=u"""If set to a probability 0.0 < p < 1.0, it ensures that only the most likely tokens, with total probability mass of p, are considered for generation at each step.

To eliminate tokens with low likelihood, assign p a minimum percentage for the next token's likelihood. For example, when p is set to 0.75, the model eliminates the bottom 25 percent for the next token. Set to 1.0 to consider all tokens and set to 0 to disable. If both k and p are enabled, p acts after k.""")
@cli_util.option('--inference-request-frequency-penalty', help=u"""To reduce repetitiveness of generated tokens, this number penalizes new tokens based on their frequency in the generated text so far. Greater numbers encourage the model to use new tokens, while lower numbers encourage the model to repeat the tokens. Set to 0 to disable.""")
@cli_util.option('--inference-request-presence-penalty', help=u"""To reduce repetitiveness of generated tokens, this number penalizes new tokens based on whether they've appeared in the generated text so far. Greater numbers encourage the model to use new tokens, while lower numbers encourage the model to repeat the tokens.

Similar to frequency penalty, a penalty is applied to previously present tokens, except that this penalty is applied equally to all tokens that have already appeared, regardless of how many times they've appeared. Set to 0 to disable.""")
@cli_util.option('--inference-request-stop-sequences', type=custom_types.CLI_COMPLEX_TYPE, help=u"""The generated text is cut at the end of the earliest occurrence of this stop sequence. The generated text will include this stop sequence.""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--inference-request-return-likelihoods', type=custom_types.CliCaseInsensitiveChoice(["NONE", "ALL", "GENERATION"]), help=u"""Specifies how and if the token likelihoods are returned with the response.""")
@cli_util.option('--inference-request-truncate', type=custom_types.CliCaseInsensitiveChoice(["NONE", "START", "END"]), help=u"""For an input that's longer than the maximum token length, specifies which part of the input text will be truncated.""")
@json_skeleton_utils.get_cli_json_input_option({'serving-mode': {'module': 'generative_ai_inference', 'class': 'ServingMode'}, 'inference-request-stop-sequences': {'module': 'generative_ai_inference', 'class': 'list[string]'}})
@cli_util.help_option
@click.pass_context
@json_skeleton_utils.json_skeleton_generation_handler(input_params_to_complex_types={'serving-mode': {'module': 'generative_ai_inference', 'class': 'ServingMode'}, 'inference-request-stop-sequences': {'module': 'generative_ai_inference', 'class': 'list[string]'}}, output_type={'module': 'generative_ai_inference', 'class': 'GenerateTextResult'})
@cli_util.wrap_exceptions
def generate_text_cohere_llm_inference_request(ctx, from_json, compartment_id, serving_mode, inference_request_prompt, inference_request_is_stream, inference_request_num_generations, inference_request_is_echo, inference_request_max_tokens, inference_request_temperature, inference_request_top_k, inference_request_top_p, inference_request_frequency_penalty, inference_request_presence_penalty, inference_request_stop_sequences, inference_request_return_likelihoods, inference_request_truncate):
    # Sends a generate-text request whose inference request is tagged with
    # runtimeType 'COHERE', then renders the service response.
    # The prompt is always sent; every other --inference-request-* value is
    # included only when it is not None. `from_json` is not read here.

    request_id = cli_util.use_or_generate_request_id(ctx.obj['request_id'])

    cohere_request = {}
    payload = {
        'inferenceRequest': cohere_request,
        'compartmentId': compartment_id,
        'servingMode': cli_util.parse_json_parameter("serving_mode", serving_mode),
    }
    cohere_request['prompt'] = inference_request_prompt

    # (request field, CLI value) pairs, in the order they are written to the body.
    maybe_supplied = (
        ('isStream', inference_request_is_stream),
        ('numGenerations', inference_request_num_generations),
        ('isEcho', inference_request_is_echo),
        ('maxTokens', inference_request_max_tokens),
        ('temperature', inference_request_temperature),
        ('topK', inference_request_top_k),
        ('topP', inference_request_top_p),
        ('frequencyPenalty', inference_request_frequency_penalty),
        ('presencePenalty', inference_request_presence_penalty),
        ('stopSequences', inference_request_stop_sequences),
        ('returnLikelihoods', inference_request_return_likelihoods),
        ('truncate', inference_request_truncate),
    )
    for key, supplied in maybe_supplied:
        if supplied is None:
            continue
        if key == 'stopSequences':
            # Complex-type option: parsed only when present, and the parser's
            # result is stored as-is.
            supplied = cli_util.parse_json_parameter("inference_request_stop_sequences", supplied)
        cohere_request[key] = supplied

    cohere_request['runtimeType'] = 'COHERE'

    service = cli_util.build_client('generative_ai_inference', 'generative_ai_inference', ctx)
    response = service.generate_text(
        generate_text_details=payload,
        opc_request_id=request_id
    )
    cli_util.render_response(response, ctx)


@rerank_text_result_group.command(name=cli_util.override('generative_ai_inference.rerank_text.command_name', 'rerank-text'), help=u"""Reranks the text responses based on the input documents and a prompt.

Rerank assigns an index and a relevance score to each document, indicating which document is most related to the prompt. \n[Command Reference](rerankText)""")
@cli_util.option('--input', required=True, help=u"""Input query for search in the documents.""")
@cli_util.option('--compartment-id', required=True, help=u"""The OCID of the compartment to call into the Generative AI service LLMs.""")
@cli_util.option('--serving-mode', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--documents', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""A list of document strings to rerank based on the query asked.""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--top-n', type=click.INT, help=u"""The number of most relevant documents or indices to return. Defaults to the length of the documents.""")
@cli_util.option('--is-echo', type=click.BOOL, help=u"""Whether or not to return the documents in the response.""")
@cli_util.option('--max-chunks-per-document', type=click.INT, help=u"""The maximum number of chunks to produce internally from a document.""")
@json_skeleton_utils.get_cli_json_input_option({'serving-mode': {'module': 'generative_ai_inference', 'class': 'ServingMode'}, 'documents': {'module': 'generative_ai_inference', 'class': 'list[string]'}})
@cli_util.help_option
@click.pass_context
@json_skeleton_utils.json_skeleton_generation_handler(input_params_to_complex_types={'serving-mode': {'module': 'generative_ai_inference', 'class': 'ServingMode'}, 'documents': {'module': 'generative_ai_inference', 'class': 'list[string]'}}, output_type={'module': 'generative_ai_inference', 'class': 'RerankTextResult'})
@cli_util.wrap_exceptions
def rerank_text(ctx, from_json, input, compartment_id, serving_mode, documents, top_n, is_echo, max_chunks_per_document):
    # Sends a rerank-text request with a caller-supplied serving mode and
    # renders the service response. `from_json` is not read in this body.

    request_id = cli_util.use_or_generate_request_id(ctx.obj['request_id'])

    # Required fields; the two complex-type options are parsed from JSON.
    payload = {
        'input': input,
        'compartmentId': compartment_id,
        'servingMode': cli_util.parse_json_parameter("serving_mode", serving_mode),
        'documents': cli_util.parse_json_parameter("documents", documents),
    }

    # Optional fields are sent only when a value was provided (not None).
    optional_fields = (
        ('topN', top_n),
        ('isEcho', is_echo),
        ('maxChunksPerDocument', max_chunks_per_document),
    )
    payload.update({key: value for key, value in optional_fields if value is not None})

    service = cli_util.build_client('generative_ai_inference', 'generative_ai_inference', ctx)
    response = service.rerank_text(
        rerank_text_details=payload,
        opc_request_id=request_id
    )
    cli_util.render_response(response, ctx)


@rerank_text_result_group.command(name=cli_util.override('generative_ai_inference.rerank_text_dedicated_serving_mode.command_name', 'rerank-text-dedicated-serving-mode'), help=u"""Reranks the text responses based on the input documents and a prompt.

Rerank assigns an index and a relevance score to each document, indicating which document is most related to the prompt. \n[Command Reference](rerankText)""")
@cli_util.option('--input', required=True, help=u"""Input query for search in the documents.""")
@cli_util.option('--compartment-id', required=True, help=u"""The OCID of the compartment to call into the Generative AI service LLMs.""")
@cli_util.option('--documents', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""A list of document strings to rerank based on the query asked.""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--serving-mode-endpoint-id', required=True, help=u"""The OCID of the endpoint to use.""")
@cli_util.option('--top-n', type=click.INT, help=u"""The number of most relevant documents or indices to return. Defaults to the length of the documents.""")
@cli_util.option('--is-echo', type=click.BOOL, help=u"""Whether or not to return the documents in the response.""")
@cli_util.option('--max-chunks-per-document', type=click.INT, help=u"""The maximum number of chunks to produce internally from a document.""")
@json_skeleton_utils.get_cli_json_input_option({'documents': {'module': 'generative_ai_inference', 'class': 'list[string]'}})
@cli_util.help_option
@click.pass_context
@json_skeleton_utils.json_skeleton_generation_handler(input_params_to_complex_types={'documents': {'module': 'generative_ai_inference', 'class': 'list[string]'}}, output_type={'module': 'generative_ai_inference', 'class': 'RerankTextResult'})
@cli_util.wrap_exceptions
def rerank_text_dedicated_serving_mode(ctx, from_json, input, compartment_id, documents, serving_mode_endpoint_id, top_n, is_echo, max_chunks_per_document):
    # Sends a rerank-text request whose serving mode is fixed to DEDICATED
    # for the given endpoint id, then renders the service response.
    # `from_json` is not read in this body.

    request_id = cli_util.use_or_generate_request_id(ctx.obj['request_id'])

    payload = {
        'servingMode': {
            'endpointId': serving_mode_endpoint_id,
            'servingType': 'DEDICATED',
        },
        'input': input,
        'compartmentId': compartment_id,
        'documents': cli_util.parse_json_parameter("documents", documents),
    }

    # Optional fields are sent only when a value was provided (not None).
    for key, value in (
        ('topN', top_n),
        ('isEcho', is_echo),
        ('maxChunksPerDocument', max_chunks_per_document),
    ):
        if value is not None:
            payload[key] = value

    service = cli_util.build_client('generative_ai_inference', 'generative_ai_inference', ctx)
    response = service.rerank_text(
        rerank_text_details=payload,
        opc_request_id=request_id
    )
    cli_util.render_response(response, ctx)


@rerank_text_result_group.command(name=cli_util.override('generative_ai_inference.rerank_text_on_demand_serving_mode.command_name', 'rerank-text-on-demand-serving-mode'), help=u"""Reranks the text responses based on the input documents and a prompt.

Rerank assigns an index and a relevance score to each document, indicating which document is most related to the prompt. \n[Command Reference](rerankText)""")
@cli_util.option('--input', required=True, help=u"""Input query for search in the documents.""")
@cli_util.option('--compartment-id', required=True, help=u"""The OCID of the compartment to call into the Generative AI service LLMs.""")
@cli_util.option('--documents', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""A list of document strings to rerank based on the query asked.""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--serving-mode-model-id', required=True, help=u"""The unique ID of a model to use. You can use the [ListModels] API to list the available models.""")
@cli_util.option('--top-n', type=click.INT, help=u"""The number of most relevant documents or indices to return. Defaults to the length of the documents.""")
@cli_util.option('--is-echo', type=click.BOOL, help=u"""Whether or not to return the documents in the response.""")
@cli_util.option('--max-chunks-per-document', type=click.INT, help=u"""The maximum number of chunks to produce internally from a document.""")
@json_skeleton_utils.get_cli_json_input_option({'documents': {'module': 'generative_ai_inference', 'class': 'list[string]'}})
@cli_util.help_option
@click.pass_context
@json_skeleton_utils.json_skeleton_generation_handler(input_params_to_complex_types={'documents': {'module': 'generative_ai_inference', 'class': 'list[string]'}}, output_type={'module': 'generative_ai_inference', 'class': 'RerankTextResult'})
@cli_util.wrap_exceptions
def rerank_text_on_demand_serving_mode(ctx, from_json, input, compartment_id, documents, serving_mode_model_id, top_n, is_echo, max_chunks_per_document):
    # Sends a rerank-text request whose serving mode is fixed to ON_DEMAND
    # for the given model id, then renders the service response.
    # `from_json` is not read in this body.

    request_id = cli_util.use_or_generate_request_id(ctx.obj['request_id'])

    payload = {
        'servingMode': {
            'modelId': serving_mode_model_id,
            'servingType': 'ON_DEMAND',
        },
        'input': input,
        'compartmentId': compartment_id,
        'documents': cli_util.parse_json_parameter("documents", documents),
    }

    # Optional fields are sent only when a value was provided (not None).
    for key, value in (
        ('topN', top_n),
        ('isEcho', is_echo),
        ('maxChunksPerDocument', max_chunks_per_document),
    ):
        if value is not None:
            payload[key] = value

    service = cli_util.build_client('generative_ai_inference', 'generative_ai_inference', ctx)
    response = service.rerank_text(
        rerank_text_details=payload,
        opc_request_id=request_id
    )
    cli_util.render_response(response, ctx)


@summarize_text_result_group.command(name=cli_util.override('generative_ai_inference.summarize_text.command_name', 'summarize-text'), help=u"""Summarizes the input text. \n[Command Reference](summarizeText)""")
@cli_util.option('--input', required=True, help=u"""The input string to be summarized.""")
@cli_util.option('--serving-mode', required=True, type=custom_types.CLI_COMPLEX_TYPE, help=u"""""" + custom_types.cli_complex_type.COMPLEX_TYPE_HELP)
@cli_util.option('--compartment-id', required=True, help=u"""The OCID of compartment in which to call the Generative AI service to summarize text.""")
@cli_util.option('--is-echo', type=click.BOOL, help=u"""Whether or not to include the original inputs in the response.""")
@cli_util.option('--temperature', help=u"""A number that sets the randomness of the generated output. Lower temperatures mean less random generations.

Use lower numbers for tasks with a correct answer such as question answering or summarizing. High temperatures can generate hallucinations or factually incorrect information. Start with temperatures lower than 1.0, and increase the temperature for more creative outputs, as you regenerate the prompts to refine the outputs.""")
@cli_util.option('--additional-command', help=u"""A free-form instruction for modifying how the summaries get generated. Should complete the sentence \"Generate a summary _\". For example, \"focusing on the next steps\" or \"written by Yoda\".""")
@cli_util.option('--length', type=custom_types.CliCaseInsensitiveChoice(["SHORT", "MEDIUM", "LONG", "AUTO"]), help=u"""Indicates the approximate length of the summary. If \"AUTO\" is selected, the best option will be picked based on the input text.""")
@cli_util.option('--format', type=custom_types.CliCaseInsensitiveChoice(["PARAGRAPH", "BULLETS", "AUTO"]), help=u"""Indicates the style in which the summary will be delivered - in a free form paragraph or in bullet points. If \"AUTO\" is selected, the best option will be picked based on the input text.""")
@cli_util.option('--extractiveness', type=custom_types.CliCaseInsensitiveChoice(["LOW", "MEDIUM", "HIGH", "AUTO"]), help=u"""Controls how close to the original text the summary is. High extractiveness summaries will lean towards reusing sentences verbatim, while low extractiveness summaries will tend to paraphrase more.""")
@json_skeleton_utils.get_cli_json_input_option({'serving-mode': {'module': 'generative_ai_inference', 'class': 'ServingMode'}})
@cli_util.help_option
@click.pass_context
@json_skeleton_utils.json_skeleton_generation_handler(input_params_to_complex_types={'serving-mode': {'module': 'generative_ai_inference', 'class': 'ServingMode'}}, output_type={'module': 'generative_ai_inference', 'class': 'SummarizeTextResult'})
@cli_util.wrap_exceptions
def summarize_text(ctx, from_json, input, serving_mode, compartment_id, is_echo, temperature, additional_command, length, format, extractiveness):
    # Sends a summarize-text request with a caller-supplied serving mode and
    # renders the service response. `from_json` is not read in this body.

    request_id = cli_util.use_or_generate_request_id(ctx.obj['request_id'])

    # Required fields; the serving mode is a complex-type option parsed from JSON.
    payload = {
        'input': input,
        'servingMode': cli_util.parse_json_parameter("serving_mode", serving_mode),
        'compartmentId': compartment_id,
    }

    # Optional fields are sent only when a value was provided (not None).
    optional_fields = (
        ('isEcho', is_echo),
        ('temperature', temperature),
        ('additionalCommand', additional_command),
        ('length', length),
        ('format', format),
        ('extractiveness', extractiveness),
    )
    payload.update({key: value for key, value in optional_fields if value is not None})

    service = cli_util.build_client('generative_ai_inference', 'generative_ai_inference', ctx)
    response = service.summarize_text(
        summarize_text_details=payload,
        opc_request_id=request_id
    )
    cli_util.render_response(response, ctx)


@summarize_text_result_group.command(name=cli_util.override('generative_ai_inference.summarize_text_dedicated_serving_mode.command_name', 'summarize-text-dedicated-serving-mode'), help=u"""Summarizes the input text. \n[Command Reference](summarizeText)""")
@cli_util.option('--input', required=True, help=u"""The input string to be summarized.""")
@cli_util.option('--compartment-id', required=True, help=u"""The OCID of compartment in which to call the Generative AI service to summarize text.""")
@cli_util.option('--serving-mode-endpoint-id', required=True, help=u"""The OCID of the endpoint to use.""")
@cli_util.option('--is-echo', type=click.BOOL, help=u"""Whether or not to include the original inputs in the response.""")
@cli_util.option('--temperature', help=u"""A number that sets the randomness of the generated output. Lower temperatures mean less random generations.

Use lower numbers for tasks with a correct answer such as question answering or summarizing. High temperatures can generate hallucinations or factually incorrect information. Start with temperatures lower than 1.0, and increase the temperature for more creative outputs, as you regenerate the prompts to refine the outputs.""")
@cli_util.option('--additional-command', help=u"""A free-form instruction for modifying how the summaries get generated. Should complete the sentence \"Generate a summary _\". For example, \"focusing on the next steps\" or \"written by Yoda\".""")
@cli_util.option('--length', type=custom_types.CliCaseInsensitiveChoice(["SHORT", "MEDIUM", "LONG", "AUTO"]), help=u"""Indicates the approximate length of the summary. If \"AUTO\" is selected, the best option will be picked based on the input text.""")
@cli_util.option('--format', type=custom_types.CliCaseInsensitiveChoice(["PARAGRAPH", "BULLETS", "AUTO"]), help=u"""Indicates the style in which the summary will be delivered - in a free form paragraph or in bullet points. If \"AUTO\" is selected, the best option will be picked based on the input text.""")
@cli_util.option('--extractiveness', type=custom_types.CliCaseInsensitiveChoice(["LOW", "MEDIUM", "HIGH", "AUTO"]), help=u"""Controls how close to the original text the summary is. High extractiveness summaries will lean towards reusing sentences verbatim, while low extractiveness summaries will tend to paraphrase more.""")
@json_skeleton_utils.get_cli_json_input_option({})
@cli_util.help_option
@click.pass_context
@json_skeleton_utils.json_skeleton_generation_handler(input_params_to_complex_types={}, output_type={'module': 'generative_ai_inference', 'class': 'SummarizeTextResult'})
@cli_util.wrap_exceptions
def summarize_text_dedicated_serving_mode(ctx, from_json, input, compartment_id, serving_mode_endpoint_id, is_echo, temperature, additional_command, length, format, extractiveness):
    # Sends a summarize-text request whose serving mode is fixed to DEDICATED
    # for the given endpoint id, then renders the service response.
    # `from_json` is not read in this body.

    request_id = cli_util.use_or_generate_request_id(ctx.obj['request_id'])

    payload = {
        'servingMode': {
            'endpointId': serving_mode_endpoint_id,
            'servingType': 'DEDICATED',
        },
        'input': input,
        'compartmentId': compartment_id,
    }

    # Optional fields are sent only when a value was provided (not None).
    for key, value in (
        ('isEcho', is_echo),
        ('temperature', temperature),
        ('additionalCommand', additional_command),
        ('length', length),
        ('format', format),
        ('extractiveness', extractiveness),
    ):
        if value is not None:
            payload[key] = value

    service = cli_util.build_client('generative_ai_inference', 'generative_ai_inference', ctx)
    response = service.summarize_text(
        summarize_text_details=payload,
        opc_request_id=request_id
    )
    cli_util.render_response(response, ctx)


@summarize_text_result_group.command(name=cli_util.override('generative_ai_inference.summarize_text_on_demand_serving_mode.command_name', 'summarize-text-on-demand-serving-mode'), help=u"""Summarizes the input text. \n[Command Reference](summarizeText)""")
@cli_util.option('--input', required=True, help=u"""The input string to be summarized.""")
@cli_util.option('--compartment-id', required=True, help=u"""The OCID of compartment in which to call the Generative AI service to summarize text.""")
@cli_util.option('--serving-mode-model-id', required=True, help=u"""The unique ID of a model to use. You can use the [ListModels] API to list the available models.""")
@cli_util.option('--is-echo', type=click.BOOL, help=u"""Whether or not to include the original inputs in the response.""")
@cli_util.option('--temperature', help=u"""A number that sets the randomness of the generated output. Lower temperatures mean less random generations.

Use lower numbers for tasks with a correct answer such as question answering or summarizing. High temperatures can generate hallucinations or factually incorrect information. Start with temperatures lower than 1.0, and increase the temperature for more creative outputs, as you regenerate the prompts to refine the outputs.""")
@cli_util.option('--additional-command', help=u"""A free-form instruction for modifying how the summaries get generated. Should complete the sentence \"Generate a summary _\". For example, \"focusing on the next steps\" or \"written by Yoda\".""")
@cli_util.option('--length', type=custom_types.CliCaseInsensitiveChoice(["SHORT", "MEDIUM", "LONG", "AUTO"]), help=u"""Indicates the approximate length of the summary. If \"AUTO\" is selected, the best option will be picked based on the input text.""")
@cli_util.option('--format', type=custom_types.CliCaseInsensitiveChoice(["PARAGRAPH", "BULLETS", "AUTO"]), help=u"""Indicates the style in which the summary will be delivered - in a free form paragraph or in bullet points. If \"AUTO\" is selected, the best option will be picked based on the input text.""")
@cli_util.option('--extractiveness', type=custom_types.CliCaseInsensitiveChoice(["LOW", "MEDIUM", "HIGH", "AUTO"]), help=u"""Controls how close to the original text the summary is. High extractiveness summaries will lean towards reusing sentences verbatim, while low extractiveness summaries will tend to paraphrase more.""")
@json_skeleton_utils.get_cli_json_input_option({})
@cli_util.help_option
@click.pass_context
@json_skeleton_utils.json_skeleton_generation_handler(input_params_to_complex_types={}, output_type={'module': 'generative_ai_inference', 'class': 'SummarizeTextResult'})
@cli_util.wrap_exceptions
def summarize_text_on_demand_serving_mode(ctx, from_json, input, compartment_id, serving_mode_model_id, is_echo, temperature, additional_command, length, format, extractiveness):
    # Builds a summarize-text request whose serving mode is 'ON_DEMAND'
    # (addressed by a model ID), sends it through the
    # generative_ai_inference client and renders the response.
    #
    # ctx           -- click context; ctx.obj['request_id'] seeds opc_request_id.
    # from_json     -- not read in this body; presumably consumed by the
    #                  decorator stack (confirm against json_skeleton_utils).
    # input, compartment_id, serving_mode_model_id -- always sent.
    # remaining arguments -- sent only when they are not None.
    #
    # NOTE: documented with comments rather than a docstring so that the
    # function's __doc__ is left exactly as it was.

    request_kwargs = {
        'opc_request_id': cli_util.use_or_generate_request_id(ctx.obj['request_id']),
    }

    # Fields that are always part of the request body.
    payload = {
        'servingMode': {
            'modelId': serving_mode_model_id,
            'servingType': 'ON_DEMAND',
        },
        'input': input,
        'compartmentId': compartment_id,
    }

    # Optional fields, listed in the order they are added to the payload.
    optional_fields = (
        ('isEcho', is_echo),
        ('temperature', temperature),
        ('additionalCommand', additional_command),
        ('length', length),
        ('format', format),
        ('extractiveness', extractiveness),
    )
    for field_name, field_value in optional_fields:
        # Only None means "not supplied"; False and empty strings are still sent.
        if field_value is not None:
            payload[field_name] = field_value

    inference_client = cli_util.build_client('generative_ai_inference', 'generative_ai_inference', ctx)
    response = inference_client.summarize_text(
        summarize_text_details=payload,
        **request_kwargs
    )
    cli_util.render_response(response, ctx)
