# --------------------------------------------------------------------------
# ⚠️ WARNING - AUTO-GENERATED CODE - DO NOT EDIT ⚠️
# ⚙️ Generated by 'python -m opgen'
# --------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
# pylint: disable=W0221,W0222,R0901,W0237
# mypy: disable-error-code=override
# ruff: noqa: N801,E741
# ruff: noqa: D214,D402,D405,D411,D412,D416,D417
# --------------------------------------------------------------------------

from __future__ import annotations

from typing import Optional, Sequence, TypeVar, Union

from onnx import GraphProto
from onnx.defs import get_schema
from typing_extensions import TypeAlias

from onnxscript.onnx_opset._impl.opset15 import Opset15
from onnxscript.onnx_types import (
    BFLOAT16,
    BOOL,
    COMPLEX64,
    COMPLEX128,
    DOUBLE,
    FLOAT,
    FLOAT16,
    INT8,
    INT16,
    INT32,
    INT64,
    STRING,
    UINT8,
    UINT16,
    UINT32,
    UINT64,
)
from onnxscript.values import Op, Opset


class Opset16(Opset15):
    def __new__(cls):
        """Create the instance by delegating to ``Opset.__new__`` with an empty domain string and version 16."""
        domain, version = "", 16
        return Opset.__new__(cls, domain, version)

    # Constrained type variable annotating both operands (`A` and `B`) of
    # GreaterOrEqual; sharing one TypeVar annotates them with the same type.
    T_GreaterOrEqual = TypeVar(
        "T_GreaterOrEqual",
        BFLOAT16,
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    )

    # Return annotation of GreaterOrEqual.
    T1_GreaterOrEqual: TypeAlias = BOOL

    def GreaterOrEqual(self, A: T_GreaterOrEqual, B: T_GreaterOrEqual) -> T1_GreaterOrEqual:
        r"""[🌐 GreaterOrEqual(16)](https://onnx.ai/onnx/operators/onnx__GreaterOrEqual.html#greaterorequal-16 "Online Documentation")

        Apply the `greater_equal` logical operation elementwise to the input
        tensors `A` and `B` (with Numpy-style broadcasting support) and return the
        resulting tensor.

        The operator supports **multidirectional (i.e., Numpy-style) broadcasting**;
        see [Broadcasting in ONNX](https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md)
        for details.

        Args:
            A: (non-differentiable) First input operand for the logical operator.

            B: (non-differentiable) Second input operand for the logical operator.

        Returns:
            The result of calling the `GreaterOrEqual` op on the prepared inputs
            (annotated as `BOOL`).
        """

        # Look up the version-16 schema in the default ("") domain.
        op_schema = get_schema("GreaterOrEqual", 16, "")
        greater_or_equal = Op(self, "GreaterOrEqual", op_schema)
        prepared = self._prepare_inputs(op_schema, A, B)
        return greater_or_equal(*prepared)

    # Constrained type variable annotating GridSample's `X` input and its return
    # value.
    T1_GridSample = TypeVar(
        "T1_GridSample",
        BOOL,
        COMPLEX128,
        COMPLEX64,
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        STRING,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    )

    # Constrained type variable annotating GridSample's `grid` input.
    T2_GridSample = TypeVar("T2_GridSample", DOUBLE, FLOAT, FLOAT16)

    def GridSample(
        self,
        X: T1_GridSample,
        grid: T2_GridSample,
        *,
        align_corners: int = 0,
        mode: str = "bilinear",
        padding_mode: str = "zeros",
    ) -> T1_GridSample:
        r"""[🌐 GridSample(16)](https://onnx.ai/onnx/operators/onnx__GridSample.html#gridsample-16 "Online Documentation")

        Compute the output `Y` from the values of an input `X` at the pixel
        locations given by a flow-field `grid`.

        Only spatial (4-D) inputs are currently supported. For `X` of shape
        (N, C, H, W) and `grid` of shape (N, H_out, W_out, 2), the output `Y` has
        shape (N, C, H_out, W_out).

        `X` holds values at the centers of square pixels of an H by W
        2-dimensional image. `grid` holds the normalized positions at which `Y` is
        computed, using the chosen interpolation method (`mode`) and a padding mode
        (for grid positions that fall outside the 2-dimensional image).

        Each element of `grid[N, H_out, W_out]` is a size-2 vector giving a
        position in the 2-dimensional space of `X`; these positions are used to
        interpolate the output values `Y[N, C, H_out, W_out]`.

        GridSample is commonly used to implement the grid generator and sampler of
        [Spatial Transformer Networks](https://arxiv.org/abs/1506.02025).
        See also [torch.nn.functional.grid_sample](https://pytorch.org/docs/master/generated/torch.nn.functional.grid_sample.html#torch-nn-functional-grid-sample).

        Args:
            X: (differentiable) 4-D tensor of shape (N, C, H, W), where N is the
                batch size, C is the number of channels, and H and W are the height
                and width of the input data.

            grid: (non-differentiable) Input offset, a 4-D tensor of shape
                (N, H_out, W_out, 2), where H_out and W_out are the height and
                width of the grid and of the output. The grid specifies the
                sampling pixel locations normalized by the input spatial
                dimensions, so most of its values should lie in [-1, 1]. Outputs
                for grid values outside [-1, 1] are handled as defined by
                padding_mode.

            align_corners: If align_corners=1, the extrema (-1 and 1) refer to the
                center points of the input's corner pixels. If align_corners=0,
                they refer to the corner points of the input's corner pixels,
                which makes the sampling more resolution agnostic.

            mode: Three interpolation modes: bilinear (default), nearest and
                bicubic.

            padding_mode: Supported padding modes for outside grid values:
                `zeros` (default), `border`, `reflection`. zeros: use 0 for
                out-of-bound grid locations. border: use border values for
                out-of-bound grid locations. reflection: use values at locations
                reflected by the border for out-of-bound grid locations. If index
                0 represents the margin pixel, the reflected value at index -1 is
                the same as the value at index 1. A location far away from the
                border keeps being reflected until it is in bound: if pixel
                location x = -3.5 reflects by border -1 and becomes x' = 1.5, it
                then reflects by border 1 and becomes x'' = 0.5.

        Returns:
            The result of calling the `GridSample` op on the prepared inputs with
            the given attributes.
        """

        op_schema = get_schema("GridSample", 16, "")
        grid_sample = Op(self, "GridSample", op_schema)
        tensor_inputs = self._prepare_inputs(op_schema, X, grid)
        # Attributes are forwarded by keyword, in the same order as the signature.
        attributes = {
            "align_corners": align_corners,
            "mode": mode,
            "padding_mode": padding_mode,
        }
        return grid_sample(*tensor_inputs, **attributes)

    # Constrained type variable annotating Identity's `input` parameter and its
    # return value. The constraints list Optional[Sequence[...]], Optional[...],
    # Sequence[...] and plain entries; BFLOAT16 is listed only as a plain entry.
    V_Identity = TypeVar(
        "V_Identity",
        Optional[Sequence[BOOL]],
        Optional[Sequence[COMPLEX128]],
        Optional[Sequence[COMPLEX64]],
        Optional[Sequence[DOUBLE]],
        Optional[Sequence[FLOAT]],
        Optional[Sequence[FLOAT16]],
        Optional[Sequence[INT16]],
        Optional[Sequence[INT32]],
        Optional[Sequence[INT64]],
        Optional[Sequence[INT8]],
        Optional[Sequence[STRING]],
        Optional[Sequence[UINT16]],
        Optional[Sequence[UINT32]],
        Optional[Sequence[UINT64]],
        Optional[Sequence[UINT8]],
        Optional[BOOL],
        Optional[COMPLEX128],
        Optional[COMPLEX64],
        Optional[DOUBLE],
        Optional[FLOAT],
        Optional[FLOAT16],
        Optional[INT16],
        Optional[INT32],
        Optional[INT64],
        Optional[INT8],
        Optional[STRING],
        Optional[UINT16],
        Optional[UINT32],
        Optional[UINT64],
        Optional[UINT8],
        Sequence[BOOL],
        Sequence[COMPLEX128],
        Sequence[COMPLEX64],
        Sequence[DOUBLE],
        Sequence[FLOAT],
        Sequence[FLOAT16],
        Sequence[INT16],
        Sequence[INT32],
        Sequence[INT64],
        Sequence[INT8],
        Sequence[STRING],
        Sequence[UINT16],
        Sequence[UINT32],
        Sequence[UINT64],
        Sequence[UINT8],
        BFLOAT16,
        BOOL,
        COMPLEX128,
        COMPLEX64,
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        STRING,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    )

    def Identity(self, input: V_Identity) -> V_Identity:
        r"""[🌐 Identity(16)](https://onnx.ai/onnx/operators/onnx__Identity.html#identity-16 "Online Documentation")

        Identity operator.

        Args:
            input: (differentiable) Input tensor

        Returns:
            The result of calling the `Identity` op on the prepared input.
        """

        op_schema = get_schema("Identity", 16, "")
        identity = Op(self, "Identity", op_schema)
        prepared = self._prepare_inputs(op_schema, input)
        return identity(*prepared)

    # Annotation of If's `cond` input.
    B_If: TypeAlias = BOOL

    # Return annotation of If. Unlike the constrained TypeVars used by the other
    # operators in this class, this is a plain Union alias.
    V_If: TypeAlias = Union[
        Optional[Sequence[BFLOAT16]],
        Optional[Sequence[BOOL]],
        Optional[Sequence[COMPLEX128]],
        Optional[Sequence[COMPLEX64]],
        Optional[Sequence[DOUBLE]],
        Optional[Sequence[FLOAT]],
        Optional[Sequence[FLOAT16]],
        Optional[Sequence[INT16]],
        Optional[Sequence[INT32]],
        Optional[Sequence[INT64]],
        Optional[Sequence[INT8]],
        Optional[Sequence[STRING]],
        Optional[Sequence[UINT16]],
        Optional[Sequence[UINT32]],
        Optional[Sequence[UINT64]],
        Optional[Sequence[UINT8]],
        Optional[BFLOAT16],
        Optional[BOOL],
        Optional[COMPLEX128],
        Optional[COMPLEX64],
        Optional[DOUBLE],
        Optional[FLOAT],
        Optional[FLOAT16],
        Optional[INT16],
        Optional[INT32],
        Optional[INT64],
        Optional[INT8],
        Optional[STRING],
        Optional[UINT16],
        Optional[UINT32],
        Optional[UINT64],
        Optional[UINT8],
        Sequence[BFLOAT16],
        Sequence[BOOL],
        Sequence[COMPLEX128],
        Sequence[COMPLEX64],
        Sequence[DOUBLE],
        Sequence[FLOAT],
        Sequence[FLOAT16],
        Sequence[INT16],
        Sequence[INT32],
        Sequence[INT64],
        Sequence[INT8],
        Sequence[STRING],
        Sequence[UINT16],
        Sequence[UINT32],
        Sequence[UINT64],
        Sequence[UINT8],
        BFLOAT16,
        BOOL,
        COMPLEX128,
        COMPLEX64,
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        STRING,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    ]

    def If(self, cond: B_If, *, else_branch: GraphProto, then_branch: GraphProto) -> V_If:
        r"""[🌐 If(16)](https://onnx.ai/onnx/operators/onnx__If.html#if-16 "Online Documentation")

        If conditional.

        Args:
            cond: Condition for the if

            else_branch: Graph to run if the condition is false. Has N outputs:
                values you wish to be live-out to the enclosing scope. The number
                of outputs must match the number of outputs in the then_branch.

            then_branch: Graph to run if the condition is true. Has N outputs:
                values you wish to be live-out to the enclosing scope. The number
                of outputs must match the number of outputs in the else_branch.

        Returns:
            The result of calling the `If` op on the prepared condition with both
            branch graphs passed as attributes.
        """

        op_schema = get_schema("If", 16, "")
        if_op = Op(self, "If", op_schema)
        prepared = self._prepare_inputs(op_schema, cond)
        # Both branch graphs are forwarded as keyword attributes.
        branches = {"else_branch": else_branch, "then_branch": then_branch}
        return if_op(*prepared, **branches)

    # Constrained type variable annotating LeakyRelu's `X` input and its return
    # value.
    T_LeakyRelu = TypeVar("T_LeakyRelu", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def LeakyRelu(self, X: T_LeakyRelu, *, alpha: float = 0.009999999776482582) -> T_LeakyRelu:
        r"""[🌐 LeakyRelu(16)](https://onnx.ai/onnx/operators/onnx__LeakyRelu.html#leakyrelu-16 "Online Documentation")

        Apply the leaky rectifier elementwise to the data tensor: given input data
        (Tensor<T>) and an argument alpha, produce one output data (Tensor<T>)
        where `f(x) = alpha * x for x < 0` and `f(x) = x for x >= 0`.

        Args:
            X: (differentiable) Input tensor

            alpha: Coefficient of leakage.

        Returns:
            The result of calling the `LeakyRelu` op on the prepared input with
            the `alpha` attribute.
        """

        op_schema = get_schema("LeakyRelu", 16, "")
        leaky_relu = Op(self, "LeakyRelu", op_schema)
        prepared = self._prepare_inputs(op_schema, X)
        return leaky_relu(*prepared, alpha=alpha)

    # Constrained type variable annotating both operands (`A` and `B`) of
    # LessOrEqual; sharing one TypeVar annotates them with the same type.
    T_LessOrEqual = TypeVar(
        "T_LessOrEqual",
        BFLOAT16,
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    )

    # Return annotation of LessOrEqual.
    T1_LessOrEqual: TypeAlias = BOOL

    def LessOrEqual(self, A: T_LessOrEqual, B: T_LessOrEqual) -> T1_LessOrEqual:
        r"""[🌐 LessOrEqual(16)](https://onnx.ai/onnx/operators/onnx__LessOrEqual.html#lessorequal-16 "Online Documentation")

        Apply the `less_equal` logical operation elementwise to the input tensors
        `A` and `B` (with Numpy-style broadcasting support) and return the
        resulting tensor.

        The operator supports **multidirectional (i.e., Numpy-style) broadcasting**;
        see [Broadcasting in ONNX](https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md)
        for details.

        Args:
            A: (non-differentiable) First input operand for the logical operator.

            B: (non-differentiable) Second input operand for the logical operator.

        Returns:
            The result of calling the `LessOrEqual` op on the prepared inputs
            (annotated as `BOOL`).
        """

        # Look up the version-16 schema in the default ("") domain.
        op_schema = get_schema("LessOrEqual", 16, "")
        less_or_equal = Op(self, "LessOrEqual", op_schema)
        prepared = self._prepare_inputs(op_schema, A, B)
        return less_or_equal(*prepared)

    # Annotation of Loop's `M` input (wrapped in Optional[...] in the signature).
    I_Loop: TypeAlias = INT64

    # Annotation of Loop's `cond` input (wrapped in Optional[...] in the
    # signature).
    B_Loop: TypeAlias = BOOL

    # Constrained type variable annotating Loop's variadic `v_initial` inputs and
    # its return value.
    V_Loop = TypeVar(
        "V_Loop",
        Optional[Sequence[BFLOAT16]],
        Optional[Sequence[BOOL]],
        Optional[Sequence[COMPLEX128]],
        Optional[Sequence[COMPLEX64]],
        Optional[Sequence[DOUBLE]],
        Optional[Sequence[FLOAT]],
        Optional[Sequence[FLOAT16]],
        Optional[Sequence[INT16]],
        Optional[Sequence[INT32]],
        Optional[Sequence[INT64]],
        Optional[Sequence[INT8]],
        Optional[Sequence[STRING]],
        Optional[Sequence[UINT16]],
        Optional[Sequence[UINT32]],
        Optional[Sequence[UINT64]],
        Optional[Sequence[UINT8]],
        Optional[BFLOAT16],
        Optional[BOOL],
        Optional[COMPLEX128],
        Optional[COMPLEX64],
        Optional[DOUBLE],
        Optional[FLOAT],
        Optional[FLOAT16],
        Optional[INT16],
        Optional[INT32],
        Optional[INT64],
        Optional[INT8],
        Optional[STRING],
        Optional[UINT16],
        Optional[UINT32],
        Optional[UINT64],
        Optional[UINT8],
        Sequence[BFLOAT16],
        Sequence[BOOL],
        Sequence[COMPLEX128],
        Sequence[COMPLEX64],
        Sequence[DOUBLE],
        Sequence[FLOAT],
        Sequence[FLOAT16],
        Sequence[INT16],
        Sequence[INT32],
        Sequence[INT64],
        Sequence[INT8],
        Sequence[STRING],
        Sequence[UINT16],
        Sequence[UINT32],
        Sequence[UINT64],
        Sequence[UINT8],
        BFLOAT16,
        BOOL,
        COMPLEX128,
        COMPLEX64,
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        STRING,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    )

    def Loop(
        self, M: Optional[I_Loop], cond: Optional[B_Loop], *v_initial: V_Loop, body: GraphProto
    ) -> V_Loop:
        r"""[🌐 Loop(16)](https://onnx.ai/onnx/operators/onnx__Loop.html#loop-16 "Online Documentation")

        Generic looping construct. The loop has multiple termination conditions:

        1) Trip count. The iteration count, specified at runtime through the
           input M. Optional. Set to empty string to omit.
           A static trip count (fixed at graph construction time) can be
           expressed by passing a constant node as input M.
        2) Loop termination condition. An input to the op that decides whether
           the first iteration runs; it is also a loop-carried dependency of the
           body graph. The body graph must yield a value for the condition
           variable whether or not this input is provided.

        The operating modes of this operator are listed below next to equivalent
        C-style code.

        Operator inputs are written as (max_trip_count, condition_var).

        * input ("", ""):
                for (int i=0; ; ++i) {
                  cond = ... // Note this value is ignored, but is required in the body
                }

        * input ("", cond) // Note this is analogous to a while loop
                bool cond = ...;
                for (int i=0; cond; ++i) {
                  cond = ...;
                }

        * input ("", 1) // Note this is analogous to a do-while loop
                bool cond = true
                for (int i=0; cond; ++i) {
                  cond = ...;
                }

        * input (trip_count, "") // Note this is analogous to a for loop
                int trip_count = ...
                for (int i=0; i < trip_count; ++i) {
                  cond = ...; // ignored
                }

        * input (trip_count, cond)
                int trip_count = ...;
                bool cond = ...;
                for (int i=0; i < trip_count && cond; ++i) {
                  cond = ...;
                }


        *Sample usage - cond as well as trip count*

            graph predict-net {
              %a = Constant[value = <Scalar Tensor [3]>]()
              %b = Constant[value = <Scalar Tensor [6]>]()
              %keepgoing = Constant[value = <Scalar Tensor [1]>]()
              %max_trip_count = Constant[value = <Scalar Tensor [10]>]()
              %keepgoing_out, %b_out, %user_defined_vals = Loop[body = <graph body-net>](%max_trip_count, %keepgoing, %b)
              return
            }

            graph body-net (
              %i[INT32, scalar]           // iteration number
              %keepgoing_in[BOOL, scalar] // incoming loop-termination-condition; not used
              %b_in[INT32, scalar]        // incoming value of loop-carried-dependency b
            ) {
              %my_local = Add(%a, %b_in)
              %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-dependency b
              %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-termination-condition
              %user_defined_val = Add(%b_in, %b_in) // scan-output value to be accumulated
              return %keepgoing_out, %b_out, %user_defined_val
            }

        *Sample equivalent C code*

            {
              /* User-defined code (enclosing scope) */
              int a = 3, b = 6;
              bool keepgoing = true; // Analogous to input cond
              /* End user-defined code */

              /* Implicitly-defined code */
              const int max_trip_count = 10; // Analogous to input M
              int user_defined_vals[]; // Imagine this is resizable
              /* End implicitly-defined code */
              /* initialize loop-carried variables and scan-output variables */
              bool keepgoing_out = keepgoing
              int b_out = b

              for (int i=0; i < max_trip_count && keepgoing_out; ++i) {
                /* Implicitly-defined code: bind actual parameter values
                   to formal parameter variables of loop-body */
                bool keepgoing_in = keepgoing_out;
                int b_in = b_out;

                /* User-defined code (loop body) */
                int my_local = a + b_in; // Reading value "a" from the enclosing scope is fine
                b_out = a - b_in;
                keepgoing_out = my_local > b_out;
                user_defined_val = b_in + b_in; // b_in and b_out are different variables
                /* End user-defined code */

                /* Implicitly defined-code */
                user_defined_vals[i] = user_defined_val // accumulate scan-output values
              }
              // int t = my_local; // Can't do this. my_local is not accessible here.

              // The values below are bound to the output variables of the loop and therefore accessible
              // b_out; user_defined_vals; keepgoing_out;
            }

        Several points about this code snippet are worth noting:

        1) Values from the enclosing scope (i.e. variable "a" here) are in scope
           and can be referenced in the inputs of the loop.
        2) Any value computed in the loop body that is needed in a subsequent
           iteration or after the loop is modelled as a pair of variables in the
           loop body: an input variable (e.g., b_in) and an output variable
           (e.g., b_out). These are referred to as loop-carried dependences. The
           loop operation node supplies the value of the input variable for the
           first iteration, and returns the value of the output variable produced
           by the final iteration.
        3) Scan_output variables implicitly concatenate values computed across
           all the iterations. In the example above, the values of
           user_defined_val computed over all iterations are concatenated and
           returned as the value of user_defined_vals after the loop.
        4) Values created in the body cannot be accessed in the enclosing scope,
           except through the mechanism described above.

        Note that the semantics of this op support "diagonal" or "wavefront"
        execution. (See Step 3 here for an example:
        https://devblogs.nvidia.com/optimizing-recurrent-neural-networks-cudnn-5/).
        Frontends should emit multi-layer RNNs as a series of While operators (with
        time being the inner looping dimension), with each successive layer
        consuming the scan_outputs from the previous layer, possibly going through
        several point-wise operators (e.g. dropout, residual connections, linear
        layer).

        Inputs/outputs of the subgraph (produced by the loop node) are matched by
        order rather than by name. The implementation will figure out the names
        based on this order.

        Args:
            M: (optional) A maximum trip-count for the loop specified at runtime.
                Optional. Pass empty string to skip.

            cond: (optional) A boolean termination condition. Optional. Pass empty
                string to skip.

            v_initial: (variadic, heterogeneous) The initial values of any
                loop-carried dependencies (values that change across loop
                iterations)

            body: The graph run each iteration. It has 2+N inputs: (iteration_num,
                condition, loop carried dependencies...). It has 1+N+K outputs:
                (condition, loop carried dependencies..., scan_outputs...). Each
                scan_output is created by concatenating the value of the specified
                output value at the end of each iteration of the loop. It is an
                error if the dimensions or data type of these scan_outputs change
                across loop iterations.

        Returns:
            The result of calling the `Loop` op on the prepared inputs with `body`
            passed as an attribute.
        """

        op_schema = get_schema("Loop", 16, "")
        loop_op = Op(self, "Loop", op_schema)
        # M and cond come first, followed by every loop-carried initial value.
        prepared = self._prepare_inputs(op_schema, M, cond, *v_initial)
        return loop_op(*prepared, body=body)

    # Constrained type variable annotating PRelu's `X` and `slope` inputs and its
    # return value.
    T_PRelu = TypeVar(
        "T_PRelu", BFLOAT16, DOUBLE, FLOAT, FLOAT16, INT32, INT64, UINT32, UINT64
    )

    def PRelu(self, X: T_PRelu, slope: T_PRelu) -> T_PRelu:
        r"""[🌐 PRelu(16)](https://onnx.ai/onnx/operators/onnx__PRelu.html#prelu-16 "Online Documentation")

        Apply the parametric rectifier elementwise to the data tensor: given input
        data (Tensor<T>) and a slope tensor, produce one output data (Tensor<T>)
        where `f(x) = slope * x for x < 0` and `f(x) = x for x >= 0`.

        The operator supports **unidirectional broadcasting** (tensor slope should
        be unidirectional broadcastable to input tensor X); see
        [Broadcasting in ONNX](https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md)
        for details.

        Args:
            X: (differentiable) Input tensor

            slope: (differentiable) Slope tensor. The shape of slope can be smaller
                than that of the first input X; if so, its shape must be
                unidirectional broadcastable to X.

        Returns:
            The result of calling the `PRelu` op on the prepared inputs.
        """

        op_schema = get_schema("PRelu", 16, "")
        prelu = Op(self, "PRelu", op_schema)
        prepared = self._prepare_inputs(op_schema, X, slope)
        return prelu(*prepared)

    # Constrained type variable annotating RoiAlign's `X` and `rois` inputs and
    # its return value.
    T1_RoiAlign = TypeVar("T1_RoiAlign", DOUBLE, FLOAT, FLOAT16)

    # Annotation of RoiAlign's `batch_indices` input.
    T2_RoiAlign: TypeAlias = INT64

    def RoiAlign(
        self,
        X: T1_RoiAlign,
        rois: T1_RoiAlign,
        batch_indices: T2_RoiAlign,
        *,
        coordinate_transformation_mode: str = "half_pixel",
        mode: str = "avg",
        output_height: int = 1,
        output_width: int = 1,
        sampling_ratio: int = 0,
        spatial_scale: float = 1.0,
    ) -> T1_RoiAlign:
        r"""[🌐 RoiAlign(16)](https://onnx.ai/onnx/operators/onnx__RoiAlign.html#roialign-16 "Online Documentation")

        Region of Interest (RoI) align operation described in the
        [Mask R-CNN paper](https://arxiv.org/abs/1703.06870).
        RoiAlign takes an input tensor X and regions of interest (rois) and
        applies pooling across each RoI; it produces a 4-D tensor of shape
        (num_rois, C, output_height, output_width).

        RoiAlign was proposed to avoid misalignment by removing the quantizations
        performed when converting from the original image into the feature map and
        from the feature map into the RoI feature; in each RoI bin, the values of
        the sampled locations are computed directly through bilinear
        interpolation.

        Args:
            X: Input data tensor from the previous operator; 4-D feature map of
                shape (N, C, H, W), where N is the batch size, C is the number of
                channels, and H and W are the height and the width of the data.

            rois: RoIs (Regions of Interest) to pool over; rois is a 2-D input of
                shape (num_rois, 4) given as [[x1, y1, x2, y2], ...]. The RoIs'
                coordinates are in the coordinate system of the input image. Each
                coordinate set has a 1:1 correspondence with the 'batch_indices'
                input.

            batch_indices: 1-D tensor of shape (num_rois,) with each element
                denoting the index of the corresponding image in the batch.

            coordinate_transformation_mode: Allowed values are 'half_pixel' and
                'output_half_pixel'. Use the value 'half_pixel' to pixel shift the
                input coordinates by -0.5 (the recommended behavior). Use the
                value 'output_half_pixel' to omit the pixel shift for the input
                (use this for a backward-compatible behavior).

            mode: The pooling method. Two modes are supported: 'avg' and 'max'.
                Default is 'avg'.

            output_height: default 1; Pooled output Y's height.

            output_width: default 1; Pooled output Y's width.

            sampling_ratio: Number of sampling points in the interpolation grid
                used to compute the output value of each pooled output bin. If
                > 0, then exactly sampling_ratio x sampling_ratio grid points are
                used. If == 0, then an adaptive number of grid points is used
                (computed as ceil(roi_width / output_width), and likewise for
                height). Default is 0.

            spatial_scale: Multiplicative spatial scale factor to translate ROI
                coordinates from their input spatial scale to the scale used when
                pooling, i.e., spatial scale of the input feature map X relative
                to the input image. Default is 1.0f.

        Returns:
            The result of calling the `RoiAlign` op on the prepared inputs with the
            given attributes.
        """

        op_schema = get_schema("RoiAlign", 16, "")
        roi_align = Op(self, "RoiAlign", op_schema)
        tensor_inputs = self._prepare_inputs(op_schema, X, rois, batch_indices)
        # Attributes are forwarded by keyword, in the same order as the signature.
        attributes = {
            "coordinate_transformation_mode": coordinate_transformation_mode,
            "mode": mode,
            "output_height": output_height,
            "output_width": output_width,
            "sampling_ratio": sampling_ratio,
            "spatial_scale": spatial_scale,
        }
        return roi_align(*tensor_inputs, **attributes)

    # Constrained type variable annotating Scan's variadic
    # `initial_state_and_scan_inputs` and its return value.
    V_Scan = TypeVar(
        "V_Scan",
        BFLOAT16,
        BOOL,
        COMPLEX128,
        COMPLEX64,
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        STRING,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    )

    def Scan(
        self,
        *initial_state_and_scan_inputs: V_Scan,
        body: GraphProto,
        num_scan_inputs: int,
        scan_input_axes: Optional[Sequence[int]] = None,
        scan_input_directions: Optional[Sequence[int]] = None,
        scan_output_axes: Optional[Sequence[int]] = None,
        scan_output_directions: Optional[Sequence[int]] = None,
    ) -> V_Scan:
        r"""[🌐 Scan(16)](https://onnx.ai/onnx/operators/onnx__Scan.html#scan-16 "Online Documentation")


        Scan can be used to iterate over one or more scan_input tensors,
        constructing zero or more scan_output tensors. It combines ideas from general recurrences,
        functional programming constructs such as scan, fold, map, and zip, and is intended to enable
        generalizations of RNN-like constructs for sequence-to-sequence processing.
        Other tensors (referred to as state_variables here) can be used to carry a state
        when iterating from one element to another (similar to hidden-state in RNNs, also referred
        to as loop-carried dependences in the context of loops).
        Many common usages involve a single scan_input tensor (where functionality
        similar to scan, fold and map can be obtained). When more than one scan_input is used,
        a behavior similar to zip is obtained.

        The attribute body must be a graph, specifying the computation to be performed in
        every iteration. It takes as input the current values of the state_variables and
        the current iterated element of the scan_inputs. It must return the (updated) values
        of the state_variables and zero or more scan_output_element tensors. The values of the
        scan_output_element tensors are concatenated over all the iterations to produce the
        scan_output values of the scan construct (similar to the concatenated intermediate
        hidden-state values of RNN-like constructs). All the output tensors (state_variables as
        well as scan_output_element tensors) are required to have the same shape in each iteration
        of the loop (a restriction imposed to enable efficient memory allocation).

        Note that the iterated element passed to the body subgraph does not have a sequence
        axis. It will have a rank one less than the rank of the corresponding scan_input.

        The scan operation returns the final values of the state_variables as well as the
        scan_outputs.

        The optional attribute scan_input_directions specifies the direction (forward or backward)
        for each scan input. If this attribute is omitted, all sequences are scanned in the forward
        direction. A bidirectional scan may be performed by specifying the same tensor input twice
        in the scan_inputs, once with a forward direction, and once with a backward direction.

        The scan_output of the operation is produced by concatenating the scan_output_element
        values produced by the body in each iteration.  The optional attribute scan_output_directions
        specifies the direction in which scan_output is constructed (by appending or prepending the
        scan_output_element to scan_output in each iteration) for each scan_output. If this attribute
        is omitted, the scan_output_element is appended to the scan_output in each iteration.

        The optional attribute scan_input_axes specifies the axis to be scanned for each scan_input.
        If omitted, every scan_input will be scanned in axis 0. For example, if axis 0 is the
        batch axis and axis 1 is the time axis (to be scanned), specify an axis value of 1.
        Note that scanning a non-zero axis may be less efficient than scanning axis zero.

        The optional attribute scan_output_axes specifies the axis along which the scan_outputs
        are accumulated for each scan_output. For example, if axis 1 is the time axis (to be
        scanned) for both inputs and outputs, specify a scan_input axis and scan_output axis
        value of 1.

        Note that because of the ONNX restriction that only the last parameter of an operator can
        be variadic, the initial-states and scan-inputs are listed together as one input parameter.
        Similarly, the final-states and scan-outputs are listed together as one output parameter.
        The attribute num_scan_inputs indicates the number M of scan-inputs.

        The behavior of

            Scan <
                num_scan_inputs = m,
                body = loop-body,
                scan_input_axes = [axis_1, ..., axis_m]
            > (init_1, ..., init_n, scan_1, ..., scan_m)

        is equivalent to the following pseudo-code:

            // scan_i.shape[axis_i] denotes the (max) sequence-length of scan_i
            // scan_i.shape[axis_i] is required to be equal to scan_j.shape[axis_j] for all i,j.
            sequence_length = scan_1.shape[axis_1];

            // initialize state-variables
            st_1 = init_1; ... st_n = init_n;
            // initialize scan-output variables: [] denotes an empty tensor
            scan_out_1 = []; ...; scan_out_k = [];
            // identify number of iterations:

            // execute loop
            for (int t = 0; t < sequence_length; ++t) {
                // generate the scan-input elements: the notation T<axis=k>[t] indicates the sub-tensor
                // of rank one less than T obtained by indexing T at position t along axis k.
                si_1 = scan_1<axis=axis_1>[t];
                ... ;
                si_m = scan_m<axis=axis_m>[t];
                // execute loop-body
                st_1, ..., st_n, so_1, ..., so_k = loop-body(st_1, ..., st_n, si_1, ..., si_m)
                // accumulate the scan-output elements
                scan_out_1 = Concat<axis=0>(scan_out_1, so_1); ... ; scan_out_k = Concat<axis=0>(scan_out_k, so_k);
            }

            return st_1, ..., st_n, scan_out_1, ..., scan_out_k;

        *Sample usage: Encoding RNN using a Scan*

        The following example shows how a simple RNN over an input tensor %X, with weight tensor %Wi,
        recurrence weight tensor %Ri, bias tensors %Wbi and %Rbi, and initial hidden-state %H_0 can
        be encoded as a ScanLoop. Note that the loop-body is a nested graph, and it directly computes
        %Wi, %Ri, %Wbi, and %Rbi (typically constants or initializers in the body graph). If these
        values are computed in the outer graph, they need to be passed in as extra state_variables.

            graph rnn-encoding {
              %H_0 = ...
              %X = ...
              %Y_h, %Y = Scan[body = <graph rnn-cell-1>, num_scan_inputs=1](%H_0, %X)
              return %Y, %Y_h
            }

            graph rnn-cell-1 (
              %H_tminus1[FLOAT, tensor]
              %X_t[FLOAT, tensor]
            ) {
              %Wi = ...
              %Ri = ...
              %Wbi = ...
              %Rbi = ...
              %t1 = X_t * (Wi^T)
              %t2 = H_tminus1*(Ri^T)
              %t3 = Add(%t1, %t2)
              %t4 = Add(%t3, %Wbi)
              %t5 = Add(%t4, %Rbi)
              %Ht = Tanh(%t5)
              %Accumulate = Identity(%Ht)
              return %Ht, %Accumulate
            }



        Args:
            initial_state_and_scan_inputs: (variadic, heterogeneous) Initial values of
                the loop's N state variables followed by M scan_inputs

            body: The graph run each iteration. It has N+M inputs: (loop state
                variables..., scan_input_elts...). It has N+K outputs: (loop state
                variables..., scan_output_elts...). Each scan_output is created by
                concatenating the value of the specified scan_output_elt value at the
                end of each iteration of the loop. It is an error if the dimensions of
                these values change across loop iterations.

            num_scan_inputs: An attribute specifying the number of scan_inputs M.

            scan_input_axes: An optional list of M flags. The i-th element of the list
                specifies the axis to be scanned (the sequence axis) for the i-th
                scan_input. If omitted, 0 will be used as the scan axis for every
                scan_input. Negative value for an axis means counting dimensions from
                the back. Accepted range is [-r, r-1] where r = rank(input).

            scan_input_directions: An optional list of M flags. The i-th element of the
                list specifies the direction to be scanned for the i-th scan_input
                tensor: 0 indicates forward direction and 1 indicates reverse direction.
                If omitted, all scan_input tensors will be scanned in the forward
                direction.

            scan_output_axes: An optional list of K flags. The i-th element of the list
                specifies the axis for the i-th scan_output. The scan outputs are
                accumulated along the specified axis. If omitted, 0 will be used as the
                scan axis for every scan_output. Negative value for an axis means
                counting dimensions from the back. Accepted range is [-r, r-1].

            scan_output_directions: An optional list of K flags, one for each
                scan_output. The i-th element of the list specifies whether the i-th
                scan_output should be constructed by appending or prepending a new value
                in each iteration: 0 indicates appending and 1 indicates prepending. If
                omitted, all scan_output tensors will be produced by appending a value
                in each iteration.
        """

        schema = get_schema("Scan", 16, "")
        op = Op(self, "Scan", schema)
        return op(
            *self._prepare_inputs(schema, *initial_state_and_scan_inputs),
            body=body,
            num_scan_inputs=num_scan_inputs,
            scan_input_axes=scan_input_axes,
            scan_input_directions=scan_input_directions,
            scan_output_axes=scan_output_axes,
            scan_output_directions=scan_output_directions,
        )

    # Constrained type variable used by ScatterElements for its `data` and
    # `updates` parameters and for its return annotation, so all three are
    # tied to the same tensor element type.
    T_ScatterElements = TypeVar(
        "T_ScatterElements",
        BFLOAT16,
        BOOL,
        COMPLEX128,
        COMPLEX64,
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        STRING,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    )

    # Constrained type variable for the `indices` parameter of ScatterElements:
    # only 32-bit and 64-bit signed integer tensors are listed.
    Tind_ScatterElements = TypeVar("Tind_ScatterElements", INT32, INT64)

    def ScatterElements(
        self,
        data: T_ScatterElements,
        indices: Tind_ScatterElements,
        updates: T_ScatterElements,
        *,
        axis: int = 0,
        reduction: str = "none",
    ) -> T_ScatterElements:
        r"""[🌐 ScatterElements(16)](https://onnx.ai/onnx/operators/onnx__ScatterElements.html#scatterelements-16 "Online Documentation")


        ScatterElements takes three inputs, `data`, `updates` and `indices`, all of the
        same rank r >= 1, plus an optional attribute axis that selects an axis of `data`
        (axis 0, the outer-most axis, by default). The output starts out as a copy of
        the input `data`; the values given in `updates` are then written at the index
        positions given by `indices`. The output has the same shape as `data`.
        For every entry of `updates`, the target index in `data` is built by combining
        the matching entry of `indices` with the index of the entry itself: along
        dimension = axis the index-value is read from the corresponding entry of
        `indices`, and along every dimension != axis the index-value is the index of
        the entry itself.
        `reduction` optionally selects a reduction operation that is used to combine all values of the `updates`
        tensor into `output` at the specified `indices`.
        When `reduction` is set to "none", indices should not contain duplicate entries: that is, if idx1 != idx2,
        then indices[idx1] != indices[idx2]. For instance, in the 2-D tensor case, the update
        corresponding to the [i][j] entry is performed as follows:
        ::

              output[indices[i][j]][j] = updates[i][j] if axis = 0,
              output[i][indices[i][j]] = updates[i][j] if axis = 1,


        When `reduction` is set to "add", the update corresponding to the [i][j] entry is performed as follows:
        ::

              output[indices[i][j]][j] += updates[i][j] if axis = 0,
              output[i][indices[i][j]] += updates[i][j] if axis = 1,


        When `reduction` is set to "mul", the update corresponding to the [i][j] entry is performed as follows:
        ::

              output[indices[i][j]][j] *= updates[i][j] if axis = 0,
              output[i][indices[i][j]] *= updates[i][j] if axis = 1,


        This operator is the inverse of GatherElements. It is similar to Torch's Scatter operation.
        Example 1:
        ::

              data = [
                  [0.0, 0.0, 0.0],
                  [0.0, 0.0, 0.0],
                  [0.0, 0.0, 0.0],
              ]
              indices = [
                  [1, 0, 2],
                  [0, 2, 1],
              ]
              updates = [
                  [1.0, 1.1, 1.2],
                  [2.0, 2.1, 2.2],
              ]
              output = [
                  [2.0, 1.1, 0.0],
                  [1.0, 0.0, 2.2],
                  [0.0, 2.1, 1.2],
              ]


        Example 2:
        ::

              data = [[1.0, 2.0, 3.0, 4.0, 5.0]]
              indices = [[1, 3]]
              updates = [[1.1, 2.1]]
              axis = 1
              output = [[1.0, 1.1, 3.0, 2.1, 5.0]]




        Args:
            data: (differentiable) Tensor of rank r >= 1.

            indices: (non-differentiable) Tensor of int32/int64 indices, of rank
                r >= 1 (same rank as input). Every index value is expected to lie
                within the bounds [-s, s-1] along an axis of size s. It is an error
                if any of the index values are out of bounds.

            updates: (differentiable) Tensor of rank r >= 1 (same rank and shape as
                indices).

            axis: The axis to scatter on. A negative value means counting dimensions
                from the back. Accepted range is [-r, r-1] where r = rank(data).

            reduction: Type of reduction to apply: none (default), add, mul. 'none':
                no reduction applied. 'add': reduction using the addition operation.
                'mul': reduction using the multiplication operation.
        """

        # Look up the opset-16 schema in the default ONNX domain ("") and bind
        # it to an Op owned by this opset.
        op_schema = get_schema("ScatterElements", 16, "")
        scatter_elements = Op(self, "ScatterElements", op_schema)
        # Tensor inputs go through _prepare_inputs and are passed positionally;
        # attributes are forwarded by keyword.
        tensor_inputs = self._prepare_inputs(op_schema, data, indices, updates)
        return scatter_elements(*tensor_inputs, axis=axis, reduction=reduction)

    # Constrained type variable used by ScatterND for its `data` and `updates`
    # parameters and for its return annotation, so all three are tied to the
    # same tensor element type.
    T_ScatterND = TypeVar(
        "T_ScatterND",
        BFLOAT16,
        BOOL,
        COMPLEX128,
        COMPLEX64,
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        STRING,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    )

    def ScatterND(
        self,
        data: T_ScatterND,
        indices: INT64,
        updates: T_ScatterND,
        *,
        reduction: str = "none",
    ) -> T_ScatterND:
        r"""[🌐 ScatterND(16)](https://onnx.ai/onnx/operators/onnx__ScatterND.html#scatternd-16 "Online Documentation")


        ScatterND takes three inputs: a `data` tensor of rank r >= 1, an `indices` tensor of
        rank q >= 1, and an `updates` tensor of rank q + r - indices.shape[-1] - 1. The output
        starts out as a copy of the input `data`; the values given in `updates` are then written
        at the index positions given by `indices`. The output has the same shape as `data`.

        `indices` is an integer tensor. Let k denote indices.shape[-1], the last dimension in the shape of `indices`.
        `indices` is interpreted as a (q-1)-dimensional tensor of k-tuples, where each k-tuple is a partial-index into `data`.
        Hence, k can be at most the rank of `data`. When k equals rank(data), each update entry specifies an
        update to a single element of the tensor. When k is less than rank(data), each update entry specifies an
        update to a slice of the tensor. Index values may be negative, following the usual
        convention of counting backwards from the end, but are expected to be in the valid range.

        `updates` is interpreted as a (q-1)-dimensional tensor of replacement-slice-values. Thus, the
        first (q-1) dimensions of updates.shape must match the first (q-1) dimensions of indices.shape.
        The remaining dimensions of `updates` correspond to the dimensions of the
        replacement-slice-values. Each replacement-slice-value is a (r-k) dimensional tensor,
        corresponding to the trailing (r-k) dimensions of `data`. Thus, the shape of `updates`
        must equal indices.shape[0:q-1] ++ data.shape[k:r-1], where ++ denotes the concatenation
        of shapes.

        The `output` is computed via the following equation:
            output = np.copy(data)
            update_indices = indices.shape[:-1]
            for idx in np.ndindex(update_indices):
                output[indices[idx]] = updates[idx]
        The order of iteration in the loop above is not specified.
        In particular, indices should not contain duplicate entries: that is, if idx1 != idx2, then indices[idx1] != indices[idx2].
        This guarantees that the output value does not depend on the iteration order.

        `reduction` optionally selects a reduction operation that is used to combine all values of the `updates`
        tensor into `output` at the specified `indices`.
        When `reduction` is set to "none", indices should not contain duplicate entries: that is, if idx1 != idx2,
        then indices[idx1] != indices[idx2]. This guarantees that the output value does not depend on the iteration order.
        When `reduction` is set to "add", `output` is computed as follows:
            output = np.copy(data)
            update_indices = indices.shape[:-1]
            for idx in np.ndindex(update_indices):
                output[indices[idx]] += updates[idx]
        When `reduction` is set to "mul", `output` is computed as follows:
            output = np.copy(data)
            update_indices = indices.shape[:-1]
            for idx in np.ndindex(update_indices):
                output[indices[idx]] *= updates[idx]
        This operator is the inverse of GatherND.
        Example 1:
        ::

              data    = [1, 2, 3, 4, 5, 6, 7, 8]
              indices = [[4], [3], [1], [7]]
              updates = [9, 10, 11, 12]
              output  = [1, 11, 3, 10, 9, 6, 7, 12]


        Example 2:
        ::

              data    = [[[1, 2, 3, 4], [5, 6, 7, 8], [8, 7, 6, 5], [4, 3, 2, 1]],
                         [[1, 2, 3, 4], [5, 6, 7, 8], [8, 7, 6, 5], [4, 3, 2, 1]],
                         [[8, 7, 6, 5], [4, 3, 2, 1], [1, 2, 3, 4], [5, 6, 7, 8]],
                         [[8, 7, 6, 5], [4, 3, 2, 1], [1, 2, 3, 4], [5, 6, 7, 8]]]
              indices = [[0], [2]]
              updates = [[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
                         [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3], [4, 4, 4, 4]]]
              output  = [[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
                         [[1, 2, 3, 4], [5, 6, 7, 8], [8, 7, 6, 5], [4, 3, 2, 1]],
                         [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3], [4, 4, 4, 4]],
                         [[8, 7, 6, 5], [4, 3, 2, 1], [1, 2, 3, 4], [5, 6, 7, 8]]]




        Args:
            data: (differentiable) Tensor of rank r >= 1.

            indices: (non-differentiable) Tensor of rank q >= 1.

            updates: (differentiable) Tensor of rank q + r - indices_shape[-1] - 1.

            reduction: Type of reduction to apply: none (default), add, mul. 'none':
                no reduction applied. 'add': reduction using the addition operation.
                'mul': reduction using the multiplication operation.
        """

        # Look up the opset-16 schema in the default ONNX domain ("") and bind
        # it to an Op owned by this opset.
        op_schema = get_schema("ScatterND", 16, "")
        scatter_nd = Op(self, "ScatterND", op_schema)
        # Tensor inputs go through _prepare_inputs and are passed positionally;
        # the single attribute is forwarded by keyword.
        tensor_inputs = self._prepare_inputs(op_schema, data, indices, updates)
        return scatter_nd(*tensor_inputs, reduction=reduction)

    # Alias used to annotate the `condition` parameter of Where as a boolean tensor.
    B_Where: TypeAlias = BOOL

    # Constrained type variable used by Where for its `X` and `Y` parameters
    # and for its return annotation, so all three are tied to the same tensor
    # element type.
    T_Where = TypeVar(
        "T_Where",
        BFLOAT16,
        BOOL,
        COMPLEX128,
        COMPLEX64,
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        STRING,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    )

    def Where(self, condition: B_Where, X: T_Where, Y: T_Where) -> T_Where:
        r"""[🌐 Where(16)](https://onnx.ai/onnx/operators/onnx__Where.html#where-16 "Online Documentation")


        Return elements taken from either X or Y, depending on condition.
        Where behaves like
        [numpy.where](https://numpy.org/doc/stable/reference/generated/numpy.where.html)
        called with three parameters.

        This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check `Broadcasting in ONNX <https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md>`_.

        Args:
            condition: (non-differentiable) When True (nonzero), yield X, otherwise
                yield Y.

            X: (differentiable) Values selected at indices where condition is True.

            Y: (differentiable) Values selected at indices where condition is False.
        """

        # Look up the opset-16 schema in the default ONNX domain ("") and bind
        # it to an Op owned by this opset.
        op_schema = get_schema("Where", 16, "")
        where_op = Op(self, "Where", op_schema)
        # Where has no attributes: only the prepared tensor inputs are passed.
        tensor_inputs = self._prepare_inputs(op_schema, condition, X, Y)
        return where_op(*tensor_inputs)
