"""
Service datasources definitions and utilities.

This module provides access to predefined service datasources and their schemas
for both Tinybird and Organization scopes.
"""

from typing import Any, Dict, List, Optional


def get_tinybird_service_datasources() -> List[Dict[str, Any]]:
    """
    Get all Tinybird-specific service datasources.

    Every entry is a dictionary with the following keys:

    - ``name``: fully qualified datasource name, prefixed with ``tinybird.``.
    - ``description``: human-readable summary of what the datasource holds.
    - ``dateColumn``: name of the column used as the main date/time column.
    - ``engine``: dictionary with engine settings (``engine``, ``sorting_key``
      and, for some entries, ``partition_key``, ``ttl`` or ``sampling_key``).
      It is empty when no engine details are declared for the datasource.
    - ``columns``: list of ``{"name": ..., "type": ...}`` dictionaries.

    The list is rebuilt from literals on every call, so callers receive their
    own copy and may mutate it without affecting later calls.

    Returns:
        List[Dict[str, Any]]: A list of Tinybird service datasources.
    """
    return [
        {
            "name": "tinybird.pipe_stats_rt",
            "description": "Contains information about all requests made to your API endpoints in real time. This data source has a TTL of 7 days. If you need to query data older than 7 days you must use the aggregated by day data available at tinybird.pipe_stats.",
            "dateColumn": "start_datetime",
            "engine": {
                "engine": "MergeTree",
                "sorting_key": "pipe_id, start_datetime",
                "ttl": "start_datetime + toIntervalDay(7)",
            },
            "columns": [
                {"name": "start_datetime", "type": "DateTime"},
                {"name": "pipe_id", "type": "String"},
                {"name": "pipe_name", "type": "String"},
                {"name": "token", "type": "String"},
                {"name": "token_name", "type": "String"},
                {"name": "duration", "type": "Float32"},
                {"name": "read_bytes", "type": "UInt64"},
                {"name": "read_rows", "type": "UInt64"},
                {"name": "cpu_time", "type": "Float32"},
                {"name": "url", "type": "Nullable(String)"},
                {"name": "error", "type": "UInt8"},
                {"name": "status_code", "type": "Int32"},
                {"name": "request_id", "type": "String"},
                {"name": "parameters", "type": "Map(String, String)"},
                {"name": "method", "type": "String"},
                {"name": "release", "type": "String"},
                {"name": "user_agent", "type": "Nullable(String)"},
                {"name": "resource_tags", "type": "Array(String)"},
                {"name": "memory_usage", "type": "UInt64"},
            ],
        },
        {
            "name": "tinybird.pipe_stats",
            "description": "Aggregates the request stats in tinybird.pipe_stats_rt by day.",
            "dateColumn": "date",
            "engine": {
                "engine": "SummingMergeTree",
                "sorting_key": "pipe_id, date",
                "partition_key": "toYYYYMM(date)",
            },
            "columns": [
                {"name": "date", "type": "DateTime"},
                {"name": "pipe_id", "type": "String"},
                {"name": "pipe_name", "type": "String"},
                {"name": "avg_duration_state", "type": "AggregateFunction(avg, Float32)"},
                {"name": "error_count", "type": "UInt64"},
                {"name": "view_count", "type": "UInt64"},
                {
                    "name": "quantile_timing_state",
                    "type": "AggregateFunction(quantilesTiming(0.9, 0.95, 0.99), Float64)",
                },
                {"name": "read_bytes_sum", "type": "UInt64"},
                {"name": "read_rows_sum", "type": "UInt64"},
                {"name": "cpu_time_sum", "type": "Float64"},
                {"name": "resource_tags", "type": "Array(String)"},
            ],
        },
        {
            "name": "tinybird.block_log",
            "description": "The data source contains details about how Tinybird ingests data into your data sources. You can use this Service data source to spot problematic parts of your data.",
            "dateColumn": "timestamp",
            "engine": {
                "engine": "MergeTree",
                "sorting_key": "timestamp, cityHash64(datasource_name)",
                "partition_key": "toYear(timestamp)",
                "sampling_key": "cityHash64(datasource_name)",
            },
            "columns": [
                {"name": "timestamp", "type": "DateTime"},
                {"name": "request_id", "type": "String"},
                {"name": "import_id", "type": "String"},
                {"name": "job_id", "type": "Nullable(String)"},
                {"name": "source", "type": "String"},
                {"name": "token_id", "type": "String"},
                {"name": "block_id", "type": "String"},
                {"name": "status", "type": "String"},
                {"name": "user_id", "type": "String"},
                {"name": "user_mail", "type": "String"},
                {"name": "datasource_id", "type": "String"},
                {"name": "datasource_name", "type": "String"},
                {"name": "start_offset", "type": "Nullable(Int64)"},
                {"name": "end_offset", "type": "Nullable(Int64)"},
                {"name": "rows", "type": "Nullable(Int32)"},
                {"name": "parser", "type": "Nullable(String)"},
                {"name": "quarantine_lines", "type": "Nullable(UInt32)"},
                {"name": "empty_lines", "type": "Nullable(UInt32)"},
                {"name": "bytes", "type": "Nullable(UInt32)"},
                {"name": "processing_time", "type": "Nullable(Float32)"},
                {"name": "processing_error", "type": "Nullable(String)"},
            ],
        },
        {
            "name": "tinybird.datasources_ops_log",
            # Each tracked operation is listed exactly once (a duplicated
            # "append" entry was removed from this description).
            "description": "Contains all operations performed to your data sources. Tinybird tracks the following operations: create, append, append-hfi, append-kafka, replace, delete, truncate, rename, populateview-queued, populateview, copy, alter",
            "dateColumn": "timestamp",
            "engine": {
                "engine": "MergeTree",
                "sorting_key": "datasource_id, timestamp",
                "partition_key": "toYYYYMM(timestamp)",
            },
            "columns": [
                {"name": "timestamp", "type": "DateTime"},
                {"name": "event_type", "type": "String"},
                {"name": "datasource_id", "type": "String"},
                {"name": "datasource_name", "type": "String"},
                {"name": "result", "type": "String"},
                {"name": "elapsed_time", "type": "Float32"},
                {"name": "error", "type": "Nullable(String)"},
                {"name": "request_id", "type": "String"},
                {"name": "import_id", "type": "Nullable(String)"},
                {"name": "job_id", "type": "Nullable(String)"},
                {"name": "rows", "type": "Nullable(UInt64)"},
                {"name": "rows_quarantine", "type": "Nullable(UInt64)"},
                {"name": "blocks_ids", "type": "Array(String)"},
                {"name": "operation_id", "type": "String"},
                {"name": "read_rows", "type": "UInt64"},
                {"name": "cpu_time", "type": "Float32"},
                {"name": "memory_usage", "type": "UInt64"},
                {"name": "read_bytes", "type": "UInt64"},
                {"name": "written_rows", "type": "UInt64"},
                {"name": "written_bytes", "type": "UInt64"},
                {"name": "written_rows_quarantine", "type": "UInt64"},
                {"name": "written_bytes_quarantine", "type": "UInt64"},
                {"name": "pipe_id", "type": "String"},
                {"name": "pipe_name", "type": "String"},
                {"name": "release", "type": "String"},
            ],
        },
        {
            "name": "tinybird.datasources_ops_stats",
            "description": "Data from tinybird.datasources_ops_log, aggregated by day.",
            "dateColumn": "event_date",
            "engine": {
                "engine": "SummingMergeTree",
                "sorting_key": "event_date, event_type, pipe_id",
                "partition_key": "toYYYYMM(event_date)",
            },
            "columns": [
                {"name": "event_date", "type": "DateTime"},
                {"name": "workspace_id", "type": "String"},
                {"name": "event_type", "type": "LowCardinality(String)"},
                {"name": "pipe_id", "type": "String"},
                {"name": "pipe_name", "type": "String"},
                {"name": "error_count", "type": "UInt64"},
                {"name": "executions", "type": "UInt64"},
                {"name": "avg_elapsed_time_state", "type": "AggregateFunction(avg, Float32)"},
                {"name": "quantiles_state", "type": "AggregateFunction(quantiles(0.9, 0.95, 0.99), Float64)"},
                {"name": "read_rows", "type": "UInt64"},
                {"name": "read_bytes", "type": "UInt64"},
                {"name": "written_rows", "type": "UInt64"},
                {"name": "written_bytes", "type": "UInt64"},
                {"name": "written_rows_quarantine", "type": "UInt64"},
                {"name": "written_bytes_quarantine", "type": "UInt64"},
                {"name": "cpu_time", "type": "Float64"},
                {"name": "resource_tags", "type": "Array(String)"},
            ],
        },
        {
            "name": "tinybird.endpoint_errors",
            "description": "It provides the last 30 days errors of your published endpoints.",
            "dateColumn": "start_datetime",
            "engine": {
                "engine": "MergeTree",
                "sorting_key": "start_datetime",
                "partition_key": "toYYYYMM(toDate(start_datetime))",
                "ttl": "start_datetime + toIntervalDay(30)",
            },
            "columns": [
                {"name": "start_datetime", "type": "DateTime"},
                {"name": "request_id", "type": "String"},
                {"name": "pipe_id", "type": "String"},
                {"name": "pipe_name", "type": "String"},
                {"name": "params", "type": "Nullable(String)"},
                {"name": "url", "type": "Nullable(String)"},
                {"name": "status_code", "type": "Nullable(Int32)"},
                {"name": "error", "type": "Nullable(String)"},
            ],
        },
        {
            "name": "tinybird.kafka_ops_log",
            "description": "Contains all operations performed to your Kafka Data Sources during the last 30 days.",
            "dateColumn": "timestamp",
            "engine": {
                "engine": "MergeTree",
                "sorting_key": "datasource_id, topic, timestamp",
                "partition_key": "toYYYYMMDD(timestamp)",
                "ttl": "timestamp + toIntervalDay(30)",
            },
            "columns": [
                {"name": "timestamp", "type": "DateTime"},
                {"name": "datasource_id", "type": "String"},
                {"name": "topic", "type": "String"},
                {"name": "partition", "type": "Int16"},
                {"name": "msg_type", "type": "String"},
                {"name": "lag", "type": "Int64"},
                {"name": "processed_messages", "type": "Int32"},
                {"name": "processed_bytes", "type": "Int32"},
                {"name": "committed_messages", "type": "Int32"},
                {"name": "msg", "type": "String"},
            ],
        },
        {
            "name": "tinybird.datasources_storage",
            "description": "Contains stats about your Data Sources storage.",
            "dateColumn": "timestamp",
            "engine": {
                "engine": "AggregatingMergeTree",
                "sorting_key": "datasource_id, timestamp",
                "partition_key": "toYYYYMM(timestamp)",
            },
            "columns": [
                {"name": "datasource_id", "type": "String"},
                {"name": "datasource_name", "type": "String"},
                {"name": "timestamp", "type": "DateTime"},
                {"name": "bytes", "type": "SimpleAggregateFunction(max, UInt64)"},
                {"name": "rows", "type": "SimpleAggregateFunction(max, UInt64)"},
                {"name": "bytes_quarantine", "type": "SimpleAggregateFunction(max, UInt64)"},
                {"name": "rows_quarantine", "type": "SimpleAggregateFunction(max, UInt64)"},
            ],
        },
        {
            "name": "tinybird.bi_stats_rt",
            "description": "Contains information about all requests to your BI Connector interface in real time.",
            "dateColumn": "start_datetime",
            "engine": {
                "engine": "MergeTree",
                "sorting_key": "cityHash64(query_normalized), start_datetime",
                "ttl": "start_datetime + toIntervalDay(7)",
            },
            "columns": [
                {"name": "start_datetime", "type": "DateTime"},
                {"name": "query", "type": "String"},
                {"name": "query_normalized", "type": "String"},
                {"name": "error_code", "type": "Int32"},
                {"name": "error", "type": "Nullable(String)"},
                {"name": "url", "type": "String"},
                {"name": "duration", "type": "UInt64"},
                {"name": "read_rows", "type": "UInt64"},
                {"name": "read_bytes", "type": "UInt64"},
                {"name": "result_rows", "type": "UInt64"},
                {"name": "result_bytes", "type": "UInt64"},
            ],
        },
        {
            "name": "tinybird.bi_stats",
            "description": "Aggregates the stats in tinybird.bi_stats_rt by day.",
            "dateColumn": "date",
            # NOTE(review): the sorting key mentions `start_datetime`, which is
            # not among the columns listed for this datasource (only `date`
            # is) - confirm against the actual table definition.
            "engine": {"engine": "MergeTree", "sorting_key": "cityHash64(query_normalized), start_datetime"},
            "columns": [
                {"name": "date", "type": "Date"},
                {"name": "query_normalized", "type": "String"},
                {"name": "view_count", "type": "UInt64"},
                {"name": "error_count", "type": "UInt64"},
                {"name": "avg_duration_state", "type": "AggregateFunction(avg, Float32)"},
                {
                    "name": "quantile_timing_state",
                    "type": "AggregateFunction(quantilesTiming(0.9, 0.95, 0.99), Float64)",
                },
                {"name": "read_bytes_sum", "type": "UInt64"},
                {"name": "read_rows_sum", "type": "UInt64"},
                {"name": "avg_result_rows_state", "type": "AggregateFunction(avg, Float32)"},
                {"name": "avg_result_bytes_state", "type": "AggregateFunction(avg, Float32)"},
            ],
        },
        {
            "name": "tinybird.sinks_ops_log",
            "description": "Contains information about your Sink pipes.",
            "dateColumn": "timestamp",
            "engine": {
                "engine": "MergeTree",
                "sorting_key": "pipe_id, timestamp",
                "partition_key": "toYYYYMM(timestamp)",
            },
            "columns": [
                {"name": "timestamp", "type": "DateTime"},
                {"name": "job_id", "type": "Nullable(String)"},
                {"name": "service", "type": "LowCardinality(String)"},
                {"name": "pipe_name", "type": "String"},
                {"name": "pipe_id", "type": "String"},
                {"name": "result", "type": "LowCardinality(String)"},
                {"name": "error", "type": "Nullable(String)"},
                {"name": "elapsed_time", "type": "Float64"},
                {"name": "read_rows", "type": "UInt64"},
                {"name": "written_rows", "type": "UInt64"},
                {"name": "read_bytes", "type": "UInt64"},
                {"name": "written_bytes", "type": "UInt64"},
                {"name": "cpu_time", "type": "Float32"},
                {"name": "output", "type": "UInt64"},
                {"name": "parameters", "type": "Map(String, String)"},
                {"name": "options", "type": "Map(String, String)"},
                {"name": "token_name", "type": "String"},
            ],
        },
        {
            "name": "tinybird.releases_log",
            "description": "Contains operations performed to your releases.",
            "dateColumn": "timestamp",
            "engine": {
                "engine": "MergeTree",
                "sorting_key": "semver, timestamp",
                "partition_key": "toYYYYMM(timestamp)",
            },
            "columns": [
                {"name": "timestamp", "type": "DateTime64(3)"},
                {"name": "event_type", "type": "LowCardinality(String)"},
                {"name": "commit", "type": "String"},
                {"name": "semver", "type": "String"},
                {"name": "token", "type": "String"},
                {"name": "token_name", "type": "String"},
                {"name": "result", "type": "LowCardinality(String)"},
                {"name": "error", "type": "String"},
            ],
        },
        {
            "name": "tinybird.data_transfer",
            "description": "Stats of data transferred per hour by a workspace.",
            "dateColumn": "timestamp",
            # No engine details are declared for this datasource.
            "engine": {},
            "columns": [
                {"name": "timestamp", "type": "DateTime"},
                {"name": "event_type", "type": "LowCardinality(String)"},
                {"name": "origin_provider", "type": "LowCardinality(String)"},
                {"name": "origin_region", "type": "LowCardinality(String)"},
                {"name": "destination_provider", "type": "LowCardinality(String)"},
                {"name": "destination_region", "type": "LowCardinality(String)"},
                {"name": "kind", "type": "LowCardinality(String)"},
                {"name": "bytes", "type": "UInt64"},
            ],
        },
        {
            "name": "tinybird.jobs_log",
            "description": "Contains all job executions performed in your workspace.",
            "dateColumn": "created_at",
            "engine": {
                "engine": "ReplacingMergeTree",
                "sorting_key": "created_at, job_id",
                "partition_key": "toYYYYMM(created_at)",
            },
            "columns": [
                {"name": "job_id", "type": "String"},
                {"name": "job_type", "type": "LowCardinality(String)"},
                {"name": "status", "type": "LowCardinality(String)"},
                {"name": "error", "type": "Nullable(String)"},
                {"name": "workspace_id", "type": "String"},
                {"name": "request_id", "type": "Nullable(String)"},
                {"name": "pipe_id", "type": "Nullable(String)"},
                {"name": "datasource_id", "type": "Nullable(String)"},
                {"name": "created_at", "type": "DateTime64(3)"},
                {"name": "started_at", "type": "Nullable(DateTime64(3))"},
                {"name": "updated_at", "type": "DateTime64(3)"},
                {"name": "job_metadata", "type": "String"},
            ],
        },
        {
            "name": "tinybird.hook_log",
            "description": "Log of hook executions and their results.",
            "dateColumn": "timestamp",
            "engine": {
                "engine": "ReplacingMergeTree",
                "sorting_key": "timestamp, cityHash64(datasource_name)",
                "partition_key": "toYear(timestamp)",
            },
            "columns": [
                {"name": "timestamp", "type": "DateTime"},
                {"name": "request_id", "type": "String"},
                {"name": "import_id", "type": "Nullable(String)"},
                {"name": "job_id", "type": "Nullable(String)"},
                {"name": "source", "type": "String"},
                {"name": "hook_id", "type": "String"},
                {"name": "name", "type": "String"},
                {"name": "operation", "type": "String"},
                {"name": "status", "type": "String"},
                {"name": "datasource_id", "type": "String"},
                {"name": "datasource_name", "type": "String"},
                {"name": "processing_time", "type": "Nullable(Float32)"},
                {"name": "processing_error", "type": "Nullable(String)"},
            ],
        },
        {
            "name": "tinybird.data_guess",
            "description": "Guesses the type of data in a datasource for a user.",
            "dateColumn": "timestamp",
            "engine": {
                "engine": "ReplacingMergeTree",
                "sorting_key": "user_id, datasource_id, timestamp",
                "partition_key": "toYYYYMMDD(timestamp)",
            },
            "columns": [
                {"name": "user_id", "type": "LowCardinality(String)"},
                {"name": "datasource_id", "type": "LowCardinality(String)"},
                {"name": "timestamp", "type": "DateTime"},
                {"name": "path", "type": "LowCardinality(String)"},
                {"name": "type", "type": "LowCardinality(String)"},
                {"name": "num", "type": "Float64"},
                {"name": "str", "type": "String"},
            ],
        },
        {
            "name": "tinybird.estimated_shared_infra_cpu_time",
            "description": "Contains CPU time in seconds for all your operations in the workspace during a natural minute.",
            "dateColumn": "minute",
            "engine": {
                "engine": "AggregatingMergeTree",
                # NOTE(review): `minute_slot` is not among the columns listed
                # below (the date column is `minute`) - confirm whether this
                # is an internal column or a stale name.
                "sorting_key": "minute_slot, workspace_id",
                "partition_key": "toYYYYMM(minute)",
            },
            "columns": [
                {"name": "minute", "type": "DateTime"},
                {"name": "workspace_id", "type": "String"},
                {"name": "total_cpu_time_seconds", "type": "Float64"},
            ],
        },
    ]


def get_organization_service_datasources() -> List[Dict[str, Any]]:
    """
    Get all Organization-specific service datasources.

    Returns:
        List[Dict[str, Any]]: A list of Organization service datasources.
    """
    return [
        {
            "name": "organization.workspaces",
            "description": "Lists all Organization Workspaces and related information (name, IDs, databases, plan, when it was created, and whether it was soft-deleted).",
            "dateColumn": "timestamp",
            "engine": {"engine": "ReplacingMergeTree", "sorting_key": "workspace_id", "partition_key": "tuple()"},
            "columns": [
                {"name": "workspace_id", "type": "String"},
                {"name": "name", "type": "String"},
                {"name": "database", "type": "String"},
                {"name": "plan", "type": "String"},
                {"name": "created_at", "type": "DateTime"},
                {"name": "deleted_at", "type": "Nullable(DateTime)"},
            ],
        },
        {
            "name": "organization.processed_data",
            "description": "Information related to all processed data per day per workspace.",
            "dateColumn": "date",
            "engine": {
                "engine": "SummingMergeTree",
                "sorting_key": "database, date",
                "partition_key": "toYYYYMM(date)",
            },
            "columns": [
                {"name": "date", "type": "Date"},
                {"name": "database", "type": "String"},
                {"name": "read_bytes", "type": "UInt64"},
                {"name": "written_bytes", "type": "UInt64"},
            ],
        },
        {
            "name": "organization.datasources_storage",
            "description": "Similar to tinybird.datasources_storage but with data for all Organization Workspaces.",
            "dateColumn": "timestamp",
            "engine": {
                "engine": "AggregatingMergeTree",
                "sorting_key": "workspace_id, datasource_id, timestamp",
                "partition_key": "toYYYYMM(timestamp)",
            },
            "columns": [
                {"name": "workspace_id", "type": "String"},
                {"name": "datasource_id", "type": "String"},
                {"name": "datasource_name", "type": "String"},
                {"name": "timestamp", "type": "DateTime"},
                {"name": "bytes", "type": "SimpleAggregateFunction(max, UInt64)"},
                {"name": "rows", "type": "SimpleAggregateFunction(max, UInt64)"},
                {"name": "bytes_quarantine", "type": "SimpleAggregateFunction(max, UInt64)"},
                {"name": "rows_quarantine", "type": "SimpleAggregateFunction(max, UInt64)"},
            ],
        },
        {
            "name": "organization.datasources_ops_log",
            "description": "Similar to tinybird.datasources_ops_log but with data for all Organization Workspaces.",
            "dateColumn": "timestamp",
            "engine": {
                "engine": "ReplacingMergeTree",
                "sorting_key": "workspace_id, datasource_id, timestamp",
                "partition_key": "tuple()",
            },
            "columns": [
                {"name": "workspace_id", "type": "String"},
                {"name": "timestamp", "type": "DateTime"},
                {"name": "event_type", "type": "String"},
                {"name": "datasource_id", "type": "String"},
                {"name": "datasource_name", "type": "String"},
                {"name": "result", "type": "String"},
                {"name": "elapsed_time", "type": "Float32"},
                {"name": "error", "type": "Nullable(String)"},
                {"name": "request_id", "type": "String"},
                {"name": "import_id", "type": "Nullable(String)"},
                {"name": "job_id", "type": "Nullable(String)"},
                {"name": "rows", "type": "Nullable(UInt64)"},
                {"name": "rows_quarantine", "type": "Nullable(UInt64)"},
                {"name": "blocks_ids", "type": "Array(String)"},
                {"name": "operation_id", "type": "String"},
                {"name": "read_rows", "type": "UInt64"},
                {"name": "cpu_time", "type": "Float32"},
                {"name": "memory_usage", "type": "UInt64"},
                {"name": "read_bytes", "type": "UInt64"},
                {"name": "written_rows", "type": "UInt64"},
                {"name": "written_bytes", "type": "UInt64"},
                {"name": "written_rows_quarantine", "type": "UInt64"},
                {"name": "written_bytes_quarantine", "type": "UInt64"},
                {"name": "pipe_id", "type": "String"},
                {"name": "pipe_name", "type": "String"},
                {"name": "release", "type": "String"},
            ],
        },
        {
            "name": "organization.datasources_ops_stats",
            "description": "Similar to tinybird.datasources_ops_stats but with data for all Organization Workspaces.",
            "dateColumn": "event_date",
            "engine": {
                "engine": "SummingMergeTree",
                "sorting_key": "event_date, workspace_id, event_type, pipe_id",
                "partition_key": "toYYYYMM(event_date)",
            },
            "columns": [
                {"name": "event_date", "type": "DateTime"},
                {"name": "workspace_id", "type": "String"},
                {"name": "event_type", "type": "LowCardinality(String)"},
                {"name": "pipe_id", "type": "String"},
                {"name": "pipe_name", "type": "String"},
                {"name": "error_count", "type": "UInt64"},
                {"name": "executions", "type": "UInt64"},
                {"name": "avg_elapsed_time_state", "type": "AggregateFunction(avg, Float32)"},
                {"name": "quantiles_state", "type": "AggregateFunction(quantiles(0.9, 0.95, 0.99), Float64)"},
                {"name": "read_rows", "type": "UInt64"},
                {"name": "read_bytes", "type": "UInt64"},
                {"name": "written_rows", "type": "UInt64"},
                {"name": "written_bytes", "type": "UInt64"},
                {"name": "written_rows_quarantine", "type": "UInt64"},
                {"name": "written_bytes_quarantine", "type": "UInt64"},
            ],
        },
        {
            "name": "organization.pipe_stats",
            "description": "Similar to tinybird.pipe_stats but with data for all Organization Workspaces.",
            "dateColumn": "date",
            "engine": {
                "engine": "SummingMergeTree",
                "sorting_key": "workspace_id, pipe_id, date",
                "partition_key": "toYYYYMM(date)",
            },
            "columns": [
                {"name": "workspace_id", "type": "String"},
                {"name": "date", "type": "Date"},
                {"name": "pipe_id", "type": "String"},
                {"name": "pipe_name", "type": "String"},
                {"name": "avg_duration_state", "type": "AggregateFunction(avg, Float32)"},
                {"name": "error_count", "type": "UInt64"},
                {"name": "view_count", "type": "UInt64"},
                {
                    "name": "quantile_timing_state",
                    "type": "AggregateFunction(quantilesTiming(0.9, 0.95, 0.99), Float64)",
                },
                {"name": "read_bytes_sum", "type": "UInt64"},
                {"name": "read_rows_sum", "type": "UInt64"},
                {"name": "cpu_time_sum", "type": "Float64"},
                {"name": "resource_tags", "type": "Array(String)"},
            ],
        },
        {
            "name": "organization.pipe_stats_rt",
            "description": "Similar to tinybird.pipe_stats_rt but with data for all Organization Workspaces.",
            "dateColumn": "start_datetime",
            "engine": {
                "engine": "MergeTree",
                "sorting_key": "workspace_id, pipe_id, start_datetime",
                "ttl": "start_datetime + toIntervalDay(7)",
            },
            "columns": [
                {"name": "workspace_id", "type": "String"},
                {"name": "start_datetime", "type": "DateTime"},
                {"name": "pipe_id", "type": "String"},
                {"name": "pipe_name", "type": "String"},
                {"name": "token", "type": "String"},
                {"name": "token_name", "type": "String"},
                {"name": "duration", "type": "Float32"},
                {"name": "read_bytes", "type": "UInt64"},
                {"name": "read_rows", "type": "UInt64"},
                {"name": "cpu_time", "type": "Float32"},
                {"name": "url", "type": "Nullable(String)"},
                {"name": "error", "type": "UInt8"},
                {"name": "status_code", "type": "Int32"},
                {"name": "request_id", "type": "String"},
                {"name": "parameters", "type": "Map(String, String)"},
                {"name": "method", "type": "String"},
                {"name": "release", "type": "String"},
                {"name": "user_agent", "type": "Nullable(String)"},
                {"name": "resource_tags", "type": "Array(String)"},
            ],
        },
        {
            "name": "organization.data_transfer",
            "description": "Similar to tinybird.data_transfer but with data for all Organization Workspaces.",
            "dateColumn": "timestamp",
            "engine": {},
            "columns": [
                {"name": "workspace_id", "type": "String"},
                {"name": "timestamp", "type": "DateTime"},
                {"name": "event_type", "type": "LowCardinality(String)"},
                {"name": "origin_provider", "type": "LowCardinality(String)"},
                {"name": "origin_region", "type": "LowCardinality(String)"},
                {"name": "destination_provider", "type": "LowCardinality(String)"},
                {"name": "destination_region", "type": "LowCardinality(String)"},
                {"name": "kind", "type": "LowCardinality(String)"},
                {"name": "bytes", "type": "UInt64"},
            ],
        },
        {
            "name": "organization.jobs_log",
            "description": "Historic Logs for all kinds of job executions across the organization",
            "dateColumn": "created_at",
            "engine": {
                "engine": "ReplacingMergeTree",
                "sorting_key": "workspace_id, created_at, job_id",
                "partition_key": "toYYYYMM(created_at)",
            },
            "columns": [
                {"name": "job_id", "type": "String"},
                {"name": "job_type", "type": "LowCardinality(String)"},
                {"name": "status", "type": "LowCardinality(String)"},
                {"name": "error", "type": "Nullable(String)"},
                {"name": "workspace_id", "type": "String"},
                {"name": "request_id", "type": "Nullable(String)"},
                {"name": "pipe_id", "type": "Nullable(String)"},
                {"name": "datasource_id", "type": "Nullable(String)"},
                {"name": "created_at", "type": "DateTime64(3)"},
                {"name": "started_at", "type": "Nullable(DateTime64(3))"},
                {"name": "updated_at", "type": "DateTime64(3)"},
                {"name": "job_metadata", "type": "String"},
            ],
        },
        {
            "name": "organization.sinks_ops_log",
            "description": "Historic Logs for all Sink job executions across the organization",
            "dateColumn": "timestamp",
            "engine": {
                "engine": "ReplacingMergeTree",
                "sorting_key": "workspace_id, pipe_id, timestamp",
                "partition_key": "toYYYYMM(timestamp)",
            },
            "columns": [
                {"name": "timestamp", "type": "DateTime"},
                {"name": "workspace_id", "type": "String"},
                {"name": "service", "type": "LowCardinality(String)"},
                {"name": "pipe_id", "type": "String"},
                {"name": "pipe_name", "type": "String"},
                {"name": "result", "type": "LowCardinality(String)"},
                {"name": "error", "type": "Nullable(String)"},
                {"name": "elapsed_time", "type": "Float64"},
                {"name": "job_id", "type": "Nullable(String)"},
                {"name": "read_rows", "type": "UInt64"},
                {"name": "written_rows", "type": "UInt64"},
                {"name": "read_bytes", "type": "UInt64"},
                {"name": "written_bytes", "type": "UInt64"},
                {"name": "cpu_time", "type": "Float32"},
                {"name": "output", "type": "Array(String)"},
                {"name": "parameters", "type": "Map(String, String)"},
                {"name": "options", "type": "Map(String, String)"},
                {"name": "token_name", "type": "String"},
            ],
        },
        {
            "name": "organization.bi_stats_rt",
            "description": "Contains information about all requests to the BI Connector interface for the whole Organization in real time.",
            "dateColumn": "start_datetime",
            "engine": {
                "engine": "MergeTree",
                "sorting_key": "cityHash64(query_normalized), start_datetime",
                "ttl": "start_datetime + toIntervalDay(7)",
            },
            "columns": [
                {"name": "database", "type": "String"},
                {"name": "start_datetime", "type": "DateTime"},
                {"name": "query", "type": "String"},
                {"name": "query_normalized", "type": "String"},
                {"name": "error_code", "type": "Int32"},
                {"name": "error", "type": "Nullable(String)"},
                {"name": "url", "type": "String"},
                {"name": "duration", "type": "UInt64"},
                {"name": "read_rows", "type": "UInt64"},
                {"name": "read_bytes", "type": "UInt64"},
                {"name": "result_rows", "type": "UInt64"},
                {"name": "result_bytes", "type": "UInt64"},
            ],
        },
        {
            "name": "organization.bi_stats",
            "description": "Aggregates the stats in organization.bi_stats_rt by day.",
            "dateColumn": "date",
            "engine": {"engine": "MergeTree", "sorting_key": "database, cityHash64(query_normalized), date"},
            "columns": [
                {"name": "database", "type": "String"},
                {"name": "date", "type": "Date"},
                {"name": "query_normalized", "type": "String"},
                {"name": "view_count", "type": "UInt64"},
                {"name": "error_count", "type": "UInt64"},
                {"name": "avg_duration_state", "type": "AggregateFunction(avg, Float32)"},
                {
                    "name": "quantile_timing_state",
                    "type": "AggregateFunction(quantilesTiming(0.9, 0.95, 0.99), Float64)",
                },
                {"name": "read_bytes_sum", "type": "UInt64"},
                {"name": "read_rows_sum", "type": "UInt64"},
                {"name": "avg_result_rows_state", "type": "AggregateFunction(avg, Float32)"},
                {"name": "avg_result_bytes_state", "type": "AggregateFunction(avg, Float32)"},
            ],
        },
        {
            "name": "organization.metrics_logs",
            "description": "Metrics of your organization's dedicated clusters",
            "dateColumn": "timestamp",
            "engine": {
                "engine": "MergeTree",
                "sorting_key": "timestamp",
                "ttl": "toDate(timestamp) + toIntervalDay(30)",
            },
            "columns": [
                {"name": "timestamp", "type": "DateTime"},
                {"name": "cluster", "type": "LowCardinality(String)"},
                {"name": "host", "type": "LowCardinality(String)"},
                {"name": "metric", "type": "LowCardinality(String)"},
                {"name": "value", "type": "String"},
                {"name": "description", "type": "LowCardinality(String)"},
            ],
        },
        {
            "name": "organization.kafka_ops_log",
            "description": "Contains all operations performed to your Kafka Data Sources during the last 30 days accross the organization.",
            "dateColumn": "timestamp",
            "engine": {
                "engine": "MergeTree",
                "sorting_key": "workspace_id, datasource_id, topic, timestamp",
                "partition_key": "toYYYYMMDD(timestamp)",
                "ttl": "timestamp + toIntervalDay(30)",
            },
            "columns": [
                {"name": "timestamp", "type": "DateTime"},
                {"name": "workspace_id", "type": "String"},
                {"name": "datasource_id", "type": "String"},
                {"name": "topic", "type": "String"},
                {"name": "partition", "type": "Int16"},
                {"name": "msg_type", "type": "String"},
                {"name": "lag", "type": "Int64"},
                {"name": "processed_messages", "type": "Int32"},
                {"name": "processed_bytes", "type": "Int32"},
                {"name": "committed_messages", "type": "Int32"},
                {"name": "msg", "type": "String"},
            ],
        },
        {
            "name": "organization.endpoint_errors",
            "description": "Similar to tinybird.endpoint_errors but with data for all Organization Workspaces.",
            "dateColumn": "start_datetime",
            "engine": {
                "engine": "MergeTree",
                "sorting_key": "start_datetime",
                "partition_key": "toYYYYMM(toDate(start_datetime))",
                "ttl": "start_datetime + toIntervalDay(30)",
            },
            "columns": [
                {"name": "workspace_id", "type": "String"},
                {"name": "start_datetime", "type": "DateTime"},
                {"name": "request_id", "type": "String"},
                {"name": "pipe_id", "type": "String"},
                {"name": "pipe_name", "type": "String"},
                {"name": "params", "type": "Nullable(String)"},
                {"name": "url", "type": "Nullable(String)"},
                {"name": "status_code", "type": "Nullable(Int32)"},
                {"name": "error", "type": "Nullable(String)"},
            ],
        },
        {
            "name": "organization.shared_infra_active_minutes",
            "description": "Contains information about vCPU active minutes consumption aggregated by minute for all Organization workspaces. Only available for Developer and Enterprise plans in shared infrastructure.",
            "dateColumn": "minute",
            "columns": [
                {"name": "minute", "type": "DateTime"},
                {"name": "organization_id", "type": "String"},
                {"name": "organization_name", "type": "String"},
                {"name": "total_cpu_time_in_seconds", "type": "Float64"},
                {"name": "vcpus", "type": "SimpleAggregateFunction(max, Float64)"},
                {"name": "active_minutes", "type": "Float64"},
            ],
        },
        {
            "name": "organization.shared_infra_qps_overages",
            "description": "Contains information about QPS consumption and overages aggregated by second for all Organization workspaces. Only available for Developer and Enterprise plans in shared infrastructure.",
            "dateColumn": "start_datetime",
            "columns": [
                {"name": "start_datetime", "type": "DateTime"},
                {"name": "organization_id", "type": "String"},
                {"name": "organization_name", "type": "String"},
                {"name": "plan_qps", "type": "SimpleAggregateFunction(max, Int64)"},
                {"name": "total_qps", "type": "UInt64"},
                {"name": "overage", "type": "Int64"},
            ],
        },
        {
            "name": "organization.pipe_metrics_by_minute",
            "description": (
                "Contains information about organization.pipe_stats_rt metrics aggregated by minute, "
                "pipe_name and workspace_id for all Organization workspaces. Join with organization.workspaces "
                "to get the workspace name."
            ),
            "dateColumn": "minute_interval",
            "columns": [
                {"name": "minute_interval", "type": "DateTime"},
                {"name": "pipe_name", "type": "String"},
                {"name": "workspace_id", "type": "String"},
                {"name": "request_count", "type": "UInt64"},
                {"name": "avg_duration", "type": "Float64"},
                {"name": "max_duration", "type": "Float64"},
                {"name": "quantiles_05_09_099_duration", "type": "Array(Float64)"},
                {"name": "avg_cpu_time", "type": "Float64"},
                {"name": "max_cpu_time", "type": "Float64"},
                {"name": "quantiles_05_09_099_cpu_time", "type": "Array(Float64)"},
                {"name": "avg_memory_usage", "type": "Float64"},
                {"name": "max_memory_usage", "type": "Float64"},
                {"name": "quantiles_05_09_099_memory_usage", "type": "Array(Float64)"},
                {"name": "total_read_rows", "type": "UInt64"},
                {"name": "total_read_bytes", "type": "UInt64"},
                {"name": "total_result_rows", "type": "UInt64"},
                {"name": "error_count", "type": "UInt64"},
                {"name": "timeout_count", "type": "UInt64"},
                {"name": "rate_limit_count", "type": "UInt64"},
                {"name": "error_500_count", "type": "UInt64"},
            ],
        },
        {
            "name": "organization.datasource_metrics_by_minute",
            "description": (
                "Contains information about organization.datasources_ops_log metrics aggregated by minute, "
                "datasource_name, event_type, pipe_name and workspace_id for all Organization workspaces. "
                "Join with organization.workspaces to get the workspace name."
            ),
            "dateColumn": "minute_interval",
            "columns": [
                {"name": "minute_interval", "type": "DateTime"},
                {"name": "workspace_id", "type": "String"},
                {"name": "datasource_name", "type": "String"},
                {"name": "event_type", "type": "String"},
                {"name": "pipe_name", "type": "String"},
                {"name": "request_count", "type": "UInt64"},
                {"name": "error_count", "type": "UInt64"},
                {"name": "avg_elapsed_time", "type": "Float64"},
                {"name": "max_elapsed_time", "type": "Float64"},
                {"name": "quantiles_05_09_099_elapsed_time", "type": "Array(Float64)"},
                {"name": "avg_cpu_time", "type": "Float64"},
                {"name": "max_cpu_time", "type": "Float64"},
                {"name": "quantiles_05_09_099_cpu_time", "type": "Array(Float64)"},
                {"name": "avg_memory_usage", "type": "Float64"},
                {"name": "max_memory_usage", "type": "Float64"},
                {"name": "quantiles_05_09_099_memory_usage", "type": "Array(Float64)"},
                {"name": "total_read_rows", "type": "UInt64"},
                {"name": "total_read_bytes", "type": "UInt64"},
                {"name": "total_written_rows", "type": "UInt64"},
                {"name": "total_written_bytes", "type": "UInt64"},
            ],
        },
    ]


def get_service_datasources() -> List[Dict[str, Any]]:
    """
    Collect every service datasource definition from both scopes.

    The Tinybird-scoped definitions come first, followed by the
    Organization-scoped ones, in the order each provider returns them.

    Returns:
        List[Dict[str, Any]]: A new list holding the Tinybird definitions
        followed by the Organization definitions.
    """
    # Start from a copy of the Tinybird entries so the provider's own list
    # object is never the one handed back to the caller.
    all_datasources = list(get_tinybird_service_datasources())
    all_datasources.extend(get_organization_service_datasources())
    return all_datasources


def get_service_datasource_by_name(name: str) -> Optional[Dict[str, Any]]:
    """
    Look up a single service datasource definition by its full name.

    The comparison is an exact string match against each definition's
    "name" entry, so the scope prefix must be included
    (e.g. tinybird.datasources_ops_log).

    Args:
        name: The full, scope-prefixed name of the service datasource.

    Returns:
        Optional[Dict[str, Any]]: The first definition whose name matches,
        or None when no definition has that name.
    """
    matches = (candidate for candidate in get_service_datasources() if candidate["name"] == name)
    # next() with a default yields the first match, or None if there is none.
    return next(matches, None)
