kiln_ai.adapters.model_adapters.base_adapter

  1import json
  2from abc import ABCMeta, abstractmethod
  3from dataclasses import dataclass
  4from typing import Dict, Literal, Tuple
  5
  6from kiln_ai.adapters.ml_model_list import KilnModelProvider, StructuredOutputMode
  7from kiln_ai.adapters.parsers.parser_registry import model_parser_from_id
  8from kiln_ai.adapters.prompt_builders import BasePromptBuilder, SimplePromptBuilder
  9from kiln_ai.adapters.provider_tools import kiln_model_provider_from
 10from kiln_ai.adapters.run_output import RunOutput
 11from kiln_ai.datamodel import (
 12    DataSource,
 13    DataSourceType,
 14    Task,
 15    TaskOutput,
 16    TaskRun,
 17)
 18from kiln_ai.datamodel.json_schema import validate_schema
 19from kiln_ai.utils.config import Config
 20
 21
 22@dataclass
 23class AdapterInfo:
 24    adapter_name: str
 25    model_name: str
 26    model_provider: str
 27    prompt_builder_name: str
 28    prompt_id: str | None = None
 29
 30
 31COT_FINAL_ANSWER_PROMPT = "Considering the above, return a final result."
 32
 33
 34class BaseAdapter(metaclass=ABCMeta):
 35    """Base class for AI model adapters that handle task execution.
 36
 37    This abstract class provides the foundation for implementing model-specific adapters
 38    that can process tasks with structured or unstructured inputs/outputs. It handles
 39    input/output validation, prompt building, and run tracking.
 40
 41    Attributes:
 42        prompt_builder (BasePromptBuilder): Builder for constructing prompts for the model
 43        kiln_task (Task): The task configuration and metadata
 44        output_schema (dict | None): JSON schema for validating structured outputs
 45        input_schema (dict | None): JSON schema for validating structured inputs
 46    """
 47
 48    def __init__(
 49        self,
 50        kiln_task: Task,
 51        model_name: str,
 52        model_provider_name: str,
 53        prompt_builder: BasePromptBuilder | None = None,
 54        tags: list[str] | None = None,
 55    ):
 56        self.prompt_builder = prompt_builder or SimplePromptBuilder(kiln_task)
 57        self.kiln_task = kiln_task
 58        self.output_schema = self.kiln_task.output_json_schema
 59        self.input_schema = self.kiln_task.input_json_schema
 60        self.default_tags = tags
 61        self.model_name = model_name
 62        self.model_provider_name = model_provider_name
 63        self._model_provider: KilnModelProvider | None = None
 64
 65    def model_provider(self) -> KilnModelProvider:
 66        """
 67        Lazy load the model provider for this adapter.
 68        """
 69        if self._model_provider is not None:
 70            return self._model_provider
 71        if not self.model_name or not self.model_provider_name:
 72            raise ValueError("model_name and model_provider_name must be provided")
 73        self._model_provider = kiln_model_provider_from(
 74            self.model_name, self.model_provider_name
 75        )
 76        if not self._model_provider:
 77            raise ValueError(
 78                f"model_provider_name {self.model_provider_name} not found for model {self.model_name}"
 79            )
 80        return self._model_provider
 81
 82    async def invoke_returning_raw(
 83        self,
 84        input: Dict | str,
 85        input_source: DataSource | None = None,
 86    ) -> Dict | str:
 87        result = await self.invoke(input, input_source)
 88        if self.kiln_task.output_json_schema is None:
 89            return result.output.output
 90        else:
 91            return json.loads(result.output.output)
 92
 93    async def invoke(
 94        self,
 95        input: Dict | str,
 96        input_source: DataSource | None = None,
 97    ) -> TaskRun:
 98        # validate input
 99        if self.input_schema is not None:
100            if not isinstance(input, dict):
101                raise ValueError(f"structured input is not a dict: {input}")
102            validate_schema(input, self.input_schema)
103
104        # Run
105        run_output = await self._run(input)
106
107        # Parse
108        provider = self.model_provider()
109        parser = model_parser_from_id(provider.parser)(
110            structured_output=self.has_structured_output()
111        )
112        parsed_output = parser.parse_output(original_output=run_output)
113
114        # validate output
115        if self.output_schema is not None:
116            if not isinstance(parsed_output.output, dict):
117                raise RuntimeError(
118                    f"structured response is not a dict: {parsed_output.output}"
119                )
120            validate_schema(parsed_output.output, self.output_schema)
121        else:
122            if not isinstance(parsed_output.output, str):
123                raise RuntimeError(
124                    f"response is not a string for non-structured task: {parsed_output.output}"
125                )
126
127        # Generate the run and output
128        run = self.generate_run(input, input_source, parsed_output)
129
130        # Save the run if configured to do so, and we have a path to save to
131        if Config.shared().autosave_runs and self.kiln_task.path is not None:
132            run.save_to_file()
133        else:
134            # Clear the ID to indicate it's not persisted
135            run.id = None
136
137        return run
138
139    def has_structured_output(self) -> bool:
140        return self.output_schema is not None
141
142    @abstractmethod
143    def adapter_info(self) -> AdapterInfo:
144        pass
145
146    @abstractmethod
147    async def _run(self, input: Dict | str) -> RunOutput:
148        pass
149
150    def build_prompt(self) -> str:
151        # The prompt builder needs to know if we want to inject formatting instructions
152        provider = self.model_provider()
153        add_json_instructions = self.has_structured_output() and (
154            provider.structured_output_mode == StructuredOutputMode.json_instructions
155            or provider.structured_output_mode
156            == StructuredOutputMode.json_instruction_and_object
157        )
158
159        return self.prompt_builder.build_prompt(
160            include_json_instructions=add_json_instructions
161        )
162
163    def run_strategy(
164        self,
165    ) -> Tuple[Literal["cot_as_message", "cot_two_call", "basic"], str | None]:
166        # Determine the run strategy for COT prompting. 3 options:
167        # 1. "Thinking" LLM designed to output thinking in a structured format plus a COT prompt: we make 1 call to the LLM, which outputs thinking in a structured format. We include the thinking instuctions as a message.
168        # 2. Normal LLM with COT prompt: we make 2 calls to the LLM - one for thinking and one for the final response. This helps us use the LLM's structured output modes (json_schema, tools, etc), which can't be used in a single call. It also separates the thinking from the final response.
169        # 3. Non chain of thought: we make 1 call to the LLM, with no COT prompt.
170        cot_prompt = self.prompt_builder.chain_of_thought_prompt()
171        reasoning_capable = self.model_provider().reasoning_capable
172
173        if cot_prompt and reasoning_capable:
174            # 1: "Thinking" LLM designed to output thinking in a structured format
175            # A simple message with the COT prompt appended to the message list is sufficient
176            return "cot_as_message", cot_prompt
177        elif cot_prompt:
178            # 2: Unstructured output with COT
179            # Two calls to separate the thinking from the final response
180            return "cot_two_call", cot_prompt
181        else:
182            return "basic", None
183
184    # create a run and task output
185    def generate_run(
186        self, input: Dict | str, input_source: DataSource | None, run_output: RunOutput
187    ) -> TaskRun:
188        # Convert input and output to JSON strings if they are dictionaries
189        input_str = (
190            json.dumps(input, ensure_ascii=False) if isinstance(input, dict) else input
191        )
192        output_str = (
193            json.dumps(run_output.output, ensure_ascii=False)
194            if isinstance(run_output.output, dict)
195            else run_output.output
196        )
197
198        # If no input source is provided, use the human data source
199        if input_source is None:
200            input_source = DataSource(
201                type=DataSourceType.human,
202                properties={"created_by": Config.shared().user_id},
203            )
204
205        new_task_run = TaskRun(
206            parent=self.kiln_task,
207            input=input_str,
208            input_source=input_source,
209            output=TaskOutput(
210                output=output_str,
211                # Synthetic since an adapter, not a human, is creating this
212                source=DataSource(
213                    type=DataSourceType.synthetic,
214                    properties=self._properties_for_task_output(),
215                ),
216            ),
217            intermediate_outputs=run_output.intermediate_outputs,
218            tags=self.default_tags or [],
219        )
220
221        return new_task_run
222
223    def _properties_for_task_output(self) -> Dict[str, str | int | float]:
224        props = {}
225
226        # adapter info
227        adapter_info = self.adapter_info()
228        props["adapter_name"] = adapter_info.adapter_name
229        props["model_name"] = adapter_info.model_name
230        props["model_provider"] = adapter_info.model_provider
231        props["prompt_builder_name"] = adapter_info.prompt_builder_name
232        if adapter_info.prompt_id is not None:
233            props["prompt_id"] = adapter_info.prompt_id
234
235        return props
@dataclass
class AdapterInfo:
23@dataclass
24class AdapterInfo:
25    adapter_name: str
26    model_name: str
27    model_provider: str
28    prompt_builder_name: str
29    prompt_id: str | None = None
AdapterInfo( adapter_name: str, model_name: str, model_provider: str, prompt_builder_name: str, prompt_id: str | None = None)
adapter_name: str
model_name: str
model_provider: str
prompt_builder_name: str
prompt_id: str | None = None
COT_FINAL_ANSWER_PROMPT = 'Considering the above, return a final result.'
class BaseAdapter:
 35class BaseAdapter(metaclass=ABCMeta):
 36    """Base class for AI model adapters that handle task execution.
 37
 38    This abstract class provides the foundation for implementing model-specific adapters
 39    that can process tasks with structured or unstructured inputs/outputs. It handles
 40    input/output validation, prompt building, and run tracking.
 41
 42    Attributes:
 43        prompt_builder (BasePromptBuilder): Builder for constructing prompts for the model
 44        kiln_task (Task): The task configuration and metadata
 45        output_schema (dict | None): JSON schema for validating structured outputs
 46        input_schema (dict | None): JSON schema for validating structured inputs
 47    """
 48
 49    def __init__(
 50        self,
 51        kiln_task: Task,
 52        model_name: str,
 53        model_provider_name: str,
 54        prompt_builder: BasePromptBuilder | None = None,
 55        tags: list[str] | None = None,
 56    ):
 57        self.prompt_builder = prompt_builder or SimplePromptBuilder(kiln_task)
 58        self.kiln_task = kiln_task
 59        self.output_schema = self.kiln_task.output_json_schema
 60        self.input_schema = self.kiln_task.input_json_schema
 61        self.default_tags = tags
 62        self.model_name = model_name
 63        self.model_provider_name = model_provider_name
 64        self._model_provider: KilnModelProvider | None = None
 65
 66    def model_provider(self) -> KilnModelProvider:
 67        """
 68        Lazy load the model provider for this adapter.
 69        """
 70        if self._model_provider is not None:
 71            return self._model_provider
 72        if not self.model_name or not self.model_provider_name:
 73            raise ValueError("model_name and model_provider_name must be provided")
 74        self._model_provider = kiln_model_provider_from(
 75            self.model_name, self.model_provider_name
 76        )
 77        if not self._model_provider:
 78            raise ValueError(
 79                f"model_provider_name {self.model_provider_name} not found for model {self.model_name}"
 80            )
 81        return self._model_provider
 82
 83    async def invoke_returning_raw(
 84        self,
 85        input: Dict | str,
 86        input_source: DataSource | None = None,
 87    ) -> Dict | str:
 88        result = await self.invoke(input, input_source)
 89        if self.kiln_task.output_json_schema is None:
 90            return result.output.output
 91        else:
 92            return json.loads(result.output.output)
 93
 94    async def invoke(
 95        self,
 96        input: Dict | str,
 97        input_source: DataSource | None = None,
 98    ) -> TaskRun:
 99        # validate input
100        if self.input_schema is not None:
101            if not isinstance(input, dict):
102                raise ValueError(f"structured input is not a dict: {input}")
103            validate_schema(input, self.input_schema)
104
105        # Run
106        run_output = await self._run(input)
107
108        # Parse
109        provider = self.model_provider()
110        parser = model_parser_from_id(provider.parser)(
111            structured_output=self.has_structured_output()
112        )
113        parsed_output = parser.parse_output(original_output=run_output)
114
115        # validate output
116        if self.output_schema is not None:
117            if not isinstance(parsed_output.output, dict):
118                raise RuntimeError(
119                    f"structured response is not a dict: {parsed_output.output}"
120                )
121            validate_schema(parsed_output.output, self.output_schema)
122        else:
123            if not isinstance(parsed_output.output, str):
124                raise RuntimeError(
125                    f"response is not a string for non-structured task: {parsed_output.output}"
126                )
127
128        # Generate the run and output
129        run = self.generate_run(input, input_source, parsed_output)
130
131        # Save the run if configured to do so, and we have a path to save to
132        if Config.shared().autosave_runs and self.kiln_task.path is not None:
133            run.save_to_file()
134        else:
135            # Clear the ID to indicate it's not persisted
136            run.id = None
137
138        return run
139
140    def has_structured_output(self) -> bool:
141        return self.output_schema is not None
142
143    @abstractmethod
144    def adapter_info(self) -> AdapterInfo:
145        pass
146
147    @abstractmethod
148    async def _run(self, input: Dict | str) -> RunOutput:
149        pass
150
151    def build_prompt(self) -> str:
152        # The prompt builder needs to know if we want to inject formatting instructions
153        provider = self.model_provider()
154        add_json_instructions = self.has_structured_output() and (
155            provider.structured_output_mode == StructuredOutputMode.json_instructions
156            or provider.structured_output_mode
157            == StructuredOutputMode.json_instruction_and_object
158        )
159
160        return self.prompt_builder.build_prompt(
161            include_json_instructions=add_json_instructions
162        )
163
164    def run_strategy(
165        self,
166    ) -> Tuple[Literal["cot_as_message", "cot_two_call", "basic"], str | None]:
167        # Determine the run strategy for COT prompting. 3 options:
 168        # 1. "Thinking" LLM designed to output thinking in a structured format plus a COT prompt: we make 1 call to the LLM, which outputs thinking in a structured format. We include the thinking instructions as a message.
169        # 2. Normal LLM with COT prompt: we make 2 calls to the LLM - one for thinking and one for the final response. This helps us use the LLM's structured output modes (json_schema, tools, etc), which can't be used in a single call. It also separates the thinking from the final response.
170        # 3. Non chain of thought: we make 1 call to the LLM, with no COT prompt.
171        cot_prompt = self.prompt_builder.chain_of_thought_prompt()
172        reasoning_capable = self.model_provider().reasoning_capable
173
174        if cot_prompt and reasoning_capable:
175            # 1: "Thinking" LLM designed to output thinking in a structured format
176            # A simple message with the COT prompt appended to the message list is sufficient
177            return "cot_as_message", cot_prompt
178        elif cot_prompt:
179            # 2: Unstructured output with COT
180            # Two calls to separate the thinking from the final response
181            return "cot_two_call", cot_prompt
182        else:
183            return "basic", None
184
185    # create a run and task output
186    def generate_run(
187        self, input: Dict | str, input_source: DataSource | None, run_output: RunOutput
188    ) -> TaskRun:
189        # Convert input and output to JSON strings if they are dictionaries
190        input_str = (
191            json.dumps(input, ensure_ascii=False) if isinstance(input, dict) else input
192        )
193        output_str = (
194            json.dumps(run_output.output, ensure_ascii=False)
195            if isinstance(run_output.output, dict)
196            else run_output.output
197        )
198
199        # If no input source is provided, use the human data source
200        if input_source is None:
201            input_source = DataSource(
202                type=DataSourceType.human,
203                properties={"created_by": Config.shared().user_id},
204            )
205
206        new_task_run = TaskRun(
207            parent=self.kiln_task,
208            input=input_str,
209            input_source=input_source,
210            output=TaskOutput(
211                output=output_str,
212                # Synthetic since an adapter, not a human, is creating this
213                source=DataSource(
214                    type=DataSourceType.synthetic,
215                    properties=self._properties_for_task_output(),
216                ),
217            ),
218            intermediate_outputs=run_output.intermediate_outputs,
219            tags=self.default_tags or [],
220        )
221
222        return new_task_run
223
224    def _properties_for_task_output(self) -> Dict[str, str | int | float]:
225        props = {}
226
227        # adapter info
228        adapter_info = self.adapter_info()
229        props["adapter_name"] = adapter_info.adapter_name
230        props["model_name"] = adapter_info.model_name
231        props["model_provider"] = adapter_info.model_provider
232        props["prompt_builder_name"] = adapter_info.prompt_builder_name
233        if adapter_info.prompt_id is not None:
234            props["prompt_id"] = adapter_info.prompt_id
235
236        return props

Base class for AI model adapters that handle task execution.

This abstract class provides the foundation for implementing model-specific adapters that can process tasks with structured or unstructured inputs/outputs. It handles input/output validation, prompt building, and run tracking.

Attributes: prompt_builder (BasePromptBuilder): Builder for constructing prompts for the model kiln_task (Task): The task configuration and metadata output_schema (dict | None): JSON schema for validating structured outputs input_schema (dict | None): JSON schema for validating structured inputs

prompt_builder
kiln_task
output_schema
input_schema
default_tags
model_name
model_provider_name
def model_provider(self) -> kiln_ai.adapters.ml_model_list.KilnModelProvider:
66    def model_provider(self) -> KilnModelProvider:
67        """
68        Lazy load the model provider for this adapter.
69        """
70        if self._model_provider is not None:
71            return self._model_provider
72        if not self.model_name or not self.model_provider_name:
73            raise ValueError("model_name and model_provider_name must be provided")
74        self._model_provider = kiln_model_provider_from(
75            self.model_name, self.model_provider_name
76        )
77        if not self._model_provider:
78            raise ValueError(
79                f"model_provider_name {self.model_provider_name} not found for model {self.model_name}"
80            )
81        return self._model_provider

Lazy load the model provider for this adapter.

async def invoke_returning_raw( self, input: Union[Dict, str], input_source: kiln_ai.datamodel.DataSource | None = None) -> Union[Dict, str]:
83    async def invoke_returning_raw(
84        self,
85        input: Dict | str,
86        input_source: DataSource | None = None,
87    ) -> Dict | str:
88        result = await self.invoke(input, input_source)
89        if self.kiln_task.output_json_schema is None:
90            return result.output.output
91        else:
92            return json.loads(result.output.output)
async def invoke( self, input: Union[Dict, str], input_source: kiln_ai.datamodel.DataSource | None = None) -> kiln_ai.datamodel.TaskRun:
 94    async def invoke(
 95        self,
 96        input: Dict | str,
 97        input_source: DataSource | None = None,
 98    ) -> TaskRun:
 99        # validate input
100        if self.input_schema is not None:
101            if not isinstance(input, dict):
102                raise ValueError(f"structured input is not a dict: {input}")
103            validate_schema(input, self.input_schema)
104
105        # Run
106        run_output = await self._run(input)
107
108        # Parse
109        provider = self.model_provider()
110        parser = model_parser_from_id(provider.parser)(
111            structured_output=self.has_structured_output()
112        )
113        parsed_output = parser.parse_output(original_output=run_output)
114
115        # validate output
116        if self.output_schema is not None:
117            if not isinstance(parsed_output.output, dict):
118                raise RuntimeError(
119                    f"structured response is not a dict: {parsed_output.output}"
120                )
121            validate_schema(parsed_output.output, self.output_schema)
122        else:
123            if not isinstance(parsed_output.output, str):
124                raise RuntimeError(
125                    f"response is not a string for non-structured task: {parsed_output.output}"
126                )
127
128        # Generate the run and output
129        run = self.generate_run(input, input_source, parsed_output)
130
131        # Save the run if configured to do so, and we have a path to save to
132        if Config.shared().autosave_runs and self.kiln_task.path is not None:
133            run.save_to_file()
134        else:
135            # Clear the ID to indicate it's not persisted
136            run.id = None
137
138        return run
def has_structured_output(self) -> bool:
140    def has_structured_output(self) -> bool:
141        return self.output_schema is not None
@abstractmethod
def adapter_info(self) -> AdapterInfo:
143    @abstractmethod
144    def adapter_info(self) -> AdapterInfo:
145        pass
def build_prompt(self) -> str:
151    def build_prompt(self) -> str:
152        # The prompt builder needs to know if we want to inject formatting instructions
153        provider = self.model_provider()
154        add_json_instructions = self.has_structured_output() and (
155            provider.structured_output_mode == StructuredOutputMode.json_instructions
156            or provider.structured_output_mode
157            == StructuredOutputMode.json_instruction_and_object
158        )
159
160        return self.prompt_builder.build_prompt(
161            include_json_instructions=add_json_instructions
162        )
def run_strategy( self) -> Tuple[Literal['cot_as_message', 'cot_two_call', 'basic'], str | None]:
164    def run_strategy(
165        self,
166    ) -> Tuple[Literal["cot_as_message", "cot_two_call", "basic"], str | None]:
167        # Determine the run strategy for COT prompting. 3 options:
 168        # 1. "Thinking" LLM designed to output thinking in a structured format plus a COT prompt: we make 1 call to the LLM, which outputs thinking in a structured format. We include the thinking instructions as a message.
169        # 2. Normal LLM with COT prompt: we make 2 calls to the LLM - one for thinking and one for the final response. This helps us use the LLM's structured output modes (json_schema, tools, etc), which can't be used in a single call. It also separates the thinking from the final response.
170        # 3. Non chain of thought: we make 1 call to the LLM, with no COT prompt.
171        cot_prompt = self.prompt_builder.chain_of_thought_prompt()
172        reasoning_capable = self.model_provider().reasoning_capable
173
174        if cot_prompt and reasoning_capable:
175            # 1: "Thinking" LLM designed to output thinking in a structured format
176            # A simple message with the COT prompt appended to the message list is sufficient
177            return "cot_as_message", cot_prompt
178        elif cot_prompt:
179            # 2: Unstructured output with COT
180            # Two calls to separate the thinking from the final response
181            return "cot_two_call", cot_prompt
182        else:
183            return "basic", None
def generate_run( self, input: Union[Dict, str], input_source: kiln_ai.datamodel.DataSource | None, run_output: kiln_ai.adapters.run_output.RunOutput) -> kiln_ai.datamodel.TaskRun:
186    def generate_run(
187        self, input: Dict | str, input_source: DataSource | None, run_output: RunOutput
188    ) -> TaskRun:
189        # Convert input and output to JSON strings if they are dictionaries
190        input_str = (
191            json.dumps(input, ensure_ascii=False) if isinstance(input, dict) else input
192        )
193        output_str = (
194            json.dumps(run_output.output, ensure_ascii=False)
195            if isinstance(run_output.output, dict)
196            else run_output.output
197        )
198
199        # If no input source is provided, use the human data source
200        if input_source is None:
201            input_source = DataSource(
202                type=DataSourceType.human,
203                properties={"created_by": Config.shared().user_id},
204            )
205
206        new_task_run = TaskRun(
207            parent=self.kiln_task,
208            input=input_str,
209            input_source=input_source,
210            output=TaskOutput(
211                output=output_str,
212                # Synthetic since an adapter, not a human, is creating this
213                source=DataSource(
214                    type=DataSourceType.synthetic,
215                    properties=self._properties_for_task_output(),
216                ),
217            ),
218            intermediate_outputs=run_output.intermediate_outputs,
219            tags=self.default_tags or [],
220        )
221
222        return new_task_run