kiln_ai.adapters.model_adapters.base_adapter
import json
from abc import ABCMeta, abstractmethod
from dataclasses import dataclass
from typing import Dict, Literal, Tuple

from kiln_ai.adapters.ml_model_list import KilnModelProvider, StructuredOutputMode
from kiln_ai.adapters.parsers.parser_registry import model_parser_from_id
from kiln_ai.adapters.prompt_builders import BasePromptBuilder, SimplePromptBuilder
from kiln_ai.adapters.provider_tools import kiln_model_provider_from
from kiln_ai.adapters.run_output import RunOutput
from kiln_ai.datamodel import (
    DataSource,
    DataSourceType,
    Task,
    TaskOutput,
    TaskRun,
)
from kiln_ai.datamodel.json_schema import validate_schema
from kiln_ai.utils.config import Config


@dataclass
class AdapterInfo:
    """Metadata identifying the adapter, model, provider and prompt used for a run."""

    adapter_name: str
    model_name: str
    model_provider: str
    prompt_builder_name: str
    prompt_id: str | None = None


# Sent as the final user message in two-call chain-of-thought runs, asking the
# model for its answer after it has emitted its reasoning.
COT_FINAL_ANSWER_PROMPT = "Considering the above, return a final result."


class BaseAdapter(metaclass=ABCMeta):
    """Base class for AI model adapters that handle task execution.

    This abstract class provides the foundation for implementing model-specific
    adapters that can process tasks with structured or unstructured
    inputs/outputs. It handles input/output validation, prompt building, and
    run tracking.

    Attributes:
        prompt_builder (BasePromptBuilder): Builder for constructing prompts for the model
        kiln_task (Task): The task configuration and metadata
        output_schema (dict | None): JSON schema for validating structured outputs
        input_schema (dict | None): JSON schema for validating structured inputs
    """

    def __init__(
        self,
        kiln_task: Task,
        model_name: str,
        model_provider_name: str,
        prompt_builder: BasePromptBuilder | None = None,
        tags: list[str] | None = None,
    ):
        self.prompt_builder = prompt_builder or SimplePromptBuilder(kiln_task)
        self.kiln_task = kiln_task
        self.output_schema = self.kiln_task.output_json_schema
        self.input_schema = self.kiln_task.input_json_schema
        self.default_tags = tags
        self.model_name = model_name
        self.model_provider_name = model_provider_name
        self._model_provider: KilnModelProvider | None = None

    def model_provider(self) -> KilnModelProvider:
        """Resolve and cache the model provider for this adapter (lazy).

        Raises:
            ValueError: if either name is missing, or no matching provider exists.
        """
        if self._model_provider is not None:
            return self._model_provider
        if not self.model_name or not self.model_provider_name:
            raise ValueError("model_name and model_provider_name must be provided")
        resolved = kiln_model_provider_from(self.model_name, self.model_provider_name)
        if not resolved:
            raise ValueError(
                f"model_provider_name {self.model_provider_name} not found for model {self.model_name}"
            )
        self._model_provider = resolved
        return resolved

    async def invoke_returning_raw(
        self,
        input: Dict | str,
        input_source: DataSource | None = None,
    ) -> Dict | str:
        """Run the task and return only the output: parsed JSON for structured
        tasks, the raw string otherwise."""
        run = await self.invoke(input, input_source)
        raw = run.output.output
        if self.kiln_task.output_json_schema is None:
            return raw
        return json.loads(raw)

    async def invoke(
        self,
        input: Dict | str,
        input_source: DataSource | None = None,
    ) -> TaskRun:
        """Validate input, run the model, parse/validate output, and build a TaskRun.

        The run is persisted when autosave is enabled and the task has a path;
        otherwise its id is cleared to signal it was not saved.
        """
        # Structured tasks require a dict input matching the input schema.
        if self.input_schema is not None:
            if not isinstance(input, dict):
                raise ValueError(f"structured input is not a dict: {input}")
            validate_schema(input, self.input_schema)

        model_output = await self._run(input)

        # Some providers need their raw output massaged by a registered parser.
        provider = self.model_provider()
        parser = model_parser_from_id(provider.parser)(
            structured_output=self.has_structured_output()
        )
        parsed = parser.parse_output(original_output=model_output)

        # Structured output must be a schema-valid dict; otherwise a string.
        if self.output_schema is not None:
            if not isinstance(parsed.output, dict):
                raise RuntimeError(
                    f"structured response is not a dict: {parsed.output}"
                )
            validate_schema(parsed.output, self.output_schema)
        elif not isinstance(parsed.output, str):
            raise RuntimeError(
                f"response is not a string for non-structured task: {parsed.output}"
            )

        run = self.generate_run(input, input_source, parsed)

        # Save the run if configured to do so, and we have a path to save to.
        if Config.shared().autosave_runs and self.kiln_task.path is not None:
            run.save_to_file()
        else:
            # Clear the ID to indicate it's not persisted.
            run.id = None

        return run

    def has_structured_output(self) -> bool:
        """True when the task declares a JSON schema for its output."""
        return self.output_schema is not None

    @abstractmethod
    def adapter_info(self) -> AdapterInfo:
        pass

    @abstractmethod
    async def _run(self, input: Dict | str) -> RunOutput:
        pass

    def build_prompt(self) -> str:
        """Build the prompt, injecting JSON formatting instructions when the
        provider relies on instruction-based structured output."""
        provider = self.model_provider()
        instruction_modes = (
            StructuredOutputMode.json_instructions,
            StructuredOutputMode.json_instruction_and_object,
        )
        add_json_instructions = (
            self.has_structured_output()
            and provider.structured_output_mode in instruction_modes
        )
        return self.prompt_builder.build_prompt(
            include_json_instructions=add_json_instructions
        )

    def run_strategy(
        self,
    ) -> Tuple[Literal["cot_as_message", "cot_two_call", "basic"], str | None]:
        """Select the chain-of-thought (COT) run strategy.

        - "cot_as_message": reasoning-capable model with a COT prompt — one
          call, with the thinking instructions included as a message.
        - "cot_two_call": ordinary model with a COT prompt — two calls so the
          thinking stays separate from the final response and structured output
          modes (json_schema, tools, etc.) remain usable.
        - "basic": no COT prompt — a single plain call.
        """
        cot_prompt = self.prompt_builder.chain_of_thought_prompt()
        reasoning_native = self.model_provider().reasoning_capable
        if cot_prompt and reasoning_native:
            return "cot_as_message", cot_prompt
        if cot_prompt:
            return "cot_two_call", cot_prompt
        return "basic", None

    def generate_run(
        self, input: Dict | str, input_source: DataSource | None, run_output: RunOutput
    ) -> TaskRun:
        """Create (but do not save) a TaskRun for the given input/output pair."""
        # Dict inputs/outputs are stored as JSON strings.
        input_str = (
            json.dumps(input, ensure_ascii=False) if isinstance(input, dict) else input
        )
        output_str = (
            json.dumps(run_output.output, ensure_ascii=False)
            if isinstance(run_output.output, dict)
            else run_output.output
        )

        # Default to a human data source attributed to the configured user.
        if input_source is None:
            input_source = DataSource(
                type=DataSourceType.human,
                properties={"created_by": Config.shared().user_id},
            )

        return TaskRun(
            parent=self.kiln_task,
            input=input_str,
            input_source=input_source,
            output=TaskOutput(
                output=output_str,
                # Synthetic since an adapter, not a human, is creating this
                source=DataSource(
                    type=DataSourceType.synthetic,
                    properties=self._properties_for_task_output(),
                ),
            ),
            intermediate_outputs=run_output.intermediate_outputs,
            tags=self.default_tags or [],
        )

    def _properties_for_task_output(self) -> Dict[str, str | int | float]:
        """Provenance properties recorded on the synthetic TaskOutput source."""
        info = self.adapter_info()
        props: Dict[str, str | int | float] = {
            "adapter_name": info.adapter_name,
            "model_name": info.model_name,
            "model_provider": info.model_provider,
            "prompt_builder_name": info.prompt_builder_name,
        }
        if info.prompt_id is not None:
            props["prompt_id"] = info.prompt_id
        return props
@dataclass
class
AdapterInfo:
23@dataclass 24class AdapterInfo: 25 adapter_name: str 26 model_name: str 27 model_provider: str 28 prompt_builder_name: str 29 prompt_id: str | None = None
# Follow-up message used by two-call chain-of-thought runs: after the model has
# produced its reasoning, this asks it to state the final answer.
COT_FINAL_ANSWER_PROMPT = "Considering the above, return a final result."
class BaseAdapter(metaclass=ABCMeta):
    """Base class for AI model adapters that handle task execution.

    This abstract class provides the foundation for implementing model-specific
    adapters that can process tasks with structured or unstructured
    inputs/outputs. It handles input/output validation, prompt building, and
    run tracking.

    Attributes:
        prompt_builder (BasePromptBuilder): Builder for constructing prompts for the model
        kiln_task (Task): The task configuration and metadata
        output_schema (dict | None): JSON schema for validating structured outputs
        input_schema (dict | None): JSON schema for validating structured inputs
    """

    def __init__(
        self,
        kiln_task: Task,
        model_name: str,
        model_provider_name: str,
        prompt_builder: BasePromptBuilder | None = None,
        tags: list[str] | None = None,
    ):
        # Fall back to the simple prompt builder when none is supplied.
        self.prompt_builder = prompt_builder or SimplePromptBuilder(kiln_task)
        self.kiln_task = kiln_task
        self.output_schema = self.kiln_task.output_json_schema
        self.input_schema = self.kiln_task.input_json_schema
        self.default_tags = tags
        self.model_name = model_name
        self.model_provider_name = model_provider_name
        # Resolved lazily by model_provider().
        self._model_provider: KilnModelProvider | None = None

    def model_provider(self) -> KilnModelProvider:
        """Lazy load the model provider for this adapter.

        Raises:
            ValueError: if the names are missing or no matching provider exists.
        """
        if self._model_provider is not None:
            return self._model_provider
        if not self.model_name or not self.model_provider_name:
            raise ValueError("model_name and model_provider_name must be provided")
        found = kiln_model_provider_from(self.model_name, self.model_provider_name)
        if not found:
            raise ValueError(
                f"model_provider_name {self.model_provider_name} not found for model {self.model_name}"
            )
        self._model_provider = found
        return found

    async def invoke_returning_raw(
        self,
        input: Dict | str,
        input_source: DataSource | None = None,
    ) -> Dict | str:
        """Invoke the task and return just the output value (dict for
        structured tasks, str otherwise)."""
        result = await self.invoke(input, input_source)
        if self.kiln_task.output_json_schema is None:
            return result.output.output
        return json.loads(result.output.output)

    async def invoke(
        self,
        input: Dict | str,
        input_source: DataSource | None = None,
    ) -> TaskRun:
        """Run the task end-to-end, returning a TaskRun record.

        Validates structured input, calls the model, parses and validates the
        output, then builds (and optionally autosaves) the TaskRun.
        """
        # Structured input must be a dict that matches the input schema.
        if self.input_schema is not None:
            if not isinstance(input, dict):
                raise ValueError(f"structured input is not a dict: {input}")
            validate_schema(input, self.input_schema)

        raw = await self._run(input)

        # The provider's registered parser may strip wrappers from the output.
        provider = self.model_provider()
        parser = model_parser_from_id(provider.parser)(
            structured_output=self.has_structured_output()
        )
        final = parser.parse_output(original_output=raw)

        # Validate shape of the parsed output against the task contract.
        if self.output_schema is not None:
            if not isinstance(final.output, dict):
                raise RuntimeError(
                    f"structured response is not a dict: {final.output}"
                )
            validate_schema(final.output, self.output_schema)
        else:
            if not isinstance(final.output, str):
                raise RuntimeError(
                    f"response is not a string for non-structured task: {final.output}"
                )

        run = self.generate_run(input, input_source, final)

        # Persist only when autosave is on and the task has a file path.
        if Config.shared().autosave_runs and self.kiln_task.path is not None:
            run.save_to_file()
        else:
            # Clear the ID to indicate it's not persisted
            run.id = None

        return run

    def has_structured_output(self) -> bool:
        """Whether the task declares a JSON output schema."""
        return self.output_schema is not None

    @abstractmethod
    def adapter_info(self) -> AdapterInfo:
        pass

    @abstractmethod
    async def _run(self, input: Dict | str) -> RunOutput:
        pass

    def build_prompt(self) -> str:
        """Build the prompt; the builder needs to know whether to inject JSON
        formatting instructions for instruction-based structured output."""
        provider = self.model_provider()
        needs_json = self.has_structured_output() and (
            provider.structured_output_mode == StructuredOutputMode.json_instructions
            or provider.structured_output_mode
            == StructuredOutputMode.json_instruction_and_object
        )
        return self.prompt_builder.build_prompt(include_json_instructions=needs_json)

    def run_strategy(
        self,
    ) -> Tuple[Literal["cot_as_message", "cot_two_call", "basic"], str | None]:
        """Determine the run strategy for chain-of-thought (COT) prompting.

        Three options:
        1. "cot_as_message" — a "thinking" LLM plus a COT prompt: one call,
           with the thinking instructions included as a message.
        2. "cot_two_call" — a normal LLM with a COT prompt: two calls, one for
           thinking and one for the final response, which keeps structured
           output modes (json_schema, tools, etc.) usable.
        3. "basic" — no COT prompt: a single call.
        """
        cot_prompt = self.prompt_builder.chain_of_thought_prompt()
        reasoning_capable = self.model_provider().reasoning_capable

        if cot_prompt and reasoning_capable:
            return "cot_as_message", cot_prompt
        elif cot_prompt:
            return "cot_two_call", cot_prompt
        else:
            return "basic", None

    def generate_run(
        self, input: Dict | str, input_source: DataSource | None, run_output: RunOutput
    ) -> TaskRun:
        """Create a TaskRun (not saved) from the input and the parsed output."""
        # Serialize dict inputs/outputs to JSON strings for storage.
        input_str = (
            json.dumps(input, ensure_ascii=False) if isinstance(input, dict) else input
        )
        output_str = (
            json.dumps(run_output.output, ensure_ascii=False)
            if isinstance(run_output.output, dict)
            else run_output.output
        )

        # If no input source is provided, use the human data source
        if input_source is None:
            input_source = DataSource(
                type=DataSourceType.human,
                properties={"created_by": Config.shared().user_id},
            )

        return TaskRun(
            parent=self.kiln_task,
            input=input_str,
            input_source=input_source,
            output=TaskOutput(
                output=output_str,
                # Synthetic since an adapter, not a human, is creating this
                source=DataSource(
                    type=DataSourceType.synthetic,
                    properties=self._properties_for_task_output(),
                ),
            ),
            intermediate_outputs=run_output.intermediate_outputs,
            tags=self.default_tags or [],
        )

    def _properties_for_task_output(self) -> Dict[str, str | int | float]:
        """Adapter provenance recorded on the synthetic output's data source."""
        adapter_info = self.adapter_info()
        props: Dict[str, str | int | float] = {
            "adapter_name": adapter_info.adapter_name,
            "model_name": adapter_info.model_name,
            "model_provider": adapter_info.model_provider,
            "prompt_builder_name": adapter_info.prompt_builder_name,
        }
        if adapter_info.prompt_id is not None:
            props["prompt_id"] = adapter_info.prompt_id
        return props
Base class for AI model adapters that handle task execution.
This abstract class provides the foundation for implementing model-specific adapters that can process tasks with structured or unstructured inputs/outputs. It handles input/output validation, prompt building, and run tracking.
Attributes:
    prompt_builder (BasePromptBuilder): Builder for constructing prompts for the model.
    kiln_task (Task): The task configuration and metadata.
    output_schema (dict | None): JSON schema for validating structured outputs.
    input_schema (dict | None): JSON schema for validating structured inputs.
def model_provider(self) -> KilnModelProvider:
    """Lazy load the model provider for this adapter.

    Returns the cached provider when one is already resolved; otherwise looks
    it up from the configured model and provider names and caches the result.

    Raises:
        ValueError: if either name is missing, or no matching provider exists.
    """
    if self._model_provider is not None:
        return self._model_provider
    if not self.model_name or not self.model_provider_name:
        raise ValueError("model_name and model_provider_name must be provided")
    resolved = kiln_model_provider_from(self.model_name, self.model_provider_name)
    if not resolved:
        raise ValueError(
            f"model_provider_name {self.model_provider_name} not found for model {self.model_name}"
        )
    self._model_provider = resolved
    return resolved
async def invoke_returning_raw(
    self,
    input: Dict | str,
    input_source: DataSource | None = None,
) -> Dict | str:
    """Invoke the task and return only the model output.

    Returns the parsed JSON dict for structured tasks, or the raw output
    string for unstructured tasks.
    """
    task_run = await self.invoke(input, input_source)
    raw = task_run.output.output
    if self.kiln_task.output_json_schema is None:
        return raw
    return json.loads(raw)
async def invoke(
    self,
    input: Dict | str,
    input_source: DataSource | None = None,
) -> TaskRun:
    """Run the task end-to-end and return a TaskRun record.

    Steps: validate structured input, call the model, parse the raw output via
    the provider's registered parser, validate the result against the task's
    output contract, then wrap everything in a TaskRun. The run is saved to
    disk when autosave is enabled and the task has a path; otherwise its id is
    cleared to indicate it was not persisted.
    """
    # Structured tasks require a dict input that matches the input schema.
    if self.input_schema is not None:
        if not isinstance(input, dict):
            raise ValueError(f"structured input is not a dict: {input}")
        validate_schema(input, self.input_schema)

    # Call the concrete adapter implementation.
    raw_output = await self._run(input)

    # Some providers wrap their output (e.g. thinking blocks); the registered
    # parser normalizes it.
    provider = self.model_provider()
    parser_cls = model_parser_from_id(provider.parser)
    parser = parser_cls(structured_output=self.has_structured_output())
    parsed = parser.parse_output(original_output=raw_output)

    # Structured output must be a schema-valid dict; otherwise a plain string.
    if self.output_schema is not None:
        if not isinstance(parsed.output, dict):
            raise RuntimeError(
                f"structured response is not a dict: {parsed.output}"
            )
        validate_schema(parsed.output, self.output_schema)
    else:
        if not isinstance(parsed.output, str):
            raise RuntimeError(
                f"response is not a string for non-structured task: {parsed.output}"
            )

    run = self.generate_run(input, input_source, parsed)

    # Save the run if configured to do so, and we have a path to save to.
    if Config.shared().autosave_runs and self.kiln_task.path is not None:
        run.save_to_file()
    else:
        # Clear the ID to indicate it's not persisted.
        run.id = None

    return run
def build_prompt(self) -> str:
    """Build the task prompt.

    The prompt builder needs to know whether to inject JSON formatting
    instructions: that is required only for structured output when the
    provider uses an instruction-based structured output mode.
    """
    provider = self.model_provider()
    wants_json_instructions = self.has_structured_output() and (
        provider.structured_output_mode == StructuredOutputMode.json_instructions
        or provider.structured_output_mode
        == StructuredOutputMode.json_instruction_and_object
    )
    return self.prompt_builder.build_prompt(
        include_json_instructions=wants_json_instructions
    )
def
run_strategy( self) -> Tuple[Literal['cot_as_message', 'cot_two_call', 'basic'], str | None]:
164 def run_strategy( 165 self, 166 ) -> Tuple[Literal["cot_as_message", "cot_two_call", "basic"], str | None]: 167 # Determine the run strategy for COT prompting. 3 options: 168 # 1. "Thinking" LLM designed to output thinking in a structured format plus a COT prompt: we make 1 call to the LLM, which outputs thinking in a structured format. We include the thinking instuctions as a message. 169 # 2. Normal LLM with COT prompt: we make 2 calls to the LLM - one for thinking and one for the final response. This helps us use the LLM's structured output modes (json_schema, tools, etc), which can't be used in a single call. It also separates the thinking from the final response. 170 # 3. Non chain of thought: we make 1 call to the LLM, with no COT prompt. 171 cot_prompt = self.prompt_builder.chain_of_thought_prompt() 172 reasoning_capable = self.model_provider().reasoning_capable 173 174 if cot_prompt and reasoning_capable: 175 # 1: "Thinking" LLM designed to output thinking in a structured format 176 # A simple message with the COT prompt appended to the message list is sufficient 177 return "cot_as_message", cot_prompt 178 elif cot_prompt: 179 # 2: Unstructured output with COT 180 # Two calls to separate the thinking from the final response 181 return "cot_two_call", cot_prompt 182 else: 183 return "basic", None
def generate_run(
    self, input: Dict | str, input_source: DataSource | None, run_output: RunOutput
) -> TaskRun:
    """Build a TaskRun (not persisted) from the input and model output.

    Dict inputs/outputs are serialized to JSON strings. When no input source
    is given, a human source attributed to the configured user is used; the
    output source is always synthetic, tagged with adapter provenance.
    """
    # Serialize dicts to JSON strings; pass strings through unchanged.
    input_str = (
        json.dumps(input, ensure_ascii=False) if isinstance(input, dict) else input
    )
    output_value = run_output.output
    output_str = (
        json.dumps(output_value, ensure_ascii=False)
        if isinstance(output_value, dict)
        else output_value
    )

    # If no input source is provided, use the human data source
    if input_source is None:
        input_source = DataSource(
            type=DataSourceType.human,
            properties={"created_by": Config.shared().user_id},
        )

    return TaskRun(
        parent=self.kiln_task,
        input=input_str,
        input_source=input_source,
        output=TaskOutput(
            output=output_str,
            # Synthetic since an adapter, not a human, is creating this
            source=DataSource(
                type=DataSourceType.synthetic,
                properties=self._properties_for_task_output(),
            ),
        ),
        intermediate_outputs=run_output.intermediate_outputs,
        tags=self.default_tags or [],
    )