kiln_ai.datamodel
See our docs for details about our datamodel classes and hierarchy:
Developer docs: https://kiln-ai.github.io/Kiln/kiln_core_docs/kiln_ai.html
User docs: https://docs.getkiln.ai/developers/kiln-datamodel
1""" 2See our docs for details about our datamodel classes and hierarchy: 3 4Developer docs: https://kiln-ai.github.io/Kiln/kiln_core_docs/kiln_ai.html 5 6User docs: https://docs.getkiln.ai/developers/kiln-datamodel 7""" 8 9# This component uses "flat" imports so we don't have too much internal structure exposed in the API. 10# for example you can just `from datamodel import Task, Project` instead of `from datamodel.task import Task; from datamodel.project import Project` 11 12from __future__ import annotations 13 14from kiln_ai.datamodel.datamodel_enums import ( 15 FinetuneDataStrategy, 16 FineTuneStatusType, 17 Priority, 18 StructuredOutputMode, 19 TaskOutputRatingType, 20) 21from kiln_ai.datamodel.dataset_split import ( 22 DatasetSplit, 23 DatasetSplitDefinition, 24) 25from kiln_ai.datamodel.finetune import ( 26 Finetune, 27) 28from kiln_ai.datamodel.project import Project 29from kiln_ai.datamodel.prompt import Prompt 30from kiln_ai.datamodel.task import Task, TaskRequirement 31from kiln_ai.datamodel.task_output import ( 32 DataSource, 33 DataSourceProperty, 34 DataSourceType, 35 RequirementRating, 36 TaskOutput, 37 TaskOutputRating, 38) 39from kiln_ai.datamodel.task_run import ( 40 TaskRun, 41) 42 43__all__ = [ 44 "strict_mode", 45 "dataset_split", 46 "Task", 47 "Project", 48 "TaskRun", 49 "TaskOutput", 50 "Priority", 51 "DataSource", 52 "DataSourceType", 53 "DataSourceProperty", 54 "Finetune", 55 "FineTuneStatusType", 56 "TaskOutputRatingType", 57 "TaskRequirement", 58 "DatasetSplitDefinition", 59 "DatasetSplit", 60 "RequirementRating", 61 "TaskRequirement", 62 "Prompt", 63 "TaskOutputRating", 64 "StructuredOutputMode", 65 "FinetuneDataStrategy", 66]
```python
class Task(
    KilnParentedModel,
    KilnParentModel,
    parent_of={
        "runs": TaskRun,
        "dataset_splits": DatasetSplit,
        "finetunes": Finetune,
        "prompts": Prompt,
    },
):
    """
    Represents a specific task to be performed, with associated requirements and validation rules.

    Contains the task definition, requirements, input/output schemas, and maintains
    a collection of task runs.
    """

    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the task for you and your team. Will not be used in prompts/training/validation.",
    )
    instruction: str = Field(
        min_length=1,
        description="The instructions for the task. Will be used in prompts/training/validation.",
    )
    requirements: List[TaskRequirement] = Field(default=[])
    output_json_schema: JsonObjectSchema | None = None
    input_json_schema: JsonObjectSchema | None = None
    thinking_instruction: str | None = Field(
        default=None,
        description="Instructions for the model 'thinking' about the requirement prior to answering. Used for chain of thought style prompting.",
    )

    def output_schema(self) -> Dict | None:
        if self.output_json_schema is None:
            return None
        return schema_from_json_str(self.output_json_schema)

    def input_schema(self) -> Dict | None:
        if self.input_json_schema is None:
            return None
        return schema_from_json_str(self.input_json_schema)

    # These wrappers help for typechecking. TODO P2: fix this in KilnParentModel
    def runs(self, readonly: bool = False) -> list[TaskRun]:
        return super().runs(readonly=readonly)  # type: ignore

    def dataset_splits(self, readonly: bool = False) -> list[DatasetSplit]:
        return super().dataset_splits(readonly=readonly)  # type: ignore

    def finetunes(self, readonly: bool = False) -> list[Finetune]:
        return super().finetunes(readonly=readonly)  # type: ignore

    def prompts(self, readonly: bool = False) -> list[Prompt]:
        return super().prompts(readonly=readonly)  # type: ignore
```
Represents a specific task to be performed, with associated requirements and validation rules.
Contains the task definition, requirements, input/output schemas, and maintains a collection of task runs.
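A minimal sketch of defining a structured task in memory (the JSON schema string is illustrative; any valid JSON object schema should work):

```python
from kiln_ai.datamodel import Task

task = Task(
    name="summarize",
    instruction="Summarize the given article in one paragraph.",
    # JsonObjectSchema fields are stored as JSON strings; this schema is illustrative.
    output_json_schema=(
        '{"type": "object",'
        ' "properties": {"summary": {"type": "string"}},'
        ' "required": ["summary"]}'
    ),
)

print(task.output_schema())  # the parsed dict form of the stored schema string
```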
```python
class Project(KilnParentModel, parent_of={"tasks": Task}):
    """
    A collection of related tasks.

    Projects organize tasks into logical groups and provide high-level descriptions
    of the overall goals.
    """

    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the project for you and your team. Will not be used in prompts/training/validation.",
    )

    # Needed for typechecking. TODO P2: fix this in KilnParentModel
    def tasks(self) -> list[Task]:
        return super().tasks()  # type: ignore
```
A collection of related tasks.
Projects organize tasks into logical groups and provide high-level descriptions of the overall goals.
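A minimal sketch of grouping tasks under a project. The `parent=` keyword is assumed to be accepted by the parented model base classes, mirroring the `parent=task` usage in `DatasetSplit.from_task` below:

```python
from kiln_ai.datamodel import Project, Task

project = Project(name="support_bot", description="Customer support experiments")

# Attach a task to the project via the parent keyword (assumption: same
# mechanism as parent=task in DatasetSplit.from_task below).
task = Task(
    name="triage",
    parent=project,
    instruction="Classify the support request by urgency.",
)
```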
```python
class TaskRun(KilnParentedModel):
    """
    Represents a single execution of a Task.

    Contains the input used, its source, the output produced, and optional
    repair information if the output needed correction.
    """

    input: str = Field(
        description="The inputs to the task. JSON formatted for structured input, plaintext for unstructured input."
    )
    input_source: DataSource | None = Field(
        default=None, description="The source of the input: human or synthetic."
    )

    output: TaskOutput = Field(description="The output of the task run.")
    repair_instructions: str | None = Field(
        default=None,
        description="Instructions for fixing the output. Should define what is wrong, and how to fix it. Will be used by models for both generating a fixed output, and evaluating future models.",
    )
    repaired_output: TaskOutput | None = Field(
        default=None,
        description="A version of the output with issues fixed. This must be a 'fixed' version of the existing output, and not an entirely new output. If you wish to generate an ideal curatorial output for this task unrelated to this output, generate a new TaskOutput with type 'human' instead of using this field.",
    )
    intermediate_outputs: Dict[str, str] | None = Field(
        default=None,
        description="Intermediate outputs from the task run. Keys are the names of the intermediate output steps (cot=chain of thought, etc), values are the output data.",
    )
    tags: List[str] = Field(
        default=[],
        description="Tags for the task run. Tags are used to categorize task runs for filtering and reporting.",
    )

    def has_thinking_training_data(self) -> bool:
        """
        Does this run have thinking data that we can use to train a thinking model?
        """
        if self.intermediate_outputs is None:
            return False
        return (
            "chain_of_thought" in self.intermediate_outputs
            or "reasoning" in self.intermediate_outputs
        )

    # Workaround to return typed parent without importing Task
    def parent_task(self) -> Union["Task", None]:
        if self.parent is None or self.parent.__class__.__name__ != "Task":
            return None
        return self.parent  # type: ignore

    @model_validator(mode="after")
    def validate_input_format(self, info: ValidationInfo) -> Self:
        # Don't validate if loading from file (not new). Too slow.
        # We don't allow changing task schema, so this is redundant validation.
        # Note: we still validate if editing a loaded model
        if self.loading_from_file(info):
            # Consider loading an existing model as validated.
            self._last_validated_input = self.input
            return self

        # Don't validate if input has not changed. Too slow to run this every time.
        if (
            hasattr(self, "_last_validated_input")
            and self.input == self._last_validated_input
        ):
            return self

        task = self.parent_task()
        if task is None:
            # Don't validate this relationship until we have a path or parent.
            # Give them time to build it (but will catch it before saving).
            return self

        # validate input
        if task.input_json_schema is not None:
            try:
                validate_schema(json.loads(self.input), task.input_json_schema)
            except json.JSONDecodeError:
                raise ValueError("Input is not a valid JSON object")
            except jsonschema.exceptions.ValidationError as e:
                raise ValueError(f"Input does not match task input schema: {e}")
        self._last_validated_input = self.input
        return self

    @model_validator(mode="after")
    def validate_output_format(self, info: ValidationInfo) -> Self:
        # Don't validate if loading from file (not new). Too slow.
        # Note: we still validate if editing a loaded model's output.
        if self.loading_from_file(info):
            # Consider loading an existing model as validated.
            self._last_validated_output = self.output.output if self.output else None
            return self

        # Don't validate unless output has changed since last validation.
        # The validator is slow and costly, don't want it running when setting other fields.
        if (
            hasattr(self, "_last_validated_output")
            and self.output is not None
            and self.output.output == self._last_validated_output
        ):
            return self

        task = self.parent_task()
        if task is None:
            return self

        self.output.validate_output_format(task)
        self._last_validated_output = self.output.output if self.output else None
        return self

    @model_validator(mode="after")
    def validate_repaired_output(self) -> Self:
        if self.repaired_output is not None:
            if self.repaired_output.rating is not None:
                raise ValueError(
                    "Repaired output rating must be None. Repaired outputs are assumed to have a perfect rating, as they have been fixed."
                )
        if self.repair_instructions is None and self.repaired_output is not None:
            raise ValueError(
                "Repair instructions are required if providing a repaired output."
            )
        if self.repair_instructions is not None and self.repaired_output is None:
            raise ValueError(
                "A repaired output is required if providing repair instructions."
            )
        return self

    @model_validator(mode="after")
    def validate_input_source(self, info: ValidationInfo) -> Self:
        # On strict mode and not loaded from file, we validate input_source is not None.
        # We want to be able to load any data, even if it's not perfect. But we want to create perfect data when adding new data.
        if not strict_mode():
            return self
        if self.loaded_from_file(info):
            return self
        if self.input_source is None:
            raise ValueError("input_source is required when strict mode is enabled")
        return self

    @model_validator(mode="after")
    def validate_tags(self) -> Self:
        for tag in self.tags:
            if not tag:
                raise ValueError("Tags cannot be empty strings")
            if " " in tag:
                raise ValueError("Tags cannot contain spaces. Try underscores.")

        return self
```
Represents a single execution of a Task.
Contains the input used, its source, the output produced, and optional repair information if the output needed correction.
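A sketch of recording a run by hand. With strict mode enabled, both the input and the output need a `DataSource`; per the `DataSource` validators below, a human source requires the `created_by` property:

```python
from kiln_ai.datamodel import DataSource, DataSourceType, TaskOutput, TaskRun

human = DataSource(type=DataSourceType.human, properties={"created_by": "alice"})

run = TaskRun(
    input="What is the capital of France?",  # plaintext, for an unstructured task
    input_source=human,
    output=TaskOutput(output="Paris", source=human),
    tags=["geo_qa"],  # tags must be non-empty and contain no spaces
)
```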
```python
def has_thinking_training_data(self) -> bool:
    """
    Does this run have thinking data that we can use to train a thinking model?
    """
    if self.intermediate_outputs is None:
        return False
    return (
        "chain_of_thought" in self.intermediate_outputs
        or "reasoning" in self.intermediate_outputs
    )
```
Does this run have thinking data that we can use to train a thinking model?
```python
@model_validator(mode="after")
def validate_input_format(self, info: ValidationInfo) -> Self:
    # Don't validate if loading from file (not new). Too slow.
    # We don't allow changing task schema, so this is redundant validation.
    # Note: we still validate if editing a loaded model
    if self.loading_from_file(info):
        # Consider loading an existing model as validated.
        self._last_validated_input = self.input
        return self

    # Don't validate if input has not changed. Too slow to run this every time.
    if (
        hasattr(self, "_last_validated_input")
        and self.input == self._last_validated_input
    ):
        return self

    task = self.parent_task()
    if task is None:
        # Don't validate this relationship until we have a path or parent.
        # Give them time to build it (but will catch it before saving).
        return self

    # validate input
    if task.input_json_schema is not None:
        try:
            validate_schema(json.loads(self.input), task.input_json_schema)
        except json.JSONDecodeError:
            raise ValueError("Input is not a valid JSON object")
        except jsonschema.exceptions.ValidationError as e:
            raise ValueError(f"Input does not match task input schema: {e}")
    self._last_validated_input = self.input
    return self
```
```python
@model_validator(mode="after")
def validate_output_format(self, info: ValidationInfo) -> Self:
    # Don't validate if loading from file (not new). Too slow.
    # Note: we still validate if editing a loaded model's output.
    if self.loading_from_file(info):
        # Consider loading an existing model as validated.
        self._last_validated_output = self.output.output if self.output else None
        return self

    # Don't validate unless output has changed since last validation.
    # The validator is slow and costly, don't want it running when setting other fields.
    if (
        hasattr(self, "_last_validated_output")
        and self.output is not None
        and self.output.output == self._last_validated_output
    ):
        return self

    task = self.parent_task()
    if task is None:
        return self

    self.output.validate_output_format(task)
    self._last_validated_output = self.output.output if self.output else None
    return self
```
```python
@model_validator(mode="after")
def validate_repaired_output(self) -> Self:
    if self.repaired_output is not None:
        if self.repaired_output.rating is not None:
            raise ValueError(
                "Repaired output rating must be None. Repaired outputs are assumed to have a perfect rating, as they have been fixed."
            )
    if self.repair_instructions is None and self.repaired_output is not None:
        raise ValueError(
            "Repair instructions are required if providing a repaired output."
        )
    if self.repair_instructions is not None and self.repaired_output is None:
        raise ValueError(
            "A repaired output is required if providing repair instructions."
        )
    return self
```
```python
@model_validator(mode="after")
def validate_input_source(self, info: ValidationInfo) -> Self:
    # On strict mode and not loaded from file, we validate input_source is not None.
    # We want to be able to load any data, even if it's not perfect. But we want to create perfect data when adding new data.
    if not strict_mode():
        return self
    if self.loaded_from_file(info):
        return self
    if self.input_source is None:
        raise ValueError("input_source is required when strict mode is enabled")
    return self
```
```python
class TaskOutput(KilnBaseModel):
    """
    An output for a specific task run.

    Contains the actual output content, its source (human or synthetic),
    and optional rating information.
    """

    output: str = Field(
        description="The output of the task. JSON formatted for structured output, plaintext for unstructured output."
    )
    source: DataSource | None = Field(
        description="The source of the output: human or synthetic.",
        default=None,
    )
    rating: TaskOutputRating | None = Field(
        default=None, description="The rating of the output"
    )

    def validate_output_format(self, task: "Task") -> Self:
        # validate output
        if task.output_json_schema is not None:
            try:
                validate_schema(json.loads(self.output), task.output_json_schema)
            except json.JSONDecodeError:
                raise ValueError("Output is not a valid JSON object")
            except jsonschema.exceptions.ValidationError as e:
                raise ValueError(f"Output does not match task output schema: {e}")
        return self

    @model_validator(mode="after")
    def validate_output_source(self, info: ValidationInfo) -> Self:
        # On strict mode and not loaded from file, we validate output_source is not None.
        # We want to be able to load any data, even if it's not perfect. But we want to create perfect data when adding new data.
        if not strict_mode():
            return self
        if self.loaded_from_file(info):
            return self
        if self.source is None:
            raise ValueError("Output source is required when strict mode is enabled")
        return self
```
An output for a specific task run.
Contains the actual output content, its source (human or synthetic), and optional rating information.
```python
def validate_output_format(self, task: "Task") -> Self:
    # validate output
    if task.output_json_schema is not None:
        try:
            validate_schema(json.loads(self.output), task.output_json_schema)
        except json.JSONDecodeError:
            raise ValueError("Output is not a valid JSON object")
        except jsonschema.exceptions.ValidationError as e:
            raise ValueError(f"Output does not match task output schema: {e}")
    return self
```
```python
@model_validator(mode="after")
def validate_output_source(self, info: ValidationInfo) -> Self:
    # On strict mode and not loaded from file, we validate output_source is not None.
    # We want to be able to load any data, even if it's not perfect. But we want to create perfect data when adding new data.
    if not strict_mode():
        return self
    if self.loaded_from_file(info):
        return self
    if self.source is None:
        raise ValueError("Output source is required when strict mode is enabled")
    return self
```
```python
class Priority(IntEnum):
    """Defines priority levels for tasks and requirements, where P0 is highest priority."""

    p0 = 0
    p1 = 1
    p2 = 2
    p3 = 3
```
Defines priority levels for tasks and requirements, where P0 is highest priority.
```python
class DataSource(BaseModel):
    """
    Represents the origin of data, either human or synthetic, with associated properties.

    Properties vary based on the source type - for synthetic sources this includes
    model information, for human sources this includes creator information.
    """

    type: DataSourceType
    properties: Dict[str, str | int | float] = Field(
        default={},
        description="Properties describing the data source. For synthetic things like model. For human, the human's name.",
    )

    _data_source_properties = [
        DataSourceProperty(
            name="created_by",
            type=str,
            required_for=[DataSourceType.human],
            not_allowed_for=[DataSourceType.synthetic],
        ),
        DataSourceProperty(
            name="model_name",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human],
        ),
        DataSourceProperty(
            name="model_provider",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human],
        ),
        DataSourceProperty(
            name="adapter_name",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human],
        ),
        DataSourceProperty(
            name="prompt_builder_name",
            type=str,
            not_allowed_for=[DataSourceType.human],
        ),
        DataSourceProperty(
            # Optional: an ID within the scope of the prompt_builder_name.
            # Used for prompt builders with IDs (like saved prompts, fine-tune prompts)
            name="prompt_id",
            type=str,
            not_allowed_for=[DataSourceType.human],
        ),
    ]

    @model_validator(mode="after")
    def validate_type(self) -> "DataSource":
        if self.type not in DataSourceType:
            raise ValueError(f"Invalid data source type: {self.type}")
        return self

    @model_validator(mode="after")
    def validate_properties(self) -> "DataSource":
        for prop in self._data_source_properties:
            # Check the property type is correct
            if prop.name in self.properties:
                if not isinstance(self.properties[prop.name], prop.type):
                    raise ValueError(
                        f"'{prop.name}' must be of type {prop.type.__name__} for {self.type} data source"
                    )
            # Check the property is required for the data source type
            if self.type in prop.required_for:
                if prop.name not in self.properties:
                    raise ValueError(
                        f"'{prop.name}' is required for {self.type} data source"
                    )
            # Check the property is not allowed for the data source type
            elif self.type in prop.not_allowed_for and prop.name in self.properties:
                raise ValueError(
                    f"'{prop.name}' is not allowed for {self.type} data source"
                )
        return self

    @model_validator(mode="after")
    def validate_no_empty_properties(self) -> Self:
        for prop, value in self.properties.items():
            if isinstance(value, str) and value == "":
                raise ValueError(
                    f"Property '{prop}' must be a non-empty string for {self.type} data source"
                )
        return self
```
Represents the origin of data, either human or synthetic, with associated properties.
Properties vary based on the source type - for synthetic sources this includes model information, for human sources this includes creator information.
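For example, a synthetic source must carry `model_name`, `model_provider`, and `adapter_name`, while human-only properties such as `created_by` are rejected. A sketch with illustrative property values:

```python
from kiln_ai.datamodel import DataSource, DataSourceType

source = DataSource(
    type=DataSourceType.synthetic,
    properties={
        "model_name": "gpt-4o-mini",         # illustrative values; any non-empty
        "model_provider": "openai",          # strings satisfy the validators
        "adapter_name": "langchain_adapter",
    },
)
```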
```python
@model_validator(mode="after")
def validate_properties(self) -> "DataSource":
    for prop in self._data_source_properties:
        # Check the property type is correct
        if prop.name in self.properties:
            if not isinstance(self.properties[prop.name], prop.type):
                raise ValueError(
                    f"'{prop.name}' must be of type {prop.type.__name__} for {self.type} data source"
                )
        # Check the property is required for the data source type
        if self.type in prop.required_for:
            if prop.name not in self.properties:
                raise ValueError(
                    f"'{prop.name}' is required for {self.type} data source"
                )
        # Check the property is not allowed for the data source type
        elif self.type in prop.not_allowed_for and prop.name in self.properties:
            raise ValueError(
                f"'{prop.name}' is not allowed for {self.type} data source"
            )
    return self
```
```python
@model_validator(mode="after")
def validate_no_empty_properties(self) -> Self:
    for prop, value in self.properties.items():
        if isinstance(value, str) and value == "":
            raise ValueError(
                f"Property '{prop}' must be a non-empty string for {self.type} data source"
            )
    return self
```
```python
class DataSourceType(str, Enum):
    """
    The source type of a piece of data.

    Human: a human created the data
    Synthetic: a model created the data
    """

    human = "human"
    synthetic = "synthetic"
```
The source type of a piece of data.
Human: a human created the data
Synthetic: a model created the data
```python
class DataSourceProperty(BaseModel):
    """
    Defines a property that can be associated with a data source.

    Includes validation rules for when properties are required or not allowed
    based on the data source type.
    """

    name: str
    type: Type[Union[str, int, float]]
    required_for: List[DataSourceType] = []
    not_allowed_for: List[DataSourceType] = []
```
Defines a property that can be associated with a data source.
Includes validation rules for when properties are required or not allowed based on the data source type.
```python
class Finetune(KilnParentedModel):
    """
    The Kiln fine-tune datamodel.

    Initially holds a reference to a training job, with needed identifiers to update the status. When complete, contains the new model ID.
    """

    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the fine-tune for you and your team. Not used in training.",
    )
    structured_output_mode: StructuredOutputMode | None = Field(
        default=None,
        description="The mode to use to train the model for structured output, if it was trained with structured output. Will determine how we call the tuned model, so we call with the matching mode.",
    )
    provider: str = Field(
        description="The provider to use for the fine-tune (e.g. 'openai')."
    )
    base_model_id: str = Field(
        description="The id of the base model to use for the fine-tune. This string relates to the provider's IDs for their own models, not Kiln IDs."
    )
    provider_id: str | None = Field(
        default=None,
        description="The ID of the fine-tune job on the provider's side. May not be the same as the fine_tune_model_id.",
    )
    fine_tune_model_id: str | None = Field(
        default=None,
        description="The ID of the fine-tuned model on the provider's side. May not be the same as the provider_id.",
    )
    dataset_split_id: str = Field(
        description="The ID of the dataset split to use for this fine-tune.",
    )
    train_split_name: str = Field(
        default="train",
        description="The name of the training split to use for this fine-tune.",
    )
    validation_split_name: str | None = Field(
        default=None,
        description="The name of the validation split to use for this fine-tune. Optional.",
    )
    parameters: dict[str, str | int | float | bool] = Field(
        default={},
        description="The parameters to use for this fine-tune. These are provider-specific.",
    )
    # These two fields are saved exactly as used for training. Even if they map exactly to a custom prompt or generator, those can change, so we want to keep a record of the training prompt.
    system_message: str = Field(
        description="The system message to use for this fine-tune.",
    )
    thinking_instructions: str | None = Field(
        default=None,
        description="The thinking instructions to use for this fine-tune. Only used when data_strategy is final_and_intermediate.",
    )
    latest_status: FineTuneStatusType = Field(
        default=FineTuneStatusType.unknown,
        description="The latest known status of this fine-tune. Not updated in real time.",
    )
    properties: Dict[str, str | int | float] = Field(
        default={},
        description="Properties of the fine-tune. Different providers may use different properties.",
    )
    data_strategy: FinetuneDataStrategy = Field(
        default=FinetuneDataStrategy.final_only,
        description="The strategy to use for training the model. 'final_only' will only train on the final response. 'final_and_intermediate' will train on the final response and intermediate outputs (chain of thought or reasoning).",
    )

    # Workaround to return typed parent without importing Task
    def parent_task(self) -> Union["Task", None]:
        if self.parent is None or self.parent.__class__.__name__ != "Task":
            return None
        return self.parent  # type: ignore

    @model_validator(mode="after")
    def validate_thinking_instructions(self) -> Self:
        if (
            self.thinking_instructions is not None
            and self.data_strategy != FinetuneDataStrategy.final_and_intermediate
        ):
            raise ValueError(
                "Thinking instructions can only be used when data_strategy is final_and_intermediate"
            )
        if (
            self.thinking_instructions is None
            and self.data_strategy == FinetuneDataStrategy.final_and_intermediate
        ):
            raise ValueError(
                "Thinking instructions are required when data_strategy is final_and_intermediate"
            )
        return self
```
The Kiln fine-tune datamodel.
Initially holds a reference to a training job, with needed identifiers to update the status. When complete, contains the new model ID.
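A sketch of creating a fine-tune record; the model and dataset IDs are placeholders:

```python
from kiln_ai.datamodel import Finetune

finetune = Finetune(
    name="tuned_summarizer",
    provider="openai",
    base_model_id="gpt-4o-mini-2024-07-18",  # the provider's own model ID (placeholder)
    dataset_split_id="123456",               # ID of an existing DatasetSplit (placeholder)
    system_message="You are a concise summarizer.",
)
# data_strategy defaults to final_only, so thinking_instructions must stay None;
# the validator below rejects any other combination.
```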
```python
@model_validator(mode="after")
def validate_thinking_instructions(self) -> Self:
    if (
        self.thinking_instructions is not None
        and self.data_strategy != FinetuneDataStrategy.final_and_intermediate
    ):
        raise ValueError(
            "Thinking instructions can only be used when data_strategy is final_and_intermediate"
        )
    if (
        self.thinking_instructions is None
        and self.data_strategy == FinetuneDataStrategy.final_and_intermediate
    ):
        raise ValueError(
            "Thinking instructions are required when data_strategy is final_and_intermediate"
        )
    return self
```
```python
class FineTuneStatusType(str, Enum):
    """
    The status type of a fine-tune (running, completed, failed, etc).
    """

    unknown = "unknown"  # server error
    pending = "pending"
    running = "running"
    completed = "completed"
    failed = "failed"
```
The status type of a fine-tune (running, completed, failed, etc).
```python
class TaskOutputRatingType(str, Enum):
    """Defines the types of rating systems available for task outputs."""

    five_star = "five_star"
    pass_fail = "pass_fail"
    pass_fail_critical = "pass_fail_critical"
    custom = "custom"
```
Defines the types of rating systems available for task outputs.
```python
class TaskRequirement(BaseModel):
    """
    Defines a specific requirement that should be met by task outputs.

    Includes an identifier, name, description, instruction for meeting the requirement,
    priority level, and rating type (five_star, pass_fail, pass_fail_critical, custom).
    """

    id: ID_TYPE = ID_FIELD
    name: str = SHORT_NAME_FIELD
    description: str | None = Field(default=None)
    instruction: str = Field(min_length=1)
    priority: Priority = Field(default=Priority.p2)
    type: TaskOutputRatingType = Field(default=TaskOutputRatingType.five_star)
```
Defines a specific requirement that should be met by task outputs.
Includes an identifier, name, description, instruction for meeting the requirement, priority level, and rating type (five_star, pass_fail, pass_fail_critical, custom).
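For example, a pass/fail conciseness requirement at priority P1:

```python
from kiln_ai.datamodel import Priority, TaskOutputRatingType, TaskRequirement

requirement = TaskRequirement(
    name="concise",
    instruction="The summary must be under 100 words.",
    priority=Priority.p1,
    type=TaskOutputRatingType.pass_fail,
)
```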
```python
class DatasetSplitDefinition(BaseModel):
    """
    A definition of a split in a dataset.

    Example: name="train", description="The training set", percentage=0.8 (80% of the dataset)
    """

    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the dataset for you and your team. Not used in training.",
    )
    percentage: float = Field(
        ge=0.0,
        le=1.0,
        description="The percentage of the dataset that this split represents (between 0 and 1).",
    )
```
A definition of a split in a dataset.
Example: name="train", description="The training set", percentage=0.8 (80% of the dataset)
```python
class DatasetSplit(KilnParentedModel):
    """
    A collection of task runs, with optional splits (train, test, validation).

    Used to freeze a dataset into train/test/validation splits for repeatable fine-tuning or other tasks.

    Maintains a list of IDs for each split, to avoid data duplication.
    """

    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the dataset for you and your team. Not used in training.",
    )
    splits: list[DatasetSplitDefinition] = Field(
        default_factory=list,
        description="The splits in the dataset.",
    )
    split_contents: dict[str, list[str]] = Field(
        description="The contents of each split in the dataset. The key is the split name, and the value is a list of task run IDs.",
    )
    filter: DatasetFilterType | None = Field(
        default=None,
        description="The filter used to build the dataset.",
    )

    @model_validator(mode="after")
    def validate_split_percentages(self) -> "DatasetSplit":
        total = sum(split.percentage for split in self.splits)
        if not math.isclose(total, 1.0, rel_tol=1e-9):
            raise ValueError(f"The sum of split percentages must be 1.0 (got {total})")
        return self

    @classmethod
    def from_task(
        cls,
        name: str,
        task: "Task",
        splits: list[DatasetSplitDefinition],
        filter_type: DatasetFilterType = DatasetFilterType.ALL,
        description: str | None = None,
    ):
        """
        Build a dataset split from a task.
        """
        filter = dataset_filters[filter_type]
        split_contents = cls.build_split_contents(task, splits, filter)
        return cls(
            parent=task,
            name=name,
            description=description,
            splits=splits,
            split_contents=split_contents,
            filter=filter_type,
        )

    @classmethod
    def build_split_contents(
        cls,
        task: "Task",
        splits: list[DatasetSplitDefinition],
        filter: DatasetFilter,
    ) -> dict[str, list[str]]:
        valid_ids = []
        for task_run in task.runs():
            if filter(task_run):
                valid_ids.append(task_run.id)

        # Shuffle and split by split percentage
        random.shuffle(valid_ids)
        split_contents = {}
        start_idx = 0
        remaining_items = len(valid_ids)

        # Handle all splits except the last one
        for split in splits[:-1]:
            split_size = round(len(valid_ids) * split.percentage)
            split_contents[split.name] = valid_ids[start_idx : start_idx + split_size]
            start_idx += split_size
            remaining_items -= split_size

        # Last split gets all remaining items (for rounding)
        if splits:
            split_contents[splits[-1].name] = valid_ids[start_idx:]

        return split_contents

    def parent_task(self) -> "Task | None":
        # inline import to avoid circular import
        from kiln_ai.datamodel import Task

        if not isinstance(self.parent, Task):
            return None
        return self.parent

    def missing_count(self) -> int:
        """
        Returns:
            int: the number of task runs that have an ID persisted in this dataset split, but no longer exist in the dataset
        """
        parent = self.parent_task()
        if parent is None:
            raise ValueError("DatasetSplit has no parent task")

        runs = parent.runs(readonly=True)
        all_ids = set(run.id for run in runs)
        all_ids_in_splits = set()
        for ids in self.split_contents.values():
            all_ids_in_splits.update(ids)
        missing = all_ids_in_splits - all_ids
        return len(missing)
```
A collection of task runs, with optional splits (train, test, validation).
Used to freeze a dataset into train/test/validation splits for repeatable fine-tuning or other tasks.
Maintains a list of IDs for each split, to avoid data duplication.
```python
@model_validator(mode="after")
def validate_split_percentages(self) -> "DatasetSplit":
    total = sum(split.percentage for split in self.splits)
    if not math.isclose(total, 1.0, rel_tol=1e-9):
        raise ValueError(f"The sum of split percentages must be 1.0 (got {total})")
    return self
```
```python
@classmethod
def from_task(
    cls,
    name: str,
    task: "Task",
    splits: list[DatasetSplitDefinition],
    filter_type: DatasetFilterType = DatasetFilterType.ALL,
    description: str | None = None,
):
    """
    Build a dataset split from a task.
    """
    filter = dataset_filters[filter_type]
    split_contents = cls.build_split_contents(task, splits, filter)
    return cls(
        parent=task,
        name=name,
        description=description,
        splits=splits,
        split_contents=split_contents,
        filter=filter_type,
    )
```
Build a dataset split from a task.
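Usage sketch, reusing the `task` and `splits` from the earlier examples; the task's runs are shuffled and partitioned by percentage:

```python
from kiln_ai.datamodel import DatasetSplit

dataset = DatasetSplit.from_task("v1", task, splits)
print({name: len(ids) for name, ids in dataset.split_contents.items()})
```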
```python
@classmethod
def build_split_contents(
    cls,
    task: "Task",
    splits: list[DatasetSplitDefinition],
    filter: DatasetFilter,
) -> dict[str, list[str]]:
    valid_ids = []
    for task_run in task.runs():
        if filter(task_run):
            valid_ids.append(task_run.id)

    # Shuffle and split by split percentage
    random.shuffle(valid_ids)
    split_contents = {}
    start_idx = 0
    remaining_items = len(valid_ids)

    # Handle all splits except the last one
    for split in splits[:-1]:
        split_size = round(len(valid_ids) * split.percentage)
        split_contents[split.name] = valid_ids[start_idx : start_idx + split_size]
        start_idx += split_size
        remaining_items -= split_size

    # Last split gets all remaining items (for rounding)
    if splits:
        split_contents[splits[-1].name] = valid_ids[start_idx:]

    return split_contents
```
```python
def missing_count(self) -> int:
    """
    Returns:
        int: the number of task runs that have an ID persisted in this dataset split, but no longer exist in the dataset
    """
    parent = self.parent_task()
    if parent is None:
        raise ValueError("DatasetSplit has no parent task")

    runs = parent.runs(readonly=True)
    all_ids = set(run.id for run in runs)
    all_ids_in_splits = set()
    for ids in self.split_contents.values():
        all_ids_in_splits.update(ids)
    missing = all_ids_in_splits - all_ids
    return len(missing)
```
Returns: int: the number of task runs that have an ID persisted in this dataset split, but no longer exist in the dataset
```python
class RequirementRating(BaseModel):
    """Rating for a specific requirement within a task output."""

    value: float = Field(
        description="The rating value. Interpretation depends on rating type"
    )
    type: TaskOutputRatingType = Field(description="The type of rating")
```
Rating for a specific requirement within a task output.
```python
class Prompt(KilnParentedModel):
    """
    A prompt for a task.
    """

    name: str = NAME_FIELD
    prompt: str = Field(
        description="The prompt for the task.",
        min_length=1,
    )
    chain_of_thought_instructions: str | None = Field(
        default=None,
        description="Instructions for the model 'thinking' about the requirement prior to answering. Used for chain of thought style prompting. COT will not be used unless this is provided.",
    )
```
A prompt for a task.
```python
class TaskOutputRating(KilnBaseModel):
    """
    A rating for a task output, including an overall rating and ratings for each requirement.

    Supports:
    - five_star: 1-5 star ratings
    - pass_fail: boolean pass/fail (1.0 = pass, 0.0 = fail)
    - pass_fail_critical: tri-state (1.0 = pass, 0.0 = fail, -1.0 = critical fail)
    """

    type: TaskOutputRatingType = Field(default=TaskOutputRatingType.five_star)
    value: float | None = Field(
        description="The rating value. Interpretation depends on rating type:\n- five_star: 1-5 stars\n- pass_fail: 1.0 (pass) or 0.0 (fail)\n- pass_fail_critical: 1.0 (pass), 0.0 (fail), or -1.0 (critical fail)",
        default=None,
    )
    requirement_ratings: Dict[ID_TYPE, RequirementRating] = Field(
        default={},
        description="The ratings of the requirements of the task.",
    )

    # Previously we stored rating values as a dict of floats, but now we store them as RequirementRating objects.
    @model_validator(mode="before")
    def upgrade_old_format(cls, data: dict) -> dict:
        if not isinstance(data, dict):
            return data

        # Check if we have the old format (dict of floats)
        req_ratings = data.get("requirement_ratings", {})
        if req_ratings and all(
            isinstance(v, (int, float)) for v in req_ratings.values()
        ):
            # Convert each float to a RequirementRating object
            # all ratings are five star at the point we used this format
            data["requirement_ratings"] = {
                k: {"value": v, "type": TaskOutputRatingType.five_star}
                for k, v in req_ratings.items()
            }

        return data

    # Used to select high quality outputs for example selection (MultiShotPromptBuilder, etc)
    def is_high_quality(self) -> bool:
        if self.value is None:
            return False

        if self.type == TaskOutputRatingType.five_star:
            return self.value >= 4
        elif self.type == TaskOutputRatingType.pass_fail:
            return self.value == 1.0
        elif self.type == TaskOutputRatingType.pass_fail_critical:
            return self.value == 1.0
        return False

    @model_validator(mode="after")
    def validate_rating(self) -> Self:
        if self.type not in TaskOutputRatingType:
            raise ValueError(f"Invalid rating type: {self.type}")

        # Overall rating is optional
        if self.value is not None:
            self._validate_rating(self.type, self.value, "overall rating")

        for req_id, req_rating in self.requirement_ratings.items():
            self._validate_rating(
                req_rating.type,
                req_rating.value,
                f"requirement rating for req ID: {req_id}",
            )

        return self

    def _validate_rating(
        self, type: TaskOutputRatingType, rating: float | None, rating_name: str
    ) -> None:
        if type == TaskOutputRatingType.five_star:
            self._validate_five_star(rating, rating_name)
        elif type == TaskOutputRatingType.pass_fail:
            self._validate_pass_fail(rating, rating_name)
        elif type == TaskOutputRatingType.pass_fail_critical:
            self._validate_pass_fail_critical(rating, rating_name)

    def _validate_five_star(self, rating: float | None, rating_name: str) -> None:
        if rating is None or not isinstance(rating, float) or not rating.is_integer():
            raise ValueError(
                f"{rating_name.capitalize()} of type five_star must be an integer value (1-5)"
            )
        if rating < 1 or rating > 5:
            raise ValueError(
                f"{rating_name.capitalize()} of type five_star must be between 1 and 5 stars"
            )

    def _validate_pass_fail(self, rating: float | None, rating_name: str) -> None:
        if rating is None or not isinstance(rating, float) or not rating.is_integer():
            raise ValueError(
                f"{rating_name.capitalize()} of type pass_fail must be an integer value (0 or 1)"
            )
        if rating not in [0, 1]:
            raise ValueError(
                f"{rating_name.capitalize()} of type pass_fail must be 0 (fail) or 1 (pass)"
            )

    def _validate_pass_fail_critical(
        self, rating: float | None, rating_name: str
    ) -> None:
        if rating is None or not isinstance(rating, float) or not rating.is_integer():
            raise ValueError(
                f"{rating_name.capitalize()} of type pass_fail_critical must be an integer value (-1, 0, or 1)"
            )
        if rating not in [-1, 0, 1]:
            raise ValueError(
                f"{rating_name.capitalize()} of type pass_fail_critical must be -1 (critical fail), 0 (fail), or 1 (pass)"
            )
```
A rating for a task output, including an overall rating and ratings for each requirement.
Supports:
- five_star: 1-5 star ratings
- pass_fail: boolean pass/fail (1.0 = pass, 0.0 = fail)
- pass_fail_critical: tri-state (1.0 = pass, 0.0 = fail, -1.0 = critical fail)
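For example, a five-star rating must be a whole-number float from 1 to 5, and `is_high_quality()` treats 4 and above as high quality:

```python
from kiln_ai.datamodel import TaskOutputRating, TaskOutputRatingType

rating = TaskOutputRating(type=TaskOutputRatingType.five_star, value=5.0)
assert rating.is_high_quality()
```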
```python
@model_validator(mode="before")
def upgrade_old_format(cls, data: dict) -> dict:
    if not isinstance(data, dict):
        return data

    # Check if we have the old format (dict of floats)
    req_ratings = data.get("requirement_ratings", {})
    if req_ratings and all(
        isinstance(v, (int, float)) for v in req_ratings.values()
    ):
        # Convert each float to a RequirementRating object
        # all ratings are five star at the point we used this format
        data["requirement_ratings"] = {
            k: {"value": v, "type": TaskOutputRatingType.five_star}
            for k, v in req_ratings.items()
        }

    return data
```
```python
def is_high_quality(self) -> bool:
    if self.value is None:
        return False

    if self.type == TaskOutputRatingType.five_star:
        return self.value >= 4
    elif self.type == TaskOutputRatingType.pass_fail:
        return self.value == 1.0
    elif self.type == TaskOutputRatingType.pass_fail_critical:
        return self.value == 1.0
    return False
```
```python
@model_validator(mode="after")
def validate_rating(self) -> Self:
    if self.type not in TaskOutputRatingType:
        raise ValueError(f"Invalid rating type: {self.type}")

    # Overall rating is optional
    if self.value is not None:
        self._validate_rating(self.type, self.value, "overall rating")

    for req_id, req_rating in self.requirement_ratings.items():
        self._validate_rating(
            req_rating.type,
            req_rating.value,
            f"requirement rating for req ID: {req_id}",
        )

    return self
```
```python
class StructuredOutputMode(str, Enum):
    """
    Enumeration of supported structured output modes.

    - default: let the adapter decide
    - json_schema: request json using API capabilities for json_schema
    - function_calling: request json using API capabilities for function calling
    - json_mode: request json using API's JSON mode, which should return valid JSON, but isn't checking/passing the schema
    - json_instructions: append instructions to the prompt to request json matching the schema. No API capabilities are used. You should have a custom parser on these models as they will be returning strings.
    - json_instruction_and_object: append instructions to the prompt to request json matching the schema. Also request the response as json_mode via API capabilities (returning dictionaries).
    """

    default = "default"
    json_schema = "json_schema"
    function_calling = "function_calling"
    json_mode = "json_mode"
    json_instructions = "json_instructions"
    json_instruction_and_object = "json_instruction_and_object"
```
Enumeration of supported structured output modes.
- default: let the adapter decide
- json_schema: request json using API capabilities for json_schema
- function_calling: request json using API capabilities for function calling
- json_mode: request json using API's JSON mode, which should return valid JSON, but isn't checking/passing the schema
- json_instructions: append instructions to the prompt to request json matching the schema. No API capabilities are used. You should have a custom parser on these models as they will be returning strings.
- json_instruction_and_object: append instructions to the prompt to request json matching the schema. Also request the response as json_mode via API capabilities (returning dictionaries).
```python
class FinetuneDataStrategy(str, Enum):
    final_only = "final_only"
    final_and_intermediate = "final_and_intermediate"
```