kiln_ai.datamodel

See our docs for details about our datamodel classes and hierarchy:

Developer docs: https://kiln-ai.github.io/Kiln/kiln_core_docs/kiln_ai.html

User docs: https://docs.getkiln.ai/developers/kiln-datamodel

 1"""
 2See our docs for details about our datamodel classes and hierarchy:
 3
 4Developer docs: https://kiln-ai.github.io/Kiln/kiln_core_docs/kiln_ai.html
 5
 6User docs: https://docs.getkiln.ai/developers/kiln-datamodel
 7"""
 8
 9# This component uses "flat" imports so we don't have too much internal structure exposed in the API.
10# for example you can just `from datamodel import Task, Project` instead of `from datamodel.task import Task; from datamodel.project import Project`
11
12from __future__ import annotations
13
14from kiln_ai.datamodel.datamodel_enums import (
15    FinetuneDataStrategy,
16    FineTuneStatusType,
17    Priority,
18    StructuredOutputMode,
19    TaskOutputRatingType,
20)
21from kiln_ai.datamodel.dataset_split import (
22    DatasetSplit,
23    DatasetSplitDefinition,
24)
25from kiln_ai.datamodel.finetune import (
26    Finetune,
27)
28from kiln_ai.datamodel.project import Project
29from kiln_ai.datamodel.prompt import Prompt
30from kiln_ai.datamodel.task import Task, TaskRequirement
31from kiln_ai.datamodel.task_output import (
32    DataSource,
33    DataSourceProperty,
34    DataSourceType,
35    RequirementRating,
36    TaskOutput,
37    TaskOutputRating,
38)
39from kiln_ai.datamodel.task_run import (
40    TaskRun,
41)
42
43__all__ = [
44    "strict_mode",
45    "dataset_split",
46    "Task",
47    "Project",
48    "TaskRun",
49    "TaskOutput",
50    "Priority",
51    "DataSource",
52    "DataSourceType",
53    "DataSourceProperty",
54    "Finetune",
55    "FineTuneStatusType",
56    "TaskOutputRatingType",
57    "TaskRequirement",
58    "DatasetSplitDefinition",
59    "DatasetSplit",
60    "RequirementRating",
61    "TaskRequirement",
62    "Prompt",
63    "TaskOutputRating",
64    "StructuredOutputMode",
65    "FinetuneDataStrategy",
66]
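
For orientation, a minimal sketch of the flat import style (in-memory models only; persistence is covered in the user docs, and the `parent=` wiring is an assumption based on how `DatasetSplit.from_task` constructs children):

from kiln_ai.datamodel import Project, Task

project = Project(name="customer_support")
task = Task(
    parent=project,
    name="summarize_ticket",
    instruction="Summarize the support ticket in two sentences.",
)
print(task.name)  # "summarize_ticket"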
class Task(kiln_ai.datamodel.basemodel.KilnParentedModel, kiln_ai.datamodel.basemodel.KilnParentModel):
class Task(
    KilnParentedModel,
    KilnParentModel,
    parent_of={
        "runs": TaskRun,
        "dataset_splits": DatasetSplit,
        "finetunes": Finetune,
        "prompts": Prompt,
    },
):
    """
    Represents a specific task to be performed, with associated requirements and validation rules.

    Contains the task definition, requirements, input/output schemas, and maintains
    a collection of task runs.
    """

    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the task for you and your team. Will not be used in prompts/training/validation.",
    )
    instruction: str = Field(
        min_length=1,
        description="The instructions for the task. Will be used in prompts/training/validation.",
    )
    requirements: List[TaskRequirement] = Field(default=[])
    output_json_schema: JsonObjectSchema | None = None
    input_json_schema: JsonObjectSchema | None = None
    thinking_instruction: str | None = Field(
        default=None,
        description="Instructions for the model 'thinking' about the requirement prior to answering. Used for chain of thought style prompting.",
    )

    def output_schema(self) -> Dict | None:
        if self.output_json_schema is None:
            return None
        return schema_from_json_str(self.output_json_schema)

    def input_schema(self) -> Dict | None:
        if self.input_json_schema is None:
            return None
        return schema_from_json_str(self.input_json_schema)

    # These wrappers help for typechecking. TODO P2: fix this in KilnParentModel
    def runs(self, readonly: bool = False) -> list[TaskRun]:
        return super().runs(readonly=readonly)  # type: ignore

    def dataset_splits(self, readonly: bool = False) -> list[DatasetSplit]:
        return super().dataset_splits(readonly=readonly)  # type: ignore

    def finetunes(self, readonly: bool = False) -> list[Finetune]:
        return super().finetunes(readonly=readonly)  # type: ignore

    def prompts(self, readonly: bool = False) -> list[Prompt]:
        return super().prompts(readonly=readonly)  # type: ignore

Represents a specific task to be performed, with associated requirements and validation rules.

Contains the task definition, requirements, input/output schemas, and maintains a collection of task runs.

name: str
description: str | None
instruction: str
requirements: List[TaskRequirement]
output_json_schema: JsonObjectSchema | None
input_json_schema: JsonObjectSchema | None
thinking_instruction: str | None
def output_schema(self) -> Optional[Dict]:
def input_schema(self) -> Optional[Dict]:
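
A sketch of the schema accessors above, assuming the `JsonObjectSchema` validator accepts a standard JSON object schema serialized as a string:

import json

from kiln_ai.datamodel import Task

schema = json.dumps(
    {
        "type": "object",
        "properties": {"summary": {"type": "string"}},
        "required": ["summary"],
    }
)
task = Task(
    name="summarize",
    instruction="Summarize the input.",
    output_json_schema=schema,
)
print(task.output_schema())  # parsed dict; None when no schema is set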
def runs(self, readonly=False) -> List[TaskRun]:
def child_method(self, readonly: bool = False) -> list[child_class]:
    return child_class.all_children_of_parent_path(self.path, readonly=readonly)

def dataset_splits(self, readonly=False) -> List[DatasetSplit]:

def finetunes(self, readonly=False) -> List[Finetune]:

def prompts(self, readonly=False) -> List[Prompt]:

def relationship_name() -> str:
def relationship_name_method() -> str:
    return relationship_name

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:
def parent_class_method() -> Type[KilnParentModel]:
    return cls

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
def wrapped_model_post_init(self: BaseModel, context: Any, /) -> None:
    """We need to both initialize private attributes and call the user-defined model_post_init
    method.
    """
    init_private_attributes(self, context)
    original_model_post_init(self, context)

We need to both initialize private attributes and call the user-defined model_post_init method.

class Project(kiln_ai.datamodel.basemodel.KilnParentModel):
class Project(KilnParentModel, parent_of={"tasks": Task}):
    """
    A collection of related tasks.

    Projects organize tasks into logical groups and provide high-level descriptions
    of the overall goals.
    """

    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the project for you and your team. Will not be used in prompts/training/validation.",
    )

    # Needed for typechecking. TODO P2: fix this in KilnParentModel
    def tasks(self) -> list[Task]:
        return super().tasks()  # type: ignore

A collection of related tasks.

Projects organize tasks into logical groups and provide high-level descriptions of the overall goals.

name: str
description: str | None
def tasks(self, readonly=False) -> List[Task]:
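The child accessors read from disk, so a parent must be saved before its children are visible. A hypothetical sketch, assuming `save_to_file` and the `path` field from `KilnBaseModel`:

from kiln_ai.datamodel import Project, Task

project = Project(name="demo", path="/tmp/demo/project.kiln")  # hypothetical path
project.save_to_file()
Task(parent=project, name="t1", instruction="Do the thing.").save_to_file()
print([t.name for t in project.tasks()])  # ["t1"]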

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:

We need to both initialize private attributes and call the user-defined model_post_init method.

class TaskRun(kiln_ai.datamodel.basemodel.KilnParentedModel):
class TaskRun(KilnParentedModel):
    """
    Represents a single execution of a Task.

    Contains the input used, its source, the output produced, and optional
    repair information if the output needed correction.
    """

    input: str = Field(
        description="The inputs to the task. JSON formatted for structured input, plaintext for unstructured input."
    )
    input_source: DataSource | None = Field(
        default=None, description="The source of the input: human or synthetic."
    )

    output: TaskOutput = Field(description="The output of the task run.")
    repair_instructions: str | None = Field(
        default=None,
        description="Instructions for fixing the output. Should define what is wrong, and how to fix it. Will be used by models for both generating a fixed output, and evaluating future models.",
    )
    repaired_output: TaskOutput | None = Field(
        default=None,
        description="A version of the output with issues fixed. This must be a 'fixed' version of the existing output, and not an entirely new output. If you wish to generate an ideal curatorial output for this task unrelated to this output, generate a new TaskOutput with type 'human' instead of using this field.",
    )
    intermediate_outputs: Dict[str, str] | None = Field(
        default=None,
        description="Intermediate outputs from the task run. Keys are the names of the intermediate output steps (cot=chain of thought, etc), values are the output data.",
    )
    tags: List[str] = Field(
        default=[],
        description="Tags for the task run. Tags are used to categorize task runs for filtering and reporting.",
    )

    def has_thinking_training_data(self) -> bool:
        """
        Does this run have thinking data that we can use to train a thinking model?
        """
        if self.intermediate_outputs is None:
            return False
        return (
            "chain_of_thought" in self.intermediate_outputs
            or "reasoning" in self.intermediate_outputs
        )

    # Workaround to return typed parent without importing Task
    def parent_task(self) -> Union["Task", None]:
        if self.parent is None or self.parent.__class__.__name__ != "Task":
            return None
        return self.parent  # type: ignore

    @model_validator(mode="after")
    def validate_input_format(self, info: ValidationInfo) -> Self:
        # Don't validate if loading from file (not new). Too slow.
        # We don't allow changing task schema, so this is redundant validation.
        # Note: we still validate if editing a loaded model
        if self.loading_from_file(info):
            # Consider loading an existing model as validated.
            self._last_validated_input = self.input
            return self

        # Don't validate if input has not changed. Too slow to run this every time.
        if (
            hasattr(self, "_last_validated_input")
            and self.input == self._last_validated_input
        ):
            return self

        task = self.parent_task()
        if task is None:
            # don't validate this relationship until we have a path or parent. Give them time to build it (but will catch it before saving)
            return self

        # validate input
        if task.input_json_schema is not None:
            try:
                validate_schema(json.loads(self.input), task.input_json_schema)
            except json.JSONDecodeError:
                raise ValueError("Input is not a valid JSON object")
            except jsonschema.exceptions.ValidationError as e:
                raise ValueError(f"Input does not match task input schema: {e}")
        self._last_validated_input = self.input
        return self

    @model_validator(mode="after")
    def validate_output_format(self, info: ValidationInfo) -> Self:
        # Don't validate if loading from file (not new). Too slow.
        # Note: we still validate if editing a loaded model's output.
        if self.loading_from_file(info):
            # Consider loading an existing model as validated.
            self._last_validated_output = self.output.output if self.output else None
            return self

        # Don't validate unless output has changed since last validation.
        # The validator is slow and costly, don't want it running when setting other fields.
        if (
            hasattr(self, "_last_validated_output")
            and self.output is not None
            and self.output.output == self._last_validated_output
        ):
            return self

        task = self.parent_task()
        if task is None:
            return self

        self.output.validate_output_format(task)
        self._last_validated_output = self.output.output if self.output else None
        return self

    @model_validator(mode="after")
    def validate_repaired_output(self) -> Self:
        if self.repaired_output is not None:
            if self.repaired_output.rating is not None:
                raise ValueError(
                    "Repaired output rating must be None. Repaired outputs are assumed to have a perfect rating, as they have been fixed."
                )
        if self.repair_instructions is None and self.repaired_output is not None:
            raise ValueError(
                "Repair instructions are required if providing a repaired output."
            )
        if self.repair_instructions is not None and self.repaired_output is None:
            raise ValueError(
                "A repaired output is required if providing repair instructions."
            )
        return self

    @model_validator(mode="after")
    def validate_input_source(self, info: ValidationInfo) -> Self:
        # On strict mode and not loaded from file, we validate input_source is not None.
        # We want to be able to load any data, even if it's not perfect. But we want to create perfect data when adding new data.
        if not strict_mode():
            return self
        if self.loaded_from_file(info):
            return self
        if self.input_source is None:
            raise ValueError("input_source is required when strict mode is enabled")
        return self

    @model_validator(mode="after")
    def validate_tags(self) -> Self:
        for tag in self.tags:
            if not tag:
                raise ValueError("Tags cannot be empty strings")
            if " " in tag:
                raise ValueError("Tags cannot contain spaces. Try underscores.")

        return self

Represents a single execution of a Task.

Contains the input used, its source, the output produced, and optional repair information if the output needed correction.

input: str
input_source: DataSource | None
output: TaskOutput
repair_instructions: str | None
repaired_output: TaskOutput | None
intermediate_outputs: Optional[Dict[str, str]]
tags: List[str]
def has_thinking_training_data(self) -> bool:

Does this run have thinking data that we can use to train a thinking model?
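
A sketch (with strict mode off, so sources may be omitted): a run whose intermediate outputs include a chain-of-thought step counts as thinking training data.

from kiln_ai.datamodel import TaskOutput, TaskRun

run = TaskRun(
    input="What is 2 + 2?",
    output=TaskOutput(output="4"),
    intermediate_outputs={"chain_of_thought": "2 + 2 is 4."},
)
print(run.has_thinking_training_data())  # True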

def parent_task(self) -> Optional[Task]:
@model_validator(mode='after')
def validate_input_format(self, info: pydantic_core.core_schema.ValidationInfo) -> Self:
@model_validator(mode='after')
def validate_output_format(self, info: pydantic_core.core_schema.ValidationInfo) -> Self:
@model_validator(mode='after')
def validate_repaired_output(self) -> Self:
@model_validator(mode='after')
def validate_input_source(self, info: pydantic_core.core_schema.ValidationInfo) -> Self:
@model_validator(mode='after')
def validate_tags(self) -> Self:
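
Because `model_config` sets `validate_assignment`, this validator also re-runs when tags are edited after construction. A sketch of the failure mode (strict mode off):

from kiln_ai.datamodel import TaskOutput, TaskRun

run = TaskRun(input="hi", output=TaskOutput(output="hello"))
run.tags = ["needs_review"]  # passes validation
try:
    run.tags = ["needs review"]  # re-validated on assignment
except ValueError as e:
    print(e)  # mentions "Tags cannot contain spaces. Try underscores."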
def relationship_name() -> str:

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:

We need to both initialize private attributes and call the user-defined model_post_init method.

class TaskOutput(kiln_ai.datamodel.basemodel.KilnBaseModel):
class TaskOutput(KilnBaseModel):
    """
    An output for a specific task run.

    Contains the actual output content, its source (human or synthetic),
    and optional rating information.
    """

    output: str = Field(
        description="The output of the task. JSON formatted for structured output, plaintext for unstructured output."
    )
    source: DataSource | None = Field(
        description="The source of the output: human or synthetic.",
        default=None,
    )
    rating: TaskOutputRating | None = Field(
        default=None, description="The rating of the output"
    )

    def validate_output_format(self, task: "Task") -> Self:
        # validate output
        if task.output_json_schema is not None:
            try:
                validate_schema(json.loads(self.output), task.output_json_schema)
            except json.JSONDecodeError:
                raise ValueError("Output is not a valid JSON object")
            except jsonschema.exceptions.ValidationError as e:
                raise ValueError(f"Output does not match task output schema: {e}")
        return self

    @model_validator(mode="after")
    def validate_output_source(self, info: ValidationInfo) -> Self:
        # On strict mode and not loaded from file, we validate output_source is not None.
        # We want to be able to load any data, even if it's not perfect. But we want to create perfect data when adding new data.
        if not strict_mode():
            return self
        if self.loaded_from_file(info):
            return self
        if self.source is None:
            raise ValueError("Output source is required when strict mode is enabled")
        return self

An output for a specific task run.

Contains the actual output content, its source (human or synthetic), and optional rating information.

output: str
source: DataSource | None
rating: TaskOutputRating | None
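
A sketch of a rated output (five_star is the default rating type; a 1-5 value is assumed valid for it):

from kiln_ai.datamodel import TaskOutput, TaskOutputRating

output = TaskOutput(
    output="The answer is 4.",
    rating=TaskOutputRating(value=5.0),
)
print(output.rating.value)  # 5.0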
def validate_output_format(self, task: Task) -> Self:
@model_validator(mode='after')
def validate_output_source(self, info: pydantic_core.core_schema.ValidationInfo) -> Self:
model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:

We need to both initialize private attributes and call the user-defined model_post_init method.

class Priority(enum.IntEnum):
class Priority(IntEnum):
    """Defines priority levels for tasks and requirements, where P0 is highest priority."""

    p0 = 0
    p1 = 1
    p2 = 2
    p3 = 3

Defines priority levels for tasks and requirements, where P0 is highest priority.

p0 = <Priority.p0: 0>
p1 = <Priority.p1: 1>
p2 = <Priority.p2: 2>
p3 = <Priority.p3: 3>
class DataSource(pydantic.main.BaseModel):
class DataSource(BaseModel):
    """
    Represents the origin of data, either human or synthetic, with associated properties.

    Properties vary based on the source type - for synthetic sources this includes
    model information, for human sources this includes creator information.
    """

    type: DataSourceType
    properties: Dict[str, str | int | float] = Field(
        default={},
        description="Properties describing the data source. For synthetic things like model. For human, the human's name.",
    )

    _data_source_properties = [
        DataSourceProperty(
            name="created_by",
            type=str,
            required_for=[DataSourceType.human],
            not_allowed_for=[DataSourceType.synthetic],
        ),
        DataSourceProperty(
            name="model_name",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human],
        ),
        DataSourceProperty(
            name="model_provider",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human],
        ),
        DataSourceProperty(
            name="adapter_name",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human],
        ),
        DataSourceProperty(
            name="prompt_builder_name",
            type=str,
            not_allowed_for=[DataSourceType.human],
        ),
        DataSourceProperty(
            # Optional: an ID within the scope of the prompt_builder_name.
            # Used for prompt builders with IDs (like saved prompts, fine-tune prompts)
            name="prompt_id",
            type=str,
            not_allowed_for=[DataSourceType.human],
        ),
    ]

    @model_validator(mode="after")
    def validate_type(self) -> "DataSource":
        if self.type not in DataSourceType:
            raise ValueError(f"Invalid data source type: {self.type}")
        return self

    @model_validator(mode="after")
    def validate_properties(self) -> "DataSource":
        for prop in self._data_source_properties:
            # Check the property type is correct
            if prop.name in self.properties:
                if not isinstance(self.properties[prop.name], prop.type):
                    raise ValueError(
                        f"'{prop.name}' must be of type {prop.type.__name__} for {self.type} data source"
                    )
            # Check the property is required for the data source type
            if self.type in prop.required_for:
                if prop.name not in self.properties:
                    raise ValueError(
                        f"'{prop.name}' is required for {self.type} data source"
                    )
            # Check the property is not allowed for the data source type
            elif self.type in prop.not_allowed_for and prop.name in self.properties:
                raise ValueError(
                    f"'{prop.name}' is not allowed for {self.type} data source"
                )
        return self

    @model_validator(mode="after")
    def validate_no_empty_properties(self) -> Self:
        for prop, value in self.properties.items():
            if isinstance(value, str) and value == "":
                raise ValueError(
                    f"Property '{prop}' must be a non-empty string for {self.type} data source"
                )
        return self

Represents the origin of data, either human or synthetic, with associated properties.

Properties vary based on the source type - for synthetic sources this includes model information, for human sources this includes creator information.

type: DataSourceType
properties: Dict[str, str | int | float]
@model_validator(mode='after')
def validate_type(self) -> DataSource:
@model_validator(mode='after')
def validate_properties(self) -> DataSource:
@model_validator(mode='after')
def validate_no_empty_properties(self) -> Self:
model_config: ClassVar[pydantic.config.ConfigDict] = {}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
def init_private_attributes(self: BaseModel, context: Any, /) -> None:
    """This function is meant to behave like a BaseModel method to initialise private attributes.

    It takes context as an argument since that's what pydantic-core passes when calling it.

    Args:
        self: The BaseModel instance.
        context: The context.
    """
    if getattr(self, '__pydantic_private__', None) is None:
        pydantic_private = {}
        for name, private_attr in self.__private_attributes__.items():
            default = private_attr.get_default()
            if default is not PydanticUndefined:
                pydantic_private[name] = default
        object_setattr(self, '__pydantic_private__', pydantic_private)

This function is meant to behave like a BaseModel method to initialise private attributes.

It takes context as an argument since that's what pydantic-core passes when calling it.

Args:
    self: The BaseModel instance.
    context: The context.

class DataSourceType(builtins.str, enum.Enum):
class DataSourceType(str, Enum):
    """
    The source type of a piece of data.

    Human: a human created the data
    Synthetic: a model created the data
    """

    human = "human"
    synthetic = "synthetic"

The source type of a piece of data.

Human: a human created the data
Synthetic: a model created the data

human = <DataSourceType.human: 'human'>
synthetic = <DataSourceType.synthetic: 'synthetic'>
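
A sketch of both source types, following the property rules enforced by `DataSource.validate_properties` (the property values are hypothetical):

from kiln_ai.datamodel import DataSource, DataSourceType

human = DataSource(
    type=DataSourceType.human,
    properties={"created_by": "jane"},
)
synthetic = DataSource(
    type=DataSourceType.synthetic,
    properties={
        "model_name": "gpt_4o",
        "model_provider": "openai",
        "adapter_name": "langchain_adapter",
    },
)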
class DataSourceProperty(pydantic.main.BaseModel):
class DataSourceProperty(BaseModel):
    """
    Defines a property that can be associated with a data source.

    Includes validation rules for when properties are required or not allowed
    based on the data source type.
    """

    name: str
    type: Type[Union[str, int, float]]
    required_for: List[DataSourceType] = []
    not_allowed_for: List[DataSourceType] = []

Defines a property that can be associated with a data source.

Includes validation rules for when properties are required or not allowed based on the data source type.

name: str
type: Type[Union[str, int, float]]
required_for: List[DataSourceType]
not_allowed_for: List[DataSourceType]
model_config: ClassVar[pydantic.config.ConfigDict] = {}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

class Finetune(kiln_ai.datamodel.basemodel.KilnParentedModel):
class Finetune(KilnParentedModel):
    """
    The Kiln fine-tune datamodel.

    Initially holds a reference to a training job, with needed identifiers to update the status. When complete, contains the new model ID.
    """

    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the fine-tune for you and your team. Not used in training.",
    )
    structured_output_mode: StructuredOutputMode | None = Field(
        default=None,
        description="The mode to use to train the model for structured output, if it was trained with structured output. Will determine how we call the tuned model, so we call with the matching mode.",
    )
    provider: str = Field(
        description="The provider to use for the fine-tune (e.g. 'openai')."
    )
    base_model_id: str = Field(
        description="The id of the base model to use for the fine-tune. This string relates to the provider's IDs for their own models, not Kiln IDs."
    )
    provider_id: str | None = Field(
        default=None,
        description="The ID of the fine-tune job on the provider's side. May not be the same as the fine_tune_model_id.",
    )
    fine_tune_model_id: str | None = Field(
        default=None,
        description="The ID of the fine-tuned model on the provider's side. May not be the same as the provider_id.",
    )
    dataset_split_id: str = Field(
        description="The ID of the dataset split to use for this fine-tune.",
    )
    train_split_name: str = Field(
        default="train",
        description="The name of the training split to use for this fine-tune.",
    )
    validation_split_name: str | None = Field(
        default=None,
        description="The name of the validation split to use for this fine-tune. Optional.",
    )
    parameters: dict[str, str | int | float | bool] = Field(
        default={},
        description="The parameters to use for this fine-tune. These are provider-specific.",
    )
    # These two fields are saved exactly as used for training. Even if they map exactly to a custom prompt or generator, those can change, so we want to keep a record of the training prompt.
    system_message: str = Field(
        description="The system message to use for this fine-tune.",
    )
    thinking_instructions: str | None = Field(
        default=None,
        description="The thinking instructions to use for this fine-tune. Only used when data_strategy is final_and_intermediate.",
    )
    latest_status: FineTuneStatusType = Field(
        default=FineTuneStatusType.unknown,
        description="The latest known status of this fine-tune. Not updated in real time.",
    )
    properties: Dict[str, str | int | float] = Field(
        default={},
        description="Properties of the fine-tune. Different providers may use different properties.",
    )
    data_strategy: FinetuneDataStrategy = Field(
        default=FinetuneDataStrategy.final_only,
        description="The strategy to use for training the model. 'final_only' will only train on the final response. 'final_and_intermediate' will train on the final response and intermediate outputs (chain of thought or reasoning).",
    )

    # Workaround to return typed parent without importing Task
    def parent_task(self) -> Union["Task", None]:
        if self.parent is None or self.parent.__class__.__name__ != "Task":
            return None
        return self.parent  # type: ignore

    @model_validator(mode="after")
    def validate_thinking_instructions(self) -> Self:
        if (
            self.thinking_instructions is not None
            and self.data_strategy != FinetuneDataStrategy.final_and_intermediate
        ):
            raise ValueError(
                "Thinking instructions can only be used when data_strategy is final_and_intermediate"
            )
        if (
            self.thinking_instructions is None
            and self.data_strategy == FinetuneDataStrategy.final_and_intermediate
        ):
            raise ValueError(
                "Thinking instructions are required when data_strategy is final_and_intermediate"
            )
        return self

The Kiln fine-tune datamodel.

Initially holds a reference to a training job, with needed identifiers to update the status. When complete, contains the new model ID.

name: str
description: str | None
structured_output_mode: StructuredOutputMode | None
provider: str
base_model_id: str
provider_id: str | None
fine_tune_model_id: str | None
dataset_split_id: str
train_split_name: str
validation_split_name: str | None
parameters: dict[str, str | int | float | bool]
system_message: str
thinking_instructions: str | None
latest_status: FineTuneStatusType
properties: Dict[str, str | int | float]
data_strategy: FinetuneDataStrategy
def parent_task(self) -> Optional[Task]:
@model_validator(mode='after')
def validate_thinking_instructions(self) -> Self:
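
A sketch of a fine-tune that trains on intermediate reasoning, which the validator above requires to be paired with thinking instructions (the provider and ID values are hypothetical):

from kiln_ai.datamodel import Finetune, FinetuneDataStrategy

ft = Finetune(
    name="tune_v1",
    provider="openai",
    base_model_id="gpt-4o-mini-2024-07-18",
    dataset_split_id="split_123",
    system_message="You are a helpful assistant.",
    data_strategy=FinetuneDataStrategy.final_and_intermediate,
    thinking_instructions="Think step by step before answering.",
)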
def relationship_name() -> str:

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:

We need to both initialize private attributes and call the user-defined model_post_init method.

class FineTuneStatusType(builtins.str, enum.Enum):
class FineTuneStatusType(str, Enum):
    """
    The status type of a fine-tune (running, completed, failed, etc).
    """

    unknown = "unknown"  # server error
    pending = "pending"
    running = "running"
    completed = "completed"
    failed = "failed"

The status type of a fine-tune (running, completed, failed, etc).

unknown = <FineTuneStatusType.unknown: 'unknown'>
pending = <FineTuneStatusType.pending: 'pending'>
running = <FineTuneStatusType.running: 'running'>
completed = <FineTuneStatusType.completed: 'completed'>
failed = <FineTuneStatusType.failed: 'failed'>
class TaskOutputRatingType(builtins.str, enum.Enum):
class TaskOutputRatingType(str, Enum):
    """Defines the types of rating systems available for task outputs."""

    five_star = "five_star"
    pass_fail = "pass_fail"
    pass_fail_critical = "pass_fail_critical"
    custom = "custom"

Defines the types of rating systems available for task outputs.

five_star = <TaskOutputRatingType.five_star: 'five_star'>
pass_fail = <TaskOutputRatingType.pass_fail: 'pass_fail'>
pass_fail_critical = <TaskOutputRatingType.pass_fail_critical: 'pass_fail_critical'>
custom = <TaskOutputRatingType.custom: 'custom'>
class TaskRequirement(pydantic.main.BaseModel):
class TaskRequirement(BaseModel):
    """
    Defines a specific requirement that should be met by task outputs.

    Includes an identifier, name, description, instruction for meeting the requirement,
    priority level, and rating type (five_star, pass_fail, pass_fail_critical, custom).
    """

    id: ID_TYPE = ID_FIELD
    name: str = SHORT_NAME_FIELD
    description: str | None = Field(default=None)
    instruction: str = Field(min_length=1)
    priority: Priority = Field(default=Priority.p2)
    type: TaskOutputRatingType = Field(default=TaskOutputRatingType.five_star)

Defines a specific requirement that should be met by task outputs.

Includes an identifier, name, description, instruction for meeting the requirement, priority level, and rating type (five_star, pass_fail, pass_fail_critical, custom).

id: Optional[str]
name: str
description: str | None
instruction: str
priority: Priority
type: TaskOutputRatingType
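
A sketch of a critical pass/fail requirement:

from kiln_ai.datamodel import Priority, TaskOutputRatingType, TaskRequirement

requirement = TaskRequirement(
    name="No PII",
    instruction="The output must not contain personally identifiable information.",
    priority=Priority.p0,
    type=TaskOutputRatingType.pass_fail_critical,
)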
model_config: ClassVar[pydantic.config.ConfigDict] = {}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

class DatasetSplitDefinition(pydantic.main.BaseModel):
class DatasetSplitDefinition(BaseModel):
    """
    A definition of a split in a dataset.

    Example: name="train", description="The training set", percentage=0.8 (80% of the dataset)
    """

    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the dataset for you and your team. Not used in training.",
    )
    percentage: float = Field(
        ge=0.0,
        le=1.0,
        description="The percentage of the dataset that this split represents (between 0 and 1).",
    )

A definition of a split in a dataset.

Example: name="train", description="The training set", percentage=0.8 (80% of the dataset)

name: str
description: str | None
percentage: float
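
A sketch of an 80/20 train/test definition; `DatasetSplit.validate_split_percentages` below requires the percentages to sum to 1.0:

from kiln_ai.datamodel import DatasetSplitDefinition

splits = [
    DatasetSplitDefinition(name="train", percentage=0.8),
    DatasetSplitDefinition(name="test", percentage=0.2),
]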
model_config: ClassVar[pydantic.config.ConfigDict] = {}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

class DatasetSplit(kiln_ai.datamodel.basemodel.KilnParentedModel):
class DatasetSplit(KilnParentedModel):
    """
    A collection of task runs, with optional splits (train, test, validation).

    Used to freeze a dataset into train/test/validation splits for repeatable fine-tuning or other tasks.

    Maintains a list of IDs for each split, to avoid data duplication.
    """

    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the dataset for you and your team. Not used in training.",
    )
    splits: list[DatasetSplitDefinition] = Field(
        default_factory=list,
        description="The splits in the dataset.",
    )
    split_contents: dict[str, list[str]] = Field(
        description="The contents of each split in the dataset. The key is the split name, and the value is a list of task run IDs.",
    )
    filter: DatasetFilterType | None = Field(
        default=None,
        description="The filter used to build the dataset.",
    )

    @model_validator(mode="after")
    def validate_split_percentages(self) -> "DatasetSplit":
        total = sum(split.percentage for split in self.splits)
        if not math.isclose(total, 1.0, rel_tol=1e-9):
            raise ValueError(f"The sum of split percentages must be 1.0 (got {total})")
        return self

    @classmethod
    def from_task(
        cls,
        name: str,
        task: "Task",
        splits: list[DatasetSplitDefinition],
        filter_type: DatasetFilterType = DatasetFilterType.ALL,
        description: str | None = None,
    ):
        """
        Build a dataset split from a task.
        """
        filter = dataset_filters[filter_type]
        split_contents = cls.build_split_contents(task, splits, filter)
        return cls(
            parent=task,
            name=name,
            description=description,
            splits=splits,
            split_contents=split_contents,
            filter=filter_type,
        )

    @classmethod
    def build_split_contents(
        cls,
        task: "Task",
        splits: list[DatasetSplitDefinition],
        filter: DatasetFilter,
    ) -> dict[str, list[str]]:
        valid_ids = []
        for task_run in task.runs():
            if filter(task_run):
                valid_ids.append(task_run.id)

        # Shuffle and split by split percentage
        random.shuffle(valid_ids)
        split_contents = {}
        start_idx = 0
        remaining_items = len(valid_ids)

        # Handle all splits except the last one
        for split in splits[:-1]:
            split_size = round(len(valid_ids) * split.percentage)
            split_contents[split.name] = valid_ids[start_idx : start_idx + split_size]
            start_idx += split_size
            remaining_items -= split_size

        # Last split gets all remaining items (for rounding)
        if splits:
            split_contents[splits[-1].name] = valid_ids[start_idx:]

        return split_contents

    def parent_task(self) -> "Task | None":
        # inline import to avoid circular import
        from kiln_ai.datamodel import Task

        if not isinstance(self.parent, Task):
            return None
        return self.parent

    def missing_count(self) -> int:
        """
        Returns:
            int: the number of task runs that have an ID persisted in this dataset split, but no longer exist in the dataset
        """
        parent = self.parent_task()
        if parent is None:
            raise ValueError("DatasetSplit has no parent task")

        runs = parent.runs(readonly=True)
        all_ids = set(run.id for run in runs)
        all_ids_in_splits = set()
        for ids in self.split_contents.values():
            all_ids_in_splits.update(ids)
        missing = all_ids_in_splits - all_ids
        return len(missing)

A collection of task runs, with optional splits (train, test, validation).

Used to freeze a dataset into train/test/validation splits for repeatable fine-tuning or other tasks.

Maintains a list of IDs for each split, to avoid data duplication.

name: str
description: str | None
splits: list[DatasetSplitDefinition]
split_contents: dict[str, list[str]]
filter: DatasetFilterType | None
@model_validator(mode='after')
def validate_split_percentages(self) -> DatasetSplit:
@classmethod
def from_task(cls, name: str, task: Task, splits: list[DatasetSplitDefinition], filter_type: DatasetFilterType = DatasetFilterType.ALL, description: str | None = None):

Build a dataset split from a task.
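
A sketch of freezing a task's runs into splits, assuming `task` is an existing saved Task with runs on disk, and `save_to_file` from `KilnBaseModel`:

from kiln_ai.datamodel import DatasetSplit, DatasetSplitDefinition
from kiln_ai.datamodel.dataset_split import DatasetFilterType

splits = [
    DatasetSplitDefinition(name="train", percentage=0.8),
    DatasetSplitDefinition(name="test", percentage=0.2),
]
split = DatasetSplit.from_task(
    name="v1",
    task=task,  # hypothetical existing Task with saved runs
    splits=splits,
    filter_type=DatasetFilterType.ALL,
)
split.save_to_file()
print(split.split_contents["train"][:3])  # first few task run IDs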

@classmethod
def build_split_contents(cls, task: Task, splits: list[DatasetSplitDefinition], filter: Callable[[TaskRun], bool]) -> dict[str, list[str]]:
def parent_task(self) -> Task | None:
def missing_count(self) -> int:

Returns the number of task runs that have an ID persisted in this dataset split, but no longer exist in the dataset.

def relationship_name() -> str:

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:

We need to both initialize private attributes and call the user-defined model_post_init method.

class RequirementRating(pydantic.main.BaseModel):
21class RequirementRating(BaseModel):
22    """Rating for a specific requirement within a task output."""
23
24    value: float = Field(
25        description="The rating value. Interpretation depends on rating type"
26    )
27    type: TaskOutputRatingType = Field(description="The type of rating")

Rating for a specific requirement within a task output.

value: float
type: TaskOutputRatingType
model_config: ClassVar[pydantic.config.ConfigDict] = {}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
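
A minimal construction sketch for RequirementRating (values are illustrative):

    from kiln_ai.datamodel import RequirementRating, TaskOutputRatingType

    rating = RequirementRating(value=4.0, type=TaskOutputRatingType.five_star)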

class Prompt(kiln_ai.datamodel.basemodel.KilnParentedModel):
 7class Prompt(KilnParentedModel):
 8    """
 9    A prompt for a task.
10    """
11
12    name: str = NAME_FIELD
13    prompt: str = Field(
14        description="The prompt for the task.",
15        min_length=1,
16    )
17    chain_of_thought_instructions: str | None = Field(
18        default=None,
19        description="Instructions for the model 'thinking' about the requirement prior to answering. Used for chain of thought style prompting. COT will not be used unless this is provided.",
20    )

A prompt for a task.

name: str
prompt: str
chain_of_thought_instructions: str | None
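
A minimal construction sketch for Prompt (field values are illustrative):

    from kiln_ai.datamodel import Prompt

    prompt = Prompt(
        name="summarize-v1",
        prompt="Summarize the input in one paragraph.",
        # Chain of thought is only used when instructions are provided.
        chain_of_thought_instructions=None,
    )
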
def relationship_name() -> str:
436        def relationship_name_method() -> str:
437            return relationship_name

def parent_type() -> Type[kiln_ai.datamodel.basemodel.KilnParentModel]:
429        def parent_class_method() -> Type[KilnParentModel]:
430            return cls

model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
122                    def wrapped_model_post_init(self: BaseModel, context: Any, /) -> None:
123                        """We need to both initialize private attributes and call the user-defined model_post_init
124                        method.
125                        """
126                        init_private_attributes(self, context)
127                        original_model_post_init(self, context)

We need to both initialize private attributes and call the user-defined model_post_init method.

class TaskOutputRating(kiln_ai.datamodel.basemodel.KilnBaseModel):
 30class TaskOutputRating(KilnBaseModel):
 31    """
 32    A rating for a task output, including an overall rating and ratings for each requirement.
 33
 34    Supports:
 35    - five_star: 1-5 star ratings
 36    - pass_fail: boolean pass/fail (1.0 = pass, 0.0 = fail)
 37    - pass_fail_critical: tri-state (1.0 = pass, 0.0 = fail, -1.0 = critical fail)
 38    """
 39
 40    type: TaskOutputRatingType = Field(default=TaskOutputRatingType.five_star)
 41    value: float | None = Field(
 42        description="The rating value. Interpretation depends on rating type:\n- five_star: 1-5 stars\n- pass_fail: 1.0 (pass) or 0.0 (fail)\n- pass_fail_critical: 1.0 (pass), 0.0 (fail), or -1.0 (critical fail)",
 43        default=None,
 44    )
 45    requirement_ratings: Dict[ID_TYPE, RequirementRating] = Field(
 46        default={},
 47        description="The ratings of the requirements of the task.",
 48    )
 49
 50    # Previously we stored rating values as a dict of floats, but now we store them as RequirementRating objects.
 51    @model_validator(mode="before")
 52    def upgrade_old_format(cls, data: dict) -> dict:
 53        if not isinstance(data, dict):
 54            return data
 55
 56        # Check if we have the old format (dict of floats)
 57        req_ratings = data.get("requirement_ratings", {})
 58        if req_ratings and all(
 59            isinstance(v, (int, float)) for v in req_ratings.values()
 60        ):
 61            # Convert each float to a RequirementRating object
 62            # all ratings are five star at the point we used this format
 63            data["requirement_ratings"] = {
 64                k: {"value": v, "type": TaskOutputRatingType.five_star}
 65                for k, v in req_ratings.items()
 66            }
 67
 68        return data
 69
 70    # Used to select high quality outputs for example selection (MultiShotPromptBuilder, etc)
 71    def is_high_quality(self) -> bool:
 72        if self.value is None:
 73            return False
 74
 75        if self.type == TaskOutputRatingType.five_star:
 76            return self.value >= 4
 77        elif self.type == TaskOutputRatingType.pass_fail:
 78            return self.value == 1.0
 79        elif self.type == TaskOutputRatingType.pass_fail_critical:
 80            return self.value == 1.0
 81        return False
 82
 83    @model_validator(mode="after")
 84    def validate_rating(self) -> Self:
 85        if self.type not in TaskOutputRatingType:
 86            raise ValueError(f"Invalid rating type: {self.type}")
 87
 88        # Overall rating is optional
 89        if self.value is not None:
 90            self._validate_rating(self.type, self.value, "overall rating")
 91
 92        for req_id, req_rating in self.requirement_ratings.items():
 93            self._validate_rating(
 94                req_rating.type,
 95                req_rating.value,
 96                f"requirement rating for req ID: {req_id}",
 97            )
 98
 99        return self
100
101    def _validate_rating(
102        self, type: TaskOutputRatingType, rating: float | None, rating_name: str
103    ) -> None:
104        if type == TaskOutputRatingType.five_star:
105            self._validate_five_star(rating, rating_name)
106        elif type == TaskOutputRatingType.pass_fail:
107            self._validate_pass_fail(rating, rating_name)
108        elif type == TaskOutputRatingType.pass_fail_critical:
109            self._validate_pass_fail_critical(rating, rating_name)
110
111    def _validate_five_star(self, rating: float | None, rating_name: str) -> None:
112        if rating is None or not isinstance(rating, float) or not rating.is_integer():
113            raise ValueError(
114                f"{rating_name.capitalize()} of type five_star must be an integer value (1-5)"
115            )
116        if rating < 1 or rating > 5:
117            raise ValueError(
118                f"{rating_name.capitalize()} of type five_star must be between 1 and 5 stars"
119            )
120
121    def _validate_pass_fail(self, rating: float | None, rating_name: str) -> None:
122        if rating is None or not isinstance(rating, float) or not rating.is_integer():
123            raise ValueError(
124                f"{rating_name.capitalize()} of type pass_fail must be an integer value (0 or 1)"
125            )
126        if rating not in [0, 1]:
127            raise ValueError(
128                f"{rating_name.capitalize()} of type pass_fail must be 0 (fail) or 1 (pass)"
129            )
130
131    def _validate_pass_fail_critical(
132        self, rating: float | None, rating_name: str
133    ) -> None:
134        if rating is None or not isinstance(rating, float) or not rating.is_integer():
135            raise ValueError(
136                f"{rating_name.capitalize()} of type pass_fail_critical must be an integer value (-1, 0, or 1)"
137            )
138        if rating not in [-1, 0, 1]:
139            raise ValueError(
140                f"{rating_name.capitalize()} of type pass_fail_critical must be -1 (critical fail), 0 (fail), or 1 (pass)"
141            )

A rating for a task output, including an overall rating and ratings for each requirement.

Supports:

  • five_star: 1-5 star ratings
  • pass_fail: boolean pass/fail (1.0 = pass, 0.0 = fail)
  • pass_fail_critical: tri-state (1.0 = pass, 0.0 = fail, -1.0 = critical fail)
type: TaskOutputRatingType
value: float | None
requirement_ratings: Dict[Optional[str], RequirementRating]
@model_validator(mode='before')
def upgrade_old_format(cls, data: dict) -> dict:
51    @model_validator(mode="before")
52    def upgrade_old_format(cls, data: dict) -> dict:
53        if not isinstance(data, dict):
54            return data
55
56        # Check if we have the old format (dict of floats)
57        req_ratings = data.get("requirement_ratings", {})
58        if req_ratings and all(
59            isinstance(v, (int, float)) for v in req_ratings.values()
60        ):
61            # Convert each float to a RequirementRating object
62            # all ratings are five star at the point we used this format
63            data["requirement_ratings"] = {
64                k: {"value": v, "type": TaskOutputRatingType.five_star}
65                for k, v in req_ratings.items()
66            }
67
68        return data
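
For example, old-format float ratings are upgraded transparently when the model is validated (IDs are illustrative; the base-model fields are assumed to all have defaults):

    from kiln_ai.datamodel import TaskOutputRating, TaskOutputRatingType

    rating = TaskOutputRating(
        value=5.0,
        requirement_ratings={"req_1": 4.0},  # old float format
    )
    upgraded = rating.requirement_ratings["req_1"]
    assert upgraded.value == 4.0
    assert upgraded.type == TaskOutputRatingType.five_star
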
def is_high_quality(self) -> bool:
71    def is_high_quality(self) -> bool:
72        if self.value is None:
73            return False
74
75        if self.type == TaskOutputRatingType.five_star:
76            return self.value >= 4
77        elif self.type == TaskOutputRatingType.pass_fail:
78            return self.value == 1.0
79        elif self.type == TaskOutputRatingType.pass_fail_critical:
80            return self.value == 1.0
81        return False
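
With the default five_star type, only 4 and 5 star ratings count as high quality:

    from kiln_ai.datamodel import TaskOutputRating

    assert TaskOutputRating(value=4.0).is_high_quality()
    assert not TaskOutputRating(value=3.0).is_high_quality()
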
@model_validator(mode='after')
def validate_rating(self) -> Self:
83    @model_validator(mode="after")
84    def validate_rating(self) -> Self:
85        if self.type not in TaskOutputRatingType:
86            raise ValueError(f"Invalid rating type: {self.type}")
87
88        # Overall rating is optional
89        if self.value is not None:
90            self._validate_rating(self.type, self.value, "overall rating")
91
92        for req_id, req_rating in self.requirement_ratings.items():
93            self._validate_rating(
94                req_rating.type,
95                req_rating.value,
96                f"requirement rating for req ID: {req_id}",
97            )
98
99        return self
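
Out-of-range values are rejected at construction; pydantic surfaces the ValueError raised by the validator as a ValidationError:

    from pydantic import ValidationError

    from kiln_ai.datamodel import TaskOutputRating

    try:
        TaskOutputRating(value=6.0)  # five_star ratings must be 1-5
    except ValidationError:
        pass  # raised as expected
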
model_config = {'validate_assignment': True}

Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

def model_post_init(self: pydantic.main.BaseModel, context: Any, /) -> None:
122                    def wrapped_model_post_init(self: BaseModel, context: Any, /) -> None:
123                        """We need to both initialize private attributes and call the user-defined model_post_init
124                        method.
125                        """
126                        init_private_attributes(self, context)
127                        original_model_post_init(self, context)

We need to both initialize private attributes and call the user-defined model_post_init method.

class StructuredOutputMode(builtins.str, enum.Enum):
24class StructuredOutputMode(str, Enum):
25    """
26    Enumeration of supported structured output modes.
27
28    - default: let the adapter decide
29    - json_schema: request json using API capabilities for json_schema
30    - function_calling: request json using API capabilities for function calling
31    - json_mode: request json using API's JSON mode, which should return valid JSON, but isn't checking/passing the schema
32    - json_instructions: append instructions to the prompt to request json matching the schema. No API capabilities are used. You should have a custom parser on these models as they will be returning strings.
33    - json_instruction_and_object: append instructions to the prompt to request json matching the schema. Also request the response as json_mode via API capabilities (returning dictionaries).
34    """
35
36    default = "default"
37    json_schema = "json_schema"
38    function_calling = "function_calling"
39    json_mode = "json_mode"
40    json_instructions = "json_instructions"
41    json_instruction_and_object = "json_instruction_and_object"

Enumeration of supported structured output modes.

  • default: let the adapter decide
  • json_schema: request json using API capabilities for json_schema
  • function_calling: request json using API capabilities for function calling
  • json_mode: request json using API's JSON mode, which should return valid JSON, but isn't checking/passing the schema
  • json_instructions: append instructions to the prompt to request json matching the schema. No API capabilities are used. You should have a custom parser on these models as they will be returning strings.
  • json_instruction_and_object: append instructions to the prompt to request json matching the schema. Also request the response as json_mode via API capabilities (returning dictionaries).
default = <StructuredOutputMode.default: 'default'>
json_schema = <StructuredOutputMode.json_schema: 'json_schema'>
function_calling = <StructuredOutputMode.function_calling: 'function_calling'>
json_mode = <StructuredOutputMode.json_mode: 'json_mode'>
json_instructions = <StructuredOutputMode.json_instructions: 'json_instructions'>
json_instruction_and_object = <StructuredOutputMode.json_instruction_and_object: 'json_instruction_and_object'>
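
Because the enum derives from str, members compare equal to their raw values and can be constructed from stored strings:

    from kiln_ai.datamodel import StructuredOutputMode

    mode = StructuredOutputMode("json_schema")
    assert mode is StructuredOutputMode.json_schema
    assert mode == "json_schema"  # str-valued enum
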
class FinetuneDataStrategy(builtins.str, enum.Enum):
56class FinetuneDataStrategy(str, Enum):
57    final_only = "final_only"
58    final_and_intermediate = "final_and_intermediate"

Enumeration of supported finetune data strategies.

  • final_only: train on the final response only
  • final_and_intermediate: also train on intermediate outputs (e.g. chain-of-thought reasoning), not just the final response

final_only = <FinetuneDataStrategy.final_only: 'final_only'>
final_and_intermediate = <FinetuneDataStrategy.final_and_intermediate: 'final_and_intermediate'>