# Source code for materforge.parsing.validation.property_type_detector

import logging
import re
from enum import auto, Enum
from typing import Any, Dict, Set

import sympy as sp

from materforge.parsing.config.yaml_keys import (
    FILE_PATH_KEY, DEPENDENCY_COLUMN_KEY, PROPERTY_COLUMN_KEY, BOUNDS_KEY,
    REGRESSION_KEY, DEPENDENCY_KEY, EQUATION_KEY, CONSTANT_KEY,
    EXTRAPOLATE_KEY, SIMPLIFY_KEY, DEGREE_KEY, SEGMENTS_KEY, PRE_KEY, POST_KEY,
    MELTING_TEMPERATURE_KEY, BOILING_TEMPERATURE_KEY, SOLIDUS_TEMPERATURE_KEY,
    LIQUIDUS_TEMPERATURE_KEY, INITIAL_BOILING_TEMPERATURE_KEY, FINAL_BOILING_TEMPERATURE_KEY, VALUE_KEY
)
from materforge.data.constants import ProcessingConstants

logger = logging.getLogger(__name__)


# --- Enum ---
class PropertyType(Enum):
    """Enumeration of the configuration patterns a property definition can take.

    Member values are assigned by ``auto()`` in declaration order, so the
    order below must not be changed if the numeric values matter to callers.
    """

    CONSTANT_VALUE = auto()
    STEP_FUNCTION = auto()
    FILE_IMPORT = auto()
    TABULAR_DATA = auto()
    PIECEWISE_EQUATION = auto()
    COMPUTED_PROPERTY = auto()
    INVALID = auto()
# --- Main Class ---
class PropertyTypeDetector:
    """Utility class for detecting and validating property types from configuration values.

    Detection (``determine_property_type``) is a cheap, rule-based
    classification; validation (``validate_property_config``) then performs
    the strict structural checks for the detected type. All methods are
    static; the class only serves as a namespace.
    """

    # --- DETECTION RULES ---
    # The order is crucial: more specific patterns must come before general ones.
    # Each entry is (predicate(config_dict) -> bool, PropertyType); the first
    # predicate that returns a truthy value wins.
    DETECTION_RULES = [
        # Unique key checks first (most efficient)
        (lambda c: FILE_PATH_KEY in c, PropertyType.FILE_IMPORT),
        # Patterns sharing keys (order matters)
        (lambda c: DEPENDENCY_KEY in c and VALUE_KEY in c and PropertyTypeDetector._is_step_function(c),
         PropertyType.STEP_FUNCTION),
        (lambda c: DEPENDENCY_KEY in c and VALUE_KEY in c, PropertyType.TABULAR_DATA),
        (lambda c: DEPENDENCY_KEY in c and EQUATION_KEY in c and isinstance(c.get(EQUATION_KEY), list),
         PropertyType.PIECEWISE_EQUATION),
        (lambda c: DEPENDENCY_KEY in c and EQUATION_KEY in c and isinstance(c.get(EQUATION_KEY), str),
         PropertyType.COMPUTED_PROPERTY),
    ]

    # --- Main Public API ---
    @staticmethod
    def determine_property_type(prop_name: str, config: Any) -> PropertyType:
        """Determines the property type using a declarative, rule-based approach.

        Args:
            prop_name: Name of the property, used only for messages.
            config: Raw configuration value of the property.

        Returns:
            ``PropertyType.CONSTANT_VALUE`` for constant-looking scalars,
            otherwise the type of the first matching entry in
            ``DETECTION_RULES``.

        Raises:
            ValueError: If ``config`` is an integer, is neither a
                constant-looking scalar nor a dictionary, or is a dictionary
                matching none of the detection rules.
        """
        logger.debug("Determining property type for '%s'", prop_name)
        if PropertyTypeDetector._is_constant_format(config):
            return PropertyType.CONSTANT_VALUE
        if not isinstance(config, dict):
            raise ValueError(f"Property '{prop_name}' has an invalid format. "
                             f"Expected a dictionary or a numeric constant, but got {type(config).__name__}.")
        for detector, prop_type in PropertyTypeDetector.DETECTION_RULES:
            if detector(config):
                logger.debug("Detected property '%s' as type: %s", prop_name, prop_type.name)
                return prop_type
        present_keys = sorted(config)
        raise ValueError(f"Property '{prop_name}' doesn't match any known configuration pattern. "
                         f"Present keys: {present_keys}.")

    # --- High-Level Detectors (for DETECTION_RULES) ---
    @staticmethod
    def _is_constant_format(val: Any) -> bool:
        """Checks if the value has the format of a numeric constant.

        Returns True for floats and for strings containing ``'.'`` or an
        ``'e'``/``'E'``; this is only a format check, the strict conversion
        happens in ``_validate_constant_value``.

        Raises:
            ValueError: If ``val`` is a (non-boolean) integer.
        """
        # bool is a subclass of int; without this guard a boolean would be
        # reported as an integer with the nonsensical hint "True.0".
        if isinstance(val, bool):
            return False
        if isinstance(val, int):
            raise ValueError(f"must be defined as a float, not an integer. Use decimal format like '{val}.0'")
        return isinstance(val, float) or (isinstance(val, str) and ('.' in val or 'e' in val.lower()))

    @staticmethod
    def _is_step_function(config: Dict[str, Any]) -> bool:
        """
        A quick, non-validating check if a config looks like a step function.
        A step function has a list of 2 values AND a single temperature point (not a list).
        """
        val_list = config.get(VALUE_KEY)
        temp_def = config.get(DEPENDENCY_KEY)
        is_two_values = isinstance(val_list, list) and len(val_list) == 2
        is_single_temp = not isinstance(temp_def, list)  # Must be a string or number
        return is_two_values and is_single_temp

    # --- Strict Validators (called by the parser) ---
    @staticmethod
    def validate_property_config(prop_name: str, config: Any, prop_type: PropertyType) -> None:
        """Performs strict validation based on the detected property type.

        Args:
            prop_name: Name of the property, used for messages.
            config: Raw configuration value of the property.
            prop_type: Type previously detected for ``config``.

        Raises:
            NotImplementedError: If no validator exists for ``prop_type``.
            ValueError: If the type-specific validator fails for any reason;
                the original exception is attached as ``__cause__``.
        """
        logger.debug("Validating property '%s' for type: %s", prop_name, prop_type.name)
        validator_map = {
            PropertyType.CONSTANT_VALUE: PropertyTypeDetector._validate_constant_value,
            PropertyType.STEP_FUNCTION: PropertyTypeDetector._validate_step_function,
            PropertyType.FILE_IMPORT: PropertyTypeDetector._validate_file_import,
            PropertyType.TABULAR_DATA: PropertyTypeDetector._validate_tabular_data,
            PropertyType.PIECEWISE_EQUATION: PropertyTypeDetector._validate_piecewise_equation,
            PropertyType.COMPUTED_PROPERTY: PropertyTypeDetector._validate_computed_property,
        }
        validator = validator_map.get(prop_type)
        if validator is None:
            raise NotImplementedError(f"No validation implemented for property type: {prop_type.name}")
        try:
            validator(prop_name, config)
        except Exception as e:
            # Deliberately broad: every validator failure is normalised to a
            # ValueError carrying the property name and expected type.
            raise ValueError(
                f"Invalid configuration for '{prop_name}' (expected type {prop_type.name}): {str(e)}") from e

    # --- Strict Validators (called by validate_property_config) ---
    @staticmethod
    def _validate_constant_value(prop_name: str, val: Any) -> None:
        """Ensures ``val`` can be converted with ``float()``; raises ValueError otherwise."""
        try:
            float(val)
        except (ValueError, TypeError) as e:
            raise ValueError(f"'{prop_name}' could not be converted to a float. Invalid value: '{val}'") from e

    @staticmethod
    def _transition_refs() -> Set[str]:
        """Returns the set of transition names accepted as a step-function reference."""
        return {
            MELTING_TEMPERATURE_KEY, SOLIDUS_TEMPERATURE_KEY, LIQUIDUS_TEMPERATURE_KEY,
            INITIAL_BOILING_TEMPERATURE_KEY, FINAL_BOILING_TEMPERATURE_KEY, BOILING_TEMPERATURE_KEY
        }

    @staticmethod
    def _validate_step_function(prop_name: str, config: Dict[str, Any]) -> None:
        """Validates a STEP_FUNCTION config.

        Checks the allowed keys, the optional bounds, that the value entry is
        a list of exactly two float-convertible items, and that the dependency
        entry is either a number, a known transition name, or an arithmetic
        expression (per ``ProcessingConstants.TEMP_ARITHMETIC_REGEX``) whose
        base reference is a known transition name.

        Raises:
            ValueError: On any violation.
        """
        required = {DEPENDENCY_KEY, VALUE_KEY}
        optional = {BOUNDS_KEY}
        PropertyTypeDetector._check_keys(config, required, optional, "STEP_FUNCTION")
        if BOUNDS_KEY in config:
            PropertyTypeDetector._check_bounds(config[BOUNDS_KEY])

        val_list = config[VALUE_KEY]
        if not isinstance(val_list, list) or len(val_list) != 2:
            raise ValueError(f"'value' for a step function must be a list of exactly two numbers, got {val_list}")
        try:
            for item in val_list:
                float(item)
        except (ValueError, TypeError) as e:
            raise ValueError(f"step function values must be numeric, got {val_list}") from e

        temp_def = config[DEPENDENCY_KEY]
        if isinstance(temp_def, str):
            valid_refs = PropertyTypeDetector._transition_refs()
            # Check if it's a valid arithmetic expression
            match = re.match(ProcessingConstants.TEMP_ARITHMETIC_REGEX, temp_def.strip())
            if match:
                # If it matches, check if the base reference is valid
                base_ref = match.group(1)
                if base_ref not in valid_refs:
                    raise ValueError(f"invalid base temperature reference '{base_ref}' in expression '{temp_def}'. "
                                     f"Allowed base references are: {sorted(valid_refs)}")
            elif temp_def not in valid_refs:
                # If not arithmetic, it must be an exact reference
                raise ValueError(f"invalid temperature reference '{temp_def}'. "
                                 f"Must be a numeric value, a valid transition name, or an arithmetic expression "
                                 f"(e.g., 'melting_temperature + 10').")
        elif isinstance(temp_def, bool) or not isinstance(temp_def, (int, float)):
            # bool is an int subclass but is not a meaningful temperature.
            raise ValueError(f"'temperature' must be a numeric value or a valid transition reference, got '{temp_def}'")

    @staticmethod
    def _validate_file_import(prop_name: str, config: Dict[str, Any]) -> None:
        """Validates keys, bounds and optional regression block of a FILE_IMPORT config."""
        required = {FILE_PATH_KEY, DEPENDENCY_COLUMN_KEY, PROPERTY_COLUMN_KEY, BOUNDS_KEY}
        optional = {REGRESSION_KEY}
        PropertyTypeDetector._check_keys(config, required, optional, "FILE_IMPORT")
        PropertyTypeDetector._check_bounds(config[BOUNDS_KEY])
        if REGRESSION_KEY in config:
            PropertyTypeDetector._check_regression(config[REGRESSION_KEY])

    @staticmethod
    def _validate_tabular_data(prop_name: str, config: Dict[str, Any]) -> None:
        """Validates a TABULAR_DATA config.

        Besides keys, bounds and the optional regression block, the value
        entry must be a list, and when the dependency entry is also a list
        both must have the same length.
        """
        required = {DEPENDENCY_KEY, VALUE_KEY, BOUNDS_KEY}
        optional = {REGRESSION_KEY}
        PropertyTypeDetector._check_keys(config, required, optional, "TABULAR_DATA")
        PropertyTypeDetector._check_bounds(config[BOUNDS_KEY])
        if REGRESSION_KEY in config:
            PropertyTypeDetector._check_regression(config[REGRESSION_KEY])

        temp_def = config[DEPENDENCY_KEY]
        val_list = config[VALUE_KEY]
        if not isinstance(val_list, list):
            raise ValueError("'value' for a key-val property must be a list.")
        if isinstance(temp_def, list) and len(temp_def) != len(val_list):
            raise ValueError(f"temperature list (length {len(temp_def)}) and value list (length {len(val_list)}) "
                             f"must have the same length")

    @staticmethod
    def _validate_piecewise_equation(prop_name: str, config: Dict[str, Any]) -> None:
        """Validates keys, bounds, optional regression and the list type of the equation entry."""
        required = {DEPENDENCY_KEY, EQUATION_KEY, BOUNDS_KEY}
        optional = {REGRESSION_KEY}
        PropertyTypeDetector._check_keys(config, required, optional, "PIECEWISE_EQUATION")
        PropertyTypeDetector._check_bounds(config[BOUNDS_KEY])
        if REGRESSION_KEY in config:
            PropertyTypeDetector._check_regression(config[REGRESSION_KEY])
        if not isinstance(config[EQUATION_KEY], list):
            raise ValueError("'equation' for a piecewise equation must be a list of strings")

    @staticmethod
    def _validate_computed_property(prop_name: str, config: Dict[str, Any]) -> None:
        """Validates a COMPUTED_PROPERTY config, including that the equation string parses with SymPy."""
        required = {DEPENDENCY_KEY, EQUATION_KEY, BOUNDS_KEY}
        optional = {REGRESSION_KEY}
        PropertyTypeDetector._check_keys(config, required, optional, "COMPUTED_PROPERTY")
        PropertyTypeDetector._check_bounds(config[BOUNDS_KEY])
        if REGRESSION_KEY in config:
            PropertyTypeDetector._check_regression(config[REGRESSION_KEY])

        equation = config[EQUATION_KEY]
        if not isinstance(equation, str):
            raise ValueError("'equation' for a computed property must be a string")
        try:
            # SECURITY NOTE(review): sympify() evaluates its string argument
            # with eval(); only feed it configuration from trusted sources.
            sp.sympify(equation)
        except (sp.SympifyError, TypeError) as e:
            raise ValueError(f"invalid mathematical expression in 'equation': {str(e)}") from e

    # --- Low-Level Validation Helpers ---
    @staticmethod
    def _check_keys(value: Dict[str, Any], required: Set[str], optional: Set[str], context: str) -> None:
        """Raises ValueError if ``value`` lacks a required key or has a key that is neither required nor optional."""
        keys = set(value.keys())
        missing = required - keys
        if missing:
            raise ValueError(f"missing required keys for {context} property: {sorted(missing)}")
        extra = keys - required - optional
        if extra:
            raise ValueError(f"found unexpected keys for {context} property: {sorted(extra)}")

    @staticmethod
    def _check_bounds(bounds: Any) -> None:
        """Raises ValueError unless ``bounds`` is a two-element list of allowed bound types."""
        if not isinstance(bounds, list) or len(bounds) != 2:
            raise ValueError("'bounds' must be a list of exactly two elements")
        # A tuple keeps the error message order deterministic and makes the
        # membership test safe for unhashable entries (e.g. nested lists).
        valid = (CONSTANT_KEY, EXTRAPOLATE_KEY)
        if bounds[0] not in valid or bounds[1] not in valid:
            raise ValueError(f"bound types must be one of {list(valid)}, got {bounds}")

    @staticmethod
    def _is_positive_int(number: Any) -> bool:
        """Returns True only for real integers >= 1 (booleans are rejected)."""
        return isinstance(number, int) and not isinstance(number, bool) and number >= 1

    @staticmethod
    def _check_regression(reg: Dict[str, Any]) -> None:
        """Validates the regression sub-configuration.

        It must be a dictionary with exactly the simplify, degree and segments
        keys; simplify must be one of the two allowed modes, degree and
        segments must be positive integers.

        Raises:
            ValueError: On any violation.
        """
        if not isinstance(reg, dict):
            raise ValueError(f"'regression' must be a dictionary, got {type(reg).__name__}")
        PropertyTypeDetector._check_keys(reg, {SIMPLIFY_KEY, DEGREE_KEY, SEGMENTS_KEY}, set(), "regression")
        # Tuple membership avoids a TypeError for unhashable values.
        if reg[SIMPLIFY_KEY] not in (PRE_KEY, POST_KEY):
            raise ValueError(f"regression 'simplify' must be '{PRE_KEY}' or '{POST_KEY}'")
        if not PropertyTypeDetector._is_positive_int(reg[DEGREE_KEY]):
            raise ValueError("regression 'degree' must be a positive integer")
        if not PropertyTypeDetector._is_positive_int(reg[SEGMENTS_KEY]):
            raise ValueError("regression 'segments' must be a positive integer")