Coverage for src/configuraptor/core.py: 100%
226 statements
« prev ^ index » next coverage.py v7.8.1, created at 2025-05-22 10:42 +0200
« prev ^ index » next coverage.py v7.8.1, created at 2025-05-22 10:42 +0200
1"""
2Contains most of the loading logic.
3"""
5import dataclasses as dc
6import io
7import os
8import typing
9import warnings
10from pathlib import Path
11from typing import Any, Type
13import requests
15from . import loaders
16from .abs import AnyType, C, T, T_data, Type_C
17from .alias import Alias, has_alias
18from .binary_config import BinaryConfig
19from .errors import (
20 ConfigErrorCouldNotConvert,
21 ConfigErrorInvalidType,
22 ConfigErrorMissingKey,
23 FailedToLoad,
24)
25from .helpers import (
26 all_annotations,
27 camel_to_snake,
28 check_type,
29 dataclass_field,
30 find_pyproject_toml,
31 is_custom_class,
32 is_optional,
33 is_parameterized,
34)
35from .postpone import Postponed
36from .type_converters import CONVERTERS
39def _data_for_nested_key(key: str, raw: dict[str, typing.Any]) -> dict[str, typing.Any]:
40 """
41 If a key contains a dot, traverse the raw dict until the right key was found.
43 Example:
44 key = some.nested.key
45 raw = {"some": {"nested": {"key": {"with": "data"}}}}
46 -> {"with": "data"}
47 """
48 parts = key.split(".")
49 while parts:
50 key = parts.pop(0)
51 if key not in raw:
52 return {}
54 raw = raw[key]
56 return raw
def _guess_key(clsname: str) -> str:
    """
    Derive the default lookup key for `load_into` from a class name.

    Used when no key was passed manually: the class name is run through `camel_to_snake`.
    """
    default_key = camel_to_snake(clsname)
    return default_key
67def _from_mock_url(url: str) -> str:
68 """
69 Pytest only: when starting a url with mock:// it is expected to just be json afterwards.
70 """
71 return url.removeprefix("mock://")
def guess_filetype_for_url(url: str, response: requests.Response = None) -> str:
    """
    Guess which filetype (-> loader, e.g. json or yaml) belongs to a url.

    The guess is made in this order:
    1. the extension of the url (everything from the first '?' onwards is ignored), lowercased, without dots;
    2. the subtype of the response's content-type header (the part after the last '/'), unless that is 'plain';
    3. "json" as the fallback.
    """
    without_query = url.split("?")[0]
    _, extension = os.path.splitext(without_query)
    extension = extension.lower()
    if extension:
        return extension.strip(".")

    # note: this is a truthiness check on the response object, not an `is not None` check.
    if response:
        # drop parameters such as '; charset=utf-8'
        mime_type = response.headers.get("content-type", "").split(";")[0].strip()
        if mime_type:
            subtype = mime_type.split("/")[-1]
            if subtype != "plain":
                return subtype

    # neither the url nor the response gave a usable hint
    return "json"
def from_url(url: str, _dummy: bool = False) -> tuple[io.BytesIO, str]:
    """
    Fetch the contents behind a url and return them as a bytes buffer, together with the guessed file type.

    - mock:// urls are not requested: the text after the prefix is used as the contents;
    - with `_dummy`, nothing is requested either and the contents are "{}";
    - otherwise the url is requested with a timeout of 10 seconds. \
      Certificate verification is on unless the environment variable SSL_VERIFY is set to something other than "1".

    The result can be fed to a loader:
    > loader = loaders.get(filetype)
    > # dev/null exists but always returns b''
    > data = loader(contents, Path("/dev/null"))
    """
    response = None

    if url.startswith("mock://"):
        text = _from_mock_url(url)
    elif _dummy:
        text = "{}"
    else:
        verify_certificates = os.getenv("SSL_VERIFY", "1") == "1"
        response = requests.get(url, timeout=10, verify=verify_certificates)
        text = response.text

    guessed_type = guess_filetype_for_url(url, response)
    return io.BytesIO(text.encode()), guessed_type
def _load_data(
    data: T_data,
    key: str = None,
    classname: str = None,
    lower_keys: bool = False,
    allow_types: tuple[type, ...] = (dict,),
) -> dict[str, typing.Any]:
    """
    Load the relevant section of the config from bytes, a list of sources, a url, a file path or a dict.

    The section is selected with `key` (which may be nested: tool.extra -> {"tool": {"extra": ...}}).
    Without a key, the only top-level key is used if there is exactly one; otherwise the key is guessed
    from `classname` (e.g. class Tool will be mapped to key tool).

    Raises:
        ValueError: for an empty list, when no data is left after selecting the key,
            or when the selected data is not an instance of `allow_types`.
    """
    if isinstance(data, bytes):
        # bytes in -> the same bytes out, untouched.
        # `T_data` is re-used for in- and output, which is hard to express for mypy.
        return data  # type: ignore

    if isinstance(data, list):
        if not data:
            raise ValueError("Empty list passed!")

        merged: dict[str, typing.Any] = {}
        for source in data:
            # later sources overwrite earlier ones;
            # note that every source is loaded with lower_keys=True, regardless of the `lower_keys` argument.
            merged |= load_data(source, key=key, classname=classname, lower_keys=True, allow_types=allow_types)

        return merged

    if isinstance(data, str):
        if data.startswith(("http://", "https://", "mock://")):
            contents, filetype = from_url(data)

            url_loader = loaders.get(filetype)
            # dev/null exists but always returns b''
            data = url_loader(contents, Path("/dev/null"))
        else:
            # any other string is treated as a file path and handled below
            data = Path(data)

    if isinstance(data, Path):
        with data.open("rb") as handle:
            # files without a suffix are looked up by their full name
            file_loader = loaders.get(data.suffix or data.name)
            data = file_loader(handle, data.resolve())

    if not data:
        return {}

    if key is None:
        # try to guess key by grabbing the only one or using the class name
        if len(data) == 1:
            (key,) = data.keys()
        elif classname is not None:
            key = _guess_key(classname)

    if key:
        data = _data_for_nested_key(key, data)

    if not data:
        raise ValueError("No data found!")

    if not isinstance(data, allow_types):
        raise ValueError(f"Data should be one of {allow_types} but it is {type(data)}!")

    if lower_keys and isinstance(data, dict):
        lowered = {}
        for name, value in data.items():
            lowered[name.lower()] = value
        data = lowered

    return typing.cast(dict[str, typing.Any], data)
def load_data(
    data: T_data,
    key: str = None,
    classname: str = None,
    lower_keys: bool = False,
    allow_types: tuple[type, ...] = (dict,),
    strict: bool = False,
) -> dict[str, typing.Any]:
    """
    Wrapper around `_load_data` that retries with key="" if anything goes wrong.

    Without data, the result of `find_pyproject_toml()` is used as the source.
    If the retry with key="" fails as well, `FailedToLoad` is raised in strict mode;
    otherwise a UserWarning is emitted and an empty dict is returned.
    """
    if data is None:
        # try to load pyproject.toml
        data = find_pyproject_toml()

    try:
        return _load_data(data, key, classname, lower_keys=lower_keys, allow_types=allow_types)
    except Exception as error:
        # sourcery skip: remove-unnecessary-else, simplify-empty-collection-comparison, swap-if-else-branches
        # @sourcery: `key != ""` is NOT the same as `not key` (None must trigger the retry too)
        if key != "":
            # second attempt: the whole document instead of one section
            return load_data(data, "", classname, lower_keys=lower_keys, allow_types=allow_types, strict=strict)

        if strict:
            raise FailedToLoad(data) from error

        # e.g. if settings are to be loaded via a URL that is unavailable or returns invalid json
        warnings.warn(f"Data ('{data}') could not be loaded", source=error, category=UserWarning)
        return {}
# Type variable for the source type of a conversion (used in the signature of `convert_between`).
F = typing.TypeVar("F")
def convert_between(from_value: F, from_type: Type[F], to_type: Type[T]) -> T:
    """
    Convert a value from one type to another.

    A converter registered in `CONVERTERS` for the (from_type, to_type) pair takes precedence;
    without one, the target type is simply called with the value.
    """
    registered = CONVERTERS.get((from_type, to_type))
    if not registered:
        # default: just convert type:
        return to_type(from_value)  # type: ignore

    return typing.cast(T, registered(from_value))
def check_and_convert_type(value: Any, _type: Type[T], convert_types: bool, key: str = "variable") -> T:
    """
    Return the value if it matches the expected type, otherwise try to convert it (if allowed).

    Args:
        value (Any): The value to be checked and potentially converted.
        _type (Type[T]): The expected type for the value.
        convert_types (bool): If True, allows type conversion if the types do not match.
        key (str, optional): The name or key associated with the variable (used in error messages).
            Defaults to "variable".

    Returns:
        T: The value, potentially converted to the expected type.
            An unresolved `Alias` for an optional type results in None.

    Raises:
        ConfigErrorInvalidType: If the type does not match, and type conversion is not allowed
            (or the value is an unresolved `Alias` for a non-optional type).
        ConfigErrorCouldNotConvert: If type conversion fails with a TypeError or ValueError.
    """
    if check_type(value, _type):
        # nothing to do, the type already matches
        return value

    if isinstance(value, Alias):
        if not is_optional(_type):
            # unresolved alias, error should've already been thrown for parent but lets do it again:
            raise ConfigErrorInvalidType(value.to, value=value, expected_type=_type)

        return typing.cast(T, None)

    if not convert_types:
        # mismatch, and converting was not requested
        raise ConfigErrorInvalidType(key, value=value, expected_type=_type)

    # mismatch, but converting is allowed
    actual_type = type(value)
    try:
        return convert_between(value, actual_type, _type)
    except (TypeError, ValueError) as error:
        raise ConfigErrorCouldNotConvert(actual_type, _type, value) from error
def ensure_types(
    data: dict[str, T], annotations: dict[str, type[T]], convert_types: bool = False
) -> dict[str, T | None]:
    """
    Check (and optionally convert) every annotated value in 'data' against its type in 'annotations'.

    Only annotated keys end up in the result. Skipped entirely are:
    - keys that are missing from data (a warning is emitted);
    - `Postponed` values;
    - `Alias` values pointing to a `Postponed` value.
    An `Alias` whose target key is present in data is replaced by the target's value before checking.

    TODO: python 3.11 exception groups to throw multiple errors at once!
    """
    # unique marker instead of None, since None can be a legitimate (typing.Optional) value.
    # cast to T to make mypy happy
    sentinel = typing.cast(T, object())

    checked: dict[str, T | None] = {}
    for key, _type in annotations.items():
        candidate = data.get(key, sentinel)
        if candidate is sentinel:  # pragma: nocover
            warnings.warn("This should not happen since `load_recursive` already fills `data` based on `annotations`")
            continue

        if isinstance(candidate, Postponed):
            # leave postponed items alone
            continue

        if isinstance(candidate, Alias):
            target = data.get(candidate.to, sentinel)
            if isinstance(target, Postponed):
                # an alias of a postponed item is postponed as well
                continue

            if target is not sentinel:
                # the original key is set, so the alias takes over its value
                candidate = target

        checked[key] = check_and_convert_type(candidate, _type, convert_types, key)

    return checked
def convert_key(key: str) -> str:
    """
    Turn a config key into a name that can be mapped to a Config property: '-' and '.' become '_'.
    """
    separators_to_underscore = str.maketrans("-.", "__")
    return key.translate(separators_to_underscore)
def convert_config(items: dict[str, T]) -> dict[str, T]:
    """
    Prepare the config dict (from toml) or 'overwrites' dict for loading.

    1. items with the value None are dropped, since in that case the default should be used;
    2. keys are passed through `convert_key` so they can be mapped to the Config properties.
    """
    converted = {}
    for raw_key, value in items.items():
        if value is None:
            continue

        converted[convert_key(raw_key)] = value

    return converted
def load_recursive(
    cls: AnyType, data: dict[str, T], annotations: dict[str, AnyType], convert_types: bool = False
) -> dict[str, T]:
    """
    Resolve a value for every annotation (recursively gathered from parents with `all_annotations`).

    Values present in `data` are loaded into custom classes where the annotation asks for one
    (directly, as list items, as dict values or as member of a Union) via `_load_into_recurse`.
    For keys missing from `data`, the fallbacks are tried in this order:
    alias, class-level default, None for optional types, dataclass default factory.

    Example:
        class First:
            key: str

        class Second:
            other: First

        # step 1
        cls = Second
        data = {"second": {"other": {"key": "anything"}}}
        annotations: {"other": First}

        # step 1.5
        data = {"other": {"key": "anything"}
        annotations: {"other": First}

        # step 2
        cls = First
        data = {"key": "anything"}
        annotations: {"key": str}

    Raises:
        ConfigErrorMissingKey: if a key is missing from data and none of the fallbacks apply.

    TODO: python 3.11 exception groups to throw multiple errors at once!
    """
    resolved = {}

    for name, annotation in annotations.items():
        if name in data:
            current: typing.Any = data[name]  # the value can be replaced below, hence Any instead of T
            if is_parameterized(annotation):
                origin = typing.get_origin(annotation)
                type_args = typing.get_args(annotation)
                if origin is list and type_args and is_custom_class(type_args[0]):
                    # e.g. list[Point]
                    item_cls = type_args[0]
                    loaded_items = []
                    for item in current:
                        loaded_items.append(_load_into_recurse(item_cls, item, convert_types=convert_types))
                    current = loaded_items

                elif origin is dict and type_args and is_custom_class(type_args[1]):
                    # e.g. dict[str, Point]
                    # the key type is not a custom class, so keys are kept as they are:
                    _key_cls, value_cls = type_args
                    loaded_mapping = {}
                    for subkey, subvalue in current.items():
                        loaded_mapping[subkey] = _load_into_recurse(value_cls, subvalue, convert_types=convert_types)
                    current = loaded_mapping
                # any other dict: keep data the same
                elif origin is typing.Union and type_args:
                    for member in type_args:
                        if is_custom_class(member):
                            current = _load_into_recurse(member, current, convert_types=convert_types)

                # todo: other parameterized/unions/typing.Optional

            elif is_custom_class(annotation):
                # the annotation must be C (custom class) at this point;
                # the cast only tells mypy and pycharm so, the annotation itself is passed as first arg.
                custom_cls = typing.cast(Type_C[typing.Any], annotation)
                current = _load_into_recurse(custom_cls, current, convert_types=convert_types)
        elif aliased := has_alias(cls, name, data):
            # value supplied through an alias (only used when truthy)
            current = aliased
        elif name in cls.__dict__:
            # property has a default on the class, use that instead.
            current = cls.__dict__[name]
        elif is_optional(annotation):
            # optional and no default in __dict__ -> default is None
            current = None
        elif dc.is_dataclass(cls) and (field := dataclass_field(cls, name)) and field.default_factory is not dc.MISSING:
            # dataclass field with a default factory
            # todo: do something with field.default?
            current = field.default_factory()
        else:
            raise ConfigErrorMissingKey(name, cls, annotation)

        resolved[name] = current

    return resolved
def check_and_convert_data(
    cls: typing.Type[C],
    data: dict[str, typing.Any],
    _except: typing.Iterable[str],
    strict: bool = True,
    convert_types: bool = False,
) -> dict[str, typing.Any]:
    """
    Prepare raw config data for `_load_into_recurse`, based on the class annotations.

    1. config-keys are converted to python compatible config_keys (and None values are dropped);
    2. custom class type annotations are loaded with the same logic (see also `load_recursive`);
    3. in strict mode, the loaded values are checked against (or converted to) the annotated types.

    Annotations are gathered with `all_annotations`, which receives `_except`.
    """
    annotations = all_annotations(cls, _except=_except)

    loaded = load_recursive(cls, convert_config(data), annotations, convert_types=convert_types)
    if not strict:
        return loaded

    return ensure_types(loaded, annotations, convert_types=convert_types)
450T_init_list = list[typing.Any]
451T_init_dict = dict[str, typing.Any]
452T_init = tuple[T_init_list, T_init_dict] | T_init_list | T_init_dict | None
455@typing.no_type_check # (mypy doesn't understand 'match' fully yet)
456def _split_init(init: T_init) -> tuple[T_init_list, T_init_dict]:
457 """
458 Accept a tuple, a dict or a list of (arg, kwarg), {kwargs: ...}, [args] respectively and turn them all into a tuple.
459 """
460 if not init:
461 return [], {}
463 args: T_init_list = []
464 kwargs: T_init_dict = {}
465 match init:
466 case (args, kwargs):
467 return args, kwargs
468 case [*args]:
469 return args, {}
470 case {**kwargs}:
471 return [], kwargs
472 case _:
473 raise ValueError("Init must be either a tuple of list and dict, a list or a dict.")
def _load_into_recurse(
    cls: typing.Type[C],
    data: dict[str, typing.Any] | bytes,
    init: T_init = None,
    strict: bool = True,
    convert_types: bool = False,
) -> C:
    """
    Create an instance of `cls` and fill it with `data`.

    - bytes input and `BinaryConfig` classes are handed to `cls._parse_into`;
    - dataclasses are constructed from the checked data (`init` is not allowed there);
    - data that already is an instance of `cls` is returned as is;
    - any other class is constructed with the `init` arguments, after which the checked data is
      written into its `__dict__` (see `load_recursive` for an example of how the data is resolved).

    `init` can be used to optionally pass extra __init__ arguments. \
    NOTE: This will overwrite a config key with the same name!
    """
    init_args, init_kwargs = _split_init(init)

    if isinstance(data, bytes) or issubclass(cls, BinaryConfig):
        if not isinstance(data, (bytes, dict)):  # pragma: no cover
            raise NotImplementedError("BinaryConfig can only deal with `bytes` or a dict of bytes as input.")
        if not issubclass(cls, BinaryConfig):  # pragma: no cover
            raise NotImplementedError("Only BinaryConfig can be used with `bytes` (or a dict of bytes) as input.")

        return typing.cast(C, cls._parse_into(data))

    if dc.is_dataclass(cls):
        prepared = check_and_convert_data(cls, data, init_kwargs.keys(), strict=strict, convert_types=convert_types)
        if init:
            raise ValueError("Init is not allowed for dataclasses!")

        # the cast ensures mypy sees an instance of the cls type (and not a fictitious `DataclassInstance`)
        return typing.cast(C, cls(**prepared))

    if isinstance(data, cls):
        # already the right type! (e.g. Pathlib)
        return typing.cast(C, data)

    instance = cls(*init_args, **init_kwargs)
    # attributes already set by __init__ are excluded from the annotations to load
    prepared = check_and_convert_data(
        cls, data, instance.__dict__.keys(), strict=strict, convert_types=convert_types
    )
    instance.__dict__.update(**prepared)

    return instance
def _load_into_instance(
    inst: C,
    cls: typing.Type[C],
    data: dict[str, typing.Any],
    init: T_init = None,
    strict: bool = True,
    convert_types: bool = False,
) -> C:
    """
    Fill an existing instance (so after __init__) with `data` and return that same instance.

    Similar to `_load_into_recurse`, but since the instance already exists, passing `init` is an error.
    Attributes that are already present in the instance's `__dict__` are passed as `_except`
    to `check_and_convert_data`.
    """
    if init is not None:
        raise ValueError("Can not init an existing instance!")

    attributes = inst.__dict__
    prepared = check_and_convert_data(
        cls, data, _except=attributes.keys(), strict=strict, convert_types=convert_types
    )
    attributes.update(**prepared)

    return inst
def load_into_class(
    cls: typing.Type[C],
    data: T_data,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
) -> C:
    """
    Shortcut for `load_data` + `_load_into_recurse`.

    The class name is used to guess the key if none is given;
    subclasses of `BinaryConfig` accept bytes in addition to a dict.
    """
    if issubclass(cls, BinaryConfig):
        allow_types = (dict, bytes)
    else:
        allow_types = (dict,)

    loaded = load_data(data, key, cls.__name__, lower_keys=lower_keys, allow_types=allow_types, strict=strict)
    return _load_into_recurse(cls, loaded, init=init, strict=strict, convert_types=convert_types)
def load_into_instance(
    inst: C,
    data: T_data,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
) -> C:
    """
    Shortcut for `load_data` + `_load_into_instance`.

    The name of the instance's class is used to guess the key if none is given;
    instances of `BinaryConfig` subclasses accept bytes in addition to a dict.
    """
    cls = inst.__class__
    if issubclass(cls, BinaryConfig):
        allow_types = (dict, bytes)
    else:
        allow_types = (dict,)

    loaded = load_data(data, key, cls.__name__, lower_keys=lower_keys, allow_types=allow_types, strict=strict)
    return _load_into_instance(inst, cls, loaded, init=init, strict=strict, convert_types=convert_types)
def load_into(
    cls: typing.Type[C],
    data: T_data = None,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
) -> C:
    """
    Load your config into a class (instance).

    Both a class and an instance work as first argument, but that's hard to explain to mypy, so officially only
    classes are supported, and if you want to `load_into` an instance, you should use `load_into_instance`.

    Args:
        cls: either a class or an existing instance of that class.
        data: can be a dictionary or a path to a file to load (as pathlib.Path or str)
        key: optional (nested) dictionary key to load data from (e.g. 'tool.su6.specific')
        init: optional data to pass to your cls' __init__ method (only if cls is not an instance already)
        strict: enable type checks or allow anything?
        lower_keys: should the config keys be lowercased? (for .env)
        convert_types: should the types be converted to the annotated type if not yet matching? (for .env)

    """
    options = {
        "key": key,
        "init": init,
        "strict": strict,
        "lower_keys": lower_keys,
        "convert_types": convert_types,
    }

    if isinstance(cls, type):
        return load_into_class(cls, data, **options)

    # would not be supported according to mypy, but you can still load_into(instance)
    return load_into_instance(cls, data, **options)