Coverage for src/configuraptor/core.py: 100%

226 statements  

« prev     ^ index     » next       coverage.py v7.8.1, created at 2025-05-22 10:42 +0200

1""" 

2Contains most of the loading logic. 

3""" 

4 

5import dataclasses as dc 

6import io 

7import os 

8import typing 

9import warnings 

10from pathlib import Path 

11from typing import Any, Type 

12 

13import requests 

14 

15from . import loaders 

16from .abs import AnyType, C, T, T_data, Type_C 

17from .alias import Alias, has_alias 

18from .binary_config import BinaryConfig 

19from .errors import ( 

20 ConfigErrorCouldNotConvert, 

21 ConfigErrorInvalidType, 

22 ConfigErrorMissingKey, 

23 FailedToLoad, 

24) 

25from .helpers import ( 

26 all_annotations, 

27 camel_to_snake, 

28 check_type, 

29 dataclass_field, 

30 find_pyproject_toml, 

31 is_custom_class, 

32 is_optional, 

33 is_parameterized, 

34) 

35from .postpone import Postponed 

36from .type_converters import CONVERTERS 

37 

38 

39def _data_for_nested_key(key: str, raw: dict[str, typing.Any]) -> dict[str, typing.Any]: 

40 """ 

41 If a key contains a dot, traverse the raw dict until the right key was found. 

42 

43 Example: 

44 key = some.nested.key 

45 raw = {"some": {"nested": {"key": {"with": "data"}}}} 

46 -> {"with": "data"} 

47 """ 

48 parts = key.split(".") 

49 while parts: 

50 key = parts.pop(0) 

51 if key not in raw: 

52 return {} 

53 

54 raw = raw[key] 

55 

56 return raw 

57 

58 

def _guess_key(clsname: str) -> str:
    """
    Derive the default key for `load_into` from a class name.

    Used when no key was defined manually: the class' name is passed through `camel_to_snake`.
    """
    default_key = camel_to_snake(clsname)
    return default_key

65 

66 

67def _from_mock_url(url: str) -> str: 

68 """ 

69 Pytest only: when starting a url with mock:// it is expected to just be json afterwards. 

70 """ 

71 return url.removeprefix("mock://") 

72 

73 

def guess_filetype_for_url(url: str, response: "requests.Response | None" = None) -> str:
    """
    Based on the url (which may have an extension) and the requests response
    (which may have a content-type), try to guess the right filetype (-> loader, e.g. json or yaml).

    Only the *path* of the url is inspected for an extension, so the host name
    (https://example.com -> not 'com'), the query string and the fragment are ignored.

    Args:
        url: the location the data was (or will be) loaded from.
        response: optional response object; only its `headers` mapping is used.

    Returns:
        The lowercase extension of the url path (without dot) if there is one;
        otherwise the lowercase subtype of the content-type header (unless that is 'plain');
        otherwise "json".
    """
    # local import: nothing else in this block needs it
    from urllib.parse import urlsplit

    try:
        path = urlsplit(url).path
    except ValueError:
        # Not parseable as a url (can happen for mock:// payloads): only drop the query string.
        path = url.split("?")[0]

    if url_extension := os.path.splitext(path)[1].lower():
        return url_extension.strip(".")

    # NOTE(review): truthiness (instead of `is not None`) is kept from the original;
    # presumably so unsuccessful responses (falsy in `requests`) are skipped - confirm.
    if response and (content_type_header := response.headers.get("content-type", "").split(";")[0].strip()):
        # media types are case-insensitive, the returned filetype is always lowercase
        content_type = content_type_header.split("/")[-1].lower()
        if content_type != "plain":
            return content_type

    # If both methods fail, default to JSON
    return "json"

92 

93 

def from_url(url: str, _dummy: bool = False) -> tuple[io.BytesIO, str]:
    """
    Fetch the contents behind `url` as a bytes file-like object, together with the guessed file type.

    - mock:// urls are not requested; the text after the prefix is used as contents.
    - with `_dummy`, no request is made either and the contents are "{}".
    - otherwise the url is requested with a timeout of 10 seconds; certificate verification is
      on unless the SSL_VERIFY environment variable is set to something other than "1".

    The result can be used by _load_data:
        > loader = loaders.get(filetype)
        > # dev/null exists but always returns b''
        > data = loader(contents, Path("/dev/null"))
    """
    response = None

    if url.startswith("mock://"):
        text = _from_mock_url(url)
    elif _dummy:
        text = "{}"
    else:
        verify_certificates = os.getenv("SSL_VERIFY", "1") == "1"
        response = requests.get(url, timeout=10, verify=verify_certificates)
        text = response.text

    filetype = guess_filetype_for_url(url, response)
    contents = io.BytesIO(text.encode())
    return contents, filetype

117 

118 

def _load_data(
    data: T_data,
    key: str = None,
    classname: str = None,
    lower_keys: bool = False,
    allow_types: tuple[type, ...] = (dict,),
) -> dict[str, typing.Any]:
    """
    Load the relevant section of `data`, which may be bytes, a list of sources, a url, a path or a dict.

    The section is selected by `key`; without a key, the only top-level key is used if there is
    exactly one, else the key is guessed from `classname` (e.g. class Tool -> key tool).
    Nested keys are supported (tool.extra -> {"tool": {"extra": ...}}).

    Raises:
        ValueError: for an empty list, when no data is left after selecting the key,
            or when the selected data is not one of `allow_types`.
    """
    if isinstance(data, bytes):
        # bytes in -> same bytes out, untouched.
        # `T_data` is re-used for the signature, which makes this hard to type for mypy.
        return data  # type: ignore

    if isinstance(data, list):
        if not data:
            raise ValueError("Empty list passed!")

        merged: dict[str, typing.Any] = {}
        for source in data:
            # NOTE(review): every source is loaded with lower_keys=True, regardless of the `lower_keys`
            # argument - presumably so keys of different sources line up when merging; confirm.
            merged |= load_data(source, key=key, classname=classname, lower_keys=True, allow_types=allow_types)

        return merged

    if isinstance(data, str):
        if data.startswith(("http://", "https://", "mock://")):
            contents, filetype = from_url(data)

            url_loader = loaders.get(filetype)
            # dev/null exists but always returns b''
            data = url_loader(contents, Path("/dev/null"))
        else:
            # any other string is treated as a file path (loaded below)
            data = Path(data)

    if isinstance(data, Path):
        config_path = data
        with config_path.open("rb") as handle:
            file_loader = loaders.get(config_path.suffix or config_path.name)
            data = file_loader(handle, config_path.resolve())

    if not data:
        return {}

    if key is None:
        # no key given: use the only top-level key, or else derive one from the class name
        if len(data) == 1:
            (key,) = data.keys()
        elif classname is not None:
            key = _guess_key(classname)

    if key:
        data = _data_for_nested_key(key, data)

    if not data:
        raise ValueError("No data found!")

    if not isinstance(data, allow_types):
        raise ValueError(f"Data should be one of {allow_types} but it is {type(data)}!")

    if lower_keys and isinstance(data, dict):
        data = {k.lower(): v for k, v in data.items()}

    return typing.cast(dict[str, typing.Any], data)

186 

187 

def load_data(
    data: T_data,
    key: str = None,
    classname: str = None,
    lower_keys: bool = False,
    allow_types: tuple[type, ...] = (dict,),
    strict: bool = False,
) -> dict[str, typing.Any]:
    """
    Wrapper around _load_data that retries with key="" if anything goes wrong.

    Without `data`, the result of `find_pyproject_toml()` is used as source.
    If the retry with key="" fails as well, `FailedToLoad` is raised in strict mode;
    otherwise a UserWarning is emitted and an empty dict is returned.
    """
    if data is None:
        # fall back to pyproject.toml
        data = find_pyproject_toml()

    try:
        return _load_data(data, key, classname, lower_keys=lower_keys, allow_types=allow_types)
    except Exception as error:
        # sourcery skip: remove-unnecessary-else, simplify-empty-collection-comparison, swap-if-else-branches
        # @sourcery: `key != ""` is NOT the same as `not key` (None must still trigger a retry)
        if key != "":
            # second attempt, now with key ""
            return load_data(data, "", classname, lower_keys=lower_keys, allow_types=allow_types, strict=strict)

        if strict:
            raise FailedToLoad(data) from error

        # e.g. if settings are to be loaded via a URL that is unavailable or returns invalid json
        warnings.warn(f"Data ('{data}') could not be loaded", source=error, category=UserWarning)
        return {}

217 

218 

F = typing.TypeVar("F")


def convert_between(from_value: F, from_type: Type[F], to_type: Type[T]) -> T:
    """
    Turn `from_value` (of type `from_type`) into a `to_type`.

    A converter registered in `CONVERTERS` for the (from_type, to_type) pair is preferred;
    without one, `to_type` itself is called with the value.
    """
    converter = CONVERTERS.get((from_type, to_type))
    if not converter:
        # no dedicated converter: just call the target type
        return to_type(from_value)  # type: ignore

    return typing.cast(T, converter(from_value))

231 

232 

def check_and_convert_type(value: Any, _type: Type[T], convert_types: bool, key: str = "variable") -> T:
    """
    Return `value` if it matches `_type`, otherwise try to convert it (when allowed).

    Args:
        value (Any): The value to be checked and potentially converted.
        _type (Type[T]): The expected type for the value.
        convert_types (bool): If True, a non-matching value is converted with `convert_between`.
        key (str, optional): The name of the variable, used in the invalid-type error.
            Defaults to "variable".

    Returns:
        T: The value itself when the type matches, None for an unresolved Alias with an optional type,
            or the converted value.

    Raises:
        ConfigErrorInvalidType: If the type does not match and conversion is not allowed,
            or if the value is an unresolved Alias for a non-optional type.
        ConfigErrorCouldNotConvert: If the conversion raised a TypeError or ValueError.
    """
    if check_type(value, _type):
        # nothing to do
        return value

    if isinstance(value, Alias):
        if not is_optional(_type):
            # unresolved alias, error should've already been thrown for parent but lets do it again:
            raise ConfigErrorInvalidType(value.to, value=value, expected_type=_type)

        return typing.cast(T, None)

    if not convert_types:
        # mismatch, and converting is not allowed
        raise ConfigErrorInvalidType(key, value=value, expected_type=_type)

    # mismatch, but converting is allowed
    actual_type = type(value)
    try:
        return convert_between(value, actual_type, _type)
    except (TypeError, ValueError) as e:
        raise ConfigErrorCouldNotConvert(actual_type, _type, value) from e

271 

272 

def ensure_types(
    data: dict[str, T], annotations: dict[str, type[T]], convert_types: bool = False
) -> dict[str, T | None]:
    """
    Make sure all values in 'data' are in line with the types stored in 'annotations'.

    - an annotated key that is missing from data is skipped with a warning;
    - Postponed values (and aliases pointing at a Postponed value) are left out of the result;
    - an Alias is replaced by the value of the key it points to, if that key is set;
    - everything else goes through `check_and_convert_type`.

    TODO: python 3.11 exception groups to throw multiple errors at once!
    """
    # unique marker instead of None, since None can be a valid value for typing.Optional!
    # (cast to T to make mypy happy)
    missing = typing.cast(T, object())

    checked: dict[str, T | None] = {}
    for name, expected in annotations.items():
        current = data.get(name, missing)
        if current is missing:  # pragma: nocover
            warnings.warn("This should not happen since `load_recursive` already fills `data` based on `annotations`")
            continue

        if isinstance(current, Postponed):
            # postponed items are not checked here
            continue

        if isinstance(current, Alias):
            target = data.get(current.to, missing)
            if target is not missing:
                if isinstance(target, Postponed):
                    # an alias of a postponed item is postponed as well
                    continue

                # the aliased key is set: use its value
                current = target

        checked[name] = check_and_convert_type(current, expected, convert_types, name)

    return checked

314 

315 

def convert_key(key: str) -> str:
    """
    Map every '-' and '.' in a key to '_', so the key can be used as a Config property name.
    """
    separators_to_underscore = str.maketrans("-.", "__")
    return key.translate(separators_to_underscore)

321 

322 

def convert_config(items: dict[str, T]) -> dict[str, T]:
    """
    Clean up the config dict (from toml) or 'overwrites' dict.

    - items with value None are dropped, since in that case the default should be used;
    - keys are passed through `convert_key` so they can be mapped to the Config properties.
    """
    converted: dict[str, T] = {}
    for raw_key, value in items.items():
        if value is None:
            continue

        converted[convert_key(raw_key)] = value

    return converted

331 

332 

def load_recursive(
    cls: AnyType, data: dict[str, T], annotations: dict[str, AnyType], convert_types: bool = False
) -> dict[str, T]:
    """
    Resolve the value for every annotated property of `cls`, loading nested custom classes on the way.

    Nested classes are loaded with `_load_into_recurse` (which ends up here again for that class).

    For a key that is present in `data`:
        - list[Custom]: every item is loaded into Custom;
        - dict[..., Custom]: every value is loaded into Custom (keys are kept as they are);
        - a Union: the value is loaded into each argument that is a custom class;
        - Custom: the value is loaded into Custom;
        - anything else is kept as it is.

    For a key that is missing from `data`, the first of these that applies is used:
        a (truthy) alias value, the class attribute, None for optional types,
        the default factory of a dataclass field.

    Example:
        class First:
            key: str

        class Second:
            other: First

        # cls = Second, data = {"other": {"key": "anything"}}, annotations = {"other": First}
        # -> First is loaded with data = {"key": "anything"}, annotations = {"key": str}

    Raises:
        ConfigErrorMissingKey: if a key is missing from `data` and none of the fallbacks apply.

    TODO: python 3.11 exception groups to throw multiple errors at once!
    """
    resolved = {}

    for name, annotated in annotations.items():
        if name in data:
            value: typing.Any = data[name]  # can be replaced by loaded instances, so Any instead of T
            if is_parameterized(annotated):
                origin = typing.get_origin(annotated)
                type_args = typing.get_args(annotated)
                if origin is list and type_args and is_custom_class(type_args[0]):
                    item_type = type_args[0]
                    value = [_load_into_recurse(item_type, item, convert_types=convert_types) for item in value]

                elif origin is dict and type_args and is_custom_class(type_args[1]):
                    # e.g. dict[str, Point]
                    # the key type is not a custom class, so only the values are loaded:
                    _, item_type = type_args
                    value = {
                        item_key: _load_into_recurse(item_type, item, convert_types=convert_types)
                        for item_key, item in value.items()
                    }
                # any other dict: keep data the same
                elif origin is typing.Union and type_args:
                    for candidate in type_args:
                        if is_custom_class(candidate):
                            value = _load_into_recurse(candidate, value, convert_types=convert_types)

                # todo: other parameterized/unions/typing.Optional

            elif is_custom_class(annotated):
                # the cast only tells mypy and pycharm that `annotated` is a custom class (C);
                # at runtime `annotated` itself is passed as first argument.
                custom_class = typing.cast(Type_C[typing.Any], annotated)
                value = _load_into_recurse(custom_class, value, convert_types=convert_types)
        elif value := has_alias(cls, name, data):
            # value was filled in via the alias
            pass
        elif name in cls.__dict__:
            # the class defines a default for this property
            value = cls.__dict__[name]
        elif is_optional(annotated):
            # optional without a class-level default -> None
            value = None
        elif dc.is_dataclass(cls) and (field := dataclass_field(cls, name)) and field.default_factory is not dc.MISSING:
            # dataclass field with a default factory
            # todo: do something with field.default?
            value = field.default_factory()
        else:
            raise ConfigErrorMissingKey(name, cls, annotated)

        resolved[name] = value

    return resolved

423 

424 

def check_and_convert_data(
    cls: typing.Type[C],
    data: dict[str, typing.Any],
    _except: typing.Iterable[str],
    strict: bool = True,
    convert_types: bool = False,
) -> dict[str, typing.Any]:
    """
    Prepare `data` for `cls`, based on the class annotations (gathered without the `_except` names).

    1. config keys are converted to python compatible keys and None values are dropped (`convert_config`);
    2. annotated custom classes are loaded with the same logic (see also `load_recursive`);
    3. in strict mode, the loaded values are checked against the annotated types (`ensure_types`).
    """
    annotations = all_annotations(cls, _except=_except)

    prepared = load_recursive(cls, convert_config(data), annotations, convert_types=convert_types)
    if not strict:
        return prepared

    return ensure_types(prepared, annotations, convert_types=convert_types)

448 

449 

450T_init_list = list[typing.Any] 

451T_init_dict = dict[str, typing.Any] 

452T_init = tuple[T_init_list, T_init_dict] | T_init_list | T_init_dict | None 

453 

454 

455@typing.no_type_check # (mypy doesn't understand 'match' fully yet) 

456def _split_init(init: T_init) -> tuple[T_init_list, T_init_dict]: 

457 """ 

458 Accept a tuple, a dict or a list of (arg, kwarg), {kwargs: ...}, [args] respectively and turn them all into a tuple. 

459 """ 

460 if not init: 

461 return [], {} 

462 

463 args: T_init_list = [] 

464 kwargs: T_init_dict = {} 

465 match init: 

466 case (args, kwargs): 

467 return args, kwargs 

468 case [*args]: 

469 return args, {} 

470 case {**kwargs}: 

471 return [], kwargs 

472 case _: 

473 raise ValueError("Init must be either a tuple of list and dict, a list or a dict.") 

474 

475 

def _load_into_recurse(
    cls: typing.Type[C],
    data: dict[str, typing.Any] | bytes,
    init: T_init = None,
    strict: bool = True,
    convert_types: bool = False,
) -> C:
    """
    Create an instance of `cls` filled with `data`.

    - bytes data or a BinaryConfig class: handled by `cls._parse_into`;
    - dataclasses: created from the checked data directly (`init` is not allowed);
    - data that already is an instance of `cls`: returned as it is;
    - other classes: created with the `init` arguments, after which the checked data
      (for annotations not yet in the instance's __dict__) is written into its __dict__.

    Uses `check_and_convert_data` (and thus `load_recursive`) to fill the annotated properties.
    """
    init_args, init_kwargs = _split_init(init)

    if isinstance(data, bytes) or issubclass(cls, BinaryConfig):
        if not isinstance(data, (bytes, dict)):  # pragma: no cover
            raise NotImplementedError("BinaryConfig can only deal with `bytes` or a dict of bytes as input.")
        if not issubclass(cls, BinaryConfig):  # pragma: no cover
            raise NotImplementedError("Only BinaryConfig can be used with `bytes` (or a dict of bytes) as input.")

        return typing.cast(C, cls._parse_into(data))

    if dc.is_dataclass(cls):
        fields = check_and_convert_data(cls, data, init_kwargs.keys(), strict=strict, convert_types=convert_types)
        if init:
            raise ValueError("Init is not allowed for dataclasses!")

        # the cast tells mypy this is an instance of `cls` (and not a fictitious `DataclassInstance`)
        return typing.cast(C, cls(**fields))

    if isinstance(data, cls):
        # already the right type! (e.g. Pathlib)
        return typing.cast(C, data)

    instance = cls(*init_args, **init_kwargs)
    fields = check_and_convert_data(
        cls, data, instance.__dict__.keys(), strict=strict, convert_types=convert_types
    )
    instance.__dict__.update(**fields)
    return instance

515 

516 

def _load_into_instance(
    inst: C,
    cls: typing.Type[C],
    data: dict[str, typing.Any],
    init: T_init = None,
    strict: bool = True,
    convert_types: bool = False,
) -> C:
    """
    Fill an existing instance (so after __init__) with `data`; `init` is therefore not supported.

    Annotations that already have a value in the instance's __dict__ are left alone.

    Raises:
        ValueError: if `init` is anything but None.
    """
    if init is not None:
        raise ValueError("Can not init an existing instance!")

    already_set = inst.__dict__.keys()
    fields = check_and_convert_data(cls, data, _except=already_set, strict=strict, convert_types=convert_types)
    inst.__dict__.update(**fields)

    return inst

542 

543 

def load_into_class(
    cls: typing.Type[C],
    data: T_data,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
) -> C:
    """
    Load `data` (with `load_data`) and create an instance of `cls` from it (with `_load_into_recurse`).

    For BinaryConfig classes, the loaded data may be bytes as well as a dict.
    """
    if issubclass(cls, BinaryConfig):
        allow_types = (dict, bytes)
    else:
        allow_types = (dict,)

    loaded = load_data(data, key, cls.__name__, lower_keys=lower_keys, allow_types=allow_types, strict=strict)
    return _load_into_recurse(cls, loaded, init=init, strict=strict, convert_types=convert_types)

560 

561 

def load_into_instance(
    inst: C,
    data: T_data,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
) -> C:
    """
    Load `data` (with `load_data`) and write it into the existing instance (with `_load_into_instance`).

    For instances of a BinaryConfig class, the loaded data may be bytes as well as a dict.
    """
    cls = inst.__class__
    if issubclass(cls, BinaryConfig):
        allow_types = (dict, bytes)
    else:
        allow_types = (dict,)

    loaded = load_data(data, key, cls.__name__, lower_keys=lower_keys, allow_types=allow_types, strict=strict)
    return _load_into_instance(inst, cls, loaded, init=init, strict=strict, convert_types=convert_types)

579 

580 

def load_into(
    cls: typing.Type[C],
    data: T_data = None,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
) -> C:
    """
    Load your config into a class (instance).

    The first argument may be a class or an instance, but since that is hard to explain to mypy,
    only classes are supported officially; use `load_into_instance` to load into an instance.

    Args:
        cls: either a class or an existing instance of that class.
        data: can be a dictionary or a path to a file to load (as pathlib.Path or str)
        key: optional (nested) dictionary key to load data from (e.g. 'tool.su6.specific')
        init: optional data to pass to your cls' __init__ method (only if cls is not an instance already)
        strict: enable type checks or allow anything?
        lower_keys: should the config keys be lowercased? (for .env)
        convert_types: should the types be converted to the annotated type if not yet matching? (for .env)

    """
    # An instance is not supported according to mypy, but load_into(instance) still works:
    # it is dispatched to `load_into_instance`, classes go to `load_into_class`.
    target_loader = load_into_class if isinstance(cls, type) else load_into_instance

    return target_loader(
        cls, data, key=key, init=init, strict=strict, lower_keys=lower_keys, convert_types=convert_types
    )