Coverage for src/dtexp/parse_expression.py: 100%
112 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-07 14:19 +0100
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-07 14:19 +0100
1"""Parse dtexp datetime expressions."""
3import datetime
4import re
5from typing import Literal
7from dtexp.addition import apply_addition_via_pendulum
8from dtexp.conditions import handle_condition_expression
9from dtexp.exceptions import DtexpParsingError
10from dtexp.parse_timestamp import parse_timestamp_from_start
# Tokens that subtract a timedelta from the running datetime.
_MINUS_OPERATORS = {"-", "minus"}

# Tokens that add a timedelta to the running datetime.
_PLUS_OPERATORS = {"+", "plus"}

# All operators taking exactly one right-hand operand: add, subtract and "/" (round down).
_BASIC_OPERATORS = _MINUS_OPERATORS | _PLUS_OPERATORS | {"/"}

# Keywords that introduce a condition expression.
_CONDITION_OPERATORS = {"next", "last", "upcoming", "previous"}
def parse_dtexp(
    expression: str,
    *,
    to_utc: bool = True,
    default_unaware_timezone: datetime.timezone = datetime.UTC,
    now: datetime.datetime | None = None,
    max_iso_timestamp_length: int = 35,
    fixed_iso_timestamp_length: int | None = None,
    max_iter: int = 1000,
    allow_conditions: bool = True,
) -> datetime.datetime:
    """Resolve a dtexp datetime expression into a datetime.

    The expression starts with a timestamp (or "now"), optionally followed by
    operators and conditions, e.g. "now - 2w / w". The leading timestamp is
    parsed first; everything after it is tokenized and applied left to right.

    Args:
    ----
        expression: The dtexp datetime expression, e.g. "now - 2w / w".
        to_utc: Whether the initial timestamp is immediately converted to utc.
            This happens before all operations are applied and leads to utc result.
        default_unaware_timezone: The timezone that is assumed for unaware absolute
            timestamps or now at the beginning of the expression.
        now: Explicit value for "now" that is used instead of asking the system
            clock.
        max_iso_timestamp_length: The beginning of the expression is scanned for
            iso timestamps. If you know that your source does e.g. not use
            microseconds, you can reduce this in order to fail earlier for
            malformed expressions.
        fixed_iso_timestamp_length: If you know exactly how long isoformat
            timestamps at the beginning of the expression can be, you can set this
            to fail earlier on malformed expressions.
        max_iter: Evaluating conditions is iterative (brute force). This controls
            the maximum amount of iteration steps before failing.
        allow_conditions: May be set to False for expressions from untrusted
            sources, since evaluating conditions can have negative performance
            impacts.

    Returns:
    -------
        The parsed datetime object. Always timezone-aware.

    Raises:
    ------
        DtexpParsingError if parsing fails for any reason.

    """
    anchor, rest = parse_timestamp_from_start(
        expression,
        to_utc=to_utc,
        default_unaware_timezone=default_unaware_timezone,
        now=now,
        max_iso_timestamp_length=max_iso_timestamp_length,
        fixed_iso_timestamp_length=fixed_iso_timestamp_length,
    )

    # Without a start datetime there is nothing the operators could act on.
    if anchor is None:
        raise DtexpParsingError("Could not parse a start datetime from beginning of expression")

    tokens = split_expression(rest)
    return parse_time_expression(
        split_result=tokens,
        left=anchor,
        max_iter=max_iter,
        allow_conditions=allow_conditions,
    )
def parse_dtexp_interval(
    expression_left: str,
    expression_right: str,
    *,
    to_utc: bool = True,
    default_unaware_timezone: datetime.timezone = datetime.UTC,
    now: datetime.datetime | None = None,
    max_iso_timestamp_length: int = 35,
    fixed_iso_timestamp_length: int | None = None,
    max_iter: int = 1000,
    allow_conditions: bool = True,
) -> tuple[datetime.datetime, datetime.datetime]:
    """Resolve both ends of an interval with identical parsing options.

    Convenience wrapper around parse_dtexp: every keyword argument is forwarded
    unchanged to both calls. The left expression is resolved first, then the
    right one.

    NOTE(review): ``now`` is forwarded as given. If it is None, each side
    presumably determines its own "now" independently; pass an explicit value
    when both ends must share the same reference instant.

    Returns the resolved dtexp expression results as a pair (left, right).
    """

    def resolve(expression: str) -> datetime.datetime:
        # Single place that fixes the options shared by both interval ends.
        return parse_dtexp(
            expression,
            to_utc=to_utc,
            default_unaware_timezone=default_unaware_timezone,
            now=now,
            max_iso_timestamp_length=max_iso_timestamp_length,
            fixed_iso_timestamp_length=fixed_iso_timestamp_length,
            max_iter=max_iter,
            allow_conditions=allow_conditions,
        )

    interval_start = resolve(expression_left)
    interval_end = resolve(expression_right)
    return interval_start, interval_end
def parse_timedelta_for_pendulum_add(
    timedelta_str: str, sign: Literal[1, -1] = 1
) -> dict[str, int]:
    """Translate a timedelta token into keyword data for a pendulum add.

    The token is an integer followed by a unit suffix, e.g. "5d" or "10min".
    The integer is multiplied by ``sign`` so the same function serves both
    addition and subtraction.

    Returns:
        dict of form {"days": -5} or {"months": 2} or similar that can be passed
        as kwargs into pendulum instance's .add method

    Raises:
        DtexpParsingError if the suffix is unknown or the number cannot be parsed.

    """
    # Checked in order; "us" must precede "s" because every "...us" token
    # also ends with "s".
    suffix_to_unit = (
        ("min", "minutes"),
        ("us", "microseconds"),
        ("w", "weeks"),
        ("d", "days"),
        ("m", "months"),
        ("h", "hours"),
        ("s", "seconds"),
        ("y", "years"),
    )

    try:
        for suffix, unit in suffix_to_unit:
            if timedelta_str.endswith(suffix):
                return {unit: sign * int(timedelta_str.removesuffix(suffix))}

        msg = f"Unknown timedelta str {timedelta_str}"
        raise DtexpParsingError(msg)
    except (ValueError, TypeError) as e:
        msg = f"Error parsing timedelta {timedelta_str}"
        raise DtexpParsingError(msg) from e
def extract_int_before_suffix(text: str, suffix: str, default_for_empty_num: int = 0) -> int:
    """Return the integer that precedes ``suffix`` in ``text``.

    E.g. for text "32d" with suffix "d" the result is 32. Whitespace around
    the number is ignored.

    When nothing (apart from whitespace) precedes the suffix,
    default_for_empty_num is returned.

    Raises DtexpParsingError if the part before the suffix is not an integer.
    """
    digits = text.removesuffix(suffix).strip()

    # A bare unit such as "d" carries no number at all.
    if not digits:
        return default_for_empty_num

    try:
        return int(digits)
    except (ValueError, TypeError) as e:
        msg = f"Could not parse integer at beginning of timedelta string: {text}"
        raise DtexpParsingError(msg) from e
def largest_fitting_multiple(current_val: int, num: int) -> int:
    """Round current_val down to a multiple of num.

    The result is the largest multiple of num that fits into current_val.
    For num=0 there is no such multiple, so current_val is returned unchanged.
    """
    if num == 0:
        return current_val

    whole_steps, _ = divmod(current_val, num)
    return whole_steps * num
195def apply_operator( # noqa: C901, PLR0911
196 left: datetime.datetime, operator: str, right: str
197) -> datetime.datetime:
198 """Apply operator to its operands."""
199 if operator == "/":
200 match right:
201 case s if s.endswith("min"):
202 return left.replace(
203 minute=largest_fitting_multiple(
204 left.minute, extract_int_before_suffix(s, "min")
205 ),
206 second=0,
207 microsecond=0,
208 )
209 case s if s.endswith("us"):
210 return left.replace(
211 microsecond=largest_fitting_multiple(
212 left.microsecond, extract_int_before_suffix(s, "us")
213 ),
214 )
215 case s if s.endswith("d"):
216 return left.replace(
217 day=largest_fitting_multiple(left.day, extract_int_before_suffix(s, "d")),
218 hour=0,
219 minute=0,
220 second=0,
221 microsecond=0,
222 )
223 case s if s.endswith("m"):
224 return left.replace(
225 month=largest_fitting_multiple(left.month, extract_int_before_suffix(s, "m")),
226 day=1,
227 hour=0,
228 minute=0,
229 second=0,
230 microsecond=0,
231 )
232 case s if s.endswith("y"):
233 return left.replace(
234 year=largest_fitting_multiple(left.year, extract_int_before_suffix(s, "y")),
235 month=1,
236 day=1,
237 hour=0,
238 minute=0,
239 second=0,
240 microsecond=0,
241 )
242 case s if s.endswith("w"):
243 week_in_year_number = left.isocalendar().week
244 target_week_number = largest_fitting_multiple(
245 week_in_year_number, extract_int_before_suffix(s, "w")
246 )
247 return apply_addition_via_pendulum(
248 left, weeks=target_week_number - week_in_year_number, days=-1 * left.weekday()
249 ).replace(
250 hour=0,
251 minute=0,
252 second=0,
253 microsecond=0,
254 )
255 case s if s.endswith("h"):
256 return left.replace(
257 hour=largest_fitting_multiple(left.hour, extract_int_before_suffix(s, "h")),
258 minute=0,
259 second=0,
260 microsecond=0,
261 )
262 case s if s.endswith("s"):
263 return left.replace(
264 second=largest_fitting_multiple(left.second, extract_int_before_suffix(s, "s")),
265 microsecond=0,
266 )
267 case _:
268 msg = f"Unknown period {right}"
269 raise DtexpParsingError(msg)
271 if operator in _MINUS_OPERATORS:
272 return apply_addition_via_pendulum(left, **parse_timedelta_for_pendulum_add(right, -1))
274 if operator in _PLUS_OPERATORS:
275 return apply_addition_via_pendulum(left, **parse_timedelta_for_pendulum_add(right, +1))
277 msg = f"Unknown operator {operator}"
278 raise DtexpParsingError(msg)
# Operators and keywords that separate the parts of an expression. The single
# capture group makes re.split keep the separators in its result.
_EXPRESSION_SPLIT_PATTERN = re.compile(
    r"([-+]|/|\bplus\b|\bminus\b|\bnext\b|\blast\b|\bupcoming\b|\bprevious\b|\bwhere\b|\bis\b|\band\b)"
)


def split_expression(expression_str: str) -> list[str]:
    """Tokenize an expression string.

    The string is cut at every operator/keyword; the operators/keywords
    themselves are kept as tokens. Tokens are stripped of surrounding
    whitespace and tokens that are empty after stripping are dropped.
    """
    stripped_parts = (part.strip() for part in _EXPRESSION_SPLIT_PATTERN.split(expression_str))
    return [part for part in stripped_parts if part]
def parse_time_expression(
    split_result: list[str],
    left: datetime.datetime,
    *,
    max_iter: int = 1000,
    allow_conditions: bool = True,
) -> datetime.datetime:
    """Apply expression elements to a given left datetime.

    This is the main worker function. It consumes ``split_result`` from the
    front: a basic operator takes itself plus one operand, a condition keyword
    hands the remaining elements to handle_condition_expression, which returns
    the new datetime together with the elements it did not consume.

    The elements are processed in a loop, so ``max_iter`` and
    ``allow_conditions`` hold for the whole expression and the call depth does
    not grow with the number of elements.

    Args:
        split_result: Expression elements as produced by split_expression.
        left: The datetime the first element is applied to.
        max_iter: Maximum number of iteration steps for evaluating a condition.
        allow_conditions: If False, any condition keyword anywhere in the
            expression raises DtexpParsingError.

    Returns:
        The datetime after all elements were applied; ``left`` itself if
        ``split_result`` is empty.

    Raises:
        DtexpParsingError: If an operator lacks its operand, an element in
            operator position is unknown, or a condition is used although
            conditions are not allowed.

    """
    current = left
    pending = split_result

    while pending:
        first_element = pending[0]

        if len(pending) == 1:
            msg = (
                "Cannot understand expression with one operator and no arguments: "
                f"{first_element}"
            )
            raise DtexpParsingError(msg)

        # from here on at least two elements are pending

        if first_element in _BASIC_OPERATORS:
            # Exactly one element after "<operator> <operand>" can never form a
            # complete step; reject it before applying the current operator.
            if len(pending) == 3:
                msg = f"Missing value right of operator {pending[2]}"
                raise DtexpParsingError(msg)

            current = apply_operator(current, first_element, pending[1])
            pending = pending[2:]
            continue

        if first_element in _CONDITION_OPERATORS:
            if not allow_conditions:
                raise DtexpParsingError("Condition expressions are deactivated / not allowed.")

            current, pending = handle_condition_expression(pending, current, max_iter=max_iter)
            continue

        msg = f"Unknown operator {first_element}"
        raise DtexpParsingError(msg)

    return current