Coverage for src/dtexp/parse_timestamp.py: 100%
39 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-07 14:19 +0100
1"""Parsing timestamps or now at beginning of expressions."""
3import datetime
5from dtexp.exceptions import DtexpParsingError
def check_aware(dt: datetime.datetime) -> bool:
    """Check whether a Python datetime is aware (non-naive).

    A datetime counts as aware only if it carries a tzinfo object and that
    tzinfo object yields a UTC offset for it.

    Returns True for aware datetimes and False for naive ones.
    """
    # see https://stackoverflow.com/a/27596917
    return dt.tzinfo is not None and dt.tzinfo.utcoffset(dt) is not None
def ensure_aware(
    dt: datetime.datetime,
    default_unaware_timezone: datetime.tzinfo = datetime.UTC,
) -> datetime.datetime:
    """Ensure that a datetime is aware, applying default_unaware_timezone to unaware datetimes.

    Return aware datetimes as is. For unaware datetimes this sets the timezone
    to default_unaware_timezone without shifting the wall-clock fields (the
    timezone is attached via replace(), not converted to).

    default_unaware_timezone may be any tzinfo implementation, not only a
    fixed-offset datetime.timezone.

    The result is always aware, provided default_unaware_timezone itself
    yields a UTC offset.
    """
    if check_aware(dt):
        return dt
    return dt.replace(tzinfo=default_unaware_timezone)
30def parse_isoformat_from_start(
31 text: str,
32 max_iso_timestamp_length: int = 35,
33 # max length example: "2025-01-01T00:00:00.000000000+00:00"
34 fixed_iso_timestamp_length: int | None = None,
35) -> tuple[datetime.datetime | None, str]:
36 """Parse isoformat from start of a string.
38 The requirements here are
39 * parse isoformat from beginning of strings without clearly defined
40 separation character between timestamp and the remaining string
41 * stay compatible with what Python's datetime.fromisoformat(...) is
42 and is able and will be able to parse in the future
44 The later disallows using a regexp here. Therefore we pursue the following
45 strategy:
47 * start at a maximal possible isoformat timestamp length
48 * try to parse, decreasing possible length, until it succeeds or
49 reaches zero characters.
51 This could possibly be optimized at several points. However the requirements
52 specified above must be taken into account!
54 Returns a pair of shape
55 parsed timestamp (or None if no timestamp found), remaining text
56 """
58 if not text or not text.strip():
59 return None, text
61 text = text.strip()
63 # Fall back to trying all lengths from highest to lowest
64 for i in (
65 range(min(len(text), max_iso_timestamp_length), 1, -1)
66 if fixed_iso_timestamp_length is None
67 else (fixed_iso_timestamp_length,)
68 ):
69 candidate = text[:i]
70 try:
71 dt = datetime.datetime.fromisoformat(candidate)
73 except ValueError:
74 continue
75 else:
76 remaining = text[i:]
77 return dt, remaining
79 return None, text
def parse_timestamp_from_start(
    text: str,
    *,
    to_utc: bool = True,
    now: datetime.datetime | None = None,
    max_iso_timestamp_length: int = 35,
    fixed_iso_timestamp_length: int | None = None,
    default_unaware_timezone: datetime.timezone = datetime.UTC,
) -> tuple[datetime.datetime | None, str]:
    """Parse datetime from start of string.

    Can handle now or isoformat strings at the beginning. Leading whitespace
    is ignored in both cases. The "now" keyword is matched case-insensitively.

    Returns pair
        (datetime, remaining_string)
    on success and
        (None, text)
    if no datetime could be extracted from start of text (text is returned
    whitespace-stripped here unless it is empty or whitespace-only).

    Important: Makes sure that the resulting datetime is timezone aware and by default
    even that it has UTC timezone.
    * If no timezone is specified (unaware) it will be interpreted as default_unaware_timezone
      (defaulting to utc)
    * if to_utc is True (the default) the datetime will be converted to utc timezone.
      Set to_utc to False to keep other timezones.

    Furthermore this evaluates "now" at runtime. You can supply a datetime object
    using the parameter now to enforce now to be this datetime.

    "now" will yield a datetime with timezone default_unaware_timezone if to_utc is False.

    max_iso_timestamp_length and fixed_iso_timestamp_length are passed through to
    parse_isoformat_from_start and only affect isoformat parsing.
    """
    datetime_obj: datetime.datetime | None

    # Skip leading whitespace so "now" is treated like isoformat timestamps,
    # which are whitespace-stripped before parsing. Only the three-character
    # prefix is lowercased instead of the whole input.
    left_stripped = text.lstrip()
    if left_stripped[:3].lower() == "now":
        datetime_obj = datetime.datetime.now(tz=default_unaware_timezone) if now is None else now
        remaining = left_stripped[3:]
    else:
        datetime_obj, remaining = parse_isoformat_from_start(
            text,
            max_iso_timestamp_length=max_iso_timestamp_length,
            fixed_iso_timestamp_length=fixed_iso_timestamp_length,
        )

    if datetime_obj is None:
        return None, remaining

    # A naive result (from the timestamp or a supplied naive `now`) is
    # interpreted in default_unaware_timezone.
    datetime_obj = ensure_aware(datetime_obj, default_unaware_timezone=default_unaware_timezone)

    if to_utc:
        datetime_obj = datetime_obj.astimezone(datetime.UTC)

    return datetime_obj, remaining
def parse_timestamp(
    timestamp: str,
    *,
    to_utc: bool = True,
    now: datetime.datetime | None = None,
    default_unaware_timezone: datetime.timezone = datetime.UTC,
) -> datetime.datetime:
    """Parse a datetime from "now" or an isoformat timestamp string.

    Expects only now or the timestamp to be contained and no remaining characters.

    to_utc, now, and default_unaware_timezone are handled as described for
    parse_timestamp_from_start.

    Raises DtexpParsingError if either no parsing succeeded from beginning of timestamp
    or unparsed characters remain.

    Returns parsed datetime object.
    """
    # Fixing the candidate length to the full input means an isoformat
    # timestamp is only accepted if the entire string parses, instead of
    # searching for the longest parsable prefix.
    result, remaining = parse_timestamp_from_start(
        timestamp,
        to_utc=to_utc,
        now=now,
        fixed_iso_timestamp_length=len(timestamp),
        default_unaware_timezone=default_unaware_timezone,
    )

    if result is None:
        raise DtexpParsingError("Could not parse timestamp")

    # Can still happen for "now" followed by further characters.
    if remaining:
        raise DtexpParsingError(
            "Only beginning could be parsed as timestamp, found remaining characters."
        )

    return result