Coverage for src/dtexp/parse_timestamp.py: 100%

39 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-11-07 14:19 +0100

1"""Parsing timestamps or now at beginning of expressions.""" 

2 

3import datetime 

4 

5from dtexp.exceptions import DtexpParsingError 

6 

7 

def check_aware(dt: datetime.datetime) -> bool:
    """Tell whether a Python datetime is timezone-aware (i.e. not naive).

    A datetime counts as aware only when it carries a tzinfo object and that
    tzinfo reports an actual UTC offset for this very datetime.
    """
    # Criterion taken from https://stackoverflow.com/a/27596917
    tz = dt.tzinfo
    if tz is None:
        return False
    return tz.utcoffset(dt) is not None

12 

13 

def ensure_aware(
    dt: datetime.datetime,
    default_unaware_timezone: datetime.timezone = datetime.UTC,
) -> datetime.datetime:
    """Guarantee a timezone-aware datetime.

    Datetimes that are already aware are handed back untouched. A naive
    datetime gets default_unaware_timezone attached as its tzinfo via
    ``replace``, i.e. its date and time fields stay as they are.

    Whatever comes back from this function is aware.
    """
    if check_aware(dt):
        return dt
    return dt.replace(tzinfo=default_unaware_timezone)

28 

29 

30def parse_isoformat_from_start( 

31 text: str, 

32 max_iso_timestamp_length: int = 35, 

33 # max length example: "2025-01-01T00:00:00.000000000+00:00" 

34 fixed_iso_timestamp_length: int | None = None, 

35) -> tuple[datetime.datetime | None, str]: 

36 """Parse isoformat from start of a string. 

37 

38 The requirements here are 

39 * parse isoformat from beginning of strings without clearly defined 

40 separation character between timestamp and the remaining string 

41 * stay compatible with what Python's datetime.fromisoformat(...) is 

42 and is able and will be able to parse in the future 

43 

44 The later disallows using a regexp here. Therefore we pursue the following 

45 strategy: 

46 

47 * start at a maximal possible isoformat timestamp length 

48 * try to parse, decreasing possible length, until it succeeds or 

49 reaches zero characters. 

50 

51 This could possibly be optimized at several points. However the requirements 

52 specified above must be taken into account! 

53 

54 Returns a pair of shape 

55 parsed timestamp (or None if no timestamp found), remaining text 

56 """ 

57 

58 if not text or not text.strip(): 

59 return None, text 

60 

61 text = text.strip() 

62 

63 # Fall back to trying all lengths from highest to lowest 

64 for i in ( 

65 range(min(len(text), max_iso_timestamp_length), 1, -1) 

66 if fixed_iso_timestamp_length is None 

67 else (fixed_iso_timestamp_length,) 

68 ): 

69 candidate = text[:i] 

70 try: 

71 dt = datetime.datetime.fromisoformat(candidate) 

72 

73 except ValueError: 

74 continue 

75 else: 

76 remaining = text[i:] 

77 return dt, remaining 

78 

79 return None, text 

80 

81 

def parse_timestamp_from_start(
    text: str,
    *,
    to_utc: bool = True,
    now: datetime.datetime | None = None,
    max_iso_timestamp_length: int = 35,
    fixed_iso_timestamp_length: int | None = None,
    default_unaware_timezone: datetime.timezone = datetime.UTC,
) -> tuple[datetime.datetime | None, str]:
    """Read a datetime from the beginning of a string.

    The string may start with "now" (matched case-insensitively) or with an
    isoformat timestamp.

    On success the result is the pair
        (datetime, remaining_string)
    and if no datetime could be extracted from the start of text it is
        (None, unparsed text as returned by parse_isoformat_from_start)

    Important: the resulting datetime is always timezone aware and, by
    default, even in UTC:
      * a datetime without timezone information (unaware) is interpreted as
        being in default_unaware_timezone (which defaults to UTC)
      * with to_utc set to True (the default) the datetime is converted to the
        UTC timezone. Pass to_utc=False to keep other timezones.

    "now" is evaluated at call time. Pass a datetime via the now parameter to
    force "now" to be that datetime instead.

    If to_utc is False and no now is supplied, "now" yields a datetime in
    timezone default_unaware_timezone.
    """
    parsed: datetime.datetime | None
    if not text.lower().startswith("now"):
        parsed, rest = parse_isoformat_from_start(
            text,
            max_iso_timestamp_length=max_iso_timestamp_length,
            fixed_iso_timestamp_length=fixed_iso_timestamp_length,
        )
        if parsed is None:
            # Only the isoformat branch can come back empty-handed.
            return None, rest
    elif now is not None:
        # Caller pinned "now" to a specific datetime.
        parsed = now
        rest = text[3:]
    else:
        parsed = datetime.datetime.now(tz=default_unaware_timezone)
        rest = text[3:]

    aware = ensure_aware(parsed, default_unaware_timezone=default_unaware_timezone)
    if not to_utc:
        return aware, rest
    return aware.astimezone(datetime.UTC), rest

133 

134 

def parse_timestamp(
    timestamp: str,
    *,
    to_utc: bool = True,
    now: datetime.datetime | None = None,
    default_unaware_timezone: datetime.timezone = datetime.UTC,
) -> datetime.datetime:
    """Turn "now" or an isoformat timestamp string into a datetime.

    The string must consist of nothing but now or the timestamp; trailing
    characters are not tolerated.

    See parse_timestamp_from_start for the meaning of to_utc, now and
    default_unaware_timezone; they are passed through unchanged.

    Raises DtexpParsingError if nothing could be parsed from the beginning of
    timestamp, or if unparsed characters are left over.

    Returns the parsed datetime object.
    """
    parsed, leftover = parse_timestamp_from_start(
        timestamp,
        to_utc=to_utc,
        now=now,
        # The whole string has to be the timestamp, so only try its full length.
        fixed_iso_timestamp_length=len(timestamp),
        default_unaware_timezone=default_unaware_timezone,
    )

    if parsed is None:
        raise DtexpParsingError("Could not parse timestamp")

    if leftover:
        raise DtexpParsingError(
            "Only beginning could be parsed as timestamp, found remaining characters."
        )

    return parsed