Coverage for arrakis_server/scope.py: 75.2%

129 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-08-12 16:39 -0700

1from __future__ import annotations 

2 

3import logging 

4import math 

5from collections import defaultdict 

6from collections.abc import Iterable 

7from dataclasses import dataclass, field 

8from pathlib import Path 

9from typing import TYPE_CHECKING, Any 

10 

11import gpstime 

12from arrakis import Time 

13from ruamel.yaml import YAML 

14from typing_extensions import Self 

15 

16from . import constants 

17from .channel import extract_channel_scope 

18 

19if TYPE_CHECKING: 

20 from .traits import ServerBackend 

21 

# Module-level logger; messages from this module are emitted under the
# "arrakis" logger name.
logger = logging.getLogger("arrakis")

# A list of (channels, endpoints) pairs: each entry groups a list of channel
# names with the list of endpoints at which those channels can be found.
ChannelEndpointList = list[tuple[list[str], list[str]]]

25 

26 

@dataclass
class ScopeMap:
    """A mapping between endpoints and the scopes they provide.

    Parameters
    ----------
    servers : dict[str, ScopeInfo], optional
        A dictionary whose keys are endpoints (server URLs) and values are
        information about the scopes they provide.
    local_endpoint : str, optional
        If provided, defines which URL corresponds to the local server URL.

    """

    servers: dict[str, ScopeInfo] = field(default_factory=dict)
    local_endpoint: str | None = None

    @property
    def endpoints(self) -> set[str]:
        """The set of endpoints contained within this mapping."""
        return set(self.servers)

    @property
    def domains(self) -> set[str]:
        """The set of domains that the list of endpoints provide."""
        return {domain for info in self.servers.values() for domain in info.scopes}

    def __str__(self):
        return f"<ScopeMap {self.endpoints}>"

    def endpoints_for_domain(self, domain: str) -> list[str]:
        """Determine all endpoints that can serve a given domain.

        Parameters
        ----------
        domain : str
            The domain to determine the endpoints for.

        Returns
        -------
        list[str]
            All endpoints that serve a given domain. If the local endpoint
            serves the domain, only ``constants.FLIGHT_REUSE_URL`` is
            returned.

        """
        # first check if local server serves this domain
        if self.local_endpoint and self.local_endpoint in self.servers:
            if domain in self.servers[self.local_endpoint].scopes:
                return [constants.FLIGHT_REUSE_URL]

        # otherwise, find all endpoints which can serve this domain
        return [
            endpoint for endpoint, info in self.servers.items() if domain in info.scopes
        ]

    def endpoints_for_channel(self, channel: str) -> list[str]:
        """Determine all endpoints that can serve a given channel.

        Parameters
        ----------
        channel : str
            The channel to determine the endpoints for.

        Returns
        -------
        list[str]
            All endpoints that serve a given channel. If the local endpoint
            serves the channel, only ``constants.FLIGHT_REUSE_URL`` is
            returned.

        """
        # first check if local server serves this data
        if self.local_endpoint and self.local_endpoint in self.servers:
            if self.servers[self.local_endpoint].in_scope(channel):
                return [constants.FLIGHT_REUSE_URL]

        # otherwise, find all endpoints which can serve this data
        return [
            endpoint
            for endpoint, info in self.servers.items()
            if info.in_scope(channel)
        ]

    def endpoints_for_channels(self, channels: Iterable[str]) -> ChannelEndpointList:
        """Determine the endpoints that can serve the given channels.

        Parameters
        ----------
        channels : Iterable[str]
            The channels to determine the endpoints for.

        Returns
        -------
        ChannelEndpointList
            A list of (channels, endpoints) pairs, where each grouping is a
            list of endpoints where the channels can be found at. The channel
            groups are disjoint and each group is sorted. Channels that no
            endpoint can serve do not appear in any group.

        """
        # map every endpoint to the requested channels it is able to serve
        endpoint_channel_map: dict[str, set[str]] = defaultdict(set)
        remaining_channels: set[str] = set()
        for channel in channels:
            remaining_channels.add(channel)
            for endpoint in self.endpoints_for_channel(channel):
                endpoint_channel_map[endpoint].add(channel)

        endpoints_for_channels: ChannelEndpointList = []

        # first, prioritize local endpoint
        local_channels = endpoint_channel_map.pop(constants.FLIGHT_REUSE_URL, None)
        if local_channels:
            remaining_channels -= local_channels
            endpoints_for_channels.append(
                (sorted(local_channels), [constants.FLIGHT_REUSE_URL])
            )

        # then, prioritize endpoints which serve the most channels
        while remaining_channels:
            # restrict each endpoint to the channels that are still
            # unassigned; endpoints with nothing left to offer are dropped so
            # that no empty group is ever emitted
            candidates: dict[str, set[str]] = {}
            for endpoint, served in endpoint_channel_map.items():
                unassigned = served & remaining_channels
                if unassigned:
                    candidates[endpoint] = unassigned

            # whatever remains cannot be served by any endpoint
            if not candidates:
                break

            # choose one with the most channels, and select all endpoints
            # matching the set of channels it serves
            max_endpoint = max(candidates, key=lambda key: len(candidates[key]))
            max_channels = candidates[max_endpoint]
            endpoints = [
                endpoint
                for endpoint, unassigned in candidates.items()
                if unassigned == max_channels
            ]
            for endpoint in endpoints:
                del candidates[endpoint]

            endpoints_for_channels.append((sorted(max_channels), endpoints))
            remaining_channels -= max_channels
            endpoint_channel_map = candidates

        return endpoints_for_channels

    def sync_local_map(self, backend: ServerBackend | None, endpoint: str) -> None:
        """Add local scope information to the scope map.

        This checks consistency between the scope map and the new local scope
        information that is provided. In addition, this updates the local
        endpoint accordingly to prioritize the serving of local data if this
        backend also serves data and/or metadata.

        Parameters
        ----------
        backend : ServerBackend, optional
            The local backend in which to update the scope map with. If not
            provided, only the local endpoint is recorded.
        endpoint : str
            The endpoint associated with the local server.

        """
        self.local_endpoint = endpoint
        if not backend:
            return

        local_info = backend.scope_info

        # check consistency with the scope map and what's in the backend
        if endpoint in self.servers:
            known_scopes = self.servers[endpoint].scopes
            for domain in local_info.domains:
                if domain not in known_scopes:
                    logger.warning("domain %s not in global scope map", domain)
                elif local_info.scopes[domain] != known_scopes[domain]:
                    # compare like with like: the per-domain scope lists
                    logger.warning(
                        "local scope info is inconsistent with global scope map"
                    )

        # the backend's own scope information takes precedence
        self.servers[endpoint] = local_info

    def filter_by_range(self, start: int | None, end: int | None) -> Self:
        """Filter the scope map by a time range, specified in nanoseconds.

        Only endpoints whose retention covers both ends of the range are
        kept.

        Parameters
        ----------
        start : int, optional
            GPS start time, in nanoseconds.
        end : int, optional
            GPS end time, in nanoseconds.

        Returns
        -------
        ScopeMap
            The filtered scope map.

        """
        servers = {
            endpoint: info
            for endpoint, info in self.servers.items()
            if info.retention.in_range(start) and info.retention.in_range(end)
        }
        return type(self)(servers, self.local_endpoint)

    @classmethod
    def load(cls, scope_map_file: Path) -> Self:
        """Load a configuration-based scope map from disk.

        Parameters
        ----------
        scope_map_file : Path
            The path to the scope map configuration file

        Returns
        -------
        ScopeMap
            The loaded scope map.

        """
        servers = {}
        with open(scope_map_file, "r", encoding="utf-8") as f:
            for endpoint, info in YAML(typ="safe").load(f).items():
                servers[endpoint] = ScopeInfo(
                    scopes=info["scopes"],
                    retention=Retention(**info["retention"]),
                )
        return cls(servers)

248 

249 

@dataclass
class ScopeInfo:
    """Describe the scopes and the retention offered by a single server.

    Parameters
    ----------
    scopes : dict[str, list[dict[str, Any]]]
        Per-domain scoping information. Keys are domains; each value is a
        list of key-value mappings describing how that domain is scoped
        (subsystems, specific channels, etc.).
    retention : Retention
        The range of time available (from now) that is accessible to query.

    """

    scopes: dict[str, list[dict[str, Any]]]
    retention: Retention

    @property
    def domains(self) -> set[str]:
        """The set of domains for which scopes are defined."""
        return set(self.scopes)

    def in_scope(self, channel: str) -> bool:
        """Check whether the channel is served by this endpoint.

        The channel is split into a domain and a subsystem; it is in scope
        when the domain is known and one of that domain's scope entries
        lists the subsystem under its ``"subsystem"`` key.
        """
        domain, subsystem = extract_channel_scope(channel)
        if domain in self.scopes:
            # gather every subsystem listed for the domain before testing
            listed = set()
            for entry in self.scopes[domain]:
                listed.add(entry["subsystem"])
            return subsystem in listed
        return False

279 

280 

@dataclass
class Retention:
    """The window of time, relative to 'now', that a server can be queried for.

    Backends use this to advertise the range of data they can serve, so that
    incoming requests spanning a range of times can be delegated to the
    appropriate servers.

    Both bounds are expressed in seconds relative to 'now': 0 means live
    data, 3600 means '1 hour ago'.

    A live-only data source has 'newest' and 'oldest' both equal to 0; use
    the class method Retention.from_live_only() to build one.

    Parameters
    ----------
    newest : float, optional
        The most recent time from now that can be queried. Defaults to 0, or
        'now'.
    oldest : float, optional
        The oldest time from now that can be queried. Defaults to inf, or
        infinite lookback.

    """

    newest: float = 0
    oldest: float = math.inf

    @property
    def is_live(self) -> bool:
        """Whether live data can be served (``newest`` is 0)."""
        return self.newest == 0

    @property
    def is_live_only(self) -> bool:
        """Whether only live data can be served (both bounds are 0)."""
        return self.is_live and self.oldest == 0

    @property
    def is_historical_only(self) -> bool:
        """Whether only historical data can be served (``newest`` above 0)."""
        return 0 < self.newest

    @classmethod
    def from_live_only(cls) -> Self:
        """Build a retention describing a live-only data source."""
        return cls(newest=0, oldest=0)

    def in_range(self, time_ns: int | None) -> bool:
        """Check whether the time (in nanoseconds) is served by this backend.

        An unspecified time (None) is only served by live backends, as is
        any time at or after the current GPS time. Earlier times are served
        when their age in seconds lies between ``newest`` and ``oldest``,
        inclusive.
        """
        # unspecified times can only be served by live backends:
        #   start=None -> serve data starting at 'now'
        #   end=None -> serve data forever
        if time_ns is None:
            return self.is_live

        seconds = time_ns // Time.s
        now = gpstime.gpsnow()

        # present or future times: only live backends qualify
        if seconds >= now:
            return self.is_live

        # past times: compare the age against the retention window
        age = now - seconds
        return self.newest <= age <= self.oldest