Coverage for arrakis_server/scope.py: 75.2%
129 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-08-12 16:39 -0700
« prev ^ index » next coverage.py v7.6.12, created at 2025-08-12 16:39 -0700
1from __future__ import annotations
3import logging
4import math
5from collections import defaultdict
6from collections.abc import Iterable
7from dataclasses import dataclass, field
8from pathlib import Path
9from typing import TYPE_CHECKING, Any
11import gpstime
12from arrakis import Time
13from ruamel.yaml import YAML
14from typing_extensions import Self
16from . import constants
17from .channel import extract_channel_scope
19if TYPE_CHECKING:
20 from .traits import ServerBackend
# Module logger, registered under the fixed name "arrakis" rather than
# this module's own dotted name.
logger = logging.getLogger("arrakis")

# A list of (channels, endpoints) pairs: each entry groups a list of channel
# names with the list of endpoints those channels can be requested from.
# This is the return type of ScopeMap.endpoints_for_channels.
ChannelEndpointList = list[tuple[list[str], list[str]]]
@dataclass
class ScopeMap:
    """A mapping between endpoints and the scopes they provide.

    Parameters
    ----------
    servers : dict[str, ScopeInfo], optional
        A dictionary whose keys are endpoints (server URLs) and values are
        information about the scopes they provide.
    local_endpoint : str, optional
        If provided, defines which URL corresponds to the local server URL.

    """

    servers: dict[str, ScopeInfo] = field(default_factory=dict)
    local_endpoint: str | None = None

    @property
    def endpoints(self) -> set[str]:
        """The set of endpoints contained within this mapping."""
        return set(self.servers)

    @property
    def domains(self) -> set[str]:
        """The set of domains that the list of endpoints provide."""
        return {domain for info in self.servers.values() for domain in info.scopes}

    def __str__(self) -> str:
        return f"<ScopeMap {self.endpoints}>"

    def endpoints_for_domain(self, domain: str) -> list[str]:
        """Determine all endpoints that can serve a given domain.

        Parameters
        ----------
        domain : str
            The domain to determine the endpoints for.

        Returns
        -------
        list[str]
            All endpoints that serve a given domain. If the local endpoint
            serves the domain, only the Flight "reuse" URL is returned.

        """
        # first check if local server serves this domain
        if self.local_endpoint and self.local_endpoint in self.servers:
            if domain in self.servers[self.local_endpoint].scopes:
                return [constants.FLIGHT_REUSE_URL]

        # otherwise, find all endpoints which can serve this domain
        return [
            endpoint for endpoint, info in self.servers.items() if domain in info.scopes
        ]

    def endpoints_for_channel(self, channel: str) -> list[str]:
        """Determine all endpoints that can serve a given channel.

        Parameters
        ----------
        channel : str
            The channel to determine the endpoints for.

        Returns
        -------
        list[str]
            All endpoints that serve a given channel. If the local endpoint
            serves the channel, only the Flight "reuse" URL is returned.

        """
        # first check if local server serves this data
        if self.local_endpoint and self.local_endpoint in self.servers:
            if self.servers[self.local_endpoint].in_scope(channel):
                return [constants.FLIGHT_REUSE_URL]

        # otherwise, find all endpoints which can serve this data
        return [
            endpoint
            for endpoint, info in self.servers.items()
            if info.in_scope(channel)
        ]

    def endpoints_for_channels(self, channels: Iterable[str]) -> ChannelEndpointList:
        """Determine the endpoints that can serve the given channels.

        Parameters
        ----------
        channels : Iterable[str]
            The channels to determine the endpoints for.

        Returns
        -------
        ChannelEndpointList
            A list of (channels, endpoints) pairs, where each grouping is a
            list of endpoints where the channels can be found at. The channel
            groups are disjoint. Channels that no endpoint serves do not
            appear in any group. The order of channels within a group is
            unspecified.

        """
        # map each endpoint to the requested channels it can serve
        endpoint_channel_map: dict[str, set[str]] = defaultdict(set)
        remaining_channels: set[str] = set()
        for channel in channels:
            remaining_channels.add(channel)
            for endpoint in self.endpoints_for_channel(channel):
                endpoint_channel_map[endpoint].add(channel)

        endpoints_for_channels: ChannelEndpointList = []

        # first, prioritize local endpoint
        if constants.FLIGHT_REUSE_URL in endpoint_channel_map:
            local_endpoint = constants.FLIGHT_REUSE_URL
            local_channels = endpoint_channel_map.pop(local_endpoint)
            remaining_channels -= local_channels
            endpoints_for_channels.append((list(local_channels), [local_endpoint]))

        # then, prioritize endpoints which serve the most channels
        while remaining_channels and endpoint_channel_map:
            # restrict every candidate to the channels that are still
            # unassigned (an intersection, not a difference), and forget
            # endpoints that have nothing left to contribute
            candidates: dict[str, set[str]] = {}
            for endpoint, served in endpoint_channel_map.items():
                unassigned = served & remaining_channels
                if unassigned:
                    candidates[endpoint] = unassigned
            endpoint_channel_map = candidates
            if not endpoint_channel_map:
                # whatever remains is not served by any endpoint
                break

            # choose one with the most channels, and select all endpoints
            # matching the set of channels it serves
            max_endpoint = max(
                endpoint_channel_map, key=lambda ep: len(endpoint_channel_map[ep])
            )
            max_channels = endpoint_channel_map[max_endpoint]
            endpoints = [
                endpoint
                for endpoint, served in endpoint_channel_map.items()
                if served == max_channels
            ]
            for endpoint in endpoints:
                endpoint_channel_map.pop(endpoint)
            endpoints_for_channels.append((list(max_channels), endpoints))
            remaining_channels -= max_channels

        return endpoints_for_channels

    def sync_local_map(self, backend: ServerBackend | None, endpoint: str) -> None:
        """Add local scope information to the scope map.

        This checks consistency between the scope map and the new local scope
        information that is provided. In addition, this updates the local
        endpoint accordingly to prioritize the serving of local data if this
        backend also serves data and/or metadata.

        Parameters
        ----------
        backend : ServerBackend, optional
            The local backend in which to update the scope map with. If
            falsy, only the local endpoint is recorded.
        endpoint : str
            The endpoint associated with the local server.

        """
        self.local_endpoint = endpoint
        if not backend:
            return

        local_info = backend.scope_info

        # check consistency with the scope map and what's in the backend
        if endpoint in self.servers:
            known_scopes = self.servers[endpoint].scopes
            for domain in local_info.domains:
                if domain not in known_scopes:
                    logger.warning("domain %s not in global scope map", domain)
                # compare like with like: the scope entries for this domain
                elif local_info.scopes.get(domain) != known_scopes[domain]:
                    logger.warning(
                        "local scope info is inconsistent with global scope map"
                    )

        # the backend's own view always wins for the local endpoint
        self.servers[endpoint] = local_info

    def filter_by_range(self, start: int | None, end: int | None) -> Self:
        """Filter the scope map by a time range, specified in nanoseconds.

        Only endpoints whose retention covers both ends of the range are
        kept. The local endpoint setting is carried over unchanged.

        Parameters
        ----------
        start : int, optional
            GPS start time, in nanoseconds.
        end : int, optional
            GPS end time, in nanoseconds.

        Returns
        -------
        ScopeMap
            The filtered scope map.

        """
        servers = {
            endpoint: info
            for endpoint, info in self.servers.items()
            if info.retention.in_range(start) and info.retention.in_range(end)
        }
        return type(self)(servers, self.local_endpoint)

    @classmethod
    def load(cls, scope_map_file: Path) -> Self:
        """Load a configuration-based scope map from disk.

        The file is a YAML mapping of endpoint to a mapping with ``scopes``
        and ``retention`` keys. An empty file yields an empty scope map.

        Parameters
        ----------
        scope_map_file : Path
            The path to the scope map configuration file

        Returns
        -------
        ScopeMap
            The loaded scope map.

        Raises
        ------
        KeyError
            If an endpoint entry lacks the ``scopes`` or ``retention`` key.

        """
        with open(scope_map_file, "r", encoding="utf-8") as f:
            # an empty YAML document parses to None
            config = YAML(typ="safe").load(f) or {}

        servers = {
            endpoint: ScopeInfo(
                scopes=info["scopes"],
                retention=Retention(**info["retention"]),
            )
            for endpoint, info in config.items()
        }
        return cls(servers)
@dataclass
class ScopeInfo:
    """Describe which scopes a server offers and how far back it can serve.

    Parameters
    ----------
    scopes : dict[str, list[dict[str, Any]]]
        Per-domain scope entries. Each key is a domain; each value is a list
        of key-value mappings describing how that domain is scoped (by
        subsystem, specific channels, etc.).
    retention : Retention
        The window of time, relative to now, that can be queried.

    """

    scopes: dict[str, list[dict[str, Any]]]
    retention: Retention

    @property
    def domains(self) -> set[str]:
        """The set of domains this server has scope entries for."""
        return set(self.scopes.keys())

    def in_scope(self, channel: str) -> bool:
        """Check whether the channel is served by this endpoint."""
        domain, subsystem = extract_channel_scope(channel)
        if domain in self.scopes:
            # gather every subsystem listed for the channel's domain
            available = {entry["subsystem"] for entry in self.scopes[domain]}
            return subsystem in available
        return False
@dataclass
class Retention:
    """The window of time, relative to 'now', that a server can be queried for.

    Backends use this to advertise the range of data they hold, so incoming
    requests spanning a range of times can be delegated to suitable servers.

    Both bounds are expressed in seconds before 'now': 0 means live data,
    3600 means '1 hour ago'.

    A live-only source has 'newest' and 'oldest' both equal to 0; build one
    with the class method Retention.from_live_only().

    Parameters
    ----------
    newest : float, optional
        The most recent time from now that can be queried. Defaults to 0, or
        'now'.
    oldest : float, optional
        The oldest time from now that can be queried. Defaults to inf, or
        infinite lookback.

    """

    newest: float = 0
    oldest: float = math.inf

    @property
    def is_live(self) -> bool:
        """Whether live data can be served (the window reaches 'now')."""
        return self.newest == 0

    @property
    def is_live_only(self) -> bool:
        """Whether nothing but live data can be served."""
        return self.is_live and self.oldest == 0

    @property
    def is_historical_only(self) -> bool:
        """Whether only historical data can be served (window ends before 'now')."""
        return self.newest > 0

    @classmethod
    def from_live_only(cls) -> Self:
        """Build a retention describing a live-only source."""
        return cls(0, 0)

    def in_range(self, time_ns: int | None) -> bool:
        """Check whether the time (in nanoseconds) is served by this backend."""
        # A missing bound is only meaningful for live backends:
        #   start=None -> begin serving at 'now'
        #   end=None   -> keep serving indefinitely
        if time_ns is None:
            return self.is_live

        now = gpstime.gpsnow()
        seconds = time_ns // Time.s

        # times at or beyond 'now' can only be satisfied by live backends
        if seconds >= now:
            return self.is_live

        # otherwise, check how far back the time is against the window
        age = now - seconds
        return self.newest <= age <= self.oldest