Coverage for fastblocks/adapters/sitemap/core.py: 0%

159 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-09 00:47 -0700

1"""FastBlocks Sitemap Core Implementation. 

2 

3Core sitemap functionality moved from standalone module to adapter pattern. 

4Based on asgi-sitemaps by Florian Dahlitz with FastBlocks enhancements. 

5 

6Original asgi-sitemaps library: 

7- Author: Florian Dahlitz 

8- Repository: https://github.com/DahlitzFlorian/asgi-sitemaps 

9- License: MIT 

10""" 

11 

12import contextvars 

13import datetime as dt 

14import inspect 

15import typing as t 

16from collections.abc import ( 

17 AsyncIterable, 

18 AsyncIterator, 

19 Awaitable, 

20 Callable, 

21 Iterable, 

22 Sequence, 

23) 

24from typing import TypeVar, cast 

25from urllib.parse import urljoin, urlsplit 

26 

27from acb.debug import debug 

28from acb.depends import depends 

29 

30if t.TYPE_CHECKING: 

31 from starlette.types import Scope 

32 

33T = TypeVar("T") 

34ItemsTypes = Iterable[T] | Awaitable[Iterable[T]] | AsyncIterable[T] 

35 

36SCOPE_CTX_VAR = contextvars.ContextVar["Scope"]("fastblocks.sitemaps.scope") 

37 

38 

39class BaseSitemap[T]: 

40 protocol: str = "auto" 

41 

42 def __init__(self) -> None: 

43 if self.protocol not in ("http", "https", "auto"): 

44 raise ValueError(f"Invalid protocol: {self.protocol}") 

45 debug(f"BaseSitemap: Initialized with protocol={self.protocol}") 

46 

47 def items(self) -> ItemsTypes[T]: 

48 raise NotImplementedError("Subclasses must implement items() method") 

49 

50 def location(self, item: T) -> str: 

51 raise NotImplementedError("Subclasses must implement location() method") 

52 

53 def lastmod(self, item: T) -> dt.datetime | None: 

54 return None 

55 

56 def changefreq(self, item: T) -> str | None: 

57 return None 

58 

59 def priority(self, item: T) -> float: 

60 return 0.5 

61 

62 @property 

63 def scope(self) -> "Scope": 

64 try: 

65 return SCOPE_CTX_VAR.get() 

66 except LookupError as e: 

67 raise RuntimeError( 

68 "Scope accessed outside of an ASGI request. " 

69 "Ensure sitemap generation happens within request context." 

70 ) from e 

71 

72 

class SitemapApp:
    """ASGI application that serves the configured sitemaps as one XML document.

    Args:
        sitemaps: A single ``BaseSitemap`` or an iterable of them.
        domain: Domain passed to ``generate_sitemap`` for building URLs.
        cache_ttl: Seconds advertised in the ``Cache-Control: max-age``
            response header.
    """

    def __init__(
        self,
        sitemaps: BaseSitemap[t.Any] | list[BaseSitemap[t.Any]],
        *,
        domain: str,
        cache_ttl: int = 3600,
    ) -> None:
        self._sitemaps = (
            [sitemaps] if isinstance(sitemaps, BaseSitemap) else list(sitemaps)
        )
        self._domain = domain
        self._cache_ttl = cache_ttl
        debug(
            f"SitemapApp: Initialized with {len(self._sitemaps)} sitemaps, domain={domain}"
        )

    async def __call__(
        self,
        scope: "Scope",
        receive: Callable[[], Awaitable[dict[str, t.Any]]],
        send: Callable[[dict[str, t.Any]], Awaitable[None]],
    ) -> None:
        """Handle one ASGI connection.

        Responds with 404 for non-HTTP scopes, 400 when the first received
        message is not ``http.request``, 200 with the sitemap XML on success,
        and 500 when generation fails before any response was started.
        """
        # NOTE(review): non-HTTP scopes are answered with HTTP response
        # messages here — confirm this app is only ever mounted for HTTP.
        if scope["type"] != "http":
            await self._send_error(send, 404)
            return

        debug(
            f"SitemapApp: Processing sitemap request for {scope.get('path', 'unknown')}"
        )

        # Flipped right before the first attempt to send any response so the
        # error handler never emits a second ``http.response.start``.
        response_started = False

        try:
            content = await generate_sitemap(
                self._sitemaps, scope=scope, domain=self._domain
            )

            headers = [
                [b"content-type", b"application/xml; charset=utf-8"],
                [b"content-length", str(len(content)).encode()],
                [b"cache-control", f"public, max-age={self._cache_ttl}".encode()],
            ]

            message = await receive()
            if message["type"] != "http.request":
                response_started = True
                await self._send_error(send, 400)
                return

            response_started = True
            await send(
                {"type": "http.response.start", "status": 200, "headers": headers}
            )
            await send({"type": "http.response.body", "body": content})

            debug(f"SitemapApp: Sent sitemap response ({len(content)} bytes)")

        except Exception as e:
            debug(f"SitemapApp: Error generating sitemap: {e}")
            # A 500 can only be delivered while no response has been started;
            # once one has, starting another would break the ASGI protocol.
            if not response_started:
                await self._send_error(send, 500)

    async def _send_error(
        self, send: Callable[[dict[str, t.Any]], Awaitable[None]], status: int
    ) -> None:
        """Send a plain-text ``Error <status>`` response with the given status."""
        await send(
            {
                "type": "http.response.start",
                "status": status,
                "headers": [[b"content-type", b"text/plain"]],
            }
        )
        await send(
            {
                "type": "http.response.body",
                "body": f"Error {status}".encode(),
            }
        )

147 

148 

async def generate_sitemap(
    sitemaps: Sequence[BaseSitemap[t.Any]], *, scope: "Scope", domain: str
) -> bytes:
    """Return the sitemap XML for *sitemaps*, using the cache when possible.

    The ASGI *scope* is published through ``SCOPE_CTX_VAR`` for the duration
    of the call and restored afterwards.

    Args:
        sitemaps: Sitemap definitions to render.
        scope: ASGI scope of the current request.
        domain: Domain used for the cache key and passed on for URL building.

    Returns:
        The sitemap document as bytes (cached or freshly generated).

    Raises:
        Exception: Any error raised while generating or caching is logged
            and re-raised.
    """
    debug(f"generate_sitemap: Starting generation for {len(sitemaps)} sitemaps")

    # Keep the token so the previous value can be restored; without the reset
    # a stale scope would stay visible in this context after generation ends.
    token = SCOPE_CTX_VAR.set(scope)
    try:
        # NOTE(review): the key only contains the domain, so content generated
        # for one request scheme or one set of sitemaps is reused for any
        # other call with the same domain — confirm this is intended.
        cache_key = f"fastblocks:sitemap:{domain}"
        cached_content = await _get_cached_sitemap(cache_key)
        if cached_content:
            debug("generate_sitemap: Returning cached sitemap")
            return cached_content

        try:
            content = await _generate_sitemap_content(
                sitemaps, scope=scope, domain=domain
            )

            await _cache_sitemap(cache_key, content)

            debug(f"generate_sitemap: Generated {len(content)} bytes")
            return content

        except Exception as e:
            debug(f"generate_sitemap: Error during generation: {e}")
            raise
    finally:
        SCOPE_CTX_VAR.reset(token)

173 

174 

async def _generate_sitemap_content(
    sitemaps: Sequence[BaseSitemap[t.Any]], *, scope: "Scope", domain: str
) -> bytes:
    """Render *sitemaps* into a single ``<urlset>`` XML document.

    Each item of each sitemap becomes one ``<url>`` element whose children
    come from ``get_fields``. An error while processing a sitemap is logged
    and the remaining items of that sitemap are skipped; other sitemaps are
    still rendered.

    Returns:
        The XML document as newline-joined bytes.
    """
    lines: list[bytes] = [
        b'<?xml version="1.0" encoding="utf-8"?>',
        b'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    ]
    total_urls = 0

    for sitemap_idx, sitemap in enumerate(sitemaps):
        debug(
            f"generate_sitemap: Processing sitemap {sitemap_idx + 1}/{len(sitemaps)}"
        )
        try:
            async for item in _ensure_async_iterator(sitemap.items()):
                fields = get_fields(sitemap, item, scope=scope, domain=domain)
                # Build the whole entry before appending it, so a failure
                # while rendering a field cannot leave an unclosed <url>
                # element in the output.
                entry = [4 * b" " + b"<url>"]
                entry += [
                    8 * b" " + f"<{name}>{_escape_xml(value)}</{name}>".encode()
                    for name, value in fields.items()
                ]
                entry.append(4 * b" " + b"</url>")
                lines.extend(entry)
                total_urls += 1
        except Exception as e:
            debug(f"generate_sitemap: Error processing sitemap {sitemap_idx}: {e}")

    lines.append(b"</urlset>")
    debug(f"generate_sitemap: Generated {total_urls} URLs")

    return b"\n".join(lines)

201 

202 

203async def _ensure_async_iterator[T](items: ItemsTypes[T]) -> AsyncIterator[T]: 

204 try: 

205 if hasattr(items, "__aiter__"): 

206 items_async = cast(AsyncIterable[T], items) 

207 async for item in items_async: 

208 yield item 

209 elif inspect.isawaitable(items): 

210 items_awaitable = items 

211 resolved_items = await items_awaitable 

212 for item in resolved_items: 

213 yield item 

214 else: 

215 items_sync = items 

216 for item in items_sync: 

217 yield item 

218 except Exception as e: 

219 debug(f"_ensure_async_iterator: Error processing items: {e}") 

220 

221 

def get_fields(
    sitemap: BaseSitemap[T], item: T, *, scope: "Scope", domain: str
) -> dict[str, str]:
    """Build the ``<url>`` child elements for *item*.

    Args:
        sitemap: Sitemap providing location, lastmod, changefreq and priority.
        item: The item to describe.
        scope: ASGI scope; its ``scheme`` (default ``"https"``) is used when
            the sitemap protocol is ``"auto"``.
        domain: Domain used to build the absolute ``loc`` URL.

    Returns:
        A mapping with ``loc`` and ``priority`` and, when available,
        ``lastmod`` (``YYYY-MM-DD``) and ``changefreq``. If anything fails,
        the error is logged and a fallback entry pointing at the site root
        with priority ``"0.5"`` is returned.
    """
    protocol = (
        scope.get("scheme", "https")
        if sitemap.protocol == "auto"
        else sitemap.protocol
    )
    base_url = f"{protocol}://{domain}"

    try:
        location = sitemap.location(item)
        lastmod = sitemap.lastmod(item)
        changefreq = sitemap.changefreq(item)
        priority = sitemap.priority(item)

        # Locations must be site-relative; absolute URLs are rejected.
        parts = urlsplit(location)
        if parts.scheme or parts.netloc:
            raise ValueError(f"Location contains scheme or domain: {location}")

        fields: dict[str, str] = {"loc": urljoin(base_url, location)}

        if lastmod is not None:
            fields["lastmod"] = lastmod.strftime("%Y-%m-%d")
        if changefreq is not None:
            fields["changefreq"] = changefreq

        # Clamp into the [0.0, 1.0] range before formatting.
        clamped = max(0.0, min(1.0, priority))
        fields["priority"] = f"{clamped:.1f}"

        return fields

    except Exception as e:
        debug(f"get_fields: Error processing item {item}: {e}")
        return {"loc": urljoin(base_url, "/"), "priority": "0.5"}

257 

258 

259def _escape_xml(value: str) -> str: 

260 return ( 

261 value.replace("&", "&amp;") 

262 .replace("<", "&lt;") 

263 .replace(">", "&gt;") 

264 .replace('"', "&quot;") 

265 .replace("'", "&#x27;") 

266 ) 

267 

268 

async def _get_cached_sitemap(cache_key: str) -> bytes | None:
    """Return the cached sitemap stored under *cache_key*, if any.

    Returns ``None`` when no usable cache is available, when nothing is
    stored under the key, or when the lookup raises (the error is logged).
    Non-bytes cached values are encoded before being returned.
    """
    try:
        # NOTE(review): assumes depends.get() hands back the cache object
        # synchronously — confirm against the installed acb version.
        cache = depends.get("cache")
        if not cache or not hasattr(cache, "get"):
            return None

        cached_data = await cache.get(cache_key)
        if not cached_data:
            return None

        debug(f"_get_cached_sitemap: Cache hit for {cache_key}")
        if isinstance(cached_data, bytes):
            return cached_data
        return cached_data.encode()
    except Exception as e:
        debug(f"_get_cached_sitemap: Cache error: {e}")

    return None

285 

286 

async def _cache_sitemap(cache_key: str, content: bytes, *, ttl: int = 3600) -> None:
    """Store *content* under *cache_key* on a best-effort basis.

    Args:
        cache_key: Key to store the sitemap under.
        content: Rendered sitemap bytes.
        ttl: Value passed as ``ttl`` to the cache's ``set``; defaults to
            3600, the value that was previously hard-coded.

    Nothing is stored when no usable cache is available. Errors raised by the
    cache are logged and not propagated.
    """
    try:
        cache = depends.get("cache")
        if cache and hasattr(cache, "set"):
            await cache.set(cache_key, content, ttl=ttl)
            debug(f"_cache_sitemap: Cached sitemap ({len(content)} bytes)")
    except Exception as e:
        debug(f"_cache_sitemap: Cache error: {e}")