Coverage for fastblocks/adapters/sitemap/core.py: 0%
159 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-21 04:50 -0700
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-21 04:50 -0700
1"""FastBlocks Sitemap Core Implementation.
3Core sitemap functionality moved from standalone module to adapter pattern.
4Based on asgi-sitemaps by Florian Dahlitz with FastBlocks enhancements.
6Original asgi-sitemaps library:
7- Author: Florian Dahlitz
8- Repository: https://github.com/DahlitzFlorian/asgi-sitemaps
9- License: MIT
10"""
12import contextvars
13import datetime as dt
14import inspect
15import typing as t
16from collections.abc import (
17 AsyncIterable,
18 AsyncIterator,
19 Awaitable,
20 Callable,
21 Iterable,
22 Sequence,
23)
24from typing import TypeVar, cast
25from urllib.parse import urljoin, urlsplit
27from acb.debug import debug
28from acb.depends import depends
30if t.TYPE_CHECKING:
31 from starlette.types import Scope
33T = TypeVar("T")
34ItemsTypes = Iterable[T] | Awaitable[Iterable[T]] | AsyncIterable[T]
36SCOPE_CTX_VAR = contextvars.ContextVar["Scope"]("fastblocks.sitemaps.scope")
39class BaseSitemap[T]:
40 protocol: str = "auto"
42 def __init__(self) -> None:
43 if self.protocol not in ("http", "https", "auto"):
44 raise ValueError(f"Invalid protocol: {self.protocol}")
45 debug(f"BaseSitemap: Initialized with protocol={self.protocol}")
47 def items(self) -> ItemsTypes[T]:
48 raise NotImplementedError("Subclasses must implement items() method")
50 def location(self, item: T) -> str:
51 raise NotImplementedError("Subclasses must implement location() method")
53 def lastmod(self, item: T) -> dt.datetime | None:
54 return None
56 def changefreq(self, item: T) -> str | None:
57 return None
59 def priority(self, item: T) -> float:
60 return 0.5
62 @property
63 def scope(self) -> "Scope":
64 try:
65 return SCOPE_CTX_VAR.get()
66 except LookupError as e:
67 raise RuntimeError(
68 "Scope accessed outside of an ASGI request. "
69 "Ensure sitemap generation happens within request context."
70 ) from e
class SitemapApp:
    """ASGI application that renders the configured sitemaps as XML."""

    def __init__(
        self,
        sitemaps: BaseSitemap[t.Any] | list[BaseSitemap[t.Any]],
        *,
        domain: str,
        cache_ttl: int = 3600,
    ) -> None:
        """Store the sitemaps and response settings.

        Args:
            sitemaps: A single sitemap or an iterable of sitemaps.
            domain: Domain passed to ``generate_sitemap``.
            cache_ttl: Value used for ``max-age`` in the ``cache-control``
                response header.
        """
        self._sitemaps = (
            [sitemaps] if isinstance(sitemaps, BaseSitemap) else list(sitemaps)
        )
        self._domain = domain
        self._cache_ttl = cache_ttl
        debug(
            f"SitemapApp: Initialized with {len(self._sitemaps)} sitemaps, domain={domain}"
        )

    async def __call__(
        self,
        scope: "Scope",
        receive: Callable[[], Awaitable[dict[str, t.Any]]],
        send: Callable[[dict[str, t.Any]], Awaitable[None]],
    ) -> None:
        """Handle one ASGI connection.

        Non-HTTP scopes get a 404 error response, a first message other than
        ``http.request`` gets a 400, and a failure before any response has
        been started gets a 500. A failure after a response has been started
        is re-raised instead of being answered with a second response.
        """
        if scope["type"] != "http":
            await self._send_error(send, 404)
            return

        debug(
            f"SitemapApp: Processing sitemap request for {scope.get('path', 'unknown')}"
        )

        # ASGI allows only one ``http.response.start`` per request, so remember
        # when one may already be on the wire before falling back to a 500.
        response_started = False
        try:
            content = await generate_sitemap(
                self._sitemaps, scope=scope, domain=self._domain
            )

            headers = [
                [b"content-type", b"application/xml; charset=utf-8"],
                [b"content-length", str(len(content)).encode()],
                [b"cache-control", f"public, max-age={self._cache_ttl}".encode()],
            ]

            message = await receive()
            if message["type"] != "http.request":
                response_started = True
                await self._send_error(send, 400)
                return

            response_started = True
            await send(
                {"type": "http.response.start", "status": 200, "headers": headers}
            )
            await send({"type": "http.response.body", "body": content})

            debug(f"SitemapApp: Sent sitemap response ({len(content)} bytes)")

        except Exception as e:
            debug(f"SitemapApp: Error generating sitemap: {e}")
            if response_started:
                # A response is already in flight; sending an error response
                # now would emit a second start message, so let the server
                # deal with the failure instead.
                raise
            await self._send_error(send, 500)

    async def _send_error(
        self, send: Callable[[dict[str, t.Any]], Awaitable[None]], status: int
    ) -> None:
        """Send a plain-text ``Error <status>`` response through *send*."""
        await send(
            {
                "type": "http.response.start",
                "status": status,
                "headers": [[b"content-type", b"text/plain"]],
            }
        )
        await send(
            {
                "type": "http.response.body",
                "body": f"Error {status}".encode(),
            }
        )
async def generate_sitemap(
    sitemaps: Sequence[BaseSitemap[t.Any]], *, scope: "Scope", domain: str
) -> bytes:
    """Return the sitemap XML for *sitemaps*, using the cache when possible.

    The scope is bound to ``SCOPE_CTX_VAR`` for the duration of the call so
    sitemap hooks can read it, and the previous binding is restored on exit.

    Args:
        sitemaps: Sitemaps to render.
        scope: ASGI scope of the current request.
        domain: Domain used for the cache key and for building URLs.

    Returns:
        The cached content if present, otherwise freshly generated content.

    Raises:
        Exception: Any error raised during generation is logged and re-raised.
    """
    debug(f"generate_sitemap: Starting generation for {len(sitemaps)} sitemaps")

    # Keep the token so the binding does not outlive this call.
    scope_token = SCOPE_CTX_VAR.set(scope)
    try:
        cache_key = f"fastblocks:sitemap:{domain}"
        cached_content = await _get_cached_sitemap(cache_key)
        if cached_content:
            debug("generate_sitemap: Returning cached sitemap")
            return cached_content

        try:
            content = await _generate_sitemap_content(
                sitemaps, scope=scope, domain=domain
            )

            await _cache_sitemap(cache_key, content)

            debug(f"generate_sitemap: Generated {len(content)} bytes")
            return content

        except Exception as e:
            debug(f"generate_sitemap: Error during generation: {e}")
            raise
    finally:
        SCOPE_CTX_VAR.reset(scope_token)
async def _generate_sitemap_content(
    sitemaps: Sequence[BaseSitemap[t.Any]], *, scope: "Scope", domain: str
) -> bytes:
    """Render *sitemaps* into one ``<urlset>`` XML document.

    Each item becomes a ``<url>`` element whose children come from
    ``get_fields``, with values passed through ``_escape_xml``. An error
    while processing a sitemap is logged and the remaining items of that
    sitemap are skipped; later sitemaps are still processed.

    Returns:
        The document as bytes, lines joined with ``\\n``.
    """
    url_indent = 4 * b" "
    field_indent = 8 * b" "

    async def _lines() -> AsyncIterator[bytes]:
        yield b'<?xml version="1.0" encoding="utf-8"?>'
        yield b'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
        total_urls = 0
        for sitemap_idx, sitemap in enumerate(sitemaps):
            debug(
                f"generate_sitemap: Processing sitemap {sitemap_idx + 1}/{len(sitemaps)}"
            )
            try:
                async for item in _ensure_async_iterator(sitemap.items()):
                    fields = get_fields(sitemap, item, scope=scope, domain=domain)
                    # Build the whole entry before emitting any of it, so a
                    # failure here cannot leave an unclosed <url> element.
                    entry = [url_indent + b"<url>"]
                    entry.extend(
                        field_indent
                        + f"<{name}>{_escape_xml(value)}</{name}>".encode()
                        for name, value in fields.items()
                    )
                    entry.append(url_indent + b"</url>")
                    for line in entry:
                        yield line
                    total_urls += 1
            except Exception as e:
                debug(f"generate_sitemap: Error processing sitemap {sitemap_idx}: {e}")
        yield b"</urlset>"
        debug(f"generate_sitemap: Generated {total_urls} URLs")

    return b"\n".join([line async for line in _lines()])
203async def _ensure_async_iterator[T](items: ItemsTypes[T]) -> AsyncIterator[T]:
204 try:
205 if hasattr(items, "__aiter__"):
206 items_async = cast(AsyncIterable[T], items)
207 async for item in items_async:
208 yield item
209 elif inspect.isawaitable(items):
210 items_awaitable = items
211 resolved_items = await items_awaitable
212 for item in resolved_items:
213 yield item
214 else:
215 items_sync = cast(Iterable[T], items)
216 for item in items_sync:
217 yield item
218 except Exception as e:
219 debug(f"_ensure_async_iterator: Error processing items: {e}")
def get_fields(
    sitemap: BaseSitemap[T], item: T, *, scope: "Scope", domain: str
) -> dict[str, str]:
    """Build the ``<url>`` child elements for *item*.

    The scheme comes from ``scope`` (defaulting to "https") when the
    sitemap's protocol is "auto", otherwise from the sitemap itself.

    Returns:
        A dict with ``loc`` and ``priority`` and, when the sitemap provides
        them, ``lastmod`` and ``changefreq``. If anything fails, including a
        location that carries its own scheme or domain, the error is logged
        and a fallback entry pointing at the site root is returned.
    """
    protocol = (
        scope.get("scheme", "https")
        if sitemap.protocol == "auto"
        else sitemap.protocol
    )

    try:
        path = sitemap.location(item)
        modified = sitemap.lastmod(item)
        frequency = sitemap.changefreq(item)
        raw_priority = sitemap.priority(item)

        # Locations must be relative; the domain is supplied by the caller.
        parts = urlsplit(path)
        if parts.scheme or parts.netloc:
            raise ValueError(f"Location contains scheme or domain: {path}")

        fields: dict[str, str] = {"loc": urljoin(f"{protocol}://{domain}", path)}
        if modified is not None:
            fields["lastmod"] = modified.strftime("%Y-%m-%d")
        if frequency is not None:
            fields["changefreq"] = frequency

        # Clamp to the 0.0-1.0 range before formatting.
        clamped = max(0.0, min(1.0, raw_priority))
        fields["priority"] = f"{clamped:.1f}"
        return fields

    except Exception as e:
        debug(f"get_fields: Error processing item {item}: {e}")
        return {"loc": urljoin(f"{protocol}://{domain}", "/"), "priority": "0.5"}
259def _escape_xml(value: str) -> str:
260 return (
261 value.replace("&", "&")
262 .replace("<", "<")
263 .replace(">", ">")
264 .replace('"', """)
265 .replace("'", "'")
266 )
async def _get_cached_sitemap(cache_key: str) -> bytes | None:
    """Return the cached sitemap stored under *cache_key*, if any.

    Returns ``None`` when no usable cache is available, when nothing is
    stored under the key, or when the lookup raises; errors are logged.
    Non-bytes cached values are encoded before being returned.
    """
    try:
        cache = depends.get("cache")
        if not cache or not hasattr(cache, "get"):
            return None

        cached_data = await cache.get(cache_key)
        if not cached_data:
            return None

        debug(f"_get_cached_sitemap: Cache hit for {cache_key}")
        if isinstance(cached_data, bytes):
            return cached_data
        return cached_data.encode()
    except Exception as e:
        debug(f"_get_cached_sitemap: Cache error: {e}")

    return None
async def _cache_sitemap(cache_key: str, content: bytes, ttl: int = 3600) -> None:
    """Store *content* under *cache_key* on a best-effort basis.

    Args:
        cache_key: Key to store the sitemap under.
        content: Rendered sitemap bytes.
        ttl: Value passed as ``ttl`` to the cache's ``set``; defaults to 3600.

    Nothing is stored when no usable cache is available. Errors are logged
    and never propagated.
    """
    try:
        cache = depends.get("cache")
        if cache and hasattr(cache, "set"):
            await cache.set(cache_key, content, ttl=ttl)
            debug(f"_cache_sitemap: Cached sitemap ({len(content)} bytes)")
    except Exception as e:
        debug(f"_cache_sitemap: Cache error: {e}")