Coverage for src/prosemark/adapters/frontmatter_codec.py: 100%
72 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-09-24 18:08 +0000
« prev ^ index » next coverage.py v7.8.0, created at 2025-09-24 18:08 +0000
1# Copyright (c) 2024 Prosemark Contributors
2# This software is licensed under the MIT License
4"""YAML frontmatter codec for parsing and generating frontmatter blocks."""
6import re
7from datetime import datetime
8from typing import Any
10import yaml
12from prosemark.exceptions import FrontmatterFormatError
class FrontmatterCodec:
    """YAML frontmatter codec for parsing and generating frontmatter blocks.

    This adapter handles the encoding and decoding of YAML frontmatter in markdown files.
    It provides safe parsing and generation of frontmatter blocks with proper error handling
    and format validation.

    Supported frontmatter format:
    ```
    ---
    key: value
    other_key: other_value
    ---
    (content)
    ```

    The codec ensures:
    - Safe YAML parsing (no arbitrary code execution)
    - Consistent frontmatter block formatting
    - Proper error handling for malformed YAML
    - Round-trip compatibility (parse -> generate -> parse)
    """

    # Matches a frontmatter block anchored at the very start of the text.
    # Group 1 is the YAML between the delimiters; group 2 (optional) is everything
    # after the closing delimiter's line break. DOTALL lets both groups span lines.
    FRONTMATTER_PATTERN = re.compile(r'^---\r?\n(.*?)\r?\n---(?:\r?\n(.*))?$', re.DOTALL)

    def parse(self, content: str) -> tuple[dict[str, Any], str]:
        """Parse frontmatter and content from markdown text.

        Args:
            content: Raw markdown content with optional frontmatter

        Returns:
            Tuple of (frontmatter_dict, remaining_content)
            If no frontmatter is found, returns ({}, original_content)

        Raises:
            FrontmatterFormatError: If the delimiters are malformed, the YAML is
                invalid, or the YAML document is not a mapping

        """
        # Reject obviously broken delimiter layouts before attempting a match.
        self._validate_frontmatter_format(content)

        match = self.FRONTMATTER_PATTERN.match(content)
        if not match:
            return {}, content

        yaml_content = match.group(1)
        # Group 2 is None when nothing follows the closing delimiter.
        # A single leading '\n' (the conventional blank separator line) is dropped.
        # NOTE(review): only a bare '\n' is stripped here; a '\r\n' blank line is kept.
        remaining_content = (match.group(2) or '').removeprefix('\n')

        frontmatter_data = FrontmatterCodec._parse_yaml_content(yaml_content)
        return frontmatter_data, remaining_content

    @staticmethod
    def generate(frontmatter: dict[str, Any], content: str) -> str:
        """Generate markdown content with frontmatter block.

        Args:
            frontmatter: Dictionary of frontmatter data
            content: Markdown content to append after frontmatter

        Returns:
            Complete markdown content with frontmatter block. When ``frontmatter``
            is empty, ``content`` is returned unchanged (no delimiters are emitted).

        Raises:
            FrontmatterFormatError: If YAML serialization fails

        """
        if not frontmatter:
            return content

        try:
            # Block style with sorted keys keeps the output stable between runs;
            # strip() drops the trailing newline that safe_dump appends.
            yaml_content = yaml.safe_dump(
                frontmatter,
                default_flow_style=False,
                allow_unicode=True,
                sort_keys=True,
                default_style='',
            ).strip()
        except yaml.YAMLError as exc:
            msg = 'Failed to serialize frontmatter to YAML'
            raise FrontmatterFormatError(msg) from exc
        else:
            return f'---\n{yaml_content}\n---\n{content}'

    def update_frontmatter(self, content: str, updates: dict[str, Any]) -> str:
        """Update frontmatter in existing content.

        Args:
            content: Existing markdown content with or without frontmatter
            updates: Dictionary of frontmatter updates to apply; on key
                collisions the values in ``updates`` win

        Returns:
            Updated markdown content with modified frontmatter

        Raises:
            FrontmatterFormatError: Propagated from ``parse`` or ``generate``

        """
        existing_frontmatter, remaining_content = self.parse(content)

        # Shallow merge: later mapping (updates) overrides existing keys.
        updated_frontmatter = {**existing_frontmatter, **updates}

        return self.generate(updated_frontmatter, remaining_content)

    def _validate_frontmatter_format(self, content: str) -> None:
        """Validate frontmatter format and raise errors for malformed patterns.

        Args:
            content: Raw markdown content to inspect

        Raises:
            FrontmatterFormatError: If frontmatter delimiters are malformed

        """
        if content.startswith('---') and not self.FRONTMATTER_PATTERN.match(content):
            # Opening delimiter present but no complete block: it is only an error
            # when no further '---' occurs anywhere after the opening one.
            if '---' not in content[3:]:
                msg = 'Frontmatter block missing closing delimiter'
                raise FrontmatterFormatError(msg)
        elif '---' in content and not content.startswith('---'):
            # A delimiter exists somewhere other than the start of the text.
            FrontmatterCodec._check_misplaced_frontmatter(content)

    @staticmethod
    def _check_misplaced_frontmatter(content: str) -> None:
        """Check for frontmatter that is not at the document start.

        Only the first line that consists solely of ``---`` (ignoring surrounding
        whitespace) is inspected.

        Args:
            content: Raw markdown content to inspect

        Raises:
            FrontmatterFormatError: If frontmatter delimiters found in wrong position

        """
        lines = content.split('\n')
        first_delimiter = next(
            (index for index, line in enumerate(lines) if line.strip() == '---'),
            None,
        )
        if first_delimiter is None:
            return

        # Heuristic: frontmatter-looking keys before the first delimiter suggest
        # that the opening '---' line was dropped.
        if any(
            'id:' in prev_line or 'title:' in prev_line or 'created:' in prev_line
            for prev_line in lines[:first_delimiter]
        ):
            msg = 'Frontmatter block missing opening delimiter'
            raise FrontmatterFormatError(msg)

        # Two consecutive delimiter lines away from the start of the text.
        if first_delimiter < len(lines) - 1 and lines[first_delimiter + 1].strip() == '---':
            msg = 'Frontmatter block not at document start'
            raise FrontmatterFormatError(msg)

    @staticmethod
    def _parse_yaml_content(yaml_content: str) -> dict[str, Any]:
        """Parse YAML content and return processed frontmatter data.

        Args:
            yaml_content: Text found between the frontmatter delimiters

        Returns:
            Parsed frontmatter data as dictionary (empty when the YAML document
            is empty)

        Raises:
            FrontmatterFormatError: If YAML parsing fails or data is invalid

        """
        try:
            frontmatter_data = yaml.safe_load(yaml_content)
        except yaml.YAMLError as exc:
            msg = 'Invalid YAML in frontmatter block'
            raise FrontmatterFormatError(msg) from exc

        # An empty YAML document loads as None; treat it as "no keys".
        if frontmatter_data is None:
            frontmatter_data = {}

        if not isinstance(frontmatter_data, dict):
            msg = 'Frontmatter must be a YAML mapping/dictionary'
            raise FrontmatterFormatError(msg)

        return FrontmatterCodec._convert_datetimes_to_strings(frontmatter_data)

    @staticmethod
    def _convert_datetimes_to_strings(data: dict[str, Any]) -> dict[str, Any]:
        """Convert datetime objects to ISO format strings to preserve original format.

        YAML automatically parses ISO timestamp strings to datetime objects,
        but we want to preserve them as strings in frontmatter for human readability.

        Timezone-aware values are shifted to UTC before formatting so that the
        trailing ``Z`` is truthful. Naive values are formatted as-is. Only
        top-level values are converted, and sub-second precision is not emitted.

        Args:
            data: Dictionary that may contain datetime objects

        Returns:
            New dictionary with datetime objects converted to ISO strings

        """
        result: dict[str, Any] = {}
        for key, value in data.items():
            if isinstance(value, datetime):
                offset = value.utcoffset()
                if offset is not None:
                    # Aware datetime: move the wall clock to UTC, otherwise the
                    # 'Z' suffix would mislabel a local time as UTC.
                    value = value.replace(tzinfo=None) - offset
                result[key] = value.strftime('%Y-%m-%dT%H:%M:%SZ')
            else:
                result[key] = value
        return result