Coverage for src/prosemark/adapters/frontmatter_codec.py: 100%

72 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-09-24 18:08 +0000

1# Copyright (c) 2024 Prosemark Contributors 

2# This software is licensed under the MIT License 

3 

4"""YAML frontmatter codec for parsing and generating frontmatter blocks.""" 

5 

6import re 

7from datetime import datetime 

8from typing import Any 

9 

10import yaml 

11 

12from prosemark.exceptions import FrontmatterFormatError 

13 

14 

class FrontmatterCodec:
    """YAML frontmatter codec for parsing and generating frontmatter blocks.

    This adapter handles the encoding and decoding of YAML frontmatter in markdown files.
    It provides safe parsing and generation of frontmatter blocks with proper error handling
    and format validation.

    Supported frontmatter format:
    ```
    ---
    key: value
    other_key: other_value
    ---
    (content)
    ```

    The codec ensures:
    - Safe YAML parsing (``yaml.safe_load`` / ``yaml.safe_dump`` only)
    - Consistent frontmatter block formatting
    - Proper error handling for malformed YAML
    - Round-trip compatibility (parse -> generate -> parse)
    """

    # Matches a frontmatter block at the very start of the text. Group 1 is the
    # YAML between the delimiters; group 2 (optional) is everything after the
    # line break that follows the closing delimiter. LF and CRLF are accepted.
    FRONTMATTER_PATTERN = re.compile(r'^---\r?\n(.*?)\r?\n---(?:\r?\n(.*))?$', re.DOTALL)

    def parse(self, content: str) -> tuple[dict[str, Any], str]:
        """Parse frontmatter and content from markdown text.

        Args:
            content: Raw markdown content with optional frontmatter

        Returns:
            Tuple of (frontmatter_dict, remaining_content)
            If no frontmatter is found, returns ({}, original_content)

        Raises:
            FrontmatterFormatError: If the delimiters are malformed or the
                frontmatter YAML is invalid / not a mapping

        """
        # Reject obviously broken delimiter layouts before attempting a match.
        self._validate_frontmatter_format(content)

        match = self.FRONTMATTER_PATTERN.match(content)
        if match is None:
            return {}, content

        yaml_content = match.group(1)
        # Group 2 is None when nothing follows the closing delimiter. A single
        # leading '\n' (one blank line after the block) is dropped from the body.
        remaining_content = (match.group(2) or '').removeprefix('\n')

        frontmatter_data = FrontmatterCodec._parse_yaml_content(yaml_content)
        return frontmatter_data, remaining_content

    @staticmethod
    def generate(frontmatter: dict[str, Any], content: str) -> str:
        """Generate markdown content with frontmatter block.

        Args:
            frontmatter: Dictionary of frontmatter data
            content: Markdown content to append after frontmatter

        Returns:
            Complete markdown content with frontmatter block. When
            ``frontmatter`` is empty, ``content`` is returned unchanged and no
            block is emitted.

        Raises:
            FrontmatterFormatError: If YAML serialization fails

        """
        if not frontmatter:
            return content

        try:
            # Block style, sorted keys and unescaped unicode keep the output
            # stable and readable across runs.
            yaml_content = yaml.safe_dump(
                frontmatter,
                default_flow_style=False,
                allow_unicode=True,
                sort_keys=True,
                default_style='',
            ).strip()
        except yaml.YAMLError as exc:
            msg = 'Failed to serialize frontmatter to YAML'
            raise FrontmatterFormatError(msg) from exc

        return f'---\n{yaml_content}\n---\n{content}'

    def update_frontmatter(self, content: str, updates: dict[str, Any]) -> str:
        """Update frontmatter in existing content.

        Args:
            content: Existing markdown content with or without frontmatter
            updates: Dictionary of frontmatter updates to apply; keys present
                in both the existing frontmatter and ``updates`` take the
                value from ``updates``

        Returns:
            Updated markdown content with modified frontmatter

        Raises:
            FrontmatterFormatError: Propagated from ``parse`` / ``generate``

        """
        existing_frontmatter, remaining_content = self.parse(content)

        # Later mapping wins, so updates override existing values.
        updated_frontmatter = {**existing_frontmatter, **updates}

        return self.generate(updated_frontmatter, remaining_content)

    def _validate_frontmatter_format(self, content: str) -> None:
        """Validate frontmatter format and raise errors for malformed patterns.

        Args:
            content: Raw markdown content to inspect

        Raises:
            FrontmatterFormatError: If frontmatter delimiters are malformed

        """
        if content.startswith('---') and not self.FRONTMATTER_PATTERN.match(content):
            # Opens with a delimiter but is not a well-formed block: only
            # complain when no second '---' exists anywhere after the first.
            if '---' not in content[3:]:
                msg = 'Frontmatter block missing closing delimiter'
                raise FrontmatterFormatError(msg)
        elif '---' in content and not content.startswith('---'):
            # A delimiter appears somewhere other than the very start.
            FrontmatterCodec._check_misplaced_frontmatter(content)

    @staticmethod
    def _check_misplaced_frontmatter(content: str) -> None:
        """Check for frontmatter that is not at the document start.

        Only the first line consisting solely of ``---`` (ignoring surrounding
        whitespace) is examined.

        Args:
            content: Raw markdown content that does not start with ``---``

        Raises:
            FrontmatterFormatError: If frontmatter delimiters found in wrong position

        """
        lines = content.split('\n')
        for i, line in enumerate(lines):
            if line.strip() != '---':
                continue

            # Heuristic: typical frontmatter keys appearing before the first
            # delimiter suggest the opening '---' was omitted.
            if i > 0 and any(
                'id:' in prev_line or 'title:' in prev_line or 'created:' in prev_line for prev_line in lines[:i]
            ):
                msg = 'Frontmatter block missing opening delimiter'
                raise FrontmatterFormatError(msg)

            # Two consecutive delimiter lines away from the document start.
            if i < len(lines) - 1 and lines[i + 1].strip() == '---':
                msg = 'Frontmatter block not at document start'
                raise FrontmatterFormatError(msg)

            # Anything else (e.g. a markdown horizontal rule) is accepted;
            # later delimiter lines are not inspected.
            break

    @staticmethod
    def _parse_yaml_content(yaml_content: str) -> dict[str, Any]:
        """Parse YAML content and return processed frontmatter data.

        Args:
            yaml_content: Text found between the frontmatter delimiters

        Returns:
            Parsed frontmatter data as dictionary (empty when the YAML
            document is empty)

        Raises:
            FrontmatterFormatError: If YAML parsing fails or the top-level
                value is not a mapping

        """
        # Keep the try body to the single call that can raise YAMLError.
        try:
            frontmatter_data = yaml.safe_load(yaml_content)
        except yaml.YAMLError as exc:
            msg = 'Invalid YAML in frontmatter block'
            raise FrontmatterFormatError(msg) from exc

        # An empty or comment-only document loads as None.
        if frontmatter_data is None:
            return {}

        if not isinstance(frontmatter_data, dict):
            msg = 'Frontmatter must be a YAML mapping/dictionary'
            raise FrontmatterFormatError(msg)

        return FrontmatterCodec._convert_datetimes_to_strings(frontmatter_data)

    @staticmethod
    def _convert_datetimes_to_strings(data: dict[str, Any]) -> dict[str, Any]:
        """Convert top-level datetime values to ``YYYY-MM-DDTHH:MM:SSZ`` strings.

        YAML automatically parses ISO timestamp strings to datetime objects,
        but we want to keep them as strings in frontmatter for human readability.

        Timezone-aware datetimes are shifted to UTC before formatting so the
        ``Z`` suffix is truthful. Naive datetimes carry no offset and are
        formatted as-is. Sub-second precision is not emitted. Only top-level
        values are converted; nested containers are passed through untouched.

        Args:
            data: Dictionary that may contain datetime objects

        Returns:
            New dictionary with datetime values replaced by strings; the
            input dictionary is not modified

        """
        result: dict[str, Any] = {}
        for key, value in data.items():
            if isinstance(value, datetime):
                offset = value.utcoffset()
                if offset is not None:
                    # Aware value: drop tzinfo and subtract the offset to get
                    # the equivalent UTC wall-clock time.
                    value = value.replace(tzinfo=None) - offset
                result[key] = value.strftime('%Y-%m-%dT%H:%M:%SZ')
            else:
                result[key] = value
        return result

210 return result