Coverage for src/prosemark/domain/models.py: 100%

213 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-09-24 18:08 +0000

1"""Domain models for prosemark.""" 

2 

3import uuid 

4from dataclasses import dataclass, field 

5from datetime import datetime 

6 

7from prosemark.exceptions import NodeIdentityError 

8 

9 

@dataclass(frozen=True)
class NodeId:
    """Value object representing a node identifier with UUIDv7 validation.

    NodeId serves as the stable identity for all nodes in the system. It ensures:
    - UUIDv7 format for sortability and uniqueness
    - Immutable once created
    - Validated to ensure proper format
    - Stored in canonical form (lowercase, hyphenated) so that equal UUIDs
      always compare and hash equal
    - Used in filenames ({id}.md, {id}.notes.md) and binder links

    Args:
        value: A valid UUIDv7 string

    Raises:
        NodeIdentityError: If the provided value is not a valid UUIDv7

    Examples:
        >>> node_id = NodeId('0192f0c1-2345-7123-8abc-def012345678')
        >>> str(node_id)
        '0192f0c1-2345-7123-8abc-def012345678'

    """

    # Expected UUID version for NodeId
    EXPECTED_UUID_VERSION = 7

    value: str

    def __post_init__(self) -> None:
        """Validate that the value is a UUIDv7 and store its canonical form.

        Raises:
            NodeIdentityError: If the value is not a string, is empty, cannot
                be parsed as a UUID, or is a UUID of a version other than 7.

        """
        # Validate type first
        if not isinstance(self.value, str):  # pragma: no cover
            msg = f'NodeId value must be a string, got {type(self.value).__name__}'
            raise NodeIdentityError(msg, self.value)  # pragma: no cover

        if not self.value:
            msg = 'NodeId value cannot be empty'
            raise NodeIdentityError(msg, self.value)

        # Normalize to lowercase (standard UUID format). The dataclass is
        # frozen, so the attribute has to be set through object.__setattr__.
        object.__setattr__(self, 'value', self.value.lower())

        try:
            parsed_uuid = uuid.UUID(self.value)
        except ValueError as exc:
            msg = 'Invalid UUID format'
            raise NodeIdentityError(msg, self.value) from exc

        # Check that it's specifically a version 7 UUID
        if parsed_uuid.version != self.EXPECTED_UUID_VERSION:
            msg = 'NodeId must be a UUIDv7'
            raise NodeIdentityError(msg, self.value, parsed_uuid.version)

        # uuid.UUID() also accepts braces, a 'urn:uuid:' prefix and hex without
        # hyphens. Keep only the canonical rendering so that every spelling of
        # the same UUID yields an identical value (and therefore an identical
        # hash, equality result and filename).
        object.__setattr__(self, 'value', str(parsed_uuid))

    def __str__(self) -> str:
        """Return the UUID string representation."""
        return self.value

    def __repr__(self) -> str:
        """Return the canonical string representation."""
        return f'NodeId({self.value!r})'

    def __hash__(self) -> int:
        """Return hash of the UUID value for use in sets and dicts."""
        return hash(self.value)

    def __eq__(self, other: object) -> bool:
        """Compare NodeId instances for equality."""
        if not isinstance(other, NodeId):
            return False
        return self.value == other.value

    @classmethod
    def generate(cls) -> 'NodeId':
        """Generate a new NodeId with a UUIDv7.

        Returns:
            A new NodeId instance with a freshly generated UUIDv7

        """
        # TODO: Use uuid.uuid7() when available in Python standard library
        # For now, create a UUID7-compliant UUID manually
        import secrets
        import time

        # UUIDv7 layout per RFC 9562 (most significant bit first):
        #   48 bits: Unix timestamp in milliseconds
        #    4 bits: version (0b0111)
        #   12 bits: random (rand_a)
        #    2 bits: variant (0b10)
        #   62 bits: random (rand_b)
        # Every field gets its own non-overlapping bit range, so no random bit
        # is OR-ed with another one.
        timestamp_ms = int(time.time() * 1000) & 0xFFFFFFFFFFFF
        rand_a = secrets.randbits(12)
        rand_b = secrets.randbits(62)

        uuid_int = (
            (timestamp_ms << 80)
            | (cls.EXPECTED_UUID_VERSION << 76)
            | (rand_a << 64)
            | (0b10 << 62)
            | rand_b
        )

        return cls(str(uuid.UUID(int=uuid_int)))

136 

137 

@dataclass
class BinderItem:
    """Represents an individual node in the binder hierarchy.

    BinderItem can either reference an existing node (with NodeId) or be a
    placeholder (None node_id). Each item has a display title and can contain
    children to form a tree structure.

    Args:
        display_title: Display title for the item
        node_id: Optional NodeId reference (None for placeholders)
        children: List of child BinderItem objects (defaults to empty list)
        parent: Optional parent BinderItem reference
        id_: Deprecated alias for node_id, kept for backward compatibility

    Raises:
        ValueError: If both 'id_' and 'node_id' are given, or if display_title
            is empty or whitespace-only

    Examples:
        >>> # Create a placeholder item
        >>> placeholder = BinderItem(display_title='New Section', node_id=None)

        >>> # Create an item with NodeId
        >>> node_id = NodeId('0192f0c1-2345-7123-8abc-def012345678')
        >>> item = BinderItem(display_title='Chapter 1', node_id=node_id)

        >>> # Create hierarchical structure
        >>> parent = BinderItem(display_title='Part 1', node_id=None)
        >>> parent.children.append(item)

    """

    display_title: str
    node_id: NodeId | None = None
    children: list['BinderItem'] = field(default_factory=list)
    parent: 'BinderItem | None' = None

    def __init__(
        self,
        display_title: str,
        node_id: NodeId | None = None,
        children: list['BinderItem'] | None = None,
        parent: 'BinderItem | None' = None,
        id_: NodeId | None = None,  # backward compatibility
    ) -> None:
        """Initialize BinderItem with backward compatibility for 'id_' parameter."""
        # Handle backward compatibility: if 'id_' is provided, use it for node_id
        if id_ is not None and node_id is None:
            node_id = id_
        elif id_ is not None and node_id is not None:
            msg = "Cannot specify both 'id_' and 'node_id' parameters"
            raise ValueError(msg)

        # Validate display_title is not empty or whitespace-only
        if not display_title or not display_title.strip():
            msg = 'display_title cannot be empty or whitespace-only'
            raise ValueError(msg)

        self.display_title = display_title
        self.node_id = node_id
        # Only substitute a fresh list when no list was supplied at all; a
        # caller-provided list (even an empty one) is stored as-is, exactly as
        # a non-empty list is.
        self.children = children if children is not None else []
        self.parent = parent

    @property
    def id(self) -> NodeId | None:
        """Compatibility property for id access."""
        return self.node_id

    def is_root(self) -> bool:
        """Check if this item is a root item (no parent)."""
        return self.parent is None

    def is_leaf(self) -> bool:
        """Check if this item is a leaf item (no children)."""
        return not self.children

    def is_placeholder(self) -> bool:
        """Check if this item is a placeholder (no node_id)."""
        return self.node_id is None

    def is_materialized(self) -> bool:
        """Check if this item is materialized (has node_id)."""
        return self.node_id is not None

    def materialize(self, node_id: NodeId) -> None:
        """Materialize this placeholder with a real node_id.

        Raises:
            BinderIntegrityError: If the item already has a node_id

        """
        if self.node_id is not None:
            from prosemark.exceptions import BinderIntegrityError

            msg = 'Cannot materialize item that already has a node_id'
            raise BinderIntegrityError(msg)
        self.node_id = node_id

    def get_depth(self) -> int:
        """Get the depth of this item in the tree (0 for root)."""
        depth = 0
        current = self.parent
        while current is not None:
            depth += 1
            current = current.parent
        return depth

    def get_path_to_root(self) -> list['BinderItem']:
        """Get the path from this item to the root as a list of items.

        The list starts with this item and ends with the root.
        """
        path: list[BinderItem] = []
        current: BinderItem | None = self
        while current is not None:
            path.append(current)
            current = current.parent
        return path

    def get_siblings(self) -> list['BinderItem']:
        """Get all sibling items (items with the same parent)."""
        if self.parent is None:
            return []
        return [child for child in self.parent.children if child is not self]

    def add_child(self, child: 'BinderItem') -> None:
        """Add a child item to this item and point its parent at this item."""
        child.parent = self
        self.children.append(child)

    def remove_child(self, child: 'BinderItem') -> None:
        """Remove a child item from this item.

        The child is matched by identity, not by the dataclass-generated
        equality: two distinct items with the same field values must not be
        mistaken for one another. If ``child`` is not one of this item's
        children, nothing happens.
        """
        for index, existing in enumerate(self.children):
            if existing is child:
                child.parent = None
                del self.children[index]
                return

261 

262 

@dataclass
class Binder:
    """Aggregate root for document hierarchy with tree invariants.

    The Binder maintains a collection of root-level BinderItems and enforces
    critical tree invariants:
    - No duplicate NodeIds across the entire tree
    - Tree structure integrity
    - Provides methods for tree operations and validation

    Args:
        roots: List of root-level BinderItem objects
        project_title: Optional title for the entire project/binder
        original_content: Original file content for round-trip preservation (internal use)
        managed_content: Managed block content (internal use)

    Raises:
        BinderIntegrityError: If tree invariants are violated (e.g., duplicate NodeIds)

    Examples:
        >>> # Create empty binder
        >>> binder = Binder(roots=[])

        >>> # Create binder with items
        >>> item = BinderItem(display_title='Chapter 1', node_id=None)
        >>> binder = Binder(roots=[item], project_title='My Book')

        >>> # Find node by ID
        >>> found = binder.find_by_id(node_id)

        >>> # Get all NodeIds
        >>> all_ids = binder.get_all_node_ids()

    """

    roots: list[BinderItem] = field(default_factory=list)
    project_title: str | None = field(default=None)
    original_content: str | None = field(default=None, repr=False)
    managed_content: str | None = field(default=None, repr=False)

    @property
    def children(self) -> list[BinderItem]:
        """Compatibility property to allow iteration over roots."""
        return self.roots

    def __post_init__(self) -> None:
        """Validate tree integrity during initialization."""
        self.validate_integrity()

    def validate_integrity(self) -> None:
        """Validate all tree invariants using domain policies.

        Raises:
            BinderIntegrityError: If any invariant is violated

        """
        # Import policies locally to avoid circular import
        from prosemark.domain.policies import (
            validate_no_duplicate_ids,
            validate_placeholder_handling,
            validate_tree_structure,
        )

        # Apply all domain policies
        validate_no_duplicate_ids(self.roots)
        validate_tree_structure(self.roots)
        validate_placeholder_handling(self.roots)

    def find_by_id(self, node_id: NodeId) -> BinderItem | None:
        """Find a BinderItem by its NodeId.

        Walks the tree in depth-first (pre-order) order and returns the first
        item whose node_id equals the one given.

        Args:
            node_id: The NodeId to search for

        Returns:
            The BinderItem with matching NodeId, or None if not found

        """
        for item in self.depth_first_traversal():
            if item.node_id == node_id:
                return item
        return None

    def find_item_by_node_id(self, node_id: NodeId) -> BinderItem | None:
        """Find a BinderItem by its NodeId (alias for find_by_id)."""
        return self.find_by_id(node_id)

    def get_all_node_ids(self) -> set[NodeId]:
        """Get all NodeIds present in the tree.

        Returns:
            Set of all NodeIds in the tree (excludes None ids from placeholders)

        """
        return {item.node_id for item in self.depth_first_traversal() if item.node_id is not None}

    def find_placeholder_by_display_title(self, display_title: str) -> BinderItem | None:
        """Find a placeholder (item with None id) by its display title.

        Walks the tree in depth-first (pre-order) order and returns the first
        placeholder item with the matching display title.

        Args:
            display_title: The display title to search for

        Returns:
            The BinderItem with matching display title and None id, or None if not found

        """
        for item in self.depth_first_traversal():
            if item.node_id is None and item.display_title == display_title:
                return item
        return None

    def add_root_item(self, item: BinderItem) -> None:
        """Add a root item to the binder.

        The item's parent is cleared and the item is appended to the roots,
        after which the tree invariants are re-validated. If validation fails
        the binder and the item are restored to their previous state before
        the error propagates, so a rejected item never stays in the tree.

        Raises:
            BinderIntegrityError: If adding the item violates a tree invariant

        """
        previous_parent = item.parent
        item.parent = None
        self.roots.append(item)
        try:
            self.validate_integrity()
        except Exception:
            # Undo the append (the item is the last element) and re-raise
            # unchanged; nothing is swallowed here.
            self.roots.pop()
            item.parent = previous_parent
            raise

    def remove_root_item(self, item: BinderItem) -> None:
        """Remove a root item from the binder.

        The item is matched by identity so that a distinct root with equal
        field values is never removed in its place. Does nothing when the item
        is not a root of this binder.
        """
        for index, existing in enumerate(self.roots):
            if existing is item:
                del self.roots[index]
                return

    def depth_first_traversal(self) -> list[BinderItem]:
        """Perform depth-first traversal of all items in the binder.

        Returns:
            Every item in pre-order: each item is followed by its descendants,
            and siblings keep their list order.

        """
        result: list[BinderItem] = []
        # Explicit stack instead of recursion so that very deep trees cannot
        # hit the interpreter recursion limit. Children are pushed reversed so
        # they are popped in their original order.
        pending = list(reversed(self.roots))
        while pending:
            item = pending.pop()
            result.append(item)
            pending.extend(reversed(item.children))
        return result

447 

448 

@dataclass(frozen=True)
class NodeMetadata:
    """Metadata for a node document.

    NodeMetadata tracks essential information about each node including
    its identity, title, timestamps, and optional synopsis. The class is immutable
    (frozen) to ensure data integrity.

    Args:
        id: Unique identifier for the node (UUIDv7)
        title: Optional title of the node document
        synopsis: Optional synopsis/summary of the node content
        created: Creation timestamp, either an ISO 8601 formatted string or a
            datetime object
        updated: Last update timestamp, either an ISO 8601 formatted string or
            a datetime object

    Examples:
        >>> # Create new metadata with all fields
        >>> node_id = NodeId('0192f0c1-2345-7123-8abc-def012345678')
        >>> metadata = NodeMetadata(
        ...     id=node_id,
        ...     title='Chapter One',
        ...     synopsis='Introduction to the story',
        ...     created='2025-09-10T10:00:00-07:00',
        ...     updated='2025-09-10T10:30:00-07:00',
        ... )

        >>> # Create with minimal fields (None values)
        >>> metadata = NodeMetadata(
        ...     id=node_id,
        ...     title=None,
        ...     synopsis=None,
        ...     created='2025-09-10T10:00:00-07:00',
        ...     updated='2025-09-10T10:00:00-07:00',
        ... )

        >>> # Serialize to dictionary
        >>> data = metadata.to_dict()

        >>> # Deserialize from dictionary
        >>> restored = NodeMetadata.from_dict(data)

    """

    id: NodeId
    title: str | None
    synopsis: str | None
    created: str | datetime
    updated: str | datetime

    @staticmethod
    def _timestamp_to_str(value: str | datetime) -> str:
        """Render a timestamp field as a string (datetime -> isoformat())."""
        return value.isoformat() if isinstance(value, datetime) else value

    def to_dict(self) -> dict[str, str | None]:
        """Convert NodeMetadata to a dictionary.

        None values for title and synopsis are excluded from the dictionary
        to keep the serialized format clean. Timestamps held as datetime
        objects are written with isoformat(); string timestamps are written
        unchanged.

        Returns:
            Dictionary with metadata fields, excluding None values

        """
        result: dict[str, str | None] = {
            'id': str(self.id),
            'created': self._timestamp_to_str(self.created),
            'updated': self._timestamp_to_str(self.updated),
        }

        # Only include title and synopsis if they are not None
        if self.title is not None:
            result['title'] = self.title
        if self.synopsis is not None:
            result['synopsis'] = self.synopsis

        return result

    @classmethod
    def from_dict(cls, data: dict[str, str | None]) -> 'NodeMetadata':
        """Create NodeMetadata from a dictionary.

        Handles missing optional fields by defaulting them to None. The
        timestamp values are stored exactly as found in the dictionary; they
        are not parsed into datetime objects.

        Args:
            data: Dictionary containing metadata fields

        Returns:
            New NodeMetadata instance

        Raises:
            NodeIdentityError: If the id field is missing, empty, or contains
                an invalid NodeId
            ValueError: If the created or updated field is missing or empty

        """
        # Get the id and create a NodeId from it
        id_str = data.get('id')
        if not id_str:
            msg = 'Missing id field in metadata dictionary'
            raise NodeIdentityError(msg, None)

        node_id = NodeId(id_str)

        # Get required timestamp fields
        created = data.get('created')
        updated = data.get('updated')

        if not created:
            msg = 'Missing created field in metadata dictionary'
            raise ValueError(msg)
        if not updated:
            msg = 'Missing updated field in metadata dictionary'
            raise ValueError(msg)

        # Optional fields default to None when absent
        return cls(
            id=node_id,
            title=data.get('title'),
            synopsis=data.get('synopsis'),
            created=created,
            updated=updated,
        )

569 )