.gitignore
.pre-commit-config.yaml
01-execute_notebooks.py
02-run_all_tutorials.sh
CHECKBOX_DETECTION.md
CLAUDE.md
LICENSE
MANIFEST.in
README.md
audit_packaging.py
check_run_md.sh
example_checkbox_usage.py
mkdocs.yml
noxfile.py
publish.sh
pyproject.toml
sample-screen.png
uv.lock
.cursor/rules/analysis_framework.mdc
.cursor/rules/coding-style.mdc
.cursor/rules/edit-md-instead-of-ipynb.mdc
.cursor/rules/minimal-comments.mdc
.cursor/rules/natural-pdf-overview.mdc
.cursor/rules/user-friendly-library-code.mdc
.github/workflows/ci.yml
.github/workflows/docs.yml
.github/workflows/nightly-tutorials.yml
docs/guide_adjustment_stream.md
docs/guides_boundary_columns.md
docs/index.md
docs/api/index.md
docs/assets/favicon.png
docs/assets/favicon.svg
docs/assets/logo.svg
docs/assets/sample-screen.png
docs/assets/social-preview.png
docs/assets/social-preview.svg
docs/assets/javascripts/custom.js
docs/assets/stylesheets/custom.css
docs/categorizing-documents/index.md
docs/data-extraction/index.md
docs/describe/index.md
docs/document-qa/index.md
docs/element-selection/index.md
docs/extracting-clean-text/index.md
docs/finetuning/index.md
docs/fix-messy-tables/index.md
docs/fix-messy-tables/table_1.csv
docs/fix-messy-tables/table_2.csv
docs/fix-messy-tables/table_3.csv
docs/installation/index.md
docs/interactive-widget/index.md
docs/layout-analysis/index.md
docs/loops-and-groups/index.md
docs/ocr/index.md
docs/pdf-navigation/index.md
docs/process-forms-and-invoices/extracted_form_data.csv
docs/process-forms-and-invoices/index.md
docs/quick-reference/index.md
docs/reflowing-pages/index.md
docs/regions/index.md
docs/tables/index.md
docs/text-analysis/index.md
docs/tutorials/01-loading-and-extraction.md
docs/tutorials/02-finding-elements.md
docs/tutorials/03-extracting-blocks.md
docs/tutorials/04-table-extraction.md
docs/tutorials/05-excluding-content.md
docs/tutorials/06-document-qa.md
docs/tutorials/07-layout-analysis.md
docs/tutorials/07-working-with-regions.md
docs/tutorials/08-spatial-navigation.md
docs/tutorials/09-section-extraction.md
docs/tutorials/10-form-field-extraction.md
docs/tutorials/11-enhanced-table-processing.md
docs/tutorials/12-ocr-integration.md
docs/tutorials/13-semantic-search.md
docs/tutorials/14-categorizing-documents.md
docs/visual-debugging/index.md
docs/visual-debugging/region.png
natural_pdf/__init__.py
natural_pdf/cli.py
natural_pdf/judge.py
natural_pdf/text_mixin.py
natural_pdf.egg-info/PKG-INFO
natural_pdf.egg-info/SOURCES.txt
natural_pdf.egg-info/dependency_links.txt
natural_pdf.egg-info/entry_points.txt
natural_pdf.egg-info/requires.txt
natural_pdf.egg-info/top_level.txt
natural_pdf/analyzers/__init__.py
natural_pdf/analyzers/guides.py
natural_pdf/analyzers/shape_detection_mixin.py
natural_pdf/analyzers/text_options.py
natural_pdf/analyzers/text_structure.py
natural_pdf/analyzers/utils.py
natural_pdf/analyzers/checkbox/__init__.py
natural_pdf/analyzers/checkbox/base.py
natural_pdf/analyzers/checkbox/checkbox_analyzer.py
natural_pdf/analyzers/checkbox/checkbox_manager.py
natural_pdf/analyzers/checkbox/checkbox_options.py
natural_pdf/analyzers/checkbox/mixin.py
natural_pdf/analyzers/checkbox/rtdetr.py
natural_pdf/analyzers/layout/__init__.py
natural_pdf/analyzers/layout/base.py
natural_pdf/analyzers/layout/docling.py
natural_pdf/analyzers/layout/gemini.py
natural_pdf/analyzers/layout/layout_analyzer.py
natural_pdf/analyzers/layout/layout_manager.py
natural_pdf/analyzers/layout/layout_options.py
natural_pdf/analyzers/layout/paddle.py
natural_pdf/analyzers/layout/pdfplumber_table_finder.py
natural_pdf/analyzers/layout/surya.py
natural_pdf/analyzers/layout/table_structure_utils.py
natural_pdf/analyzers/layout/tatr.py
natural_pdf/analyzers/layout/yolo.py
natural_pdf/classification/manager.py
natural_pdf/classification/mixin.py
natural_pdf/classification/results.py
natural_pdf/collections/mixins.py
natural_pdf/core/__init__.py
natural_pdf/core/element_manager.py
natural_pdf/core/highlighting_service.py
natural_pdf/core/page.py
natural_pdf/core/page_collection.py
natural_pdf/core/page_groupby.py
natural_pdf/core/pdf.py
natural_pdf/core/pdf_collection.py
natural_pdf/core/render_spec.py
natural_pdf/describe/__init__.py
natural_pdf/describe/base.py
natural_pdf/describe/elements.py
natural_pdf/describe/mixin.py
natural_pdf/describe/summary.py
natural_pdf/elements/__init__.py
natural_pdf/elements/base.py
natural_pdf/elements/element_collection.py
natural_pdf/elements/image.py
natural_pdf/elements/line.py
natural_pdf/elements/rect.py
natural_pdf/elements/region.py
natural_pdf/elements/text.py
natural_pdf/export/mixin.py
natural_pdf/exporters/__init__.py
natural_pdf/exporters/base.py
natural_pdf/exporters/hocr.py
natural_pdf/exporters/hocr_font.py
natural_pdf/exporters/original_pdf.py
natural_pdf/exporters/paddleocr.py
natural_pdf/exporters/searchable_pdf.py
natural_pdf/exporters/data/__init__.py
natural_pdf/exporters/data/pdf.ttf
natural_pdf/exporters/data/sRGB.icc
natural_pdf/extraction/manager.py
natural_pdf/extraction/mixin.py
natural_pdf/extraction/result.py
natural_pdf/flows/__init__.py
natural_pdf/flows/collections.py
natural_pdf/flows/element.py
natural_pdf/flows/flow.py
natural_pdf/flows/region.py
natural_pdf/ocr/__init__.py
natural_pdf/ocr/engine.py
natural_pdf/ocr/engine_doctr.py
natural_pdf/ocr/engine_easyocr.py
natural_pdf/ocr/engine_paddle.py
natural_pdf/ocr/engine_surya.py
natural_pdf/ocr/ocr_factory.py
natural_pdf/ocr/ocr_manager.py
natural_pdf/ocr/ocr_options.py
natural_pdf/ocr/utils.py
natural_pdf/qa/__init__.py
natural_pdf/qa/document_qa.py
natural_pdf/qa/qa_result.py
natural_pdf/search/__init__.py
natural_pdf/search/lancedb_search_service.py
natural_pdf/search/numpy_search_service.py
natural_pdf/search/search_options.py
natural_pdf/search/search_service_protocol.py
natural_pdf/search/searchable_mixin.py
natural_pdf/selectors/__init__.py
natural_pdf/selectors/parser.py
natural_pdf/tables/__init__.py
natural_pdf/tables/result.py
natural_pdf/templates/__init__.py
natural_pdf/templates/finetune/fine_tune_paddleocr.md
natural_pdf/templates/spa/index.html
natural_pdf/templates/spa/words.txt
natural_pdf/templates/spa/css/style.css
natural_pdf/templates/spa/js/app.js
natural_pdf/utils/__init__.py
natural_pdf/utils/bidi_mirror.py
natural_pdf/utils/color_utils.py
natural_pdf/utils/debug.py
natural_pdf/utils/highlighting.py
natural_pdf/utils/identifiers.py
natural_pdf/utils/layout.py
natural_pdf/utils/locks.py
natural_pdf/utils/packaging.py
natural_pdf/utils/pdfminer_patches.py
natural_pdf/utils/reading_order.py
natural_pdf/utils/sections.py
natural_pdf/utils/spatial.py
natural_pdf/utils/text_extraction.py
natural_pdf/utils/visualization.py
natural_pdf/vision/__init__.py
natural_pdf/vision/mixin.py
natural_pdf/vision/results.py
natural_pdf/vision/similarity.py
natural_pdf/vision/template_matching.py
natural_pdf/widgets/__init__.py
natural_pdf/widgets/viewer.py
optimization/memory_comparison.py
optimization/pdf_analyzer.py
optimization/performance_analysis.py
optimization/test_cleanup_methods.py
optimization/test_memory_fix.py
optimization/performance_results/image_heavy_snapshots.csv
optimization/performance_results/image_heavy_snapshots.json
optimization/performance_results/text_heavy_snapshots.csv
optimization/performance_results/text_heavy_snapshots.json
temp/check_model.py
temp/check_pdf_content.py
temp/checkbox_checks.py
temp/checkbox_simple.py
temp/checkbox_ux_ideas.py
temp/context_manager_prototype.py
temp/convert_to_hf.py
temp/demo_text_closest.py
temp/fix_page_exclusions.py
temp/inspect_model.py
temp/rtdetr_dinov2_test.py
temp/test_closest_debug.py
temp/test_closest_debug2.py
temp/test_context_exploration.py
temp/test_draw_guides.py
temp/test_draw_guides_interactive.py
temp/test_durham.py
temp/test_empty_string.py
temp/test_exclusion_with_debug.py
temp/test_find_exclusions_fix.py
temp/test_find_exclusions_fix_no_recursion.py
temp/test_fix_real_pdf.py
temp/test_fix_working.py
temp/test_fixed_pdf_exclusions.py
temp/test_guide_draw_notebook.py
temp/test_horizontal_top_bottom.py
temp/test_inline_js.py
temp/test_marker_order.py
temp/test_original_exclusions_now_work.py
temp/test_pdf_exclusions_with_guides.py
temp/test_region_exclusions_detailed.py
temp/test_similarity.py
temp/test_stripes_real_pdf.py
temp/test_vertical_stripes.py
temp/test_widget_functionality.py
temp/test_widget_simple.py
tests/conftest.py
tests/demo_multipage.py
tests/test_aggregate_selectors.py
tests/test_annotate.py
tests/test_arabic_performance.py
tests/test_arabic_real_world.py
tests/test_auto_multipage_option.py
tests/test_closest_substring_sorting.py
tests/test_closest_until.py
tests/test_closest_until_comparison.py
tests/test_closest_until_debug.py
tests/test_closest_until_fix.py
tests/test_closest_until_ordering.py
tests/test_color_conversion.py
tests/test_color_hex_display.py
tests/test_crop_enhancements.py
tests/test_crop_region_highlights.py
tests/test_directional_defaults.py
tests/test_dissolve.py
tests/test_dissolve_cross_page_bug.py
tests/test_dissolve_debug_issue.py
tests/test_dissolve_real_world_issue.py
tests/test_dissolve_single_elements.py
tests/test_dissolve_vertical_offset_issue.py
tests/test_document_qa.py
tests/test_element_addition.py
tests/test_element_collection_guides.py
tests/test_element_collection_show_cols.py
tests/test_element_collection_slicing.py
tests/test_element_exclusions.py
tests/test_element_show_crop_highlights.py
tests/test_empty_pseudo_class.py
tests/test_exclude_multi_page.py
tests/test_exclude_real_pdf.py
tests/test_exclusion_recursion_fix.py
tests/test_exclusions.py
tests/test_expand.py
tests/test_expand_enhanced.py
tests/test_extract_text_words.py
tests/test_extraction_error.py
tests/test_extraction_mixin_fix.py
tests/test_extraction_text_and_vision.py
tests/test_extraction_working.py
tests/test_find_similar.py
tests/test_first_last_selectors.py
tests/test_fix_get_sections_zero_height.py
tests/test_flow_region_directional.py
tests/test_from_images.py
tests/test_from_parameter.py
tests/test_from_parameter_example.py
tests/test_from_self_exclusion.py
tests/test_from_simple.py
tests/test_get_sections_fix_comprehensive.py
tests/test_get_sections_zero_height.py
tests/test_groupby.py
tests/test_guide_adjustment_stream.py
tests/test_guides.py
tests/test_guides_apply_exclusions.py
tests/test_guides_apply_exclusions_simple.py
tests/test_guides_boundaries.py
tests/test_guides_extract_table.py
tests/test_guides_extract_table_collections.py
tests/test_guides_extract_table_exclusions.py
tests/test_guides_extract_table_real.py
tests/test_guides_from_headers.py
tests/test_guides_from_headers_strings.py
tests/test_guides_from_stripes.py
tests/test_guides_integration.py
tests/test_guides_marker_sorting.py
tests/test_guides_partial.py
tests/test_highlight_color_falsy.py
tests/test_highlight_detection.py
tests/test_highlight_detection_comprehensive.py
tests/test_highlight_offset.py
tests/test_highlight_protocol.py
tests/test_highlight_protocol_simple.py
tests/test_highlight_regions.py
tests/test_horizontal_guides_alignment.py
tests/test_include_boundaries_comprehensive.py
tests/test_include_boundaries_final.py
tests/test_include_boundaries_final_verification.py
tests/test_include_boundaries_fix.py
tests/test_include_boundaries_mock.py
tests/test_include_boundaries_simple.py
tests/test_include_boundaries_types_pdf.py
tests/test_include_boundaries_verification.py
tests/test_include_boundaries_with_real_text.py
tests/test_loading_original.py
tests/test_map_method.py
tests/test_match_results_sorting.py
tests/test_merge_connected.py
tests/test_merge_connected_real_world.py
tests/test_merge_method.py
tests/test_merged_flowregion_specs.py
tests/test_mixed_collection_rendering.py
tests/test_multi_page_table_discovery.py
tests/test_multipage_directional.py
tests/test_negative_bounds_pdf.py
tests/test_optional_deps.py
tests/test_page_exclusion_lists.py
tests/test_pdf_add_exclusion_elementcollection.py
tests/test_pdf_exclusions_in_find_methods.py
tests/test_pdfminer_bug_status.py
tests/test_pdfminer_color_bug.py
tests/test_pdfminer_color_stack_bug.py
tests/test_phash_masking.py
tests/test_region_find_similar.py
tests/test_region_show_crop_highlights.py
tests/test_region_viewer.py
tests/test_sections_end_only.py
tests/test_sections_with_start_and_end.py
tests/test_show_column_layout.py
tests/test_show_edge_cases.py
tests/test_show_exclusions.py
tests/test_show_exclusions_feature.py
tests/test_show_limit.py
tests/test_skip_repeating_headers_multipage.py
tests/test_slice_cache_reuse.py
tests/test_slice_exclusion_fix.py
tests/test_slice_exclusion_issue.py
tests/test_slice_exclusion_mock.py
tests/test_sliced_collection_exclusions.py
tests/test_smart_exclusion.py
tests/test_spatial_offset.py
tests/test_strikethrough_detection.py
tests/test_table_result_header_mismatch.py
tests/test_table_result_keep_blank.py
tests/test_template_matching.py
tests/test_template_white_masking.py
tests/test_text_closest_selector.py
tests/test_tiny_text_tables.py
tests/test_tiny_text_tables_table.py
tests/test_tutorials.py
tests/test_underline_detection.py
tests/test_unique_method.py
tests/test_update_text.py
tests/test_within_constraint.py
tests/test_words_vs_find_all_text.py
tests/test_words_vs_find_all_text_summary.md
tests/exporters/test_paddleocr_exporter.py
tests/test_core/test_containment_geometry.py
tests/test_core/test_elements.py
tests/test_core/test_loading.py
tests/test_core/test_spatial.py
tests/test_core/test_text_extraction.py
tests/test_core/test_text_layer.py
todo/bad_pdf_analysis.md
todo/evaluation.md
tools/bad_pdf_eval/IMPROVEMENTS_SUMMARY.md
tools/bad_pdf_eval/LLM_NaturalPDF_CheatSheet.md
tools/bad_pdf_eval/LLM_NaturalPDF_Workflows.md
tools/bad_pdf_eval/README.md
tools/bad_pdf_eval/__init__.py
tools/bad_pdf_eval/analyser.py
tools/bad_pdf_eval/collate_summaries.py
tools/bad_pdf_eval/compile_attempts_markdown.py
tools/bad_pdf_eval/eval_suite.py
tools/bad_pdf_eval/evaluate_quality.py
tools/bad_pdf_eval/export_enrichment_csv.py
tools/bad_pdf_eval/extraction_decision_tree.md
tools/bad_pdf_eval/llm_enrich.py
tools/bad_pdf_eval/llm_enrich_with_retry.py
tools/bad_pdf_eval/reporter.py
tools/bad_pdf_eval/utils.py