LICENSE
README.md
pyproject.toml
olmocr/__init__.py
olmocr/check.py
olmocr/datatypes.py
olmocr/image_utils.py
olmocr/metrics.py
olmocr/pipeline.py
olmocr/py.typed
olmocr/repeatdetect.py
olmocr/s3_utils.py
olmocr/version.py
olmocr/work_queue.py
olmocr.egg-info/PKG-INFO
olmocr.egg-info/SOURCES.txt
olmocr.egg-info/dependency_links.txt
olmocr.egg-info/requires.txt
olmocr.egg-info/top_level.txt
olmocr/bench/__init__.py
olmocr/bench/benchmark.py
olmocr/bench/convert.py
olmocr/bench/prompts.py
olmocr/bench/report.py
olmocr/bench/review_app.py
olmocr/bench/review_app_latex.py
olmocr/bench/tests.py
olmocr/bench/utils.py
olmocr/bench/katex/__init__.py
olmocr/bench/katex/auto-render.min.js
olmocr/bench/katex/katex.min.css
olmocr/bench/katex/katex.min.js
olmocr/bench/katex/render.py
olmocr/bench/miners/check_headers_footers.py
olmocr/bench/miners/check_multicolumn.py
olmocr/bench/miners/check_old_scans_math.py
olmocr/bench/miners/cleanup_data.py
olmocr/bench/miners/cleanup_urls.py
olmocr/bench/miners/delete_rejected.py
olmocr/bench/miners/download_math.py
olmocr/bench/miners/mine_blank_pages_gpt.py
olmocr/bench/miners/mine_diffs.py
olmocr/bench/miners/mine_headers_footers.py
olmocr/bench/miners/mine_long_tiny_text.py
olmocr/bench/miners/mine_math.py
olmocr/bench/miners/mine_multi_column.py
olmocr/bench/miners/mine_multilingual_gpt.py
olmocr/bench/miners/mine_old_scan_pdf.py
olmocr/bench/miners/mine_old_scans.py
olmocr/bench/miners/mine_old_scans_math.py
olmocr/bench/miners/mine_reading_order.py
olmocr/bench/miners/mine_tables_gemini.py
olmocr/bench/miners/mine_tables_gpt.py
olmocr/bench/miners/mine_tables_gpt_simple.py
olmocr/bench/miners/pick_mediod.py
olmocr/bench/runners/__init__.py
olmocr/bench/runners/run_chatgpt.py
olmocr/bench/runners/run_claude.py
olmocr/bench/runners/run_docling.py
olmocr/bench/runners/run_gemini.py
olmocr/bench/runners/run_gotocr.py
olmocr/bench/runners/run_marker.py
olmocr/bench/runners/run_mineru.py
olmocr/bench/runners/run_mistral.py
olmocr/bench/runners/run_nanonetsocr.py
olmocr/bench/runners/run_nanonetsocr_2.py
olmocr/bench/runners/run_olmocr_pipeline.py
olmocr/bench/runners/run_paddlepaddle.py
olmocr/bench/runners/run_paddlevl.py
olmocr/bench/runners/run_rolmocr.py
olmocr/bench/runners/run_server.py
olmocr/bench/runners/run_transformers.py
olmocr/bench/scripts/difference_viewer.py
olmocr/bench/scripts/rotate_pdfs.py
olmocr/bench/scripts/run_difference.py
olmocr/bench/scripts/url_matcher.py
olmocr/bench/scripts/workspace_to_bench.py
olmocr/bench/synth/__init__.py
olmocr/bench/synth/mine_html_templates.py
olmocr/bench/synth/rotate_html_templates.py
olmocr/data/build_openai_batch_from_olmocrmix.py
olmocr/data/buildsilver.py
olmocr/data/clean_olmocrmix.py
olmocr/data/prepare_loc_transcripts.py
olmocr/data/prepare_national_archive_transcripts.py
olmocr/data/prepare_olmocrmix.py
olmocr/data/prepare_workspace.py
olmocr/data/process_openai_batch_results.py
olmocr/data/renderpdf.py
olmocr/data/repackage_olmocrmix.py
olmocr/data/runopenaibatch.py
olmocr/filter/__init__.py
olmocr/filter/coherency.py
olmocr/filter/filter.py
olmocr/prompts/__init__.py
olmocr/prompts/anchor.py
olmocr/prompts/prompts.py
olmocr/train/compare_vllm_checkpoint.py
olmocr/train/compress_checkpoint.py
olmocr/train/config.py
olmocr/train/dataloader.py
olmocr/train/grpo_train.py
olmocr/train/muon.py
olmocr/train/prepare_checkpoint.py
olmocr/train/train.py
olmocr/viewer/__init__.py
olmocr/viewer/dolmaviewer.py
olmocr/viewer/dolmaviewer_merged_template.html
olmocr/viewer/dolmaviewer_template.html
tests/test_anchor.py
tests/test_dataloader.py
tests/test_filter.py
tests/test_grpo.py
tests/test_integration.py
tests/test_mine_html_templates.py
tests/test_olmocrmix.py
tests/test_pipeline.py
tests/test_s3_work_queue.py
tests/test_tests.py