.gitignore
.markdownlint.json
.pre-commit-config.yaml
CITATION.cff
CODE_OF_CONDUCT.md
CONTRIBUTING.md
Dockerfile
LICENSE
Makefile
README.md
SECURITY.md
STYLE_GUIDE.md
pyproject.toml
.github/pull_request_template.md
.github/ISSUE_TEMPLATE/bug-report.yaml
.github/ISSUE_TEMPLATE/config.yml
.github/ISSUE_TEMPLATE/feature-request.yaml
.github/workflows/doctests.yaml
.github/workflows/gpu_install_test.yaml
.github/workflows/gpu_tests.yaml
.github/workflows/install_test.yaml
.github/workflows/pretest.yaml
.github/workflows/release_docker.yaml
.github/workflows/release_gcp.yaml
.github/workflows/release_pypi.yaml
.vscode/launch.json
.vscode/settings.json
configs/README.md
configs/apis/anthropic/eval_claude_3_5_sonnet.yaml
configs/apis/anthropic/eval_claude_3_7_sonnet.yaml
configs/apis/anthropic/infer_claude_3_5_sonnet.yaml
configs/apis/anthropic/infer_claude_3_7_sonnet.yaml
configs/apis/anthropic/infer_claude_opus_4_1.yaml
configs/apis/gemini/eval_gemini_1_5_pro.yaml
configs/apis/gemini/infer_gemini_1_5_pro.yaml
configs/apis/gemini/infer_gemini_2_5_pro.yaml
configs/apis/openai/eval_gpt_4o.yaml
configs/apis/openai/eval_gpt_o1_preview.yaml
configs/apis/openai/infer_chatgpt_4o_latest.yaml
configs/apis/openai/infer_gpt_4_1.yaml
configs/apis/openai/infer_gpt_4_1_mini.yaml
configs/apis/openai/infer_gpt_4o.yaml
configs/apis/openai/infer_gpt_4o_mini.yaml
configs/apis/openai/infer_gpt_5.yaml
configs/apis/openai/infer_gpt_5_chat_latest.yaml
configs/apis/openai/infer_gpt_5_mini.yaml
configs/apis/openai/infer_gpt_5_nano.yaml
configs/apis/openai/infer_gpt_o1_preview.yaml
configs/apis/openai/infer_o1.yaml
configs/apis/openai/infer_o1_mini.yaml
configs/apis/openai/infer_o3_mini.yaml
configs/apis/vertex/eval_llama_3_1_405b.yaml
configs/apis/vertex/eval_llama_3_3_70b.yaml
configs/apis/vertex/infer_llama_3_1_405b.yaml
configs/apis/vertex/infer_llama_3_3_70b.yaml
configs/examples/README.md
configs/examples/berry_bench/evaluation/eval.yaml
configs/examples/berry_bench/evaluation/gcp_job.yaml
configs/examples/bulk_inference/README.md
configs/examples/bulk_inference/gcp_job.yaml
configs/examples/deepspeed/README.md
configs/examples/deepspeed/llama3_1_8b_deepspeed_z2_train.yaml
configs/examples/deepspeed/llama3_1_8b_deepspeed_z3_offload_train.yaml
configs/examples/deepspeed/llama3_1_8b_deepspeed_z3_train.yaml
configs/examples/fineweb_ablation_pretraining/README.md
configs/examples/fineweb_ablation_pretraining/ddp/gcp_job.yaml
configs/examples/fineweb_ablation_pretraining/ddp/polaris_job.yaml
configs/examples/fineweb_ablation_pretraining/ddp/train.yaml
configs/examples/fineweb_ablation_pretraining/fsdp/gcp_job.yaml
configs/examples/fineweb_ablation_pretraining/fsdp/polaris_job.yaml
configs/examples/fineweb_ablation_pretraining/fsdp/train.yaml
configs/examples/grpo_tldr/gcp_job.yaml
configs/examples/grpo_tldr/train.yaml
configs/examples/grpo_verl_countdown/gcp_job.yaml
configs/examples/grpo_verl_countdown/slurm_job.yaml
configs/examples/grpo_verl_countdown/train.yaml
configs/examples/grpo_verl_geometry3k/gcp_job.yaml
configs/examples/grpo_verl_geometry3k/train.yaml
configs/examples/grpo_verl_gsm8k/slurm_job.yaml
configs/examples/grpo_verl_gsm8k/train.yaml
configs/examples/letter_counting/evaluation/eval.yaml
configs/examples/letter_counting/evaluation/gcp_job.yaml
configs/examples/letter_counting/grpo/gcp_job.yaml
configs/examples/letter_counting/grpo/train.yaml
configs/examples/macos_gguf/README.md
configs/examples/misc/README.md
configs/examples/misc/dev_gcp_job.yaml
configs/examples/misc/hello_world_gcp_job.yaml
configs/examples/misc/hello_world_polaris_job.yaml
configs/examples/misc/sky_init.sh
configs/examples/misc/slurm_init.sh
configs/examples/misc/slurm_ray_init.sh
configs/examples/misc/tulu3_sft_mini.yaml
configs/examples/misc/vllm_polaris_job.yaml
configs/examples/quantization/README.md
configs/examples/quantization/awq_quantization_config.yaml
configs/examples/quantization/bnb_quantization_config.yaml
configs/examples/synthesis/README.md
configs/examples/synthesis/conversation_synth.yaml
configs/examples/synthesis/data_augmentation_synth.yaml
configs/examples/synthesis/domain_qa_synth.yaml
configs/examples/synthesis/instruction_following_synth.yaml
configs/examples/synthesis/question_answer_synth.yaml
configs/projects/README.md
configs/projects/aya/README.md
configs/projects/aya/evaluation/eval.yaml
configs/projects/aya/evaluation/gcp_job.yaml
configs/projects/aya/sft/gcp_job.yaml
configs/projects/aya/sft/train.yaml
configs/projects/chatqa/README.md
configs/projects/chatqa/chatqa_stage1_train.yaml
configs/projects/chatqa/chatqa_stage2_train.yaml
configs/projects/chatqa/gcp_job.yaml
configs/projects/coalm/405b_train.yaml
configs/projects/coalm/70b_infer.yaml
configs/projects/coalm/70b_train.yaml
configs/projects/coalm/8b_infer.yaml
configs/projects/coalm/8b_train.yaml
configs/projects/coalm/README.md
configs/projects/coalm/images/dataset.png
configs/projects/coalm/images/results.png
configs/projects/dcvlr/README.md
configs/projects/dcvlr/run_image_synthesis.py
configs/projects/dcvlr/synthesize_images_vllm.py
configs/projects/dcvlr/starter_kit/molmo-d-train-openr1.yaml
configs/projects/dcvlr/starter_kit/molmo-o-train-openr1.yaml
configs/projects/dcvlr/starter_kit/qwenvl-openr1.yaml
configs/projects/halloumi/8b_train.yaml
configs/projects/halloumi/README.md
configs/projects/halloumi/gcp_job.yaml
configs/projects/halloumi/halloumi_classifier_inference_notebook.ipynb
configs/projects/halloumi/halloumi_eval_notebook.ipynb
configs/projects/halloumi/halloumi_inference_notebook.ipynb
configs/projects/judges/doc_qa/completeness.yaml
configs/projects/judges/doc_qa/groundedness.yaml
configs/projects/judges/doc_qa/relevance.yaml
configs/projects/judges/generic/format_compliance.yaml
configs/projects/judges/generic/instruction_following.yaml
configs/projects/judges/generic/safety.yaml
configs/projects/judges/generic/topic_adherence.yaml
configs/projects/judges/generic/truthfulness.yaml
configs/projects/limo/qwen2.5_7b_fft.yaml
configs/projects/limo/qwen2.5_7b_fft_yarn.yaml
configs/projects/limo/qwen2.5_7b_fft_yarn_deepspeed.yaml
configs/projects/limo/qwen2.5_7b_fft_yarn_deepspeed_memory_optimized_train.yaml
configs/projects/wc50m/README.md
configs/projects/wc50m/configs/base_ultrachat.yaml
configs/projects/wc50m/configs/gcp_base_ultrachat.yaml
configs/projects/wc50m/results/baseline.csv
configs/projects/wc50m/results/oumi.csv
configs/recipes/README.md
configs/recipes/deepseek_r1/README.md
configs/recipes/deepseek_r1/evaluation/distill_llama_70b/eval.yaml
configs/recipes/deepseek_r1/evaluation/distill_llama_70b/gcp_job.yaml
configs/recipes/deepseek_r1/evaluation/distill_llama_8b/eval.yaml
configs/recipes/deepseek_r1/evaluation/distill_llama_8b/gcp_job.yaml
configs/recipes/deepseek_r1/evaluation/distill_qwen_1_5b/eval.yaml
configs/recipes/deepseek_r1/evaluation/distill_qwen_1_5b/gcp_job.yaml
configs/recipes/deepseek_r1/evaluation/distill_qwen_32b/eval.yaml
configs/recipes/deepseek_r1/evaluation/distill_qwen_32b/gcp_job.yaml
configs/recipes/deepseek_r1/inference/671b_together_infer.yaml
configs/recipes/deepseek_r1/inference/distill_llama_70b_infer.yaml
configs/recipes/deepseek_r1/inference/distill_llama_8b_infer.yaml
configs/recipes/deepseek_r1/inference/distill_qwen_1_5b_infer.yaml
configs/recipes/deepseek_r1/inference/distill_qwen_32b_gguf_infer.yaml
configs/recipes/deepseek_r1/inference/distill_qwen_32b_gguf_macos_infer.yaml
configs/recipes/deepseek_r1/inference/distill_qwen_32b_infer.yaml
configs/recipes/deepseek_r1/inference/distill_qwen_32b_vllm_infer.yaml
configs/recipes/deepseek_r1/sft/distill_llama_70b/full_gcp_job.yaml
configs/recipes/deepseek_r1/sft/distill_llama_70b/full_train.yaml
configs/recipes/deepseek_r1/sft/distill_llama_70b/lora_gcp_job.yaml
configs/recipes/deepseek_r1/sft/distill_llama_70b/lora_train.yaml
configs/recipes/deepseek_r1/sft/distill_llama_70b/qlora_gcp_job.yaml
configs/recipes/deepseek_r1/sft/distill_llama_70b/qlora_train.yaml
configs/recipes/deepseek_r1/sft/distill_llama_8b/full_gcp_job.yaml
configs/recipes/deepseek_r1/sft/distill_llama_8b/full_train.yaml
configs/recipes/deepseek_r1/sft/distill_llama_8b/lora_gcp_job.yaml
configs/recipes/deepseek_r1/sft/distill_llama_8b/lora_train.yaml
configs/recipes/deepseek_r1/sft/distill_llama_8b/qlora_gcp_job.yaml
configs/recipes/deepseek_r1/sft/distill_llama_8b/qlora_train.yaml
configs/recipes/deepseek_r1/sft/distill_qwen_1_5b/full_frontier_job.yaml
configs/recipes/deepseek_r1/sft/distill_qwen_1_5b/full_gcp_job.yaml
configs/recipes/deepseek_r1/sft/distill_qwen_1_5b/full_train.yaml
configs/recipes/deepseek_r1/sft/distill_qwen_1_5b/lora_gcp_job.yaml
configs/recipes/deepseek_r1/sft/distill_qwen_1_5b/lora_train.yaml
configs/recipes/deepseek_r1/sft/distill_qwen_32b/lora_gcp_job.yaml
configs/recipes/deepseek_r1/sft/distill_qwen_32b/lora_train.yaml
configs/recipes/falcon_e/README.md
configs/recipes/falcon_e/dpo/falcon_e_1b_instruct/dpo.yaml
configs/recipes/falcon_e/evaluation/falcon_e_1b/eval.yaml
configs/recipes/falcon_e/evaluation/falcon_e_1b_instruct/eval.yaml
configs/recipes/falcon_e/evaluation/falcon_e_3b/eval.yaml
configs/recipes/falcon_e/evaluation/falcon_e_3b_instruct/eval.yaml
configs/recipes/falcon_e/sft/falcon_e_1b/full_train.yaml
configs/recipes/falcon_e/sft/falcon_e_1b_instruct/full_train.yaml
configs/recipes/falcon_e/sft/falcon_e_3b/full_train.yaml
configs/recipes/falcon_e/sft/falcon_e_3b_instruct/full_train.yaml
configs/recipes/falcon_h1/README.md
configs/recipes/falcon_h1/dpo/falcon_h1_0_5b/qlora_dpo.yaml
configs/recipes/falcon_h1/evaluation/falcon_h1_0_5b/eval.yaml
configs/recipes/falcon_h1/evaluation/falcon_h1_0_5b/lambda_job.yaml
configs/recipes/falcon_h1/evaluation/falcon_h1_1_5b/eval.yaml
configs/recipes/falcon_h1/evaluation/falcon_h1_1_5b/lambda_job.yaml
configs/recipes/falcon_h1/evaluation/falcon_h1_1_5b_deep/eval.yaml
configs/recipes/falcon_h1/evaluation/falcon_h1_1_5b_deep/lambda_job.yaml
configs/recipes/falcon_h1/evaluation/falcon_h1_34b/eval.yaml
configs/recipes/falcon_h1/evaluation/falcon_h1_34b/lambda_job.yaml
configs/recipes/falcon_h1/evaluation/falcon_h1_3b/eval.yaml
configs/recipes/falcon_h1/evaluation/falcon_h1_3b/lambda_job.yaml
configs/recipes/falcon_h1/evaluation/falcon_h1_7b/eval.yaml
configs/recipes/falcon_h1/evaluation/falcon_h1_7b/lambda_job.yaml
configs/recipes/falcon_h1/inference/0_5b_infer.yaml
configs/recipes/falcon_h1/inference/1_5b_deep_infer.yaml
configs/recipes/falcon_h1/inference/1_5b_infer.yaml
configs/recipes/falcon_h1/inference/34b_infer.yaml
configs/recipes/falcon_h1/inference/3b_infer.yaml
configs/recipes/falcon_h1/inference/7b_infer.yaml
configs/recipes/falcon_h1/sft/falcon_h1_0_5b/full_lambda_job.yaml
configs/recipes/falcon_h1/sft/falcon_h1_0_5b/full_train.yaml
configs/recipes/falcon_h1/sft/falcon_h1_1_5b/full_lambda_job.yaml
configs/recipes/falcon_h1/sft/falcon_h1_1_5b/full_train.yaml
configs/recipes/falcon_h1/sft/falcon_h1_1_5b_deep/full_lambda_job.yaml
configs/recipes/falcon_h1/sft/falcon_h1_1_5b_deep/full_train.yaml
configs/recipes/falcon_h1/sft/falcon_h1_34b/full_lambda_job.yaml
configs/recipes/falcon_h1/sft/falcon_h1_34b/full_train.yaml
configs/recipes/falcon_h1/sft/falcon_h1_3b/full_lambda_job.yaml
configs/recipes/falcon_h1/sft/falcon_h1_3b/full_train.yaml
configs/recipes/falcon_h1/sft/falcon_h1_7b/full_lambda_job.yaml
configs/recipes/falcon_h1/sft/falcon_h1_7b/full_train.yaml
configs/recipes/gemma3/inference/3n_e4b_it_gguf_infer.yaml
configs/recipes/gemma3/inference/3n_e4b_it_gguf_macos_infer.yaml
configs/recipes/gemma3/inference/3n_e4b_it_infer.yaml
configs/recipes/gemma3/inference/3n_e4b_it_vllm_infer.yaml
configs/recipes/glm4/inference/air_gguf_infer.yaml
configs/recipes/glm4/inference/air_gguf_macos_infer.yaml
configs/recipes/glm4/inference/air_vllm_infer.yaml
configs/recipes/gpt2/README.md
configs/recipes/gpt2/evaluation/async_eval.yaml
configs/recipes/gpt2/evaluation/async_gcp_job.yaml
configs/recipes/gpt2/inference/infer.yaml
configs/recipes/gpt2/pretraining/gcp_job.yaml
configs/recipes/gpt2/pretraining/macos_train.yaml
configs/recipes/gpt2/pretraining/train.yaml
configs/recipes/gpt_oss/README.md
configs/recipes/gpt_oss/inference/120b_infer.yaml
configs/recipes/gpt_oss/inference/120b_together_infer.yaml
configs/recipes/gpt_oss/inference/120b_vllm_infer.yaml
configs/recipes/gpt_oss/inference/20b_infer.yaml
configs/recipes/gpt_oss/inference/20b_vllm_infer.yaml
configs/recipes/gpt_oss/sft/20b_lora_single_gpu_train.yaml
configs/recipes/llama3_1/README.md
configs/recipes/llama3_1/evaluation/70b_eval.yaml
configs/recipes/llama3_1/evaluation/70b_gcp_job.yaml
configs/recipes/llama3_1/evaluation/70b_polaris_job.yaml
configs/recipes/llama3_1/evaluation/8b_eval.yaml
configs/recipes/llama3_1/evaluation/8b_gcp_job.yaml
configs/recipes/llama3_1/evaluation/8b_polaris_job.yaml
configs/recipes/llama3_1/inference/70b_infer.yaml
configs/recipes/llama3_1/inference/8b_infer.yaml
configs/recipes/llama3_1/inference/8b_rvllm_infer.yaml
configs/recipes/llama3_1/inference/8b_sglang_infer.yaml
configs/recipes/llama3_1/pretraining/8b/gcp_job.yaml
configs/recipes/llama3_1/pretraining/8b/polaris_job.yaml
configs/recipes/llama3_1/pretraining/8b/train.yaml
configs/recipes/llama3_1/sft/405b_full/polaris_job.yaml
configs/recipes/llama3_1/sft/405b_full/train.yaml
configs/recipes/llama3_1/sft/405b_lora/gcp_job.yaml
configs/recipes/llama3_1/sft/405b_lora/polaris_job.yaml
configs/recipes/llama3_1/sft/405b_lora/train.yaml
configs/recipes/llama3_1/sft/405b_qlora/gcp_job.yaml
configs/recipes/llama3_1/sft/405b_qlora/polaris_job.yaml
configs/recipes/llama3_1/sft/405b_qlora/train.yaml
configs/recipes/llama3_1/sft/70b_full/gcp_job.yaml
configs/recipes/llama3_1/sft/70b_full/polaris_job.yaml
configs/recipes/llama3_1/sft/70b_full/train.yaml
configs/recipes/llama3_1/sft/70b_lora/gcp_job.yaml
configs/recipes/llama3_1/sft/70b_lora/polaris_job.yaml
configs/recipes/llama3_1/sft/70b_lora/train.yaml
configs/recipes/llama3_1/sft/70b_qlora/gcp_job.yaml
configs/recipes/llama3_1/sft/70b_qlora/polaris_job.yaml
configs/recipes/llama3_1/sft/70b_qlora/train.yaml
configs/recipes/llama3_1/sft/8b_full/accelerate.yaml
configs/recipes/llama3_1/sft/8b_full/gcp_job.yaml
configs/recipes/llama3_1/sft/8b_full/longctx_train.yaml
configs/recipes/llama3_1/sft/8b_full/polaris_job.yaml
configs/recipes/llama3_1/sft/8b_full/train.yaml
configs/recipes/llama3_1/sft/8b_lora/fsdp_gcp_job.yaml
configs/recipes/llama3_1/sft/8b_lora/fsdp_train.yaml
configs/recipes/llama3_1/sft/8b_lora/gcp_job.yaml
configs/recipes/llama3_1/sft/8b_lora/polaris_job.yaml
configs/recipes/llama3_1/sft/8b_lora/train.yaml
configs/recipes/llama3_1/sft/8b_qlora/gcp_job.yaml
configs/recipes/llama3_1/sft/8b_qlora/train.yaml
configs/recipes/llama3_2/README.md
configs/recipes/llama3_2/dpo/1b_qlora_dpo.yaml
configs/recipes/llama3_2/evaluation/1b_eval.yaml
configs/recipes/llama3_2/evaluation/3b_eval.yaml
configs/recipes/llama3_2/inference/1b_infer.yaml
configs/recipes/llama3_2/inference/1b_sglang_infer.yaml
configs/recipes/llama3_2/inference/1b_vllm_infer.yaml
configs/recipes/llama3_2/inference/3b_infer.yaml
configs/recipes/llama3_2/inference/3b_sglang_infer.yaml
configs/recipes/llama3_2/inference/3b_vllm_infer.yaml
configs/recipes/llama3_2/sft/1b_full/train.yaml
configs/recipes/llama3_2/sft/3b_full/fsdp_gcp_job.yaml
configs/recipes/llama3_2/sft/3b_full/fsdp_train.yaml
configs/recipes/llama3_2/sft/3b_full/gcp_job.yaml
configs/recipes/llama3_2/sft/3b_full/polaris_job.yaml
configs/recipes/llama3_2/sft/3b_full/train.yaml
configs/recipes/llama3_2/sft/3b_lora/fsdp_gcp_job.yaml
configs/recipes/llama3_2/sft/3b_lora/fsdp_train.yaml
configs/recipes/llama3_2/sft/3b_lora/gcp_job.yaml
configs/recipes/llama3_2/sft/3b_lora/polaris_job.yaml
configs/recipes/llama3_2/sft/3b_lora/train.yaml
configs/recipes/llama3_2/sft/3b_qlora/fsdp_gcp_job.yaml
configs/recipes/llama3_2/sft/3b_qlora/fsdp_train.yaml
configs/recipes/llama3_2/sft/3b_qlora/gcp_job.yaml
configs/recipes/llama3_2/sft/3b_qlora/polaris_job.yaml
configs/recipes/llama3_2/sft/3b_qlora/train.yaml
configs/recipes/llama3_3/README.md
configs/recipes/llama3_3/evaluation/70b_eval.yaml
configs/recipes/llama3_3/evaluation/70b_gcp_job.yaml
configs/recipes/llama3_3/inference/70b_infer.yaml
configs/recipes/llama3_3/inference/70b_vllm_infer.yaml
configs/recipes/llama3_3/inference/nemotron_super_49b_gguf_infer.yaml
configs/recipes/llama3_3/inference/nemotron_super_49b_gguf_macos_infer.yaml
configs/recipes/llama3_3/inference/nemotron_super_49b_vllm_infer.yaml
configs/recipes/llama3_3/sft/70b_full/gcp_job.yaml
configs/recipes/llama3_3/sft/70b_full/train.yaml
configs/recipes/llama3_3/sft/70b_lora/gcp_job.yaml
configs/recipes/llama3_3/sft/70b_lora/train.yaml
configs/recipes/llama3_3/sft/70b_qlora/gcp_job.yaml
configs/recipes/llama3_3/sft/70b_qlora/train.yaml
configs/recipes/llama4/README.md
configs/recipes/llama4/evaluation/scout_instruct_eval.yaml
configs/recipes/llama4/inference/maverick_instruct_together_infer.yaml
configs/recipes/llama4/inference/scout_instruct_gguf_infer.yaml
configs/recipes/llama4/inference/scout_instruct_gguf_macos_infer.yaml
configs/recipes/llama4/inference/scout_instruct_infer.yaml
configs/recipes/llama4/inference/scout_instruct_together_infer.yaml
configs/recipes/llama4/inference/scout_instruct_vllm_infer.yaml
configs/recipes/llama4/sft/scout_base_full/gcp_job.yaml
configs/recipes/llama4/sft/scout_base_full/train.yaml
configs/recipes/llama4/sft/scout_instruct_full/gcp_job.yaml
configs/recipes/llama4/sft/scout_instruct_full/train.yaml
configs/recipes/llama4/sft/scout_instruct_lora/train.yaml
configs/recipes/llama4/sft/scout_instruct_qlora/train.yaml
configs/recipes/phi3/README.md
configs/recipes/phi3/dpo/gcp_job.yaml
configs/recipes/phi3/dpo/macos_train.yaml
configs/recipes/phi3/dpo/nvidia_24g_train.yaml
configs/recipes/phi3/dpo/nvidia_80g_train.yaml
configs/recipes/phi3/dpo/train.yaml
configs/recipes/phi3/evaluation/eval.yaml
configs/recipes/phi3/evaluation/gcp_job.yaml
configs/recipes/phi3/kto/train.yaml
configs/recipes/phi3/sft/lora_macos_train.yaml
configs/recipes/phi3/sft/lora_train.yaml
configs/recipes/phi4/README.md
configs/recipes/phi4/evaluation/reasoning_plus_eval.yaml
configs/recipes/phi4/evaluation/reasoning_plus_gcp_job.yaml
configs/recipes/phi4/inference/reasoning_plus_infer.yaml
configs/recipes/phi4/sft/reasoning_plus/full_gcp_job.yaml
configs/recipes/phi4/sft/reasoning_plus/full_train.yaml
configs/recipes/phi4/sft/reasoning_plus/lora_gcp_job.yaml
configs/recipes/phi4/sft/reasoning_plus/lora_train.yaml
configs/recipes/phi4/sft/reasoning_plus/qlora_gcp_job.yaml
configs/recipes/phi4/sft/reasoning_plus/qlora_train.yaml
configs/recipes/qwen2_5/sft/3b_full/train.yaml
configs/recipes/qwen2_5/sft/7b_full/train.yaml
configs/recipes/qwen3/README.md
configs/recipes/qwen3/evaluation/0.6b_eval.yaml
configs/recipes/qwen3/evaluation/0.6b_gcp_job.yaml
configs/recipes/qwen3/evaluation/1.7b_eval.yaml
configs/recipes/qwen3/evaluation/1.7b_gcp_job.yaml
configs/recipes/qwen3/evaluation/14b_eval.yaml
configs/recipes/qwen3/evaluation/14b_gcp_job.yaml
configs/recipes/qwen3/evaluation/30b_a3b_eval.yaml
configs/recipes/qwen3/evaluation/30b_a3b_gcp_job.yaml
configs/recipes/qwen3/evaluation/32b_eval.yaml
configs/recipes/qwen3/evaluation/32b_gcp_job.yaml
configs/recipes/qwen3/evaluation/4b_eval.yaml
configs/recipes/qwen3/evaluation/4b_gcp_job.yaml
configs/recipes/qwen3/evaluation/8b_eval.yaml
configs/recipes/qwen3/evaluation/8b_gcp_job.yaml
configs/recipes/qwen3/inference/0.6b_infer.yaml
configs/recipes/qwen3/inference/1.7b_infer.yaml
configs/recipes/qwen3/inference/14b_infer.yaml
configs/recipes/qwen3/inference/235b_a22b_together_infer.yaml
configs/recipes/qwen3/inference/30b_a3b_infer.yaml
configs/recipes/qwen3/inference/30b_a3b_instruct_gguf_infer.yaml
configs/recipes/qwen3/inference/30b_a3b_instruct_gguf_macos_infer.yaml
configs/recipes/qwen3/inference/30b_a3b_instruct_vllm_infer.yaml
configs/recipes/qwen3/inference/32b_infer.yaml
configs/recipes/qwen3/inference/4b_infer.yaml
configs/recipes/qwen3/inference/4b_instruct_gguf_infer.yaml
configs/recipes/qwen3/inference/4b_instruct_gguf_macos_infer.yaml
configs/recipes/qwen3/inference/4b_instruct_infer.yaml
configs/recipes/qwen3/inference/4b_instruct_vllm_infer.yaml
configs/recipes/qwen3/inference/8b_infer.yaml
configs/recipes/qwen3/sft/0.6b_full/gcp_job.yaml
configs/recipes/qwen3/sft/0.6b_full/train.yaml
configs/recipes/qwen3/sft/1.7b_full/gcp_job.yaml
configs/recipes/qwen3/sft/1.7b_full/train.yaml
configs/recipes/qwen3/sft/14b_lora/gcp_job.yaml
configs/recipes/qwen3/sft/14b_lora/train.yaml
configs/recipes/qwen3/sft/30b_a3b_lora/gcp_job.yaml
configs/recipes/qwen3/sft/30b_a3b_lora/train.yaml
configs/recipes/qwen3/sft/32b_lora/gcp_job.yaml
configs/recipes/qwen3/sft/32b_lora/train.yaml
configs/recipes/qwen3/sft/4b_full/gcp_job.yaml
configs/recipes/qwen3/sft/4b_full/train.yaml
configs/recipes/qwen3/sft/8b_full/gcp_job.yaml
configs/recipes/qwen3/sft/8b_full/perlmutter_job.yaml
configs/recipes/qwen3/sft/8b_full/train.yaml
configs/recipes/qwen3_coder/inference/30b_a3b_instruct_gguf_infer.yaml
configs/recipes/qwen3_coder/inference/30b_a3b_instruct_gguf_macos_infer.yaml
configs/recipes/qwen3_coder/inference/30b_a3b_instruct_vllm_infer.yaml
configs/recipes/qwq/evaluation/eval.yaml
configs/recipes/qwq/evaluation/gcp_job.yaml
configs/recipes/qwq/inference/infer.yaml
configs/recipes/qwq/sft/full_gcp_job.yaml
configs/recipes/qwq/sft/full_train.yaml
configs/recipes/qwq/sft/lora_gcp_job.yaml
configs/recipes/qwq/sft/lora_train.yaml
configs/recipes/qwq/sft/qlora_gcp_job.yaml
configs/recipes/qwq/sft/qlora_train.yaml
configs/recipes/smollm/README.md
configs/recipes/smollm/evaluation/135m/eval.yaml
configs/recipes/smollm/evaluation/135m/gcp_job.yaml
configs/recipes/smollm/evaluation/135m/quickstart_alpaca_v2_eval.yaml
configs/recipes/smollm/evaluation/135m/quickstart_eval.yaml
configs/recipes/smollm/evaluation/135m/quickstart_gcp_job.yaml
configs/recipes/smollm/evaluation/135m/leaderboards/huggingface_leaderboard_v1_eval.yaml
configs/recipes/smollm/evaluation/135m/leaderboards/huggingface_leaderboard_v1_gcp_job.yaml
configs/recipes/smollm/evaluation/135m/leaderboards/huggingface_leaderboard_v2_eval.yaml
configs/recipes/smollm/evaluation/135m/leaderboards/huggingface_leaderboard_v2_gcp_job.yaml
configs/recipes/smollm/inference/135m_infer.yaml
configs/recipes/smollm/sft/135m/gcp_job.yaml
configs/recipes/smollm/sft/135m/quickstart_gcp_job.yaml
configs/recipes/smollm/sft/135m/quickstart_train.yaml
configs/recipes/smollm/sft/135m/slurm_job.yaml
configs/recipes/smollm/sft/135m/train.yaml
configs/recipes/vision/README.md
configs/recipes/vision/internvl3/README.md
configs/recipes/vision/internvl3/sft/full/gcp_job.yaml
configs/recipes/vision/internvl3/sft/full/train.yaml
configs/recipes/vision/llama3_2_vision/README.md
configs/recipes/vision/llama3_2_vision/evaluation/11b_eval.yaml
configs/recipes/vision/llama3_2_vision/evaluation/11b_gcp_job.yaml
configs/recipes/vision/llama3_2_vision/inference/11b_infer.yaml
configs/recipes/vision/llama3_2_vision/inference/11b_rvllm_infer.yaml
configs/recipes/vision/llama3_2_vision/inference/11b_sglang_infer.yaml
configs/recipes/vision/llama3_2_vision/inference/11b_vllm_infer.yaml
configs/recipes/vision/llama3_2_vision/sft/11b_full/gcp_job.yaml
configs/recipes/vision/llama3_2_vision/sft/11b_full/train.yaml
configs/recipes/vision/llama3_2_vision/sft/11b_lora/gcp_job.yaml
configs/recipes/vision/llama3_2_vision/sft/11b_lora/train.yaml
configs/recipes/vision/llama3_2_vision/sft/90b_full/gcp_job.yaml
configs/recipes/vision/llama3_2_vision/sft/90b_full/train.yaml
configs/recipes/vision/llava_7b/README.md
configs/recipes/vision/llava_7b/dpo/train.yaml
configs/recipes/vision/llava_7b/inference/infer.yaml
configs/recipes/vision/llava_7b/inference/vllm_infer.yaml
configs/recipes/vision/llava_7b/sft/oumi_gcp_job.yaml
configs/recipes/vision/llava_7b/sft/train.yaml
configs/recipes/vision/llava_7b/sft/trl_gcp_job.yaml
configs/recipes/vision/molmo/README.md
configs/recipes/vision/molmo/grpo/train.yaml
configs/recipes/vision/molmo/sft/molmo_d_full/train.yaml
configs/recipes/vision/molmo/sft/molmo_o_full/train.yaml
configs/recipes/vision/phi3/README.md
configs/recipes/vision/phi3/dpo/train.yaml
configs/recipes/vision/phi3/inference/vllm_infer.yaml
configs/recipes/vision/phi3/sft/full/completions_only_train.yaml
configs/recipes/vision/phi3/sft/full/oumi_gcp_job.yaml
configs/recipes/vision/phi3/sft/full/train.yaml
configs/recipes/vision/phi3/sft/full/trl_gcp_job.yaml
configs/recipes/vision/phi3/sft/lora/gcp_job.yaml
configs/recipes/vision/phi3/sft/lora/train.yaml
configs/recipes/vision/phi4/README.md
configs/recipes/vision/phi4/inference/infer.yaml
configs/recipes/vision/phi4/inference/vllm_infer.yaml
configs/recipes/vision/phi4/sft/full/gcp_job.yaml
configs/recipes/vision/phi4/sft/full/train.yaml
configs/recipes/vision/phi4/sft/lora/gcp_job.yaml
configs/recipes/vision/phi4/sft/lora/train.yaml
configs/recipes/vision/qwen2_5_vl_3b/README.md
configs/recipes/vision/qwen2_5_vl_3b/dpo/train.yaml
configs/recipes/vision/qwen2_5_vl_3b/inference/infer.yaml
configs/recipes/vision/qwen2_5_vl_3b/inference/vllm_infer.yaml
configs/recipes/vision/qwen2_5_vl_3b/sft/full/gcp_job.yaml
configs/recipes/vision/qwen2_5_vl_3b/sft/full/train.yaml
configs/recipes/vision/qwen2_5_vl_3b/sft/lora/gcp_job.yaml
configs/recipes/vision/qwen2_5_vl_3b/sft/lora/train.yaml
configs/recipes/vision/qwen2_5_vl_7b/sft/full/train.yaml
configs/recipes/vision/qwen2_vl_2b/README.md
configs/recipes/vision/qwen2_vl_2b/dpo/train.yaml
configs/recipes/vision/qwen2_vl_2b/evaluation/eval.yaml
configs/recipes/vision/qwen2_vl_2b/evaluation/gcp_job.yaml
configs/recipes/vision/qwen2_vl_2b/inference/infer.yaml
configs/recipes/vision/qwen2_vl_2b/inference/sglang_infer.yaml
configs/recipes/vision/qwen2_vl_2b/inference/vllm_infer.yaml
configs/recipes/vision/qwen2_vl_2b/sft/full/gcp_job.yaml
configs/recipes/vision/qwen2_vl_2b/sft/full/train.yaml
configs/recipes/vision/qwen2_vl_2b/sft/lora/gcp_job.yaml
configs/recipes/vision/qwen2_vl_2b/sft/lora/train.yaml
configs/recipes/vision/qwen3_vl/README.md
configs/recipes/vision/qwen3_vl/sft/4b_instruct_fft_train.yaml
configs/recipes/vision/qwen3_vl/sft/4b_instruct_lora_train.yaml
configs/recipes/vision/smolvlm/README.md
configs/recipes/vision/smolvlm/inference/infer.yaml
configs/recipes/vision/smolvlm/inference/vllm_infer.yaml
configs/recipes/vision/smolvlm/sft/full/gcp_job.yaml
configs/recipes/vision/smolvlm/sft/full/train.yaml
configs/recipes/vision/smolvlm/sft/lora/gcp_job.yaml
configs/recipes/vision/smolvlm/sft/lora/train.yaml
data/dataset_examples/README.md
data/dataset_examples/alpaca_format.json
data/dataset_examples/alpaca_format.jsonl
data/dataset_examples/oumi_format.json
data/dataset_examples/oumi_format.jsonl
data/dataset_examples/vision_language_dpo_format.jsonl
data/dataset_examples/vision_language_oumi_format.jsonl
docs/.gitignore
docs/_doclinks.config
docs/_docsummaries.sh
docs/_manage_doclinks.py
docs/_summarize_module.py
docs/citations.bib
docs/conf.py
docs/index.md
docs/_static/judge/judge_figure.svg
docs/_static/logo/favicon.png
docs/_static/logo/header_logo.png
docs/_static/logo/oumi_logo_dark.png
docs/_static/logo/oumi_logo_light.png
docs/_templates/apidoc/package.rst.jinja
docs/_templates/autodoc2/index.jinja
docs/about/acknowledgements.md
docs/about/changelog.md
docs/about/citations.md
docs/about/license.md
docs/cli/commands.md
docs/development/code_of_conduct.md
docs/development/contributing.md
docs/development/dev_setup.md
docs/development/docs_guide.md
docs/development/style_guide.md
docs/faq/oom.md
docs/faq/troubleshooting.md
docs/get_started/core_concepts.md
docs/get_started/installation.md
docs/get_started/quickstart.md
docs/get_started/tutorials.md
docs/resources/recipes.md
docs/resources/datasets/data_formats.md
docs/resources/datasets/datasets.md
docs/resources/datasets/other_datasets.md
docs/resources/datasets/preference_datasets.md
docs/resources/datasets/pretraining_datasets.md
docs/resources/datasets/sft_datasets.md
docs/resources/datasets/vl_sft_datasets.md
docs/resources/models/custom_models.md
docs/resources/models/models.md
docs/resources/models/supported_models.md
docs/user_guides/customization.md
docs/user_guides/quantization.md
docs/user_guides/synth.md
docs/user_guides/evaluate/custom_evals.md
docs/user_guides/evaluate/evaluate.md
docs/user_guides/evaluate/evaluation_config.md
docs/user_guides/evaluate/generative_benchmarks.md
docs/user_guides/evaluate/leaderboards.md
docs/user_guides/evaluate/standardized_benchmarks.md
docs/user_guides/infer/common_workflows.md
docs/user_guides/infer/configuration.md
docs/user_guides/infer/infer.md
docs/user_guides/infer/inference_cli.md
docs/user_guides/infer/inference_engines.md
docs/user_guides/judge/built_in_judges.md
docs/user_guides/judge/cli_usage.md
docs/user_guides/judge/judge.md
docs/user_guides/judge/judge_config.md
docs/user_guides/launch/custom_cluster.md
docs/user_guides/launch/deploy.md
docs/user_guides/launch/launch.md
docs/user_guides/train/configuration.md
docs/user_guides/train/monitoring.md
docs/user_guides/train/train.md
docs/user_guides/train/training_methods.md
docs/user_guides/train/environments/environments.md
docs/user_guides/train/environments/local.md
docs/user_guides/train/environments/notebooks.md
docs/user_guides/train/environments/vscode.md
notebooks/Oumi - A Tour.ipynb
notebooks/Oumi - Build your own Custom Evaluation (Hallucination Classifier).ipynb
notebooks/Oumi - Bulk Inference of LLM APIs.ipynb
notebooks/Oumi - Deploying a Job.ipynb
notebooks/Oumi - Distill a Large Model.ipynb
notebooks/Oumi - Evaluation with AlpacaEval 2.0.ipynb
notebooks/Oumi - Evaluation with MT Bench.ipynb
notebooks/Oumi - Evaluation with Oumi.ipynb
notebooks/Oumi - Finetuning Tutorial.ipynb
notebooks/Oumi - Launching Jobs on Custom Clusters.ipynb
notebooks/Oumi - MiniMath-R1-1.5B.ipynb
notebooks/Oumi - Quantization Tutorial.ipynb
notebooks/Oumi - Running Jobs Remotely.ipynb
notebooks/Oumi - Simple Judge.ipynb
notebooks/Oumi - Train a Letter Counting Model using GRPO.ipynb
notebooks/Oumi - Training CNN on Custom Dataset.ipynb
notebooks/Oumi - Using NanoGPT.ipynb
notebooks/Oumi - Using vLLM Engine for Inference.ipynb
notebooks/Oumi - Vision Language Models.ipynb
scripts/.gitignore
scripts/demo.py
scripts/llama_e2e.py
scripts/memcalc.py
scripts/benchmarks/benchmark_dataloader.py
scripts/benchmarks/benchmark_nccl.py
scripts/benchmarks/benchmark_trainers.sh
scripts/benchmarks/minimal_fsdp_training.py
scripts/benchmarks/minimal_multimodal_training.py
scripts/datasets/save_conversations.py
scripts/datasets/pretokenize/README.md
scripts/datasets/pretokenize/process_dataset.py
scripts/datasets/pretokenize/sky.yaml
scripts/docker/build_docker.sh
scripts/examples/batch_inference/README.md
scripts/examples/batch_inference/bulk_infer.py
scripts/examples/batch_inference/infer.yaml
scripts/examples/batch_inference/smollm_infer.yaml
scripts/examples/evaluation/README.md
scripts/examples/evaluation/custom_evaluation.py
scripts/frontier/README.md
scripts/frontier/frontier_init.sh
scripts/frontier/launcher.sh
scripts/frontier/jobs/example_job.sh
scripts/inference/README.md
scripts/inference/gcp_inference.py
scripts/perlmutter/README.md
scripts/perlmutter/launcher.sh
scripts/perlmutter/perlmutter_init.sh
scripts/perlmutter/jobs/example_job.sh
scripts/polaris/README.md
scripts/polaris/launcher.sh
scripts/polaris/polaris_init.sh
scripts/polaris/jobs/build_apptainer_from_docker.sh
scripts/polaris/jobs/download_model_from_hf.sh
scripts/polaris/jobs/example_job.sh
scripts/polaris/jobs/fineweb_pt_job.sh
scripts/polaris/jobs/fineweb_pt_worker.sh
scripts/polaris/jobs/llama_tune.sh
scripts/polaris/jobs/vllm_job.sh
scripts/polaris/jobs/vllm_worker.sh
scripts/polaris/jobs/python/vllm_inference.py
scripts/polaris/jobs/python/vllm_parallel_inference.py
scripts/polaris/notebooks/Oumi - Multinode Inference on Polaris.ipynb
scripts/polaris/notebooks/Oumi - Tuning Llama.ipynb
src/experimental/__init__.py
src/experimental/configs/projects/zephyr/README.md
src/experimental/configs/projects/zephyr/evaluation/eval.yaml
src/experimental/configs/projects/zephyr/sft/full_gcp_job.yaml
src/experimental/configs/projects/zephyr/sft/full_train.yaml
src/experimental/configs/projects/zephyr/sft/qlora_gcp_job.yaml
src/experimental/configs/projects/zephyr/sft/qlora_train.yaml
src/experimental/configs/recipes/phi3/dpo/fsdp_gcp_job.yaml
src/experimental/configs/recipes/phi3/dpo/fsdp_nvidia_24g_train.yaml
src/experimental/notebooks/Oumi - Datasets Tutorial.ipynb
src/oumi/__init__.py
src/oumi/__main__.py
src/oumi/evaluate.py
src/oumi/evaluate_async.py
src/oumi/infer.py
src/oumi/judge.py
src/oumi/synth.py
src/oumi/train.py
src/oumi.egg-info/PKG-INFO
src/oumi.egg-info/SOURCES.txt
src/oumi.egg-info/dependency_links.txt
src/oumi.egg-info/entry_points.txt
src/oumi.egg-info/requires.txt
src/oumi.egg-info/top_level.txt
src/oumi/builders/__init__.py
src/oumi/builders/callbacks.py
src/oumi/builders/collators.py
src/oumi/builders/data.py
src/oumi/builders/inference_engines.py
src/oumi/builders/lr_schedules.py
src/oumi/builders/metrics.py
src/oumi/builders/models.py
src/oumi/builders/optimizers.py
src/oumi/builders/oumi_data.py
src/oumi/builders/processors.py
src/oumi/builders/quantizers.py
src/oumi/builders/rewards.py
src/oumi/builders/training.py
src/oumi/cli/alias.py
src/oumi/cli/cache.py
src/oumi/cli/cli_utils.py
src/oumi/cli/distributed_run.py
src/oumi/cli/env.py
src/oumi/cli/evaluate.py
src/oumi/cli/fetch.py
src/oumi/cli/infer.py
src/oumi/cli/judge.py
src/oumi/cli/launch.py
src/oumi/cli/main.py
src/oumi/cli/quantize.py
src/oumi/cli/synth.py
src/oumi/cli/train.py
src/oumi/core/__init__.py
src/oumi/core/async_utils.py
src/oumi/core/constants.py
src/oumi/core/distributed.py
src/oumi/core/analyze/__init__.py
src/oumi/core/analyze/column_types.py
src/oumi/core/analyze/dataframe_analyzer.py
src/oumi/core/analyze/dataset_analyzer.py
src/oumi/core/analyze/length_analyzer.py
src/oumi/core/analyze/sample_analyzer.py
src/oumi/core/callbacks/__init__.py
src/oumi/core/callbacks/base_trainer_callback.py
src/oumi/core/callbacks/bitnet_callback.py
src/oumi/core/callbacks/hf_mfu_callback.py
src/oumi/core/callbacks/mfu_callback.py
src/oumi/core/callbacks/nan_inf_detection_callback.py
src/oumi/core/callbacks/profiler_step_callback.py
src/oumi/core/callbacks/telemetry_callback.py
src/oumi/core/collators/text_collator_with_padding.py
src/oumi/core/collators/text_completions_collator_with_padding.py
src/oumi/core/collators/trl_data_collator_for_completion_only_lm.py
src/oumi/core/collators/vision_language_collator_with_padding.py
src/oumi/core/collators/vision_language_sft_collator.py
src/oumi/core/configs/__init__.py
src/oumi/core/configs/analyze_config.py
src/oumi/core/configs/async_evaluation_config.py
src/oumi/core/configs/base_config.py
src/oumi/core/configs/evaluation_config.py
src/oumi/core/configs/inference_config.py
src/oumi/core/configs/inference_engine_type.py
src/oumi/core/configs/job_config.py
src/oumi/core/configs/judge_config.py
src/oumi/core/configs/quantization_config.py
src/oumi/core/configs/synthesis_config.py
src/oumi/core/configs/training_config.py
src/oumi/core/configs/internal/internal_model_config.py
src/oumi/core/configs/internal/supported_models.py
src/oumi/core/configs/params/base_params.py
src/oumi/core/configs/params/data_params.py
src/oumi/core/configs/params/deepspeed_params.py
src/oumi/core/configs/params/evaluation_params.py
src/oumi/core/configs/params/fsdp_params.py
src/oumi/core/configs/params/generation_params.py
src/oumi/core/configs/params/grpo_params.py
src/oumi/core/configs/params/guided_decoding_params.py
src/oumi/core/configs/params/judge_params.py
src/oumi/core/configs/params/model_params.py
src/oumi/core/configs/params/peft_params.py
src/oumi/core/configs/params/profiler_params.py
src/oumi/core/configs/params/remote_params.py
src/oumi/core/configs/params/synthesis_params.py
src/oumi/core/configs/params/telemetry_params.py
src/oumi/core/configs/params/training_params.py
src/oumi/core/datasets/__init__.py
src/oumi/core/datasets/base_dpo_dataset.py
src/oumi/core/datasets/base_grpo_dataset.py
src/oumi/core/datasets/base_iterable_dataset.py
src/oumi/core/datasets/base_kto_dataset.py
src/oumi/core/datasets/base_map_dataset.py
src/oumi/core/datasets/base_pretraining_dataset.py
src/oumi/core/datasets/base_sft_dataset.py
src/oumi/core/datasets/packed_sft_dataset.py
src/oumi/core/datasets/pretraining_async_text_dataset.py
src/oumi/core/datasets/vision_language_dataset.py
src/oumi/core/datasets/vision_language_dpo_dataset.py
src/oumi/core/evaluation/__init__.py
src/oumi/core/evaluation/evaluation_result.py
src/oumi/core/evaluation/evaluator.py
src/oumi/core/evaluation/metrics.py
src/oumi/core/evaluation/backends/alpaca_eval.py
src/oumi/core/evaluation/backends/lm_harness.py
src/oumi/core/evaluation/utils/platform_prerequisites.py
src/oumi/core/evaluation/utils/save_utils.py
src/oumi/core/feature_generators/__init__.py
src/oumi/core/feature_generators/base_feature_generator.py
src/oumi/core/feature_generators/vision_language_conversation_feature_generator.py
src/oumi/core/inference/__init__.py
src/oumi/core/inference/base_inference_engine.py
src/oumi/core/launcher/__init__.py
src/oumi/core/launcher/base_cloud.py
src/oumi/core/launcher/base_cluster.py
src/oumi/core/models/__init__.py
src/oumi/core/models/base_model.py
src/oumi/core/processors/base_image_processor.py
src/oumi/core/processors/base_processor.py
src/oumi/core/processors/default_image_processor.py
src/oumi/core/processors/default_processor.py
src/oumi/core/registry/__init__.py
src/oumi/core/registry/registry.py
src/oumi/core/synthesis/attribute_formatter.py
src/oumi/core/synthesis/attribute_synthesizer.py
src/oumi/core/synthesis/attribute_transformation.py
src/oumi/core/synthesis/data_synthesizer.py
src/oumi/core/synthesis/dataset_ingestion.py
src/oumi/core/synthesis/dataset_planner.py
src/oumi/core/synthesis/document_ingestion.py
src/oumi/core/synthesis/synthesis_pipeline.py
src/oumi/core/tokenizers/__init__.py
src/oumi/core/tokenizers/base_tokenizer.py
src/oumi/core/tokenizers/special_tokens.py
src/oumi/core/tokenizers/utils.py
src/oumi/core/trainers/__init__.py
src/oumi/core/trainers/base_trainer.py
src/oumi/core/trainers/hf_trainer.py
src/oumi/core/trainers/oumi_trainer.py
src/oumi/core/trainers/trl_dpo_trainer.py
src/oumi/core/trainers/verl_grpo_trainer.py
src/oumi/core/trainers/verl_trainer_config.yaml
src/oumi/core/types/__init__.py
src/oumi/core/types/conversation.py
src/oumi/core/types/exceptions.py
src/oumi/core/types/proto/conversation.proto
src/oumi/core/types/proto/generated/__init__.py
src/oumi/core/types/proto/generated/conversation_pb2.py
src/oumi/core/types/proto/generated/conversation_pb2.pyi
src/oumi/datasets/__init__.py
src/oumi/datasets/debug.py
src/oumi/datasets/mmlu.py
src/oumi/datasets/chat_templates/chat_ml.jinja
src/oumi/datasets/chat_templates/default.jinja
src/oumi/datasets/chat_templates/default_gen.jinja
src/oumi/datasets/chat_templates/gpt2.jinja
src/oumi/datasets/chat_templates/internvl3.jinja
src/oumi/datasets/chat_templates/llama3-instruct.jinja
src/oumi/datasets/chat_templates/llava.jinja
src/oumi/datasets/chat_templates/molmo.jinja
src/oumi/datasets/chat_templates/phi3-instruct.jinja
src/oumi/datasets/chat_templates/qwen2-vl-instruct.jinja
src/oumi/datasets/chat_templates/qwen3-vl-instruct.jinja
src/oumi/datasets/chat_templates/zephyr.jinja
src/oumi/datasets/evaluation/__init__.py
src/oumi/datasets/evaluation/alpaca.py
src/oumi/datasets/evaluation/utils.py
src/oumi/datasets/grpo/__init__.py
src/oumi/datasets/grpo/berry_bench.py
src/oumi/datasets/grpo/countdown.py
src/oumi/datasets/grpo/gsm8k.py
src/oumi/datasets/grpo/letter_count.py
src/oumi/datasets/grpo/tldr.py
src/oumi/datasets/grpo/rewards/__init__.py
src/oumi/datasets/grpo/rewards/completion_length_rewards.py
src/oumi/datasets/grpo/rewards/count_letters_rewards.py
src/oumi/datasets/grpo/rewards/countdown_rewards.py
src/oumi/datasets/grpo/rewards/gsm8k_reward.py
src/oumi/datasets/preference_tuning/__init__.py
src/oumi/datasets/preference_tuning/kto_mix.py
src/oumi/datasets/preference_tuning/orpo_dpo_mix.py
src/oumi/datasets/pretraining/__init__.py
src/oumi/datasets/pretraining/c4.py
src/oumi/datasets/pretraining/dolma.py
src/oumi/datasets/pretraining/falcon_refinedweb.py
src/oumi/datasets/pretraining/fineweb_edu.py
src/oumi/datasets/pretraining/pile.py
src/oumi/datasets/pretraining/red_pajama_v1.py
src/oumi/datasets/pretraining/red_pajama_v2.py
src/oumi/datasets/pretraining/slim_pajama.py
src/oumi/datasets/pretraining/starcoder.py
src/oumi/datasets/pretraining/the_stack.py
src/oumi/datasets/pretraining/tiny_stories.py
src/oumi/datasets/pretraining/tiny_textbooks.py
src/oumi/datasets/pretraining/wikipedia.py
src/oumi/datasets/pretraining/wikitext.py
src/oumi/datasets/pretraining/youtube_commons.py
src/oumi/datasets/sft/__init__.py
src/oumi/datasets/sft/alpaca.py
src/oumi/datasets/sft/aya.py
src/oumi/datasets/sft/chatqa.py
src/oumi/datasets/sft/chatrag_bench.py
src/oumi/datasets/sft/coalm.py
src/oumi/datasets/sft/dolly.py
src/oumi/datasets/sft/huggingface.py
src/oumi/datasets/sft/magpie.py
src/oumi/datasets/sft/openo1_sft.py
src/oumi/datasets/sft/prompt_response.py
src/oumi/datasets/sft/sft_jsonlines.py
src/oumi/datasets/sft/tulu3_sft_mixture.py
src/oumi/datasets/sft/ultrachat.py
src/oumi/datasets/sft/wildchat.py
src/oumi/datasets/vision_language/__init__.py
src/oumi/datasets/vision_language/coco_captions.py
src/oumi/datasets/vision_language/docmatix.py
src/oumi/datasets/vision_language/flickr30k.py
src/oumi/datasets/vision_language/geometry3k.py
src/oumi/datasets/vision_language/huggingface.py
src/oumi/datasets/vision_language/llava_instruct_mix_vsft.py
src/oumi/datasets/vision_language/lmms_lab_multimodal_open_r1.py
src/oumi/datasets/vision_language/mnist_sft.py
src/oumi/datasets/vision_language/pixmo_ask_model_anything.py
src/oumi/datasets/vision_language/pixmo_cap.py
src/oumi/datasets/vision_language/pixmo_cap_qa.py
src/oumi/datasets/vision_language/rlaif_v.py
src/oumi/datasets/vision_language/the_cauldron.py
src/oumi/datasets/vision_language/vision_dpo_jsonlines.py
src/oumi/datasets/vision_language/vision_jsonlines.py
src/oumi/datasets/vision_language/vqav2_small.py
src/oumi/evaluation/registry/__init__.py
src/oumi/evaluation/registry/berry_bench_task.py
src/oumi/evaluation/registry/count_letters_task.py
src/oumi/inference/__init__.py
src/oumi/inference/adaptive_concurrency_controller.py
src/oumi/inference/adaptive_semaphore.py
src/oumi/inference/anthropic_inference_engine.py
src/oumi/inference/deepseek_inference_engine.py
src/oumi/inference/gcp_inference_engine.py
src/oumi/inference/gemini_inference_engine.py
src/oumi/inference/lambda_inference_engine.py
src/oumi/inference/llama_cpp_inference_engine.py
src/oumi/inference/native_text_inference_engine.py
src/oumi/inference/openai_inference_engine.py
src/oumi/inference/parasail_inference_engine.py
src/oumi/inference/remote_inference_engine.py
src/oumi/inference/remote_vllm_inference_engine.py
src/oumi/inference/sambanova_inference_engine.py
src/oumi/inference/sglang_inference_engine.py
src/oumi/inference/together_inference_engine.py
src/oumi/inference/vllm_inference_engine.py
src/oumi/judges/__init__.py
src/oumi/judges/base_judge.py
src/oumi/judges/simple_judge.py
src/oumi/launcher/__init__.py
src/oumi/launcher/launcher.py
src/oumi/launcher/clients/local_client.py
src/oumi/launcher/clients/polaris_client.py
src/oumi/launcher/clients/sky_client.py
src/oumi/launcher/clients/slurm_client.py
src/oumi/launcher/clouds/__init__.py
src/oumi/launcher/clouds/frontier_cloud.py
src/oumi/launcher/clouds/local_cloud.py
src/oumi/launcher/clouds/perlmutter_cloud.py
src/oumi/launcher/clouds/polaris_cloud.py
src/oumi/launcher/clouds/sky_cloud.py
src/oumi/launcher/clouds/slurm_cloud.py
src/oumi/launcher/clusters/frontier_cluster.py
src/oumi/launcher/clusters/local_cluster.py
src/oumi/launcher/clusters/perlmutter_cluster.py
src/oumi/launcher/clusters/polaris_cluster.py
src/oumi/launcher/clusters/sky_cluster.py
src/oumi/launcher/clusters/slurm_cluster.py
src/oumi/models/__init__.py
src/oumi/models/cnn_classifier.py
src/oumi/models/mlp.py
src/oumi/models/experimental/cambrian/__init__.py
src/oumi/models/experimental/cambrian/constants.py
src/oumi/models/experimental/cambrian/mm_utils.py
src/oumi/models/experimental/cambrian/utils.py
src/oumi/models/experimental/cambrian/model/__init__.py
src/oumi/models/experimental/cambrian/model/builder.py
src/oumi/models/experimental/cambrian/model/cambrian_arch.py
src/oumi/models/experimental/cambrian/model/vision_sampler.py
src/oumi/models/experimental/cambrian/model/language_model/cambrian_llama.py
src/oumi/models/experimental/cambrian/model/language_model/cambrian_phi3.py
src/oumi/models/experimental/cambrian/model/language_model/phi3/__init__.py
src/oumi/models/experimental/cambrian/model/multimodal_encoder/__init__.py
src/oumi/models/experimental/cambrian/model/multimodal_encoder/base_encoder.py
src/oumi/models/experimental/cambrian/model/multimodal_encoder/builder.py
src/oumi/models/experimental/cambrian/model/multimodal_encoder/clip_convnext_encoder.py
src/oumi/models/experimental/cambrian/model/multimodal_encoder/clip_encoder.py
src/oumi/models/experimental/cambrian/model/multimodal_encoder/dino_encoder.py
src/oumi/models/experimental/cambrian/model/multimodal_encoder/load.py
src/oumi/models/experimental/cambrian/model/multimodal_encoder/siglip_encoder.py
src/oumi/models/experimental/cambrian/model/multimodal_projector/builder.py
src/oumi/models/experimental/cambrian/model/multimodal_projector/projectors.py
src/oumi/performance/mfu.py
src/oumi/performance/telemetry.py
src/oumi/performance/torch_profiler_utils.py
src/oumi/quantize/__init__.py
src/oumi/quantize/awq_quantizer.py
src/oumi/quantize/base.py
src/oumi/quantize/bnb_quantizer.py
src/oumi/quantize/constants.py
src/oumi/quantize/utils.py
src/oumi/utils/analysis_utils.py
src/oumi/utils/batching.py
src/oumi/utils/cache_utils.py
src/oumi/utils/conversation_utils.py
src/oumi/utils/debug_utils.py
src/oumi/utils/device_utils.py
src/oumi/utils/distributed_utils.py
src/oumi/utils/git_utils.py
src/oumi/utils/grpo_utils.py
src/oumi/utils/hf_cache_utils.py
src/oumi/utils/hf_utils.py
src/oumi/utils/http.py
src/oumi/utils/image_utils.py
src/oumi/utils/io_utils.py
src/oumi/utils/logging.py
src/oumi/utils/math_utils.py
src/oumi/utils/model_caching.py
src/oumi/utils/packaging.py
src/oumi/utils/peft_utils.py
src/oumi/utils/placeholders.py
src/oumi/utils/saver.py
src/oumi/utils/serialization_utils.py
src/oumi/utils/str_utils.py
src/oumi/utils/torch_naming_heuristics.py
src/oumi/utils/torch_utils.py
src/oumi/utils/verl_model_merger.py
src/oumi/utils/version_utils.py
tests/__init__.py
tests/conftest.py
tests/markers.py
tests/e2e/__init__.py
tests/e2e/sambanova_infer_tutorial.yaml
tests/e2e/test_eval_e2e.py
tests/e2e/test_notebooks.py
tests/e2e/test_sambanova_inference.py
tests/e2e/test_simple_judge.py
tests/e2e/test_train_e2e.py
tests/e2e/deps/test_circular_deps.py
tests/integration/cli/__init__.py
tests/integration/cli/test_judge_e2e.py
tests/integration/datasets/test_preference_tuning_datasets_full_epoch.py
tests/integration/datasets/test_pretraining_datasets_full_epoch.py
tests/integration/datasets/test_sft_datasets_full_epoch.py
tests/integration/datasets/test_sft_datasets_load_datasets.py
tests/integration/datasets/test_sft_vision_datasets_load_datasets.py
tests/integration/datasets/test_vision_language_completions_only.py
tests/integration/evaluate/__init__.py
tests/integration/evaluate/test_evaluate_async.py
tests/integration/evaluate/test_evaluate_lm_harness.py
tests/integration/infer/__init__.py
tests/integration/infer/test_infer.py
tests/integration/infer/test_native_text_inference_engine.py
tests/integration/models/__init__.py
tests/integration/models/test_integration_cnn_classifier.py
tests/integration/train/__init__.py
tests/integration/train/test_custom_models.py
tests/integration/train/test_train.py
tests/scripts/e2e_tests_job.yaml
tests/scripts/launch_tests.sh
tests/scripts/predownload_for_github_gpu_tests.sh
tests/scripts/runpod_e2e_tests_job.yaml
tests/testdata/adapter_config.json
tests/testdata/images/oumi_logo_dark.png
tests/testdata/images/oumi_logo_light.png
tests/testdata/images/the_great_wave_off_kanagawa.jpg
tests/testdata/pdfs/mock.pdf
tests/testdata/pdfs/oumi_getting_started_first_1page.pdf
tests/testdata/pdfs/oumi_getting_started_first_2pages.pdf
tests/testdata/pdfs/oumi_getting_started_full_4pages.pdf
tests/unit/__init__.py
tests/unit/conftest.py
tests/unit/test_apache_license_header.py
tests/unit/test_train.py
tests/unit/builders/test_build_data.py
tests/unit/builders/test_callbacks.py
tests/unit/builders/test_collators.py
tests/unit/builders/test_data_mixtures.py
tests/unit/builders/test_lr_schedules.py
tests/unit/builders/test_models.py
tests/unit/builders/test_oumi_data.py
tests/unit/builders/test_processors.py
tests/unit/builders/test_quantizers.py
tests/unit/builders/test_rewards.py
tests/unit/cli/test_cli_alias.py
tests/unit/cli/test_cli_cache.py
tests/unit/cli/test_cli_distributed_run.py
tests/unit/cli/test_cli_env.py
tests/unit/cli/test_cli_evaluate.py
tests/unit/cli/test_cli_fetch.py
tests/unit/cli/test_cli_infer.py
tests/unit/cli/test_cli_judge.py
tests/unit/cli/test_cli_launch.py
tests/unit/cli/test_cli_main.py
tests/unit/cli/test_cli_speed_regression.py
tests/unit/cli/test_cli_synth.py
tests/unit/cli/test_cli_train.py
tests/unit/cli/test_cli_utils.py
tests/unit/core/test_async_utils.py
tests/unit/core/test_distributed.py
tests/unit/core/test_length_analyzer.py
tests/unit/core/test_registry.py
tests/unit/core/analyze/test_dataset_analyzer.py
tests/unit/core/collators/test_text_collator_with_padding.py
tests/unit/core/collators/test_text_completions_collator_with_padding.py
tests/unit/core/collators/test_vision_completions_only.py
tests/unit/core/collators/test_vision_language_collator_with_padding.py
tests/unit/core/configs/test_analyze_config.py
tests/unit/core/configs/test_config.py
tests/unit/core/configs/test_guided_params.py
tests/unit/core/configs/test_parse_configs.py
tests/unit/core/configs/test_synthesis_config.py
tests/unit/core/configs/test_training_config.py
tests/unit/core/configs/internal/test_supported_models.py
tests/unit/core/configs/params/test_base_params.py
tests/unit/core/configs/params/test_data_params.py
tests/unit/core/configs/params/test_deepspeed_params.py
tests/unit/core/configs/params/test_evaluation_params.py
tests/unit/core/configs/params/test_judge_params.py
tests/unit/core/configs/params/test_model_params.py
tests/unit/core/configs/params/test_remote_params.py
tests/unit/core/configs/params/test_synthesis_params.py
tests/unit/core/datasets/test_base_map_dataset.py
tests/unit/core/datasets/test_base_sft_dataset.py
tests/unit/core/datasets/test_packed_sft_dataset.py
tests/unit/core/datasets/test_pretraining_dataset.py
tests/unit/core/datasets/test_vision_language_dataset.py
tests/unit/core/evaluation/test_backend_alpaca_eval.py
tests/unit/core/evaluation/test_backend_lm_harness.py
tests/unit/core/evaluation/test_evaluator.py
tests/unit/core/evaluation/test_save_utils.py
tests/unit/core/synthesis/test_attribute_formatter.py
tests/unit/core/synthesis/test_attribute_synthesizer.py
tests/unit/core/synthesis/test_attribute_transformation.py
tests/unit/core/synthesis/test_data_synthesizer.py
tests/unit/core/synthesis/test_dataset_ingestion.py
tests/unit/core/synthesis/test_dataset_planner.py
tests/unit/core/synthesis/test_document_ingestion.py
tests/unit/core/synthesis/test_synthesis_pipeline.py
tests/unit/core/trainers/test_oumi_trainer.py
tests/unit/core/trainers/test_verl_grpo_trainer.py
tests/unit/core/types/test_conversation.py
tests/unit/datasets/test_chat_templates.py
tests/unit/datasets/test_datasets_demo_examples.py
tests/unit/datasets/test_huggingface_vision_dataset.py
tests/unit/datasets/test_pixmo.py
tests/unit/datasets/test_pretraining_async_text_dataset.py
tests/unit/datasets/test_text_jsonlines_dataset.py
tests/unit/datasets/test_tulu3_sft_mixture.py
tests/unit/datasets/test_vision_dpo_jsonlines_dataset.py
tests/unit/datasets/test_vision_language_jsonlines_dataset.py
tests/unit/datasets/grpo/rewards/test_completion_length_rewards.py
tests/unit/datasets/grpo/rewards/test_count_letters_rewards.py
tests/unit/datasets/grpo/rewards/test_countdown_rewards.py
tests/unit/inference/test_adaptive_concurrency_controller.py
tests/unit/inference/test_adaptive_semaphore.py
tests/unit/inference/test_anthropic_inference_engine.py
tests/unit/inference/test_base_inference_engine.py
tests/unit/inference/test_deepseek_inference_engine.py
tests/unit/inference/test_gcp_inference_engine.py
tests/unit/inference/test_gemini_inference_engine.py
tests/unit/inference/test_generation_params.py
tests/unit/inference/test_inference_engine_init.py
tests/unit/inference/test_lambda_inference_engine.py
tests/unit/inference/test_llama_cpp_inference_engine.py
tests/unit/inference/test_openai_inference_engine.py
tests/unit/inference/test_parasail_inference_engine.py
tests/unit/inference/test_remote_inference_engine.py
tests/unit/inference/test_sambanova_inference_engine.py
tests/unit/inference/test_sglang_inference_engine.py
tests/unit/inference/test_together_inference_engine.py
tests/unit/inference/test_vllm_inference_engine.py
tests/unit/inference/test_vllm_inference_engine_quantization.py
tests/unit/judges/test_base_judge.py
tests/unit/judges/test_simple_judge.py
tests/unit/launcher/test_launcher.py
tests/unit/launcher/clients/test_local_client.py
tests/unit/launcher/clients/test_polaris_client.py
tests/unit/launcher/clients/test_sky_client.py
tests/unit/launcher/clients/test_slurm_client.py
tests/unit/launcher/clients/data/qstat.txt
tests/unit/launcher/clients/data/sacct.txt
tests/unit/launcher/clients/data/sacct_full.txt
tests/unit/launcher/clouds/test_frontier_cloud.py
tests/unit/launcher/clouds/test_local_cloud.py
tests/unit/launcher/clouds/test_perlmutter_cloud.py
tests/unit/launcher/clouds/test_polaris_cloud.py
tests/unit/launcher/clouds/test_sky_cloud.py
tests/unit/launcher/clouds/test_slurm_cloud.py
tests/unit/launcher/clusters/test_frontier_cluster.py
tests/unit/launcher/clusters/test_local_cluster.py
tests/unit/launcher/clusters/test_perlmutter_cluster.py
tests/unit/launcher/clusters/test_polaris_cluster.py
tests/unit/launcher/clusters/test_sky_cluster.py
tests/unit/launcher/clusters/test_slurm_cluster.py
tests/unit/models/test_cnn_classifier.py
tests/unit/performance/test_mfu.py
tests/unit/performance/test_telemetry.py
tests/unit/performance/test_torch_profiler_utils.py
tests/unit/quantize/__init__.py
tests/unit/quantize/test_awq_quantizer.py
tests/unit/quantize/test_base_quantization.py
tests/unit/quantize/test_bnb_quantizer.py
tests/unit/quantize/test_quantize_module.py
tests/unit/utils/test_analysis_utils.py
tests/unit/utils/test_cache_utils.py
tests/unit/utils/test_conversation_utils.py
tests/unit/utils/test_device_utils.py
tests/unit/utils/test_distributed_utils.py
tests/unit/utils/test_hf_utils.py
tests/unit/utils/test_http.py
tests/unit/utils/test_image_utils.py
tests/unit/utils/test_io_utils.py
tests/unit/utils/test_math_utils.py
tests/unit/utils/test_model_caching.py
tests/unit/utils/test_packaging.py
tests/unit/utils/test_peft_utils.py
tests/unit/utils/test_saver.py
tests/unit/utils/test_serialization_utils.py
tests/unit/utils/test_str_utils.py
tests/unit/utils/test_torch_naming_heuristics.py
tests/unit/utils/test_torch_utils.py
tests/unit/utils/test_version_utils.py