CONTRIBUTING.md
LICENSE
MANIFEST.in
README.md
VERSION
pyproject.toml
setup.cfg
setup.py
examples/datasets/hh-rlhf-helpful-base.py
examples/datasets/llava_instruct_mix.py
examples/datasets/lm-human-preferences-descriptiveness.py
examples/datasets/lm-human-preferences-sentiment.py
examples/datasets/math_shepherd.py
examples/datasets/prm800k.py
examples/datasets/rlaif-v.py
examples/datasets/tldr.py
examples/datasets/tldr_preference.py
examples/datasets/ultrafeedback-prompt.py
examples/datasets/ultrafeedback.py
examples/research_projects/layer_skip/scripts/benchmark_layer_skip.py
examples/research_projects/layer_skip/scripts/config.py
examples/research_projects/layer_skip/scripts/custom_trainer.py
examples/research_projects/layer_skip/scripts/layer_skip_sft.py
examples/research_projects/stack_llama/scripts/merge_peft_adapter.py
examples/research_projects/stack_llama/scripts/reward_modeling.py
examples/research_projects/stack_llama/scripts/rl_training.py
examples/research_projects/stack_llama/scripts/supervised_finetuning.py
examples/research_projects/stack_llama_2/scripts/dpo_llama2.py
examples/research_projects/stack_llama_2/scripts/sft_llama2.py
examples/research_projects/toxicity/scripts/evaluate-toxicity.py
examples/research_projects/toxicity/scripts/gpt-j-6b-toxicity.py
examples/scripts/alignprop.py
examples/scripts/bco.py
examples/scripts/cpo.py
examples/scripts/ddpo.py
examples/scripts/dpo.py
examples/scripts/dpo_online.py
examples/scripts/dpo_vlm.py
examples/scripts/gkd.py
examples/scripts/grpo_vlm.py
examples/scripts/gspo.py
examples/scripts/gspo_vlm.py
examples/scripts/kto.py
examples/scripts/mpo_vlm.py
examples/scripts/nash_md.py
examples/scripts/orpo.py
examples/scripts/prm.py
examples/scripts/reward_modeling.py
examples/scripts/rloo.py
examples/scripts/sft.py
examples/scripts/sft_gemma3.py
examples/scripts/sft_gpt_oss.py
examples/scripts/sft_video_llm.py
examples/scripts/sft_vlm.py
examples/scripts/sft_vlm_gemma3.py
examples/scripts/xpo.py
examples/scripts/evals/judge_tldr.py
examples/scripts/ppo/ppo.py
examples/scripts/ppo/ppo_tldr.py
scripts/add_copyrights.py
scripts/generate_harmony_dataset.py
scripts/generate_tiny_models.py
scripts/generate_toolcall_dataset.py
scripts/generate_zen_dataset.py
scripts/generate_zen_image_dataset.py
scripts/generate_zen_multi_image_dataset.py
scripts/log_example_reports.py
scripts/log_reports.py
tests/test_activation_offloading.py
tests/test_alignprop_trainer.py
tests/test_bco_trainer.py
tests/test_best_of_n_sampler.py
tests/test_callbacks.py
tests/test_cli.py
tests/test_cli_utils.py
tests/test_collators.py
tests/test_core.py
tests/test_cpo_trainer.py
tests/test_data_utils.py
tests/test_dataset_formatting.py
tests/test_ddpo_trainer.py
tests/test_dpo_trainer.py
tests/test_gkd_trainer.py
tests/test_grpo_trainer.py
tests/test_iterative_sft_trainer.py
tests/test_judges.py
tests/test_kto_trainer.py
tests/test_modeling_geometric_mixture_wrapper.py
tests/test_modeling_value_head.py
tests/test_nash_md_trainer.py
tests/test_online_dpo_trainer.py
tests/test_orpo_trainer.py
tests/test_peft_models.py
tests/test_ppo_trainer.py
tests/test_prm_trainer.py
tests/test_reward_trainer.py
tests/test_rewards.py
tests/test_rich_progress_callback.py
tests/test_rloo_trainer.py
tests/test_sft_trainer.py
tests/test_trainers_args.py
tests/test_utils.py
tests/test_vllm_client_server.py
tests/test_xpo_trainer.py
tests/testing_constants.py
tests/testing_utils.py
trl/__init__.py
trl/cli.py
trl/core.py
trl/data_utils.py
trl/import_utils.py
trl/mergekit_utils.py
trl/py.typed
trl.egg-info/PKG-INFO
trl.egg-info/SOURCES.txt
trl.egg-info/dependency_links.txt
trl.egg-info/entry_points.txt
trl.egg-info/requires.txt
trl.egg-info/top_level.txt
trl/accelerate_configs/fsdp1.yaml
trl/accelerate_configs/fsdp2.yaml
trl/accelerate_configs/multi_gpu.yaml
trl/accelerate_configs/single_gpu.yaml
trl/accelerate_configs/zero1.yaml
trl/accelerate_configs/zero2.yaml
trl/accelerate_configs/zero3.yaml
trl/extras/__init__.py
trl/extras/best_of_n_sampler.py
trl/extras/dataset_formatting.py
trl/extras/profiling.py
trl/extras/vllm_client.py
trl/models/__init__.py
trl/models/activation_offloading.py
trl/models/auxiliary_modules.py
trl/models/modeling_base.py
trl/models/modeling_sd_base.py
trl/models/modeling_value_head.py
trl/models/sd_utils.py
trl/models/utils.py
trl/rewards/__init__.py
trl/rewards/format_rewards.py
trl/rewards/other_rewards.py
trl/scripts/__init__.py
trl/scripts/dpo.py
trl/scripts/env.py
trl/scripts/grpo.py
trl/scripts/kto.py
trl/scripts/rloo.py
trl/scripts/sft.py
trl/scripts/utils.py
trl/scripts/vllm_serve.py
trl/templates/lm_model_card.md
trl/trainer/__init__.py
trl/trainer/alignprop_config.py
trl/trainer/alignprop_trainer.py
trl/trainer/bco_config.py
trl/trainer/bco_trainer.py
trl/trainer/callbacks.py
trl/trainer/cpo_config.py
trl/trainer/cpo_trainer.py
trl/trainer/ddpo_config.py
trl/trainer/ddpo_trainer.py
trl/trainer/dpo_config.py
trl/trainer/dpo_trainer.py
trl/trainer/gkd_config.py
trl/trainer/gkd_trainer.py
trl/trainer/grpo_config.py
trl/trainer/grpo_trainer.py
trl/trainer/iterative_sft_config.py
trl/trainer/iterative_sft_trainer.py
trl/trainer/judges.py
trl/trainer/kto_config.py
trl/trainer/kto_trainer.py
trl/trainer/model_config.py
trl/trainer/nash_md_config.py
trl/trainer/nash_md_trainer.py
trl/trainer/online_dpo_config.py
trl/trainer/online_dpo_trainer.py
trl/trainer/orpo_config.py
trl/trainer/orpo_trainer.py
trl/trainer/ppo_config.py
trl/trainer/ppo_trainer.py
trl/trainer/prm_config.py
trl/trainer/prm_trainer.py
trl/trainer/reward_config.py
trl/trainer/reward_trainer.py
trl/trainer/rloo_config.py
trl/trainer/rloo_trainer.py
trl/trainer/sft_config.py
trl/trainer/sft_trainer.py
trl/trainer/utils.py
trl/trainer/xpo_config.py
trl/trainer/xpo_trainer.py