.gitignore
.pre-commit-config.yaml
ATTRIBUTIONS.md
Dockerfile
LICENSE
README.md
pyproject.toml
requirements.txt
ruff.toml
tools
.github/workflows/build-and-test.yaml
.github/workflows/doc_pages.yaml
.github/workflows/pre-commit.yaml
assets/nvidia-cosmos-header.png
assets/rl_infra.svg
configs/deepseek-v3/moonlight-moe-13b-tp1-sft.toml
configs/deepseek-v3/moonlight-moe-13b-tp4-sft.toml
configs/qwen2-5/qwen2-5-32b-gsm8k-grpo-fp8.toml
configs/qwen2-5/qwen2-5-32b-gsm8k-grpo.toml
configs/qwen2-5/qwen2-5-32b-gsm8k-p-fsdp2-cp2-tp2-r-tp4-grpo.toml
configs/qwen2-5/qwen2-5-32b-p-fsdp2-tp4-r-tp4-pp1-dapo.toml
configs/qwen2-5/qwen2-5-32b-p-fsdp2-tp4-r-tp4-pp1-grpo-gsm8k.toml
configs/qwen2-5/qwen2-5-32b-p-fsdp2-tp4-r-tp4-pp1-grpo-math.toml
configs/qwen2-5/qwen2-5-32b-p-fsdp2-tp4-r-tp4-pp1-grpo-validation.toml
configs/qwen2-5/qwen2-5-32b-p-fsdp2-tp4-r-tp4-pp1-grpo.toml
configs/qwen2-5/qwen2-5-32b-tp2-fsdp4-sft.toml
configs/qwen2-5/qwen2-5-32b-tp8-sft.toml
configs/qwen2-5/qwen2-5-3b-p-fsdp1-tp1-r-tp1-pp1-grpo-aipo.toml
configs/qwen2-5/qwen2-5-3b-p-fsdp1-tp1-r-tp1-pp1-grpo.toml
configs/qwen2-5/qwen2-5-3b-p-fsdp1-tp2-r-n_init_replica-3-tp2-pp1-grpo.toml
configs/qwen2-5/qwen2-5-3b-p-fsdp1-tp2-r-tp2-pp1-grpo-profile.toml
configs/qwen2-5/qwen2-5-3b-p-fsdp1-tp2-r-tp2-pp1-grpo.toml
configs/qwen2-5/qwen2-5-3b-tp1-sft.toml
configs/qwen2-5/qwen2-5-3b-tp2-fsdp-sft.toml
configs/qwen2-5/qwen2-5-7b-p-fsdp1-tp2-r-tp2-pp1-grpo-vllm-flashinfer.toml
configs/qwen2-5/qwen2-5-7b-p-fsdp1-tp2-r-tp2-pp1-grpo.toml
configs/qwen2-5/qwen2-5-7b-p-fsdp2-cp2-r-tp2-pp1-grpo.toml
configs/qwen2-5/qwen2-5-7b-p-fsdp2-tp2-r-tp2-pp1-grpo-fp8.toml
configs/qwen2-5/qwen2-5-7b-p-fsdp2-tp2-r-tp2-pp1-grpo.toml
configs/qwen2-5/qwen2-5-7b-p-fsdp2-tp2-r-tp4-pp1-grpo.toml
configs/qwen2-5/qwen2-5-7b-p-pp1-fsdp1-tp2-r-tp1-pp2-grpo.toml
configs/qwen2-5/qwen2-5-7b-p-pp2-fsdp1-tp1-r-tp2-pp2-grpo.toml
configs/qwen2-5/qwen2-5-7b-p-pp2-tp2-r-tp2-pp1-grpo.toml
configs/qwen2-5/qwen2-5-7b-p-pp2-tp2-r-tp2-pp2-grpo.toml
configs/qwen2-5/qwen2-5-7b-p-tp4-r-tp2-pp1-grpo.toml
configs/qwen2-5/qwen2-5-7b-pp2-cp2-sft.toml
configs/qwen2-5/qwen2-5-7b-pp2-tp2-sft.toml
configs/qwen3/qwen3-32b-p-fsdp1-tp8-r-tp4-pp1-grpo.toml
configs/qwen3/qwen3-32b-tp8-sft.toml
configs/qwen3/qwen3-8b-p-fsdp1-tp4-r-tp4-pp1-grpo-fp8.toml
configs/qwen3/qwen3-8b-p-tp4-r-tp2-pp1-grpo.toml
configs/qwen3-moe/qwen3-moe-30b-fsdp2-tp4-sft.toml
configs/qwen3-moe/qwen3-moe-30b-p-tp4-r-tp2-grpo-fp8.toml
configs/qwen3-moe/qwen3-moe-30b-p-tp4-r-tp2-grpo.toml
configs/qwen3-moe/qwen3-moe-30b-tp4-sft.toml
cosmos_rl/__init__.py
cosmos_rl/_version.py
cosmos_rl.egg-info/PKG-INFO
cosmos_rl.egg-info/SOURCES.txt
cosmos_rl.egg-info/dependency_links.txt
cosmos_rl.egg-info/entry_points.txt
cosmos_rl.egg-info/requires.txt
cosmos_rl.egg-info/top_level.txt
cosmos_rl/cli/__init__.py
cosmos_rl/cli/algo.py
cosmos_rl/cli/cli.py
cosmos_rl/cli/custom_group.py
cosmos_rl/cli/nccl.py
cosmos_rl/cli/profiler.py
cosmos_rl/cli/replica.py
cosmos_rl/cli/utils.py
cosmos_rl/comm/__init__.py
cosmos_rl/comm/base.py
cosmos_rl/dispatcher/__init__.py
cosmos_rl/dispatcher/command.py
cosmos_rl/dispatcher/controller.py
cosmos_rl/dispatcher/protocol.py
cosmos_rl/dispatcher/replica.py
cosmos_rl/dispatcher/run_web_panel.py
cosmos_rl/dispatcher/status.py
cosmos_rl/dispatcher/algo/__init__.py
cosmos_rl/dispatcher/algo/base.py
cosmos_rl/dispatcher/algo/grpo.py
cosmos_rl/dispatcher/algo/reward.py
cosmos_rl/dispatcher/config/__init__.py
cosmos_rl/dispatcher/config/frontend/__init__.py
cosmos_rl/dispatcher/config/frontend/configure_template.html
cosmos_rl/dispatcher/config/frontend/dispatcher_status.html
cosmos_rl/dispatcher/data/__init__.py
cosmos_rl/dispatcher/data/packer/__init__.py
cosmos_rl/dispatcher/data/packer/base.py
cosmos_rl/dispatcher/data/packer/decoder_only_llm_data_packer.py
cosmos_rl/dispatcher/data/packer/qwen2_5_vlm_data_packer.py
cosmos_rl/launcher/__init__.py
cosmos_rl/launcher/launch_all.py
cosmos_rl/launcher/launch_controller.sh
cosmos_rl/launcher/launch_replica.sh
cosmos_rl/launcher/worker_entry.py
cosmos_rl/patch/__init__.py
cosmos_rl/policy/__init__.py
cosmos_rl/policy/train.py
cosmos_rl/policy/config/__init__.py
cosmos_rl/policy/kernel/__init__.py
cosmos_rl/policy/kernel/group_gemms.py
cosmos_rl/policy/kernel/megatron_moe/README.md
cosmos_rl/policy/kernel/megatron_moe/__init__.py
cosmos_rl/policy/kernel/megatron_moe/fused_a2a.py
cosmos_rl/policy/kernel/megatron_moe/fused_indices_converter.py
cosmos_rl/policy/kernel/megatron_moe/moe_utils.py
cosmos_rl/policy/kernel/megatron_moe/token_dispatcher.py
cosmos_rl/policy/kernel/moe/__init__.py
cosmos_rl/policy/kernel/moe/grouped_gemm.py
cosmos_rl/policy/kernel/moe/indices.py
cosmos_rl/policy/kernel/symm_mem_recipes/__init__.py
cosmos_rl/policy/kernel/symm_mem_recipes/triton_barrier.py
cosmos_rl/policy/kernel/symm_mem_recipes/triton_on_device_all_to_all_v.py
cosmos_rl/policy/kernel/symm_mem_recipes/triton_utils.py
cosmos_rl/policy/model/__init__.py
cosmos_rl/policy/model/base.py
cosmos_rl/policy/model/gpt/__init__.py
cosmos_rl/policy/model/gpt/parallelize.py
cosmos_rl/policy/model/gpt/weight_converter.py
cosmos_rl/policy/model/gpt/weight_mapper.py
cosmos_rl/policy/model/qwen2_5_vl/__init__.py
cosmos_rl/policy/model/qwen2_5_vl/parallelize.py
cosmos_rl/policy/model/qwen2_5_vl/weight_converter.py
cosmos_rl/policy/model/qwen2_5_vl/weight_mapper.py
cosmos_rl/policy/model/qwen3_moe/__init__.py
cosmos_rl/policy/model/qwen3_moe/parallelize.py
cosmos_rl/policy/model/qwen3_moe/weight_converter.py
cosmos_rl/policy/model/qwen3_moe/weight_mapper.py
cosmos_rl/policy/trainer/__init__.py
cosmos_rl/policy/trainer/grpo_trainer.py
cosmos_rl/policy/trainer/sft_trainer.py
cosmos_rl/policy/trainer/optm/__init__.py
cosmos_rl/rollout/__init__.py
cosmos_rl/rollout/rollout_base.py
cosmos_rl/rollout/rollout_entrance.py
cosmos_rl/rollout/utils.py
cosmos_rl/rollout/vllm_rollout/__init__.py
cosmos_rl/rollout/vllm_rollout/vllm_patch.py
cosmos_rl/rollout/vllm_rollout/vllm_rollout.py
cosmos_rl/rollout/vllm_rollout/vllm_rollout_worker.py
cosmos_rl/tools/__init__.py
cosmos_rl/tools/dataset/__init__.py
cosmos_rl/tools/dataset/cosmos_grpo.py
cosmos_rl/tools/dataset/cosmos_sft.py
cosmos_rl/tools/dataset/gsm8k_grpo.py
cosmos_rl/tools/dataset/math_dapo.py
cosmos_rl/tools/dataset/math_grpo.py
cosmos_rl/tools/model/__init__.py
cosmos_rl/tools/model/moonlight_launcher.py
cosmos_rl/tools/model/deepseek_v3/__init__.py
cosmos_rl/tools/model/deepseek_v3/parallelize.py
cosmos_rl/tools/model/deepseek_v3/weight_converter.py
cosmos_rl/tools/model/deepseek_v3/weight_mapper.py
cosmos_rl/tools/slurm/README.md
cosmos_rl/tools/slurm/__init__.py
cosmos_rl/tools/slurm/cosmos_rl_job_multi_node.sh
cosmos_rl/tools/slurm/cosmos_rl_slurm_launch.py
cosmos_rl/tools/slurm/dispatch_job.py
cosmos_rl/tools/slurm/util.py
cosmos_rl/utils/__init__.py
cosmos_rl/utils/api_suffix.py
cosmos_rl/utils/attn_util.py
cosmos_rl/utils/cache.py
cosmos_rl/utils/checkpoint.py
cosmos_rl/utils/constant.py
cosmos_rl/utils/distributed.py
cosmos_rl/utils/logging.py
cosmos_rl/utils/modelscope.py
cosmos_rl/utils/network_util.py
cosmos_rl/utils/parallelism.py
cosmos_rl/utils/parallelism_map.py
cosmos_rl/utils/parallelism_registry.py
cosmos_rl/utils/profiler.py
cosmos_rl/utils/pynccl.py
cosmos_rl/utils/pynccl_wrapper.py
cosmos_rl/utils/redis_stream.py
cosmos_rl/utils/ulysses.py
cosmos_rl/utils/util.py
cosmos_rl/utils/wandb_logger.py
cosmos_rl/utils/fp8/__init__.py
cosmos_rl/utils/fp8/fp8_util.py
docs/Makefile
docs/README.md
docs/conf.py
docs/index.rst
docs/make.bat
docs/assets/data_flow.png
docs/assets/old_rollout.png
docs/assets/policy.png
docs/assets/rollout.png
docs/assets/weight_p2p.png
docs/async/overview.rst
docs/elastic/overview.rst
docs/multinodes/dgxc_lepton.rst
docs/multinodes/overview.rst
docs/multinodes/slurm.rst
docs/parallelism/overview.rst
docs/quickstart/configuration.rst
docs/quickstart/customization.rst
docs/quickstart/dataflow.rst
docs/quickstart/installation.rst
docs/quickstart/single_node_example.rst
tests/launch_test_worker.py
tests/test_cache.py
tests/test_comm.py
tests/test_context_parallel.py
tests/test_fp8.py
tests/test_grad_allreduce.py
tests/test_high_availability_nccl.py
tests/test_integration.py
tests/test_math_verify.py
tests/test_nccl_collectives.py
tests/test_nccl_timeout.py
tests/test_parallel_map.py
tests/test_policy_to_policy.py
tests/test_policy_to_rollout.py
tests/test_process_flow.py
tests/benchmark/launch_rollout_bench.sh
tests/benchmark/rollout_benchmark.py
tests/configs/grpo_integration_test.toml
tests/configs/sft_integration_test.toml
tests/configs/test_simple_grpo.toml
tests/configs/test_simple_sft.toml
tests/data/test_policy_extract_pp_1_fsdp_1_tp_1.npy
tests/data/test_policy_extract_pp_1_fsdp_2_tp_2.npy
tests/data/test_policy_extract_pp_2_fsdp_2_tp_1.npy
tests/data/test_rollout_extract_pp_1_fsdp_1_tp_1.npy
tests/data/test_rollout_extract_pp_1_fsdp_1_tp_4.npy
tests/test_dataset/dataset_dict.json
tests/test_dataset/train/data-00000-of-00001.arrow
tests/test_dataset/train/dataset_info.json
tests/test_dataset/train/state.json
tests/utils/mock_policy_entrance.py
tests/utils/mock_rollout_entrance.py