README.md
pyproject.toml
setup.py
FAI_RL.egg-info/PKG-INFO
FAI_RL.egg-info/SOURCES.txt
FAI_RL.egg-info/dependency_links.txt
FAI_RL.egg-info/entry_points.txt
FAI_RL.egg-info/requires.txt
FAI_RL.egg-info/top_level.txt
configs/__init__.py
configs/deepspeed/zero3_config_gpu1.json
configs/deepspeed/zero3_config_gpu2.json
configs/deepspeed/zero3_config_gpu4.json
configs/deepspeed/zero3_config_gpu8.json
core/__init__.py
core/config.py
core/model_utils.py
core/trainer_base.py
evaluations/README.md
evaluations/__init__.py
evaluations/eval.py
evaluations/eval_datasets/__init__.py
evaluations/eval_datasets/gsm8k.py
evaluations/eval_datasets/mmlu.py
inference/README.md
inference/__init__.py
inference/inference.py
recipes/__init__.py
recipes/evaluation/gsm8k/llama3_3B_vanilla.yaml
recipes/evaluation/gsm8k/llama3_8B_vanilla.yaml
recipes/evaluation/gsm8k/qwen3_4B.yaml
recipes/evaluation/gsm8k/qwen3_4B_vanilla.yaml
recipes/evaluation/gsm8k/qwen3_8B_vanilla.yaml
recipes/evaluation/mmlu/llama3_3B.yaml
recipes/evaluation/mmlu/llama3_3B_api.yaml
recipes/evaluation/mmlu/llama3_vanilla_3B.yaml
recipes/inference/llama3_3B.yaml
recipes/inference/llama3_3B_api.yaml
recipes/inference/llama3_vanilla_3B.yaml
recipes/training/dpo/llama3_3B_full.yaml
recipes/training/dpo/llama3_3B_lora.yaml
recipes/training/dpo/llama3_3B_qlora.yaml
recipes/training/grpo/llama3_3B_full.yaml
recipes/training/grpo/llama3_3B_lora.yaml
recipes/training/grpo/qwen3_4B_full_subjective_reward.yaml
recipes/training/gspo/llama3_3B_full.yaml
recipes/training/gspo/llama3_3B_lora.yaml
recipes/training/ppo/llama3_3B_full.yaml
recipes/training/ppo/llama3_3B_lora.yaml
recipes/training/ppo/llama3_3B_qlora.yaml
recipes/training/sft/llama3_3B_full.yaml
recipes/training/sft/llama3_3B_lora.yaml
recipes/training/sft/llama3_3B_qlora.yaml
trainers/README.md
trainers/__init__.py
trainers/dpo_trainer.py
trainers/grpo_trainer.py
trainers/gspo_trainer.py
trainers/ppo_trainer.py
trainers/sft_trainer.py
trainers/train.py
trainers/rewards/__init__.py
trainers/rewards/accuracy_rewards.py
trainers/rewards/format_rewards.py
trainers/rewards/subjective_rewards.py
trainers/templates/__init__.py
trainers/templates/gsm8k_template.py
trainers/templates/openmathinstruct_template.py
trainers/templates/subjective_template.py
utils/__init__.py
utils/api_utils.py
utils/config_validation.py
utils/dataset_utils.py
utils/hosted_llm_config.py
utils/logging_utils.py
utils/recipe_overrides.py
venv_deploy/share/jupyter/kernels/python3/kernel.json
venv_deploy/share/jupyter/labextensions/@jupyter-widgets/jupyterlab-manager/install.json
venv_deploy/share/jupyter/labextensions/@jupyter-widgets/jupyterlab-manager/package.json
venv_deploy/share/jupyter/labextensions/@jupyter-widgets/jupyterlab-manager/schemas/@jupyter-widgets/jupyterlab-manager/plugin.json
venv_deploy/share/jupyter/labextensions/@jupyter-widgets/jupyterlab-manager/static/third-party-licenses.json