requests
apipeline>=0.2.4
python-dotenv
pydub
pillow
aiohttp==3.10.11
scipy
pyloudnorm
pydantic
numpy>=1.22.0
nest_asyncio

[accelerate]
accelerate~=1.7.0

[agora]
colorlog
agora-realtime-ai-api-v1~=0.0.3
agora_python_server_sdk_v1~=0.0.3
achatbot[opencv]

[agora_channel_audio_stream]
achatbot[agora]

[agora_transport]
achatbot[agora]

[ai_frameworks_processor]
achatbot[ai_langchain_framework_processor]

[ai_langchain_framework_processor]
langchain~=0.3.9

[asr_processor]
achatbot[deepgram_asr_processor,speech_asr]

[autoawq]
autoawq

[bitsandbytes]
bitsandbytes~=0.44.1

[cartesia_tts_processor]
websockets~=12.0

[codec_bitokenizer]
einops==0.8.1
einx==0.3.0
numpy==2.2.3
omegaconf==2.3.0
packaging==24.2
safetensors==0.5.2
soundfile==0.12.1
soxr==0.5.0.post1
torch==2.5.1
torchaudio==2.5.1
torchvision==0.20.1
transformers==4.46.2

[codec_moshi_mimi]
moshi~=0.1.0

[codec_snac]
snac

[codec_transformers_dac]
transformers[torch]~=4.45.1

[codec_transformers_mimi]
transformers[torch]~=4.45.1

[codec_wavtokenizer]
numpy==1.23.5
encodec
pyyaml
huggingface_hub
achatbot[einops,librosa,matplotlib,soundfile]

[codec_xcodec2]
xcodec2==0.1.3

[conf]
omegaconf~=2.3.0
hydra-core~=1.3.2

[core_llm]
achatbot[llama_cpp,llm_personalai_proxy]

[ctranslate2]
ctranslate2

[daily]
daily-python~=0.11.0

[daily_langchain_rag_bot]
achatbot[daily_transport,langchain_openai_tidb_vector,llm_processor,tts_edge,whisper_groq_asr]

[daily_room_audio_stream]
achatbot[daily]

[daily_rtvi_bot]
achatbot[daily_transport,llm_processor,tts_edge,whisper_groq_asr]

[daily_transport]
achatbot[daily]

[daily_webrtc_terminal_chat_bot]
achatbot[core_llm,daily_room_audio_stream,speech_asr,speech_tts,speech_vad,speech_waker,stream_player]

[daily_webrtc_vad_analyzer]
achatbot[daily]

[deep_translator]
deep_translator~=1.11.4

[deepgram_asr_processor]
deepgram-sdk~=3.7.7

[diffusers]
achatbot[torch_vision_audio]
diffusers[torch]~=0.31.0

[einops]
einops~=0.8.0

[fastapi]
fastapi~=0.112.0

[fastapi_bot_server]
fastapi~=0.112.0
uvicorn~=0.30.6

[flash-attn]
flash-attn==2.7.3

[flashinfer-python]
flashinfer-python==0.2.3

[freeze_omni_voice_processor]
achatbot[llm_transformers_manual_voice_freeze_omni]

[gdown]
gdown==5.1.0

[glm_voice_processor]
achatbot[bitsandbytes,llm_transformers_manual_voice_glm]

[google_ai]
google-generativeai~=0.8.3

[google_llm_processor]
achatbot[google_ai,openai]

[grpc]
grpcio>=1.71.0

[grpc_tools]
grpcio-tools>=1.71.0

[img_processor]
openai~=1.54.1

[lam_audio2expression_avatar]
opencv_python_headless
omegaconf
addict==2.4.0
yapf==0.40.1
librosa
termcolor
numpy==1.24.3
protobuf==5.29.4
transformers==4.36.2

[langchain_openai_tidb_vector]
achatbot[ai_langchain_framework_processor]
langchain-text-splitters~=0.3.2
langchain-openai~=0.2.11
langchain-community~=0.3.9
tidb-vector~=0.0.10
pymysql~=1.1.1

[librosa]
librosa~=0.10.2.post1

[lite_avatar]
funasr
av
h5py
jieba
pypinyin
transformers
typeguard==2.13.3
vector-quantize-pytorch
vocos
onnxruntime
numpy==1.26.4

[lite_avatar_gpu]
achatbot[lite_avatar]
onnxruntime-gpu

[litellm]
litellm~=1.52.0

[litellm_processor]
achatbot[litellm,openai]

[livekit]
livekit~=0.17.5

[livekit-api]
livekit-api~=0.7.1

[livekit_room_audio_stream]
achatbot[livekit,livekit-api]

[livekit_transport]
achatbot[livekit,livekit-api]

[llama_cpp]
llama-cpp-python~=0.2.82

[llm_personalai_proxy]
geocoder~=1.38.1

[llm_processor]
achatbot[google_llm_processor,litellm_processor,openai_llm_processor]

[llm_transformers_manual_speech_higgs]
torch
torchaudio
transformers<4.47.0,>=4.45.1
librosa
omegaconf
dacite
json_repair
pandas
vector_quantize_pytorch
loguru
jieba
accelerate>=0.26.0
soundfile
descript-audio-codec

[llm_transformers_manual_speech_llama]
achatbot[llm_transformers_manual_voice]

[llm_transformers_manual_speech_llasa]
achatbot[llm_transformers_manual_voice]

[llm_transformers_manual_speech_spark]
achatbot[llm_transformers_manual_voice]

[llm_transformers_manual_vision]
transformers
qwen-vl-utils
av
achatbot[torch_vision_audio]

[llm_transformers_manual_vision_deepseekvl2]
achatbot[accelerate,einops]
transformers==4.38.2
xformers>=0.0.21
sentencepiece
attrdict
timm>=0.9.16

[llm_transformers_manual_vision_fastvlm]
achatbot[accelerate,torch_vision_audio]
numpy==1.26.4
transformers==4.48.3
tokenizers==0.21.0
sentencepiece==0.1.99
shortuuid
peft<0.14.0,>=0.10.0
bitsandbytes
markdown2[all]
scikit-learn==1.2.2
einops==0.6.1
einops-exts==0.0.4
timm==1.0.15

[llm_transformers_manual_vision_gemma]
achatbot[accelerate,llm_transformers_manual_vision]

[llm_transformers_manual_vision_glm4v]
achatbot[llm_transformers_manual_vision]

[llm_transformers_manual_vision_img_janus]
achatbot[accelerate,einops,llm_transformers_manual_vision]
sentencepiece
attrdict
timm>=0.9.16

[llm_transformers_manual_vision_keye]
keye-vl-utils[decord]==1.0.0
achatbot[llm_transformers_manual_vision]

[llm_transformers_manual_vision_kimi]
numpy==1.26.2
blobfile
achatbot[accelerate,llm_transformers_manual_vision,tiktoken]

[llm_transformers_manual_vision_llama]
achatbot[llm_transformers_manual_vision]

[llm_transformers_manual_vision_mimo]
achatbot[llm_transformers_manual_vision]

[llm_transformers_manual_vision_molmo]
achatbot[einops,llm_transformers_manual_vision]

[llm_transformers_manual_vision_qwen]
achatbot[llm_transformers_manual_vision]

[llm_transformers_manual_vision_skyworkr1v]
achatbot[llm_transformers_manual_vision]
timm

[llm_transformers_manual_vision_smolvlm]
achatbot[llm_transformers_manual_vision]
num2words

[llm_transformers_manual_vision_speech_gemma]
achatbot[librosa,llm_transformers_manual_vision_gemma]
timm

[llm_transformers_manual_vision_speech_phi]
achatbot[accelerate,soundfile,torch_vision_audio]
transformers==4.48.2
backoff
peft
qwen-omni-utils

[llm_transformers_manual_vision_voice_minicpmo]
achatbot[accelerate,librosa,soundfile]
torch~=2.2.2
torchaudio~=2.2.2
torchvision~=0.17.2
transformers==4.44.2
vector-quantize-pytorch~=1.18.5
vocos~=0.1.0
decord
moviepy

[llm_transformers_manual_vision_voice_qwen]
achatbot[accelerate,librosa,soundfile]
torch~=2.6.0
torchaudio~=2.6.0
torchvision~=0.21.0
numpy==1.26.2
qwen-omni-utils[decord]
torchdiffeq
x_transformers

[llm_transformers_manual_voice]
transformers~=4.45.2
torch~=2.2.2
torchaudio~=2.2.2

[llm_transformers_manual_voice_freeze_omni]
achatbot[librosa,llm_transformers_manual_voice,soundfile,yaml]

[llm_transformers_manual_voice_glm]
achatbot[conf,gdown,llm_transformers_manual_voice,matplotlib,tts_cosy_voice]

[llm_transformers_manual_voice_kimi]
achatbot[accelerate,conf,librosa,soundfile]
torch~=2.6.0
torchaudio~=2.6.0
torchdyn==1.0.6
transformers
pandas
openai-whisper
sox
six==1.16.0
hyperpyyaml
conformer==0.3.2
diffusers
loguru
tqdm
huggingface_hub
blobfile
timm

[llm_transformers_manual_voice_vita]
achatbot[accelerate,conf,librosa,soundfile,torch_vision_audio]
transformers
tiktoken
funasr
rich
hyperpyyaml
conformer
lightning
wget
natsort
safetensors
diffusers
gdown
jiwer
zhon
WeTextProcessing
inflect
openai-whisper
onnxruntime
modelscope
word2number
pyworld
matplotlib

[local_terminal_chat_bot]
tqdm>=4.66.0
achatbot[core_llm,pyaudio_stream,speech_asr,speech_tts,speech_vad,speech_waker,stream_player]

[matplotlib]
matplotlib==3.7.5

[mcp]
mcp[cli]~=1.9.1
fastmcp

[moshi_voice_processor]
moshi~=0.2.1

[musetalk_avatar]
ffmpeg-python>=0.2.0
imageio[ffmpeg]>=2.37.0
tensorflow==2.12.0
accelerate==0.32.0
transformers==4.44.1
av
moviepy
diffusers
achatbot[conf,einops,librosa,opencv,soundfile]

[ngrok_proxy]
pyngrok~=7.2.0
nest-asyncio~=1.6.0

[openai]
openai~=1.54.1

[openai_llm_processor]
achatbot[openai]

[opencv]
opencv-python~=4.10.0.84

[porcupine_wakeword]
pvporcupine~=3.0.2

[pyannote_vad]
pyannote.audio~=3.2.0

[pyaudio_stream]
PyAudio~=0.2.14

[pyee]
pyee~=12.0.0

[pytube]
pytube~=15.0.0

[queue]
achatbot[redis]

[redis]
redis~=5.0.0

[remote_grpc_tts_client]
achatbot[grpc,stream_player]

[remote_grpc_tts_server]
achatbot[grpc,speech_tts]

[remote_queue_chat_bot_be_worker]
achatbot[core_llm,queue,speech_asr,speech_tts,speech_vad,speech_waker]

[remote_queue_chat_bot_fe]
achatbot[queue,speech_audio_stream,stream_player]

[remote_rpc_chat_bot_be_worker]
achatbot[core_llm,rpc,speech_asr,speech_tts,speech_vad,speech_waker]

[remote_rpc_chat_bot_fe]
achatbot[rpc,speech_audio_stream]

[rms_recorder]

[rpc]
grpcio>=1.71.0

[sense_voice_asr]
torch
torchaudio
funasr
onnx
onnxconverter-common

[sglang]
sglang[all]==0.4.4.post1

[silero_vad]
achatbot[torch_vision_audio]

[silero_vad_analyzer]
achatbot[silero_vad]

[smart_turn]
torch
transformers

[soundfile]
soundfile~=0.12.1

[speech_asr]
achatbot[sense_voice_asr,whisper_asr,whisper_faster_asr,whisper_groq_asr,whisper_mlx_asr,whisper_timestamped_asr,whisper_transformers_asr]

[speech_audio_stream]
PyAudio~=0.2.14
daily-python~=0.11.0

[speech_tts]
achatbot[tts_chat,tts_coqui,tts_cosy_voice,tts_edge,tts_f5,tts_g,tts_kokoro,tts_openvoicev2,tts_pyttsx3]

[speech_vad]
achatbot[pyannote_vad,silero_vad,webrtcvad]

[speech_vad_analyzer]
achatbot[daily_webrtc_vad_analyzer,silero_vad_analyzer]

[speech_waker]
achatbot[porcupine_wakeword]

[step_voice_processor]
achatbot[tts_step]

[stream_player]

[tensorrt]
tensorrt~=10.4.0

[test]
sentence_transformers~=3.0.0
pytest~=8.3.2
pytest-mock~=3.14.0

[tiktoken]
tiktoken~=0.7.0

[together_ai]
together~=1.3.3

[torch_vision_audio]
torch~=2.6.0
torchaudio~=2.6.0
torchvision~=0.21.0

[transformers]
transformers[torch]

[trtllm]
tensorrt-llm==0.17.0.post1

[tts_chat]
torch~=2.2.2
vocos~=0.1.0
pybase16384~=0.3.7
vector_quantize_pytorch~=1.16.1
transformers~=4.40.2

[tts_chat:sys_platform == "linux"]
pynini~=2.1.5
WeTextProcessing~=1.0.2
nemo_text_processing~=1.0.2

[tts_coqui]
TTS~=0.22.0

[tts_cosy_voice]
torch~=2.2.2
torchaudio~=2.2.2
transformers~=4.40.2
hyperpyyaml~=1.2.2
onnxruntime~=1.18.1
openai-whisper==20231117
conformer~=0.3.2
diffusers[torch]~=0.30.0
lightning~=2.2.4
wget~=3.2
modelscope~=1.16.0
achatbot[conf]

[tts_cosy_voice2]
achatbot[tts_cosy_voice]

[tts_cosy_voice:sys_platform == "linux"]
WeTextProcessing~=1.0.2

[tts_edge]
edge-tts>=7.2.0

[tts_f5]
wandb
ema_pytorch
datasets
accelerate>=0.33.0
tomli
cached_path
click
torch~=2.2.2
torchaudio~=2.2.2
matplotlib
numpy<=1.26.4
torchdiffeq
jieba
pypinyin
achatbot[librosa,soundfile]
transformers~=4.40.2
x_transformers>=1.31.14
vocos~=0.1.0

[tts_f5:platform_machine != "arm64" and platform_system != "Darwin"]
bitsandbytes>0.37.0

[tts_fishspeech]
torch~=2.3.1
torchaudio~=2.3.1
transformers~=4.40.2
natsort>=8.4.0
loguru>=0.6.0
rich>=13.5.3
vector_quantize_pytorch==1.14.24
loralib>=0.1.2
tiktoken>=0.8.0
pytorch-lightning~=2.4.0
lightning~=2.4.0
pyrootutils
achatbot[conf,einops,librosa]

[tts_g]
gTTS~=2.5.1

[tts_generator_spark]
achatbot[tts_spark]

[tts_higgs]
achatbot[llm_transformers_manual_speech_higgs]

[tts_kokoro]
torch~=2.2.2
transformers~=4.40.2
phonemizer
munch

[tts_llasa]
achatbot[codec_xcodec2]

[tts_mega3]
torch==2.3.1
torchaudio==2.3.1
transformers==4.49.0
WeTextProcessing==1.0.4.1
pyloudnorm==0.1.1
x-transformers==1.44.4
torchdiffeq==0.2.5
openai-whisper==20240930
langdetect
attrdict
setproctitle==1.3.3
achatbot[librosa]

[tts_onnx_kokoro]
kokoro-onnx~=0.2.5

[tts_openvoicev2]
achatbot[librosa,soundfile,whisper_faster_asr,whisper_timestamped_asr]
wavmark==0.0.3
eng_to_ipa~=0.0.2
inflect~=7.0.0
unidecode~=1.3.7
jieba
pypinyin
cn2an
langid

[tts_orpheus]
achatbot[codec_snac]
numpy==1.26.4
torch==2.3.1
torchaudio==2.3.1
transformers==4.48.3

[tts_processor]
achatbot[cartesia_tts_processor,openai,speech_tts]

[tts_pyttsx3]
pyttsx3~=2.90

[tts_spark]
achatbot[codec_bitokenizer]

[tts_step]
torch==2.3.1
torchaudio==2.3.1
torchvision==0.18.1
transformers==4.48.3
accelerate==1.3.0
openai-whisper==20231117
sox==1.5.0
modelscope
six==1.16.0
hyperpyyaml
conformer==0.3.2
diffusers
onnxruntime-gpu==1.20.1
sentencepiece
funasr>=1.1.3
protobuf==5.29.3
achatbot[conf,librosa]

[tts_zonos]
torch>=2.5.1
inflect>=7.5.0
kanjize>=1.5.0
phonemizer>=3.3.0
sudachidict-full>=20241021
sudachipy>=0.6.10
torchaudio>=2.5.1
transformers>=4.48.1
huggingface-hub>=0.28.1
achatbot[soundfile]

[tts_zonos_hybrid]
achatbot[tts_zonos]
flash-attn>=2.7.3
mamba-ssm>=2.2.4
causal-conv1d>=1.5.0.post8

[vad_recorder]
achatbot[speech_vad]

[verovio]
verovio~=4.3.1

[vision_transformers_got_ocr]
achatbot[accelerate,llm_transformers_manual_vision,tiktoken,verovio]

[vision_yolo_detector]
ultralytics~=8.3.12
supervision~=0.24.0

[vllm]
vllm==0.9.2

[webrtc]
aiortc~=1.13.0

[webrtc_silero_vad]
achatbot[silero_vad,webrtcvad]

[webrtcvad]
webrtcvad~=2.0.10

[websocket]
websockets~=12.0

[websocket_server_transport]
achatbot[websocket]

[whisper_asr]
openai-whisper

[whisper_cpp]
pywhispercpp

[whisper_faster_asr]
faster-whisper

[whisper_groq_asr]
groq~=0.9.0

[whisper_mlx_asr]

[whisper_mlx_asr:sys_platform == "darwin" and platform_machine == "arm64"]
mlx_whisper~=0.2.0

[whisper_timestamped_asr]
whisper-timestamped

[whisper_transformers_asr]
transformers[torch]>=4.40.2

[yaml]
PyYAML~=6.0.2
