~~Qwen 3 TTS~~ Deprecated
Clone the Qwen TTS repository by groxaxo, then edit the Docker Compose file inside it as follows:
version: '3.8'

services:
  # GPU-enabled service with official backend (default).
  # It has no profile, so a plain `docker-compose up` starts it.
  qwen3-tts-gpu:
    build:
      context: .
      dockerfile: Dockerfile
      target: production
    container_name: qwen3-tts-api
    # Port 8880 is only exposed to containers on the shared network;
    # it is not published on the host.
    expose:
      - "8880"
    environment:
      - HOST=0.0.0.0
      - PORT=8880
      - WORKERS=1
      - CORS_ORIGINS=*
      - TTS_BACKEND=official
      - TTS_WARMUP_ON_START=false
      # - TTS_MODEL_NAME=Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice
      # When using device_ids filter, the GPU appears as device 0 inside container
      - CUDA_VISIBLE_DEVICES=0
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
    volumes:
      # Mount model cache for persistence
      - ~/.cache/huggingface:/home/appuser/.cache/huggingface
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]
    restart: always
    # Joins the external network declared at the bottom of this file.
    networks:
      - litellm_network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8880/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 120s

  # GPU-enabled service with vLLM-Omni backend (only started with `--profile vllm`)
  qwen3-tts-vllm:
    build:
      context: .
      dockerfile: Dockerfile.vllm
    container_name: qwen3-tts-api-vllm
    # Host networking: the server binds directly to host port 8880 (PORT below).
    # No `ports:` mapping here, because port mappings cannot be combined with
    # network_mode: host (Docker discards or rejects them, depending on version).
    network_mode: host
    environment:
      - HOST=0.0.0.0
      - PORT=8880
      - WORKERS=1
      - CORS_ORIGINS=*
      - TTS_BACKEND=vllm_omni
      - TTS_WARMUP_ON_START=true
      - TTS_MODEL_NAME=Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice
      - VLLM_WORKER_MULTIPROC_METHOD=spawn
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      # Must match the container side of the cache volume below.
      - HF_HOME=/root/.cache/huggingface
    volumes:
      # Mount model cache for persistence
      - ~/.cache/huggingface:/root/.cache/huggingface
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['2']
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8880/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 180s
    profiles:
      - vllm

  # CPU-only service (only started with `--profile cpu`)
  qwen3-tts-cpu:
    build:
      context: .
      dockerfile: Dockerfile
      target: cpu-base
    container_name: qwen3-tts-api-cpu
    # Host networking: the server binds directly to host port 8880 (PORT below).
    # No `ports:` mapping here, because port mappings cannot be combined with
    # network_mode: host (Docker discards or rejects them, depending on version).
    network_mode: host
    environment:
      - HOST=0.0.0.0
      - PORT=8880
      - WORKERS=1
      - CORS_ORIGINS=*
      - TTS_BACKEND=official
      - TTS_MODEL_NAME=Qwen/Qwen3-TTS-12Hz-1.7B-Base
    volumes:
      # Mount model cache for persistence
      - ~/.cache/huggingface:/home/appuser/.cache/huggingface
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8880/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 180s
    profiles:
      - cpu

networks:
  # Pre-existing network (not created by this file), referenced by its real name.
  litellm_network:
    external: true
    name: litellm-stack_default

# To run GPU version with official backend: docker-compose up qwen3-tts-gpu
# To run GPU version with vLLM backend: docker-compose --profile vllm up qwen3-tts-vllm
# To run CPU version: docker-compose --profile cpu up qwen3-tts-cpu
Add the following entry to `model_list` in the LiteLLM config file:
# =====================
# Qwen-TTS
# =====================
# One entry of the LiteLLM `model_list` sequence; when pasting, indent the
# whole item to match the other `- model_name:` entries in the config file.
- model_name: qwen3-tts
  litellm_params:
    model: openai/qwen3-tts
    api_base: http://qwen3-tts-api:8880/v1  # container name as hostname
    openai_compatible: true
    # Placeholder value, quoted so it is unambiguously a string.
    api_key: "none"
Now in OWUI > Admin Panel > Settings > Audio, configure OWUI to use the Qwen TTS model exposed via LiteLLM for audio generation requests:

- Select `OpenAI` from the dropdown menu
- Insert LiteLLM's API base URL: `https://tracking.mlmp.ti.bfh.ch/v1`
- Insert the API key
- Insert the model's name (the same as `model_name` in `model_list`)
- Save
Finally, start the Qwen TTS container and restart OWUI and LiteLLM.