Skip to content

Instantly share code, notes, and snippets.

@grayfallstown
Created January 15, 2026 17:19
Show Gist options
  • Select an option

  • Save grayfallstown/e828dd79cd630e40a07013f259073b43 to your computer and use it in GitHub Desktop.

Select an option

Save grayfallstown/e828dd79cd630e40a07013f259073b43 to your computer and use it in GitHub Desktop.
litellm proxy config with free quota providers and models
# /etc/litellm/config.yaml
#
# Deployments that share the same model_name (e.g. "large") form one
# routing pool; the router shuffles across them. The second entry per
# model exposes it under its own explicit name as well.
model_list:
# ============================================================
# 🌐 OPENROUTER — large / medium / fast / ultrafast
# ============================================================
# =========================
# 🦍 OPENROUTER — LARGE
# =========================
- model_name: large
  litellm_params:
    model: openrouter/meta-llama/llama-3.3-70b-instruct:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: llama-3.3-70b-instruct
  litellm_params:
    model: openrouter/meta-llama/llama-3.3-70b-instruct:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: large
  litellm_params:
    model: openrouter/nousresearch/hermes-3-llama-3.1-405b:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: hermes-3-llama-3.1-405b
  litellm_params:
    model: openrouter/nousresearch/hermes-3-llama-3.1-405b:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: large
  litellm_params:
    model: openrouter/tngtech/deepseek-r1t2-chimera:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: deepseek-r1t2-chimera
  litellm_params:
    model: openrouter/tngtech/deepseek-r1t2-chimera:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: large
  litellm_params:
    model: openrouter/tngtech/deepseek-r1t-chimera:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: deepseek-r1t-chimera
  litellm_params:
    model: openrouter/tngtech/deepseek-r1t-chimera:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: large
  litellm_params:
    model: openrouter/deepseek/deepseek-r1-0528:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: deepseek-r1-0528
  litellm_params:
    model: openrouter/deepseek/deepseek-r1-0528:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: large
  litellm_params:
    model: openrouter/tngtech/tng-r1t-chimera:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: tng-r1t-chimera
  litellm_params:
    model: openrouter/tngtech/tng-r1t-chimera:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: large
  litellm_params:
    model: openrouter/meta-llama/llama-3.1-405b-instruct:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: llama-3.1-405b-instruct
  litellm_params:
    model: openrouter/meta-llama/llama-3.1-405b-instruct:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: large
  litellm_params:
    model: openrouter/openai/gpt-oss-120b:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: gpt-oss-120b
  litellm_params:
    model: openrouter/openai/gpt-oss-120b:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
# =========================
# 🧱 OPENROUTER — MEDIUM
# =========================
- model_name: medium
  litellm_params:
    model: openrouter/mistralai/devstral-2512:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: devstral-2512
  litellm_params:
    model: openrouter/mistralai/devstral-2512:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: medium
  litellm_params:
    model: openrouter/qwen/qwen3-coder:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: qwen3-coder
  litellm_params:
    model: openrouter/qwen/qwen3-coder:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
# =========================
# ⚡ OPENROUTER — FAST
# =========================
- model_name: fast
  litellm_params:
    model: openrouter/openai/gpt-oss-20b:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: gpt-oss-20b
  litellm_params:
    model: openrouter/openai/gpt-oss-20b:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: fast
  litellm_params:
    model: openrouter/google/gemini-2.0-flash-exp:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: gemini-2.0-flash-exp
  litellm_params:
    model: openrouter/google/gemini-2.0-flash-exp:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: fast
  litellm_params:
    model: openrouter/qwen/qwen-2.5-vl-7b-instruct:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: qwen-2.5-vl-7b-instruct
  litellm_params:
    model: openrouter/qwen/qwen-2.5-vl-7b-instruct:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: fast
  litellm_params:
    model: openrouter/allenai/molmo-2-8b:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: molmo-2-8b
  litellm_params:
    model: openrouter/allenai/molmo-2-8b:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
# =========================
# 💨 OPENROUTER — ULTRAFAST
# =========================
- model_name: ultrafast
  litellm_params:
    model: openrouter/meta-llama/llama-3.2-3b-instruct:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: llama-3.2-3b-instruct
  litellm_params:
    model: openrouter/meta-llama/llama-3.2-3b-instruct:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: ultrafast
  litellm_params:
    model: openrouter/qwen/qwen3-4b:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: qwen3-4b
  litellm_params:
    model: openrouter/qwen/qwen3-4b:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: ultrafast
  litellm_params:
    model: openrouter/google/gemma-3-4b-it:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: gemma-3-4b-it
  litellm_params:
    model: openrouter/google/gemma-3-4b-it:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: ultrafast
  litellm_params:
    model: openrouter/google/gemma-3n-e2b-it:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: gemma-3n-e2b-it
  litellm_params:
    model: openrouter/google/gemma-3n-e2b-it:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: ultrafast
  litellm_params:
    model: openrouter/google/gemma-3n-e4b-it:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: gemma-3n-e4b-it
  litellm_params:
    model: openrouter/google/gemma-3n-e4b-it:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: ultrafast
  litellm_params:
    model: openrouter/arcee-ai/trinity-mini:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: trinity-mini
  litellm_params:
    model: openrouter/arcee-ai/trinity-mini:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: ultrafast
  litellm_params:
    model: openrouter/xiaomi/mimo-v2-flash:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: mimo-v2-flash
  litellm_params:
    model: openrouter/xiaomi/mimo-v2-flash:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: ultrafast
  litellm_params:
    model: openrouter/nvidia/nemotron-nano-9b-v2:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
- model_name: nemotron-nano-9b-v2
  litellm_params:
    model: openrouter/nvidia/nemotron-nano-9b-v2:free
    api_key: os.environ/OPENROUTER_API_KEY
    api_base: https://openrouter.ai/api/v1/
# ============================================================
# 🟦 GOOGLE GEMINI API — (only the models from your list) + limits
# ============================================================
# model_info (category/rpm/tpm/rpd) is kept as metadata per
# deployment; it is a sibling of litellm_params, not nested in it.
# =========================
# 🦍 GEMINI — LARGE
# =========================
- model_name: large
  litellm_params:
    model: gemini/gemini-robotics-er-1.5-preview
    api_key: os.environ/GEMINI_API_KEY
    api_base: https://generativelanguage.googleapis.com/v1beta/openai/
  model_info:
    category: Andere Modelle
    rpm: 10
    tpm: 250000
    rpd: 250
# (gemma-3-27b is closer to medium by our heuristic)
- model_name: medium
  litellm_params:
    model: gemini/gemma-3-27b
    api_key: os.environ/GEMINI_API_KEY
    api_base: https://generativelanguage.googleapis.com/v1beta/openai/
  model_info:
    category: Andere Modelle
    rpm: 30
    tpm: 15000
    rpd: 14400
- model_name: gemma-3-27b
  litellm_params:
    model: gemini/gemma-3-27b
    api_key: os.environ/GEMINI_API_KEY
    api_base: https://generativelanguage.googleapis.com/v1beta/openai/
  model_info:
    category: Andere Modelle
    rpm: 30
    tpm: 15000
    rpd: 14400
# =========================
# ⚡ GEMINI — FAST
# =========================
- model_name: fast
  litellm_params:
    model: gemini/gemini-2.5-flash
    api_key: os.environ/GEMINI_API_KEY
    api_base: https://generativelanguage.googleapis.com/v1beta/openai/
  model_info:
    category: Textausgabemodelle
    rpm: 5
    tpm: 250000
    rpd: 20
- model_name: gemini-2.5-flash
  litellm_params:
    model: gemini/gemini-2.5-flash
    api_key: os.environ/GEMINI_API_KEY
    api_base: https://generativelanguage.googleapis.com/v1beta/openai/
  model_info:
    category: Textausgabemodelle
    rpm: 5
    tpm: 250000
    rpd: 20
- model_name: fast
  litellm_params:
    model: gemini/gemma-3-12b
    api_key: os.environ/GEMINI_API_KEY
    api_base: https://generativelanguage.googleapis.com/v1beta/openai/
  model_info:
    category: Andere Modelle
    rpm: 30
    tpm: 15000
    rpd: 14400
- model_name: gemma-3-12b
  litellm_params:
    model: gemini/gemma-3-12b
    api_key: os.environ/GEMINI_API_KEY
    api_base: https://generativelanguage.googleapis.com/v1beta/openai/
  model_info:
    category: Andere Modelle
    rpm: 30
    tpm: 15000
    rpd: 14400
# =========================
# 💨 GEMINI — ULTRAFAST
# =========================
- model_name: ultrafast
  litellm_params:
    model: gemini/gemini-2.5-flash-lite
    api_key: os.environ/GEMINI_API_KEY
    api_base: https://generativelanguage.googleapis.com/v1beta/openai/
  model_info:
    category: Textausgabemodelle
    rpm: 10
    tpm: 250000
    rpd: 20
- model_name: gemini-2.5-flash-lite
  litellm_params:
    model: gemini/gemini-2.5-flash-lite
    api_key: os.environ/GEMINI_API_KEY
    api_base: https://generativelanguage.googleapis.com/v1beta/openai/
  model_info:
    category: Textausgabemodelle
    rpm: 10
    tpm: 250000
    rpd: 20
- model_name: ultrafast
  litellm_params:
    model: gemini/gemma-3-4b
    api_key: os.environ/GEMINI_API_KEY
    api_base: https://generativelanguage.googleapis.com/v1beta/openai/
  model_info:
    category: Andere Modelle
    rpm: 30
    tpm: 15000
    rpd: 14400
- model_name: gemma-3-4b
  litellm_params:
    model: gemini/gemma-3-4b
    api_key: os.environ/GEMINI_API_KEY
    api_base: https://generativelanguage.googleapis.com/v1beta/openai/
  model_info:
    category: Andere Modelle
    rpm: 30
    tpm: 15000
    rpd: 14400
- model_name: ultrafast
  litellm_params:
    model: gemini/gemma-3-2b
    api_key: os.environ/GEMINI_API_KEY
    api_base: https://generativelanguage.googleapis.com/v1beta/openai/
  model_info:
    category: Andere Modelle
    rpm: 30
    tpm: 15000
    rpd: 14400
- model_name: gemma-3-2b
  litellm_params:
    model: gemini/gemma-3-2b
    api_key: os.environ/GEMINI_API_KEY
    api_base: https://generativelanguage.googleapis.com/v1beta/openai/
  model_info:
    category: Andere Modelle
    rpm: 30
    tpm: 15000
    rpd: 14400
- model_name: ultrafast
  litellm_params:
    model: gemini/gemma-3-1b
    api_key: os.environ/GEMINI_API_KEY
    api_base: https://generativelanguage.googleapis.com/v1beta/openai/
  model_info:
    category: Andere Modelle
    rpm: 30
    tpm: 15000
    rpd: 14400
- model_name: gemma-3-1b
  litellm_params:
    model: gemini/gemma-3-1b
    api_key: os.environ/GEMINI_API_KEY
    api_base: https://generativelanguage.googleapis.com/v1beta/openai/
  model_info:
    category: Andere Modelle
    rpm: 30
    tpm: 15000
    rpd: 14400
# ============================================================
# 🟣 AWAN — (disabled/unstable for you, but we keep it in)
# ============================================================
# =========================
# 🦍 AWAN — LARGE
# =========================
- model_name: large
  litellm_params:
    model: openai/Meta-Llama-3.1-70B-Instruct
    api_base: https://api.awanllm.com/v1
    api_key: os.environ/AWAN_API_KEY
- model_name: awan
  litellm_params:
    model: openai/Meta-Llama-3.1-70B-Instruct
    api_base: https://api.awanllm.com/v1
    api_key: os.environ/AWAN_API_KEY
- model_name: awan-llama-3.1-70b
  litellm_params:
    model: openai/Meta-Llama-3.1-70B-Instruct
    api_base: https://api.awanllm.com/v1
    api_key: os.environ/AWAN_API_KEY
- model_name: large
  litellm_params:
    model: openai/Meta-Llama-3-70B-Instruct
    api_base: https://api.awanllm.com/v1
    api_key: os.environ/AWAN_API_KEY
- model_name: awan
  litellm_params:
    model: openai/Meta-Llama-3-70B-Instruct
    api_base: https://api.awanllm.com/v1
    api_key: os.environ/AWAN_API_KEY
- model_name: awan-llama-3-70b
  litellm_params:
    model: openai/Meta-Llama-3-70B-Instruct
    api_base: https://api.awanllm.com/v1
    api_key: os.environ/AWAN_API_KEY
# =========================
# ⚡ AWAN — FAST
# =========================
- model_name: fast
  litellm_params:
    model: openai/Meta-Llama-3-8B-Instruct
    api_base: https://api.awanllm.com/v1
    api_key: os.environ/AWAN_API_KEY
- model_name: awan-llama-3-8b
  litellm_params:
    model: openai/Meta-Llama-3-8B-Instruct
    api_base: https://api.awanllm.com/v1
    api_key: os.environ/AWAN_API_KEY
- model_name: fast
  litellm_params:
    model: openai/Meta-Llama-3.1-8B-Instruct
    api_base: https://api.awanllm.com/v1
    api_key: os.environ/AWAN_API_KEY
- model_name: awan-llama-3.1-8b
  litellm_params:
    model: openai/Meta-Llama-3.1-8B-Instruct
    api_base: https://api.awanllm.com/v1
    api_key: os.environ/AWAN_API_KEY
# =========================
# 💨 AWAN — ULTRAFAST
# =========================
- model_name: ultrafast
  litellm_params:
    model: openai/Awanllm-Llama-3-8B-Cumulus
    api_base: https://api.awanllm.com/v1
    api_key: os.environ/AWAN_API_KEY
- model_name: awan-cumulus-8b
  litellm_params:
    model: openai/Awanllm-Llama-3-8B-Cumulus
    api_base: https://api.awanllm.com/v1
    api_key: os.environ/AWAN_API_KEY
- model_name: ultrafast
  litellm_params:
    model: openai/Awanllm-Llama-3-8B-Dolfin
    api_base: https://api.awanllm.com/v1
    api_key: os.environ/AWAN_API_KEY
- model_name: awan-dolfin-8b
  litellm_params:
    model: openai/Awanllm-Llama-3-8B-Dolfin
    api_base: https://api.awanllm.com/v1
    api_key: os.environ/AWAN_API_KEY
# ============================================================
# 🟩 GROQ — large / medium / fast / ultrafast + groq alias
# ============================================================
# =========================
# 🦍 GROQ — LARGE
# =========================
- model_name: large
  litellm_params:
    model: groq/openai/gpt-oss-120b
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
- model_name: groq-gpt-oss-120b
  litellm_params:
    model: groq/openai/gpt-oss-120b
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
- model_name: large
  litellm_params:
    model: groq/llama-3.3-70b-versatile
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
- model_name: groq-llama-3.3-70b-versatile
  litellm_params:
    model: groq/llama-3.3-70b-versatile
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
- model_name: large
  litellm_params:
    model: groq/qwen/qwen3-32b
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
- model_name: groq-qwen3-32b
  litellm_params:
    model: groq/qwen/qwen3-32b
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
- model_name: large
  litellm_params:
    model: groq/moonshotai/kimi-k2-instruct
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
- model_name: groq-kimi-k2-instruct
  litellm_params:
    model: groq/moonshotai/kimi-k2-instruct
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
- model_name: large
  litellm_params:
    model: groq/moonshotai/kimi-k2-instruct-0905
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
- model_name: groq-kimi-k2-instruct-0905
  litellm_params:
    model: groq/moonshotai/kimi-k2-instruct-0905
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
# =========================
# 🧱 GROQ — MEDIUM
# =========================
- model_name: medium
  litellm_params:
    model: groq/openai/gpt-oss-20b
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
- model_name: groq-gpt-oss-20b
  litellm_params:
    model: groq/openai/gpt-oss-20b
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
- model_name: medium
  litellm_params:
    model: groq/meta-llama/llama-4-maverick-17b-128e-instruct
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
- model_name: groq-llama-4-maverick-17b-128e-instruct
  litellm_params:
    model: groq/meta-llama/llama-4-maverick-17b-128e-instruct
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
- model_name: medium
  litellm_params:
    model: groq/meta-llama/llama-4-scout-17b-16e-instruct
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
- model_name: groq-llama-4-scout-17b-16e-instruct
  litellm_params:
    model: groq/meta-llama/llama-4-scout-17b-16e-instruct
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
# =========================
# 💨 GROQ — ULTRAFAST
# =========================
- model_name: ultrafast
  litellm_params:
    model: groq/llama-3.1-8b-instant
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
- model_name: groq-llama-3.1-8b-instant
  litellm_params:
    model: groq/llama-3.1-8b-instant
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
- model_name: ultrafast
  litellm_params:
    model: groq/allam-2-7b
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
- model_name: groq-allam-2-7b
  litellm_params:
    model: groq/allam-2-7b
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
# =========================
# 🧰 GROQ — SPECIAL (not in pools)
# =========================
- model_name: groq-compound
  litellm_params:
    model: groq/groq/compound
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
- model_name: groq-compound-mini
  litellm_params:
    model: groq/groq/compound-mini
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
- model_name: groq-gpt-oss-safeguard-20b
  litellm_params:
    model: groq/openai/gpt-oss-safeguard-20b
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
- model_name: groq-llama-guard-4-12b
  litellm_params:
    model: groq/meta-llama/llama-guard-4-12b
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
- model_name: groq-llama-prompt-guard-2-22m
  litellm_params:
    model: groq/meta-llama/llama-prompt-guard-2-22m
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
- model_name: groq-llama-prompt-guard-2-86m
  litellm_params:
    model: groq/meta-llama/llama-prompt-guard-2-86m
    api_base: https://api.groq.com/openai/v1
    api_key: os.environ/GROQ_API_KEY
# ============================================================
# 🐙 GITHUB MODELS — in pools (large/medium/fast/ultrafast)
# ============================================================
# Provider prefix is mandatory: github/<modelname>
# Do NOT use the company/publisher prefix.
#
# Env:
# export GITHUB_API_KEY="github_pat_...." (or via systemd env)
#
# Endpoint:
# https://models.github.ai/inference/chat/completions
# -> api_base in LiteLLM: https://models.github.ai/inference
# -------------------------
# 🦍 GitHub — LARGE
# -------------------------
- model_name: large
  litellm_params:
    model: github/gpt-4.1
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: github-gpt-4.1
  litellm_params:
    model: github/gpt-4.1
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: large
  litellm_params:
    model: github/gpt-4o
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: github-gpt-4o
  litellm_params:
    model: github/gpt-4o
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: large
  litellm_params:
    model: github/Llama-3.3-70B-Instruct
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: github-llama-3.3-70b-instruct
  litellm_params:
    model: github/Llama-3.3-70B-Instruct
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: large
  litellm_params:
    model: github/Llama-3.2-90B-Vision-Instruct
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: github-llama-3.2-90b-vision
  litellm_params:
    model: github/Llama-3.2-90B-Vision-Instruct
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: large
  litellm_params:
    model: github/Meta-Llama-3.1-405B-Instruct
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: github-llama-3.1-405b-instruct
  litellm_params:
    model: github/Meta-Llama-3.1-405B-Instruct
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: large
  litellm_params:
    model: github/DeepSeek-V3-0324
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: github-deepseek-v3-0324
  litellm_params:
    model: github/DeepSeek-V3-0324
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: large
  litellm_params:
    model: github/DeepSeek-R1
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: github-deepseek-r1
  litellm_params:
    model: github/DeepSeek-R1
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: large
  litellm_params:
    model: github/AI21-Jamba-1-5-Large
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: github-ai21-jamba-1.5-large
  litellm_params:
    model: github/AI21-Jamba-1-5-Large
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: large
  litellm_params:
    model: github/Cohere-command-r-plus-08-2024
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: github-cohere-command-r-plus
  litellm_params:
    model: github/Cohere-command-r-plus-08-2024
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
# -------------------------
# 🧱 GitHub — MEDIUM
# -------------------------
- model_name: medium
  litellm_params:
    model: github/Llama-3.2-11B-Vision-Instruct
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: github-llama-3.2-11b-vision
  litellm_params:
    model: github/Llama-3.2-11B-Vision-Instruct
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: medium
  litellm_params:
    model: github/Llama-4-Maverick-17B-128E-Instruct-FP8
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: github-llama-4-maverick-17b
  litellm_params:
    model: github/Llama-4-Maverick-17B-128E-Instruct-FP8
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: medium
  litellm_params:
    model: github/Llama-4-Scout-17B-16E-Instruct
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: github-llama-4-scout-17b
  litellm_params:
    model: github/Llama-4-Scout-17B-16E-Instruct
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: medium
  litellm_params:
    model: github/Codestral-2501
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: github-codestral-2501
  litellm_params:
    model: github/Codestral-2501
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: medium
  litellm_params:
    model: github/Phi-4
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: github-phi-4
  litellm_params:
    model: github/Phi-4
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
# -------------------------
# ⚡ GitHub — FAST
# -------------------------
- model_name: fast
  litellm_params:
    model: github/Ministral-3B
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: github-ministral-3b
  litellm_params:
    model: github/Ministral-3B
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: fast
  litellm_params:
    model: github/Meta-Llama-3.1-8B-Instruct
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: github-llama-3.1-8b-instruct
  litellm_params:
    model: github/Meta-Llama-3.1-8B-Instruct
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
# -------------------------
# 💨 GitHub — ULTRAFAST
# -------------------------
- model_name: ultrafast
  litellm_params:
    model: github/Phi-4-mini-instruct
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: github-phi-4-mini-instruct
  litellm_params:
    model: github/Phi-4-mini-instruct
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: ultrafast
  litellm_params:
    model: github/Phi-4-mini-reasoning
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
- model_name: github-phi-4-mini-reasoning
  litellm_params:
    model: github/Phi-4-mini-reasoning
    api_base: https://models.github.ai/inference
    api_key: os.environ/GITHUB_API_KEY
litellm_settings:
  # Must stay low, otherwise no other deployment is tried
  num_retries: 5

router_settings:
  routing_strategy: simple-shuffle
  # Circuit breaker: after X failures the deployment is marked as "bad"
  allowed_fails: 2
  # Deployment is removed from the pool for N seconds
  cooldown_time: 300
  # Pool-level fallbacks: if a pool is exhausted, try the listed pools in order
  fallbacks:
  - large: [medium, fast, ultrafast]
  - medium: [fast, ultrafast, large]
  - fast: [ultrafast, medium, large]
  - ultrafast: [fast, medium, large]

general_settings:
  disable_auth: true
  request_timeout: 600  # 10 min total timeout
  connect_timeout: 30  # connection may be slow, but not forever
  read_timeout: 600  # important for long streams/outputs
  log_level: INFO
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment