LiteLLM proxy config with free-quota providers and models
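All pool aliases (`large`, `medium`, `fast`, `ultrafast`) and the individual model names are exposed through the proxy's OpenAI-compatible API. Below is a minimal smoke test, a sketch assuming the proxy was started with `litellm --config /etc/litellm/config.yaml` and listens on the default port 4000; since `general_settings` disables auth, the API key is just a placeholder.

```python
# Minimal sketch: talk to the LiteLLM proxy via the OpenAI SDK.
# Assumptions: proxy running locally on the default port 4000,
# auth disabled (see general_settings), so api_key is a dummy value.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:4000/v1", api_key="unused")

# "large" is a pool alias: the router shuffles across every deployment
# registered under that model_name in the config below.
resp = client.chat.completions.create(
    model="large",
    messages=[{"role": "user", "content": "Reply with one short sentence."}],
)
print(resp.choices[0].message.content)
```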
# /etc/litellm/config.yaml
model_list:
  # ============================================================
  # 🌐 OPENROUTER — large / medium / fast / ultrafast
  # ============================================================
  # =========================
  # 🦍 OPENROUTER — LARGE
  # =========================
  - model_name: large
    litellm_params:
      model: openrouter/meta-llama/llama-3.3-70b-instruct:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: llama-3.3-70b-instruct
    litellm_params:
      model: openrouter/meta-llama/llama-3.3-70b-instruct:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: large
    litellm_params:
      model: openrouter/nousresearch/hermes-3-llama-3.1-405b:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: hermes-3-llama-3.1-405b
    litellm_params:
      model: openrouter/nousresearch/hermes-3-llama-3.1-405b:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: large
    litellm_params:
      model: openrouter/tngtech/deepseek-r1t2-chimera:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: deepseek-r1t2-chimera
    litellm_params:
      model: openrouter/tngtech/deepseek-r1t2-chimera:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: large
    litellm_params:
      model: openrouter/tngtech/deepseek-r1t-chimera:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: deepseek-r1t-chimera
    litellm_params:
      model: openrouter/tngtech/deepseek-r1t-chimera:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: large
    litellm_params:
      model: openrouter/deepseek/deepseek-r1-0528:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: deepseek-r1-0528
    litellm_params:
      model: openrouter/deepseek/deepseek-r1-0528:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: large
    litellm_params:
      model: openrouter/tngtech/tng-r1t-chimera:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: tng-r1t-chimera
    litellm_params:
      model: openrouter/tngtech/tng-r1t-chimera:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: large
    litellm_params:
      model: openrouter/meta-llama/llama-3.1-405b-instruct:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: llama-3.1-405b-instruct
    litellm_params:
      model: openrouter/meta-llama/llama-3.1-405b-instruct:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: large
    litellm_params:
      model: openrouter/openai/gpt-oss-120b:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: gpt-oss-120b
    litellm_params:
      model: openrouter/openai/gpt-oss-120b:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  # =========================
  # 🧱 OPENROUTER — MEDIUM
  # =========================
  - model_name: medium
    litellm_params:
      model: openrouter/mistralai/devstral-2512:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: devstral-2512
    litellm_params:
      model: openrouter/mistralai/devstral-2512:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: medium
    litellm_params:
      model: openrouter/qwen/qwen3-coder:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: qwen3-coder
    litellm_params:
      model: openrouter/qwen/qwen3-coder:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  # =========================
  # ⚡ OPENROUTER — FAST
  # =========================
  - model_name: fast
    litellm_params:
      model: openrouter/openai/gpt-oss-20b:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: gpt-oss-20b
    litellm_params:
      model: openrouter/openai/gpt-oss-20b:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: fast
    litellm_params:
      model: openrouter/google/gemini-2.0-flash-exp:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: gemini-2.0-flash-exp
    litellm_params:
      model: openrouter/google/gemini-2.0-flash-exp:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: fast
    litellm_params:
      model: openrouter/qwen/qwen-2.5-vl-7b-instruct:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: qwen-2.5-vl-7b-instruct
    litellm_params:
      model: openrouter/qwen/qwen-2.5-vl-7b-instruct:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: fast
    litellm_params:
      model: openrouter/allenai/molmo-2-8b:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: molmo-2-8b
    litellm_params:
      model: openrouter/allenai/molmo-2-8b:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  # =========================
  # 💨 OPENROUTER — ULTRAFAST
  # =========================
  - model_name: ultrafast
    litellm_params:
      model: openrouter/meta-llama/llama-3.2-3b-instruct:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: llama-3.2-3b-instruct
    litellm_params:
      model: openrouter/meta-llama/llama-3.2-3b-instruct:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: ultrafast
    litellm_params:
      model: openrouter/qwen/qwen3-4b:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: qwen3-4b
    litellm_params:
      model: openrouter/qwen/qwen3-4b:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: ultrafast
    litellm_params:
      model: openrouter/google/gemma-3-4b-it:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: gemma-3-4b-it
    litellm_params:
      model: openrouter/google/gemma-3-4b-it:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: ultrafast
    litellm_params:
      model: openrouter/google/gemma-3n-e2b-it:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: gemma-3n-e2b-it
    litellm_params:
      model: openrouter/google/gemma-3n-e2b-it:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: ultrafast
    litellm_params:
      model: openrouter/google/gemma-3n-e4b-it:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: gemma-3n-e4b-it
    litellm_params:
      model: openrouter/google/gemma-3n-e4b-it:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: ultrafast
    litellm_params:
      model: openrouter/arcee-ai/trinity-mini:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: trinity-mini
    litellm_params:
      model: openrouter/arcee-ai/trinity-mini:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: ultrafast
    litellm_params:
      model: openrouter/xiaomi/mimo-v2-flash:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: mimo-v2-flash
    litellm_params:
      model: openrouter/xiaomi/mimo-v2-flash:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: ultrafast
    litellm_params:
      model: openrouter/nvidia/nemotron-nano-9b-v2:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  - model_name: nemotron-nano-9b-v2
    litellm_params:
      model: openrouter/nvidia/nemotron-nano-9b-v2:free
      api_key: os.environ/OPENROUTER_API_KEY
      api_base: https://openrouter.ai/api/v1/
  # ============================================================
  # 🟦 GOOGLE GEMINI API — (only the models from your list) + limits
  # ============================================================
  # =========================
  # 🦍 GEMINI — LARGE
  # =========================
  - model_name: large
    litellm_params:
      model: gemini/gemini-robotics-er-1.5-preview
      api_key: os.environ/GEMINI_API_KEY
      api_base: https://generativelanguage.googleapis.com/v1beta/openai/
    model_info:
      category: Other models
      rpm: 10
      tpm: 250000
      rpd: 250
  # (gemma-3-27b is more of a medium model by our heuristic)
  - model_name: medium
    litellm_params:
      model: gemini/gemma-3-27b
      api_key: os.environ/GEMINI_API_KEY
      api_base: https://generativelanguage.googleapis.com/v1beta/openai/
    model_info:
      category: Other models
      rpm: 30
      tpm: 15000
      rpd: 14400
  - model_name: gemma-3-27b
    litellm_params:
      model: gemini/gemma-3-27b
      api_key: os.environ/GEMINI_API_KEY
      api_base: https://generativelanguage.googleapis.com/v1beta/openai/
    model_info:
      category: Other models
      rpm: 30
      tpm: 15000
      rpd: 14400
  # =========================
  # ⚡ GEMINI — FAST
  # =========================
  - model_name: fast
    litellm_params:
      model: gemini/gemini-2.5-flash
      api_key: os.environ/GEMINI_API_KEY
      api_base: https://generativelanguage.googleapis.com/v1beta/openai/
    model_info:
      category: Text output models
      rpm: 5
      tpm: 250000
      rpd: 20
  - model_name: gemini-2.5-flash
    litellm_params:
      model: gemini/gemini-2.5-flash
      api_key: os.environ/GEMINI_API_KEY
      api_base: https://generativelanguage.googleapis.com/v1beta/openai/
    model_info:
      category: Text output models
      rpm: 5
      tpm: 250000
      rpd: 20
  - model_name: fast
    litellm_params:
      model: gemini/gemma-3-12b
      api_key: os.environ/GEMINI_API_KEY
      api_base: https://generativelanguage.googleapis.com/v1beta/openai/
    model_info:
      category: Other models
      rpm: 30
      tpm: 15000
      rpd: 14400
  - model_name: gemma-3-12b
    litellm_params:
      model: gemini/gemma-3-12b
      api_key: os.environ/GEMINI_API_KEY
      api_base: https://generativelanguage.googleapis.com/v1beta/openai/
    model_info:
      category: Other models
      rpm: 30
      tpm: 15000
      rpd: 14400
  # =========================
  # 💨 GEMINI — ULTRAFAST
  # =========================
  - model_name: ultrafast
    litellm_params:
      model: gemini/gemini-2.5-flash-lite
      api_key: os.environ/GEMINI_API_KEY
      api_base: https://generativelanguage.googleapis.com/v1beta/openai/
    model_info:
      category: Text output models
      rpm: 10
      tpm: 250000
      rpd: 20
  - model_name: gemini-2.5-flash-lite
    litellm_params:
      model: gemini/gemini-2.5-flash-lite
      api_key: os.environ/GEMINI_API_KEY
      api_base: https://generativelanguage.googleapis.com/v1beta/openai/
    model_info:
      category: Text output models
      rpm: 10
      tpm: 250000
      rpd: 20
  - model_name: ultrafast
    litellm_params:
      model: gemini/gemma-3-4b
      api_key: os.environ/GEMINI_API_KEY
      api_base: https://generativelanguage.googleapis.com/v1beta/openai/
    model_info:
      category: Other models
      rpm: 30
      tpm: 15000
      rpd: 14400
  - model_name: gemma-3-4b
    litellm_params:
      model: gemini/gemma-3-4b
      api_key: os.environ/GEMINI_API_KEY
      api_base: https://generativelanguage.googleapis.com/v1beta/openai/
    model_info:
      category: Other models
      rpm: 30
      tpm: 15000
      rpd: 14400
  - model_name: ultrafast
    litellm_params:
      model: gemini/gemma-3-2b
      api_key: os.environ/GEMINI_API_KEY
      api_base: https://generativelanguage.googleapis.com/v1beta/openai/
    model_info:
      category: Other models
      rpm: 30
      tpm: 15000
      rpd: 14400
  - model_name: gemma-3-2b
    litellm_params:
      model: gemini/gemma-3-2b
      api_key: os.environ/GEMINI_API_KEY
      api_base: https://generativelanguage.googleapis.com/v1beta/openai/
    model_info:
      category: Other models
      rpm: 30
      tpm: 15000
      rpd: 14400
  - model_name: ultrafast
    litellm_params:
      model: gemini/gemma-3-1b
      api_key: os.environ/GEMINI_API_KEY
      api_base: https://generativelanguage.googleapis.com/v1beta/openai/
    model_info:
      category: Other models
      rpm: 30
      tpm: 15000
      rpd: 14400
  - model_name: gemma-3-1b
    litellm_params:
      model: gemini/gemma-3-1b
      api_key: os.environ/GEMINI_API_KEY
      api_base: https://generativelanguage.googleapis.com/v1beta/openai/
    model_info:
      category: Other models
      rpm: 30
      tpm: 15000
      rpd: 14400
  # ============================================================
  # 🟣 AWAN — (disabled/unstable on this setup, but we leave it in)
  # ============================================================
  # =========================
  # 🦍 AWAN — LARGE
  # =========================
  - model_name: large
    litellm_params:
      model: openai/Meta-Llama-3.1-70B-Instruct
      api_base: https://api.awanllm.com/v1
      api_key: os.environ/AWAN_API_KEY
  - model_name: awan
    litellm_params:
      model: openai/Meta-Llama-3.1-70B-Instruct
      api_base: https://api.awanllm.com/v1
      api_key: os.environ/AWAN_API_KEY
  - model_name: awan-llama-3.1-70b
    litellm_params:
      model: openai/Meta-Llama-3.1-70B-Instruct
      api_base: https://api.awanllm.com/v1
      api_key: os.environ/AWAN_API_KEY
  - model_name: large
    litellm_params:
      model: openai/Meta-Llama-3-70B-Instruct
      api_base: https://api.awanllm.com/v1
      api_key: os.environ/AWAN_API_KEY
  - model_name: awan
    litellm_params:
      model: openai/Meta-Llama-3-70B-Instruct
      api_base: https://api.awanllm.com/v1
      api_key: os.environ/AWAN_API_KEY
  - model_name: awan-llama-3-70b
    litellm_params:
      model: openai/Meta-Llama-3-70B-Instruct
      api_base: https://api.awanllm.com/v1
      api_key: os.environ/AWAN_API_KEY
  # =========================
  # ⚡ AWAN — FAST
  # =========================
  - model_name: fast
    litellm_params:
      model: openai/Meta-Llama-3-8B-Instruct
      api_base: https://api.awanllm.com/v1
      api_key: os.environ/AWAN_API_KEY
  - model_name: awan-llama-3-8b
    litellm_params:
      model: openai/Meta-Llama-3-8B-Instruct
      api_base: https://api.awanllm.com/v1
      api_key: os.environ/AWAN_API_KEY
  - model_name: fast
    litellm_params:
      model: openai/Meta-Llama-3.1-8B-Instruct
      api_base: https://api.awanllm.com/v1
      api_key: os.environ/AWAN_API_KEY
  - model_name: awan-llama-3.1-8b
    litellm_params:
      model: openai/Meta-Llama-3.1-8B-Instruct
      api_base: https://api.awanllm.com/v1
      api_key: os.environ/AWAN_API_KEY
  # =========================
  # 💨 AWAN — ULTRAFAST
  # =========================
  - model_name: ultrafast
    litellm_params:
      model: openai/Awanllm-Llama-3-8B-Cumulus
      api_base: https://api.awanllm.com/v1
      api_key: os.environ/AWAN_API_KEY
  - model_name: awan-cumulus-8b
    litellm_params:
      model: openai/Awanllm-Llama-3-8B-Cumulus
      api_base: https://api.awanllm.com/v1
      api_key: os.environ/AWAN_API_KEY
  - model_name: ultrafast
    litellm_params:
      model: openai/Awanllm-Llama-3-8B-Dolfin
      api_base: https://api.awanllm.com/v1
      api_key: os.environ/AWAN_API_KEY
  - model_name: awan-dolfin-8b
    litellm_params:
      model: openai/Awanllm-Llama-3-8B-Dolfin
      api_base: https://api.awanllm.com/v1
      api_key: os.environ/AWAN_API_KEY
  # ============================================================
  # 🟩 GROQ — large / medium / fast / ultrafast + groq alias
  # ============================================================
  # =========================
  # 🦍 GROQ — LARGE
  # =========================
  - model_name: large
    litellm_params:
      model: groq/openai/gpt-oss-120b
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  - model_name: groq-gpt-oss-120b
    litellm_params:
      model: groq/openai/gpt-oss-120b
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  - model_name: large
    litellm_params:
      model: groq/llama-3.3-70b-versatile
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  - model_name: groq-llama-3.3-70b-versatile
    litellm_params:
      model: groq/llama-3.3-70b-versatile
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  - model_name: large
    litellm_params:
      model: groq/qwen/qwen3-32b
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  - model_name: groq-qwen3-32b
    litellm_params:
      model: groq/qwen/qwen3-32b
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  - model_name: large
    litellm_params:
      model: groq/moonshotai/kimi-k2-instruct
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  - model_name: groq-kimi-k2-instruct
    litellm_params:
      model: groq/moonshotai/kimi-k2-instruct
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  - model_name: large
    litellm_params:
      model: groq/moonshotai/kimi-k2-instruct-0905
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  - model_name: groq-kimi-k2-instruct-0905
    litellm_params:
      model: groq/moonshotai/kimi-k2-instruct-0905
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  # =========================
  # 🧱 GROQ — MEDIUM
  # =========================
  - model_name: medium
    litellm_params:
      model: groq/openai/gpt-oss-20b
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  - model_name: groq-gpt-oss-20b
    litellm_params:
      model: groq/openai/gpt-oss-20b
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  - model_name: medium
    litellm_params:
      model: groq/meta-llama/llama-4-maverick-17b-128e-instruct
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  - model_name: groq-llama-4-maverick-17b-128e-instruct
    litellm_params:
      model: groq/meta-llama/llama-4-maverick-17b-128e-instruct
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  - model_name: medium
    litellm_params:
      model: groq/meta-llama/llama-4-scout-17b-16e-instruct
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  - model_name: groq-llama-4-scout-17b-16e-instruct
    litellm_params:
      model: groq/meta-llama/llama-4-scout-17b-16e-instruct
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  # =========================
  # 💨 GROQ — ULTRAFAST
  # =========================
  - model_name: ultrafast
    litellm_params:
      model: groq/llama-3.1-8b-instant
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  - model_name: groq-llama-3.1-8b-instant
    litellm_params:
      model: groq/llama-3.1-8b-instant
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  - model_name: ultrafast
    litellm_params:
      model: groq/allam-2-7b
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  - model_name: groq-allam-2-7b
    litellm_params:
      model: groq/allam-2-7b
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  # =========================
  # 🧰 GROQ — SPECIAL (not part of the pools)
  # =========================
  - model_name: groq-compound
    litellm_params:
      model: groq/groq/compound
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  - model_name: groq-compound-mini
    litellm_params:
      model: groq/groq/compound-mini
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  - model_name: groq-gpt-oss-safeguard-20b
    litellm_params:
      model: groq/openai/gpt-oss-safeguard-20b
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  - model_name: groq-llama-guard-4-12b
    litellm_params:
      model: groq/meta-llama/llama-guard-4-12b
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  - model_name: groq-llama-prompt-guard-2-22m
    litellm_params:
      model: groq/meta-llama/llama-prompt-guard-2-22m
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  - model_name: groq-llama-prompt-guard-2-86m
    litellm_params:
      model: groq/meta-llama/llama-prompt-guard-2-86m
      api_base: https://api.groq.com/openai/v1
      api_key: os.environ/GROQ_API_KEY
  # ============================================================
  # 🐙 GITHUB MODELS — in the pools (large/medium/fast/ultrafast)
  # ============================================================
  # The provider prefix is mandatory: github/<modelname>
  # Do NOT use the company/publisher prefix.
  #
  # Env:
  #   export GITHUB_API_KEY="github_pat_...."   (or via systemd env)
  #
  # Endpoint:
  #   https://models.github.ai/inference/chat/completions
  #   -> api_base in LiteLLM: https://models.github.ai/inference
  # -------------------------
  # 🦍 GitHub — LARGE
  # -------------------------
  - model_name: large
    litellm_params:
      model: github/gpt-4.1
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: github-gpt-4.1
    litellm_params:
      model: github/gpt-4.1
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: large
    litellm_params:
      model: github/gpt-4o
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: github-gpt-4o
    litellm_params:
      model: github/gpt-4o
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: large
    litellm_params:
      model: github/Llama-3.3-70B-Instruct
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: github-llama-3.3-70b-instruct
    litellm_params:
      model: github/Llama-3.3-70B-Instruct
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: large
    litellm_params:
      model: github/Llama-3.2-90B-Vision-Instruct
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: github-llama-3.2-90b-vision
    litellm_params:
      model: github/Llama-3.2-90B-Vision-Instruct
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: large
    litellm_params:
      model: github/Meta-Llama-3.1-405B-Instruct
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: github-llama-3.1-405b-instruct
    litellm_params:
      model: github/Meta-Llama-3.1-405B-Instruct
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: large
    litellm_params:
      model: github/DeepSeek-V3-0324
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: github-deepseek-v3-0324
    litellm_params:
      model: github/DeepSeek-V3-0324
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: large
    litellm_params:
      model: github/DeepSeek-R1
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: github-deepseek-r1
    litellm_params:
      model: github/DeepSeek-R1
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: large
    litellm_params:
      model: github/AI21-Jamba-1-5-Large
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: github-ai21-jamba-1.5-large
    litellm_params:
      model: github/AI21-Jamba-1-5-Large
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: large
    litellm_params:
      model: github/Cohere-command-r-plus-08-2024
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: github-cohere-command-r-plus
    litellm_params:
      model: github/Cohere-command-r-plus-08-2024
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  # -------------------------
  # 🧱 GitHub — MEDIUM
  # -------------------------
  - model_name: medium
    litellm_params:
      model: github/Llama-3.2-11B-Vision-Instruct
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: github-llama-3.2-11b-vision
    litellm_params:
      model: github/Llama-3.2-11B-Vision-Instruct
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: medium
    litellm_params:
      model: github/Llama-4-Maverick-17B-128E-Instruct-FP8
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: github-llama-4-maverick-17b
    litellm_params:
      model: github/Llama-4-Maverick-17B-128E-Instruct-FP8
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: medium
    litellm_params:
      model: github/Llama-4-Scout-17B-16E-Instruct
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: github-llama-4-scout-17b
    litellm_params:
      model: github/Llama-4-Scout-17B-16E-Instruct
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: medium
    litellm_params:
      model: github/Codestral-2501
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: github-codestral-2501
    litellm_params:
      model: github/Codestral-2501
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: medium
    litellm_params:
      model: github/Phi-4
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: github-phi-4
    litellm_params:
      model: github/Phi-4
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  # -------------------------
  # ⚡ GitHub — FAST
  # -------------------------
  - model_name: fast
    litellm_params:
      model: github/Ministral-3B
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: github-ministral-3b
    litellm_params:
      model: github/Ministral-3B
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: fast
    litellm_params:
      model: github/Meta-Llama-3.1-8B-Instruct
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: github-llama-3.1-8b-instruct
    litellm_params:
      model: github/Meta-Llama-3.1-8B-Instruct
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  # -------------------------
  # 💨 GitHub — ULTRAFAST
  # -------------------------
  - model_name: ultrafast
    litellm_params:
      model: github/Phi-4-mini-instruct
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: github-phi-4-mini-instruct
    litellm_params:
      model: github/Phi-4-mini-instruct
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: ultrafast
    litellm_params:
      model: github/Phi-4-mini-reasoning
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
  - model_name: github-phi-4-mini-reasoning
    litellm_params:
      model: github/Phi-4-mini-reasoning
      api_base: https://models.github.ai/inference
      api_key: os.environ/GITHUB_API_KEY
litellm_settings:
  # Must stay low, otherwise the router won't try a different deployment
  num_retries: 5

router_settings:
  routing_strategy: simple-shuffle
  # Circuit breaker: after X failures a deployment is marked as "bad"
  allowed_fails: 2
  # A bad deployment is taken out of the pool for N seconds
  cooldown_time: 300
  fallbacks:
    - large: [medium, fast, ultrafast]
    - medium: [fast, ultrafast, large]
    - fast: [ultrafast, medium, large]
    - ultrafast: [fast, medium, large]

general_settings:
  disable_auth: true
  request_timeout: 600   # 10 min overall timeout
  connect_timeout: 30    # connecting may be slow, but not endless
  read_timeout: 600      # important for long streams/outputs
  log_level: INFO
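To confirm that every pool alias and per-model entry loaded, the proxy's OpenAI-compatible model listing can be queried; a request against a pool alias then exercises the `simple-shuffle` routing plus the fallback chains above. A small sketch under the same assumptions as the example at the top (local proxy on port 4000, auth disabled):

```python
# Verification sketch: list every model_name the proxy registered
# from this config (pool aliases like "large" appear once each).
from openai import OpenAI

client = OpenAI(base_url="http://localhost:4000/v1", api_key="unused")
for model in client.models.list().data:
    print(model.id)
```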