|
# NOTE(review): presumably the preset-file format version the router expects — confirm before changing |
|
version = 1 |
|
|
|
# WIP config: per-model settings I tweak for use in router mode |
|
|
|
# Global |
|
[*] |
|
# Shared defaults for the model sections below. NOTE(review): a key repeated inside a model section is presumably an override of the value here — confirm against the router docs |
|
# Offload all layers to the GPU VRAM |
|
n-gpu-layers = 999 |
|
# NOTE(review): this was described as "all the threads for HTTP requests", but in llama.cpp `threads` looks like the CPU generation thread count, while HTTP workers are `threads-http` — confirm which one was intended |
|
threads = -1 |
|
# Not sure if this should be on or off for my setup (default on) |
|
fit = on |
|
# Disable memory-mapping of the model, since everything is loaded into VRAM |
|
no-mmap = 1 |
|
# Use direct-io (currently disabled) |
|
# direct-io = 1 |
|
|
|
# unsloth suggests --ctx-size 16384 (16k), which seems low |
|
# Suggested: --ctx-size 32768 (32k) for fast coding |
|
# Suggested: --ctx-size 65536 (64k) for multi-file work or a big refactor |
|
# Suggested (extreme): --ctx-size 131072 (128k) for one-shot analysis (project dump) |
|
# Some models support --ctx-size 204800 (200k), which is HUGE |
|
ctx-size = 131072 |
|
|
|
# GLM-4.7-Flash |
|
[glm-4.7-flash] |
|
# Alternative quantizations, kept commented out; the uncommented hf-repo below is the active one |
|
# hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q8_0 |
|
# hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q6_K_XL |
|
hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q8_K_XL |
|
seed = 3407 |
|
temp = 1.0 |
|
top-p = 0.95 |
|
min-p = 0.01 |
|
repeat-penalty = 1.0 |
|
# Same value as the ctx-size already set in [*] |
|
ctx-size = 131072 |
|
|
|
# [glm-4.7-flash-16k] |
|
# hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q8_0 |
|
# hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q6_K_XL |
|
# hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q8_K_XL |
|
# seed = 3407 |
|
# temp = 1.0 |
|
# top-p = 0.95 |
|
# min-p = 0.01 |
|
# repeat-penalty = 1.0 |
|
# ctx-size = 16384 |
|
|
|
# [glm-4.7-flash-32k] |
|
# hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q8_0 |
|
# hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q6_K_XL |
|
# hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q8_K_XL |
|
# seed = 3407 |
|
# temp = 1.0 |
|
# top-p = 0.95 |
|
# min-p = 0.01 |
|
# repeat-penalty = 1.0 |
|
# ctx-size = 32768 |
|
|
|
# [glm-4.7-flash-64k] |
|
# hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q8_0 |
|
# hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q6_K_XL |
|
# hf-repo = unsloth/GLM-4.7-Flash-GGUF:Q8_K_XL |
|
# seed = 3407 |
|
# temp = 1.0 |
|
# top-p = 0.95 |
|
# min-p = 0.01 |
|
# repeat-penalty = 1.0 |
|
# ctx-size = 65536 |
|
|
|
# gpt-oss-120b-GGUF |
|
[gpt-oss-120b] |
|
# Alternative sources/quantizations, kept commented out; the uncommented hf-repo below is the active one |
|
# hf-repo = ggml-org/gpt-oss-120b-GGUF |
|
# hf-repo = unsloth/gpt-oss-120b-GGUF:Q8_0 |
|
hf-repo = unsloth/gpt-oss-120b-GGUF:Q8_K_XL |
|
temp = 1.0 |
|
min-p = 0.0 |
|
top-p = 1.0 |
|
# top-k is an integer token count, so it is written as 0 rather than 0.0 (0 disables top-k filtering in llama.cpp) |
|
top-k = 0 |
|
# Same value as the ctx-size already set in [*] |
|
ctx-size = 131072 |
|
|
|
# gpt-oss-20b-GGUF |
|
[gpt-oss-20b] |
|
# Alternative sources/quantizations, kept commented out; the uncommented hf-repo below is the active one |
|
# hf-repo = ggml-org/gpt-oss-20b-GGUF |
|
# hf-repo = unsloth/gpt-oss-20b-GGUF:Q6_K_XL |
|
# hf-repo = unsloth/gpt-oss-20b-GGUF:Q8_0 |
|
hf-repo = unsloth/gpt-oss-20b-GGUF:Q8_K_XL |
|
temp = 1.0 |
|
min-p = 0.0 |
|
top-p = 1.0 |
|
# top-k is an integer token count, so it is written as 0 rather than 0.0 (0 disables top-k filtering in llama.cpp) |
|
top-k = 0 |
|
|
|
# Qwen 3.5-122b |
|
[qwen3.5-122b-coder] |
|
# Coding-oriented sampling profile (temp 0.6) for the Qwen3.5-122B-A10B UD-Q4_K_XL quant |
|
hf-repo = unsloth/Qwen3.5-122B-A10B-GGUF:UD-Q4_K_XL |
|
# ctx-size = 16384 |
|
temp = 0.6 |
|
# Decimal separator must be a point: "0,95" is not a valid float, and a C-locale parser stops at the comma and reads 0 |
|
top-p = 0.95 |
|
top-k = 20 |
|
min-p = 0.00 |
|
repeat-penalty = 1.0 |
|
presence-penalty = 0.00 |
|
|
|
[qwen3.5-122b-tasks] |
|
# General-task sampling profile (temp 1.0) for the Qwen3.5-122B-A10B UD-Q4_K_XL quant |
|
hf-repo = unsloth/Qwen3.5-122B-A10B-GGUF:UD-Q4_K_XL |
|
# ctx-size = 16384 |
|
temp = 1.0 |
|
# Decimal separator must be a point: "0,95" is not a valid float, and a C-locale parser stops at the comma and reads 0 |
|
top-p = 0.95 |
|
top-k = 20 |
|
min-p = 0.00 |
|
|
|
# Qwen3.5-35B-A3B |
|
[qwen3.5-35b-a3b-coder] |
|
# Coding-oriented sampling profile (temp 0.6) for the Qwen3.5-35B-A3B UD-Q8_K_XL quant |
|
hf-repo = unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q8_K_XL |
|
temp = 0.6 |
|
# Decimal separator must be a point: "0,95" is not a valid float, and a C-locale parser stops at the comma and reads 0 |
|
top-p = 0.95 |
|
top-k = 20 |
|
min-p = 0.00 |
|
repeat-penalty = 1.0 |
|
presence-penalty = 0.00 |
|
|
|
[qwen3.5-35b-a3b-tasks] |
|
# General-task sampling profile (temp 1.0) for the Qwen3.5-35B-A3B UD-Q8_K_XL quant |
|
hf-repo = unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q8_K_XL |
|
temp = 1.0 |
|
# Decimal separator must be a point: "0,95" is not a valid float, and a C-locale parser stops at the comma and reads 0 |
|
top-p = 0.95 |
|
top-k = 20 |
|
min-p = 0.00 |
|
|
|
# Qwen3-Coder-30B-A3B-Instruct |
|
[qwen3-coder-30b] |
|
# Alternative sources/quantizations are kept commented out; the single uncommented hf-repo is the active one |
|
# hf-repo = unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Q6_K_XL |
|
# hf-repo = unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Q8_0 |
|
hf-repo = unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Q8_K_XL |
|
# hf-repo = ggml-org/Qwen3-Coder-30B-A3B-Instruct-Q8_0-GGUF |
|
# NOTE(review): ngl is presumably the short form of n-gpu-layers, so this replaces the [*] value of 999 with 99 for this preset — confirm the override is intended |
|
ngl = 99 |
|
temp = 0.7 |
|
min-p = 0.0 |
|
top-p = 0.80 |
|
top-k = 20 |
|
repeat-penalty = 1.05 |
|
# Same value as the ctx-size already set in [*] |
|
ctx-size = 131072 |
|
|
|
# Qwen3-Coder-Next |
|
[qwen3-coder-next] |
|
# Qwen3-Coder-Next at the Q8_K_XL quantization; no ctx-size here, so presumably the [*] value applies — TODO confirm |
|
hf-repo = unsloth/Qwen3-Coder-Next-GGUF:Q8_K_XL |
|
seed = 3407 |
|
temp = 1.0 |
|
top-p = 0.95 |
|
min-p = 0.01 |
|
top-k = 40 |
|
|
|
[qwen3-coder-next-q6-k] |
|
# Same sampling settings as [qwen3-coder-next]; only the quantization (Q6_K) differs |
|
hf-repo = unsloth/Qwen3-Coder-Next-GGUF:Q6_K |
|
seed = 3407 |
|
temp = 1.0 |
|
top-p = 0.95 |
|
min-p = 0.01 |
|
top-k = 40 |
|
|
|
# [qwen3-coder-next-q8-0] |
|
# hf-repo = unsloth/Qwen3-Coder-Next-GGUF:Q8_0 |
|
# seed = 3407 |
|
# temp = 1.0 |
|
# top-p = 0.95 |
|
# min-p = 0.01 |
|
# top-k = 40 |
|
|
|
# Qwen3-Next-80B-A3B-Instruct |
|
[qwen3-next-instruct] |
|
# Qwen3-Next-80B-A3B-Instruct at the Q8_K_XL quantization |
|
hf-repo = unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF:Q8_K_XL |
|
# NOTE(review): ngl is presumably the short form of n-gpu-layers, so this replaces the [*] value of 999 with 99 for this preset — confirm the override is intended |
|
ngl = 99 |
|
temp = 0.7 |
|
min-p = 0.0 |
|
top-p = 0.80 |
|
top-k = 20 |
|
presence-penalty = 1.0 |
|
|
|
[qwen3-next-instruct-q6-k] |
|
# Same settings as [qwen3-next-instruct]; only the quantization (Q6_K) differs |
|
hf-repo = unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF:Q6_K |
|
# NOTE(review): ngl is presumably the short form of n-gpu-layers, so this replaces the [*] value of 999 with 99 for this preset — confirm the override is intended |
|
ngl = 99 |
|
temp = 0.7 |
|
min-p = 0.0 |
|
top-p = 0.80 |
|
top-k = 20 |
|
presence-penalty = 1.0 |
|
|
|
[qwen3-next-instruct-q8-0] |
|
# Same settings as [qwen3-next-instruct]; only the quantization (Q8_0) differs |
|
hf-repo = unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF:Q8_0 |
|
# NOTE(review): ngl is presumably the short form of n-gpu-layers, so this replaces the [*] value of 999 with 99 for this preset — confirm the override is intended |
|
ngl = 99 |
|
temp = 0.7 |
|
min-p = 0.0 |
|
top-p = 0.80 |
|
top-k = 20 |
|
presence-penalty = 1.0 |
|
|
|
# Gemma 3 |
|
[gemma-3] |
|
# Gemma 3 27B instruction-tuned; the Q6_K_XL alternative is kept commented out |
|
# hf-repo = unsloth/gemma-3-27b-it-GGUF:Q6_K_XL |
|
hf-repo = unsloth/gemma-3-27b-it-GGUF:Q8_K_XL |
|
seed = 3407 |
|
# NOTE(review): prio looks like llama.cpp's process/thread priority setting (2 would be "high" on that scale) — confirm |
|
prio = 2 |
|
temp = 1.0 |
|
repeat-penalty = 1.0 |
|
min-p = 0.01 |
|
top-k = 64 |
|
top-p = 0.95 |
|
|
|
[gemma-3-12b] |
|
# NOTE(review): the commented-out alternative below points at the 27b repo, not the 12b one — likely a copy-paste leftover from [gemma-3]; fix it before uncommenting |
|
# hf-repo = unsloth/gemma-3-27b-it-GGUF:Q6_K_XL |
|
hf-repo = unsloth/gemma-3-12b-it-GGUF:UD-Q8_K_XL |
|
seed = 3407 |
|
# NOTE(review): prio looks like llama.cpp's process/thread priority setting (2 would be "high" on that scale) — confirm |
|
prio = 2 |
|
temp = 1.0 |
|
repeat-penalty = 1.0 |
|
min-p = 0.01 |
|
top-k = 64 |
|
top-p = 0.95 |
|
|
|
# [gemma-3-q6] |
|
# hf-repo = unsloth/gemma-3-27b-it-GGUF:Q6_K_XL |
|
# seed = 3407 |
|
# prio = 2 |
|
# temp = 1.0 |
|
# repeat-penalty = 1.0 |
|
# min-p = 0.01 |
|
# top-k = 64 |
|
# top-p = 0.95 |
|
|
|
# Devstral-Small-2-24B-Instruct-2512 |
|
[devstral-small-2-24b] |
|
# hf-repo = ggml-org/Devstral-Small-2-24B-Instruct-2512-GGUF |
|
hf-repo = unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF:Q8_K_XL |
|
seed = 3407 |
|
# NOTE(review): prio looks like llama.cpp's process/thread priority setting (3 would be "realtime" on that scale) — confirm this is intended |
|
prio = 3 |
|
temp = 0.15 |
|
min-p = 0.01 |
|
|
|
# Devstral-2-123B-Instruct-2512 |
|
[devstral-2-123b] |
|
# Active quantization is Q4_K_XL; the larger quantizations are kept commented out |
|
hf-repo = unsloth/Devstral-2-123B-Instruct-2512-GGUF:Q4_K_XL |
|
# hf-repo = unsloth/Devstral-2-123B-Instruct-2512-GGUF:Q5_K_XL |
|
# hf-repo = unsloth/Devstral-2-123B-Instruct-2512-GGUF:Q6_K |
|
# hf-repo = unsloth/Devstral-2-123B-Instruct-2512-GGUF:Q6_K_XL |
|
seed = 3407 |
|
# NOTE(review): prio looks like llama.cpp's process/thread priority setting (3 would be "realtime" on that scale) — confirm this is intended |
|
prio = 3 |
|
temp = 0.15 |
|
min-p = 0.01 |
|
|