# Install every package this script uses. ollamar, bench and tictoc are only
# called through `::` further down, so they are installed but not attached.
pak::pak(c("ollamar", "ellmer", "tidyverse", "bench", "tictoc"))
library(ellmer)
library(tidyverse)
# Ollama model tags used throughout the script. The order here is the order in
# which the models are pulled and in which the benchmark expressions are built.
all_models <- c(
  "qwen2.5:0.5b", "qwen2.5-coder:3b", "deepseek-coder:1.3b", "internlm2:1m",
  "qwen:0.5b", "qwen2:0.5b", "qwen3:0.6b", "gemma3:1b",
  "llama-guard3:1b", "tinydolphin:1.1b", "falcon3:1b", "sailor2:1b",
  "granite3.1-moe:1b", "starcoder:1b", "llama3.2:1b", "tinyllama:1.1b",
  "smollm2:1.7b", "qwen3:1.7b", "qwen:1.8b", "deepseek-r1:1.5b",
  "qwen2:1.5b", "qwen2.5:1.5b", "codegemma:2b", "gemma2:2b",
  "orca-mini:3b", "qwen2.5:3b", "starcoder2:3b", "falcon3:3b",
  "llama3.2:3b", "qwen3:4b", "gemma3:4b", "qwen:4b"
)
# Pull each model through ollamar, printing its tag first as a progress marker.
for (model_tag in all_models) {
  cat(model_tag, "...\n", sep = "")
  ollamar::pull(model_tag)
}
#> qwen2.5:0.5b...
#> qwen2.5-coder:3b...
#> deepseek-coder:1.3b...
#> internlm2:1m...
#> qwen:0.5b...
#> qwen2:0.5b...
#> qwen3:0.6b...
#> gemma3:1b...
#> llama-guard3:1b...
#> tinydolphin:1.1b...
#> falcon3:1b...
#> sailor2:1b...
#> granite3.1-moe:1b...
#> starcoder:1b...
#> llama3.2:1b...
#> tinyllama:1.1b...
#> smollm2:1.7b...
#> qwen3:1.7b...
#> qwen:1.8b...
#> deepseek-r1:1.5b...
#> qwen2:1.5b...
#> qwen2.5:1.5b...
#> codegemma:2b...
#> gemma2:2b...
#> orca-mini:3b...
#> qwen2.5:3b...
#> starcoder2:3b...
#> falcon3:3b...
#> llama3.2:3b...
#> qwen3:4b...
#> gemma3:4b...
#> qwen:4b...
# Build one unevaluated call per model, named by the model tag. `!!` splices
# the tag into the quoted call so each expression carries its own literal model
# name; nothing is sent to Ollama until the expressions are evaluated later.
benchmark_expr <- setNames(
  lapply(all_models, function(model_tag) {
    expr(chat_ollama(model = !!model_tag)$chat("1 + 1 =", echo = FALSE))
  }),
  all_models
)
# Full argument list for bench::mark(): the named per-model expressions plus
# the benchmark settings. check = FALSE turns off bench::mark()'s comparison of
# the expressions' results against each other.
benchmark_args <- c(benchmark_expr, list(iterations = 10, check = FALSE))

# Time the whole benchmark run with tictoc; do.call() spreads the list so each
# model's expression becomes its own named argument to bench::mark().
tictoc::tic("benchmark")
benchmark_results <- do.call(bench::mark, benchmark_args)
#> Warning: Some expressions had a GC in every iteration; so filtering is
#> disabled.
tictoc::toc()
#> benchmark: 749.885 sec elapsed
# Reduce the benchmark results to the columns needed for reporting, sorted from
# the fastest to the slowest median.
# - model:  expression name as a factor whose levels run from the slowest to
#           the fastest median.
# - faster: ratio of the slowest median to this model's median.
benchmark_plot_data <- benchmark_results |>
  arrange(median) |>
  transmute(
    model = fct_rev(fct_reorder(names(expression), median)),
    median,
    total_time,
    faster = as.double(max(median) / median)
  )

# Show every row rather than the default truncated tibble print.
print(benchmark_plot_data, n = Inf)
#> # A tibble: 32 × 4
#> model median total_time faster
#> <fct> <bch:tm> <bch:tm> <dbl>
#> 1 qwen:0.5b 205.2ms 4.12s 30.7
#> 2 qwen2:0.5b 205.22ms 3.37s 30.7
#> 3 falcon3:3b 215.07ms 5.52s 29.3
#> 4 llama-guard3:1b 216.41ms 9.55s 29.1
#> 5 qwen2:1.5b 220.36ms 3.92s 28.6
#> 6 qwen:1.8b 228.47ms 4.65s 27.6
#> 7 qwen:4b 235.74ms 9.83s 26.7
#> 8 qwen2.5:1.5b 255.54ms 10.09s 24.7
#> 9 smollm2:1.7b 257.11ms 10.26s 24.5
#> 10 gemma3:1b 271.32ms 4.49s 23.2
#> 11 llama3.2:1b 278.55ms 10.63s 22.6
#> 12 codegemma:2b 279.98ms 31.48s 22.5
#> 13 orca-mini:3b 285.33ms 6.41s 22.1
#> 14 llama3.2:3b 302.23ms 5.97s 20.9
#> 15 qwen2.5-coder:3b 312.13ms 6.06s 20.2
#> 16 qwen2.5:3b 333.48ms 6.32s 18.9
#> 17 gemma2:2b 345.44ms 9.6s 18.2
#> 18 falcon3:1b 378.89ms 11.28s 16.6
#> 19 qwen2.5:0.5b 435.28ms 7.13s 14.5
#> 20 gemma3:4b 445.26ms 7.47s 14.2
#> 21 sailor2:1b 545.63ms 8.03s 11.6
#> 22 tinydolphin:1.1b 572.58ms 12.98s 11.0
#> 23 granite3.1-moe:1b 576.25ms 7.59s 10.9
#> 24 tinyllama:1.1b 636.34ms 7.76s 9.90
#> 25 deepseek-r1:1.5b 803.83ms 10.49s 7.84
#> 26 deepseek-coder:1.3b 854.37ms 12.56s 7.38
#> 27 qwen3:0.6b 2.01s 18.5s 3.14
#> 28 starcoder2:3b 2.07s 31.91s 3.04
#> 29 internlm2:1m 3.11s 41.9s 2.02
#> 30 qwen3:1.7b 4.21s 45.74s 1.50
#> 31 starcoder:1b 5.7s 1.5m 1.11
#> 32 qwen3:4b 6.3s 1.16m 1
# Horizontal bar chart of each model's speed-up relative to the slowest model.
# coord_flip() puts the model names on the vertical axis so the labels stay
# readable.
benchmark_plot_data |>
  ggplot(aes(model, faster)) +
  geom_col() +
  coord_flip()

# Created on 2025-09-03 with reprex v2.1.1.9000
