@jrosell
Last active September 3, 2025 15:27
What are the fastest small Ollama models for quick LLM tasks?

pak::pak(c("ollamar", "ellmer", "tidyverse", "bench", "tictoc"))
library(ellmer)
library(tidyverse)

all_models <- c(
  "qwen2.5:0.5b",
  "qwen2.5-coder:3b",
  "deepseek-coder:1.3b",
  "internlm2:1m",
  "qwen:0.5b",
  "qwen2:0.5b",
  "qwen3:0.6b",
  "gemma3:1b",
  "llama-guard3:1b",
  "tinydolphin:1.1b",
  "falcon3:1b",
  "sailor2:1b",
  "granite3.1-moe:1b",
  "starcoder:1b",
  "llama3.2:1b",
  "tinyllama:1.1b",
  "smollm2:1.7b",
  "qwen3:1.7b",
  "qwen:1.8b",
  "deepseek-r1:1.5b",
  "qwen2:1.5b",
  "qwen2.5:1.5b",
  "codegemma:2b",
  "gemma2:2b",
  "orca-mini:3b",
  "qwen2.5:3b",
  "starcoder2:3b",
  "falcon3:3b",
  "llama3.2:3b",
  "qwen3:4b",
  "gemma3:4b",
  "qwen:4b"
)

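# Pull every model locally up front so download time is excluded from the benchmark timings.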
all_models |>
  walk(\(x) {
    cat(paste0(x, "...\n"))
    ollamar::pull(x)
  })
#> qwen2.5:0.5b...
#> qwen2.5-coder:3b...
#> deepseek-coder:1.3b...
#> internlm2:1m...
#> qwen:0.5b...
#> qwen2:0.5b...
#> qwen3:0.6b...
#> gemma3:1b...
#> llama-guard3:1b...
#> tinydolphin:1.1b...
#> falcon3:1b...
#> sailor2:1b...
#> granite3.1-moe:1b...
#> starcoder:1b...
#> llama3.2:1b...
#> tinyllama:1.1b...
#> smollm2:1.7b...
#> qwen3:1.7b...
#> qwen:1.8b...
#> deepseek-r1:1.5b...
#> qwen2:1.5b...
#> qwen2.5:1.5b...
#> codegemma:2b...
#> gemma2:2b...
#> orca-mini:3b...
#> qwen2.5:3b...
#> starcoder2:3b...
#> falcon3:3b...
#> llama3.2:3b...
#> qwen3:4b...
#> gemma3:4b...
#> qwen:4b...
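A possible refinement, not in the original gist: skip tags that are already present locally before pulling. This sketch assumes ollamar::list_models() returns a data frame with a name column, which may differ between ollamar versions.

# Only pull the models that are not already on the local Ollama server.
installed <- tryCatch(ollamar::list_models()$name, error = \(e) character())
all_models |>
  setdiff(installed) |>
  walk(\(x) {
    cat(paste0(x, "...\n"))
    ollamar::pull(x)
  })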

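# Build one unevaluated chat_ollama() call per model; bench::mark() will evaluate and time each one.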
benchmark_expr <- all_models |>
  lapply(\(m) {
    expr(
      chat_ollama(model = !!m)$chat("1 + 1 =", echo = FALSE)
    )
  }) |>
  setNames(all_models)

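# Time 10 chat completions per model; check = FALSE because the replies differ between runs.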
tictoc::tic("benchmark")
benchmark_results <- do.call(
  bench::mark,
  c(benchmark_expr, list(iterations = 10, check = FALSE))
)
#> Warning: Some expressions had a GC in every iteration; so filtering is
#> disabled.
tictoc::toc()
#> benchmark: 749.885 sec elapsed

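# Summarise the median time per model and express speed as a multiple of the slowest model.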
benchmark_plot_data <- benchmark_results |>
  mutate(faster = as.double(max(median) / median)) |>
  arrange(median) |>
  transmute(model = names(expression), median, total_time, faster) |>
  mutate(model = fct_reorder(model, median) |> fct_rev()) |>
  print(n = Inf)
#> # A tibble: 32 × 4
#>    model                 median total_time faster
#>    <fct>               <bch:tm>   <bch:tm>  <dbl>
#>  1 qwen:0.5b            205.2ms      4.12s  30.7 
#>  2 qwen2:0.5b          205.22ms      3.37s  30.7 
#>  3 falcon3:3b          215.07ms      5.52s  29.3 
#>  4 llama-guard3:1b     216.41ms      9.55s  29.1 
#>  5 qwen2:1.5b          220.36ms      3.92s  28.6 
#>  6 qwen:1.8b           228.47ms      4.65s  27.6 
#>  7 qwen:4b             235.74ms      9.83s  26.7 
#>  8 qwen2.5:1.5b        255.54ms     10.09s  24.7 
#>  9 smollm2:1.7b        257.11ms     10.26s  24.5 
#> 10 gemma3:1b           271.32ms      4.49s  23.2 
#> 11 llama3.2:1b         278.55ms     10.63s  22.6 
#> 12 codegemma:2b        279.98ms     31.48s  22.5 
#> 13 orca-mini:3b        285.33ms      6.41s  22.1 
#> 14 llama3.2:3b         302.23ms      5.97s  20.9 
#> 15 qwen2.5-coder:3b    312.13ms      6.06s  20.2 
#> 16 qwen2.5:3b          333.48ms      6.32s  18.9 
#> 17 gemma2:2b           345.44ms       9.6s  18.2 
#> 18 falcon3:1b          378.89ms     11.28s  16.6 
#> 19 qwen2.5:0.5b        435.28ms      7.13s  14.5 
#> 20 gemma3:4b           445.26ms      7.47s  14.2 
#> 21 sailor2:1b          545.63ms      8.03s  11.6 
#> 22 tinydolphin:1.1b    572.58ms     12.98s  11.0 
#> 23 granite3.1-moe:1b   576.25ms      7.59s  10.9 
#> 24 tinyllama:1.1b      636.34ms      7.76s   9.90
#> 25 deepseek-r1:1.5b    803.83ms     10.49s   7.84
#> 26 deepseek-coder:1.3b 854.37ms     12.56s   7.38
#> 27 qwen3:0.6b             2.01s      18.5s   3.14
#> 28 starcoder2:3b          2.07s     31.91s   3.04
#> 29 internlm2:1m           3.11s      41.9s   2.02
#> 30 qwen3:1.7b             4.21s     45.74s   1.50
#> 31 starcoder:1b            5.7s       1.5m   1.11
#> 32 qwen3:4b                6.3s      1.16m   1

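# Horizontal bar chart: longer bars mean faster models (speedup relative to the slowest).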
benchmark_plot_data |>
  ggplot(aes(model, faster)) +
  geom_col() +
  coord_flip()
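
As a follow-up, not part of the original benchmark, the winner could be picked from the results and reused for a quick task. A minimal sketch, assuming the tag stored in benchmark_plot_data$model is accepted as-is by chat_ollama():

# The table is already sorted by median, but arrange() keeps this robust to reordering.
fastest_model <- benchmark_plot_data |>
  arrange(median) |>
  slice_head(n = 1) |>
  pull(model) |>
  as.character()

chat_ollama(model = fastest_model)$chat("Reply with one word: ready", echo = FALSE)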

Created on 2025-09-03 with reprex v2.1.1.9000
