<#
Purpose:
- Benchmark each model in two phases (meta prompt -> re-run with its output) and append the results to a CSV
- MaxTokens (= num_predict) is set to 128000
Prerequisites:
- Ollama is running at http://localhost:11434
- Missing models are pulled via the "ollama" CLI (warning only on failure)
Commands to download the models in advance:
ollama pull gpt-oss:20b
ollama pull gpt-oss:120b
ollama pull gemma3:27b
ollama pull gemma3:12b
ollama pull gemma3:4b
ollama pull gemma3:1b
ollama pull deepseek-r1:8b
ollama pull qwen3:30b
ollama pull qwen3:8b
ollama pull qwen3:4b
#>
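<#
Illustrative usage (a minimal sketch): the file name ollama_bench.ps1 and the output paths in the
second call are assumptions for the example only; the remaining values mirror the param() defaults.

  # Run with the default model list, writing .\ollama_bench_runs.csv and a timestamped env JSON
  .\ollama_bench.ps1

  # Run a subset of models against the local endpoint with explicit output files
  .\ollama_bench.ps1 -Models 'gemma3:4b','qwen3:4b' -Endpoint 'http://localhost:11434' `
    -OutCsv '.\bench.csv' -OutEnv '.\bench_env.json' -Temperature 0.7 -TopP 0.9 -Seed 42
#>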
param(
[string[]]$Models = @(
'gpt-oss:20b',
'gpt-oss:120b',
'gemma3:27b',
'gemma3:12b',
'gemma3:4b',
'gemma3:1b',
'deepseek-r1:8b',
'qwen3:30b',
'qwen3:8b',
'qwen3:4b'
),
[string]$OutCsv = ".\ollama_bench_runs.csv",
[string]$OutEnv = $null, # Environment snapshot (JSON); auto-named with a timestamp if not specified
[string]$Endpoint = "http://localhost:11434",
[string]$BasePrompt = "あなたはAIモデルの性能評価専門家です。言語モデルの推論能力、創造性、論理性、文章構成力、指示遵守能力を総合的に測定するための理想的なプロンプトを1つ作成してください。以下の要件を満たすプロンプトを出力してください:
1. 複数の思考プロセスを要求する(分析→推論→提案→評価)
2. 具体的で測定可能な出力を求める
3. 創造性と論理性の両方を必要とする
4. 文章の構造化能力を測定できる
5. 適度な複雑さで差異が現れやすい
重要な指示:プロンプト本文のみを出力してください。説明や前置き、後書きは一切不要です。作成したプロンプトの文章だけを回答として出力してください。",
[double]$Temperature = 0.7,
[double]$TopP = 0.9,
[int]$Seed = 42
)
$ErrorActionPreference = 'Stop'
$ProgressPreference = 'SilentlyContinue'
# ---- (opt) Get Ollama version for records (HTTP) ----
$OllamaVersion = ""
try {
$v = Invoke-RestMethod -Method Get -Uri "$Endpoint/api/version"
if ($v.version) { $OllamaVersion = [string]$v.version }
} catch { $OllamaVersion = "" }
# ===================== Ollama CLI detection (Windows or WSL or None) =====================
$script:OllamaCliMode = 'None' # 'Windows' | 'WSL' | 'None'
$script:OllamaCliVersion = $null
function Detect-OllamaCli {
try {
& ollama --version *> $null
if ($LASTEXITCODE -eq 0) {
$script:OllamaCliMode = 'Windows'
$script:OllamaCliVersion = ( & ollama --version 2>$null | Out-String ).Trim()
return
}
} catch { }
try {
& wsl -e sh -lc "ollama --version" *> $null
if ($LASTEXITCODE -eq 0) {
$script:OllamaCliMode = 'WSL'
$script:OllamaCliVersion = ( & wsl -e sh -lc "ollama --version" 2>$null | Out-String ).Trim()
return
}
} catch { }
$script:OllamaCliMode = 'None'
$script:OllamaCliVersion = $null
}
Detect-OllamaCli
# =========================================================
# Ensure model availability: use the Ollama CLI (Windows or WSL) rather than the HTTP /api/pull endpoint
# =========================================================
function Ensure-OllamaModel {
param([string]$Model)
if ($script:OllamaCliMode -eq 'None') {
Write-Output "WARN: 'ollama' CLI が (Windows/WSL) どちらにも見つかりません。Ensure はスキップします: $Model"
return
}
$exists = $false
try {
if ($script:OllamaCliMode -eq 'Windows') {
& ollama show $Model *> $null
} else {
& wsl -e sh -lc "ollama show $Model" *> $null
}
if ($LASTEXITCODE -eq 0) { $exists = $true }
} catch { }
if (-not $exists) {
try {
if ($script:OllamaCliMode -eq 'Windows') {
$list = & ollama list 2>$null
} else {
$list = & wsl -e sh -lc "ollama list" 2>$null
}
if ($list) {
$pattern = '^\s*' + [regex]::Escape($Model) + '\s'
if ($list -split "`r?`n" | Where-Object { $_ -match $pattern }) {
$exists = $true
}
}
} catch { }
}
if (-not $exists) {
Write-Output "Model '$Model' not found. Pulling with 'ollama pull' via $script:OllamaCliMode..."
try {
if ($script:OllamaCliMode -eq 'Windows') {
& ollama pull $Model
} else {
& wsl -e sh -lc "ollama pull $Model"
}
if ($LASTEXITCODE -ne 0) {
Write-Output "WARN: 'ollama pull $Model' failed (exit=$LASTEXITCODE)"
} else {
Write-Output "Pulled: $Model"
}
} catch {
Write-Output "WARN: Pull failed for $Model ($($_.Exception.Message))"
}
}
}
# ---- Helper: Remove only <think> tags (inner text is preserved) ----
function Remove-ThinkTags {
param([string]$Text)
if ([string]::IsNullOrWhiteSpace($Text)) { return $Text }
$t = [regex]::Replace($Text, '</\s*think\s*>', '', 'IgnoreCase')
$t = [regex]::Replace($t, '<\s*think\b[^>]*>', '', 'IgnoreCase')
return $t
}
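# Illustrative example (the input string is an assumption, not taken from any real model output):
#   Remove-ThinkTags -Text "<think>draft reasoning</think>Design a benchmark prompt..."
#   # -> "draft reasoningDesign a benchmark prompt..."  (tags removed, inner text kept)
# This matters for reasoning models such as deepseek-r1, whose phase-1 output may wrap a chain of
# thought in <think> tags; phase 2 still receives that text, just without the tags themselves.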
# ---- Helper: One-shot generate (HTTP/REST; stream=false so the timing/stats fields are returned) ----
function Invoke-OllamaGenerate {
param(
[string]$Model,
[string]$Prompt
)
$body = @{
model = $Model
prompt = $Prompt
stream = $false
options = @{
temperature = $Temperature
top_p = $TopP
seed = $Seed
num_predict = 128000
}
} | ConvertTo-Json -Depth 6
$resp = Invoke-RestMethod -Method Post -Uri "$Endpoint/api/generate" -Body $body -ContentType "application/json"
return $resp
}
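<#
Shape of the non-streaming /api/generate response consumed below (the field names are the ones
this script reads; the numeric values are purely illustrative):
  response             = "generated text..."
  done_reason          = "stop"           # e.g. "length" if num_predict is exhausted
  total_duration       = 5043500667       # nanoseconds, whole request
  prompt_eval_count    = 26               # input (prompt) tokens
  prompt_eval_duration = 325953000        # nanoseconds spent on prompt evaluation
  eval_count           = 290              # generated (output) tokens
  eval_duration        = 4709213000       # nanoseconds spent decoding
#>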
# ---- Helper: Convert API response -> record object ----
function New-RunRecord {
param(
[string]$RunID,
[string]$Model,
[string]$Phase, # 'meta' or 'generated'
[string]$UsedPrompt,
[object]$Resp # Ollama /api/generate response
)
# ns -> s
$totalS = if ($Resp.total_duration) { [math]::Round($Resp.total_duration / 1e9, 3) } else { $null }
$promptS = if ($Resp.prompt_eval_duration) { [math]::Round($Resp.prompt_eval_duration / 1e9, 6) } else { $null }
$decodeS = if ($Resp.eval_duration) { [math]::Round($Resp.eval_duration / 1e9, 6) } else { $null }
$promptTps = if ($Resp.prompt_eval_count -and $promptS -and $promptS -ne 0) {
[math]::Round($Resp.prompt_eval_count / $promptS, 2)
} else { $null }
$decodeTps = if ($Resp.eval_count -and $decodeS -and $decodeS -ne 0) {
[math]::Round($Resp.eval_count / $decodeS, 2)
} else { $null }
[pscustomobject]@{
DateTimeJST = (Get-Date).ToString("yyyy-MM-dd HH:mm:ss")
RunID = $RunID
Ollama_Version = $OllamaVersion
Model = $Model
Phase = $Phase
Temperature = $Temperature
TopP = $TopP
Seed = $Seed
MaxTokens = 128000
Input_Tokens = $Resp.prompt_eval_count
Output_Tokens = $Resp.eval_count
Prompt_Tokens_per_s = $promptTps
Decode_Tokens_per_s = $decodeTps
Total_Time_s = $totalS
Done_Reason = $Resp.done_reason
Used_Prompt = ($UsedPrompt -replace "`r?`n"," ").Trim()
Response_Sample = ($Resp.response -replace "`r?`n"," ").Trim()
Notes = ""
}
}
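# Worked example using the illustrative numbers above: eval_count = 290 tokens with
# eval_duration = 4709213000 ns gives decodeS = 4.709213 s, so
# Decode_Tokens_per_s = [math]::Round(290 / 4.709213, 2) = 61.58.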
# ---- Helper: Write single record to CSV ----
function Write-RecordToCsv {
param([object]$Record)
if (Test-Path $OutCsv) {
$Record | Export-Csv -Path $OutCsv -Append -NoTypeInformation -Encoding UTF8
} else {
$Record | Export-Csv -Path $OutCsv -NoTypeInformation -Encoding UTF8
}
}
# ===================== Environment snapshot collection =====================
function Get-NvidiaSmi-Windows {
$result = @()
try {
$lines = & nvidia-smi --query-gpu=name,driver_version,memory.total,memory.free,memory.used,pcie.link.gen.current,pcie.link.gen.max --format=csv,noheader,nounits 2>$null
if ($lines) {
$result = $lines -split "`r?`n" | Where-Object { $_ -match '\S' } | ForEach-Object {
$p = $_.Split(',')
[pscustomobject]@{
Name = $p[0].Trim()
Driver_Version = $p[1].Trim()
Mem_Total_MB = [int]$p[2].Trim()
Mem_Free_MB = [int]$p[3].Trim()
Mem_Used_MB = [int]$p[4].Trim()
PCIE_Gen_Current = $p[5].Trim()
PCIE_Gen_Max = $p[6].Trim()
}
}
}
} catch { }
return ,$result
}
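# Illustrative raw line from the query above (values are made up):
#   NVIDIA GeForce RTX 4090, 560.94, 24564, 22000, 2564, 4, 4
# which maps to Name, Driver_Version, Mem_Total_MB, Mem_Free_MB, Mem_Used_MB,
# PCIE_Gen_Current, PCIE_Gen_Max after the comma split and Trim.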
function Get-NvidiaCudaVersion {
try {
$head = & nvidia-smi 2>$null
if ($head) {
$joined = ($head -join ' ')
$m = [regex]::Match($joined, 'CUDA Version:\s*([0-9]+\.[0-9]+)')
if ($m.Success) { return $m.Groups[1].Value }
}
} catch { }
return $null
}
function Get-NvidiaSmi-WSL {
try {
$lines = & wsl -e sh -lc "nvidia-smi --query-gpu=name,driver_version,memory.total,memory.free,memory.used --format=csv,noheader,nounits" 2>$null
return ($lines -join "`n").Trim()
} catch { return $null }
}
function Get-WSL-Info {
$obj = [ordered]@{}
try { $obj.VersionText = ( & wsl --version 2>$null | Out-String ).Trim() } catch { $obj.VersionText = $null }
try { $obj.Distros = ( & wsl -l -v 2>$null | Out-String ).Trim() } catch { $obj.Distros = $null }
try { $obj.Kernel = ( & wsl -e sh -lc "uname -r" 2>$null | Out-String ).Trim() } catch { $obj.Kernel = $null }
try { $obj.OSRelease = ( & wsl -e sh -lc 'grep "^NAME=\|^VERSION_ID=" /etc/os-release' 2>$null | Out-String ).Trim() } catch { $obj.OSRelease = $null }
$obj.NvidiaSMI = Get-NvidiaSmi-WSL
return [pscustomobject]$obj
}
function Get-EnvReport {
param([string]$Endpoint, [string]$OllamaVersion)
$os = $null; $cs = $null; $cpu = $null; $mems = @(); $gpus = @()
try { $os = Get-CimInstance Win32_OperatingSystem } catch {}
try { $cs = Get-CimInstance Win32_ComputerSystem } catch {}
try { $cpu = Get-CimInstance Win32_Processor | Select-Object -First 1 } catch {}
try { $mems = Get-CimInstance Win32_PhysicalMemory } catch {}
try { $gpus = Get-CimInstance Win32_VideoController } catch {}
# Disk (physical) and the actual backing of C:
$physDisks = @()
try { $physDisks = Get-PhysicalDisk | Select-Object FriendlyName,SerialNumber,BusType,MediaType,HealthStatus,OperationalStatus,@{n='SizeGB';e={[math]::Round($_.Size/1GB,2)}} } catch {}
$cVolume = $null
try {
$vol = Get-Volume -DriveLetter C -ErrorAction Stop
$cVolume = [pscustomobject]@{
DriveLetter = 'C'
FileSystem = $vol.FileSystem
SizeGB = [math]::Round($vol.Size/1GB,2)
FreeGB = [math]::Round($vol.SizeRemaining/1GB,2)
Physical = $null
IsSystem = $true
}
try {
$cPhys = (Get-Partition -DriveLetter C -ErrorAction Stop | Get-Disk | Get-PhysicalDisk)
if ($cPhys) {
$cVolume.Physical = [pscustomobject]@{
FriendlyName = $cPhys.FriendlyName
BusType = $cPhys.BusType
MediaType = $cPhys.MediaType
SizeGB = [math]::Round($cPhys.Size/1GB,2)
SerialNumber = $cPhys.SerialNumber
}
}
} catch { }
} catch { }
# Power plan & uptime
$powerPlan = $null
try { $powerPlan = (Get-CimInstance -Namespace root\cimv2\power -ClassName Win32_PowerPlan -Filter "IsActive=TRUE").ElementName } catch { }
$uptimeDays = $null
try {
if ($os) {
# Get-CimInstance already returns LastBootUpTime as a [datetime], so no DMTF conversion is needed
$uptimeDays = [math]::Round(((Get-Date) - $os.LastBootUpTime).TotalDays, 2)
}
} catch { }
# GPU details (Windows-side nvidia-smi)
$nvsmi = Get-NvidiaSmi-Windows
$cudaVer = Get-NvidiaCudaVersion
# Network (for reference)
$ipv4 = @()
try {
$ipv4 = Get-NetIPAddress -AddressFamily IPv4 -ErrorAction SilentlyContinue |
Where-Object { $_.IPAddress -and ($_.IPAddress -notmatch '^169\.254\.') } |
Select-Object InterfaceAlias, IPAddress, PrefixLength
} catch { }
# WSL-side information (left as null if unavailable)
$wsl = Get-WSL-Info
# Total memory
$memTotalGB = $null
try { $memTotalGB = [math]::Round(($mems | Measure-Object -Property Capacity -Sum).Sum/1GB,2) } catch { }
return [pscustomobject]@{
DateTimeJST = (Get-Date).ToString("yyyy-MM-dd HH:mm:ss")
Host = @{
ComputerName = $env:COMPUTERNAME
UserName = $env:USERNAME
Domain = $env:USERDOMAIN
TimeZone = (Get-TimeZone).Id
Uptime_Days = $uptimeDays
PowerPlan = $powerPlan
PowerShell = $PSVersionTable.PSVersion.ToString()
}
ComputerSystem = @{
Manufacturer = $cs.Manufacturer
Model = $cs.Model
TotalPhysicalMemory_GB = if ($cs.TotalPhysicalMemory) { [math]::Round($cs.TotalPhysicalMemory/1GB,2) } else { $memTotalGB }
}
OS = @{
Caption = $os.Caption
Version = $os.Version
BuildNumber = $os.BuildNumber
OSArchitecture= $os.OSArchitecture
InstallDate = $os.InstallDate
}
CPU = @{
Name = $cpu.Name
Cores = $cpu.NumberOfCores
LogicalProcessors = $cpu.NumberOfLogicalProcessors
MaxClockMHz = $cpu.MaxClockSpeed
L3CacheKB = $cpu.L3CacheSize
}
Memory = @{
TotalGB = $memTotalGB
Modules = @($mems | Select-Object @{n='CapacityGB';e={[math]::Round($_.Capacity/1GB,2)}}, Speed, Manufacturer, PartNumber, DeviceLocator)
}
Disk = @{
PhysicalDisks = @($physDisks)
SystemDrive = $cVolume
}
GPU = @{
Win32_VideoController = @($gpus | Select-Object Name, AdapterCompatibility, DriverVersion, DriverDate, @{n='AdapterRAM_Bytes';e={$_.AdapterRAM}}, PNPDeviceID)
NvidiaSMI_Windows = @($nvsmi)
Nvidia_CUDA_Version = $cudaVer
}
Network = @{
IPv4 = @($ipv4)
}
Ollama = @{
Endpoint = $Endpoint
Version = $OllamaVersion
CliMode = $script:OllamaCliMode
CliVersion = $script:OllamaCliVersion
}
WSL = $wsl
}
}
# ===================== End of environment snapshot collection =====================
# ---- Main ----
$stamp = Get-Date -Format "yyyyMMdd-HHmmss"
if (-not $OutEnv -or [string]::IsNullOrWhiteSpace($OutEnv)) {
$OutEnv = ".\ollama_bench_env_$stamp.json"
}
# ★ Save an environment snapshot at the start of the run (two output files are always produced)
try {
$envReport = Get-EnvReport -Endpoint $Endpoint -OllamaVersion $OllamaVersion
$envReport | ConvertTo-Json -Depth 8 | Out-File -FilePath $OutEnv -Encoding UTF8
Write-Output "Environment snapshot saved to: $OutEnv"
} catch {
Write-Output "WARN: Failed to write environment snapshot: $($_.Exception.Message)"
}
foreach ($m in $Models) {
Write-Output "=== Model: $m ==="
Ensure-OllamaModel -Model $m
try {
# Phase 1: generate a "good benchmark prompt" from the base prompt
$r1 = Invoke-OllamaGenerate -Model $m -Prompt $BasePrompt
$rec1 = New-RunRecord -RunID "$stamp-$($m.Replace(':','_'))-1" -Model $m -Phase "meta" -UsedPrompt $BasePrompt -Resp $r1
Write-RecordToCsv -Record $rec1
Write-Output "Phase 1 completed for $m"
# Phase 2: feed that output text back into the same model
$generatedPrompt = [string]$r1.response
$generatedPromptClean = Remove-ThinkTags -Text $generatedPrompt
if ([string]::IsNullOrWhiteSpace($generatedPromptClean)) {
Write-Output "WARN: Empty response from phase1 for $m (phase2 skipped)"
$rec2 = [pscustomobject]@{
DateTimeJST = (Get-Date).ToString("yyyy-MM-dd HH:mm:ss")
RunID = "$stamp-$($m.Replace(':','_'))-2"
Ollama_Version = $OllamaVersion
Model = $m
Phase = "generated"
Temperature = $Temperature
TopP = $TopP
Seed = $Seed
MaxTokens = 128000
Input_Tokens = $null
Output_Tokens = $null
Prompt_Tokens_per_s = $null
Decode_Tokens_per_s = $null
Total_Time_s = $null
Done_Reason = "skipped-empty-phase1"
Used_Prompt = "(phase1 empty)"
Response_Sample = ""
Notes = "phase2 skipped due to empty phase1 response"
}
Write-RecordToCsv -Record $rec2
Write-Output "Phase 2 skipped for $m (empty phase1)"
} else {
$r2 = Invoke-OllamaGenerate -Model $m -Prompt $generatedPromptClean
$rec2 = New-RunRecord -RunID "$stamp-$($m.Replace(':','_'))-2" -Model $m -Phase "generated" -UsedPrompt $generatedPromptClean -Resp $r2
Write-RecordToCsv -Record $rec2
Write-Output "Phase 2 completed for $m"
}
} catch {
$err = $_.Exception.Message
Write-Output "ERROR: $m failed - $err"
$recErr = [pscustomobject]@{
DateTimeJST = (Get-Date).ToString("yyyy-MM-dd HH:mm:ss")
RunID = "$stamp-$($m.Replace(':','_'))-ERR"
Ollama_Version = $OllamaVersion
Model = $m
Phase = "error"
Temperature = $Temperature
TopP = $TopP
Seed = $Seed
MaxTokens = 128000
Input_Tokens = $null
Output_Tokens = $null
Prompt_Tokens_per_s = $null
Decode_Tokens_per_s = $null
Total_Time_s = $null
Done_Reason = "invoke-failed"
Used_Prompt = $BasePrompt
Response_Sample = ""
Notes = "Invoke failed: $err"
}
Write-RecordToCsv -Record $recErr
}
}
Write-Output "All results saved to: $OutCsv"
Write-Output "Environment snapshot (JSON): $OutEnv"