Last active
September 30, 2025 04:43
-
-
Save ebibibi/6385c61aaea66870dfa2d9a78fe82a3a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <# | |
| 目的: | |
| - 各モデルを 2段階(meta→その出力で再実行)で計測し、結果を CSV に追記 | |
| - MaxTokens(=num_predict) は 128000 に設定 | |
| 前提: | |
| - Ollama が http://localhost:11434 で稼働 | |
| - 未取得モデルは "ollama コマンド" で取得(失敗時は警告のみ) | |
| 事前のモデルダウンロードコマンド: | |
| ollama pull gpt-oss:20b | |
| ollama pull gpt-oss:120b | |
| ollama pull gemma3:27b | |
| ollama pull gemma3:12b | |
| ollama pull gemma3:4b | |
| ollama pull gemma3:1b | |
| ollama pull deepseek-r1:8b | |
| ollama pull qwen3:30b | |
| ollama pull qwen3:8b | |
| ollama pull qwen3:4b | |
| #> | |
# Script parameters: benchmark target models, output paths, Ollama endpoint,
# and the sampling options applied to every /api/generate call.
param(
    # Models to benchmark; missing ones are pulled on demand via the CLI.
    [string[]]$Models = @(
        'gpt-oss:20b',
        'gpt-oss:120b',
        'gemma3:27b',
        'gemma3:12b',
        'gemma3:4b',
        'gemma3:1b',
        'deepseek-r1:8b',
        'qwen3:30b',
        'qwen3:8b',
        'qwen3:4b'
    ),
    # CSV file that per-run records are appended to.
    [string]$OutCsv = ".\ollama_bench_runs.csv",
    [string]$OutEnv = $null, # Environment snapshot (JSON); auto-named from the timestamp when not specified
    # Ollama HTTP API endpoint.
    [string]$Endpoint = "http://localhost:11434",
    # Phase-1 "meta" prompt (Japanese, sent to the model verbatim): asks each model
    # to author an ideal benchmark prompt, which phase 2 then feeds back to it.
    [string]$BasePrompt = "あなたはAIモデルの性能評価専門家です。言語モデルの推論能力、創造性、論理性、文章構成力、指示遵守能力を総合的に測定するための理想的なプロンプトを1つ作成してください。以下の要件を満たすプロンプトを出力してください:
1. 複数の思考プロセスを要求する(分析→推論→提案→評価)
2. 具体的で測定可能な出力を求める
3. 創造性と論理性の両方を必要とする
4. 文章の構造化能力を測定できる
5. 適度な複雑さで差異が現れやすい
重要な指示:プロンプト本文のみを出力してください。説明や前置き、後書きは一切不要です。作成したプロンプトの文章だけを回答として出力してください。",
    # Sampling options forwarded to Ollama (seed fixed for reproducibility).
    [double]$Temperature = 0.7,
    [double]$TopP = 0.9,
    [int]$Seed = 42
)
# Fail fast on errors; silence progress bars (speeds up Invoke-RestMethod).
$ErrorActionPreference = 'Stop'
$ProgressPreference = 'SilentlyContinue'

# ---- (opt) Record the Ollama server version for the CSV records (HTTP) ----
$OllamaVersion = ""
try {
    $versionInfo = Invoke-RestMethod -Method Get -Uri "$Endpoint/api/version"
    if ($versionInfo.version) {
        $OllamaVersion = [string]$versionInfo.version
    }
} catch {
    # Best effort only: leave the version blank when the server is unreachable.
    $OllamaVersion = ""
}

# ===================== Ollama CLI detection (Windows or WSL or None) =====================
$script:OllamaCliMode = 'None'      # one of 'Windows' | 'WSL' | 'None'
$script:OllamaCliVersion = $null
function Detect-OllamaCli {
    <#
    .SYNOPSIS
        Determines where the 'ollama' CLI is available and records the result in
        $script:OllamaCliMode ('Windows' | 'WSL' | 'None') and
        $script:OllamaCliVersion (trimmed '--version' output, or $null).
    .NOTES
        Fix: the original ran 'ollama --version' twice per location (once to test
        the exit code, once to capture output). Each invocation is relatively
        expensive — especially through WSL — so the output is now captured on the
        first call and reused.
    #>
    # Try the native Windows CLI first.
    try {
        $out = & ollama --version 2>$null
        if ($LASTEXITCODE -eq 0) {
            $script:OllamaCliMode = 'Windows'
            $script:OllamaCliVersion = ($out | Out-String).Trim()
            return
        }
    } catch { }
    # Fall back to a CLI inside the default WSL distro.
    try {
        $out = & wsl -e sh -lc "ollama --version" 2>$null
        if ($LASTEXITCODE -eq 0) {
            $script:OllamaCliMode = 'WSL'
            $script:OllamaCliVersion = ($out | Out-String).Trim()
            return
        }
    } catch { }
    # Neither location has a working CLI.
    $script:OllamaCliMode = 'None'
    $script:OllamaCliVersion = $null
}
Detect-OllamaCli
# =========================================================
# Model availability: use the Ollama CLI (Windows or WSL) instead of HTTP /api/pull
# =========================================================
function Ensure-OllamaModel {
    # Makes sure $Model is available locally; pulls it when missing.
    # Failures are reported as warnings only — benchmarking continues regardless.
    param([string]$Model)

    # Without any CLI we can neither check nor pull; warn and bail out.
    if ($script:OllamaCliMode -eq 'None') {
        Write-Output "WARN: 'ollama' CLI が (Windows/WSL) どちらにも見つかりません。Ensure はスキップします: $Model"
        return
    }

    $useWindowsCli = ($script:OllamaCliMode -eq 'Windows')

    # Check 1: 'ollama show' exits 0 only for models already present locally.
    $found = $false
    try {
        if ($useWindowsCli) {
            & ollama show $Model *> $null
        } else {
            & wsl -e sh -lc "ollama show $Model" *> $null
        }
        $found = ($LASTEXITCODE -eq 0)
    } catch { }

    # Check 2: fall back to scanning 'ollama list' output for the model name.
    if (-not $found) {
        try {
            $listing = if ($useWindowsCli) {
                & ollama list 2>$null
            } else {
                & wsl -e sh -lc "ollama list" 2>$null
            }
            if ($listing) {
                $namePattern = '^\s*' + [regex]::Escape($Model) + '\s'
                $matches2 = $listing -split "`r?`n" | Where-Object { $_ -match $namePattern }
                if ($matches2) { $found = $true }
            }
        } catch { }
    }

    if ($found) { return }

    # Not present locally: pull it (non-zero exit or exception is only a warning).
    Write-Output "Model '$Model' not found. Pulling with 'ollama pull' via $script:OllamaCliMode..."
    try {
        if ($useWindowsCli) {
            & ollama pull $Model
        } else {
            & wsl -e sh -lc "ollama pull $Model"
        }
        if ($LASTEXITCODE -ne 0) {
            Write-Output "WARN: 'ollama pull $Model' failed (exit=$LASTEXITCODE)"
        } else {
            Write-Output "Pulled: $Model"
        }
    } catch {
        Write-Output "WARN: Pull failed for $Model ($($_.Exception.Message))"
    }
}
# ---- Helper: strip <think>/</think> tags only (their inner text is kept) ----
function Remove-ThinkTags {
    param([string]$Text)
    # Null/blank input passes through untouched.
    if ([string]::IsNullOrWhiteSpace($Text)) { return $Text }
    # Remove closing tags first, then opening tags (attributes tolerated),
    # both case-insensitively.
    $stripped = [regex]::Replace($Text, '</\s*think\s*>', '', 'IgnoreCase')
    $stripped = [regex]::Replace($stripped, '<\s*think\b[^>]*>', '', 'IgnoreCase')
    return $stripped
}
# ---- Helper: one-shot /api/generate call (stream=false so timing stats come back) ----
function Invoke-OllamaGenerate {
    param(
        [string]$Model,
        [string]$Prompt
    )
    # Sampling options come from the script-level parameters;
    # num_predict caps the generated token count at 128000.
    $payload = @{
        model   = $Model
        prompt  = $Prompt
        stream  = $false
        options = @{
            temperature = $Temperature
            top_p       = $TopP
            seed        = $Seed
            num_predict = 128000
        }
    }
    $json = $payload | ConvertTo-Json -Depth 6
    return Invoke-RestMethod -Method Post -Uri "$Endpoint/api/generate" -Body $json -ContentType "application/json"
}
# ---- Helper: flatten an /api/generate response into one CSV record ----
function New-RunRecord {
    param(
        [string]$RunID,
        [string]$Model,
        [string]$Phase,      # 'meta' or 'generated'
        [string]$UsedPrompt,
        [object]$Resp        # raw Ollama /api/generate response object
    )
    # Ollama reports durations in nanoseconds; convert to seconds.
    $totalSec = $null
    $promptSec = $null
    $decodeSec = $null
    if ($Resp.total_duration) { $totalSec = [math]::Round($Resp.total_duration / 1e9, 3) }
    if ($Resp.prompt_eval_duration) { $promptSec = [math]::Round($Resp.prompt_eval_duration / 1e9, 6) }
    if ($Resp.eval_duration) { $decodeSec = [math]::Round($Resp.eval_duration / 1e9, 6) }

    # Throughput in tokens/s; left $null when counts or durations are missing/zero.
    $promptRate = $null
    if ($Resp.prompt_eval_count -and $promptSec -and $promptSec -ne 0) {
        $promptRate = [math]::Round($Resp.prompt_eval_count / $promptSec, 2)
    }
    $decodeRate = $null
    if ($Resp.eval_count -and $decodeSec -and $decodeSec -ne 0) {
        $decodeRate = [math]::Round($Resp.eval_count / $decodeSec, 2)
    }

    # Prompts/responses are collapsed to a single line so the CSV stays one row per run.
    [pscustomobject]@{
        DateTimeJST         = (Get-Date).ToString("yyyy-MM-dd HH:mm:ss")
        RunID               = $RunID
        Ollama_Version      = $OllamaVersion
        Model               = $Model
        Phase               = $Phase
        Temperature         = $Temperature
        TopP                = $TopP
        Seed                = $Seed
        MaxTokens           = 128000
        Input_Tokens        = $Resp.prompt_eval_count
        Output_Tokens       = $Resp.eval_count
        Prompt_Tokens_per_s = $promptRate
        Decode_Tokens_per_s = $decodeRate
        Total_Time_s        = $totalSec
        Done_Reason         = $Resp.done_reason
        Used_Prompt         = ($UsedPrompt -replace "`r?`n"," ").Trim()
        Response_Sample     = ($Resp.response -replace "`r?`n"," ").Trim()
        Notes               = ""
    }
}
# ---- Helper: append one record to $OutCsv (header row written on first create) ----
function Write-RecordToCsv {
    param([object]$Record)
    if (-not (Test-Path $OutCsv)) {
        # First record: create the file, which also writes the header row.
        $Record | Export-Csv -Path $OutCsv -NoTypeInformation -Encoding UTF8
    } else {
        $Record | Export-Csv -Path $OutCsv -Append -NoTypeInformation -Encoding UTF8
    }
}
# ===================== Environment snapshot collection =====================
function Get-NvidiaSmi-Windows {
    <#
    .SYNOPSIS
        Queries Windows-side nvidia-smi and returns one object per GPU
        (name, driver, memory figures, PCIe link generation).
    .OUTPUTS
        Array of [pscustomobject]; empty array when nvidia-smi is absent or fails.
    .NOTES
        Fix: the original cast the memory fields with [int] directly. nvidia-smi
        can report non-numeric values (e.g. "[N/A]", "[Not Supported]"), and the
        resulting cast exception was caught by the outer try — silently dropping
        ALL GPU rows. Non-numeric fields now become $null instead.
    #>
    # Local helper: parse an int, returning $null for non-numeric text.
    function ConvertTo-IntOrNull([string]$s) {
        $n = 0
        if ($s -and [int]::TryParse($s.Trim(), [ref]$n)) { return $n }
        return $null
    }
    $result = @()
    try {
        $lines = & nvidia-smi --query-gpu=name,driver_version,memory.total,memory.free,memory.used,pcie.link.gen.current,pcie.link.gen.max --format=csv,noheader,nounits 2>$null
        if ($lines) {
            $result = $lines -split "`r?`n" | Where-Object { $_ -match '\S' } | ForEach-Object {
                # nvidia-smi emits comma-separated fields in the order requested above.
                $p = $_.Split(',')
                [pscustomobject]@{
                    Name             = $p[0].Trim()
                    Driver_Version   = $p[1].Trim()
                    Mem_Total_MB     = ConvertTo-IntOrNull $p[2]
                    Mem_Free_MB      = ConvertTo-IntOrNull $p[3]
                    Mem_Used_MB      = ConvertTo-IntOrNull $p[4]
                    PCIE_Gen_Current = $p[5].Trim()
                    PCIE_Gen_Max     = $p[6].Trim()
                }
            }
        }
    } catch { }
    # Leading comma keeps the result an array even with exactly one GPU.
    return ,$result
}
# Extracts the "CUDA Version: X.Y" figure from plain nvidia-smi header output.
# Returns $null when nvidia-smi is unavailable or the banner has no such field.
function Get-NvidiaCudaVersion {
    try {
        $banner = & nvidia-smi 2>$null
        if ($banner) {
            $text = $banner -join ' '
            $match = [regex]::Match($text, 'CUDA Version:\s*([0-9]+\.[0-9]+)')
            if ($match.Success) {
                return $match.Groups[1].Value
            }
        }
    } catch { }
    return $null
}
# GPU info as seen from inside WSL, returned as raw newline-joined text
# ($null when WSL or nvidia-smi is unavailable).
function Get-NvidiaSmi-WSL {
    try {
        $out = & wsl -e sh -lc "nvidia-smi --query-gpu=name,driver_version,memory.total,memory.free,memory.used --format=csv,noheader,nounits" 2>$null
        return ($out -join "`n").Trim()
    } catch {
        return $null
    }
}
# Collects WSL details (version, distros, kernel, distro os-release, GPU view).
# Each probe is independent; a failed probe leaves its field $null.
function Get-WSL-Info {
    $info = [ordered]@{}
    try { $info.VersionText = ( & wsl --version 2>$null | Out-String ).Trim() } catch { $info.VersionText = $null }
    try { $info.Distros = ( & wsl -l -v 2>$null | Out-String ).Trim() } catch { $info.Distros = $null }
    try { $info.Kernel = ( & wsl -e sh -lc "uname -r" 2>$null | Out-String ).Trim() } catch { $info.Kernel = $null }
    try { $info.OSRelease = ( & wsl -e sh -lc 'grep "^NAME=\|^VERSION_ID=" /etc/os-release' 2>$null | Out-String ).Trim() } catch { $info.OSRelease = $null }
    $info.NvidiaSMI = Get-NvidiaSmi-WSL
    return [pscustomobject]$info
}
function Get-EnvReport {
    <#
    .SYNOPSIS
        Builds a snapshot of the benchmark host (OS, CPU, RAM, disks, GPU,
        network, WSL, Ollama) as a nested object ready for JSON serialization.
    .PARAMETER Endpoint
        The Ollama HTTP endpoint being benchmarked (recorded verbatim).
    .PARAMETER OllamaVersion
        The server version string obtained earlier (recorded verbatim).
    .NOTES
        Every probe is wrapped in try/catch so a missing cmdlet, device, or
        permission never aborts the snapshot; absent data stays $null / empty.
        Fixes vs. original:
        - Get-Volume for C: was invoked three times; it is queried once and reused.
        - Uptime used [Management.ManagementDateTimeConverter]::ToDateTime on
          $os.LastBootUpTime, but Get-CimInstance already returns a [datetime];
          the DMTF conversion threw and Uptime_Days was silently always $null.
    #>
    param([string]$Endpoint, [string]$OllamaVersion)

    # CIM probes: hardware/OS basics (each optional).
    $os = $null; $cs = $null; $cpu = $null; $mems = @(); $gpus = @()
    try { $os = Get-CimInstance Win32_OperatingSystem } catch {}
    try { $cs = Get-CimInstance Win32_ComputerSystem } catch {}
    try { $cpu = Get-CimInstance Win32_Processor | Select-Object -First 1 } catch {}
    try { $mems = Get-CimInstance Win32_PhysicalMemory } catch {}
    try { $gpus = Get-CimInstance Win32_VideoController } catch {}

    # Physical disks, plus the actual device backing the C: system volume.
    $physDisks = @()
    try { $physDisks = Get-PhysicalDisk | Select-Object FriendlyName,SerialNumber,BusType,MediaType,HealthStatus,OperationalStatus,@{n='SizeGB';e={[math]::Round($_.Size/1GB,2)}} } catch {}
    $cVolume = $null
    try {
        # Query the C: volume once and reuse it (was three identical calls).
        $vol = Get-Volume -DriveLetter C -ErrorAction Stop
        $cVolume = [pscustomobject]@{
            DriveLetter = 'C'
            FileSystem  = $vol.FileSystem
            SizeGB      = [math]::Round($vol.Size/1GB,2)
            FreeGB      = [math]::Round($vol.SizeRemaining/1GB,2)
            Physical    = $null
            IsSystem    = $true
        }
        try {
            $cPhys = (Get-Partition -DriveLetter C -ErrorAction Stop | Get-Disk | Get-PhysicalDisk)
            if ($cPhys) {
                $cVolume.Physical = [pscustomobject]@{
                    FriendlyName = $cPhys.FriendlyName
                    BusType      = $cPhys.BusType
                    MediaType    = $cPhys.MediaType
                    SizeGB       = [math]::Round($cPhys.Size/1GB,2)
                    SerialNumber = $cPhys.SerialNumber
                }
            }
        } catch { }
    } catch { }

    # Active power plan & days since last boot (both can affect benchmark numbers).
    $powerPlan = $null
    try { $powerPlan = (Get-CimInstance -Namespace root\cimv2\power -ClassName Win32_PowerPlan -Filter "IsActive=TRUE").ElementName } catch { }
    $uptimeDays = $null
    try {
        if ($os) {
            # LastBootUpTime from Get-CimInstance is already a [datetime].
            $uptimeDays = [math]::Round(((Get-Date) - $os.LastBootUpTime).TotalDays, 2)
        }
    } catch { }

    # GPU details via Windows-side nvidia-smi.
    $nvsmi = Get-NvidiaSmi-Windows
    $cudaVer = Get-NvidiaCudaVersion

    # Network (informational): IPv4 addresses, excluding APIPA (169.254.x.x).
    $ipv4 = @()
    try {
        $ipv4 = Get-NetIPAddress -AddressFamily IPv4 -ErrorAction SilentlyContinue |
            Where-Object { $_.IPAddress -and ($_.IPAddress -notmatch '^169\.254\.') } |
            Select-Object InterfaceAlias, IPAddress, PrefixLength
    } catch { }

    # WSL-side info (fields stay $null when WSL is absent).
    $wsl = Get-WSL-Info

    # Total RAM summed from physical modules (fallback when ComputerSystem lacks it).
    $memTotalGB = $null
    try { $memTotalGB = [math]::Round(($mems | Measure-Object -Property Capacity -Sum).Sum/1GB,2) } catch { }

    return [pscustomobject]@{
        DateTimeJST = (Get-Date).ToString("yyyy-MM-dd HH:mm:ss")
        Host = @{
            ComputerName = $env:COMPUTERNAME
            UserName     = $env:USERNAME
            Domain       = $env:USERDOMAIN
            TimeZone     = (Get-TimeZone).Id
            Uptime_Days  = $uptimeDays
            PowerPlan    = $powerPlan
            PowerShell   = $PSVersionTable.PSVersion.ToString()
        }
        ComputerSystem = @{
            Manufacturer = $cs.Manufacturer
            Model        = $cs.Model
            TotalPhysicalMemory_GB = if ($cs.TotalPhysicalMemory) { [math]::Round($cs.TotalPhysicalMemory/1GB,2) } else { $memTotalGB }
        }
        OS = @{
            Caption       = $os.Caption
            Version       = $os.Version
            BuildNumber   = $os.BuildNumber
            OSArchitecture= $os.OSArchitecture
            InstallDate   = $os.InstallDate
        }
        CPU = @{
            Name              = $cpu.Name
            Cores             = $cpu.NumberOfCores
            LogicalProcessors = $cpu.NumberOfLogicalProcessors
            MaxClockMHz       = $cpu.MaxClockSpeed
            L3CacheKB         = $cpu.L3CacheSize
        }
        Memory = @{
            TotalGB = $memTotalGB
            Modules = @($mems | Select-Object @{n='CapacityGB';e={[math]::Round($_.Capacity/1GB,2)}}, Speed, Manufacturer, PartNumber, DeviceLocator)
        }
        Disk = @{
            PhysicalDisks = @($physDisks)
            SystemDrive   = $cVolume
        }
        GPU = @{
            Win32_VideoController = @($gpus | Select-Object Name, AdapterCompatibility, DriverVersion, DriverDate, @{n='AdapterRAM_Bytes';e={$_.AdapterRAM}}, PNPDeviceID)
            NvidiaSMI_Windows     = @($nvsmi)
            Nvidia_CUDA_Version   = $cudaVer
        }
        Network = @{
            IPv4 = @($ipv4)
        }
        Ollama = @{
            Endpoint   = $Endpoint
            Version    = $OllamaVersion
            CliMode    = $script:OllamaCliMode
            CliVersion = $script:OllamaCliVersion
        }
        WSL = $wsl
    }
}
# ===================== Environment snapshot collection (end) =====================
# ---- Main ----
# One timestamp per run; reused in every RunID and in the default snapshot name.
$stamp = Get-Date -Format "yyyyMMdd-HHmmss"
# Default the env-snapshot path to a timestamped JSON file
# (IsNullOrWhiteSpace also covers the $null default).
if ([string]::IsNullOrWhiteSpace($OutEnv)) {
    $OutEnv = ".\ollama_bench_env_$stamp.json"
}
# Always capture the environment snapshot up front (two output files per run).
try {
    $envReport = Get-EnvReport -Endpoint $Endpoint -OllamaVersion $OllamaVersion
    $envJson = $envReport | ConvertTo-Json -Depth 8
    $envJson | Out-File -FilePath $OutEnv -Encoding UTF8
    Write-Output "Environment snapshot saved to: $OutEnv"
} catch {
    Write-Output "WARN: Failed to write environment snapshot: $($_.Exception.Message)"
}
# Benchmark loop: for each model, run the two phases and append each result to the CSV.
foreach ($m in $Models) {
    Write-Output "=== Model: $m ==="
    Ensure-OllamaModel -Model $m
    try {
        # Phase 1 ("meta"): have the model generate a good benchmark prompt from the base prompt.
        $r1 = Invoke-OllamaGenerate -Model $m -Prompt $BasePrompt
        $rec1 = New-RunRecord -RunID "$stamp-$($m.Replace(':','_'))-1" -Model $m -Phase "meta" -UsedPrompt $BasePrompt -Resp $r1
        Write-RecordToCsv -Record $rec1
        Write-Output "Phase 1 completed for $m"
        # Phase 2 ("generated"): feed that output text back into the same model.
        $generatedPrompt = [string]$r1.response
        # Strip <think>/</think> tags (their contents are kept) before reuse.
        $generatedPromptClean = Remove-ThinkTags -Text $generatedPrompt
        if ([string]::IsNullOrWhiteSpace($generatedPromptClean)) {
            Write-Output "WARN: Empty response from phase1 for $m (phase2 skipped)"
            # Placeholder row so every model still contributes a phase-2 record to the CSV.
            $rec2 = [pscustomobject]@{
                DateTimeJST = (Get-Date).ToString("yyyy-MM-dd HH:mm:ss")
                RunID = "$stamp-$($m.Replace(':','_'))-2"
                Ollama_Version = $OllamaVersion
                Model = $m
                Phase = "generated"
                Temperature = $Temperature
                TopP = $TopP
                Seed = $Seed
                MaxTokens = 128000
                Input_Tokens = $null
                Output_Tokens = $null
                Prompt_Tokens_per_s = $null
                Decode_Tokens_per_s = $null
                Total_Time_s = $null
                Done_Reason = "skipped-empty-phase1"
                Used_Prompt = "(phase1 empty)"
                Response_Sample = ""
                Notes = "phase2 skipped due to empty phase1 response"
            }
            Write-RecordToCsv -Record $rec2
            Write-Output "Phase 2 skipped for $m (empty phase1)"
        } else {
            $r2 = Invoke-OllamaGenerate -Model $m -Prompt $generatedPromptClean
            $rec2 = New-RunRecord -RunID "$stamp-$($m.Replace(':','_'))-2" -Model $m -Phase "generated" -UsedPrompt $generatedPromptClean -Resp $r2
            Write-RecordToCsv -Record $rec2
            Write-Output "Phase 2 completed for $m"
        }
    } catch {
        # A failed model gets an error row; the loop continues with the next model.
        $err = $_.Exception.Message
        Write-Output "ERROR: $m failed - $err"
        $recErr = [pscustomobject]@{
            DateTimeJST = (Get-Date).ToString("yyyy-MM-dd HH:mm:ss")
            RunID = "$stamp-$($m.Replace(':','_'))-ERR"
            Ollama_Version = $OllamaVersion
            Model = $m
            Phase = "error"
            Temperature = $Temperature
            TopP = $TopP
            Seed = $Seed
            MaxTokens = 128000
            Input_Tokens = $null
            Output_Tokens = $null
            Prompt_Tokens_per_s = $null
            Decode_Tokens_per_s = $null
            Total_Time_s = $null
            Done_Reason = "invoke-failed"
            Used_Prompt = $BasePrompt
            Response_Sample = ""
            Notes = "Invoke failed: $err"
        }
        Write-RecordToCsv -Record $recErr
    }
}
# Final summary: where the CSV records and the environment snapshot were written.
Write-Output "All results saved to: $OutCsv"
Write-Output "Environment snapshot (JSON): $OutEnv"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment