Skip to content

Instantly share code, notes, and snippets.

@Mazyod
Created November 10, 2025 05:09
Show Gist options
  • Select an option

  • Save Mazyod/77f625469da2585fa3a1c166e547ff9a to your computer and use it in GitHub Desktop.

Select an option

Save Mazyod/77f625469da2585fa3a1c166e547ff9a to your computer and use it in GitHub Desktop.
VLLM GPU Utilization Summary from docker-compose
#!/usr/bin/env -S uv run
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "pyyaml>=6.0",
# "rich>=13.0.0",
# ]
# ///
"""
Docker Compose GPU Utilization Analyzer
Parses a docker-compose.yaml file and shows GPU allocation summary
with utilization percentages from --gpu-memory-utilization flags.
"""
import argparse
import re
import sys
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Set
import yaml
from rich.console import Console
from rich.panel import Panel
from rich.table import Table
from rich.progress import Progress, BarColumn, TextColumn
from rich.text import Text
from rich.layout import Layout
from rich import box
def parse_gpu_memory_utilization(command) -> float:
    """
    Extract the GPU memory utilization fraction from a service command.

    Args:
        command: The compose ``command`` value; either a single string or a
            list of arguments (list items are joined with spaces).

    Returns:
        float: Utilization as a fraction (e.g. 0.9 for 90%), or 0.0 if the
        ``--gpu-memory-utilization`` flag is absent or its value is not a
        well-formed number.
    """
    # Normalise to one string so "--flag value" and "--flag=value" are
    # handled the same way for both the list and the string form.
    if isinstance(command, list):
        command_str = " ".join(str(arg) for arg in command)
    else:
        command_str = str(command)

    # Accept "0.9", "0.", ".9" and "90". The trailing lookahead rejects
    # malformed tokens such as "0.9.1" or a lone "." so that float() below
    # can never raise ValueError.
    pattern = r'--gpu-memory-utilization[=\s]+(\d+(?:\.\d*)?|\.\d+)(?![\d.])'
    match = re.search(pattern, command_str)
    if not match:
        return 0.0

    value = float(match.group(1))
    # Values above 1 are taken to be percentages (e.g. 90 means 90%).
    if value > 1:
        return value / 100.0
    return value
def parse_gpu_devices(service_config: dict) -> List[str]:
    """
    Extract GPU device IDs from a service configuration.

    Reads ``deploy.resources.reservations.devices`` and collects the
    ``device_ids`` of every entry whose ``driver`` is ``nvidia``.

    Args:
        service_config: Service configuration dictionary

    Returns:
        List of GPU device IDs as strings; empty when the service reserves
        no NVIDIA devices or the section is missing, null or malformed.
    """
    gpu_ids: List[str] = []

    # Walk down the nested mapping. Any level may be absent or null in the
    # YAML (e.g. a bare "deploy:" key), in which case there is nothing to
    # report.
    node = service_config
    for key in ('deploy', 'resources', 'reservations'):
        if not isinstance(node, dict):
            return gpu_ids
        node = node.get(key)

    devices = node.get('devices') if isinstance(node, dict) else None
    if not isinstance(devices, list):
        return gpu_ids

    for device in devices:
        # Skip malformed entries individually so that one bad entry does
        # not discard the valid ones that follow it.
        if not isinstance(device, dict) or device.get('driver') != 'nvidia':
            continue

        device_ids = device.get('device_ids')
        if device_ids is None:
            # Missing or null device_ids: no explicit IDs to attribute.
            continue

        # device_ids can be a list or a single scalar
        if isinstance(device_ids, list):
            gpu_ids.extend(str(did) for did in device_ids)
        else:
            gpu_ids.append(str(device_ids))

    return gpu_ids
def analyze_docker_compose(file_path: Path) -> Dict:
    """
    Analyze a docker-compose file for GPU memory allocation.

    A service contributes an allocation to each GPU it reserves only when
    its command carries a positive ``--gpu-memory-utilization`` value.

    Args:
        file_path: Path to docker-compose.yaml file

    Returns:
        Dictionary keyed by GPU ID (sorted as strings). Each value holds
        ``allocations`` (list of ``{'service', 'utilization'}`` dicts),
        ``total_utilization`` (sum of the fractions) and ``remaining``
        (unallocated fraction, never below 0). Empty when the file has no
        services or no GPU allocations.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        # safe_load returns None for an empty document.
        compose_data = yaml.safe_load(f) or {}

    services = compose_data.get('services') if isinstance(compose_data, dict) else None
    if not isinstance(services, dict):
        services = {}

    # Track GPU allocations: {gpu_id: [{'service': ..., 'utilization': ...}]}
    gpu_allocations = defaultdict(list)

    for service_name, service_config in services.items():
        if not isinstance(service_config, dict):
            # A service declared with an empty body parses as None.
            continue

        gpu_ids = parse_gpu_devices(service_config)
        command = service_config.get('command')

        # Always compute the value for *this* service so that a service
        # without a command never reuses the previous service's figure.
        utilization = parse_gpu_memory_utilization(command) if command else 0.0
        if not gpu_ids or utilization <= 0:
            continue

        for gpu_id in gpu_ids:
            gpu_allocations[gpu_id].append({
                'service': service_name,
                'utilization': utilization
            })

    # Calculate totals per GPU
    gpu_summary = {}
    for gpu_id in sorted(gpu_allocations):
        allocations = gpu_allocations[gpu_id]
        # Round away binary floating-point noise (0.1 + 0.2 + 0.7 sums to
        # 1.0000000000000002) so an exactly full GPU is not reported as
        # overallocated.
        total_utilization = round(
            sum(alloc['utilization'] for alloc in allocations), 6
        )
        gpu_summary[gpu_id] = {
            'allocations': allocations,
            'total_utilization': total_utilization,
            'remaining': max(0, 1.0 - total_utilization)
        }

    return gpu_summary
def create_gpu_table(gpu_summary: Dict, console: Console) -> Table:
    """
    Create a rich table showing the GPU allocation summary.

    Args:
        gpu_summary: Mapping of GPU ID to a dict with ``allocations``,
            ``total_utilization`` and ``remaining`` entries.
        console: Accepted for interface compatibility; not used here.

    Returns:
        Table with one row per (GPU, service) allocation. The first row of
        each GPU also carries the GPU ID, totals and status; GPUs are
        separated by section lines. A single placeholder row is produced
        when ``gpu_summary`` is empty.
    """
    table = Table(
        title="🎮 GPU Allocation Summary",
        box=box.ROUNDED,
        title_style="bold magenta",
        show_header=True,
        header_style="bold cyan"
    )
    table.add_column("GPU ID", style="cyan", justify="center", width=10)
    table.add_column("Service", style="yellow")
    table.add_column("Utilization", justify="right", style="green")
    table.add_column("Total Used", justify="right", style="bold blue")
    table.add_column("Remaining", justify="right", style="magenta")
    table.add_column("Status", justify="center")

    if not gpu_summary:
        table.add_row("—", "No GPU allocations found", "—", "—", "—", "❌")
        return table

    # Sort once and track the position, so the "last GPU" test matches the
    # order actually rendered regardless of the dict's insertion order.
    ordered = sorted(gpu_summary.items())
    last_index = len(ordered) - 1

    for position, (gpu_id, data) in enumerate(ordered):
        allocations = data['allocations']
        total_util = data['total_utilization']
        remaining = data['remaining']

        # Determine status
        if total_util > 1.0:
            status = "⚠️ OVER"
            status_style = "bold red"
        elif total_util > 0.9:
            status = "⚡ HIGH"
            status_style = "bold yellow"
        elif total_util > 0.5:
            status = "✓ OK"
            status_style = "bold green"
        else:
            status = "✓ LOW"
            status_style = "bold green"

        # Add rows for each allocation
        for idx, alloc in enumerate(allocations):
            if idx == 0:
                # First row includes GPU ID and totals
                table.add_row(
                    f"[bold]{gpu_id}[/bold]",
                    alloc['service'],
                    f"{alloc['utilization']*100:.1f}%",
                    f"[bold]{total_util*100:.1f}%[/bold]",
                    f"{remaining*100:.1f}%",
                    f"[{status_style}]{status}[/{status_style}]"
                )
            else:
                # Subsequent rows for same GPU
                table.add_row(
                    "",
                    alloc['service'],
                    f"{alloc['utilization']*100:.1f}%",
                    "",
                    "",
                    ""
                )

        # Add separator between GPUs if not last
        if position < last_index:
            table.add_section()

    return table
def create_utilization_bars(gpu_summary: Dict, console: Console):
    """
    Print one visual utilization bar per GPU to the console.

    Args:
        gpu_summary: Mapping of GPU ID to a dict containing
            ``total_utilization`` (fraction; may exceed 1.0).
        console: Rich console the bars are printed to.

    Returns:
        None. Nothing is printed when ``gpu_summary`` is empty.
    """
    if not gpu_summary:
        return

    console.print("\n[bold cyan]📊 GPU Memory Utilization Bars[/bold cyan]\n")

    bar_width = 40
    for gpu_id, data in sorted(gpu_summary.items()):
        total_util = data['total_utilization']

        # Determine color based on utilization
        if total_util > 1.0:
            color = "red"
        elif total_util > 0.9:
            color = "yellow"
        elif total_util > 0.5:
            color = "green"
        else:
            color = "blue"

        # The bar is capped at full width even when the GPU is overallocated.
        filled = int(min(total_util, 1.0) * bar_width)
        bar = "█" * filled + "░" * (bar_width - filled)

        text = Text(f"GPU {gpu_id}: ", style="bold cyan")
        # Text.append() treats its argument as literal text and does not
        # parse "[color]...[/color]" markup, so the color must be passed
        # as a style.
        text.append(bar, style=color)
        text.append(" ")
        text.append(f"{total_util*100:.1f}%", style=f"bold {color}")
        if total_util > 1.0:
            text.append(" ⚠️ OVERALLOCATED!", style="bold red")

        console.print(text)
def create_statistics_panel(gpu_summary: Dict) -> Panel:
    """
    Build the panel of overall statistics.

    Args:
        gpu_summary: Mapping of GPU ID to a dict with ``allocations`` and
            ``total_utilization`` entries.

    Returns:
        Panel listing the GPU count, service count, average utilization
        and how many GPUs are well utilized, highly utilized (above 90%
        up to 100%) or overallocated (above 100%). A yellow notice panel
        is returned when ``gpu_summary`` is empty.
    """
    if not gpu_summary:
        return Panel(
            "[yellow]No GPU allocations found in docker-compose.yaml[/yellow]",
            title="📈 Statistics",
            border_style="yellow",
        )

    entries = list(gpu_summary.values())
    totals = [entry['total_utilization'] for entry in entries]

    gpu_count = len(gpu_summary)
    over_count = len([t for t in totals if t > 1.0])
    high_count = len([t for t in totals if 0.9 < t <= 1.0])
    mean_util = sum(totals) / gpu_count
    service_count = sum(len(entry['allocations']) for entry in entries)
    # Whatever is neither overallocated nor highly utilized counts as well utilized.
    well_count = gpu_count - over_count - high_count

    lines = [
        f"[bold cyan]Total GPUs:[/bold cyan] {gpu_count}",
        f"[bold cyan]Total Services:[/bold cyan] {service_count}",
        f"[bold cyan]Average Utilization:[/bold cyan] {mean_util*100:.1f}%",
        f"[bold green]Well Utilized:[/bold green] {well_count}",
        f"[bold yellow]Highly Utilized (>90%):[/bold yellow] {high_count}",
        f"[bold red]Overallocated (>100%):[/bold red] {over_count}",
    ]
    # The panel text ends with a trailing newline.
    body = "\n".join(lines) + "\n"

    return Panel(body, title="📈 Statistics", border_style="cyan", box=box.ROUNDED)
def main():
    """
    Command-line entry point.

    Parses the optional compose-file path (default ``docker-compose.yaml``),
    analyzes it and prints the statistics panel, the allocation table, the
    utilization bars and, if any GPU totals more than 100%, an
    overallocation alert. Exits with status 1 when the file is missing, is
    not a regular file, or cannot be read or parsed.
    """
    parser = argparse.ArgumentParser(
        description="Analyze GPU utilization from docker-compose.yaml",
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        'file',
        type=Path,
        nargs='?',
        default=Path('docker-compose.yaml'),
        help='Path to docker-compose.yaml file (default: docker-compose.yaml)'
    )
    args = parser.parse_args()
    console = Console()

    # Check if file exists
    if not args.file.exists():
        console.print(f"[bold red]Error:[/bold red] File '{args.file}' not found!")
        sys.exit(1)
    # exists() is also true for directories, which cannot be opened as a file.
    if not args.file.is_file():
        console.print(f"[bold red]Error:[/bold red] '{args.file}' is not a file!")
        sys.exit(1)

    # Show loading message
    try:
        with console.status("[bold green]Analyzing docker-compose.yaml...", spinner="dots"):
            gpu_summary = analyze_docker_compose(args.file)
    except (OSError, UnicodeDecodeError, yaml.YAMLError) as exc:
        # Report unreadable or invalid files as a clean error, not a
        # traceback. The detail is added as plain Text because exception
        # messages may contain square brackets that console markup would
        # try to interpret.
        console.print(Text.assemble(
            ("Error:", "bold red"),
            f" Could not read '{args.file}': {exc}",
        ))
        sys.exit(1)

    # Display results
    console.print()
    console.print(Panel.fit(
        f"[bold cyan]Docker Compose GPU Analyzer[/bold cyan]\n"
        f"File: [yellow]{args.file}[/yellow]",
        border_style="blue"
    ))
    console.print()

    # Statistics panel
    console.print(create_statistics_panel(gpu_summary))
    console.print()

    # Main table
    table = create_gpu_table(gpu_summary, console)
    console.print(table)

    # Utilization bars
    if gpu_summary:
        create_utilization_bars(gpu_summary, console)
        console.print()

    # Warnings for overallocation
    overallocated = {gpu_id: data for gpu_id, data in gpu_summary.items()
                     if data['total_utilization'] > 1.0}
    if overallocated:
        console.print(Panel(
            "[bold red]⚠️ Warning: Some GPUs are overallocated![/bold red]\n\n"
            + "\n".join(
                f"GPU {gpu_id}: {data['total_utilization']*100:.1f}% allocated"
                for gpu_id, data in overallocated.items()
            ),
            title="⚠️ Overallocation Alert",
            border_style="red",
            box=box.DOUBLE
        ))
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment