Created
November 10, 2025 05:09
-
-
Save Mazyod/77f625469da2585fa3a1c166e547ff9a to your computer and use it in GitHub Desktop.
VLLM GPU Utilization Summary from docker-compose
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env -S uv run | |
| # /// script | |
| # requires-python = ">=3.10" | |
| # dependencies = [ | |
| # "pyyaml>=6.0", | |
| # "rich>=13.0.0", | |
| # ] | |
| # /// | |
| """ | |
| Docker Compose GPU Utilization Analyzer | |
| Parses a docker-compose.yaml file and shows GPU allocation summary | |
| with utilization percentages from --gpu-memory-utilization flags. | |
| """ | |
| import argparse | |
| import re | |
| import sys | |
| from collections import defaultdict | |
| from pathlib import Path | |
| from typing import Dict, List, Set | |
| import yaml | |
| from rich.console import Console | |
| from rich.panel import Panel | |
| from rich.table import Table | |
| from rich.progress import Progress, BarColumn, TextColumn | |
| from rich.text import Text | |
| from rich.layout import Layout | |
| from rich import box | |
def parse_gpu_memory_utilization(command) -> float:
    """
    Extract GPU memory utilization from a service command.

    Args:
        command: The compose ``command`` value. Can be a string or a list
            of command arguments; any other value is stringified.

    Returns:
        float: Utilization as a fraction (0.0 to 1.0), or 0.0 if the
        ``--gpu-memory-utilization`` flag is not found or its value is not
        a valid number. Values greater than 1 are treated as percentages
        (e.g. 90 means 90% and is returned as 0.9).
    """
    # Flatten list-form commands so one regex covers both "--flag value"
    # (possibly split across two list items) and "--flag=value".
    if isinstance(command, list):
        command_str = " ".join(str(arg) for arg in command)
    else:
        command_str = str(command)

    # Look for --gpu-memory-utilization flag followed by a number
    match = re.search(r'--gpu-memory-utilization[=\s]+([0-9.]+)', command_str)
    if match is None:
        return 0.0

    try:
        value = float(match.group(1))
    except ValueError:
        # The character class also matches malformed tokens such as
        # "1.2.3" or "."; treat them like a missing flag instead of
        # letting the conversion error abort the whole analysis.
        return 0.0

    # If value is > 1, assume it's a percentage (e.g., 90 means 90%)
    if value > 1:
        return value / 100.0
    return value
def parse_gpu_devices(service_config: dict) -> List[str]:
    """
    Extract GPU device IDs from service configuration.

    Walks ``deploy.resources.reservations.devices`` and collects the
    ``device_ids`` of every device entry whose ``driver`` is ``nvidia``.

    Args:
        service_config: Service configuration dictionary

    Returns:
        List of GPU device IDs as strings. Empty when the service reserves
        no nvidia devices or when any level of the path is missing, null,
        or not of the expected shape.
    """
    # YAML keys with no value load as None, so every level is type-checked
    # rather than relying on .get() defaults.
    node = service_config
    for key in ('deploy', 'resources', 'reservations', 'devices'):
        if not isinstance(node, dict):
            return []
        node = node.get(key)

    devices = node
    if not isinstance(devices, (list, tuple)):
        return []

    gpu_ids: List[str] = []
    for device in devices:
        # Skip malformed entries individually so one bad item does not
        # discard the valid ones that follow it.
        if not isinstance(device, dict) or device.get('driver') != 'nvidia':
            continue

        device_ids = device.get('device_ids')
        if device_ids is None:
            # Missing or null device_ids: nothing to record (avoids
            # reporting a bogus GPU named "None").
            continue

        # device_ids can be a list or a single string
        if isinstance(device_ids, list):
            gpu_ids.extend(str(did) for did in device_ids)
        else:
            gpu_ids.append(str(device_ids))

    return gpu_ids
def analyze_docker_compose(file_path: Path) -> Dict:
    """
    Analyze docker-compose file for GPU utilization.

    A service contributes an allocation to each GPU it reserves only when
    it both reserves GPU devices and has a command carrying a positive
    ``--gpu-memory-utilization`` value.

    Args:
        file_path: Path to docker-compose.yaml file

    Returns:
        Dictionary keyed by GPU id (in sorted order). Each value is a dict
        with:
            'allocations': list of {'service': name, 'utilization': fraction}
            'total_utilization': sum of the fractions for that GPU
            'remaining': unallocated fraction, never below 0
        Empty when the file defines no services with GPU allocations.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        compose_data = yaml.safe_load(f)

    # An empty file loads as None, and a bare "services:" key loads as
    # None as well; both mean there is nothing to analyze.
    services = compose_data.get('services') if isinstance(compose_data, dict) else None
    if not isinstance(services, dict):
        services = {}

    # Track GPU allocations: {gpu_id: [{'service': ..., 'utilization': ...}]}
    gpu_allocations = defaultdict(list)

    for service_name, service_config in services.items():
        # A service declared with an empty body loads as None.
        if not isinstance(service_config, dict):
            continue

        gpu_ids = parse_gpu_devices(service_config)
        command = service_config.get('command')
        if not gpu_ids or not command:
            continue

        # Computed per service, so a service without a command can never
        # reuse the value parsed for a previous one.
        utilization = parse_gpu_memory_utilization(command)
        if utilization <= 0:
            continue

        for gpu_id in gpu_ids:
            gpu_allocations[gpu_id].append({
                'service': service_name,
                'utilization': utilization
            })

    # Calculate totals per GPU
    gpu_summary = {}
    for gpu_id in sorted(gpu_allocations):
        allocations = gpu_allocations[gpu_id]
        # Round away binary floating-point noise so fractions that add up
        # to exactly 100% are not reported as marginally over 1.0.
        total_utilization = round(
            sum(alloc['utilization'] for alloc in allocations), 6
        )
        gpu_summary[gpu_id] = {
            'allocations': allocations,
            'total_utilization': total_utilization,
            'remaining': max(0.0, 1.0 - total_utilization)
        }

    return gpu_summary
def create_gpu_table(gpu_summary: Dict, console: Console) -> Table:
    """Create a rich table showing GPU allocation summary.

    Args:
        gpu_summary: Mapping of GPU id to a dict with 'allocations',
            'total_utilization' and 'remaining' entries.
        console: Accepted for call-site compatibility; not used here.

    Returns:
        A Table with one row per (GPU, service) allocation. The GPU id,
        totals and status appear only on the first row of each GPU, and
        GPUs are separated by section lines. When gpu_summary is empty the
        table holds a single placeholder row.
    """
    table = Table(
        title="🎮 GPU Allocation Summary",
        box=box.ROUNDED,
        title_style="bold magenta",
        show_header=True,
        header_style="bold cyan"
    )
    table.add_column("GPU ID", style="cyan", justify="center", width=10)
    table.add_column("Service", style="yellow")
    table.add_column("Utilization", justify="right", style="green")
    table.add_column("Total Used", justify="right", style="bold blue")
    table.add_column("Remaining", justify="right", style="magenta")
    table.add_column("Status", justify="center")

    if not gpu_summary:
        table.add_row("—", "No GPU allocations found", "—", "—", "—", "❌")
        return table

    # Sort once and track position, so the "last GPU" test follows the
    # order actually rendered (not the dict's insertion order) and the key
    # list is not rebuilt on every iteration.
    ordered = sorted(gpu_summary.items())
    last_position = len(ordered) - 1

    for position, (gpu_id, data) in enumerate(ordered):
        allocations = data['allocations']
        total_util = data['total_utilization']
        remaining = data['remaining']

        # Determine status
        if total_util > 1.0:
            status = "⚠️ OVER"
            status_style = "bold red"
        elif total_util > 0.9:
            status = "⚡ HIGH"
            status_style = "bold yellow"
        elif total_util > 0.5:
            status = "✓ OK"
            status_style = "bold green"
        else:
            status = "✓ LOW"
            status_style = "bold green"

        # Add rows for each allocation
        for idx, alloc in enumerate(allocations):
            if idx == 0:
                # First row includes GPU ID and totals
                table.add_row(
                    f"[bold]{gpu_id}[/bold]",
                    alloc['service'],
                    f"{alloc['utilization']*100:.1f}%",
                    f"[bold]{total_util*100:.1f}%[/bold]",
                    f"{remaining*100:.1f}%",
                    f"[{status_style}]{status}[/{status_style}]"
                )
            else:
                # Subsequent rows for same GPU
                table.add_row(
                    "",
                    alloc['service'],
                    f"{alloc['utilization']*100:.1f}%",
                    "",
                    "",
                    ""
                )

        # Add separator between GPUs if not last
        if position != last_position:
            table.add_section()

    return table
def create_utilization_bars(gpu_summary: Dict, console: Console):
    """Create visual progress bars for GPU utilization.

    Prints a heading followed by one 40-cell bar per GPU (in sorted id
    order) to the given console. Prints nothing when gpu_summary is empty.

    Args:
        gpu_summary: Mapping of GPU id to a dict whose 'total_utilization'
            entry is the allocated fraction for that GPU.
        console: Console the bars are printed to.
    """
    if not gpu_summary:
        return

    console.print("\n[bold cyan]📊 GPU Memory Utilization Bars[/bold cyan]\n")

    bar_width = 40

    for gpu_id, data in sorted(gpu_summary.items()):
        total_util = data['total_utilization']

        # Determine color based on utilization
        if total_util > 1.0:
            color = "red"
        elif total_util > 0.9:
            color = "yellow"
        elif total_util > 0.5:
            color = "green"
        else:
            color = "blue"

        # Build the bar; the fill is capped at full width so an
        # overallocated GPU shows a full bar rather than overflowing.
        filled = int(min(total_util, 1.0) * bar_width)
        bar = "█" * filled + "░" * (bar_width - filled)

        text = Text(f"GPU {gpu_id}: ", style="bold cyan")
        # Text.append() does not parse console markup, so the colour has
        # to be passed as a style; "[color]...[/color]" tags would be
        # printed literally.
        text.append(bar, style=color)
        text.append(" ")
        text.append(f"{total_util*100:.1f}%", style=f"bold {color}")

        if total_util > 1.0:
            text.append(" ⚠️ OVERALLOCATED!", style="bold red")

        console.print(text)
def create_statistics_panel(gpu_summary: Dict) -> Panel:
    """Build the overall statistics panel.

    Args:
        gpu_summary: Mapping of GPU id to a dict with 'allocations' and
            'total_utilization' entries.

    Returns:
        A Panel listing GPU count, service count, average utilization and
        how many GPUs are well utilized, highly utilized (above 90% up to
        100%) or overallocated (above 100%). For an empty summary, a
        yellow panel stating that no allocations were found.
    """
    if not gpu_summary:
        return Panel(
            "[yellow]No GPU allocations found in docker-compose.yaml[/yellow]",
            title="📈 Statistics",
            border_style="yellow",
        )

    entries = list(gpu_summary.values())
    totals = [entry['total_utilization'] for entry in entries]

    gpu_count = len(gpu_summary)
    over_count = len([t for t in totals if t > 1.0])
    high_count = len([t for t in totals if 0.9 < t <= 1.0])
    mean_util = sum(totals) / gpu_count
    service_count = sum(len(entry['allocations']) for entry in entries)
    # Everything that is neither highly utilized nor overallocated.
    well_count = gpu_count - over_count - high_count

    rows = [
        f"[bold cyan]Total GPUs:[/bold cyan] {gpu_count}",
        f"[bold cyan]Total Services:[/bold cyan] {service_count}",
        f"[bold cyan]Average Utilization:[/bold cyan] {mean_util*100:.1f}%",
        f"[bold green]Well Utilized:[/bold green] {well_count}",
        f"[bold yellow]Highly Utilized (>90%):[/bold yellow] {high_count}",
        f"[bold red]Overallocated (>100%):[/bold red] {over_count}",
    ]
    # The panel body ends with a newline after the last row.
    body = "\n".join(rows) + "\n"

    return Panel(body, title="📈 Statistics", border_style="cyan", box=box.ROUNDED)
def main():
    """Command-line entry point.

    Parses the optional compose-file path argument, analyzes the file and
    prints the header, statistics panel, allocation table, utilization
    bars and, if any GPU exceeds 100%, an overallocation alert.

    Exits with status 1 when the file does not exist, cannot be read, or
    is not valid YAML.
    """
    parser = argparse.ArgumentParser(
        description="Analyze GPU utilization from docker-compose.yaml",
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        'file',
        type=Path,
        nargs='?',
        default=Path('docker-compose.yaml'),
        help='Path to docker-compose.yaml file (default: docker-compose.yaml)'
    )
    args = parser.parse_args()

    console = Console()

    # Check if file exists
    if not args.file.exists():
        console.print(f"[bold red]Error:[/bold red] File '{args.file}' not found!")
        sys.exit(1)

    # Show loading message
    try:
        with console.status("[bold green]Analyzing docker-compose.yaml...", spinner="dots"):
            gpu_summary = analyze_docker_compose(args.file)
    except (OSError, yaml.YAMLError) as exc:
        # Report unreadable paths (e.g. a directory) and malformed YAML the
        # same way as a missing file instead of dumping a traceback.
        console.print(f"[bold red]Error:[/bold red] Could not analyze '{args.file}':")
        # The exception text is arbitrary, so it is printed without markup
        # parsing to keep stray brackets from being read as style tags.
        console.print(str(exc), markup=False, highlight=False)
        sys.exit(1)

    # Display results
    console.print()
    console.print(Panel.fit(
        f"[bold cyan]Docker Compose GPU Analyzer[/bold cyan]\n"
        f"File: [yellow]{args.file}[/yellow]",
        border_style="blue"
    ))
    console.print()

    # Statistics panel
    console.print(create_statistics_panel(gpu_summary))
    console.print()

    # Main table
    table = create_gpu_table(gpu_summary, console)
    console.print(table)

    # Utilization bars
    if gpu_summary:
        create_utilization_bars(gpu_summary, console)
        console.print()

    # Warnings for overallocation
    overallocated = {gpu_id: data for gpu_id, data in gpu_summary.items()
                     if data['total_utilization'] > 1.0}
    if overallocated:
        console.print(Panel(
            "[bold red]⚠️ Warning: Some GPUs are overallocated![/bold red]\n\n"
            + "\n".join(
                f"GPU {gpu_id}: {data['total_utilization']*100:.1f}% allocated"
                for gpu_id, data in overallocated.items()
            ),
            title="⚠️ Overallocation Alert",
            border_style="red",
            box=box.DOUBLE
        ))
# Run the analyzer only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment