Skip to content

Instantly share code, notes, and snippets.

@seabass011
Last active February 1, 2026 22:54
Show Gist options
  • Select an option

  • Save seabass011/d2b00c9f46b71ab656eec5faaad32d1c to your computer and use it in GitHub Desktop.

Select an option

Save seabass011/d2b00c9f46b71ab656eec5faaad32d1c to your computer and use it in GitHub Desktop.
iOS Simulator Bridge for AI Agents - gives agents eyes into mobile screens

Simctl Bridge

REST API that wraps xcrun simctl so agents can see iOS Simulator screens and catch visual bugs the DOM tree hides.

Why

Playwright gives agents browser eyes. This gives them mobile eyes.

The agent captures raw framebuffer screenshots from the iOS Simulator, bypassing the DOM entirely. CSS breaks that are invisible in Chrome DevTools become visible.

Prerequisites

  • macOS with Xcode installed
  • iOS Simulator runtime (xcodebuild -downloadPlatform iOS)
  • Python 3.10+

Setup

cd simctl-server
python -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt

Run

uvicorn app:app --host 127.0.0.1 --port 8787

Endpoints

Method Path Description
GET /healthz Health check
GET /devices List all simulators
POST /boot/{udid} Boot a simulator
POST /shutdown/{udid} Shutdown a simulator
POST /open/{udid}?url=... Open URL in simulator browser
GET /screenshot/{udid} Capture PNG screenshot
GET /screenshot/{udid}/base64 Capture as base64 JSON
WS /stream/{udid} Stream screenshots as base64 JSON frames (about 1 fps)
POST /pasteboard/{udid} Copy text to clipboard (JSON body: {"text": "..."})

Use booted as the UDID to target whichever simulator is currently running.

Example

# List available simulators
curl http://localhost:8787/devices

# Capture screenshot from currently booted simulator
curl http://localhost:8787/screenshot/booted -o screen.png
open screen.png

# Or boot a specific device by UDID first
curl -X POST http://localhost:8787/boot/YOUR-DEVICE-UDID-HERE

Agent Integration

For LLM agents, use the base64 endpoint:

import requests

resp = requests.get("http://localhost:8787/screenshot/booted/base64")
data = resp.json()
# data["data"] is base64 PNG, ready for vision models

Limitations

simctl supports only screenshots and basic device control (boot, shutdown, open URL, clipboard). For tap/swipe/keypress input, you need one of:

  • XCUITest framework
  • Appium
  • AppleScript targeting Simulator.app

This server focuses on giving agents eyes, not hands.

License

MIT

"""
Simctl Bridge - REST API for iOS Simulator control.
Gives agents "eyes" (screenshots) and basic control over iOS simulators
by wrapping xcrun simctl commands.
Usage:
uvicorn app:app --host 127.0.0.1 --port 8787
"""
import asyncio
import base64
import json
import re
from typing import Any, Dict, List, Optional
from fastapi import FastAPI, HTTPException, WebSocket, WebSocketDisconnect, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import Response, JSONResponse
from pydantic import BaseModel
from starlette.middleware.base import BaseHTTPMiddleware
class PasteRequest(BaseModel):
    """JSON payload accepted by the pasteboard endpoint: ``{"text": "..."}``."""

    # Text that will be piped to ``simctl pbcopy`` for the target simulator.
    text: str
class PathTraversalMiddleware(BaseHTTPMiddleware):
    """Reject HTTP requests whose URL path contains a ``..`` sequence."""

    async def dispatch(self, request: Request, call_next):
        """Answer 400 for suspicious paths; otherwise continue down the chain.

        Only ``request.url.path`` is inspected, so ``..`` inside the query
        string does not trigger the rejection.
        """
        looks_like_traversal = ".." in request.url.path
        if not looks_like_traversal:
            return await call_next(request)

        return JSONResponse(
            status_code=400,
            content={"detail": "Invalid request: path traversal detected"},
        )
# ASGI application object; this is the ``app`` in ``uvicorn app:app``.
app = FastAPI(
title="Simctl Bridge",
version="0.2.0",
description="REST API bridge for xcrun simctl - gives agents eyes into iOS Simulator",
)
# Register the ".." path guard first, then CORS.
# NOTE(review): Starlette documents that middleware added later wraps middleware
# added earlier, so CORS is presumably the OUTERMOST layer at request time and
# the path guard runs inside it - confirm this is the intended ordering.
app.add_middleware(PathTraversalMiddleware)
# Allow browser clients from any origin, with any method and any header.
# NOTE(review): with a wildcard origin, any web page open in a local browser can
# call this server and read its responses (including screenshots) - consider
# restricting allow_origins if that is not intended.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_methods=["*"],
allow_headers=["*"],
)
# UDID allow-list used to vet path parameters before they are handed to simctl.
# Accepts exactly one of:
#   - a UUID in 8-4-4-4-12 hexadecimal form (either letter case), or
#   - the literal keyword "booted".
# Anchored with \A ... \Z rather than ^ ... $: in Python, "$" also matches just
# before a trailing newline, so "booted\n" used to be accepted by .match().
UDID_PATTERN = re.compile(
    r"\A(?:[A-Fa-f0-9]{8}-[A-Fa-f0-9]{4}-[A-Fa-f0-9]{4}-[A-Fa-f0-9]{4}-[A-Fa-f0-9]{12}|booted)\Z"
)
def validate_udid(udid: str) -> str:
    """Check that *udid* matches ``UDID_PATTERN`` and return it unchanged.

    Args:
        udid: Device identifier taken from the request path.

    Returns:
        The same ``udid`` string, once it has passed validation.

    Raises:
        HTTPException: 400 when the value does not match the allowed format.
    """
    # fullmatch() instead of match(): a "$" anchor in the pattern also matches
    # right before a trailing newline, so match() would let "booted\n" through.
    # fullmatch() requires the match to consume the entire string.
    if UDID_PATTERN.fullmatch(udid) is None:
        raise HTTPException(status_code=400, detail=f"Invalid UDID format: {udid}")
    return udid
async def run_simctl(args: List[str], input_text: Optional[str] = None, timeout: int = 30) -> bytes:
    """Run ``xcrun simctl <args>`` asynchronously and return its stdout.

    Args:
        args: Arguments appended after ``xcrun simctl``. They are passed as an
            argv list (no shell is involved).
        input_text: Text written to the child's stdin. ``None`` means the child
            gets no stdin pipe; an empty string is sent as empty input.
        timeout: Seconds to wait for the command to finish.

    Returns:
        The raw bytes the command wrote to stdout.

    Raises:
        HTTPException: 404 when stderr contains "Invalid device" or "not found",
            500 for any other non-zero exit or when ``xcrun`` cannot be
            executed, 504 when the command exceeds ``timeout``.
    """
    cmd = ["xcrun", "simctl", *args]

    # Compare against None, not truthiness: with input_text == "" the old code
    # opened no stdin pipe, so the child inherited the server's stdin and a
    # reader such as `pbcopy` could block on it until the timeout fired.
    stdin_bytes = input_text.encode() if input_text is not None else None

    try:
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdin=asyncio.subprocess.PIPE if stdin_bytes is not None else None,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
    except FileNotFoundError as exc:
        raise HTTPException(
            status_code=500, detail="xcrun/simctl not found - is Xcode installed?"
        ) from exc

    try:
        stdout, stderr = await asyncio.wait_for(
            proc.communicate(input=stdin_bytes),
            timeout=timeout,
        )
    except asyncio.TimeoutError as exc:
        # wait_for() only cancels communicate(); the child keeps running unless
        # it is killed explicitly, so terminate and reap it to avoid orphans.
        try:
            proc.kill()
        except ProcessLookupError:
            pass  # The process exited between the timeout and the kill.
        await proc.wait()
        raise HTTPException(
            status_code=504, detail=f"Command timed out after {timeout}s"
        ) from exc

    if proc.returncode != 0:
        # errors="replace" so undecodable stderr bytes cannot mask the real
        # failure with a UnicodeDecodeError.
        error_msg = stderr.decode(errors="replace").strip() or "simctl command failed"
        # Return 404 for device not found, 500 for other errors
        if "Invalid device" in error_msg or "not found" in error_msg.lower():
            raise HTTPException(status_code=404, detail=error_msg)
        raise HTTPException(status_code=500, detail=error_msg)

    return stdout
@app.get("/healthz")
async def healthz() -> Dict[str, str]:
    """Liveness probe: always answers with a fixed ``ok`` status."""
    status_body = {"status": "ok"}
    return status_body
@app.get("/devices")
async def list_devices() -> Dict[str, Any]:
    """Return the parsed JSON output of ``simctl list devices -j``.

    Raises:
        HTTPException: 500 when the command output is not valid JSON, plus
            whatever ``run_simctl`` raises.
    """
    output = await run_simctl(["list", "devices", "-j"])
    text = output.decode()
    try:
        devices = json.loads(text)
    except json.JSONDecodeError as exc:
        raise HTTPException(status_code=500, detail=f"Failed to parse simctl output: {exc}")
    return devices
@app.post("/boot/{udid}")
async def boot_device(udid: str) -> Dict[str, str]:
    """Run ``simctl boot`` for the given device and report the result."""
    target = validate_udid(udid)  # returns the same string once validated
    await run_simctl(["boot", target])
    result = {"status": "booted", "udid": target}
    return result
@app.post("/shutdown/{udid}")
async def shutdown_device(udid: str) -> Dict[str, str]:
    """Run ``simctl shutdown`` for the given device and report the result."""
    target = validate_udid(udid)  # returns the same string once validated
    await run_simctl(["shutdown", target])
    result = {"status": "shutdown", "udid": target}
    return result
@app.post("/open/{udid}")
async def open_url(udid: str, url: str = "http://localhost:3000") -> Dict[str, str]:
    """Hand *url* to ``simctl openurl`` on the given device.

    The ``url`` query parameter defaults to ``http://localhost:3000``.
    """
    target = validate_udid(udid)  # returns the same string once validated
    command = ["openurl", target, url]
    await run_simctl(command)
    return {"status": "opened", "udid": target, "url": url}
@app.get("/screenshot/{udid}")
async def screenshot(udid: str) -> Response:
    """Capture a PNG screenshot of the simulator and return it as ``image/png``.

    Pass ``booted`` as the udid to target the currently booted simulator.
    The capture command is given a 10 second timeout.
    """
    validate_udid(udid)
    # A trailing "-" is passed as the output argument; the PNG bytes are read
    # from the command's stdout.
    capture_args = ["io", udid, "screenshot", "--type=png", "-"]
    png_bytes = await run_simctl(capture_args, timeout=10)
    return Response(content=png_bytes, media_type="image/png")
@app.get("/screenshot/{udid}/base64")
async def screenshot_base64(udid: str) -> Dict[str, str]:
    """Capture a PNG screenshot and return it base64-encoded inside JSON.

    The response has the shape ``{"type": "image/png", "data": "<base64>"}``,
    which suits agents that need the image embedded in a JSON payload.
    """
    validate_udid(udid)
    capture_args = ["io", udid, "screenshot", "--type=png", "-"]
    png_bytes = await run_simctl(capture_args, timeout=10)
    encoded = base64.b64encode(png_bytes).decode("utf-8")
    return {"type": "image/png", "data": encoded}
@app.websocket("/stream/{udid}")
async def stream_screenshots(websocket: WebSocket, udid: str) -> None:
    """Stream simulator screenshots over a WebSocket as base64 JSON frames.

    After each successful capture the handler sends
    ``{"type": "screenshot", "data": "<base64>"}`` and then sleeps one second.
    If a capture fails, a single ``{"type": "error", "message": ...}`` frame is
    sent and the stream ends.

    Connect with: ws://localhost:8787/stream/booted

    An invalid ``udid`` is rejected by closing the socket with code 1008
    (policy violation) before the handshake is accepted.
    """
    # validate_udid() signals failure with HTTPException, which is an HTTP-level
    # error; raising it here would leave the WebSocket handshake unanswered.
    # Translate it into an explicit WebSocket close instead.
    try:
        validate_udid(udid)
    except HTTPException:
        await websocket.close(code=1008, reason="Invalid UDID format")
        return

    await websocket.accept()
    capture_args = ["io", udid, "screenshot", "--type=png", "-"]
    try:
        while True:
            try:
                img_data = await run_simctl(capture_args, timeout=10)
            except HTTPException as exc:
                # Report the capture failure to the client, then stop streaming.
                await websocket.send_json({"type": "error", "message": exc.detail})
                break
            payload = base64.b64encode(img_data).decode("utf-8")
            await websocket.send_json({"type": "screenshot", "data": payload})
            await asyncio.sleep(1)
    except WebSocketDisconnect:
        pass  # Client disconnected normally
    except Exception:
        # Deliberate best-effort: a send on a dead connection can fail in
        # several ways, and a lost connection simply ends the stream.
        pass
    finally:
        try:
            await websocket.close()
        except Exception:
            pass  # The socket is already closed or the transport is gone.
@app.post("/pasteboard/{udid}")
async def pasteboard(udid: str, request: PasteRequest) -> JSONResponse:
    """Pipe the ``text`` field of the JSON body to ``simctl pbcopy``.

    Expects a body of the form ``{"text": "..."}`` and replies with the status,
    the udid, and the length of the submitted text.
    """
    validate_udid(udid)
    text = request.text
    await run_simctl(["pbcopy", udid], input_text=text)
    summary = {"status": "copied", "udid": udid, "length": len(text)}
    return JSONResponse(summary)
# NOTE: simctl does NOT support tap/keypress/swipe directly.
# For input simulation, you need:
# - XCUITest framework
# - Appium
# - AppleScript (osascript) targeting Simulator.app
#
# This server focuses on what simctl actually supports:
# screenshots, boot/shutdown, clipboard, and URL opening.
fastapi==0.115.0
uvicorn[standard]==0.30.6
requests>=2.31.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment