Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save aont/740198ec4992e3e2ae46068cd68ac864 to your computer and use it in GitHub Desktop.

Select an option

Save aont/740198ec4992e3e2ae46068cd68ac864 to your computer and use it in GitHub Desktop.

Capturing Network Traffic with Playwright and Chrome

This sample code shows how to use Playwright and Chrome DevTools Protocol (CDP) in Python to capture network requests and responses from a real Chrome browser and save them as JSON.

The main flow is:

  1. Find a free TCP port. The script uses socket to get an available local port for Chrome’s --remote-debugging-port.

  2. Launch Chrome with CDP enabled. launch_chrome_with_cdp() starts Chrome with:

    • --remote-debugging-port=<port>
    • --user-data-dir=<directory>
    These flags allow Playwright to connect to the running Chrome instance. You need to adjust chrome_path for your environment.
  3. Wait for DevTools to be ready. wait_for_cdp_ready() repeatedly tries to open a TCP connection to the CDP port until Chrome is ready.

  4. Connect via Playwright and open a page. Using async_playwright() and connect_over_cdp(), the script attaches to Chrome, gets a browser context, and opens a new page. It then navigates to https://www.google.com and waits for the network to go idle.

  5. Listen to network events. Event handlers for "request" and "response" on the page collect:

    • URL, method, headers, status, etc.
    • A simple timestamp (the asyncio event loop's clock value, not wall-clock time)
    All events are appended to a network_events list.
  6. Save results to JSON and clean up. After navigation and a short additional wait, the script:

    • Writes network_events to output.json
    • Closes the Playwright browser/context
    • Gracefully terminates the Chrome process

This script is a compact example of how to combine async Python, Playwright, and Chrome DevTools to log web traffic for debugging, analysis, or automated monitoring.

import asyncio
import json
import socket
from pathlib import Path
from playwright.async_api import async_playwright
# Page that is loaded while network traffic is being recorded.
URL = "https://www.google.com"
# File that receives the captured network events as JSON.
OUTPUT_JSON = "output.json"
# Chrome profile directory passed to Chrome via --user-data-dir.
# You can change this path if needed.
USER_DATA_DIR = Path("./chrome-user-data-async").resolve()
def find_free_port(host: str = "127.0.0.1") -> int:
    """Return a TCP port that is currently free on *host*.

    A temporary socket is bound to port 0 so that the operating system picks
    an unused port; the chosen port is read back and the socket is closed.
    This is a quick synchronous call, so it is fine to use from async code.

    Args:
        host: Local IPv4 address to bind on. Defaults to the loopback address.

    Returns:
        The port number assigned by the operating system.

    Note:
        The probing socket is closed before this function returns, so another
        process could claim the port before the caller starts listening on it.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        # Port 0 asks the OS to choose any available port.
        sock.bind((host, 0))
        return sock.getsockname()[1]
async def wait_for_cdp_ready(port: int, timeout: float = 10.0) -> None:
    """Wait until something accepts TCP connections on 127.0.0.1:*port*.

    The port is probed repeatedly with short connection attempts until one
    succeeds or *timeout* seconds have elapsed.

    Args:
        port: TCP port of the DevTools Protocol endpoint on the loopback
            interface.
        timeout: Maximum total time to wait, in seconds.

    Raises:
        TimeoutError: If no connection could be established within *timeout*.
    """
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout

    while True:
        remaining = deadline - loop.time()
        if remaining <= 0:
            break
        try:
            # Never let a single attempt outlive the overall deadline.
            _reader, writer = await asyncio.wait_for(
                asyncio.open_connection("127.0.0.1", port),
                timeout=min(0.5, remaining),
            )
        except (OSError, asyncio.TimeoutError):
            # Not listening yet (refused / timed out): pause briefly and retry,
            # but do not sleep past the deadline.
            pause = min(0.2, max(deadline - loop.time(), 0.0))
            await asyncio.sleep(pause)
            continue

        # The connection was accepted, so the endpoint is up. A failure while
        # tearing down this probe connection does not change that.
        writer.close()
        try:
            await writer.wait_closed()
        except OSError:
            pass
        return

    raise TimeoutError("Chrome DevTools endpoint did not become ready in time.")
async def launch_chrome_with_cdp(
    user_data_dir: Path,
    port: int,
    chrome_path: "str | None" = None,
) -> asyncio.subprocess.Process:
    """Launch Chrome with --user-data-dir and --remote-debugging-port.

    Args:
        user_data_dir: Profile directory for Chrome; created if it is missing.
        port: TCP port passed to Chrome as the remote debugging port.
        chrome_path: Chrome executable (path or command name). When omitted,
            a per-platform default is used:
            Windows: r"C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe"
            macOS: "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
            Linux/other: "google-chrome" or "google-chrome-stable" from PATH.
            Pass it explicitly if Chrome is installed elsewhere.

    Returns:
        The asyncio subprocess handle of the started Chrome process. Its
        stdout and stderr are discarded.

    Raises:
        OSError: If the executable cannot be started (for example
            FileNotFoundError when the path does not exist).
    """
    # Local imports keep this block self-contained.
    import shutil
    import sys

    user_data_dir.mkdir(parents=True, exist_ok=True)

    if chrome_path is None:
        if sys.platform.startswith("win"):
            chrome_path = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe"
        elif sys.platform == "darwin":
            chrome_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
        else:
            # Fall back to the bare command name so the resulting error
            # message names the program that was looked for.
            chrome_path = (
                shutil.which("google-chrome")
                or shutil.which("google-chrome-stable")
                or "google-chrome"
            )

    args = [
        chrome_path,
        f"--remote-debugging-port={port}",
        f"--user-data-dir={user_data_dir}",
        "--no-first-run",
        "--no-default-browser-check",
        "--disable-dev-shm-usage",
        "--disable-gpu",
        "about:blank",
    ]
    return await asyncio.create_subprocess_exec(
        *args,
        stdout=asyncio.subprocess.DEVNULL,
        stderr=asyncio.subprocess.DEVNULL,
    )
async def _shutdown_chrome(proc) -> None:
    """Terminate the Chrome process, escalating to kill() after 5 seconds."""
    if proc is None or proc.returncode is not None:
        return
    try:
        proc.terminate()
    except ProcessLookupError:
        # The process exited between the returncode check and terminate().
        return
    try:
        await asyncio.wait_for(proc.wait(), timeout=5)
    except asyncio.TimeoutError:
        try:
            proc.kill()
        except ProcessLookupError:
            return
        await proc.wait()


async def main():
    """Capture the network traffic of one page load and save it as JSON.

    Steps:
      1. Pick a free port and start Chrome with remote debugging enabled.
      2. Wait for the DevTools port, then attach to Chrome with Playwright.
      3. Record every "request" and "response" event of a new page while it
         navigates to URL, plus five more seconds afterwards.
      4. Write the recorded events to OUTPUT_JSON.
      5. Close the Playwright connection and stop Chrome.

    The events recorded so far are written even if navigation fails (for
    example on a timeout); the error is then re-raised. Chrome is always
    stopped, whatever happens.
    """
    port = find_free_port()
    print(f"Using DevTools port: {port}")
    chrome_proc = await launch_chrome_with_cdp(USER_DATA_DIR, port)

    try:
        await wait_for_cdp_ready(port)
        print("Chrome DevTools endpoint is ready.")

        network_events = []
        # Timestamps use the event loop clock, not wall-clock time.
        loop = asyncio.get_running_loop()

        # Event handlers (do not need to be async). Any failure while reading
        # an event's attributes is recorded as an "*_error" entry instead of
        # propagating out of the handler.
        def on_request(request):
            try:
                event = {
                    "type": "request",
                    "url": request.url,
                    "method": request.method,
                    "headers": dict(request.headers),
                    "post_data": request.post_data,
                    "timestamp": loop.time(),
                }
            except Exception as e:
                event = {
                    "type": "request_error",
                    "error": str(e),
                    "url": getattr(request, "url", None),
                    "timestamp": loop.time(),
                }
            network_events.append(event)

        def on_response(response):
            try:
                event = {
                    "type": "response",
                    "url": response.url,
                    "status": response.status,
                    "status_text": response.status_text,
                    "headers": dict(response.headers),
                    "timestamp": loop.time(),
                }
            except Exception as e:
                event = {
                    "type": "response_error",
                    "error": str(e),
                    "url": getattr(response, "url", None),
                    "timestamp": loop.time(),
                }
            network_events.append(event)

        async with async_playwright() as p:
            # Connect to the existing Chrome instance via DevTools Protocol
            browser = await p.chromium.connect_over_cdp(f"http://127.0.0.1:{port}")
            try:
                # For a persistent context (with --user-data-dir), contexts[0]
                # should be the existing context; new_context() is a fallback.
                if browser.contexts:
                    context = browser.contexts[0]
                else:
                    context = await browser.new_context()

                page = await context.new_page()
                page.on("request", on_request)
                page.on("response", on_response)

                try:
                    # Navigate to target URL
                    print(f"Navigating to {URL}")
                    await page.goto(URL, wait_until="networkidle", timeout=60_000)
                    # Wait a bit more to capture additional requests
                    await asyncio.sleep(5)
                finally:
                    # Save whatever was captured, even when navigation failed,
                    # so a timeout does not throw the collected data away.
                    with open(OUTPUT_JSON, "w", encoding="utf-8") as f:
                        json.dump(network_events, f, ensure_ascii=False, indent=2)
                    print(f"Network log saved to {OUTPUT_JSON}")

                # Cleanup on Playwright side
                await context.close()
            finally:
                await browser.close()
    finally:
        # Terminate Chrome process
        await _shutdown_chrome(chrome_proc)
        print("Chrome has been closed.")
# Run the capture only when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment