|
import asyncio
import json
import shutil
import socket
import sys
from pathlib import Path
from typing import Optional

from playwright.async_api import async_playwright
|
|
|
|
|
# Page whose network traffic is recorded.
URL = "https://www.google.com"

# File the captured request/response events are written to.
OUTPUT_JSON = "output.json"

# Chrome profile directory, resolved to an absolute path at import time.
USER_DATA_DIR = Path("./chrome-user-data-async").resolve()  # You can change this path if needed
|
|
|
|
|
def find_free_port() -> int:
    """Return a TCP port number on 127.0.0.1 that was free when called.

    Binding to port 0 lets the operating system choose an unused port; the
    chosen number is read back and the probe socket is closed again.

    Note:
        The port is released before this function returns, so another
        process could claim it before the caller starts listening on it.
        Blocking socket calls are fine here: nothing waits on the network.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        probe.bind(("127.0.0.1", 0))
        return probe.getsockname()[1]
|
|
|
|
|
async def wait_for_cdp_ready(port: int, timeout: float = 10.0) -> None:
    """Wait until something accepts TCP connections on 127.0.0.1:``port``.

    The port is polled by repeatedly opening (and immediately closing) a
    connection. Only the TCP listener is checked; no DevTools request is sent.

    Args:
        port: Local TCP port the DevTools endpoint is expected to listen on.
        timeout: Overall time budget in seconds.

    Raises:
        TimeoutError: If no connection could be established within ``timeout``.
    """
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout

    while True:
        remaining = deadline - loop.time()
        if remaining <= 0:
            break

        try:
            # Cap the per-attempt timeout so the overall deadline is honoured.
            _reader, writer = await asyncio.wait_for(
                asyncio.open_connection("127.0.0.1", port),
                timeout=min(0.5, remaining),
            )
        except (OSError, asyncio.TimeoutError):
            # Not listening yet (refused / timed out): pause, then retry.
            pause = min(0.2, max(0.0, deadline - loop.time()))
            await asyncio.sleep(pause)
            continue

        # The connection succeeded, so the port is ready; a failure while
        # tearing down the probe connection does not change that.
        writer.close()
        try:
            await writer.wait_closed()
        except OSError:
            pass
        return

    raise TimeoutError("Chrome DevTools endpoint did not become ready in time.")
|
|
|
|
|
def _default_chrome_path() -> str:
    """Pick a Chrome executable location for the current platform."""
    if sys.platform.startswith("win"):
        return "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe"
    if sys.platform == "darwin":
        return "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
    # Linux and others: look the usual launcher names up on PATH.
    for candidate in ("google-chrome", "google-chrome-stable"):
        located = shutil.which(candidate)
        if located:
            return located
    # Nothing found; let the subprocess call report the missing executable.
    return "google-chrome"


async def launch_chrome_with_cdp(
    user_data_dir: Path,
    port: int,
    chrome_path: Optional[str] = None,
) -> asyncio.subprocess.Process:
    """Launch Chrome with --user-data-dir and --remote-debugging-port (async).

    Args:
        user_data_dir: Profile directory for Chrome; created (including
            parents) if it does not exist yet.
        port: TCP port passed to ``--remote-debugging-port``.
        chrome_path: Chrome executable to run. When omitted, a
            platform-specific default is used (the standard install path on
            Windows and macOS, a PATH lookup elsewhere).

    Returns:
        The asyncio subprocess handle of the started Chrome process. Its
        stdout and stderr are discarded.

    Raises:
        FileNotFoundError: If the Chrome executable cannot be found.
    """
    user_data_dir.mkdir(parents=True, exist_ok=True)

    if chrome_path is None:
        chrome_path = _default_chrome_path()

    args = [
        chrome_path,
        f"--remote-debugging-port={port}",
        f"--user-data-dir={user_data_dir}",
        "--no-first-run",
        "--no-default-browser-check",
        "--disable-dev-shm-usage",
        "--disable-gpu",
        "about:blank",
    ]

    return await asyncio.create_subprocess_exec(
        *args,
        stdout=asyncio.subprocess.DEVNULL,
        stderr=asyncio.subprocess.DEVNULL,
    )
|
|
|
|
|
def _request_record(request, timestamp):
    """Build the JSON-serialisable log entry for one outgoing request."""
    try:
        return {
            "type": "request",
            "url": request.url,
            "method": request.method,
            "headers": dict(request.headers),
            "post_data": request.post_data,
            "timestamp": timestamp,
        }
    except Exception as e:
        # Best-effort logging: a request that cannot be described is recorded
        # as an error entry instead of aborting the capture.
        return {
            "type": "request_error",
            "error": str(e),
            "url": getattr(request, "url", None),
            "timestamp": timestamp,
        }


def _response_record(response, timestamp):
    """Build the JSON-serialisable log entry for one received response."""
    try:
        return {
            "type": "response",
            "url": response.url,
            "status": response.status,
            "status_text": response.status_text,
            "headers": dict(response.headers),
            "timestamp": timestamp,
        }
    except Exception as e:
        # Same best-effort policy as for requests.
        return {
            "type": "response_error",
            "error": str(e),
            "url": getattr(response, "url", None),
            "timestamp": timestamp,
        }


async def _capture_network_events(port, network_events):
    """Attach to Chrome on ``port``, open URL and append events to the list."""
    # Timestamps come from the event loop's clock, as in the original code;
    # they are useful for ordering/intervals, not as wall-clock times.
    loop = asyncio.get_running_loop()

    # Plain (non-async) callbacks are sufficient for page.on().
    def on_request(request):
        network_events.append(_request_record(request, loop.time()))

    def on_response(response):
        network_events.append(_response_record(response, loop.time()))

    async with async_playwright() as p:
        # Connect to the existing Chrome instance via DevTools Protocol
        browser = await p.chromium.connect_over_cdp(f"http://127.0.0.1:{port}")

        # With --user-data-dir, contexts[0] should be the existing context;
        # creating a new one is only a fallback.
        if browser.contexts:
            context = browser.contexts[0]
        else:
            context = await browser.new_context()

        page = await context.new_page()
        page.on("request", on_request)
        page.on("response", on_response)

        print(f"Navigating to {URL}")
        await page.goto(URL, wait_until="networkidle", timeout=60_000)

        # Wait a bit more to capture additional requests
        await asyncio.sleep(5)

        # Cleanup on Playwright side
        await context.close()
        await browser.close()


def _save_network_log(network_events, path):
    """Write the captured events to ``path`` as indented UTF-8 JSON."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(network_events, f, ensure_ascii=False, indent=2)


async def _shutdown_chrome(chrome_proc):
    """Stop the Chrome subprocess, escalating from terminate() to kill()."""
    if chrome_proc is None or chrome_proc.returncode is not None:
        return
    try:
        chrome_proc.terminate()
        try:
            await asyncio.wait_for(chrome_proc.wait(), timeout=5)
        except asyncio.TimeoutError:
            # Did not exit within the grace period: force it.
            chrome_proc.kill()
            await chrome_proc.wait()
    except ProcessLookupError:
        # The process exited between the returncode check and the signal.
        pass


async def main():
    """Launch Chrome, record the network traffic of loading URL, save it.

    Steps: pick a free port, start Chrome with remote debugging on it, wait
    for the port to accept connections, attach Playwright over CDP, record
    request/response events while the page loads, write them to OUTPUT_JSON
    and finally shut Chrome down.

    If the capture fails part-way, the events collected so far are still
    written before the exception propagates.
    """
    port = find_free_port()
    print(f"Using DevTools port: {port}")

    chrome_proc = await launch_chrome_with_cdp(USER_DATA_DIR, port)

    try:
        await wait_for_cdp_ready(port)
        print("Chrome DevTools endpoint is ready.")

        network_events = []
        completed = False
        try:
            await _capture_network_events(port, network_events)
            completed = True
        finally:
            # Always save on success; on failure save only if something was
            # captured, so an earlier log is not overwritten by an empty one.
            if completed or network_events:
                _save_network_log(network_events, OUTPUT_JSON)
                print(f"Network log saved to {OUTPUT_JSON}")
    finally:
        # Terminate Chrome process
        await _shutdown_chrome(chrome_proc)
        print("Chrome has been closed.")
|
|
|
|
|
# Script entry point: run the capture only when executed directly.
if __name__ == "__main__":
    asyncio.run(main())