Andrew-Chen-Wang/README.md

## README.md

      
    Raw
  

              README.md
            
          
    On very large pages, the snapshot can fill the entire context #1329
microsoft/playwright-mcp#1329
I believe a good source of inspiration is Claude Code/Codex. There are two approaches I'm thinking of:
1. Grab the HTML instead. Unfortunately, when the browser_snapshot is this huge, the HTML is usually also pretty big (in my case 12M of HTML, and sometimes more).
2. Take a browser_screenshot, save the HTML and the browser_snapshot, and use a "Grep" tool or an HTML parser (or whatever your agent has available) to find whatever you need so that your MCP can perform a browser action.
Images should be much smaller. Here's some slop code written by Claude that worked very well for me. I'm using openai/agents, but the same approach can be used with any custom or alternative agent package.

  
## run.ts
  // Build the snapshot filter and its companion search tool from one handler instance.
  const snapshotHandler = createLargeSnapshotHandler()
  const { getCreatedFiles } = snapshotHandler
  const redirectLargeSnapshots = snapshotHandler.filter
  const snapshotFileTool = snapshotHandler.searchTool

  const agent = new Agent({
    name: "Agent",
    model: "gpt-5-nano",
    instructions: AGENT_PROMPT,
    mcpServers: [playwrightMCPServer],
    tools: [snapshotFileTool],
    outputType: schema,
  })

  try {
    const result = await withAgentSpan(async () =>
      run(agent, [{ role: "user", content: userPrompt }], {
        maxTurns: 25,
        session,
        // Run the snapshot filter first, then hand its result to the regular input preparation.
        callModelInputFilter: (args) => {
          const modelData = redirectLargeSnapshots(args)
          return prepareModelInputForAgent({ ...args, modelData })
        },
      }),
    )
  } finally {
    // Best-effort cleanup of every file the handler reports having created.
    getCreatedFiles().forEach((snapshotPath) => {
      try {
        fs.unlinkSync(snapshotPath)
      } catch {
        // Deliberately ignored: a file that cannot be removed must not mask the run's outcome.
      }
    })
  }

## utils.ts
import type { AgentInputItem } from "@openai/agents-core/types"
import type {
  CallModelInputFilter,
  CallModelInputFilterArgs,
  FunctionCallResultItem,
  ModelInputData
} from "@openai/agents"
import { spawn } from "node:child_process"
import * as crypto from "node:crypto"
import * as fs from "node:fs"
import * as os from "node:os"
import * as path from "node:path"
import {
  tool,
} from "@openai/agents"
import { z } from "zod"

// ---------------------------------------------------------------------------
// Large-snapshot handling
// ---------------------------------------------------------------------------

/**
 * Snapshots whose text is longer than this many characters (string length) are
 * redirected to temp files instead of being sent inline.
 */
const LARGE_SNAPSHOT_THRESHOLD = 50_000

/**
 * Temp directory for saved snapshots. Created once at module load.
 *
 * Snapshots hold raw page content, so a newly created directory is restricted to the
 * current user. `mode` has no effect when the directory already exists.
 */
const SNAPSHOT_DIR = path.join(os.tmpdir(), "playwright-snapshots")
fs.mkdirSync(SNAPSHOT_DIR, { recursive: true, mode: 0o700 })

/**
 * Extracts the plain text carried by a tool-call output.
 *
 * @param output - The `output` field of a function-call result.
 * @returns The string itself for string outputs; the concatenated `text` fields
 *   (missing ones count as "") for array outputs; the `text` field for an object
 *   that has one; otherwise null.
 */
function getSnapshotText(output: FunctionCallResultItem["output"]): string | null {
  if (typeof output === "string") {
    return output
  }

  if (Array.isArray(output)) {
    const pieces = output.map((part) => (part as { text?: string }).text ?? "")
    return pieces.join("")
  }

  // Anything that is not a non-null object exposing a `text` key carries no text.
  if (output === null || typeof output !== "object" || !("text" in output)) {
    return null
  }
  return (output as { text: string }).text
}

/**
 * Tells whether `item` is a browser tool result whose snapshot is too big to send inline.
 *
 * True only when the item is a `function_call_result` from a tool whose name starts with
 * `browser_`, its extracted text contains the "### Snapshot" marker, and that text is
 * longer than LARGE_SNAPSHOT_THRESHOLD.
 */
function isLargeSnapshot(item: AgentInputItem): boolean {
  if (item.type === "function_call_result") {
    const toolResult = item as FunctionCallResultItem
    if (toolResult.name.startsWith("browser_")) {
      const snapshotText = getSnapshotText(toolResult.output)
      // Null or empty text can never qualify.
      return snapshotText
        ? snapshotText.includes("### Snapshot") && snapshotText.length > LARGE_SNAPSHOT_THRESHOLD
        : false
    }
  }
  return false
}

/**
 * Runs `cmd` through `sh -c` and resolves with everything it wrote to stdout.
 *
 * @param cmd - Shell command line, handed verbatim to `sh -c`; it is NOT escaped here.
 * @returns Captured stdout decoded as UTF-8, or "No matches found." when stdout is empty.
 *   Exit codes 0 and 1 both resolve, because grep uses 1 for "no matches".
 * @throws Rejects with the captured stderr (or a generic message) for any other exit
 *   code, and with the spawn error when the shell cannot be started.
 */
function runShellCommand(cmd: string): Promise<string> {
  return new Promise((resolve, reject) => {
    // stdin is "ignore" so a command that reads standard input (e.g. grep without a
    // file operand) sees EOF immediately instead of blocking forever on an open pipe.
    const proc = spawn("sh", ["-c", cmd], { stdio: ["ignore", "pipe", "pipe"] })

    // Collect raw bytes and decode once at the end: decoding chunk by chunk corrupts
    // multi-byte UTF-8 characters that straddle a chunk boundary.
    const stdoutChunks: Buffer[] = []
    const stderrChunks: Buffer[] = []
    proc.stdout.on("data", (chunk: Buffer) => {
      stdoutChunks.push(chunk)
    })
    proc.stderr.on("data", (chunk: Buffer) => {
      stderrChunks.push(chunk)
    })

    proc.on("error", reject)
    proc.on("close", (code) => {
      const stdout = Buffer.concat(stdoutChunks).toString("utf8")
      const stderr = Buffer.concat(stderrChunks).toString("utf8")
      // grep exits 1 when no matches — not an error
      if (code === 0 || code === 1) resolve(stdout || "No matches found.")
      else reject(new Error(stderr || `Command exited with code ${code}`))
    })
  })
}

/**
 * Checks that `command` is a single plain grep invocation.
 *
 * The command text is written by the model and is later executed through `sh -c`, so
 * anything that could start a second program (pipes, chaining, redirects, variable or
 * command substitution) is refused. grep itself can still be pointed at other files;
 * this check only prevents running arbitrary programs.
 *
 * @param command - Raw command text, still containing the `{filename}` placeholder.
 * @returns A short reason when the command is refused, or null when it is acceptable.
 */
function rejectUnsafeGrepCommand(command: string): string | null {
  if (!/^\s*grep(\s|$)/.test(command)) return "the command must start with grep"

  let openQuote: "'" | '"' | null = null
  for (let i = 0; i < command.length; i++) {
    const ch = command.charAt(i)
    if (openQuote === "'") {
      // Inside single quotes everything is literal until the closing quote.
      if (ch === "'") openQuote = null
      continue
    }
    if (ch === "\\") {
      // Outside single quotes a backslash escapes the next character, so skip it.
      i++
      continue
    }
    if (openQuote === '"') {
      if (ch === '"') openQuote = null
      // Double quotes still allow $VAR, $(...) and `...` expansion.
      else if (ch === "$" || ch === "`")
        return "shell expansion inside double quotes is not allowed; use single quotes"
      continue
    }
    if (ch === "'" || ch === '"') {
      openQuote = ch
      continue
    }
    if (";&|<>()$`\n\r".includes(ch))
      return "unquoted shell operators are not allowed; put the pattern in single quotes"
  }
  return openQuote === null ? null : "the command has an unterminated quote"
}

/**
 * Creates the large-snapshot callModelInputFilter and a companion search tool.
 * Both share a closure over the list of saved snapshot file paths, so the tool
 * always searches the most recently saved file without the agent knowing the path.
 *
 * @returns
 *   - `filter`: replaces every large browser snapshot in the model input with a short
 *     notice and saves the original text to a file under SNAPSHOT_DIR.
 *   - `searchTool`: runs one validated grep command against the newest saved file.
 *   - `getCreatedFiles`: the live list of files written so far, for the caller to delete.
 */
export function createLargeSnapshotHandler(): {
  filter: (args: CallModelInputFilterArgs) => ModelInputData
  searchTool: ReturnType<typeof tool>
  getCreatedFiles: () => string[]
} {
  const createdFiles: string[] = []
  // Call ids whose snapshot is already on disk. The filter is invoked for every model
  // call and earlier tool results can reappear in its input; without this, the same
  // multi-megabyte snapshot would be written to a fresh file each time.
  const savedCallIds = new Set<string>()

  const filter = ({ modelData }: CallModelInputFilterArgs): ModelInputData => {
    const newInput = modelData.input.map((item: AgentInputItem): AgentInputItem => {
      if (!isLargeSnapshot(item)) return item

      const r = item as FunctionCallResultItem
      const text = getSnapshotText(r.output)
      if (text === null) return item

      // Only trust the call id as a cache key when it is a usable string.
      const canCache = typeof r.callId === "string" && r.callId !== ""
      if (!canCache || !savedCallIds.has(r.callId)) {
        const filePath = path.join(SNAPSHOT_DIR, `snapshot-${crypto.randomUUID()}.yaml`)
        // Owner-only permissions: the snapshot is raw page content.
        fs.writeFileSync(filePath, text, { encoding: "utf-8", mode: 0o600 })
        createdFiles.push(filePath)
        if (canCache) savedCallIds.add(r.callId)
      }

      const msg = `[Snapshot was ${text.length.toLocaleString()} chars — too large to include inline.]\n\nUse browser_take_screenshot to see the page visually.\nUse the searchSnapshotFile tool to search the saved accessibility tree — write a grep command with {filename} as the file placeholder.\nUse browser_evaluate directly to query the DOM for specific elements.`

      // Keep the replacement in the same shape (string / array / object) as the original.
      if (typeof r.output === "string")
        return { ...r, output: msg } satisfies FunctionCallResultItem
      if (Array.isArray(r.output))
        return {
          ...r,
          output: [{ type: "input_text", text: msg }],
        } satisfies FunctionCallResultItem
      return { ...r, output: { type: "text", text: msg } } satisfies FunctionCallResultItem
    })

    return { ...modelData, input: newInput }
  }

  const searchTool = tool({
    name: "searchSnapshotFile",
    description:
      "Run a grep command against the most recently saved accessibility tree snapshot. " +
      "Write the full grep command using {filename} as the placeholder for the snapshot file path. " +
      "Only a single grep invocation is accepted: no pipes, redirects, command chaining or shell " +
      "substitutions (use grep's -m option to cap the number of matches). " +
      "Example: grep -n -i -C3 'search button' {filename}",
    parameters: z.object({
      command: z.string().describe("grep command with {filename} as the snapshot file placeholder"),
    }),
    async execute({ command }) {
      const filePath = createdFiles.at(-1)
      if (!filePath) return "No snapshot file has been saved yet."

      // Without the placeholder grep would have no file operand to search.
      if (!command.includes("{filename}"))
        return "Command rejected: it must contain the {filename} placeholder. Example: grep -n -i -C3 'search button' {filename}"

      const problem = rejectUnsafeGrepCommand(command)
      if (problem !== null)
        return `Command rejected: ${problem}. Example: grep -n -i -C3 'search button' {filename}`

      // split/join substitutes every occurrence and, unlike String.replace with a string
      // replacement, gives "$" sequences in the path no special meaning.
      const resolved = command.split("{filename}").join(filePath)
      const output = await runShellCommand(resolved)

      // A broad pattern can match most of the file; do not let the search result
      // overflow the context the way the original snapshot would have.
      if (output.length <= LARGE_SNAPSHOT_THRESHOLD) return output
      return `${output.slice(0, LARGE_SNAPSHOT_THRESHOLD)}\n\n[Output truncated at ${LARGE_SNAPSHOT_THRESHOLD.toLocaleString()} chars — narrow the pattern or cap matches with grep -m.]`
    },
  })

  return { filter, searchTool, getCreatedFiles: () => createdFiles }
}
	// Build the snapshot filter and its companion search tool from one handler instance.
	const snapshotHandler = createLargeSnapshotHandler()
	const { getCreatedFiles } = snapshotHandler
	const redirectLargeSnapshots = snapshotHandler.filter
	const snapshotFileTool = snapshotHandler.searchTool

	const agent = new Agent({
	  name: "Agent",
	  model: "gpt-5-nano",
	  instructions: AGENT_PROMPT,
	  mcpServers: [playwrightMCPServer],
	  tools: [snapshotFileTool],
	  outputType: schema,
	})

	try {
	  const result = await withAgentSpan(async () =>
	    run(agent, [{ role: "user", content: userPrompt }], {
	      maxTurns: 25,
	      session,
	      // Run the snapshot filter first, then hand its result to the regular input preparation.
	      callModelInputFilter: (args) => {
	        const modelData = redirectLargeSnapshots(args)
	        return prepareModelInputForAgent({ ...args, modelData })
	      },
	    }),
	  )
	} finally {
	  // Best-effort cleanup of every file the handler reports having created.
	  getCreatedFiles().forEach((snapshotPath) => {
	    try {
	      fs.unlinkSync(snapshotPath)
	    } catch {
	      // Deliberately ignored: a file that cannot be removed must not mask the run's outcome.
	    }
	  })
	}
	import type { AgentInputItem } from "@openai/agents-core/types"
	import type {
	CallModelInputFilter,
	CallModelInputFilterArgs,
	FunctionCallResultItem,
	ModelInputData
	} from "@openai/agents"
	import { spawn } from "node:child_process"
	import * as crypto from "node:crypto"
	import * as fs from "node:fs"
	import * as os from "node:os"
	import * as path from "node:path"
	import {
	tool,
	} from "@openai/agents"
	import { z } from "zod"

	// ---------------------------------------------------------------------------
	// Large-snapshot handling
	// ---------------------------------------------------------------------------

	/**
	 * Snapshots whose text is longer than this many characters (string length) are
	 * redirected to temp files instead of being sent inline.
	 */
	const LARGE_SNAPSHOT_THRESHOLD = 50_000

	/**
	 * Temp directory for saved snapshots. Created once at module load.
	 *
	 * Snapshots hold raw page content, so a newly created directory is restricted to the
	 * current user. `mode` has no effect when the directory already exists.
	 */
	const SNAPSHOT_DIR = path.join(os.tmpdir(), "playwright-snapshots")
	fs.mkdirSync(SNAPSHOT_DIR, { recursive: true, mode: 0o700 })

	/**
	 * Extracts the plain text carried by a tool-call output.
	 *
	 * @param output - The `output` field of a function-call result.
	 * @returns The string itself for string outputs; the concatenated `text` fields
	 *   (missing ones count as "") for array outputs; the `text` field for an object
	 *   that has one; otherwise null.
	 */
	function getSnapshotText(output: FunctionCallResultItem["output"]): string | null {
	  if (typeof output === "string") {
	    return output
	  }

	  if (Array.isArray(output)) {
	    const pieces = output.map((part) => (part as { text?: string }).text ?? "")
	    return pieces.join("")
	  }

	  // Anything that is not a non-null object exposing a `text` key carries no text.
	  if (output === null || typeof output !== "object" || !("text" in output)) {
	    return null
	  }
	  return (output as { text: string }).text
	}

	/**
	 * Tells whether `item` is a browser tool result whose snapshot is too big to send inline.
	 *
	 * True only when the item is a `function_call_result` from a tool whose name starts with
	 * `browser_`, its extracted text contains the "### Snapshot" marker, and that text is
	 * longer than LARGE_SNAPSHOT_THRESHOLD.
	 */
	function isLargeSnapshot(item: AgentInputItem): boolean {
	  if (item.type === "function_call_result") {
	    const toolResult = item as FunctionCallResultItem
	    if (toolResult.name.startsWith("browser_")) {
	      const snapshotText = getSnapshotText(toolResult.output)
	      // Null or empty text can never qualify.
	      return snapshotText
	        ? snapshotText.includes("### Snapshot") && snapshotText.length > LARGE_SNAPSHOT_THRESHOLD
	        : false
	    }
	  }
	  return false
	}

	/**
	 * Runs `cmd` through `sh -c` and resolves with everything it wrote to stdout.
	 *
	 * @param cmd - Shell command line, handed verbatim to `sh -c`; it is NOT escaped here.
	 * @returns Captured stdout decoded as UTF-8, or "No matches found." when stdout is empty.
	 *   Exit codes 0 and 1 both resolve, because grep uses 1 for "no matches".
	 * @throws Rejects with the captured stderr (or a generic message) for any other exit
	 *   code, and with the spawn error when the shell cannot be started.
	 */
	function runShellCommand(cmd: string): Promise<string> {
	  return new Promise((resolve, reject) => {
	    // stdin is "ignore" so a command that reads standard input (e.g. grep without a
	    // file operand) sees EOF immediately instead of blocking forever on an open pipe.
	    const proc = spawn("sh", ["-c", cmd], { stdio: ["ignore", "pipe", "pipe"] })

	    // Collect raw bytes and decode once at the end: decoding chunk by chunk corrupts
	    // multi-byte UTF-8 characters that straddle a chunk boundary.
	    const stdoutChunks: Buffer[] = []
	    const stderrChunks: Buffer[] = []
	    proc.stdout.on("data", (chunk: Buffer) => {
	      stdoutChunks.push(chunk)
	    })
	    proc.stderr.on("data", (chunk: Buffer) => {
	      stderrChunks.push(chunk)
	    })

	    proc.on("error", reject)
	    proc.on("close", (code) => {
	      const stdout = Buffer.concat(stdoutChunks).toString("utf8")
	      const stderr = Buffer.concat(stderrChunks).toString("utf8")
	      // grep exits 1 when no matches — not an error
	      if (code === 0 || code === 1) resolve(stdout || "No matches found.")
	      else reject(new Error(stderr || `Command exited with code ${code}`))
	    })
	  })
	}

	/**
	 * Checks that `command` is a single plain grep invocation.
	 *
	 * The command text is written by the model and is later executed through `sh -c`, so
	 * anything that could start a second program (pipes, chaining, redirects, variable or
	 * command substitution) is refused. grep itself can still be pointed at other files;
	 * this check only prevents running arbitrary programs.
	 *
	 * @param command - Raw command text, still containing the `{filename}` placeholder.
	 * @returns A short reason when the command is refused, or null when it is acceptable.
	 */
	function rejectUnsafeGrepCommand(command: string): string | null {
	  if (!/^\s*grep(\s|$)/.test(command)) return "the command must start with grep"

	  let openQuote: "'" | '"' | null = null
	  for (let i = 0; i < command.length; i++) {
	    const ch = command.charAt(i)
	    if (openQuote === "'") {
	      // Inside single quotes everything is literal until the closing quote.
	      if (ch === "'") openQuote = null
	      continue
	    }
	    if (ch === "\\") {
	      // Outside single quotes a backslash escapes the next character, so skip it.
	      i++
	      continue
	    }
	    if (openQuote === '"') {
	      if (ch === '"') openQuote = null
	      // Double quotes still allow $VAR, $(...) and `...` expansion.
	      else if (ch === "$" || ch === "`")
	        return "shell expansion inside double quotes is not allowed; use single quotes"
	      continue
	    }
	    if (ch === "'" || ch === '"') {
	      openQuote = ch
	      continue
	    }
	    if (";&|<>()$`\n\r".includes(ch))
	      return "unquoted shell operators are not allowed; put the pattern in single quotes"
	  }
	  return openQuote === null ? null : "the command has an unterminated quote"
	}

	/**
	 * Creates the large-snapshot callModelInputFilter and a companion search tool.
	 * Both share a closure over the list of saved snapshot file paths, so the tool
	 * always searches the most recently saved file without the agent knowing the path.
	 *
	 * @returns
	 *   - `filter`: replaces every large browser snapshot in the model input with a short
	 *     notice and saves the original text to a file under SNAPSHOT_DIR.
	 *   - `searchTool`: runs one validated grep command against the newest saved file.
	 *   - `getCreatedFiles`: the live list of files written so far, for the caller to delete.
	 */
	export function createLargeSnapshotHandler(): {
	  filter: (args: CallModelInputFilterArgs) => ModelInputData
	  searchTool: ReturnType<typeof tool>
	  getCreatedFiles: () => string[]
	} {
	  const createdFiles: string[] = []
	  // Call ids whose snapshot is already on disk. The filter is invoked for every model
	  // call and earlier tool results can reappear in its input; without this, the same
	  // multi-megabyte snapshot would be written to a fresh file each time.
	  const savedCallIds = new Set<string>()

	  const filter = ({ modelData }: CallModelInputFilterArgs): ModelInputData => {
	    const newInput = modelData.input.map((item: AgentInputItem): AgentInputItem => {
	      if (!isLargeSnapshot(item)) return item

	      const r = item as FunctionCallResultItem
	      const text = getSnapshotText(r.output)
	      if (text === null) return item

	      // Only trust the call id as a cache key when it is a usable string.
	      const canCache = typeof r.callId === "string" && r.callId !== ""
	      if (!canCache || !savedCallIds.has(r.callId)) {
	        const filePath = path.join(SNAPSHOT_DIR, `snapshot-${crypto.randomUUID()}.yaml`)
	        // Owner-only permissions: the snapshot is raw page content.
	        fs.writeFileSync(filePath, text, { encoding: "utf-8", mode: 0o600 })
	        createdFiles.push(filePath)
	        if (canCache) savedCallIds.add(r.callId)
	      }

	      const msg = `[Snapshot was ${text.length.toLocaleString()} chars — too large to include inline.]\n\nUse browser_take_screenshot to see the page visually.\nUse the searchSnapshotFile tool to search the saved accessibility tree — write a grep command with {filename} as the file placeholder.\nUse browser_evaluate directly to query the DOM for specific elements.`

	      // Keep the replacement in the same shape (string / array / object) as the original.
	      if (typeof r.output === "string")
	        return { ...r, output: msg } satisfies FunctionCallResultItem
	      if (Array.isArray(r.output))
	        return {
	          ...r,
	          output: [{ type: "input_text", text: msg }],
	        } satisfies FunctionCallResultItem
	      return { ...r, output: { type: "text", text: msg } } satisfies FunctionCallResultItem
	    })

	    return { ...modelData, input: newInput }
	  }

	  const searchTool = tool({
	    name: "searchSnapshotFile",
	    description:
	      "Run a grep command against the most recently saved accessibility tree snapshot. " +
	      "Write the full grep command using {filename} as the placeholder for the snapshot file path. " +
	      "Only a single grep invocation is accepted: no pipes, redirects, command chaining or shell " +
	      "substitutions (use grep's -m option to cap the number of matches). " +
	      "Example: grep -n -i -C3 'search button' {filename}",
	    parameters: z.object({
	      command: z.string().describe("grep command with {filename} as the snapshot file placeholder"),
	    }),
	    async execute({ command }) {
	      const filePath = createdFiles.at(-1)
	      if (!filePath) return "No snapshot file has been saved yet."

	      // Without the placeholder grep would have no file operand to search.
	      if (!command.includes("{filename}"))
	        return "Command rejected: it must contain the {filename} placeholder. Example: grep -n -i -C3 'search button' {filename}"

	      const problem = rejectUnsafeGrepCommand(command)
	      if (problem !== null)
	        return `Command rejected: ${problem}. Example: grep -n -i -C3 'search button' {filename}`

	      // split/join substitutes every occurrence and, unlike String.replace with a string
	      // replacement, gives "$" sequences in the path no special meaning.
	      const resolved = command.split("{filename}").join(filePath)
	      const output = await runShellCommand(resolved)

	      // A broad pattern can match most of the file; do not let the search result
	      // overflow the context the way the original snapshot would have.
	      if (output.length <= LARGE_SNAPSHOT_THRESHOLD) return output
	      return `${output.slice(0, LARGE_SNAPSHOT_THRESHOLD)}\n\n[Output truncated at ${LARGE_SNAPSHOT_THRESHOLD.toLocaleString()} chars — narrow the pattern or cap matches with grep -m.]`
	    },
	  })

	  return { filter, searchTool, getCreatedFiles: () => createdFiles }
	}