Below is a mental model I’ve found useful when turning FFmpeg (or yt‑dlp + FFmpeg) into a tiny “MCP server” that feels as friendly as, say, an image‑resize API:
- Job = (source → transform → destination)
  - source – URL, upload, or pipe
  - transform – one of a handful of named presets (audio‑only/mp3, "YouTube → HLS 360p", sprite‑sheet, etc.)
  - destination – file download, cloud bucket, or a streamable response
- The user POSTs a tiny JSON blob; the server turns it into the full FFmpeg command, runs it, and returns a `job_id`:
```
POST /jobs
{
  "source": "https://youtu.be/abc123",
  "preset": "mp3",
  "callback": "https://example.com/webhook"
}
```

Presets are just JSON files checked into the repo; see the `mp3` example at the end of this post.
Why it helps:
- Users never see CLI flags.
- You can tune presets without breaking API clients.
- Power users can still POST `"custom_args": ["-vf", "scale=1280:-1"]` if you expose an escape‑hatch field (sketched below).
| verb | path | purpose |
|---|---|---|
| GET | /presets | list & describe presets |
| POST | /jobs | create a job (returns `job_id`) |
| GET | /jobs/{id} | live status JSON (queued, running, …) |
| GET | /jobs/{id}/log | stream stderr/stdout (`tail -f` style) |
| GET | /jobs/{id}/out | download final artifact |
| DELETE | /jobs/{id} | cancel |
Everything else (auth, rate‑limit, etc.) sits in a gateway.
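For orientation, here is roughly what that surface looks like wired up. Express and the in‑memory job map are assumptions for the sketch; a real server would enqueue onto the job queue described next:

```ts
// Route sketch for the table above. Express and the in-memory Map are
// assumptions; handlers show shape, not production behavior.
import express from "express";
import { randomUUID } from "node:crypto";

const app = express();
app.use(express.json());

type JobRecord = { status: string; log: string[] };
const jobs = new Map<string, JobRecord>();

app.get("/presets", (_req, res) => {
  res.json([{ id: "mp3", description: "Extract best audio → MP3 192 kbps" }]);
});

app.post("/jobs", (req, res) => {
  const id = randomUUID();
  jobs.set(id, { status: "queued", log: [] });
  // Real version: validate req.body, then enqueue { id, ...req.body }.
  res.status(202).json({ job_id: id });
});

app.get("/jobs/:id", (req, res) => {
  res.json(jobs.get(req.params.id) ?? { status: "unknown" });
});

app.get("/jobs/:id/log", (req, res) => {
  res.type("text/plain").send((jobs.get(req.params.id)?.log ?? []).join("\n"));
});

app.get("/jobs/:id/out", (req, res) => {
  res.download(`/data/${req.params.id}/out`); // artifact path is an assumption
});

app.delete("/jobs/:id", (req, res) => {
  // Real version: signal the worker to kill the running ffmpeg process.
  jobs.delete(req.params.id);
  res.sendStatus(204);
});

app.listen(8080);
```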
```
┌──────────┐  enqueue  ┌───────────────┐  spawn  ┌──────────┐
│   API    ├──────────▶│   Job Queue   ├────────▶│  Worker  │
└──────────┘           └───────────────┘         │  ffmpeg  │
                                                 └──────────┘
```
- Queue = Redis, Rabbit, or just BullMQ.
- Worker pulls the preset, does string interpolation (`${SOURCE}`, `${DEST}`), then `execa()`s the full ffmpeg/yt‑dlp chain (see the sketch below).
- Progress: parse FFmpeg's stderr lines (`frame=…`) → push to Redis pub/sub → surface via `/jobs/{id}`.
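The worker loop itself is only a few dozen lines. A sketch under those assumptions (BullMQ + execa; the `presets/` path, `render` helper, and queue name are illustrative, and real code must validate `source`/`dest` before handing them to a shell):

```ts
// Worker sketch: BullMQ consumer that renders a preset's pipeline and runs it.
import { Worker } from "bullmq";
import { execa } from "execa";
import { readFile } from "node:fs/promises";

// Substitute ${SOURCE}/${DEST} placeholders in a preset step.
// NOTE: these values are user-supplied and go to a shell below --
// a real worker must validate/escape them first.
function render(step: string, vars: Record<string, string>): string {
  return step.replace(/\$\{(\w+)\}/g, (_, key) => vars[key] ?? "");
}

new Worker(
  "jobs",
  async (job) => {
    const preset = JSON.parse(
      await readFile(`presets/${job.data.preset}.json`, "utf8"),
    );
    const vars = { SOURCE: job.data.source, DEST: job.data.dest };
    // Steps like "yt-dlp … -o - | ffmpeg -i pipe:0 …" are joined into one
    // shell pipeline, matching the preset format shown at the end.
    const cmd = preset.pipeline.map((s: string) => render(s, vars)).join(" | ");
    const proc = execa(cmd, { shell: true, timeout: preset.timeout });
    // FFmpeg reports progress ("frame=… time=…") on stderr.
    proc.stderr?.on("data", (chunk: Buffer) => {
      const m = /time=(\S+)/.exec(chunk.toString());
      if (m) void job.updateProgress({ time: m[1] });
    });
    await proc; // rejects on non-zero exit or timeout
  },
  { connection: { host: "localhost", port: 6379 } },
);
```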
| Feature | Implementation sketch |
|---|---|
| Self‑documenting | /presets plus OpenAPI (Swagger) generated from same JSON |
| Dry‑run | dry_run=true → return the full command string |
| Webhook / SSE | POST callback or Server‑Sent Events for real‑time UI |
| Version pinning | Accept-Version: 2025‑05‑02 header, or /v1/ path |
| Batch jobs | Accept an array in POST; respond with an array of IDs |
| Health & metrics | /healthz, /metrics (Prometheus) |
| CLI concept (yt‑dlp/ffmpeg) | API surface |
|---|---|
| -S sort descriptors | part of a "quality" preset param ("quality": "720p") |
| --compat-options | hidden behind "legacy": true toggle |
| --ignore-errors | job‑level flag "best_effort": true |
| Long -vf filterchains | separate pipeline step in preset |
| Updates / channels | container image tag; handled by dev‑ops, not the API |
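For the first row, the public quality value can compile down to a yt‑dlp `-S` descriptor behind the scenes. A sketch; the mapping table itself is an assumption:

```ts
// Hypothetical mapping from the public "quality" param to yt-dlp's
// -S format-sorting descriptors ("res:720" prefers resolutions up to 720p).
const QUALITY_SORT: Record<string, string> = {
  "480p": "res:480",
  "720p": "res:720",
  "1080p": "res:1080",
};

function sortArgs(quality: string): string[] {
  const sort = QUALITY_SORT[quality];
  if (!sort) throw new Error(`unknown quality: ${quality}`);
  return ["-S", sort]; // e.g. yt-dlp -S res:720 …
}

console.log(sortArgs("720p")); // ["-S", "res:720"]
```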
```bash
curl -s -X POST https://ff.mcp/api/jobs \
  -H "Authorization: Bearer $TOKEN" \
  -d '{"source":"https://youtu.be/dQw4w9WgXcQ","preset":"gif:5s"}' \
  | jq -r '.job_id'
```

Five seconds later:

```bash
curl -O https://ff.mcp/api/jobs/$ID/out   # rickroll.gif
```

Treat FFmpeg like a render farm with opinionated presets, not a Swiss‑army CLI.
People shouldn’t need to know about -b:v vs. -maxrate; they just pick “1080p‑hq” or “mp3‑128k” and your server owns the rest.
Hope this gives you a clear mental framework. Once you’re comfortable, you can expose a second “expert” endpoint that takes raw CLI strings—just keep it separate so your simple path stays simple.
{ "id": "mp3", "description": "Extract best audio → MP3 192 kbps", "pipeline": [ "yt-dlp -f bestaudio -o - ${SOURCE}", "ffmpeg -i pipe:0 -vn -codec:a libmp3lame -b:a 192k ${DEST}" ], "priority": 5, "timeout": 600000 }