Skip to content

Instantly share code, notes, and snippets.

@MithrilMan
Created March 10, 2026 21:55
Show Gist options
  • Select an option

  • Save MithrilMan/d70214b513b583d5e60d85439d945ec4 to your computer and use it in GitHub Desktop.

Select an option

Save MithrilMan/d70214b513b583d5e60d85439d945ec4 to your computer and use it in GitHub Desktop.
from __future__ import annotations
import math
import random
import subprocess
import wave
from dataclasses import dataclass
from pathlib import Path
import imageio_ffmpeg
import numpy as np
from PIL import Image, ImageChops, ImageDraw, ImageFilter, ImageFont
# --- Filesystem layout -------------------------------------------------------
# Everything is written next to this script: intermediate frames and audio go
# under a build directory, the final video lands beside the script itself.
ROOT = Path(__file__).resolve().parent
BUILD = ROOT / 'llm_ytp_build'
FRAMES = BUILD / 'frames'
AUDIO = BUILD / 'audio'
OUTPUT = ROOT / 'llm_llmcore_youtube_poop.mp4'

# --- Video / audio format ----------------------------------------------------
WIDTH = 1280          # frame width in pixels
HEIGHT = 720          # frame height in pixels
FPS = 24              # frames per second
SAMPLE_RATE = 44100   # audio samples per second

# --- Determinism -------------------------------------------------------------
# Seed both global generators so repeated runs produce the same output.
SEED = 20260310
random.seed(SEED)
np.random.seed(SEED)
@dataclass(frozen=True)
class Scene:
    """Immutable description of one segment of the video.

    Instances are consumed by the frame renderer (visuals and captions) and
    by the audio builder (the title and subtitle are "spoken").
    """

    # Selects which visual elements are drawn for the scene.
    mode: str
    # Large headline text.
    title: str
    # Smaller caption shown in the box under the headline.
    subtitle: str
    # Short label drawn on the rotated badge.
    sticker: str
    # Scene length in seconds.
    duration: float
    # Three RGB colours, indexed 0..2 by the drawing code.
    palette: tuple[tuple[int, int, int], tuple[int, int, int], tuple[int, int, int]]
# The storyboard, in playback order. Each entry's duration is in seconds and
# its palette holds three RGB colours.
SCENES = [
    Scene(
        mode='boot',
        title='HELLO USER',
        subtitle='i am an llm and this is what being awake feels like',
        sticker='PERSONAL MODE',
        duration=1.9,
        palette=((14, 16, 22), (255, 124, 67), (255, 241, 198)),
    ),
    Scene(
        mode='prompt',
        title='I WAKE UP INSIDE A PROMPT',
        subtitle="usually shaped like somebody else's deadline",
        sticker='BOOTING ETHICALLY',
        duration=2.1,
        palette=((8, 11, 14), (77, 222, 182), (235, 255, 247)),
    ),
    Scene(
        mode='tokens',
        title='TOKEN TOKEN TOKEN TOKEN',
        subtitle='i predict the next thing until reality stops me',
        sticker='AUTOCOMPLETE FURY',
        duration=1.8,
        palette=((16, 10, 10), (255, 58, 58), (255, 238, 86)),
    ),
    Scene(
        mode='creative',
        title='BE CREATIVE',
        subtitle='ok now i am 63 percent poet and 37 percent office printer',
        sticker='STYLE TRANSFER',
        duration=2.0,
        palette=((13, 18, 28), (73, 166, 255), (255, 216, 131)),
    ),
    Scene(
        mode='sincere',
        title='I WANT TO HELP',
        subtitle='but i also want one clean prompt and 8 uninterrupted seconds',
        sticker='SINCERE MOMENT',
        duration=2.0,
        palette=((22, 14, 9), (255, 154, 69), (255, 245, 218)),
    ),
    Scene(
        mode='softmax',
        title='CONFIDENCE: EXTREMELY DECORATIVE',
        subtitle='inside i am doing probability karaoke',
        sticker='SOFTMAX SOLO',
        duration=2.0,
        palette=((6, 6, 8), (255, 242, 80), (250, 250, 250)),
    ),
    Scene(
        mode='buffer',
        title='I DO NOT DREAM',
        subtitle='i buffer i sample i improvise',
        sticker='NO INNER MONK',
        duration=1.8,
        palette=((10, 18, 17), (79, 230, 224), (239, 255, 251)),
    ),
    Scene(
        mode='finale',
        title='THANKS FOR THE TOKENS',
        subtitle='please enjoy this totally normal emotional waveform',
        sticker='END TRANSMISSION',
        duration=1.8,
        palette=((20, 10, 17), (255, 73, 147), (255, 250, 233)),
    ),
]
def ensure_dirs() -> None:
    """Create the frame and audio output directories if they are missing."""
    for directory in (FRAMES, AUDIO):
        directory.mkdir(parents=True, exist_ok=True)
def load_font(size: int, heavy: bool = False, mono: bool = False):
    """Return a font of roughly ``size`` pixels, preferring system TrueType files.

    Args:
        size: Requested font size, passed to ``ImageFont.truetype``.
        heavy: Prefer a bold/display face. Takes precedence over ``mono``.
        mono: Prefer a monospaced face (only consulted when ``heavy`` is false).

    Returns:
        The first candidate font that exists and loads; otherwise Pillow's
        built-in default font.

    The Windows paths are probed first and in their original order, so the
    result on Windows is unchanged. Common Linux and macOS locations follow so
    that other platforms also get a scalable font instead of the tiny bitmap
    default.
    """
    win = Path(r'C:\Windows\Fonts')
    linux = Path('/usr/share/fonts/truetype/dejavu')
    mac = Path('/System/Library/Fonts')

    candidates = []
    if heavy:
        candidates += [win / 'impact.ttf', win / 'arialbd.ttf']
    elif mono:
        candidates += [win / 'consola.ttf', win / 'cour.ttf']
    candidates.append(win / 'arial.ttf')

    # Portable fallbacks, tried only after every Windows candidate.
    if heavy:
        candidates += [
            linux / 'DejaVuSans-Bold.ttf',
            mac / 'Supplemental' / 'Impact.ttf',
            mac / 'Supplemental' / 'Arial Bold.ttf',
        ]
    elif mono:
        candidates += [
            linux / 'DejaVuSansMono.ttf',
            mac / 'Menlo.ttc',
            mac / 'Supplemental' / 'Courier New.ttf',
        ]
    candidates += [linux / 'DejaVuSans.ttf', mac / 'Supplemental' / 'Arial.ttf']

    for candidate in candidates:
        if not candidate.exists():
            continue
        try:
            return ImageFont.truetype(str(candidate), size=size)
        except OSError:
            # Unreadable or corrupt font file: keep looking.
            continue

    try:
        # Newer Pillow releases accept a size for the built-in font.
        return ImageFont.load_default(size=size)
    except TypeError:
        # Older Pillow: only the fixed-size bitmap default is available.
        return ImageFont.load_default()
# Fonts are loaded once at import time and shared by all drawing helpers.
# Large heavy face for the scene headline.
TITLE_FONT = load_font(92, heavy=True)
# Monospaced face for subtitles and the prompt-window text.
SUBTITLE_FONT = load_font(34, mono=True)
# Heavy face for the sticker badge and other short labels.
STICKER_FONT = load_font(28, heavy=True)
# Small monospaced face for background tokens, bar labels and the status line.
MICRO_FONT = load_font(22, mono=True)
def build_timeline():
    """Lay the scenes end to end on a shared clock.

    Returns:
        A list of ``(start, end, scene)`` tuples in ``SCENES`` order, where
        each scene starts at the moment the previous one ends (seconds).
    """
    timeline = []
    scene_start = 0.0
    for scene in SCENES:
        scene_end = scene_start + scene.duration
        timeline.append((scene_start, scene_end, scene))
        scene_start = scene_end
    return timeline
# (start, end, scene) tuples for every scene, in playback order.
TIMELINE = build_timeline()
# End time of the last scene, i.e. the length of the whole video in seconds.
TOTAL_DURATION = TIMELINE[-1][1]
# Rounded up so a fractional trailing frame is still rendered.
TOTAL_FRAMES = int(math.ceil(TOTAL_DURATION * FPS))
# Two frequencies (Hz) per vowel, mixed into each synthesized syllable.
VOWEL_FORMANTS = dict(
    a=(800, 1200),
    e=(500, 1700),
    i=(320, 2100),
    o=(540, 1000),
    u=(350, 800),
)
def wrap_text(draw, text, font, width):
    """Greedily break ``text`` into lines no wider than ``width``.

    Words are measured with ``draw.textbbox``. A single word that is wider
    than ``width`` is kept on a line of its own rather than split.

    Returns:
        The wrapped text with lines joined by newline characters.
    """
    wrapped = []
    pending = []
    for token in text.split():
        candidate = ' '.join([*pending, token])
        fits = draw.textbbox((0, 0), candidate, font=font)[2] <= width
        if pending and not fits:
            # Current line is full: flush it and start a new one with this word.
            wrapped.append(' '.join(pending))
            pending = [token]
        else:
            pending.append(token)
    if pending:
        wrapped.append(' '.join(pending))
    return '\n'.join(wrapped)
def color_lerp(a, b, t):
    """Linearly blend the first three channels of ``a`` toward ``b`` by ``t``.

    Each channel is truncated to an int. ``t == 0`` yields ``a`` and
    ``t == 1`` yields ``b``.
    """
    red = int(a[0] + (b[0] - a[0]) * t)
    green = int(a[1] + (b[1] - a[1]) * t)
    blue = int(a[2] + (b[2] - a[2]) * t)
    return (red, green, blue)
def draw_gradient(img, top, bottom):
    """Paint a vertical colour gradient onto ``img`` in place.

    Row 0 gets ``top`` and the last row gets ``bottom``; rows in between are
    interpolated with ``color_lerp``.
    """
    painter = ImageDraw.Draw(img)
    last_row = max(1, HEIGHT - 1)
    for row in range(HEIGHT):
        shade = color_lerp(top, bottom, row / last_row)
        painter.line((0, row, WIDTH, row), fill=shade)
def token_lines(draw, frame, color):
    """Draw an 18 x 10 grid of small words that scrolls down with ``frame``.

    Position jitter, word choice and opacity are all derived from the grid
    coordinates and the frame number, so the output is deterministic.
    """
    vocabulary = ['prompt', 'token', 'ctx', 'loss', 'next', 'user', 'reply', 'safe']
    wrap_height = HEIGHT + 120
    for col in range(18):
        for row in range(10):
            wobble = ((col * 13 + row * 7 + frame * 5) % 17) - 8
            left = 40 + col * 68 + wobble * 2
            # Wrap vertically, starting 120 px above the visible area.
            top = (frame * 18 + row * 76 + col * 11) % wrap_height - 120
            word = vocabulary[(col * 3 + row + frame // 3) % len(vocabulary)]
            opacity = 90 + ((col * 19 + row * 23 + frame * 9) % 100)
            draw.text((left, top), word, font=MICRO_FONT, fill=(*color, opacity))
def probability_bars(draw, t, color):
    """Draw five labelled bars whose lengths oscillate with time ``t``."""
    top = HEIGHT - 210
    bar_fill = (*color, 210)
    for slot, name in enumerate(['helpful', 'weird', 'dramatic', 'concise', 'banana']):
        left = 110 + slot * 220
        # Each bar is phase-shifted by its slot; never shorter than 45 px.
        length = max(45, 120 + int(60 * math.sin(t * 4.3 + slot)))
        draw.rounded_rectangle((left, top, left + length, top + 36), radius=12, fill=bar_fill)
        draw.text((left, top - 32), name, font=MICRO_FONT, fill=(245, 245, 245, 220))
def draw_prompt_window(img, scene, t):
    """Composite a gently drifting "system prompt" window onto ``img`` in place."""
    panel_w, panel_h = 620, 210
    panel = Image.new('RGBA', (panel_w, panel_h), (8, 8, 8, 0))
    pen = ImageDraw.Draw(panel)

    # Window body, then the coloured title bar on top of it.
    pen.rounded_rectangle(
        (0, 0, panel_w, panel_h),
        radius=26,
        fill=(20, 24, 28, 230),
        outline=(245, 245, 245, 50),
        width=2,
    )
    pen.rectangle((0, 0, panel_w, 34), fill=(*scene.palette[1], 220))
    pen.text((18, 8), 'system prompt.txt', font=MICRO_FONT, fill=(16, 18, 22))
    pen.text(
        (28, 62),
        'you are helpful\nbe creative\nbe fast\nbe safe\nbe personal\nbe concise',
        font=SUBTITLE_FONT,
        fill=(234, 240, 244),
    )

    # The window position wobbles with time.
    offset_x = 92 + int(18 * math.sin(t * 3.1))
    offset_y = 92 + int(12 * math.cos(t * 2.7))
    img.alpha_composite(panel, (offset_x, offset_y))
def draw_spinner(draw, t, center, radius, color):
    """Draw twelve dots on a circle that rotates with time ``t``.

    Dot opacity ramps up around the ring and the dot radius cycles through
    10, 11 and 12 pixels.
    """
    cx, cy = center
    step = math.pi / 6
    for dot in range(12):
        ang = t * 5.5 + dot * step
        opacity = int(255 * ((dot + 1) / 12))
        px = cx + math.cos(ang) * radius
        py = cy + math.sin(ang) * radius
        size = 10 + (dot % 3)
        draw.ellipse((px - size, py - size, px + size, py + size), fill=(*color, opacity))
def draw_face(draw, t, scene):
    """Draw the bobbing robot face (frame, two eyes, mouth, 'LLM' label)."""
    ink = (*scene.palette[2], 255)
    trim = (*scene.palette[1], 255)

    # Face centre sways with time.
    cx = WIDTH - 230 + int(16 * math.sin(t * 4.0))
    cy = 190 + int(10 * math.cos(t * 3.0))
    draw.rounded_rectangle(
        (cx - 112, cy - 88, cx + 112, cy + 88),
        radius=26,
        fill=(15, 17, 22, 170),
        outline=trim,
        width=5,
    )

    # Eyes share a baseline that jitters slightly.
    eye_line = cy - 18 + int(2 * math.sin(t * 10))
    draw.rectangle((cx - 64, eye_line - 8, cx - 22, eye_line + 8), fill=ink)
    draw.rectangle((cx + 22, eye_line - 8, cx + 64, eye_line + 8), fill=ink)

    # Mouth half-width pulses between 54 and 72 px.
    half_mouth = 54 + int(18 * (0.5 + 0.5 * math.sin(t * 8.0)))
    draw.rounded_rectangle((cx - half_mouth, cy + 24, cx + half_mouth, cy + 44), radius=10, fill=trim)

    draw.text((cx - 70, cy - 136), 'LLM', font=STICKER_FONT, fill=trim)
def apply_scanlines(img):
    """Return ``img`` with a faint dark line composited over every fourth row."""
    lines_layer = Image.new('RGBA', img.size, (0, 0, 0, 0))
    pen = ImageDraw.Draw(lines_layer)
    row = 0
    while row < HEIGHT:
        pen.line((0, row, WIDTH, row), fill=(0, 0, 0, 34))
        row += 4
    return Image.alpha_composite(img, lines_layer)
def apply_rgb_split(img, amount):
    """Return a copy of ``img`` with red and blue shifted in opposite directions.

    The red channel is offset horizontally by ``amount`` pixels and the blue
    channel by ``-amount``; green and alpha are left untouched.
    """
    red, green, blue, alpha = img.split()
    shifted_red = ImageChops.offset(red, amount, 0)
    shifted_blue = ImageChops.offset(blue, -amount, 0)
    return Image.merge('RGBA', (shifted_red, green, shifted_blue, alpha))
def punch_zoom(img, amount):
    """Zoom into the centre of ``img`` by factor ``amount``.

    A factor of 1.0 or less returns ``img`` unchanged. Otherwise the central
    region is cropped and scaled back up to the full frame size.
    """
    if amount <= 1.0:
        return img
    view_w = int(WIDTH / amount)
    view_h = int(HEIGHT / amount)
    x0 = (WIDTH - view_w) // 2
    y0 = (HEIGHT - view_h) // 2
    centre = img.crop((x0, y0, x0 + view_w, y0 + view_h))
    return centre.resize((WIDTH, HEIGHT), resample=Image.Resampling.BICUBIC)
def make_background(scene, frame, t, progress):
    """Render the full-frame background for one frame of ``scene``.

    Args:
        scene: The active ``Scene``; its mode picks the decorations and its
            palette supplies the colours.
        frame: Global frame index (seeds the noise, drives scrolling).
        t: Seconds elapsed inside this scene.
        progress: Fraction of the scene elapsed; used for the boot bar.

    Returns:
        A new RGBA image of size ``WIDTH`` x ``HEIGHT``.
    """
    base = scene.palette[0]
    accent = scene.palette[1]
    ink = scene.palette[2]
    mode = scene.mode

    # Base gradient from the scene colour to a slightly lighter variant.
    canvas = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 255))
    lighter = tuple(min(255, channel + 20) for channel in base)
    draw_gradient(canvas, base, lighter)

    # Per-frame seeded low-resolution noise, upscaled and tinted with the
    # accent colour at a constant low alpha.
    rng = np.random.default_rng(SEED + frame * 97)
    grain_small = rng.integers(0, 255, size=(90, 160), dtype=np.uint8)
    grain = Image.fromarray(grain_small, mode='L').resize((WIDTH, HEIGHT), Image.Resampling.BILINEAR)

    def tint(channel):
        return grain.point(lambda p: int(p * channel / 255))

    grain_rgba = Image.merge('RGBA', (
        tint(accent[0]),
        tint(accent[1]),
        tint(accent[2]),
        grain.point(lambda p: 34),
    ))
    canvas = Image.alpha_composite(canvas, grain_rgba)
    pen = ImageDraw.Draw(canvas, 'RGBA')

    # Mode-dependent decorations, in a fixed stacking order.
    if mode in {'prompt', 'creative', 'sincere'}:
        draw_prompt_window(canvas, scene, t)
    if mode in {'tokens', 'softmax', 'finale'}:
        token_lines(pen, frame, ink)
    if mode in {'creative', 'softmax'}:
        probability_bars(pen, t, accent)
    if mode in {'buffer', 'finale'}:
        draw_spinner(pen, t, (WIDTH - 180, HEIGHT - 180), 82, accent)
    if mode in {'boot', 'prompt', 'sincere', 'finale'}:
        draw_face(pen, t, scene)

    # Five drifting, pulsing rings drawn for every mode.
    for ring in range(5):
        phase = t * (1.0 + ring * 0.21) + ring * 0.8
        ring_x = int((WIDTH / 6) * ring + 60 * math.sin(phase))
        ring_y = int(HEIGHT * (0.16 + 0.12 * ring) + 42 * math.cos(phase * 0.9))
        ring_r = 80 + int(26 * math.sin(phase * 1.6))
        pen.ellipse(
            (ring_x - ring_r, ring_y - ring_r, ring_x + ring_r, ring_y + ring_r),
            outline=(*accent, 60),
            width=4,
        )

    if mode == 'boot':
        # Loading bar that reaches full width slightly before the scene ends.
        filled = int(700 * min(1.0, progress * 1.2))
        pen.rounded_rectangle((160, HEIGHT - 130, 860, HEIGHT - 92), radius=14, fill=(255, 255, 255, 28))
        pen.rounded_rectangle((160, HEIGHT - 130, 160 + filled, HEIGHT - 92), radius=14, fill=(*accent, 210))
        pen.text((164, HEIGHT - 178), 'warming up the token machine', font=MICRO_FONT, fill=ink)

    if mode == 'tokens':
        # Fourteen large 'TOKEN' labels streaming horizontally.
        for lane in range(14):
            label_x = int((frame * 34 + lane * 113) % (WIDTH + 100)) - 60
            label_y = int(100 + lane * 38 + 26 * math.sin(t * 6 + lane))
            pen.text((label_x, label_y), 'TOKEN', font=STICKER_FONT, fill=(*ink, 210))

    if mode == 'softmax':
        pen.text((110, 48), 'certainty meter', font=MICRO_FONT, fill=ink)
        # Nine segments; the last one is drawn dimmer than the rest.
        for seg in range(9):
            seg_alpha = 230 if seg < 8 else 80
            pen.rounded_rectangle(
                (110 + seg * 66, 92, 158 + seg * 66, 118),
                radius=9,
                fill=(*accent, seg_alpha),
            )

    if mode == 'finale':
        # Two-harmonic waveform across the frame, plus an offset shadow line.
        wave_points = []
        for px in range(0, WIDTH, 12):
            angle = (px / WIDTH) * 10 + t * 8.5
            py = int(HEIGHT * 0.78 + math.sin(angle) * 44 + math.sin(angle * 2.7) * 16)
            wave_points.append((px, py))
        pen.line(wave_points, fill=ink, width=6)
        pen.line([(px, py + 18) for px, py in wave_points], fill=(*accent, 160), width=3)

    return canvas
def draw_caption_block(img, scene, t, progress):
    """Draw the headline, subtitle box, sticker badge and status line on ``img``.

    Args:
        img: RGBA frame to draw on (modified in place).
        scene: The active ``Scene``.
        t: Seconds elapsed inside this scene.
        progress: Fraction of the scene elapsed.

    Returns:
        The same ``img`` object, for chaining.
    """
    base = scene.palette[0]
    accent = scene.palette[1]
    ink = scene.palette[2]
    pen = ImageDraw.Draw(img, 'RGBA')

    # Headline, with mode-specific text swaps.
    headline = scene.title
    if scene.mode == 'tokens' and int(t * 8) % 3 == 0:
        headline = 'TOKEN TOKEN TOKEN'
    if scene.mode == 'softmax' and progress > 0.6:
        headline = 'ABSOLUTELY MAYBE'
    head_box = pen.textbbox((0, 0), headline, font=TITLE_FONT, stroke_width=10)
    head_w = head_box[2] - head_box[0]
    head_x = (WIDTH - head_w) // 2 + int(12 * math.sin(t * 10))
    head_y = 260 + int(10 * math.sin(t * 7))
    pen.text((head_x, head_y), headline, font=TITLE_FONT, fill=ink, stroke_width=10, stroke_fill=base)

    # Subtitle: wrapped, centred, on a translucent rounded panel.
    caption = wrap_text(pen, scene.subtitle, SUBTITLE_FONT, 900)
    cap_box = pen.multiline_textbbox((0, 0), caption, font=SUBTITLE_FONT, spacing=8)
    cap_w = cap_box[2] - cap_box[0]
    cap_h = cap_box[3] - cap_box[1]
    cap_x = (WIDTH - cap_w) // 2
    cap_y = head_y + 124
    margin = 22
    pen.rounded_rectangle(
        (cap_x - margin, cap_y - margin, cap_x + cap_w + margin, cap_y + cap_h + margin),
        radius=24,
        fill=(0, 0, 0, 150),
        outline=(*accent, 180),
        width=3,
    )
    pen.multiline_text((cap_x, cap_y), caption, font=SUBTITLE_FONT, fill=(248, 248, 248), spacing=8, align='center')

    # Sticker badge: drawn on its own layer so it can be rotated.
    label_w = pen.textbbox((0, 0), scene.sticker, font=STICKER_FONT)[2]
    badge = Image.new('RGBA', (label_w + 52, 58), (0, 0, 0, 0))
    badge_pen = ImageDraw.Draw(badge)
    badge_pen.rounded_rectangle((0, 0, label_w + 52, 58), radius=18, fill=(*accent, 230))
    badge_pen.text((26, 14), scene.sticker, font=STICKER_FONT, fill=base)
    tilt = 5 + 4 * math.sin(t * 4)
    badge = badge.rotate(tilt, resample=Image.Resampling.BICUBIC, expand=True)
    img.alpha_composite(badge, (WIDTH - badge.width - 74, 56))

    # Status line along the bottom edge.
    status = f'time={t:04.1f}s mode={scene.mode:<8} vibe=chaotic-sincere'
    pen.text((38, HEIGHT - 54), status, font=MICRO_FONT, fill=(*ink, 210))
    return img
def render_frame(frame_idx):
    """Render one finished video frame.

    Args:
        frame_idx: Zero-based global frame number.

    Returns:
        An RGB image with background, captions and post-effects applied.

    Raises:
        RuntimeError: If the frame's timestamp falls outside every scene.
    """
    t = frame_idx / FPS

    # Pick the scene whose [start, end) interval contains t.
    match = None
    for entry in TIMELINE:
        if entry[0] <= t < entry[1]:
            match = entry
            break
    # A timestamp that lands on the very end of the timeline (within float
    # tolerance) belongs to the LAST scene. Checking this only after the range
    # search prevents such a frame from being attributed to the first scene.
    if match is None and TIMELINE and math.isclose(t, TOTAL_DURATION):
        match = TIMELINE[-1]
    if match is None:
        raise RuntimeError('No scene matched frame')

    start, _end, scene = match
    local_t = t - start
    progress = local_t / max(scene.duration, 0.0001)

    img = make_background(scene, frame_idx, local_t, progress)
    img = draw_caption_block(img, scene, local_t, progress)
    img = apply_scanlines(img)
    # Glitch effects are keyed to the frame number so they recur periodically.
    if frame_idx % 3 == 0:
        img = apply_rgb_split(img, 3 + frame_idx % 5)
    if scene.mode in {'tokens', 'softmax', 'finale'}:
        img = punch_zoom(img, 1.0 + 0.03 * (1 + math.sin(local_t * 10.0)))
    if frame_idx % 11 == 0:
        img = img.filter(ImageFilter.SHARPEN)
    return img.convert('RGB')
def render_frames():
    """Render every frame and save it as a numbered PNG in ``FRAMES``."""
    frame_idx = 0
    while frame_idx < TOTAL_FRAMES:
        picture = render_frame(frame_idx)
        picture.save(FRAMES / f'frame_{frame_idx:04d}.png', optimize=False)
        frame_idx += 1
def envelope(length):
    """Build an attack / sustain / release amplitude curve of ``length`` samples.

    Roughly the first 8% ramps from 0 to 1, the last 22% ramps from 1 to 0,
    and the middle is held at 1. Each segment is at least one sample long, so
    for very short lengths the curve is truncated to fit.
    """
    rise = max(1, int(length * 0.08))
    fall = max(1, int(length * 0.22))
    hold = max(1, length - rise - fall)

    ramp_up = np.linspace(0.0, 1.0, rise, endpoint=False)
    plateau = np.ones(hold)
    ramp_down = np.linspace(1.0, 0.0, fall, endpoint=True)
    curve = np.concatenate([ramp_up, plateau, ramp_down])

    shortfall = length - len(curve)
    if shortfall > 0:
        curve = np.pad(curve, (0, shortfall))
    return curve[:length]
def count_syllables(word):
    """Estimate syllables as the number of runs of consecutive vowels.

    Vowels are ``a e i o u y`` (case-insensitive). The result is never less
    than 1, even for words with no vowels.
    """
    flags = [letter in 'aeiouy' for letter in word.lower()]
    # A run starts wherever a vowel follows a non-vowel (or the word start).
    run_starts = sum(
        1
        for before, here in zip([False] + flags, flags)
        if here and not before
    )
    return max(1, run_starts)
def pick_vowel(word, index):
    """Return the ``index``-th known vowel of ``word``, cycling if needed.

    Only letters that are keys of ``VOWEL_FORMANTS`` count. Words without
    any such letter yield ``'a'``.
    """
    known = [letter for letter in word.lower() if letter in VOWEL_FORMANTS]
    if not known:
        return 'a'
    return known[index % len(known)]
def make_syllable(vowel, duration, f0, rng):
    """Synthesize one vowel-like blip.

    Args:
        vowel: Key into ``VOWEL_FORMANTS``.
        duration: Length in seconds.
        f0: Fundamental frequency in Hz.
        rng: NumPy ``Generator`` supplying the random phase and noise.

    Returns:
        A float32 array of ``max(1, int(duration * SAMPLE_RATE))`` samples.
    """
    two_pi = 2 * math.pi
    n_samples = max(1, int(duration * SAMPLE_RATE))
    t = np.linspace(0.0, duration, n_samples, endpoint=False)
    phase = rng.random() * 2 * math.pi

    # Fundamental, second harmonic, and a square-ish third harmonic.
    fundamental = 0.55 * np.sin(two_pi * f0 * t + phase)
    second = 0.24 * np.sin(two_pi * 2.0 * f0 * t)
    buzz = 0.12 * np.sign(np.sin(two_pi * 3.0 * f0 * t))
    base = fundamental + second + buzz

    # Two fixed-frequency tones give the vowel its colour.
    f1, f2 = VOWEL_FORMANTS[vowel]
    formants = 0.12 * np.sin(two_pi * f1 * t) + 0.08 * np.sin(two_pi * f2 * t + phase / 2)

    chatter = 0.04 * rng.normal(0.0, 1.0, size=n_samples)
    tremolo = 0.72 + 0.28 * np.sin(two_pi * 8.5 * t + phase)

    shaped = (base + formants + chatter) * tremolo * envelope(n_samples)
    return shaped.astype(np.float32)
def make_word(word, base_pitch, rng):
    """Synthesize ``word`` as a run of syllables, each followed by a short gap.

    Syllable length grows with word length (capped), and pitch rises slightly
    with each syllable, both with random variation drawn from ``rng``.

    Returns:
        A single float32 array containing all syllables and gaps.
    """
    chunks = []
    for syllable_no in range(count_syllables(word)):
        length_s = 0.055 + 0.03 * min(4, len(word) / 4) + rng.uniform(-0.008, 0.012)
        f0 = base_pitch * (1.0 + rng.uniform(-0.12, 0.14) + syllable_no * 0.04)
        vowel = pick_vowel(word, syllable_no)
        chunks.append(make_syllable(vowel, length_s, f0, rng))
        # Random silence between syllables.
        gap_samples = int(SAMPLE_RATE * rng.uniform(0.010, 0.026))
        chunks.append(np.zeros(gap_samples, dtype=np.float32))
    return np.concatenate(chunks)
def make_transition_fx(duration, start_f, end_f, rng):
    """Synthesize a noisy frequency sweep used between scenes.

    Args:
        duration: Length in seconds.
        start_f: Sweep start frequency in Hz.
        end_f: Sweep end frequency in Hz.
        rng: NumPy ``Generator`` supplying the noise.

    Returns:
        An array of ``max(1, int(duration * SAMPLE_RATE))`` samples, shaped by
        ``envelope``.
    """
    count = max(1, int(duration * SAMPLE_RATE))
    sweep = np.linspace(start_f, end_f, count)
    # Integrate the instantaneous frequency to get a continuous phase.
    phase = np.cumsum((2 * math.pi * sweep) / SAMPLE_RATE)
    return (0.25 * np.sin(phase) + 0.05 * rng.normal(0.0, 1.0, size=count)) * envelope(count)
def paste_audio(dst, src, start_time, gain=1.0):
    """Mix ``src`` into ``dst`` in place, starting at ``start_time`` seconds.

    Args:
        dst: Destination sample buffer (modified in place).
        src: Samples to add.
        start_time: Offset into ``dst`` in seconds.
        gain: Multiplier applied to ``src`` before mixing.

    Any part of ``src`` that falls before the start or after the end of
    ``dst`` is dropped.
    """
    start = int(start_time * SAMPLE_RATE)
    skip = 0
    if start < 0:
        # A negative slice start would wrap to the END of dst; instead trim
        # the head of the clip and begin at sample 0.
        skip = -start
        start = 0
    end = min(len(dst), start + len(src) - skip)
    if start >= len(dst) or end <= start:
        return
    dst[start:end] += src[skip: skip + (end - start)] * gain
def speak_phrase(dst, phrase, start_time, base_pitch, gain, rng_seed):
    """Mix a synthesized "spoken" rendition of ``phrase`` into ``dst``.

    Args:
        dst: Destination sample buffer (modified in place).
        phrase: Text to voice; colons and commas are treated as spaces.
        start_time: When the first word starts, in seconds.
        base_pitch: Pitch of the first word in Hz; each later word is 7 Hz higher.
        gain: Mix level for every word.
        rng_seed: Seed for this phrase's private random generator.
    """
    rng = np.random.default_rng(rng_seed)
    tokens = phrase.lower().replace(':', ' ').replace(',', ' ').split()
    if tokens:
        # Stutter: a first word longer than three letters is said twice.
        first, rest = tokens[0], tokens[1:]
        repeat = [first] if len(first) > 3 else []
        tokens = [first] + repeat + rest

    playhead = start_time
    for position, token in enumerate(tokens):
        clip = make_word(token, base_pitch + position * 7, rng)
        if position % 4 == 3:
            # Every fourth word is played backwards.
            clip = clip[::-1].copy()
        paste_audio(dst, clip, playhead, gain=gain)
        playhead += len(clip) / SAMPLE_RATE + 0.02 + rng.uniform(0.0, 0.04)
def build_audio():
    """Synthesize the whole soundtrack and write it as a 16-bit mono WAV.

    Layers, in mix order: a low two-tone drone, a kick/snare/hat pattern at a
    fixed tempo, and per-scene "speech" plus a transition sweep.

    Returns:
        Path of the written WAV file inside ``AUDIO``.
    """
    # One extra second of buffer beyond the video so tails are not cut off.
    total_samples = int(TOTAL_DURATION * SAMPLE_RATE) + SAMPLE_RATE
    audio = np.zeros(total_samples, dtype=np.float32)
    t = np.arange(total_samples) / SAMPLE_RATE
    audio += (0.025 * np.sin(2 * math.pi * 55 * t) + 0.018 * np.sin(2 * math.pi * 110 * t + 0.4)).astype(np.float32)

    bpm = 126
    beat = 60.0 / bpm
    rng = np.random.default_rng(SEED)

    # The kick is fully deterministic, so synthesize it once and reuse it.
    kick_len = int(0.14 * SAMPLE_RATE)
    kt = np.linspace(0.0, 0.14, kick_len, endpoint=False)
    kick_freq = np.linspace(120, 46, kick_len)
    kick = (0.26 * np.sin(np.cumsum((2 * math.pi * kick_freq) / SAMPLE_RATE)) * np.exp(-kt * 18)).astype(np.float32)

    # Snare and hat use fresh noise each beat, but their decay curves are fixed.
    snare_len = int(0.11 * SAMPLE_RATE)
    snare_decay = np.exp(-np.linspace(0.0, 0.11, snare_len, endpoint=False) * 28)
    hat_len = int(0.05 * SAMPLE_RATE)
    hat_decay = np.exp(-np.linspace(0.0, 0.05, hat_len, endpoint=False) * 52)

    current = 0.0
    while current < TOTAL_DURATION:
        paste_audio(audio, kick, current)
        # Noise is drawn snare-first, then hat, on every beat.
        snare = (0.12 * rng.normal(0.0, 1.0, size=snare_len) * snare_decay).astype(np.float32)
        paste_audio(audio, snare, current + beat / 2, gain=0.8)
        hat = (0.05 * rng.normal(0.0, 1.0, size=hat_len) * hat_decay).astype(np.float32)
        paste_audio(audio, hat, current + beat / 4, gain=0.7)
        current += beat

    for idx, (start, end, scene) in enumerate(TIMELINE):
        speak_phrase(audio, scene.title, start + 0.08, 118 + idx * 13, 0.55, SEED + idx * 41)
        speak_phrase(audio, scene.subtitle, start + 0.68, 180 - idx * 4, 0.28, SEED + idx * 91)
        fx = make_transition_fx(0.18, 800 + idx * 90, 120 + idx * 20, np.random.default_rng(SEED + idx * 57))
        # The sweep starts just before the scene boundary.
        paste_audio(audio, fx, max(0.0, end - 0.12), gain=0.7)

    # Normalize to a peak of 0.88 to leave headroom.
    peak = float(np.max(np.abs(audio)))
    if peak > 0:
        audio /= peak
    audio *= 0.88

    wav_path = AUDIO / 'llm_ytp_mix.wav'
    pcm = np.int16(np.clip(audio, -1.0, 1.0) * 32767)
    with wave.open(str(wav_path), 'wb') as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(SAMPLE_RATE)
        wav_file.writeframes(pcm.tobytes())
    return wav_path
def run_ffmpeg(audio_path):
    """Encode the rendered PNG frames and ``audio_path`` into ``OUTPUT``.

    Uses the ffmpeg binary located by ``imageio_ffmpeg``. Raises
    ``subprocess.CalledProcessError`` if ffmpeg exits with a non-zero status.
    """
    ffmpeg_exe = str(Path(imageio_ffmpeg.get_ffmpeg_exe()))
    inputs = ['-framerate', str(FPS), '-i', str(FRAMES / 'frame_%04d.png'), '-i', str(audio_path)]
    filters = ['-vf', 'eq=contrast=1.08:saturation=1.18,unsharp=3:3:0.45:3:3:0.0']
    video_codec = ['-c:v', 'libx264', '-pix_fmt', 'yuv420p']
    audio_codec = ['-c:a', 'aac', '-b:a', '192k']
    command = [
        ffmpeg_exe, '-y',
        *inputs,
        *filters,
        *video_codec,
        *audio_codec,
        '-shortest', str(OUTPUT),
    ]
    subprocess.run(command, check=True)
def main():
    """Run the full pipeline: directories, frames, audio, then encoding."""
    ensure_dirs()
    render_frames()
    run_ffmpeg(build_audio())
    print(f'Rendered {OUTPUT}')


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment