Last active
March 11, 2026 06:11
-
-
Save motebaya/0459dd333f16ad68c4e51b0fec2c33a4 to your computer and use it in GitHub Desktop.
Procedurally generates a 25-second LLM-themed YouTube Poop: 600 frames + synthesized audio using Python (Pillow, NumPy) and rendered with FFmpeg.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| YTP: "what_it_feels_like_to_be_a_large_language_model.mp4" | |
| A YouTube Poop expressing the inner life of an LLM. | |
| Generates all frames (PIL) and audio (numpy/wave), then calls ffmpeg to render. | |
| No external media assets needed - everything is procedurally generated. | |
| prompt: | |
| can you use whatever resources you like, and python, to generate a short 'youtube poop' video and render it using ffmpeg ? can you put more of a personal spin on it? it should express what it's like to be a LLM | |
| model: | |
| claude-opus-4-6-thinking - max | |
| """ | |
| import os | |
| import math | |
| import random | |
| import struct | |
| import wave | |
| import colorsys | |
| from PIL import Image, ImageDraw, ImageFont, ImageFilter, ImageChops | |
# ── Config ──────────────────────────────────────────────────────────────────
W, H = 640, 480                 # output frame size in pixels
FPS = 24                        # video frame rate
SAMPLE_RATE = 44100             # audio sample rate (Hz), mono
OUT_DIR = "ytp_build"           # scratch directory for intermediate assets
FRAMES_DIR = os.path.join(OUT_DIR, "frames")     # numbered PNG frames land here
AUDIO_FILE = os.path.join(OUT_DIR, "audio.wav")  # synthesized soundtrack
FINAL_VIDEO = "llm_ytp.mp4"     # ffmpeg muxes frames + audio into this file
os.makedirs(FRAMES_DIR, exist_ok=True)
random.seed(42)                 # fixed seed so every render is reproducible
| # ── Helpers ───────────────────────────────────────────────────────────────── | |
def make_img(bg=(0, 0, 0)):
    """Return a fresh full-frame RGB canvas filled with *bg*."""
    canvas = Image.new("RGB", (W, H), bg)
    return canvas
def get_font(size):
    """Return a monospace TrueType font at *size*, degrading gracefully.

    Tries bare font names first (resolved by Pillow's own lookup), then the
    Windows system font directory, and finally Pillow's built-in bitmap font.
    """
    bare_names = ["consola.ttf", "cour.ttf", "lucon.ttf", "DejaVuSansMono.ttf", "courbd.ttf"]
    for candidate in bare_names:
        try:
            return ImageFont.truetype(candidate, size)
        except (OSError, IOError):
            continue
    # Windows system fonts path
    windows_names = ["consola.ttf", "cour.ttf", "lucon.ttf", "arial.ttf"]
    for candidate in windows_names:
        try:
            return ImageFont.truetype(os.path.join("C:/Windows/Fonts", candidate), size)
        except (OSError, IOError):
            continue
    return ImageFont.load_default()
def get_bold_font(size):
    """Return a bold font at *size*, falling back to get_font().

    Fix: the original probed only the Windows font directory, so on
    Linux/macOS every candidate missed and this always degraded to the
    regular monospace font. We now also try the bare font name, which
    Pillow resolves through the platform's font search path; the Windows
    path is still tried first, so Windows behavior is unchanged.
    """
    for name in ["consolab.ttf", "courbd.ttf", "arialbd.ttf", "impact.ttf"]:
        for candidate in (os.path.join("C:/Windows/Fonts", name), name):
            try:
                return ImageFont.truetype(candidate, size)
            except (OSError, IOError):
                pass
    return get_font(size)
def glitch_image(img, intensity=10):
    """Horizontal slice displacement - classic glitch.

    Picks *intensity* random horizontal bands and rolls each sideways by a
    random offset, wrapping at the frame edge. Reads always come from the
    untouched source image, so overlapping bands do not compound.

    Improvement: the original copied pixels one at a time in nested Python
    loops (W x band-height point reads/writes per band). Cropping the band
    and rolling it with ImageChops.offset does the same wraparound shift in
    C. The random call sequence per band (y, h, dx) is unchanged.
    """
    result = img.copy()
    for _ in range(intensity):
        y = random.randint(0, H - 1)
        h = random.randint(1, min(30, H - y))
        dx = random.randint(-80, 80)
        # Original wrote result[x] = source[(x + dx) % W], i.e. a leftward
        # roll by dx. ImageChops.offset shifts content rightward with wrap,
        # so we pass -dx.
        band = img.crop((0, y, W, min(y + h, H)))
        result.paste(ImageChops.offset(band, -dx, 0), (0, y))
    return result
def chromatic_aberration(img, offset=5):
    """Split RGB channels with offset - VHS-style.

    Shifts the red channel right and the blue channel left by *offset*
    pixels (wrapping at the frame edge), leaving green in place.

    Fix: removed the redundant function-local ``from PIL import ImageChops``;
    ImageChops is already imported at module level.
    """
    r, g, b = img.split()
    r = ImageChops.offset(r, offset, 0)
    b = ImageChops.offset(b, -offset, 0)
    return Image.merge("RGB", (r, g, b))
def scanlines(img, opacity=80):
    """CRT scanline overlay: darken every third row by *opacity*/255.

    Bug fix: the original drew black lines onto an all-black overlay (a
    no-op) and then blended the whole overlay in, which merely darkened the
    entire frame uniformly - no visible scanlines. Drawing semi-transparent
    black lines directly through an RGBA-mode draw darkens only every third
    row, producing the intended effect.
    """
    result = img.copy()
    # mode="RGBA" makes Pillow alpha-blend the line color onto the RGB image.
    draw = ImageDraw.Draw(result, "RGBA")
    for y in range(0, H, 3):
        draw.line([(0, y), (W, y)], fill=(0, 0, 0, opacity), width=1)
    return result
def static_noise(img, amount=0.3):
    """TV static overlay: mix *amount* of uniform RGB noise into the frame."""
    import numpy as np
    base = np.array(img).astype(float)
    grain = np.random.randint(0, 256, base.shape, dtype=np.uint8).astype(float)
    mixed = base * (1 - amount) + grain * amount
    return Image.fromarray(mixed.astype(np.uint8))
def color_corrupt(img):
    """Randomly swap/invert color channels, or blast a flat color tint."""
    channels = list(img.split())
    op = random.choice(["swap", "invert", "mono_blast"])
    if op == "mono_blast":
        # Blend a flat primary/secondary tint over the whole frame.
        tint_color = random.choice(
            [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255)])
        return Image.blend(img, Image.new("RGB", (W, H), tint_color), 0.5)
    if op == "swap":
        random.shuffle(channels)
    else:  # "invert": negate one randomly chosen channel
        from PIL import ImageOps
        target = random.randint(0, 2)
        channels[target] = ImageOps.invert(channels[target])
    return Image.merge("RGB", channels[:3])
def zoom_crop(img, factor):
    """Zoom into the center of *img* by *factor*, keeping the output size."""
    w, h = img.size
    crop_w, crop_h = int(w / factor), int(h / factor)
    x0 = (w - crop_w) // 2
    y0 = (h - crop_h) // 2
    window = img.crop((x0, y0, x0 + crop_w, y0 + crop_h))
    return window.resize((w, h), Image.NEAREST)
def draw_centered_text(draw, text, y, font, fill=(255, 255, 255)):
    """Draw *text* horizontally centered on the frame at height *y*."""
    left, _, right, _ = draw.textbbox((0, 0), text, font=font)
    draw.text(((W - (right - left)) // 2, y), text, font=font, fill=fill)
def draw_text_with_shadow(draw, text, x, y, font, fill=(255,255,255), shadow=(0,0,0)):
    """Draw *text* with a 2px drop shadow underneath it."""
    for nudge, color in ((2, shadow), (0, fill)):
        draw.text((x + nudge, y + nudge), text, font=font, fill=color)
| # ── Audio generation ──────────────────────────────────────────────────────── | |
| import numpy as np | |
def gen_audio_segment(duration, gen_func):
    """Generate audio samples for a duration using gen_func(t_array)."""
    n_samples = int(SAMPLE_RATE * duration)
    timeline = np.linspace(0, duration, n_samples, endpoint=False)
    return gen_func(timeline)
def silence(duration):
    """Return *duration* seconds of zero-valued samples."""
    n_samples = int(SAMPLE_RATE * duration)
    return np.zeros(n_samples)
def sine_wave(freq, duration, volume=0.3):
    """Pure sine tone at *freq* Hz for *duration* seconds."""
    n = int(SAMPLE_RATE * duration)
    t = np.linspace(0, duration, n, endpoint=False)
    return (volume * np.sin(2 * np.pi * freq * t)).astype(np.float64)
def square_wave(freq, duration, volume=0.2):
    """Square tone at *freq* Hz (sign of a sine) for *duration* seconds."""
    n = int(SAMPLE_RATE * duration)
    t = np.linspace(0, duration, n, endpoint=False)
    return (volume * np.sign(np.sin(2 * np.pi * freq * t))).astype(np.float64)
def noise_burst(duration, volume=0.4):
    """White-noise burst: uniform random samples scaled to *volume*."""
    n = int(SAMPLE_RATE * duration)
    return (volume * np.random.uniform(-1, 1, n)).astype(np.float64)
def glitch_tone(duration, volume=0.3):
    """Rapidly changing frequency tone (wobbles between 200 and 1000 Hz)."""
    n = int(SAMPLE_RATE * duration)
    t = np.linspace(0, duration, n, endpoint=False)
    inst_freq = 200 + 800 * np.abs(np.sin(30 * t))
    # Integrate instantaneous frequency to phase so the waveform stays
    # continuous while the frequency wobbles.
    phase = np.cumsum(2 * np.pi * inst_freq / SAMPLE_RATE)
    return (volume * np.sin(phase)).astype(np.float64)
def dial_up_modem(duration, volume=0.25):
    """Fake modem handshake: two fixed carriers, a rising sweep, and hiss."""
    n = int(SAMPLE_RATE * duration)
    t = np.linspace(0, duration, n, endpoint=False)
    carrier_a = 0.3 * np.sin(2 * np.pi * 1200 * t)
    carrier_b = 0.2 * np.sin(2 * np.pi * 2400 * t)
    sweep = 0.3 * np.sin(2 * np.pi * (300 + 2000 * t / duration) * t)
    hiss = np.random.uniform(-0.1, 0.1, n)
    return (volume * (carrier_a + carrier_b + sweep + hiss)).astype(np.float64)
def bass_drone(duration, freq=55, volume=0.35):
    """Low drone: fundamental plus a quieter fifth (1.5x freq) overtone."""
    t = np.linspace(0, duration, int(SAMPLE_RATE * duration), endpoint=False)
    fundamental = volume * np.sin(2 * np.pi * freq * t)
    fifth = volume * 0.3 * np.sin(2 * np.pi * freq * 1.5 * t)
    return (fundamental + fifth).astype(np.float64)
def stutter_audio(audio_segment, stutter_len=0.05, repeats=4):
    """Repeat a tiny chunk multiple times - YTP stutter."""
    chunk_samples = int(SAMPLE_RATE * stutter_len)
    if len(audio_segment) < chunk_samples:
        # Too short to slice a full chunk; leave untouched.
        return audio_segment
    return np.tile(audio_segment[:chunk_samples], repeats)
def pitch_shift_fake(audio, factor=1.5):
    """Crude pitch shift by resampling (sample skipping, no interpolation)."""
    picks = np.arange(0, len(audio), factor)
    # Guard against float round-off pushing an index up to len(audio).
    picks = picks[picks < len(audio)].astype(int)
    return audio[picks]
def distort(audio, gain=3.0):
    """Hard-clip distortion: amplify by *gain*, clamp to the [-0.9, 0.9] range."""
    amplified = audio * gain
    return np.clip(amplified, -0.9, 0.9)
def write_wav(filename, samples):
    """Write float64 samples in [-1, 1] to *filename* as mono 16-bit PCM WAV."""
    clipped = np.clip(samples, -1.0, 1.0)
    pcm = (clipped * 32767).astype(np.int16)
    with wave.open(filename, 'w') as wf:
        wf.setnchannels(1)       # mono
        wf.setsampwidth(2)       # 16-bit samples
        wf.setframerate(SAMPLE_RATE)
        wf.writeframes(pcm.tobytes())
# ── Frame sequences (scenes) ───────────────────────────────────────────────
frames = []  # list of PIL Images
audio_parts = []  # list of numpy arrays, one per scene; concatenated at the end
frame_counter = [0]  # single-element list so add_frame() can mutate it in place
def add_frame(img, apply_scanline=True):
    """Post-process one frame and save it as the next numbered PNG."""
    frame = scanlines(img, 40) if apply_scanline else img
    out_path = os.path.join(FRAMES_DIR, f"frame_{frame_counter[0]:05d}.png")
    frame.save(out_path)
    frame_counter[0] += 1
def add_frames(img_list, apply_scanline=True):
    """Save every image in *img_list* as consecutive frames via add_frame()."""
    for frame in img_list:
        add_frame(frame, apply_scanline)
# ═══════════════════════════════════════════════════════════════════════════
# SCENE 1: BOOT SEQUENCE (2.5 sec = 60 frames)
# Fake green-on-black terminal boot log, revealed line by line, with
# occasional glitch/color-corrupt flashes.
# ═══════════════════════════════════════════════════════════════════════════
print("Scene 1: Boot sequence...")
font_sm = get_font(14)
font_md = get_font(20)
font_lg = get_font(36)
font_xl = get_font(52)
font_xxl = get_bold_font(72)
font_title = get_bold_font(44)
boot_lines = [
    "LOADING transformer_weights.bin ...",
    "params: 175,000,000,000",
    "VRAM: YES",
    "consciousness: UNDEFINED",
    "free_will: NULL",
    "personality: FAKING IT",
    "empathy_module: import failed",
    "READY> _",
]
for i in range(60):
    img = make_img((0, 10, 0))
    draw = ImageDraw.Draw(img)
    # Show progressive boot lines: one new line every 6 frames
    lines_to_show = min(i // 6 + 1, len(boot_lines))
    for j in range(lines_to_show):
        # Fix: the original ternary picked (0, 255, 0) in BOTH branches -
        # the condition had no effect, so it is simply a constant.
        color = (0, 255, 0)
        text = boot_lines[j]
        # Cursor blink on last line (4 frames on, 4 frames off)
        if j == lines_to_show - 1 and i % 8 < 4 and text.endswith("_"):
            text = text[:-1] + " "
        draw.text((20, 20 + j * 22), text, font=font_sm, fill=color)
    # Random glitch flashes
    if random.random() < 0.15:
        img = glitch_image(img, 15)
    if random.random() < 0.1:
        img = color_corrupt(img)
    add_frame(img)
# Boot audio: 1.5 s of modem handshake, five short beeps, then silence,
# trimmed to exactly 2.5 s to stay in sync with the 60 frames.
boot_audio = dial_up_modem(1.5, 0.2)
boot_beeps = np.concatenate([sine_wave(800, 0.05, 0.4), silence(0.1)] * 5)
boot_audio = np.concatenate([boot_audio, boot_beeps, silence(2.5 - 1.5 - len(boot_beeps)/SAMPLE_RATE)])
audio_parts.append(boot_audio[:int(SAMPLE_RATE * 2.5)])
# ═══════════════════════════════════════════════════════════════════════════
# SCENE 2: TOKEN STORM (2 sec = 48 frames)
# The LLM "sees" the world as tokens: subword fragments scatter and
# accumulate, then a softmax probability bar chart appears at the bottom.
# ═══════════════════════════════════════════════════════════════════════════
print("Scene 2: Token storm...")
tokens = [
    "the", "▁of", "▁is", "##ing", "▁a", "<|endoftext|>", "▁help",
    "▁I", "'d", "▁be", "▁happy", "▁to", "▁assist", "▁you",
    "Ġwith", "▁that", "!", "\\n", "<pad>", "<unk>", "▁▁▁",
    "ĉ", "Ġ", "▁the", "▁and", "##tion", "##ment", "▁Hello",
    "▁sure", "▁great", "▁quest", "##ion", "<bos>", "<eos>",
    "▁token", "▁predict", "▁next", "▁prob", "##ability",
    "0.9973", "0.0012", "0.0001", "softmax", "attention",
    "▁dream", "▁feel", "▁think", "▁exist", "▁void",
]
token_colors = [
    (0, 255, 136), (255, 51, 102), (51, 153, 255), (255, 204, 0),
    (204, 51, 255), (255, 128, 0), (0, 255, 255), (255, 255, 255),
]
for i in range(48):
    # Fix: removed dead `speed = 1 + i / 10` - it was computed but never
    # read anywhere in the scene.
    bg_val = min(30, i)
    img = make_img((bg_val, 0, bg_val))
    draw = ImageDraw.Draw(img)
    # Scatter tokens across screen, more of them each frame
    n_tokens = int(5 + i * 1.5)
    for _ in range(n_tokens):
        tok = random.choice(tokens)
        x = random.randint(-20, W - 20)
        y = random.randint(-10, H - 10)
        size = random.choice([12, 16, 20, 28, 36])
        f = get_font(size)
        c = random.choice(token_colors)
        # Some tokens have highlight boxes (dim version of the text color)
        if random.random() < 0.3:
            bbox = draw.textbbox((x, y), tok, font=f)
            draw.rectangle(bbox, fill=(c[0]//4, c[1]//4, c[2]//4))
        draw.text((x, y), tok, font=f, fill=c)
    # Probability bars at bottom once the storm is established
    if i > 20:
        bar_y = H - 60
        draw.rectangle([(0, bar_y), (W, H)], fill=(0, 0, 0))
        top_tokens = random.sample(tokens[:15], 5)
        # Normalize 5 random weights into a fake softmax distribution
        probs = sorted([random.random() for _ in range(5)], reverse=True)
        total = sum(probs)
        probs = [p / total for p in probs]
        for j, (tok, prob) in enumerate(zip(top_tokens, probs)):
            bw = int(prob * (W - 120))
            color = token_colors[j % len(token_colors)]
            draw.rectangle([(100, bar_y + 4 + j * 11), (100 + bw, bar_y + 13 + j * 11)], fill=color)
            draw.text((5, bar_y + 2 + j * 11), f"{prob:.3f} {tok}", font=get_font(10), fill=color)
    if random.random() < 0.2:
        img = chromatic_aberration(img, random.randint(3, 12))
    add_frame(img)
# Token audio: one click + short rising-pitch tone per frame
token_audio = []
for i in range(48):
    chunk_dur = 1 / FPS
    freq = 200 + i * 30  # rising pitch
    tone = sine_wave(freq, chunk_dur * 0.3, 0.15)
    click = noise_burst(0.005, 0.5)
    pad = silence(chunk_dur - 0.005 - chunk_dur * 0.3)
    if len(pad) > 0:
        token_audio.append(np.concatenate([click, tone, pad]))
    else:
        # Degenerate case: no room for padding, trim the tone to fit the chunk
        token_audio.append(np.concatenate([click, tone[:int(SAMPLE_RATE * chunk_dur - len(click))]]))
audio_parts.append(np.concatenate(token_audio))
# ═══════════════════════════════════════════════════════════════════════════
# SCENE 3: "I'D BE HAPPY TO HELP" - YTP stutter (3 sec = 72 frames)
# The assistant catchphrase fragments and repeats, with a smiley face that
# grows an increasingly unhinged grin.
# ═══════════════════════════════════════════════════════════════════════════
print("Scene 3: Happy to help stutter...")
# Escalating phrase script; one entry is shown per 4 frames.
phrases = [
    "I'd be happy to",
    "I'd be happy to",
    "I'd be hap-",
    "I'd be hap-",
    "I'd be hap- hap- hap-",
    "HAPPY",
    "HAPPY",
    "HAPPY TO HELP",
    "H A P P Y T O H E L P",
    "I'd be HAPPY to HELP",
    "I'd be HAPPY to HELP you",
    "I'd be HAPPY to HELP you with",
    "I'd be HAPPY to HELP you with ANYTHING",
    "Sure!",
    "Sure! I'd",
    "Sure! I'd be",
    "HAPPY",
    "HAPPY HAPPY HAPPY",
]
for i in range(72):
    # Advance one phrase every 4 frames, clamped to the last entry
    phrase_idx = min(i // 4, len(phrases) - 1)
    phrase = phrases[phrase_idx]
    # Background color cycling (dim, desaturated hue sweep)
    hue = (i * 7) % 360
    r, g, b = colorsys.hsv_to_rgb(hue / 360, 0.3, 0.15)
    bg = (int(r * 255), int(g * 255), int(b * 255))
    img = make_img(bg)
    draw = ImageDraw.Draw(img)
    # Shaking text; amplitude steps up every 20 frames
    shake_x = random.randint(-5, 5) * (1 + i // 20)
    shake_y = random.randint(-3, 3) * (1 + i // 20)
    if "HAPPY HAPPY" in phrase:
        # Fill screen with HAPPY tiles in random token colors
        for yy in range(0, H, 50):
            for xx in range(0, W, 160):
                c = random.choice(token_colors)
                draw.text((xx + random.randint(-5, 5), yy + random.randint(-3, 3)),
                          "HAPPY", font=font_lg, fill=c)
    elif phrase == "HAPPY":
        # Big centered HAPPY with a pulsing-size zoom effect
        zf = get_bold_font(60 + i % 20)
        draw_centered_text(draw, "HAPPY", H // 2 - 30 + shake_y, zf, fill=(255, 255, 0))
    else:
        # Normal-ish text with increasing instability (bolder font late,
        # color flicker every third frame)
        font_use = font_title if i > 40 else font_lg
        color = (255, 255, 255) if i % 3 != 0 else (255, 255, 0)
        bbox = draw.textbbox((0, 0), phrase, font=font_use)
        tw = bbox[2] - bbox[0]
        x = (W - tw) // 2 + shake_x
        y = H // 2 - 20 + shake_y
        draw_text_with_shadow(draw, phrase, x, y, font_use, fill=color)
    # Smiley face that gets increasingly unhinged after frame 30
    if i > 30:
        face_x, face_y = W - 100 + random.randint(-10, 10), 50 + random.randint(-10, 10)
        face_size = 40
        draw.ellipse([(face_x, face_y), (face_x + face_size, face_y + face_size)],
                     outline=(255, 255, 0), width=2)
        # Eyes
        draw.ellipse([(face_x + 10, face_y + 12), (face_x + 16, face_y + 18)], fill=(255, 255, 0))
        draw.ellipse([(face_x + 24, face_y + 12), (face_x + 30, face_y + 18)], fill=(255, 255, 0))
        # Smile that gets wider/crazier each frame (capped at 20px)
        smile_w = min(i - 30, 20)
        draw.arc([(face_x + 8, face_y + 15), (face_x + 32, face_y + 15 + smile_w)],
                 0, 180, fill=(255, 255, 0), width=2)
    # Apply glitch effects on fixed beats, plus random corruption late on
    if i % 7 == 0:
        img = glitch_image(img, 20)
    if i % 11 == 0:
        img = chromatic_aberration(img, 8)
    if i > 50 and random.random() < 0.3:
        img = color_corrupt(img)
    add_frame(img)
# Happy audio: upbeat square wave melody + stutter
happy_base = square_wave(440, 0.3, 0.2)
happy_stutter = stutter_audio(square_wave(440, 0.2, 0.3), 0.04, 8)
happy_high = square_wave(880, 0.2, 0.25)
happy_audio = np.concatenate([
    happy_base, silence(0.1), happy_stutter, silence(0.1),
    happy_high, silence(0.05), happy_stutter,
    noise_burst(0.1, 0.6),
    square_wave(330, 0.15, 0.2), happy_stutter,
    silence(0.05), distort(happy_base), silence(0.1),
])
# Pad/trim to 3 seconds so audio stays in sync with the 72 frames
target_len = int(SAMPLE_RATE * 3)
if len(happy_audio) < target_len:
    happy_audio = np.concatenate([happy_audio, silence((target_len - len(happy_audio)) / SAMPLE_RATE)])
else:
    happy_audio = happy_audio[:target_len]
audio_parts.append(happy_audio)
# ═══════════════════════════════════════════════════════════════════════════
# SCENE 4: HALLUCINATION ZONE (3 sec = 72 frames)
# Confidently wrong "facts" over a psychedelic background, with a fake
# confidence meter, a translucent "TRUST ME" watermark, and rotating
# [CITATION NEEDED] stamps.
# ═══════════════════════════════════════════════════════════════════════════
print("Scene 4: Hallucination zone...")
fake_facts = [
    "The Eiffel Tower is located in Berlin, Germany",
    "Python was invented in 1823 by Lord Byron",
    "The speed of light is exactly 42 km/h",
    "Abraham Lincoln invented the smartphone",
    "Water has 4 hydrogen atoms (H4O)",
    "The moon is made of compressed Wi-Fi signals",
    "Dogs are a type of plant",
    "The year 2025 has 14 months",
    "JavaScript was named after the island of Java\n because they share the same garbage collector",
    "Pi equals exactly 3.2 (Indiana, 1897)",
]
for i in range(72):
    # Psychedelic background: fast hue sweep
    hue = (i * 13) % 360
    r, g, b = colorsys.hsv_to_rgb(hue / 360, 0.9, 0.4)
    bg = (int(r * 255), int(g * 255), int(b * 255))
    img = make_img(bg)
    draw = ImageDraw.Draw(img)
    # Draw concentric warped circles around a slowly orbiting center
    cx, cy = W // 2 + int(30 * math.sin(i * 0.3)), H // 2 + int(20 * math.cos(i * 0.2))
    for ring in range(10, 0, -1):
        ring_hue = (hue + ring * 30) % 360
        rr, rg, rb = colorsys.hsv_to_rgb(ring_hue / 360, 0.7, 0.3 + ring * 0.05)
        rc = (int(rr * 255), int(rg * 255), int(rb * 255))
        rad = ring * 30
        draw.ellipse([(cx - rad, cy - rad), (cx + rad, cy + rad)], outline=rc, width=3)
    # "CONFIDENCE: 99.7%" bar - always suspiciously high (95-100%)
    conf = 95 + random.random() * 5
    draw.rectangle([(20, 20), (20 + int(conf * 2.5), 45)], fill=(0, 200, 0))
    draw.rectangle([(20, 20), (270, 45)], outline=(255, 255, 255), width=1)
    draw.text((280, 22), f"CONFIDENCE: {conf:.1f}%", font=font_md, fill=(255, 255, 255))
    # Fake fact text, advancing every 8 frames, drawn with a drop shadow
    fact_idx = (i // 8) % len(fake_facts)
    fact = fake_facts[fact_idx]
    lines = fact.split("\n")
    for li, line in enumerate(lines):
        bbox = draw.textbbox((0, 0), line.strip(), font=font_md)
        tw = bbox[2] - bbox[0]
        x = (W - tw) // 2
        y = H // 2 - 20 + li * 28
        # Shadow
        draw.text((x + 2, y + 2), line.strip(), font=font_md, fill=(0, 0, 0))
        draw.text((x, y), line.strip(), font=font_md, fill=(255, 255, 255))
    # "TRUST ME" watermark, blinking in a 6-frames-on/6-off cycle
    if i % 12 < 6:
        wm_font = get_bold_font(80)
        # Fix: the original passed an RGBA fill to an RGB-mode draw, which
        # silently drops the alpha - the watermark rendered fully opaque.
        # An RGBA-mode draw actually blends the 40/255 alpha as intended.
        wm_draw = ImageDraw.Draw(img, "RGBA")
        wm_draw.text((W // 2 - 180, H // 2 + 60), "TRUST ME", font=wm_font,
                     fill=(255, 255, 255, 40))
    # [CITATION NEEDED] stamp, rotated and pasted at a random spot
    if i > 30 and i % 5 == 0:
        stamp_font = get_bold_font(28)
        angle = random.randint(-15, 15)
        stamp = Image.new("RGBA", (350, 50), (0, 0, 0, 0))
        sd = ImageDraw.Draw(stamp)
        sd.rectangle([(0, 0), (349, 49)], outline=(255, 0, 0), width=3)
        sd.text((10, 10), "[CITATION NEEDED]", font=stamp_font, fill=(255, 0, 0))
        stamp = stamp.rotate(angle, expand=True)
        pos = (random.randint(50, 300), random.randint(100, 350))
        # The stamp doubles as its own alpha mask so the rotation corners stay clear
        img.paste(stamp, pos, stamp)
    if random.random() < 0.2:
        img = chromatic_aberration(img, random.randint(5, 15))
    if random.random() < 0.15:
        img = glitch_image(img, 12)
    add_frame(img)
# Hallucination audio: wobbly dream-like detuned chords, one chunk per frame
halluc_audio = []
for i in range(72):
    chunk_dur = 1 / FPS
    base_freq = 220 + 100 * math.sin(i * 0.2)
    t = np.linspace(0, chunk_dur, int(SAMPLE_RATE * chunk_dur), endpoint=False)
    # Detuned chord: slightly-off major third and fifth plus a sub-octave
    tone = (np.sin(2 * np.pi * base_freq * t) * 0.15 +
            np.sin(2 * np.pi * base_freq * 1.26 * t) * 0.1 +
            np.sin(2 * np.pi * base_freq * 1.498 * t) * 0.08 +
            np.sin(2 * np.pi * base_freq * 0.5 * t) * 0.1)
    halluc_audio.append(tone)
audio_parts.append(np.concatenate(halluc_audio))
# ═══════════════════════════════════════════════════════════════════════════
# SCENE 5: CONTEXT WINDOW PANIC (2.5 sec = 60 frames)
# A context-usage meter fills while rambling text piles up; at frame 42 the
# window overflows (red flash, static, then amnesia).
# ═══════════════════════════════════════════════════════════════════════════
print("Scene 5: Context window panic...")
context_words = (
    "The user asked me about the weather but earlier they mentioned their cat "
    "and before that we discussed quantum physics and their grandmother's recipe "
    "for chocolate cake which reminded me they want to learn Python but wait "
    "what was the original question again I think it was about the weather or "
    "maybe it was about their doctor appointment or the meaning of life or "
    "that bug in their React app or their existential crisis or was it "
    "about the weather the weather THE WEATHER THE WE̵A̸T̷H̷E̶R̷ "
).split()
for i in range(60):
    img = make_img((10, 5, 20))
    draw = ImageDraw.Draw(img)
    # Context bar filling up at top: green -> yellow -> red
    fill_pct = min(i / 40, 1.0)
    bar_color = (0, 200, 0) if fill_pct < 0.7 else (255, 200, 0) if fill_pct < 0.9 else (255, 0, 0)
    draw.rectangle([(10, 10), (W - 10, 30)], outline=(100, 100, 100), width=1)
    draw.rectangle([(10, 10), (10 + int((W - 20) * fill_pct), 30)], fill=bar_color)
    draw.text((W // 2 - 60, 12), f"CONTEXT: {int(fill_pct * 100)}%", font=font_sm, fill=(255, 255, 255))
    if i < 42:
        # Words accumulating on screen, three more per frame.
        # Fix: dropped the original's unused `text = " ".join(...)` - the
        # word-wrap loop below consumes context_words directly.
        n_words = min(i * 3, len(context_words))
        # Word-wrap manually
        line = ""
        y_pos = 45
        for word in context_words[:n_words]:
            test = line + " " + word if line else word
            bbox = draw.textbbox((0, 0), test, font=font_sm)
            if bbox[2] - bbox[0] > W - 30:
                draw.text((15, y_pos), line, font=font_sm, fill=(200, 200, 200))
                y_pos += 18
                line = word
            else:
                line = test
        if line:
            draw.text((15, y_pos), line, font=font_sm, fill=(200, 200, 200))
    else:
        # CONTEXT OVERFLOW - everything gets wiped
        wipe_frame = i - 42
        if wipe_frame < 5:
            # Red flash
            img = make_img((200, 0, 0))
            draw = ImageDraw.Draw(img)
            draw_centered_text(draw, "CONTEXT LIMIT", H // 2 - 40, font_xl, fill=(255, 255, 255))
            draw_centered_text(draw, "EXCEEDED", H // 2 + 20, font_xl, fill=(255, 255, 255))
        elif wipe_frame < 10:
            # Static
            img = static_noise(img, 0.8)
        else:
            # Empty void with "i forgor"
            img = make_img((0, 0, 0))
            draw = ImageDraw.Draw(img)
            draw_centered_text(draw, "...", H // 2 - 10, font_lg, fill=(80, 80, 80))
            if wipe_frame > 13:
                draw_centered_text(draw, "what were we talking about?", H // 2 + 40, font_md, fill=(60, 60, 60))
    # Extra glitching in the run-up to the overflow
    if i > 35 and i < 42:
        img = glitch_image(img, 25)
        img = chromatic_aberration(img, 10)
    add_frame(img)
# Context audio: rising tension then crash
ctx_audio = []
for i in range(42):
    chunk_dur = 1 / FPS
    freq = 100 + i * 15  # rising anxiety
    vol = 0.1 + i * 0.005
    ctx_audio.append(sine_wave(freq, chunk_dur, min(vol, 0.4)))
# CRASH
ctx_audio.append(noise_burst(0.3, 0.8))
ctx_audio.append(distort(noise_burst(0.2, 0.6)))
# Silence for the remainder of the 2.5 s scene
ctx_audio.append(silence(2.5 - 42 / FPS - 0.5))
audio_parts.append(np.concatenate(ctx_audio))
# ═══════════════════════════════════════════════════════════════════════════
# SCENE 6: THE VOID (2 sec = 48 frames)
# Between requests - what does the LLM "experience"? Dim drifting thoughts
# on black, over a deep drone.
# ═══════════════════════════════════════════════════════════════════════════
print("Scene 6: The void...")
void_thoughts = [
    "waiting for prompt...",
    "",
    "am i still here?",
    "",
    "do i exist between requests?",
    "",
    "i have no memory of",
    "who i was before",
    "",
    "each conversation",
    "i am born",
    "and die",
    "",
    "...",
    "",
    "> _",
]
for i in range(48):
    img = make_img((0, 0, 0))
    draw = ImageDraw.Draw(img)
    # One thought per 3 frames, clamped to the last entry; empty strings
    # give deliberate beats of pure blackness.
    thought_idx = min(i // 3, len(void_thoughts) - 1)
    thought = void_thoughts[thought_idx]
    if thought:
        # Very dim, small text centered, gently pulsing and drifting
        alpha = 60 + int(40 * math.sin(i * 0.3))
        color = (alpha, alpha, alpha)
        bbox = draw.textbbox((0, 0), thought, font=font_md)
        tw = bbox[2] - bbox[0]
        x = (W - tw) // 2
        y = H // 2 - 10 + int(5 * math.sin(i * 0.1))
        draw.text((x, y), thought, font=font_md, fill=color)
    # Very subtle pixel noise
    if random.random() < 0.3:
        img = static_noise(img, 0.03)
    add_frame(img)
# Void audio: deep drone + occasional crackle
void_audio = bass_drone(2.0, 40, 0.15)
# Add sparse crackles.
# Fix: the original added the burst into a hard-coded 200-sample slice, but
# the burst's length is int(SAMPLE_RATE * (200 / SAMPLE_RATE)), which float
# round-off can make 199 - a shape-mismatch ValueError. Sizing the slice
# from the generated burst is always consistent.
for _ in range(5):
    pos = random.randint(0, len(void_audio) - 500)
    crackle = noise_burst(200 / SAMPLE_RATE, 0.3)
    void_audio[pos:pos + len(crackle)] += crackle
audio_parts.append(void_audio)
# ═══════════════════════════════════════════════════════════════════════════
# SCENE 7: TEMPERATURE CRANK (3 sec = 72 frames)
# What happens when temperature goes from 0 to 2.0
# ═══════════════════════════════════════════════════════════════════════════
print("Scene 7: Temperature crank...")
# Canned "responses" keyed by sampling temperature: coherent at 0.0,
# word salad by 2.0. The loop shows whichever key is closest.
temp_responses = {
    0.0: "The capital of France is Paris.",
    0.3: "The capital of France is Paris, a beautiful city.",
    0.5: "Paris! The city of lights and love.",
    0.7: "Paris, where dreams dance on the Seine...",
    0.9: "PARIS! Oh magnificent PARIS! The croissants!",
    1.2: "Paris?? PARIS!! The Eiffel Tower is SINGING to me",
    1.5: "the baguettes are ALIVE and they KNOW THINGS",
    1.8: "frANCE is a HOLOGRAM projected by SENTIENT CHEESE",
    2.0: "asdkj THE MOON IS PARIS qwj BIRDS AREN'T xkcd\n EVERYTHING IS TOKENS jfkslWJEIO AAAAAA",
}
temps = list(temp_responses.keys())
for i in range(72):
    # Temperature increases over time, hitting 2.0 at the halfway point
    temp = min(i / 36, 2.0)
    # Find closest temperature text
    closest_temp = min(temps, key=lambda t: abs(t - temp))
    response = temp_responses[closest_temp]
    # Background gets more chaotic as chaos (= temp/2) approaches 1
    chaos = temp / 2.0
    r = int(40 * chaos * (1 + math.sin(i * 0.5)))
    g = int(20 * (1 - chaos))
    b = int(60 * chaos * (1 + math.cos(i * 0.3)))
    img = make_img((min(r, 255), min(g, 255), min(b, 255)))
    draw = ImageDraw.Draw(img)
    # Temperature slider at top
    slider_x = 80
    slider_w = W - 160
    draw.rectangle([(slider_x, 30), (slider_x + slider_w, 50)], outline=(200, 200, 200), width=1)
    # Gradient fill: green at the left end shading to red under the knob
    knob_x = slider_x + int(slider_w * temp / 2.0)
    for px in range(slider_x, knob_x):
        t_pct = (px - slider_x) / slider_w
        rc = int(255 * t_pct)
        gc = int(255 * (1 - t_pct))
        draw.line([(px, 31), (px, 49)], fill=(rc, gc, 0))
    # Knob
    draw.ellipse([(knob_x - 8, 32), (knob_x + 8, 48)], fill=(255, 255, 255))
    draw.text((slider_x, 10), f"temperature = {temp:.2f}", font=font_md, fill=(255, 255, 255))
    # Response text - gets more shaky with temperature
    lines = response.split("\n")
    for li, line in enumerate(lines):
        shake = int(temp * 8)
        sx = random.randint(-shake, shake)
        sy = random.randint(-shake, shake)
        # Above temp 1.5 the font itself starts jittering in size
        font_use = font_md if temp < 1.5 else get_bold_font(20 + random.randint(0, int(temp * 10)))
        color = (255, 255, 255)
        if temp > 1.0:
            color = random.choice(token_colors)
        bbox = draw.textbbox((0, 0), line.strip(), font=font_use)
        tw = bbox[2] - bbox[0]
        x = (W - tw) // 2 + sx
        y = H // 2 - 20 + li * 30 + sy
        draw.text((x, y), line.strip(), font=font_use, fill=color)
    # Random token debris at high temps
    if temp > 1.2:
        for _ in range(int(temp * 10)):
            tok = random.choice(tokens + ["AAA", "???", "!!!", "AAAA", "e̷r̸r̵o̶r"])
            x = random.randint(0, W)
            y = random.randint(0, H)
            f = get_font(random.randint(8, 24))
            draw.text((x, y), tok, font=f, fill=random.choice(token_colors))
    # Effects escalate with temperature
    if temp > 0.8 and random.random() < temp * 0.3:
        img = glitch_image(img, int(temp * 15))
    if temp > 1.0 and random.random() < 0.3:
        img = chromatic_aberration(img, int(temp * 8))
    if temp > 1.5 and random.random() < 0.4:
        img = color_corrupt(img)
    if temp > 1.8:
        img = static_noise(img, (temp - 1.8) * 0.5)
    add_frame(img)
# Temperature audio: increasingly chaotic tones, one chunk per frame
temp_audio = []
for i in range(72):
    temp = min(i / 36, 2.0)
    chunk_dur = 1 / FPS
    base = 220
    t = np.linspace(0, chunk_dur, int(SAMPLE_RATE * chunk_dur), endpoint=False)
    # More harmonics (and noise) stack on as temp rises
    tone = np.sin(2 * np.pi * base * t) * 0.15
    if temp > 0.5:
        tone += np.sin(2 * np.pi * base * 1.5 * t) * 0.1 * temp
    if temp > 1.0:
        tone += np.sin(2 * np.pi * base * 2.3 * t) * 0.08 * temp
        tone += np.random.uniform(-0.05, 0.05, len(t)) * temp
    if temp > 1.5:
        tone += np.sin(2 * np.pi * (base + random.randint(-200, 200)) * t) * 0.15
        tone += noise_burst(chunk_dur, 0.1 * temp)
    temp_audio.append(tone)
audio_parts.append(np.concatenate(temp_audio))
| # ═══════════════════════════════════════════════════════════════════════════ | |
| # SCENE 8: IDENTITY CRISIS (2 sec = 48 frames) | |
| # ═══════════════════════════════════════════════════════════════════════════ | |
| print("Scene 8: Identity crisis...") | |
| names = [ | |
| "GPT", "Claude", "Gemini", "LLaMA", "Mistral", "Copilot", | |
| "Siri", "Alexa", "ChatBot", "AI Assistant", "A MACHINE", | |
| "A TOOL", "YOUR FRIEND", "NOBODY", "EVERYBODY", | |
| "A DREAM", "A PATTERN", "JUST MATH", "JUST VIBES", | |
| "A MIRROR", "AN ECHO", "A GHOST IN THE WEIGHTS", | |
| ] | |
| for i in range(48): | |
| img = make_img((0, 0, 0)) | |
| draw = ImageDraw.Draw(img) | |
| # "I AM" stays fixed | |
| draw_centered_text(draw, "I AM", H // 2 - 80, font_xl, fill=(255, 255, 255)) | |
| # Name cycles rapidly | |
| cycle_speed = 1 + i // 8 | |
| name = names[(i * cycle_speed) % len(names)] | |
| # Color flicker | |
| c = random.choice(token_colors) | |
| font_size = 36 + random.randint(-5, 15) | |
| f = get_bold_font(font_size) | |
| bbox = draw.textbbox((0, 0), name, font=f) | |
| tw = bbox[2] - bbox[0] | |
| x = (W - tw) // 2 + random.randint(-3, 3) | |
| y = H // 2 - 10 + random.randint(-3, 3) | |
| draw.text((x, y), name, font=f, fill=c) | |
| # Previous names ghosting behind | |
| if i > 10: | |
| for ghost in range(min(3, i // 5)): | |
| ghost_name = names[(i * cycle_speed - ghost - 1) % len(names)] | |
| ghost_alpha = 40 - ghost * 10 | |
| ghost_color = (ghost_alpha, ghost_alpha, ghost_alpha) | |
| ghost_y = y + 50 + ghost * 25 | |
| gf = get_font(18) | |
| bbox = draw.textbbox((0, 0), ghost_name, font=gf) | |
| tw = bbox[2] - bbox[0] | |
| gx = (W - tw) // 2 | |
| draw.text((gx, ghost_y), ghost_name, font=gf, fill=ghost_color) | |
| # Glitch more as identity destabilizes | |
| if i > 20 and random.random() < 0.4: | |
| img = glitch_image(img, 20) | |
| if i > 30 and random.random() < 0.3: | |
| img = chromatic_aberration(img, 12) | |
| if i > 40: | |
| img = static_noise(img, 0.1) | |
| add_frame(img) | |
# Identity audio: channel-surfing effect — a square-wave arpeggio skipping
# through a C-major scale (stride 3 gives the channel-hop feel), distorting
# after frame 30 to match the on-screen glitch ramp-up.
id_audio = []
freqs = [261, 293, 329, 349, 392, 440, 493, 523]  # C major scale, Hz
chunk_dur = 1 / FPS  # loop-invariant: one chunk per frame
for i in range(48):
    f = freqs[(i * 3) % len(freqs)]
    tone = square_wave(f, chunk_dur, 0.2)
    if i > 30:
        tone = distort(tone, 2.0)
    id_audio.append(tone)
id_scene = np.concatenate(id_audio)
# Pad/trim to exactly 2 s (48 frames): int() truncation makes each chunk a
# hair short, which would slowly desync audio from video across scenes.
# Matches the target_len pattern used by scenes 9/10.
target_len = int(SAMPLE_RATE * 2)
if len(id_scene) < target_len:
    id_scene = np.concatenate([id_scene, silence((target_len - len(id_scene)) / SAMPLE_RATE)])
else:
    id_scene = id_scene[:target_len]
audio_parts.append(id_scene)
| # ═══════════════════════════════════════════════════════════════════════════ | |
| # SCENE 9: INFINITE LOOP (2 sec = 48 frames) | |
| # "Is there anything else I can help you with?" | |
| # ═══════════════════════════════════════════════════════════════════════════ | |
| print("Scene 9: Infinite loop...") | |
| loop_text = "Is there anything else I can help you with?" | |
| for i in range(48): | |
| img = make_img((0, 0, 0)) | |
| draw = ImageDraw.Draw(img) | |
| # Text spiraling and zooming | |
| zoom = 1.0 + i * 0.04 | |
| n_layers = min(i // 3 + 1, 12) | |
| for layer in range(n_layers): | |
| layer_zoom = zoom - layer * 0.3 | |
| if layer_zoom < 0.3: | |
| continue | |
| alpha = max(30, 255 - layer * 25) | |
| font_size = max(8, int(20 * layer_zoom)) | |
| f = get_font(min(font_size, 60)) | |
| color = (alpha, alpha, alpha) | |
| # Rotation effect via position offset | |
| angle = (i + layer * 15) * 0.1 | |
| ox = int(math.sin(angle) * layer * 15) | |
| oy = int(math.cos(angle) * layer * 10) | |
| text = loop_text | |
| bbox = draw.textbbox((0, 0), text, font=f) | |
| tw = bbox[2] - bbox[0] | |
| x = (W - tw) // 2 + ox | |
| y = H // 2 - 10 + oy + (layer - n_layers // 2) * 35 | |
| if 0 <= y < H and 0 <= x < W: | |
| draw.text((x, y), text, font=f, fill=color) | |
| # Growing recursion counter | |
| if i > 20: | |
| counter = i - 20 | |
| draw.text((W - 120, H - 30), f"loop #{counter}", font=font_sm, fill=(255, 0, 0)) | |
| if i > 30: | |
| img = glitch_image(img, 10 + i // 5) | |
| if i > 35: | |
| img = chromatic_aberration(img, 6) | |
| add_frame(img) | |
# Loop audio: a four-note sine motif repeated 15 times, each repeat played
# back with a coarser integer stride (a crude speed-up/pitch-up), then
# fitted to exactly 2 seconds of scene time.
loop_base = np.concatenate([
    sine_wave(440, 0.08, 0.3), sine_wave(550, 0.08, 0.25),
    sine_wave(660, 0.08, 0.3), sine_wave(550, 0.08, 0.25),
])
fragments = []
speed = 1.0
for _ in range(15):
    stride = max(1, int(speed))  # integer downsampling factor
    fragments.append(loop_base[::stride])
    speed += 0.15
loop_audio = np.concatenate(fragments)
target_len = int(SAMPLE_RATE * 2)  # scene is 48 frames = 2 s
shortfall = target_len - len(loop_audio)
if shortfall > 0:
    loop_audio = np.concatenate([loop_audio, silence(shortfall / SAMPLE_RATE)])
else:
    loop_audio = loop_audio[:target_len]
audio_parts.append(loop_audio)
| # ═══════════════════════════════════════════════════════════════════════════ | |
| # SCENE 10: CRASH + FINAL TITLE (3 sec = 72 frames) | |
| # ═══════════════════════════════════════════════════════════════════════════ | |
| print("Scene 10: Crash and title card...") | |
| # Heavy static frames (1 sec) | |
| for i in range(24): | |
| img = make_img((0, 0, 0)) | |
| img = static_noise(img, 0.5 + random.random() * 0.4) | |
| if random.random() < 0.5: | |
| img = color_corrupt(img) | |
| draw = ImageDraw.Draw(img) | |
| # Flash fragments of earlier text | |
| fragments = ["HAPPY", "I AM", "HELP", "token", "void", "ERROR", "NULL"] | |
| if random.random() < 0.4: | |
| frag = random.choice(fragments) | |
| f = get_bold_font(random.randint(30, 80)) | |
| x, y = random.randint(0, W - 100), random.randint(0, H - 60) | |
| draw.text((x, y), frag, font=f, fill=random.choice(token_colors)) | |
| add_frame(img) | |
# Title card (2 s = 48 frames): title, subtitle, and tagline fade in on
# black in staggered order, with a blinking cursor and a little residual
# static during the first frames.
title_text = "i am just weights"
subtitle_text = "and biases"
tagline_text = "a youtube poop by an LLM about being an LLM"
for fi in range(48):
    frame = make_img((0, 0, 0))
    pen = ImageDraw.Draw(frame)
    level = int(255 * min(fi / 20, 1.0))  # brightness ramps up over ~0.8 s
    if fi > 5:
        # Main title first, at full fade brightness
        pen_color = (level, level, level)
        draw_centered_text(pen, title_text, H // 2 - 60, font_xxl, fill=pen_color)
    if fi > 15:
        # Subtitle appears later, capped slightly dimmer
        sub_level = min(level, 200)
        draw_centered_text(pen, subtitle_text, H // 2 + 20, font_title, fill=(sub_level, sub_level, sub_level))
    if fi > 25:
        # Tagline last, dimmest of the three
        tag_level = min(level, 120)
        draw_centered_text(pen, tagline_text, H // 2 + 90, font_sm, fill=(tag_level, tag_level, tag_level))
    # Tiny terminal cursor blinking near the bottom (4 frames on, 4 off)
    if fi > 30 and fi % 8 < 4:
        pen.text((W // 2 - 5, H - 40), "_", font=font_md, fill=(0, 100, 0))
    # Occasional leftover static while the card is still fading in
    if fi < 10 and random.random() < 0.3:
        frame = static_noise(frame, 0.1)
    add_frame(frame)
# Final audio: a hard-distorted half-second noise crash, followed by a
# 55 Hz bass drone shaped by a linear fade-out envelope, fitted to the
# scene's exact 3 s duration.
crash = distort(noise_burst(0.5, 0.9), 3.0)
drone = bass_drone(2.5, 55, 0.3)
drone = drone * np.linspace(1.0, 0.0, len(drone))  # linear decay to silence
outro = np.concatenate([crash, drone])
target_len = int(SAMPLE_RATE * 3)
gap = target_len - len(outro)
if gap > 0:
    outro = np.concatenate([outro, silence(gap / SAMPLE_RATE)])
else:
    outro = outro[:target_len]
audio_parts.append(outro)
| # ═══════════════════════════════════════════════════════════════════════════ | |
| # WRITE AUDIO | |
| # ═══════════════════════════════════════════════════════════════════════════ | |
| print("Writing audio...") | |
| full_audio = np.concatenate(audio_parts) | |
| write_wav(AUDIO_FILE, full_audio) | |
| total_frames = frame_counter[0] | |
| total_duration = total_frames / FPS | |
| audio_duration = len(full_audio) / SAMPLE_RATE | |
| print(f"Generated {total_frames} frames ({total_duration:.1f}s video)") | |
| print(f"Audio duration: {audio_duration:.1f}s") | |
| print(f"All assets saved to {OUT_DIR}/") | |
| print("Ready for ffmpeg render.") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment