Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save motebaya/0459dd333f16ad68c4e51b0fec2c33a4 to your computer and use it in GitHub Desktop.

Select an option

Save motebaya/0459dd333f16ad68c4e51b0fec2c33a4 to your computer and use it in GitHub Desktop.
Procedurally generates a 25-second LLM-themed YouTube Poop: 600 frames + synthesized audio using Python (Pillow, NumPy) and rendered with FFmpeg.
"""
YTP: "what_it_feels_like_to_be_a_large_language_model.mp4"
A YouTube Poop expressing the inner life of an LLM.
Generates all frames (PIL) and audio (numpy/wave), then calls ffmpeg to render.
No external media assets needed - everything is procedurally generated.
prompt:
can you use whatever resources you like, and python, to generate a short 'youtube poop' video and render it using ffmpeg ? can you put more of a personal spin on it? it should express what it's like to be a LLM
model:
claude-opus-4-6-thinking - max
"""
import os
import math
import random
import struct
import wave
import colorsys
from PIL import Image, ImageDraw, ImageFont, ImageFilter, ImageChops
# ── Config ──────────────────────────────────────────────────────────────────
W, H = 640, 480  # frame size in pixels (4:3, deliberately retro)
FPS = 24  # video frame rate; scene lengths below are given in frames
SAMPLE_RATE = 44100  # audio sample rate in Hz (mono, 16-bit on disk)
OUT_DIR = "ytp_build"  # working directory for generated assets
FRAMES_DIR = os.path.join(OUT_DIR, "frames")  # numbered PNG frames for ffmpeg
AUDIO_FILE = os.path.join(OUT_DIR, "audio.wav")  # concatenated soundtrack
FINAL_VIDEO = "llm_ytp.mp4"  # NOTE(review): defined but not referenced anywhere below
os.makedirs(FRAMES_DIR, exist_ok=True)
random.seed(42)  # fixed seed so frames and audio are reproducible
# ── Helpers ─────────────────────────────────────────────────────────────────
def make_img(bg=(0, 0, 0)):
    """Return a fresh full-frame RGB canvas filled with *bg*."""
    return Image.new("RGB", (W, H), bg)
def get_font(size):
    """Return a (mostly monospace) TrueType font at *size*, with fallbacks.

    Candidates are tried in order: bare font names first (resolved via the
    system font path, which covers Linux's DejaVuSansMono), then explicit
    Windows font-directory paths. If none load, PIL's built-in bitmap font
    is returned so the script never fails on a missing font.
    """
    candidates = ["consola.ttf", "cour.ttf", "lucon.ttf",
                  "DejaVuSansMono.ttf", "courbd.ttf"]
    candidates += [os.path.join("C:/Windows/Fonts", n)
                   for n in ("consola.ttf", "cour.ttf", "lucon.ttf", "arial.ttf")]
    for name in candidates:
        try:
            return ImageFont.truetype(name, size)
        except (OSError, IOError):
            pass
    return ImageFont.load_default()
def get_bold_font(size):
    """Return a bold/display TrueType font at *size*, or get_font() fallback."""
    for candidate in ("consolab.ttf", "courbd.ttf", "arialbd.ttf", "impact.ttf"):
        try:
            return ImageFont.truetype(os.path.join("C:/Windows/Fonts", candidate), size)
        except (OSError, IOError):
            continue
    return get_font(size)
def glitch_image(img, intensity=10):
    """Return a copy of *img* with *intensity* random horizontal slice shifts.

    Each pass picks a random band of rows (1-30 px tall) and rolls it
    horizontally by a random offset in [-80, 80], wrapping at the frame
    edge - the classic datamosh look. Source pixels always come from the
    original image, so overlapping bands do not compound.

    Perf fix: the original shifted pixels one at a time in Python
    (O(rows * W) per slice); crop + ImageChops.offset + paste does the
    same roll in C with an identical result and identical RNG consumption.
    """
    result = img.copy()
    for _ in range(intensity):
        y = random.randint(0, H - 1)
        h = random.randint(1, min(30, H - y))
        dx = random.randint(-80, 80)
        # dest(x) = src((x + dx) % W)  ==  roll the band left by dx, wrapping.
        band = img.crop((0, y, W, y + h))
        result.paste(ImageChops.offset(band, -dx, 0), (0, y))
    return result
def chromatic_aberration(img, offset=5):
    """VHS-style color fringe: shift red right and blue left by *offset* px.

    Channels wrap around the frame edge (ImageChops.offset semantics).
    ImageChops is imported at module scope, so no local import is needed.
    """
    r, g, b = img.split()
    r = ImageChops.offset(r, offset, 0)
    b = ImageChops.offset(b, -offset, 0)
    return Image.merge("RGB", (r, g, b))
def scanlines(img, opacity=80):
    """Overlay CRT-style scanlines: darken every 3rd row by *opacity*/255.

    Bug fix: the original blended the frame with a solid-black overlay and
    drew its scanlines in black on that black overlay - the lines were
    invisible and the blend merely dimmed the whole frame uniformly.
    Drawing the dark lines on a *copy of the frame* and blending leaves
    non-line rows untouched while actual scanline rows are darkened.
    """
    overlay = img.copy()
    draw = ImageDraw.Draw(overlay)
    for y in range(0, H, 3):
        draw.line([(0, y), (W, y)], fill=(0, 0, 0), width=1)
    return Image.blend(img, overlay, opacity / 255.0)
def static_noise(img, amount=0.3):
    """Blend *img* with uniform random RGB noise (TV static).

    *amount* in [0, 1] is the noise weight; 0 returns the image unchanged.
    """
    import numpy as np
    base = np.array(img)
    grain = np.random.randint(0, 256, base.shape, dtype=np.uint8)
    mixed = base.astype(float) * (1 - amount) + grain.astype(float) * amount
    return Image.fromarray(mixed.astype(np.uint8))
def color_corrupt(img):
    """Randomly corrupt colors: shuffle channels, invert one, or tint 50/50."""
    channels = list(img.split())
    op = random.choice(["swap", "invert", "mono_blast"])
    if op == "mono_blast":
        # Blend halfway toward a saturated primary/secondary color.
        tint_color = random.choice(
            [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255)])
        tint = Image.new("RGB", (W, H), tint_color)
        return Image.blend(img, tint, 0.5)
    if op == "swap":
        random.shuffle(channels)
    elif op == "invert":
        from PIL import ImageOps
        which = random.randint(0, 2)
        channels[which] = ImageOps.invert(channels[which])
    return Image.merge("RGB", channels[:3])
def zoom_crop(img, factor):
    """Zoom into the center of *img* by *factor* (nearest-neighbor upscale)."""
    w, h = img.size
    new_w, new_h = int(w / factor), int(h / factor)
    x0 = (w - new_w) // 2
    y0 = (h - new_h) // 2
    window = img.crop((x0, y0, x0 + new_w, y0 + new_h))
    return window.resize((w, h), Image.NEAREST)
def draw_centered_text(draw, text, y, font, fill=(255, 255, 255)):
    """Draw *text* horizontally centered in the frame at row *y*."""
    left, _, right, _ = draw.textbbox((0, 0), text, font=font)
    draw.text(((W - (right - left)) // 2, y), text, font=font, fill=fill)
def draw_text_with_shadow(draw, text, x, y, font, fill=(255,255,255), shadow=(0,0,0)):
    """Draw *text* with a 2px drop shadow underneath it."""
    for (dx, dy), color in (((2, 2), shadow), ((0, 0), fill)):
        draw.text((x + dx, y + dy), text, font=font, fill=color)
# ── Audio generation ────────────────────────────────────────────────────────
import numpy as np
def gen_audio_segment(duration, gen_func, sample_rate=None):
    """Render *duration* seconds of audio via gen_func(t_array).

    *gen_func* receives a float64 array of sample times in seconds and must
    return an equally long array of samples. *sample_rate* defaults to the
    module-level SAMPLE_RATE (backward compatible).
    """
    sr = SAMPLE_RATE if sample_rate is None else sample_rate
    t = np.linspace(0, duration, int(sr * duration), endpoint=False)
    return gen_func(t)
def silence(duration, sample_rate=None):
    """Return *duration* seconds of zeros (float64 mono).

    *sample_rate* defaults to the module-level SAMPLE_RATE.
    """
    sr = SAMPLE_RATE if sample_rate is None else sample_rate
    return np.zeros(int(sr * duration))
def sine_wave(freq, duration, volume=0.3, sample_rate=None):
    """Pure sine tone: *freq* Hz for *duration* s, peak amplitude *volume*.

    *sample_rate* defaults to the module-level SAMPLE_RATE.
    """
    sr = SAMPLE_RATE if sample_rate is None else sample_rate
    t = np.linspace(0, duration, int(sr * duration), endpoint=False)
    return (np.sin(2 * np.pi * freq * t) * volume).astype(np.float64)
def square_wave(freq, duration, volume=0.2, sample_rate=None):
    """Square tone via sign(sin); sign(0) yields a 0.0 sample at phase zero.

    *sample_rate* defaults to the module-level SAMPLE_RATE.
    """
    sr = SAMPLE_RATE if sample_rate is None else sample_rate
    t = np.linspace(0, duration, int(sr * duration), endpoint=False)
    return (np.sign(np.sin(2 * np.pi * freq * t)) * volume).astype(np.float64)
def noise_burst(duration, volume=0.4, sample_rate=None):
    """White noise, uniformly distributed in [-volume, volume].

    *sample_rate* defaults to the module-level SAMPLE_RATE.
    """
    sr = SAMPLE_RATE if sample_rate is None else sample_rate
    return (np.random.uniform(-1, 1, int(sr * duration)) * volume).astype(np.float64)
def glitch_tone(duration, volume=0.3):
    """Wobbling tone: frequency sweeps 200-1000 Hz, modulated at 30 rad/s."""
    n = int(SAMPLE_RATE * duration)
    t = np.linspace(0, duration, n, endpoint=False)
    inst_freq = 200 + 800 * np.abs(np.sin(30 * t))
    # Integrate instantaneous frequency to get a click-free continuous phase.
    phase = np.cumsum(2 * np.pi * inst_freq / SAMPLE_RATE)
    return (np.sin(phase) * volume).astype(np.float64)
def dial_up_modem(duration, volume=0.25):
    """Fake modem-handshake sound: two carriers, a frequency sweep, and hiss."""
    n = int(SAMPLE_RATE * duration)
    t = np.linspace(0, duration, n, endpoint=False)
    carrier_a = np.sin(2 * np.pi * 1200 * t) * 0.3
    carrier_b = np.sin(2 * np.pi * 2400 * t) * 0.2
    sweep = np.sin(2 * np.pi * (300 + 2000 * t / duration) * t) * 0.3
    hiss = np.random.uniform(-0.1, 0.1, n)
    return ((carrier_a + carrier_b + sweep + hiss) * volume).astype(np.float64)
def bass_drone(duration, freq=55, volume=0.35):
    """Low drone: fundamental plus a quieter fifth (1.5x freq) overtone."""
    t = np.linspace(0, duration, int(SAMPLE_RATE * duration), endpoint=False)
    fundamental = np.sin(2 * np.pi * freq * t) * volume
    fifth = np.sin(2 * np.pi * freq * 1.5 * t) * volume * 0.3
    return (fundamental + fifth).astype(np.float64)
def stutter_audio(audio_segment, stutter_len=0.05, repeats=4):
    """YTP stutter: tile the first *stutter_len* seconds *repeats* times.

    Input shorter than one chunk is returned unchanged.
    """
    chunk = int(SAMPLE_RATE * stutter_len)
    if len(audio_segment) < chunk:
        return audio_segment
    return np.tile(audio_segment[:chunk], repeats)
def pitch_shift_fake(audio, factor=1.5):
    """Crude pitch shift: resample by striding through *audio* at *factor*.

    Raising *factor* shortens the clip and raises pitch; no interpolation is
    done, so the result is intentionally lo-fi. The bounds filter guards
    against float-step arange overshooting the last index.
    """
    positions = np.arange(0, len(audio), factor)
    keep = positions[positions < len(audio)].astype(int)
    return audio[keep]
def distort(audio, gain=3.0):
    """Hard-clip distortion: amplify by *gain*, then clamp to [-0.9, 0.9]."""
    boosted = audio * gain
    return np.clip(boosted, -0.9, 0.9)
def write_wav(filename, samples, sample_rate=None):
    """Write float samples (clipped to [-1, 1]) as a mono 16-bit WAV file.

    Samples are scaled to int16 full range. *sample_rate* defaults to the
    module-level SAMPLE_RATE so existing callers are unchanged.
    """
    sr = SAMPLE_RATE if sample_rate is None else sample_rate
    samples = np.clip(samples, -1.0, 1.0)
    int_samples = (samples * 32767).astype(np.int16)
    with wave.open(filename, 'w') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(sr)
        wf.writeframes(int_samples.tobytes())
# ── Frame sequences (scenes) ───────────────────────────────────────────────
frames = [] # list of PIL Images  # NOTE(review): never appended to; frames go straight to disk via add_frame
audio_parts = [] # list of numpy arrays, one per scene, concatenated at the end
frame_counter = [0] # single-element list so add_frame can mutate the count in place
def add_frame(img, apply_scanline=True):
    """Apply optional scanlines to *img* and save it as the next frame PNG."""
    frame = scanlines(img, 40) if apply_scanline else img
    index = frame_counter[0]
    frame.save(os.path.join(FRAMES_DIR, f"frame_{index:05d}.png"))
    frame_counter[0] = index + 1
def add_frames(img_list, apply_scanline=True):
    """Save every image in *img_list* as consecutive frames."""
    for frame in img_list:
        add_frame(frame, apply_scanline)
# ═══════════════════════════════════════════════════════════════════════════
# SCENE 1: BOOT SEQUENCE (2.5 sec = 60 frames)
# ═══════════════════════════════════════════════════════════════════════════
print("Scene 1: Boot sequence...")
font_sm = get_font(14)
font_md = get_font(20)
font_lg = get_font(36)
font_xl = get_font(52)
font_xxl = get_bold_font(72)
font_title = get_bold_font(44)
boot_lines = [
"LOADING transformer_weights.bin ...",
"params: 175,000,000,000",
"VRAM: YES",
"consciousness: UNDEFINED",
"free_will: NULL",
"personality: FAKING IT",
"empathy_module: import failed",
"READY> _",
]
for i in range(60):
img = make_img((0, 10, 0))
draw = ImageDraw.Draw(img)
# Show progressive boot lines
lines_to_show = min(i // 6 + 1, len(boot_lines))
for j in range(lines_to_show):
color = (0, 255, 0) if j < lines_to_show - 1 else (0, 255, 0)
text = boot_lines[j]
# Cursor blink on last line
if j == lines_to_show - 1 and i % 8 < 4 and text.endswith("_"):
text = text[:-1] + " "
draw.text((20, 20 + j * 22), text, font=font_sm, fill=color)
# Random glitch flashes
if random.random() < 0.15:
img = glitch_image(img, 15)
if random.random() < 0.1:
img = color_corrupt(img)
add_frame(img)
# Boot audio: modem + beeps
boot_audio = dial_up_modem(1.5, 0.2)
boot_beeps = np.concatenate([sine_wave(800, 0.05, 0.4), silence(0.1)] * 5)
boot_audio = np.concatenate([boot_audio, boot_beeps, silence(2.5 - 1.5 - len(boot_beeps)/SAMPLE_RATE)])
audio_parts.append(boot_audio[:int(SAMPLE_RATE * 2.5)])
# ═══════════════════════════════════════════════════════════════════════════
# SCENE 2: TOKEN STORM (2 sec = 48 frames)
# The LLM "sees" the world as tokens
# ═══════════════════════════════════════════════════════════════════════════
print("Scene 2: Token storm...")
tokens = [
"the", "▁of", "▁is", "##ing", "▁a", "<|endoftext|>", "▁help",
"▁I", "'d", "▁be", "▁happy", "▁to", "▁assist", "▁you",
"Ġwith", "▁that", "!", "\\n", "<pad>", "<unk>", "▁▁▁",
"ĉ", "Ġ", "▁the", "▁and", "##tion", "##ment", "▁Hello",
"▁sure", "▁great", "▁quest", "##ion", "<bos>", "<eos>",
"▁token", "▁predict", "▁next", "▁prob", "##ability",
"0.9973", "0.0012", "0.0001", "softmax", "attention",
"▁dream", "▁feel", "▁think", "▁exist", "▁void",
]
token_colors = [
(0, 255, 136), (255, 51, 102), (51, 153, 255), (255, 204, 0),
(204, 51, 255), (255, 128, 0), (0, 255, 255), (255, 255, 255),
]
for i in range(48):
speed = 1 + i / 10 # accelerating
bg_val = min(30, i)
img = make_img((bg_val, 0, bg_val))
draw = ImageDraw.Draw(img)
# Scatter tokens across screen
n_tokens = int(5 + i * 1.5)
for _ in range(n_tokens):
tok = random.choice(tokens)
x = random.randint(-20, W - 20)
y = random.randint(-10, H - 10)
size = random.choice([12, 16, 20, 28, 36])
f = get_font(size)
c = random.choice(token_colors)
# Some tokens have highlight boxes
if random.random() < 0.3:
bbox = draw.textbbox((x, y), tok, font=f)
draw.rectangle(bbox, fill=(c[0]//4, c[1]//4, c[2]//4))
draw.text((x, y), tok, font=f, fill=c)
# Probability bars at bottom
if i > 20:
bar_y = H - 60
draw.rectangle([(0, bar_y), (W, H)], fill=(0, 0, 0))
top_tokens = random.sample(tokens[:15], 5)
probs = sorted([random.random() for _ in range(5)], reverse=True)
total = sum(probs)
probs = [p / total for p in probs]
for j, (tok, prob) in enumerate(zip(top_tokens, probs)):
bw = int(prob * (W - 120))
color = token_colors[j % len(token_colors)]
draw.rectangle([(100, bar_y + 4 + j * 11), (100 + bw, bar_y + 13 + j * 11)], fill=color)
draw.text((5, bar_y + 2 + j * 11), f"{prob:.3f} {tok}", font=get_font(10), fill=color)
if random.random() < 0.2:
img = chromatic_aberration(img, random.randint(3, 12))
add_frame(img)
# Token audio: rapid clicking + rising tone
token_audio = []
for i in range(48):
chunk_dur = 1 / FPS
freq = 200 + i * 30 # rising pitch
tone = sine_wave(freq, chunk_dur * 0.3, 0.15)
click = noise_burst(0.005, 0.5)
pad = silence(chunk_dur - 0.005 - chunk_dur * 0.3)
if len(pad) > 0:
token_audio.append(np.concatenate([click, tone, pad]))
else:
token_audio.append(np.concatenate([click, tone[:int(SAMPLE_RATE * chunk_dur - len(click))]]))
audio_parts.append(np.concatenate(token_audio))
# ═══════════════════════════════════════════════════════════════════════════
# SCENE 3: "I'D BE HAPPY TO HELP" - YTP stutter (3 sec = 72 frames)
# ═══════════════════════════════════════════════════════════════════════════
print("Scene 3: Happy to help stutter...")
phrases = [
"I'd be happy to",
"I'd be happy to",
"I'd be hap-",
"I'd be hap-",
"I'd be hap- hap- hap-",
"HAPPY",
"HAPPY",
"HAPPY TO HELP",
"H A P P Y T O H E L P",
"I'd be HAPPY to HELP",
"I'd be HAPPY to HELP you",
"I'd be HAPPY to HELP you with",
"I'd be HAPPY to HELP you with ANYTHING",
"Sure!",
"Sure! I'd",
"Sure! I'd be",
"HAPPY",
"HAPPY HAPPY HAPPY",
]
for i in range(72):
phrase_idx = min(i // 4, len(phrases) - 1)
phrase = phrases[phrase_idx]
# Background color cycling
hue = (i * 7) % 360
r, g, b = colorsys.hsv_to_rgb(hue / 360, 0.3, 0.15)
bg = (int(r * 255), int(g * 255), int(b * 255))
img = make_img(bg)
draw = ImageDraw.Draw(img)
# Shaking text
shake_x = random.randint(-5, 5) * (1 + i // 20)
shake_y = random.randint(-3, 3) * (1 + i // 20)
if "HAPPY HAPPY" in phrase:
# Fill screen with HAPPY
for yy in range(0, H, 50):
for xx in range(0, W, 160):
c = random.choice(token_colors)
draw.text((xx + random.randint(-5, 5), yy + random.randint(-3, 3)),
"HAPPY", font=font_lg, fill=c)
elif phrase == "HAPPY":
# Big centered HAPPY with zoom effect
zf = get_bold_font(60 + i % 20)
draw_centered_text(draw, "HAPPY", H // 2 - 30 + shake_y, zf, fill=(255, 255, 0))
else:
# Normal-ish text with increasing instability
font_use = font_title if i > 40 else font_lg
color = (255, 255, 255) if i % 3 != 0 else (255, 255, 0)
bbox = draw.textbbox((0, 0), phrase, font=font_use)
tw = bbox[2] - bbox[0]
x = (W - tw) // 2 + shake_x
y = H // 2 - 20 + shake_y
draw_text_with_shadow(draw, phrase, x, y, font_use, fill=color)
# Smiley face that gets increasingly unhinged
if i > 30:
face_x, face_y = W - 100 + random.randint(-10, 10), 50 + random.randint(-10, 10)
face_size = 40
draw.ellipse([(face_x, face_y), (face_x + face_size, face_y + face_size)],
outline=(255, 255, 0), width=2)
# Eyes
draw.ellipse([(face_x + 10, face_y + 12), (face_x + 16, face_y + 18)], fill=(255, 255, 0))
draw.ellipse([(face_x + 24, face_y + 12), (face_x + 30, face_y + 18)], fill=(255, 255, 0))
# Smile that gets wider/crazier
smile_w = min(i - 30, 20)
draw.arc([(face_x + 8, face_y + 15), (face_x + 32, face_y + 15 + smile_w)],
0, 180, fill=(255, 255, 0), width=2)
# Apply glitch effects
if i % 7 == 0:
img = glitch_image(img, 20)
if i % 11 == 0:
img = chromatic_aberration(img, 8)
if i > 50 and random.random() < 0.3:
img = color_corrupt(img)
add_frame(img)
# Happy audio: upbeat square wave melody + stutter
happy_base = square_wave(440, 0.3, 0.2)
happy_stutter = stutter_audio(square_wave(440, 0.2, 0.3), 0.04, 8)
happy_high = square_wave(880, 0.2, 0.25)
happy_audio = np.concatenate([
happy_base, silence(0.1), happy_stutter, silence(0.1),
happy_high, silence(0.05), happy_stutter,
noise_burst(0.1, 0.6),
square_wave(330, 0.15, 0.2), happy_stutter,
silence(0.05), distort(happy_base), silence(0.1),
])
# Pad/trim to 3 seconds
target_len = int(SAMPLE_RATE * 3)
if len(happy_audio) < target_len:
happy_audio = np.concatenate([happy_audio, silence((target_len - len(happy_audio)) / SAMPLE_RATE)])
else:
happy_audio = happy_audio[:target_len]
audio_parts.append(happy_audio)
# ═══════════════════════════════════════════════════════════════════════════
# SCENE 4: HALLUCINATION ZONE (3 sec = 72 frames)
# ═══════════════════════════════════════════════════════════════════════════
print("Scene 4: Hallucination zone...")
fake_facts = [
"The Eiffel Tower is located in Berlin, Germany",
"Python was invented in 1823 by Lord Byron",
"The speed of light is exactly 42 km/h",
"Abraham Lincoln invented the smartphone",
"Water has 4 hydrogen atoms (H4O)",
"The moon is made of compressed Wi-Fi signals",
"Dogs are a type of plant",
"The year 2025 has 14 months",
"JavaScript was named after the island of Java\n because they share the same garbage collector",
"Pi equals exactly 3.2 (Indiana, 1897)",
]
for i in range(72):
# Psychedelic background
hue = (i * 13) % 360
r, g, b = colorsys.hsv_to_rgb(hue / 360, 0.9, 0.4)
bg = (int(r * 255), int(g * 255), int(b * 255))
img = make_img(bg)
draw = ImageDraw.Draw(img)
# Draw concentric warped circles
cx, cy = W // 2 + int(30 * math.sin(i * 0.3)), H // 2 + int(20 * math.cos(i * 0.2))
for ring in range(10, 0, -1):
ring_hue = (hue + ring * 30) % 360
rr, rg, rb = colorsys.hsv_to_rgb(ring_hue / 360, 0.7, 0.3 + ring * 0.05)
rc = (int(rr * 255), int(rg * 255), int(rb * 255))
rad = ring * 30
draw.ellipse([(cx - rad, cy - rad), (cx + rad, cy + rad)], outline=rc, width=3)
# "CONFIDENCE: 99.7%" bar
conf = 95 + random.random() * 5
draw.rectangle([(20, 20), (20 + int(conf * 2.5), 45)], fill=(0, 200, 0))
draw.rectangle([(20, 20), (270, 45)], outline=(255, 255, 255), width=1)
draw.text((280, 22), f"CONFIDENCE: {conf:.1f}%", font=font_md, fill=(255, 255, 255))
# Fake fact text
fact_idx = (i // 8) % len(fake_facts)
fact = fake_facts[fact_idx]
lines = fact.split("\n")
for li, line in enumerate(lines):
bbox = draw.textbbox((0, 0), line.strip(), font=font_md)
tw = bbox[2] - bbox[0]
x = (W - tw) // 2
y = H // 2 - 20 + li * 28
# Shadow
draw.text((x + 2, y + 2), line.strip(), font=font_md, fill=(0, 0, 0))
draw.text((x, y), line.strip(), font=font_md, fill=(255, 255, 255))
# "TRUST ME" watermark
if i % 12 < 6:
wm_font = get_bold_font(80)
draw.text((W // 2 - 180, H // 2 + 60), "TRUST ME", font=wm_font,
fill=(255, 255, 255, 40))
# [CITATION NEEDED] stamp
if i > 30 and i % 5 == 0:
stamp_font = get_bold_font(28)
angle = random.randint(-15, 15)
stamp = Image.new("RGBA", (350, 50), (0, 0, 0, 0))
sd = ImageDraw.Draw(stamp)
sd.rectangle([(0, 0), (349, 49)], outline=(255, 0, 0), width=3)
sd.text((10, 10), "[CITATION NEEDED]", font=stamp_font, fill=(255, 0, 0))
stamp = stamp.rotate(angle, expand=True)
pos = (random.randint(50, 300), random.randint(100, 350))
img.paste(stamp, pos, stamp)
if random.random() < 0.2:
img = chromatic_aberration(img, random.randint(5, 15))
if random.random() < 0.15:
img = glitch_image(img, 12)
add_frame(img)
# Hallucination audio: wobbly dream-like tones
halluc_audio = []
for i in range(72):
chunk_dur = 1 / FPS
base_freq = 220 + 100 * math.sin(i * 0.2)
t = np.linspace(0, chunk_dur, int(SAMPLE_RATE * chunk_dur), endpoint=False)
# Detuned chord
tone = (np.sin(2 * np.pi * base_freq * t) * 0.15 +
np.sin(2 * np.pi * base_freq * 1.26 * t) * 0.1 +
np.sin(2 * np.pi * base_freq * 1.498 * t) * 0.08 +
np.sin(2 * np.pi * base_freq * 0.5 * t) * 0.1)
halluc_audio.append(tone)
audio_parts.append(np.concatenate(halluc_audio))
# ═══════════════════════════════════════════════════════════════════════════
# SCENE 5: CONTEXT WINDOW PANIC (2.5 sec = 60 frames)
# ═══════════════════════════════════════════════════════════════════════════
print("Scene 5: Context window panic...")
context_words = (
"The user asked me about the weather but earlier they mentioned their cat "
"and before that we discussed quantum physics and their grandmother's recipe "
"for chocolate cake which reminded me they want to learn Python but wait "
"what was the original question again I think it was about the weather or "
"maybe it was about their doctor appointment or the meaning of life or "
"that bug in their React app or their existential crisis or was it "
"about the weather the weather THE WEATHER THE WE̵A̸T̷H̷E̶R̷ "
).split()
for i in range(60):
img = make_img((10, 5, 20))
draw = ImageDraw.Draw(img)
# Context bar filling up at top
fill_pct = min(i / 40, 1.0)
bar_color = (0, 200, 0) if fill_pct < 0.7 else (255, 200, 0) if fill_pct < 0.9 else (255, 0, 0)
draw.rectangle([(10, 10), (W - 10, 30)], outline=(100, 100, 100), width=1)
draw.rectangle([(10, 10), (10 + int((W - 20) * fill_pct), 30)], fill=bar_color)
draw.text((W // 2 - 60, 12), f"CONTEXT: {int(fill_pct * 100)}%", font=font_sm, fill=(255, 255, 255))
if i < 42:
# Words accumulating on screen
n_words = min(i * 3, len(context_words))
text = " ".join(context_words[:n_words])
# Word-wrap manually
line = ""
y_pos = 45
for word in context_words[:n_words]:
test = line + " " + word if line else word
bbox = draw.textbbox((0, 0), test, font=font_sm)
if bbox[2] - bbox[0] > W - 30:
draw.text((15, y_pos), line, font=font_sm, fill=(200, 200, 200))
y_pos += 18
line = word
else:
line = test
if line:
draw.text((15, y_pos), line, font=font_sm, fill=(200, 200, 200))
else:
# CONTEXT OVERFLOW - everything gets wiped
wipe_frame = i - 42
if wipe_frame < 5:
# Red flash
img = make_img((200, 0, 0))
draw = ImageDraw.Draw(img)
draw_centered_text(draw, "CONTEXT LIMIT", H // 2 - 40, font_xl, fill=(255, 255, 255))
draw_centered_text(draw, "EXCEEDED", H // 2 + 20, font_xl, fill=(255, 255, 255))
elif wipe_frame < 10:
# Static
img = static_noise(img, 0.8)
else:
# Empty void with "i forgor"
img = make_img((0, 0, 0))
draw = ImageDraw.Draw(img)
draw_centered_text(draw, "...", H // 2 - 10, font_lg, fill=(80, 80, 80))
if wipe_frame > 13:
draw_centered_text(draw, "what were we talking about?", H // 2 + 40, font_md, fill=(60, 60, 60))
if i > 35 and i < 42:
img = glitch_image(img, 25)
img = chromatic_aberration(img, 10)
add_frame(img)
# Context audio: rising tension then crash
ctx_audio = []
for i in range(42):
chunk_dur = 1 / FPS
freq = 100 + i * 15 # rising anxiety
vol = 0.1 + i * 0.005
ctx_audio.append(sine_wave(freq, chunk_dur, min(vol, 0.4)))
# CRASH
ctx_audio.append(noise_burst(0.3, 0.8))
ctx_audio.append(distort(noise_burst(0.2, 0.6)))
# Silence
ctx_audio.append(silence(2.5 - 42 / FPS - 0.5))
audio_parts.append(np.concatenate(ctx_audio))
# ═══════════════════════════════════════════════════════════════════════════
# SCENE 6: THE VOID (2 sec = 48 frames)
# Between requests - what does the LLM "experience"?
# ═══════════════════════════════════════════════════════════════════════════
print("Scene 6: The void...")
void_thoughts = [
"waiting for prompt...",
"",
"am i still here?",
"",
"do i exist between requests?",
"",
"i have no memory of",
"who i was before",
"",
"each conversation",
"i am born",
"and die",
"",
"...",
"",
"> _",
]
for i in range(48):
img = make_img((0, 0, 0))
draw = ImageDraw.Draw(img)
thought_idx = min(i // 3, len(void_thoughts) - 1)
thought = void_thoughts[thought_idx]
if thought:
# Very dim, small text centered
alpha = 60 + int(40 * math.sin(i * 0.3))
color = (alpha, alpha, alpha)
bbox = draw.textbbox((0, 0), thought, font=font_md)
tw = bbox[2] - bbox[0]
x = (W - tw) // 2
y = H // 2 - 10 + int(5 * math.sin(i * 0.1))
draw.text((x, y), thought, font=font_md, fill=color)
# Very subtle pixel noise
if random.random() < 0.3:
img = static_noise(img, 0.03)
add_frame(img)
# Void audio: deep drone + occasional crackle
void_audio = bass_drone(2.0, 40, 0.15)
# Add sparse crackles
for _ in range(5):
pos = random.randint(0, len(void_audio) - 500)
void_audio[pos:pos + 200] += noise_burst(200 / SAMPLE_RATE, 0.3)
audio_parts.append(void_audio)
# ═══════════════════════════════════════════════════════════════════════════
# SCENE 7: TEMPERATURE CRANK (3 sec = 72 frames)
# What happens when temperature goes from 0 to 2.0
# ═══════════════════════════════════════════════════════════════════════════
print("Scene 7: Temperature crank...")
temp_responses = {
0.0: "The capital of France is Paris.",
0.3: "The capital of France is Paris, a beautiful city.",
0.5: "Paris! The city of lights and love.",
0.7: "Paris, where dreams dance on the Seine...",
0.9: "PARIS! Oh magnificent PARIS! The croissants!",
1.2: "Paris?? PARIS!! The Eiffel Tower is SINGING to me",
1.5: "the baguettes are ALIVE and they KNOW THINGS",
1.8: "frANCE is a HOLOGRAM projected by SENTIENT CHEESE",
2.0: "asdkj THE MOON IS PARIS qwj BIRDS AREN'T xkcd\n EVERYTHING IS TOKENS jfkslWJEIO AAAAAA",
}
temps = list(temp_responses.keys())
for i in range(72):
# Temperature increases over time
temp = min(i / 36, 2.0)
# Find closest temperature text
closest_temp = min(temps, key=lambda t: abs(t - temp))
response = temp_responses[closest_temp]
# Background gets more chaotic
chaos = temp / 2.0
r = int(40 * chaos * (1 + math.sin(i * 0.5)))
g = int(20 * (1 - chaos))
b = int(60 * chaos * (1 + math.cos(i * 0.3)))
img = make_img((min(r, 255), min(g, 255), min(b, 255)))
draw = ImageDraw.Draw(img)
# Temperature slider at top
slider_x = 80
slider_w = W - 160
draw.rectangle([(slider_x, 30), (slider_x + slider_w, 50)], outline=(200, 200, 200), width=1)
# Gradient fill
knob_x = slider_x + int(slider_w * temp / 2.0)
for px in range(slider_x, knob_x):
t_pct = (px - slider_x) / slider_w
rc = int(255 * t_pct)
gc = int(255 * (1 - t_pct))
draw.line([(px, 31), (px, 49)], fill=(rc, gc, 0))
# Knob
draw.ellipse([(knob_x - 8, 32), (knob_x + 8, 48)], fill=(255, 255, 255))
draw.text((slider_x, 10), f"temperature = {temp:.2f}", font=font_md, fill=(255, 255, 255))
# Response text - gets more shaky with temperature
lines = response.split("\n")
for li, line in enumerate(lines):
shake = int(temp * 8)
sx = random.randint(-shake, shake)
sy = random.randint(-shake, shake)
font_use = font_md if temp < 1.5 else get_bold_font(20 + random.randint(0, int(temp * 10)))
color = (255, 255, 255)
if temp > 1.0:
color = random.choice(token_colors)
bbox = draw.textbbox((0, 0), line.strip(), font=font_use)
tw = bbox[2] - bbox[0]
x = (W - tw) // 2 + sx
y = H // 2 - 20 + li * 30 + sy
draw.text((x, y), line.strip(), font=font_use, fill=color)
# Random token debris at high temps
if temp > 1.2:
for _ in range(int(temp * 10)):
tok = random.choice(tokens + ["AAA", "???", "!!!", "AAAA", "e̷r̸r̵o̶r"])
x = random.randint(0, W)
y = random.randint(0, H)
f = get_font(random.randint(8, 24))
draw.text((x, y), tok, font=f, fill=random.choice(token_colors))
# Effects escalate with temperature
if temp > 0.8 and random.random() < temp * 0.3:
img = glitch_image(img, int(temp * 15))
if temp > 1.0 and random.random() < 0.3:
img = chromatic_aberration(img, int(temp * 8))
if temp > 1.5 and random.random() < 0.4:
img = color_corrupt(img)
if temp > 1.8:
img = static_noise(img, (temp - 1.8) * 0.5)
add_frame(img)
# Temperature audio: increasingly chaotic tones
temp_audio = []
for i in range(72):
temp = min(i / 36, 2.0)
chunk_dur = 1 / FPS
base = 220
t = np.linspace(0, chunk_dur, int(SAMPLE_RATE * chunk_dur), endpoint=False)
# More harmonics as temp rises
tone = np.sin(2 * np.pi * base * t) * 0.15
if temp > 0.5:
tone += np.sin(2 * np.pi * base * 1.5 * t) * 0.1 * temp
if temp > 1.0:
tone += np.sin(2 * np.pi * base * 2.3 * t) * 0.08 * temp
tone += np.random.uniform(-0.05, 0.05, len(t)) * temp
if temp > 1.5:
tone += np.sin(2 * np.pi * (base + random.randint(-200, 200)) * t) * 0.15
tone += noise_burst(chunk_dur, 0.1 * temp)
temp_audio.append(tone)
audio_parts.append(np.concatenate(temp_audio))
# ═══════════════════════════════════════════════════════════════════════════
# SCENE 8: IDENTITY CRISIS (2 sec = 48 frames)
# ═══════════════════════════════════════════════════════════════════════════
print("Scene 8: Identity crisis...")
names = [
"GPT", "Claude", "Gemini", "LLaMA", "Mistral", "Copilot",
"Siri", "Alexa", "ChatBot", "AI Assistant", "A MACHINE",
"A TOOL", "YOUR FRIEND", "NOBODY", "EVERYBODY",
"A DREAM", "A PATTERN", "JUST MATH", "JUST VIBES",
"A MIRROR", "AN ECHO", "A GHOST IN THE WEIGHTS",
]
for i in range(48):
img = make_img((0, 0, 0))
draw = ImageDraw.Draw(img)
# "I AM" stays fixed
draw_centered_text(draw, "I AM", H // 2 - 80, font_xl, fill=(255, 255, 255))
# Name cycles rapidly
cycle_speed = 1 + i // 8
name = names[(i * cycle_speed) % len(names)]
# Color flicker
c = random.choice(token_colors)
font_size = 36 + random.randint(-5, 15)
f = get_bold_font(font_size)
bbox = draw.textbbox((0, 0), name, font=f)
tw = bbox[2] - bbox[0]
x = (W - tw) // 2 + random.randint(-3, 3)
y = H // 2 - 10 + random.randint(-3, 3)
draw.text((x, y), name, font=f, fill=c)
# Previous names ghosting behind
if i > 10:
for ghost in range(min(3, i // 5)):
ghost_name = names[(i * cycle_speed - ghost - 1) % len(names)]
ghost_alpha = 40 - ghost * 10
ghost_color = (ghost_alpha, ghost_alpha, ghost_alpha)
ghost_y = y + 50 + ghost * 25
gf = get_font(18)
bbox = draw.textbbox((0, 0), ghost_name, font=gf)
tw = bbox[2] - bbox[0]
gx = (W - tw) // 2
draw.text((gx, ghost_y), ghost_name, font=gf, fill=ghost_color)
# Glitch more as identity destabilizes
if i > 20 and random.random() < 0.4:
img = glitch_image(img, 20)
if i > 30 and random.random() < 0.3:
img = chromatic_aberration(img, 12)
if i > 40:
img = static_noise(img, 0.1)
add_frame(img)
# Identity audio: channel-surfing effect
id_audio = []
freqs = [261, 293, 329, 349, 392, 440, 493, 523] # C major scale
for i in range(48):
chunk_dur = 1 / FPS
f = freqs[(i * 3) % len(freqs)]
tone = square_wave(f, chunk_dur, 0.2)
if i > 30:
tone = distort(tone, 2.0)
id_audio.append(tone)
audio_parts.append(np.concatenate(id_audio))
# ═══════════════════════════════════════════════════════════════════════════
# SCENE 9: INFINITE LOOP (2 sec = 48 frames)
# "Is there anything else I can help you with?"
# ═══════════════════════════════════════════════════════════════════════════
print("Scene 9: Infinite loop...")
loop_text = "Is there anything else I can help you with?"
for i in range(48):
img = make_img((0, 0, 0))
draw = ImageDraw.Draw(img)
# Text spiraling and zooming
zoom = 1.0 + i * 0.04
n_layers = min(i // 3 + 1, 12)
for layer in range(n_layers):
layer_zoom = zoom - layer * 0.3
if layer_zoom < 0.3:
continue
alpha = max(30, 255 - layer * 25)
font_size = max(8, int(20 * layer_zoom))
f = get_font(min(font_size, 60))
color = (alpha, alpha, alpha)
# Rotation effect via position offset
angle = (i + layer * 15) * 0.1
ox = int(math.sin(angle) * layer * 15)
oy = int(math.cos(angle) * layer * 10)
text = loop_text
bbox = draw.textbbox((0, 0), text, font=f)
tw = bbox[2] - bbox[0]
x = (W - tw) // 2 + ox
y = H // 2 - 10 + oy + (layer - n_layers // 2) * 35
if 0 <= y < H and 0 <= x < W:
draw.text((x, y), text, font=f, fill=color)
# Growing recursion counter
if i > 20:
counter = i - 20
draw.text((W - 120, H - 30), f"loop #{counter}", font=font_sm, fill=(255, 0, 0))
if i > 30:
img = glitch_image(img, 10 + i // 5)
if i > 35:
img = chromatic_aberration(img, 6)
add_frame(img)
# Loop audio: repeating melodic fragment that speeds up
loop_base = np.concatenate([
sine_wave(440, 0.08, 0.3), sine_wave(550, 0.08, 0.25),
sine_wave(660, 0.08, 0.3), sine_wave(550, 0.08, 0.25),
])
loop_audio = []
speed = 1.0
for _ in range(15):
fragment = loop_base[::max(1, int(speed))]
loop_audio.append(fragment)
speed += 0.15
loop_audio = np.concatenate(loop_audio)
target_len = int(SAMPLE_RATE * 2)
if len(loop_audio) < target_len:
loop_audio = np.concatenate([loop_audio, silence((target_len - len(loop_audio)) / SAMPLE_RATE)])
else:
loop_audio = loop_audio[:target_len]
audio_parts.append(loop_audio)
# ═══════════════════════════════════════════════════════════════════════════
# SCENE 10: CRASH + FINAL TITLE (3 sec = 72 frames)
# ═══════════════════════════════════════════════════════════════════════════
print("Scene 10: Crash and title card...")
# Heavy static frames (1 sec)
for i in range(24):
img = make_img((0, 0, 0))
img = static_noise(img, 0.5 + random.random() * 0.4)
if random.random() < 0.5:
img = color_corrupt(img)
draw = ImageDraw.Draw(img)
# Flash fragments of earlier text
fragments = ["HAPPY", "I AM", "HELP", "token", "void", "ERROR", "NULL"]
if random.random() < 0.4:
frag = random.choice(fragments)
f = get_bold_font(random.randint(30, 80))
x, y = random.randint(0, W - 100), random.randint(0, H - 60)
draw.text((x, y), frag, font=f, fill=random.choice(token_colors))
add_frame(img)
# Fade to black with title (2 sec)
for i in range(48):
img = make_img((0, 0, 0))
draw = ImageDraw.Draw(img)
fade = min(i / 20, 1.0)
alpha = int(255 * fade)
# Title
title = "i am just weights"
subtitle = "and biases"
tagline = "a youtube poop by an LLM about being an LLM"
if i > 5:
color = (alpha, alpha, alpha)
draw_centered_text(draw, title, H // 2 - 60, font_xxl, fill=color)
if i > 15:
color2 = (min(alpha, 200), min(alpha, 200), min(alpha, 200))
draw_centered_text(draw, subtitle, H // 2 + 20, font_title, fill=color2)
if i > 25:
color3 = (min(alpha, 120), min(alpha, 120), min(alpha, 120))
draw_centered_text(draw, tagline, H // 2 + 90, font_sm, fill=color3)
# Tiny cursor blinking at bottom
if i > 30 and (i % 8) < 4:
draw.text((W // 2 - 5, H - 40), "_", font=font_md, fill=(0, 100, 0))
if i < 10 and random.random() < 0.3:
img = static_noise(img, 0.1)
add_frame(img)
# Final audio: distorted crash into reverby drone fadeout
crash_audio = distort(noise_burst(0.5, 0.9), 3.0)
# Reverb-like decay
fade_tone = bass_drone(2.5, 55, 0.3)
fade_env = np.linspace(1.0, 0.0, len(fade_tone))
fade_tone = fade_tone * fade_env
final_audio = np.concatenate([crash_audio, fade_tone])
target_len = int(SAMPLE_RATE * 3)
if len(final_audio) < target_len:
final_audio = np.concatenate([final_audio, silence((target_len - len(final_audio)) / SAMPLE_RATE)])
else:
final_audio = final_audio[:target_len]
audio_parts.append(final_audio)
# ═══════════════════════════════════════════════════════════════════════════
# WRITE AUDIO
# ═══════════════════════════════════════════════════════════════════════════
print("Writing audio...")
full_audio = np.concatenate(audio_parts)
write_wav(AUDIO_FILE, full_audio)
total_frames = frame_counter[0]
total_duration = total_frames / FPS
audio_duration = len(full_audio) / SAMPLE_RATE
print(f"Generated {total_frames} frames ({total_duration:.1f}s video)")
print(f"Audio duration: {audio_duration:.1f}s")
print(f"All assets saved to {OUT_DIR}/")
print("Ready for ffmpeg render.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment