Don't use original DOOM code directly with SGL because:
- SGL is dated, quirky, and poorly documented in some areas
- Original DOOM renderer needs heavy refactoring anyway for Saturn
- You'll fight two battles simultaneously
Instead, use:
| Component | Choice | Why |
|---|---|---|
| Code base | doomgeneric | Minimal porting interface - just implement framebuffer, input, timing |
| SDK | Yaul (libyaul) | Modern, maintained, better dual-SH2 support, active community |
| Initial renderer | Software to VDP2 bitmap | Get it working first, optimize later |
doomgeneric requires you to implement only:
- DG_Init()
- DG_DrawFrame(buffer)
- DG_GetKey()
- DG_GetTicksMs()
- DG_SleepMs()
Render to a VDP2 bitmap layer (256 or 320 width). This will be slow but playable quickly.
Master SH2: Game logic + rendering
Slave SH2: BSP traversal, or visplane rendering, or AI
68000: Sound via SCSP (offload completely)
You have two realistic paths:
Path A - Optimized Software Renderer:
- Keep column-based rendering
- Add SH2 assembly for inner loops
- Use both CPUs for parallel column rendering
- This is honestly probably the best quality option
Path B - VDP1 Hybrid:
- Walls as subdivided quads (reduces affine warping)
- Sprites via VDP1 sprites
- Floors/ceilings still software rendered
- Complex but potentially faster
The original bad port tried this. Problems:
VDP1 Affine Texturing:
What you want: What VDP1 gives:
┌────────┐ ┌────────┐
│████████│ │██▓▓░░ │
│████████│ vs │███▓▓░░ │ (texture swimming)
│████████│ │████▓▓░ │
└────────┘ └────────┘
You'd need heavy quad subdivision to compensate, eating fillrate.
# Get doomgeneric
git clone https://github.com/ozkl/doomgeneric
# Get Yaul SDK (follow their setup)
# https://yaul.org/
# Your initial work:
# 1. Create doomgeneric_saturn.c
# 2. Implement the 5 required functions
# 3. Framebuffer → VDP2 NBG0 bitmap layer
# 4. Input → Saturn digital pad
# 5. Audio → SCSP via 68000 (can defer this)
2MB Main RAM:
- DOOM heap: ~1.2MB
- Level data: streaming from CD
- Code: ~400KB
VDP1 VRAM (512KB):
- Sprites/textures if using VDP1
VDP2 VRAM (512KB):
- Framebuffer: 320×200×8bpp = 64KB
- Palette/tiles
| Approach | Time to Playable | Final Quality Potential |
|---|---|---|
| doomgeneric + Yaul + software | 2-3 weeks | High (with optimization) |
| Original DOOM + SGL | 2-3 months | Medium-High |
| Full VDP1 rewrite | 6+ months | Medium (warping issues) |
Want me to detail the Yaul setup or the specific rendering optimizations for SH2?
# Ubuntu/Debian packages
sudo apt install build-essential git texinfo libgmp-dev \
libmpfr-dev libmpc-dev flex bison python3 cmake \
libpng-dev libtool automake
# Create workspace
mkdir -p ~/saturn/toolchain
mkdir -p ~/saturn/projects
cd ~/saturn/toolchain
# Clone tool-chains repository
git clone https://github.com/yaul-org/libyaul-installer
cd libyaul-installer
# Build (this takes 30-60 minutes)
./install --prefix=/opt/yaul-toolchain --sh2
# Add to your shell profile (.bashrc/.zshrc)
export YAUL_INSTALL_ROOT=/opt/yaul-toolchain
export PATH="${YAUL_INSTALL_ROOT}/sh2eb-elf/bin:${PATH}"
cd ~/saturn/toolchain
git clone https://github.com/yaul-org/libyaul.git
cd libyaul
# Copy and edit configuration
cp yaul.env.in yaul.env
Edit yaul.env:
# yaul.env - key settings
YAUL_INSTALL_ROOT=/opt/yaul-toolchain
YAUL_ARCH_SH_PREFIX=sh2eb-elf
YAUL_BUILD_ROOT=${HOME}/saturn/toolchain/libyaul/build
# Build type: release recommended for DOOM
YAUL_BUILD=release
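# Development cartridge selection (0 = none). Despite what an earlier note here
# said, this option does not enable or disable second-CPU support.
YAUL_OPTION_DEV_CARTRIDGE=0
SILENT=0
# Source environment and build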
source yaul.env
make clean
make
make install
doom-saturn/
├── yaul.env # Copy from libyaul
├── Makefile
├── ip.sx # Boot header
├── src/
│ ├── main.c
│ ├── doomgeneric_saturn.c
│ ├── video.c
│ ├── input.c
│ ├── sound_68k.c
│ └── sh2_renderer.S # Assembly optimizations
├── doomgeneric/ # Submodule or copy
│ ├── doomgeneric.c
│ ├── d_main.c
│ └── ... (doom source)
├── assets/
│ └── doom1.wad
└── cd/
└── (built ISO contents)
YAUL_ROOT := $(YAUL_INSTALL_ROOT)
include $(YAUL_ROOT)/share/yaul.env
TARGET := doom
BUILD_DIR := build
SRCS := src/main.c \
src/doomgeneric_saturn.c \
src/video.c \
doomgeneric/doomgeneric.c \
doomgeneric/d_main.c \
# ... add all doom sources
ASMS := src/sh2_renderer.S
# DOOM needs all the memory it can get
LDFLAGS += -Wl,--defsym,__stack_size=0x4000
LDFLAGS += -Wl,--defsym,__heap_size=0x100000
include $(YAUL_ROOT)/share/yaul-post.mk
// src/main.c
#include <yaul.h>
#include "doomgeneric/doomgeneric.h"
// VDP2 bitmap for framebuffer
#define FB_WIDTH 320
#define FB_HEIGHT 200

// Pointer to the start of the NBG0 bitmap in VDP2 VRAM.
// Deliberately NOT static: src/doomgeneric_saturn.c refers to it through
// `extern uint8_t *framebuffer;`, which cannot link against a file-local
// (static) definition.
uint8_t *framebuffer;

// Program entry point: configures VDP2 for an 8bpp bitmap layer, publishes the
// VRAM pointer in `framebuffer`, then runs the doomgeneric tick loop forever.
void main(void)
{
    // Initialize Yaul
    yaul_init();

    // Setup VDP2 for bitmap mode
    vdp2_tvmd_display_res_set(VDP2_TVMD_INTERLACE_NONE,
                              VDP2_TVMD_HORZ_NORMAL_A,
                              VDP2_TVMD_VERT_224);

    // NBG0 as 8bpp bitmap (paletted - good for DOOM).
    // The bitmap is 512 pixels wide, so consecutive rows are 512 bytes apart
    // even though only FB_WIDTH of them are drawn.
    struct vdp2_bitmap_format nbg0_format = {
        .scroll_screen = VDP2_NBG0,
        .cc_count = VDP2_SCRN_CCC_PALETTE_256,
        .bitmap_size = VDP2_BITMAP_SIZE_512X256,
        .palette_base = VDP2_CRAM_ADDR(0x0000),
        .bitmap_base = VDP2_VRAM_ADDR(0, 0x00000)
    };
    vdp2_bitmap_format_set(&nbg0_format);
    vdp2_scrn_priority_set(VDP2_NBG0, 7);
    vdp2_scrn_display_set(VDP2_NBG0, true);

    // Get pointer to VRAM for direct framebuffer writes
    framebuffer = (uint8_t *)VDP2_VRAM_ADDR(0, 0x00000);

    // Start slave CPU for parallel work
    cpu_dual_master_init();
    vdp2_tvmd_display_set();

    // Initialize and run DOOM
    doomgeneric_Create(0, NULL);
    while (1) {
        doomgeneric_Tick();
    }
}
// src/doomgeneric_saturn.c
#include <yaul.h>
#include "doomgeneric/doomgeneric.h"
extern uint8_t *framebuffer;
static volatile uint32_t tick_counter = 0;
// VBlank handler for timing
static void vblank_in_handler(void *work __unused)
{
    // One tick per vertical blank: roughly 60 per second on NTSC, 50 on PAL.
    tick_counter += 1;
}
// doomgeneric start-up hook: installs the VBlank-in callback that drives
// tick_counter. The DOOM palette is not uploaded here; per the original note
// that happens when D_DoomMain loads the palette.
void DG_Init(void)
{
    vdp2_sync_vblank_in_set(vblank_in_handler, NULL);
}
// Copies the frame in DG_ScreenBuffer into the VDP2 bitmap, converting each
// 32-bit source pixel to a palette index with find_palette_index(), then
// waits for the VDP2 sync to finish.
// Intended as a first working version: the original notes say an optimized
// build should render paletted directly (hooking I_FinishUpdate) and skip the
// per-pixel conversion.
void DG_DrawFrame(void)
{
    const uint32_t *src_row = (uint32_t *)DG_ScreenBuffer;
    uint8_t *dst_row = framebuffer;

    for (int row = 0; row < DOOMGENERIC_RESY; row++) {
        for (int col = 0; col < DOOMGENERIC_RESX; col++) {
            dst_row[col] = find_palette_index(src_row[col]);
        }
        src_row += DOOMGENERIC_RESX;
        dst_row += 512; // destination rows are 512 bytes apart
    }

    vdp2_sync();
    vdp2_sync_wait();
}
// Reports at most one button transition per call.
//
// pressed  - out: 1 if the reported button went down, 0 if it was released
// doomKey  - out: DOOM key code for the reported button
// returns  - 1 if a transition was written to the out parameters, 0 if there
//            is nothing (more) to report
//
// Only the bit that is actually reported is acknowledged in prev_digital, so
// when several buttons change between two polls the caller receives each of
// them on successive calls instead of silently losing all but the first.
int DG_GetKey(int *pressed, unsigned char *doomKey)
{
    static uint16_t prev_digital = 0;

    // Map Saturn → DOOM keys
    static const struct {
        uint16_t saturn;
        unsigned char doom;
    } keymap[] = {
        { PERIPHERAL_DIGITAL_UP, KEY_UPARROW },
        { PERIPHERAL_DIGITAL_DOWN, KEY_DOWNARROW },
        { PERIPHERAL_DIGITAL_LEFT, KEY_LEFTARROW },
        { PERIPHERAL_DIGITAL_RIGHT, KEY_RIGHTARROW },
        { PERIPHERAL_DIGITAL_A, KEY_FIRE },
        { PERIPHERAL_DIGITAL_B, KEY_USE },
        { PERIPHERAL_DIGITAL_C, KEY_STRAFE_R },
        { PERIPHERAL_DIGITAL_X, KEY_STRAFE_L },
        { PERIPHERAL_DIGITAL_Y, ' ' }, // Open/use
        { PERIPHERAL_DIGITAL_Z, KEY_TAB }, // Map
        { PERIPHERAL_DIGITAL_L, KEY_RSHIFT }, // Run
        { PERIPHERAL_DIGITAL_R, KEY_FIRE },
        { PERIPHERAL_DIGITAL_START, KEY_ESCAPE },
    };

    smpc_peripheral_digital_t digital;
    smpc_peripheral_digital_read(&digital, 0);

    const uint16_t curr = digital.pressed.raw;
    const uint16_t changed = curr ^ prev_digital;
    if (changed == 0) {
        return 0;
    }

    for (unsigned int i = 0; i < sizeof(keymap) / sizeof(keymap[0]); i++) {
        const uint16_t bit = keymap[i].saturn;
        if (changed & bit) {
            *doomKey = keymap[i].doom;
            *pressed = (curr & bit) ? 1 : 0;
            // Acknowledge just this button; other pending changes stay
            // visible to the next call.
            prev_digital ^= bit;
            return 1;
        }
    }

    // Only unmapped bits changed: absorb them so they are not re-examined.
    prev_digital = curr;
    return 0;
}
// Milliseconds since start-up, derived from the VBlank tick counter assuming
// 60 ticks per second (NTSC: 1 vblank ≈ 16.67ms).
//
// ticks * 1000 / 60 == ticks * 50 / 3. Splitting into quotient and remainder
// keeps every intermediate inside 32 bits, so the result wraps cleanly modulo
// 2^32 ms instead of jumping backwards once ticks * 1000 overflows (which the
// direct formula does after roughly 19.9 hours).
uint32_t DG_GetTicksMs(void)
{
    const uint32_t ticks = tick_counter; // single read of the volatile counter
    return (ticks / 3u) * 50u + ((ticks % 3u) * 50u) / 3u;
}
// Blocks for about `ms` milliseconds, measured in whole VBlank ticks at an
// assumed 60 ticks per second. The tick count is rounded down, so requests
// shorter than one tick (~17ms) return immediately.
//
// ms * 60 / 1000 == ms * 3 / 50; the quotient/remainder split avoids the
// 32-bit overflow of ms * 60. Waiting on elapsed ticks (now - start) rather
// than an absolute target keeps the loop correct if tick_counter wraps.
void DG_SleepMs(uint32_t ms)
{
    const uint32_t start = tick_counter;
    const uint32_t wait_ticks = (ms / 50u) * 3u + ((ms % 50u) * 3u) / 50u;

    while ((uint32_t)(tick_counter - start) < wait_ticks) {
        cpu_sync_wait();
    }
}
void DG_SetWindowTitle(const char *title)
{
(void)title; // No window on Saturn
}
Registers:
R0-R15 General purpose (R15 = SP)
MACH/MACL MAC registers for multiply-accumulate
PR Procedure return
PC Program counter
SR Status register
Key Performance Facts:
- 16-bit instruction encoding (compact)
- Most instructions: 1 cycle
- Multiply: 1-4 cycles (MAC: 2 cycles)
- Division: 39 cycles (!!!) - AVOID
- Load/Store with cache miss: 7+ cycles
- Cache: 4KB unified instruction/data cache (SH7604); it can optionally be split into 2KB of cache plus 2KB of on-chip RAM
- Pipeline: 5 stage, stalls on data hazards
DOOM uses division heavily in rendering. Replace with reciprocal multiply:
// SLOW - original DOOM style
int scale = (projection_distance << FRACBITS) / distance;
// FAST - reciprocal lookup table
// Precompute: recip_table[i] = (1 << 24) / i for i = 1..2048
int scale = (projection_distance * recip_table[distance >> 4]) >> (24 - FRACBITS);// Build reciprocal table at startup
#define RECIP_BITS 20
static uint32_t recip_table[2048];
void init_recip_table(void)
{
recip_table[0] = 0xFFFFFFFF; // Avoid /0
for (int i = 1; i < 2048; i++) {
recip_table[i] = (1 << RECIP_BITS) / i;
}
}
The innermost column drawing loop runs millions of times per frame:
! src/sh2_renderer.S
! Optimized textured column drawer
        .global _R_DrawColumn_asm
        .align 2
! void R_DrawColumn_asm(
!     uint8_t *dest,       R4
!     uint8_t *source,     R5 (texture column; index is wrapped to 0..127)
!     int32_t  frac,       R6 (texture V start, 16.16 fixed)
!     int32_t  fracstep,   R7 (texture V step per pixel)
!     int32_t  count,      @(0,SP) -> R3
!     int32_t  destStep    @(4,SP) -> R1 (usually 320 for screen width)
! )
!
! Scratch registers: R0 = texel index, R2 = texel.
! The index lives in R0 because the SH-2 only encodes "and #imm" with R0 as
! the destination, and the indexed load @(R0,Rm) also requires R0.
_R_DrawColumn_asm:
        mov.l   @r15, r3            ! r3 = count
        mov.l   @(4,r15), r1        ! r1 = destStep
        cmp/pl  r3                  ! T = 1 only when count > 0
        bf      .Lexit              ! zero or negative count: nothing to draw
.Lloop:
        mov     r6, r0
        shlr16  r0                  ! r0 = frac >> 16 (integer texel row)
        and     #127, r0            ! wrap to a 128-texel column
        mov.b   @(r0,r5), r2        ! r2 = source[r0]
        mov.b   r2, @r4             ! *dest = texel
        add     r7, r6              ! frac += fracstep
        add     r1, r4              ! dest += destStep
        dt      r3                  ! count--, T = (count == 0)
        bf      .Lloop
.Lexit:
        rts
        nop
Unrolled Version (2x faster for tall columns):
! 8x unrolled column drawer
! Processes 8 pixels per iteration, then finishes the 0-7 leftover pixels
! one at a time. Same arguments as R_DrawColumn_asm:
!   R4 = dest, R5 = source, R6 = frac (16.16), R7 = fracstep,
!   @(0,SP) = count, @(4,SP) = destStep.
!
! Register use: R0 = texel index (required by "and #imm" and by @(R0,Rm)),
! R1 = destStep, R2 = texel, R3 = number of 8-pixel groups,
! R8 = leftover pixel count (callee-saved, so it is pushed and restored).

        ! One textured pixel: fetch source[(frac >> 16) & 127], store, advance.
        .macro  DRAW_PIXEL
        mov     r6, r0
        shlr16  r0
        and     #127, r0
        mov.b   @(r0,r5), r2
        mov.b   r2, @r4
        add     r7, r6
        add     r1, r4
        .endm

        .global _R_DrawColumn8_asm
        .align 4
_R_DrawColumn8_asm:
        mov.l   @r15, r3            ! r3 = count (read before the push below)
        mov.l   @(4,r15), r1        ! r1 = destStep
        mov.l   r8, @-r15           ! preserve r8
        cmp/pl  r3                  ! T = 1 only when count > 0
        bf      .Ldone
        mov     r3, r0
        and     #7, r0
        mov     r0, r8              ! r8 = count % 8 (remainder)
        shlr2   r3
        shlr    r3                  ! r3 = count / 8 (full iterations)
        tst     r3, r3
        bt      .Lremainder
.Lloop8:
        .rept   8
        DRAW_PIXEL
        .endr
        dt      r3
        bf      .Lloop8
.Lremainder:
        ! Handle remaining 0-7 pixels with the regular loop
        tst     r8, r8
        bt      .Ldone
.Lremloop:
        DRAW_PIXEL
        dt      r8
        bf      .Lremloop
.Ldone:
        mov.l   @r15+, r8           ! restore r8
        rts
        nop
// src/dual_cpu.c
#include <yaul.h>
// Mailbox shared between the master and slave SH-2 for splitting column
// rendering. The master fills in the work description and raises `ready`;
// the slave renders and raises `done`.
struct render_work {
// Set to 1 by R_RenderView_dual to hand work to the slave; cleared by slave_entry.
volatile int ready;
// Cleared by R_RenderView_dual before dispatch; set to 1 by slave_entry when finished.
volatile int done;
// Column rendering work: the slave draws columns start_col .. end_col-1
int start_col;
int end_col;
// Shared data pointers
uint8_t *framebuffer;
// ... other rendering state
} __attribute__((aligned(32)));
// Fixed address at which both CPUs find the mailbox.
// NOTE(review): the original comment calls this "High workram" and the intent
// is an uncached mapping, but 0x20100000 does not look like the Saturn high
// work RAM range (0x06000000, cache-through mirror at 0x26000000) — confirm
// the address against the Saturn memory map and the linker script before use.
static struct render_work *slave_work =
(struct render_work *)0x20100000; // High workram
// Slave CPU entry point
// Slave CPU main loop: waits for the master to raise `ready`, renders the
// assigned column range, then reports completion through `done`.
static void slave_entry(void)
{
    while (1) {
        // Wait for work
        while (!slave_work->ready) {
            cpu_sync_wait();
        }

        // Render assigned columns
        for (int x = slave_work->start_col; x < slave_work->end_col; x++) {
            R_DrawColumn_slave(x);
        }

        // Consume the request BEFORE publishing completion. With the opposite
        // order the master can observe done == 1, start the next frame and set
        // ready = 1, only for this CPU to then overwrite it with 0 — the new
        // request is lost and the master waits on `done` forever.
        slave_work->ready = 0;
        slave_work->done = 1;
    }
}
// Master CPU - start frame
// Renders one view using both CPUs: the slave is handed the right half of the
// screen through the shared mailbox while this (master) CPU draws the left
// half, then the master blocks until the slave reports completion.
void R_RenderView_dual(void)
{
    const int split = SCREENWIDTH / 2;

    // Describe the slave's share of the work...
    slave_work->start_col = split;
    slave_work->end_col = SCREENWIDTH;
    slave_work->framebuffer = framebuffer;

    // ...and only then signal it (done is reset first, ready is raised last).
    slave_work->done = 0;
    slave_work->ready = 1;

    // Left half on the master.
    for (int col = 0; col < split; col++) {
        R_RenderColumn(col);
    }

    // Join: wait for the slave's half.
    while (!slave_work->done) {
        cpu_sync_wait();
    }
}
// Initialize dual CPU at startup
// One-time set-up for dual-CPU rendering: puts the mailbox flags into their
// idle state, then registers slave_entry as the slave CPU's entry point.
void init_dual_cpu(void)
{
    slave_work->ready = 0; // no work pending
    slave_work->done = 0;  // nothing completed yet
    cpu_dual_slave_set(slave_entry);
}
// BAD - Cache-thrashing column-major access
for (int x = 0; x < 320; x++) {
for (int y = 0; y < 200; y++) {
framebuffer[y * 320 + x] = pixel; // Jumps 320 bytes each iteration
}
}
// BETTER - But DOOM needs column-major for walls
// Solution: Use write-through or write to HWRAM when possible
// Use on-chip RAM for hot data (the original note claims 4KB).
// Address given by the original note: 0xFFFFF000 - 0xFFFFFFFF on SH7604.
// NOTE(review): verify both the size and the address against the SH7604
// hardware manual — the SH7604's fast on-chip memory is normally described as
// half of its 4KB cache switched to RAM mode, and the top of the address space
// is where its on-chip peripheral registers live. TODO confirm before relying
// on this macro.
#define ONCHIP_RAM ((uint8_t *)0xFFFFF000)
// Store texture column cache in on-chip RAM.
// This only has an effect if the linker script defines a ".onchip" output
// section mapped to that memory — presumably still to be added; confirm.
static uint8_t column_cache[128] __attribute__((section(".onchip")));// DOOM uses 16.16 fixed point
// SH2 optimized operations:
// Multiply two 16.16 fixed, result 16.16
// Multiplies two 16.16 fixed-point values and returns the 16.16 product,
// i.e. bits 16..47 of the exact 64-bit product a * b.
//
// The previous version pre-shifted both operands by 8 and returned MACH (the
// top 32 bits of the product), which discards almost the entire result.
//
// On SH targets DMULS.L forms the 64-bit product in MACH:MACL and XTRCT pulls
// out the middle 32 bits. Elsewhere a portable 64-bit expression is used so
// the function can also be built and tested on a host.
static inline int32_t fixed_mul(int32_t a, int32_t b)
{
#if defined(__sh__)
    int32_t hi;
    int32_t lo;
    __asm__ (
        "dmuls.l %2, %3\n\t" // MACH:MACL = a * b (signed 32x32 -> 64)
        "sts mach, %0\n\t"   // hi = upper 32 bits
        "sts macl, %1\n\t"   // lo = lower 32 bits
        "xtrct %0, %1"       // lo = (hi << 16) | (lo >> 16)
        : "=&r" (hi), "=&r" (lo)
        : "r" (a), "r" (b)
        : "mach", "macl"
    );
    return lo;
#else
    return (int32_t)(((int64_t)a * (int64_t)b) >> 16);
#endif
}
// Fast integer multiply with overflow to MACL
static inline int32_t mul32(int32_t a, int32_t b)
{
int32_t result;
__asm__ volatile (
"mul.l %1, %2\n\t"
"sts macl, %0\n\t"
: "=r" (result)
: "r" (a), "r" (b)
: "macl"
);
return result;
}
| Resolution | Optimization Level | Expected FPS |
|---|---|---|
| 320×200 | Basic port | 5-8 fps |
| 320×200 | ASM inner loops | 12-15 fps |
| 320×200 | Dual CPU | 18-22 fps |
| 256×160 | Full optimization | 25-30 fps |
| 256×160 | + reduced draw distance | 30+ fps |
// Render at 256x160, scale up to 320x200 via VDP2
void setup_scaled_mode(void)
{
// Render to 256x160 bitmap
// Use VDP2 screen scaling to display at 320x200
vdp2_scrn_reduction_x_set(VDP2_NBG0, 0.8 * 65536); // 256/320
vdp2_scrn_reduction_y_set(VDP2_NBG0, 0.8 * 65536); // 160/200
}
# Mednafen (most accurate)
mednafen doom-saturn.cue
# Yabause/Kronos (debugging features)
kronos -i doom-saturn.cue
# Flash to Action Replay / Pseudo Saturn
# Or burn to CD-R (not reliable on all Saturns)
# USB dev cart recommended for iteration speed
Want me to detail the 68000 sound driver setup or the CD streaming for WAD data?
┌─────────────────────────────────────────────────────────────┐
│ Saturn Audio System │
├─────────────────────────────────────────────────────────────┤
│ │
│ ┌──────────┐ ┌──────────────┐ ┌──────────────┐ │
│ │ SH2 Main │────▶│ Sound RAM │◀───▶│ MC68EC000 │ │
│ │ CPU │ │ 512 KB │ │ 11.3 MHz │ │
│ └──────────┘ └──────┬───────┘ └──────┬───────┘ │
│ │ │ │
│ ▼ ▼ │
│ ┌──────────────────────────────┐ │
│ │ SCSP │ │
│ │ Yamaha YMF292-F │ │
│ │ - 32 PCM channels │ │
│ │ - 16-bit, up to 44.1kHz │ │
│ │ - FM synthesis │ │
│ │ - DSP effects │ │
│ └──────────────┬───────────────┘ │
│ │ │
│ ▼ │
│ ┌──────────┐ │
│ │ Stereo │ │
│ │ Output │ │
│ └──────────┘ │
└─────────────────────────────────────────────────────────────┘
68000 Address Space:
0x000000 - 0x07FFFF : Sound RAM (512KB)
0x100000 - 0x100BFF : SCSP registers (slot 0-31)
0x100C00 - 0x100EFF : SCSP common registers
SH2 Access to Sound RAM:
0x25A00000 - 0x25A7FFFF : Sound RAM (through SCSP)
0x25B00000 - 0x25B00EFF : SCSP registers
Sound RAM Layout (512KB):
┌─────────────────────────────────────┐ 0x000000
│ 68000 Driver Code (8KB) │
├─────────────────────────────────────┤ 0x002000
│ Command Buffer (256 bytes) │
├─────────────────────────────────────┤ 0x002100
│ PCM Sample Bank (384KB) │
│ - Sound effects │
│ - Loaded from WAD │
├─────────────────────────────────────┤ 0x062100
│ Music Sample Bank (96KB) │
│ - OPL instruments or PCM patches │
├─────────────────────────────────────┤ 0x07A100
│ Work RAM / Buffers (24KB) │
└─────────────────────────────────────┘ 0x080000
// Shared between SH2 and 68000
// Located at Sound RAM 0x002000
// One entry of the SH2 -> 68000 command ring.
//
// The 68000 driver locates entry N at CMD_BUFFER + 4 + N * 32 (it shifts the
// index left by 5), so this structure must be exactly 32 bytes. Without the
// trailing pad it is only 20 bytes and every entry after the first would be
// read from the wrong place. Field offsets are unchanged.
//
// NOTE(review): 16 of these plus the 4-byte ring header is 516 bytes, more
// than the 256 bytes the Sound RAM layout reserves at 0x002000 before the
// sample bank at 0x002100 — confirm the sample bank placement.
struct sound_command {
    uint8_t cmd;           // Command type (CMD_*)
    uint8_t channel;       // Sound channel (0-31)
    uint8_t volume_left;   // 0-127
    uint8_t volume_right;  // 0-127
    uint16_t pitch;        // Pitch (OCT/FNS format)
    uint16_t sample_id;    // Sample index
    uint32_t sample_addr;  // Address in sound RAM
    uint32_t sample_len;   // Length in bytes
    uint8_t loop;          // Loop flag
    uint8_t priority;      // For channel stealing
    uint16_t reserved;
    uint8_t pad[12];       // Pads the entry to the driver's 32-byte stride
};
_Static_assert(sizeof(struct sound_command) == 32,
               "sound_command must match the 68000 driver's 32-byte stride");
#define CMD_NONE 0x00
#define CMD_PLAY_SFX 0x01
#define CMD_STOP_SFX 0x02
#define CMD_PLAY_MUSIC 0x03
#define CMD_STOP_MUSIC 0x04
#define CMD_SET_VOLUME 0x05
#define CMD_STOP_ALL 0x06
#define CMD_UPLOAD_DONE 0x07
// Command ring buffer
#define MAX_COMMANDS 16
struct command_buffer {
volatile uint8_t write_pos; // SH2 writes here
volatile uint8_t read_pos; // 68000 reads here
volatile uint8_t driver_ready; // 68000 sets when initialized
volatile uint8_t padding;
struct sound_command commands[MAX_COMMANDS];
};
; sound_driver.s
; Saturn 68000 Sound Driver for DOOM
; Assemble with: m68k-elf-as -m68000
.text
.org 0x0000
; ============================================
; Vector Table
; ============================================
; 68000 exception vector table (256 long-word entries, starting at address 0).
vectors:
        .long 0x00080000        ; Vector 0: Initial SP (top of sound RAM)
        .long _start            ; Vector 1: Initial PC
        .long _exception        ; Vector 2: Bus error
        .long _exception        ; Vector 3: Address error
        .long _exception        ; Vector 4: Illegal instruction
        .long _exception        ; Vector 5: Zero divide
        .long _exception        ; Vector 6: CHK
        .long _exception        ; Vector 7: TRAPV
        .long _exception        ; Vector 8: Privilege violation
        .long _exception        ; Vector 9: Trace
        .long _exception        ; Vector 10: Line A
        .long _exception        ; Vector 11: Line F
        ; Twelve entries precede this gap, so 52 (not 48) reserved slots are
        ; needed for the next entry to land on vector 64 and for the table to
        ; total 256 entries together with the final (256-65) filler.
        .space 52*4             ; Vectors 12-63: reserved / unused here
        ; NOTE(review): confirm in the SCSP/68000 documentation which vector
        ; the sound interrupt is actually delivered on.
        .long _timer_irq        ; SCSP timer interrupt (vector 64)
        .space (256-65)*4       ; Rest of vectors
; ============================================
; Constants
; ============================================
SCSP_BASE = 0x100000
SCSP_SLOT_BASE = 0x100000
SCSP_COMMON = 0x100C00
; SCSP slot registers (per channel, 0x20 bytes each)
SLOT_SAR = 0x00 ; Sample address (18-bit, in 68K space)
SLOT_LSA = 0x04 ; Loop start address
SLOT_LEA = 0x08 ; Loop end address
SLOT_D2R_D1R = 0x0A ; Decay rates
SLOT_RR_DL = 0x0C ; Release rate / decay level
SLOT_AR_KARS = 0x0E ; Attack rate / key scale
SLOT_LPCTL_8B = 0x00 ; Loop control (bits in SAR high word)
SLOT_PCM8B = 0x00 ; 8-bit PCM flag
SLOT_OCT_FNS = 0x10 ; Octave / frequency
SLOT_DISDL_DIPAN= 0x14 ; Direct send level / pan
SLOT_EFSDL_EFPAN= 0x16 ; Effect send level / pan
SLOT_KYONB = 0x1E ; Key on/off (bit 4)
; Common registers
SCSP_MVOL = 0x100C04 ; Master volume
SCSP_TIMERA = 0x100C18 ; Timer A
SCSP_TIMERB = 0x100C1A ; Timer B
SCSP_SCIEB = 0x100C1E ; Interrupt enable
SCSP_SCIPD = 0x100C20 ; Interrupt pending
; Command buffer address
CMD_BUFFER = 0x002000
; Sample bank starts here
SAMPLE_BANK = 0x002100
; ============================================
; Driver Entry Point
; ============================================
; Reset entry point (installed as the initial PC in the vector table).
; Initializes the SCSP, silences every slot, tells the SH2 the driver is up,
; then falls through into main_loop.
_start:
; Disable interrupts during init (supervisor mode, interrupt mask 7)
move.w #0x2700, sr
; Initialize SCSP
bsr init_scsp
; Clear all channels
bsr stop_all_channels
; Set master volume to 0x0F. init_scsp writes the same value and calls it
; the maximum, so the earlier "0 = max" remark here was contradictory.
move.w #0x000F, SCSP_MVOL
; Signal ready to SH2 (byte 2 of the command buffer header)
move.b #1, CMD_BUFFER+2 ; driver_ready = 1
; Enable interrupts (supervisor mode, interrupt mask 0)
move.w #0x2000, sr
; ============================================
; Main Loop
; ============================================
main_loop:
; Check for new commands
move.l #CMD_BUFFER, a0
move.b (a0), d0 ; write_pos
move.b 1(a0), d1 ; read_pos
cmp.b d0, d1
beq main_loop ; No new commands
; Calculate command address
; cmd_addr = CMD_BUFFER + 4 + (read_pos * sizeof(command))
moveq #0, d2
move.b d1, d2
lsl.w #5, d2 ; * 32 (sizeof command)
add.l #CMD_BUFFER+4, d2
move.l d2, a1 ; a1 = command pointer
; Get command type
move.b (a1), d3 ; cmd type
; Dispatch command
cmp.b #1, d3
beq cmd_play_sfx
cmp.b #2, d3
beq cmd_stop_sfx
cmp.b #3, d3
beq cmd_play_music
cmp.b #4, d3
beq cmd_stop_music
cmp.b #5, d3
beq cmd_set_volume
cmp.b #6, d3
beq cmd_stop_all
; Common exit for every command handler: retire the current ring entry.
; a0 and d1 are reloaded here rather than trusted from main_loop, because the
; handlers overwrite them (cmd_play_sfx / cmd_stop_sfx / cmd_stop_music reuse
; d1, and stop_all_channels repoints a0 at the SCSP slots). Using the stale
; values would write a wrong read_pos to a wrong address.
cmd_done:
        move.l  #CMD_BUFFER, a0
        move.b  1(a0), d1       ; d1 = read_pos
        ; Advance read pointer
        addq.b  #1, d1
        and.b   #0x0F, d1       ; Wrap at 16
        move.b  d1, 1(a0)       ; Update read_pos
        bra     main_loop
; ============================================
; Command Handlers
; ============================================
; CMD_PLAY_SFX handler. In: a1 = command structure. Clobbers d1, d2, a2.
;
; NOTE(review): the slot register offsets and bit positions below are kept
; exactly as originally written and should be checked against the SCSP manual.
; In particular the samples uploaded by the SH2 side are 8-bit PCM, yet this
; code selects PCM8B=0 and converts address/length to word units — confirm.
cmd_play_sfx:
        ; slot_addr = SCSP_SLOT_BASE + (channel * 0x20)
        ; d1 is cleared first: the long add below uses all 32 bits, and the
        ; upper word of d1 otherwise still holds whatever a previous handler
        ; left there, producing a wrong slot address.
        moveq   #0, d1
        move.b  1(a1), d1               ; channel
        lsl.w   #5, d1
        add.l   #SCSP_SLOT_BASE, d1
        move.l  d1, a2                  ; a2 = slot registers
        ; Key off first (stop any playing sound)
        bclr    #4, 0x1F(a2)
        ; Set sample address (18-bit, words)
        move.l  8(a1), d1               ; sample_addr
        lsr.l   #1, d1                  ; Convert to words
        move.w  d1, 2(a2)               ; Low 16 bits
        swap    d1
        and.w   #0x000F, d1             ; High 4 bits
        or.w    #0x1000, d1             ; PCM8B=0, LPCTL=01 (normal)
        move.w  d1, (a2)                ; SAR high + flags
        ; Set loop points (full sample, no loop)
        move.w  #0x0000, 4(a2)          ; LSA = 0
        move.l  12(a1), d1              ; sample_len
        lsr.l   #1, d1                  ; Convert to words
        move.w  d1, 6(a2)               ; LEA = length
        ; Set envelope (instant attack, no decay, instant release)
        move.w  #0x001F, 0x0A(a2)       ; D2R=0, D1R=31
        move.w  #0x001F, 0x0C(a2)       ; RR=31, DL=0
        move.w  #0x001F, 0x0E(a2)       ; AR=31, KRS=0
        ; Set pitch; a command pitch of 0 selects the default 0x0400
        move.w  4(a1), d1               ; pitch from command
        tst.w   d1
        bne     .has_pitch
        move.w  #0x0400, d1             ; Default pitch
.has_pitch:
        move.w  d1, 0x10(a2)            ; OCT/FNS
        ; Set volume and pan: left volume in the high byte, right in the low
        move.b  2(a1), d1               ; volume_left
        move.b  3(a1), d2               ; volume_right
        lsl.w   #8, d1
        or.b    d2, d1
        move.w  d1, 0x14(a2)            ; DISDL/DIPAN
        move.w  #0x0000, 0x16(a2)       ; No effect send
        ; Key on
        bset    #4, 0x1F(a2)
        bra     cmd_done
; CMD_STOP_SFX handler. In: a1 = command structure. Clobbers d1, a2.
cmd_stop_sfx:
        ; Clear d1 before loading the channel byte: the long add below uses
        ; all 32 bits, so stale upper bits would corrupt the slot address.
        moveq   #0, d1
        move.b  1(a1), d1               ; channel
        lsl.w   #5, d1                  ; * 0x20 bytes per slot
        add.l   #SCSP_SLOT_BASE, d1
        move.l  d1, a2
        bclr    #4, 0x1F(a2)            ; Key off
        bra     cmd_done
cmd_play_music:
; TODO: Implement MUS/MIDI playback
bra cmd_done
; CMD_STOP_MUSIC handler: keys off channels 24-31 (reserved for music).
; Clobbers d0, d1, a2.
cmd_stop_music:
        moveq   #24, d0                 ; d0 = first music channel
.stop_music_loop:
        ; Rebuild the slot address from a fully cleared d1 each time: the
        ; previous iteration's add.l leaves SCSP_SLOT_BASE bits in the upper
        ; word, and a word-sized move would not remove them.
        moveq   #0, d1
        move.w  d0, d1
        lsl.w   #5, d1                  ; * 0x20 bytes per slot
        add.l   #SCSP_SLOT_BASE, d1
        move.l  d1, a2
        bclr    #4, 0x1F(a2)            ; Key off
        addq.w  #1, d0
        cmp.w   #32, d0
        blt     .stop_music_loop
        bra     cmd_done
cmd_set_volume:
move.b 2(a1), d0 ; New master volume
ext.w d0
move.w d0, SCSP_MVOL
bra cmd_done
cmd_stop_all:
bsr stop_all_channels
bra cmd_done
; ============================================
; Utility Functions
; ============================================
init_scsp:
; Initialize SCSP common registers
move.w #0x0000, 0x100C00 ; MEM4MB=0, DAC18B=0
move.w #0x000F, 0x100C04 ; Master volume max
rts
; Keys off all 32 SCSP slots by clearing bit 4 of the byte at offset 0x1F of
; each 0x20-byte slot block.
; Clobbers a0 (left pointing just past the last slot) and d0; callers that
; keep the command buffer pointer in a0 must reload it afterwards.
stop_all_channels:
move.l #SCSP_SLOT_BASE, a0
moveq #31, d0
.stop_loop:
bclr #4, 0x1F(a0) ; Key off
add.l #0x20, a0 ; Next slot
dbf d0, .stop_loop
rts
; ============================================
; Interrupt Handlers
; ============================================
_timer_irq:
; Used for music timing if needed
rte
_exception:
; Halt on exception
stop #0x2700
bra _exception
.end
# 68000 toolchain (separate from SH2)
M68K_PREFIX = m68k-elf-
M68K_AS = $(M68K_PREFIX)as
M68K_LD = $(M68K_PREFIX)ld
M68K_OBJCOPY = $(M68K_PREFIX)objcopy
sound_driver.bin: sound_driver.s
$(M68K_AS) -m68000 -o sound_driver.o sound_driver.s
$(M68K_LD) -T sound_driver.ld -o sound_driver.elf sound_driver.o
$(M68K_OBJCOPY) -O binary sound_driver.elf sound_driver.bin
/* sound_driver.ld */
OUTPUT_FORMAT("elf32-m68k")
OUTPUT_ARCH(m68k)
MEMORY
{
SOUND_RAM (rwx) : ORIGIN = 0x000000, LENGTH = 512K
}
SECTIONS
{
.text 0x000000 : {
*(.text)
*(.rodata)
} > SOUND_RAM
.data : {
*(.data)
} > SOUND_RAM
.bss : {
*(.bss)
} > SOUND_RAM
}
// src/sound_saturn.c
#include <yaul.h>
#include <string.h>
// Sound RAM access from SH2
#define SOUND_RAM ((volatile uint8_t *)0x25A00000)
#define SCSP_REG ((volatile uint16_t *)0x25B00000)
// Driver location
#define DRIVER_ADDR 0x000000
#define CMD_BUFFER_ADDR 0x002000
#define SAMPLE_BANK_ADDR 0x002100
// Command buffer in sound RAM
static volatile struct command_buffer *cmd_buf =
(volatile struct command_buffer *)(0x25A00000 + CMD_BUFFER_ADDR);
// Sample directory
struct sample_info {
uint32_t sound_ram_addr; // Address in sound RAM
uint32_t length; // Length in bytes
uint16_t rate; // Sample rate
uint8_t bits; // 8 or 16
uint8_t channels; // 1 or 2
};
static struct sample_info sample_directory[128];
static uint32_t next_sample_addr = SAMPLE_BANK_ADDR;
// Load and start 68000 driver
// Boots the 68000 sound driver: halts the 68000, copies the driver image to
// the start of sound RAM, resets the command ring, restarts the 68000 and
// then blocks until the driver sets driver_ready.
// Note: there is no timeout — if the driver never reports ready this never
// returns.
void sound_init(void)
{
    extern uint8_t sound_driver_bin[];
    extern uint32_t sound_driver_bin_size;

    // Halt the 68000 and give it a moment to actually stop.
    smpc_smc_sndoff_call();
    for (volatile int spin = 0; spin < 10000; spin++) {
    }

    // Byte-by-byte upload of the driver image, lowest address first.
    volatile uint8_t *ram = SOUND_RAM + DRIVER_ADDR;
    const uint8_t *image = sound_driver_bin;
    for (uint32_t left = sound_driver_bin_size; left != 0; left--) {
        *ram++ = *image++;
    }

    // Empty ring, driver not yet ready.
    cmd_buf->write_pos = 0;
    cmd_buf->read_pos = 0;
    cmd_buf->driver_ready = 0;

    // Release the 68000 and wait for its handshake.
    smpc_smc_sndon_call();
    while (cmd_buf->driver_ready == 0) {
        for (volatile int spin = 0; spin < 1000; spin++) {
        }
    }
}
// Upload sample to sound RAM
// Copies one PCM sample into the sound RAM sample bank and records it in
// sample_directory[id].
//
// id      - directory slot, 0..127
// data    - sample bytes to copy
// length  - number of bytes
// rate    - sample rate in Hz
// bits    - bits per sample (8 or 16)
// returns - 0 on success, -1 if id is out of range or the bank is full
int sound_upload_sample(int id, const uint8_t *data, uint32_t length,
                        uint16_t rate, uint8_t bits)
{
    // End of the PCM sample bank (start of the music bank).
    const uint32_t bank_end = 0x062100;

    // Negative ids must be rejected too, or they index before the directory.
    if (id < 0 || id >= 128) {
        return -1;
    }

    // Compare against the remaining space instead of adding first, so a very
    // large length cannot wrap the 32-bit sum and slip past the check.
    if (next_sample_addr > bank_end || length > bank_end - next_sample_addr) {
        return -1; // Out of space
    }

    // Copy to sound RAM
    volatile uint8_t *dst = SOUND_RAM + next_sample_addr;
    for (uint32_t i = 0; i < length; i++) {
        dst[i] = data[i];
    }

    // Record in directory
    sample_directory[id].sound_ram_addr = next_sample_addr;
    sample_directory[id].length = length;
    sample_directory[id].rate = rate;
    sample_directory[id].bits = bits;
    sample_directory[id].channels = 1;

    // Advance the allocation cursor, keeping it 4-byte aligned
    next_sample_addr += length;
    next_sample_addr = (next_sample_addr + 3) & ~3u;
    return 0;
}
// Send command to 68000
static void send_command(struct sound_command *cmd)
{
uint8_t write_pos = cmd_buf->write_pos;
uint8_t next_pos = (write_pos + 1) & 0x0F;
// Wait if buffer full
while (next_pos == cmd_buf->read_pos) {
// Buffer full, wait
for (volatile int i = 0; i < 100; i++);
}
// Copy command
volatile struct sound_command *dst = &cmd_buf->commands[write_pos];
dst->cmd = cmd->cmd;
dst->channel = cmd->channel;
dst->volume_left = cmd->volume_left;
dst->volume_right = cmd->volume_right;
dst->pitch = cmd->pitch;
dst->sample_id = cmd->sample_id;
dst->sample_addr = cmd->sample_addr;
dst->sample_len = cmd->sample_len;
dst->loop = cmd->loop;
dst->priority = cmd->priority;
// Advance write position (signals 68000)
cmd_buf->write_pos = next_pos;
}
// Play a sound effect
static int next_sfx_channel = 0;
#define MAX_SFX_CHANNELS 24 // Channels 0-23 for SFX, 24-31 for music
void sound_play_sfx(int sample_id, int volume, int sep)
{
if (sample_id >= 128) return;
struct sample_info *info = &sample_directory[sample_id];
if (info->length == 0) return; // Sample not loaded
struct sound_command cmd;
cmd.cmd = CMD_PLAY_SFX;
cmd.channel = next_sfx_channel;
cmd.sample_addr = info->sound_ram_addr;
cmd.sample_len = info->length;
cmd.loop = 0;
cmd.priority = 0;
// sep: 0 = left, 128 = center, 255 = right
// Convert to left/right volume
int vol_left = (volume * (255 - sep)) / 255;
int vol_right = (volume * sep) / 255;
cmd.volume_left = vol_left > 127 ? 127 : vol_left;
cmd.volume_right = vol_right > 127 ? 127 : vol_right;
// Calculate pitch based on sample rate
// SCSP base rate is 44100Hz
// OCT/FNS: OCT = octave (-8 to +7), FNS = 10-bit fraction
// For 11025Hz sample at 44100Hz output: pitch = 0x0400 (1/4 rate)
// For 22050Hz: pitch = 0x0800
// For 44100Hz: pitch = 0x1000
int base_pitch = (info->rate * 0x1000) / 44100;
cmd.pitch = base_pitch > 0xFFFF ? 0xFFFF : base_pitch;
send_command(&cmd);
// Round-robin channel allocation
next_sfx_channel = (next_sfx_channel + 1) % MAX_SFX_CHANNELS;
}
void sound_stop_sfx(int channel)
{
struct sound_command cmd;
cmd.cmd = CMD_STOP_SFX;
cmd.channel = channel;
send_command(&cmd);
}
void sound_stop_all(void)
{
struct sound_command cmd;
cmd.cmd = CMD_STOP_ALL;
send_command(&cmd);
}// src/i_sound_saturn.c
// Replaces DOOM's i_sound.c
#include "doomdef.h"
#include "doomstat.h"
#include "i_sound.h"
#include "w_wad.h"
extern void sound_init(void);
extern int sound_upload_sample(int id, const uint8_t *data, uint32_t length,
uint16_t rate, uint8_t bits);
extern void sound_play_sfx(int sample_id, int volume, int sep);
// Map DOOM sfx to our sample IDs
static int sfx_loaded[NUMSFX];
// Starts the sound driver and uploads every DOOM sound effect found in the
// WAD. sfx_loaded[i] ends up as the sample id for effect i, or -1 if the
// effect has no name, no lump, a malformed lump, or failed to upload.
void I_InitSound(void)
{
    sound_init();

    // Start from "nothing loaded". The array is zero-initialized, and 0 would
    // otherwise read as "loaded as sample 0" for every skipped entry
    // (including index 0, which the loop below never visits).
    for (int i = 0; i < NUMSFX; i++) {
        sfx_loaded[i] = -1;
    }

    // Load sound effects from WAD
    for (int i = 1; i < NUMSFX; i++) {
        if (S_sfx[i].name[0] == 0) continue;

        // "DS" + effect name, bounded to the 8-character lump name limit.
        char lumpname[9];
        snprintf(lumpname, sizeof lumpname, "DS%s", S_sfx[i].name);
        int lump = W_CheckNumForName(lumpname);
        if (lump < 0) continue;

        // DOOM sound format:
        // 0x00-0x01: format (3)
        // 0x02-0x03: sample rate
        // 0x04-0x07: sample count
        // 0x08+: unsigned 8-bit PCM
        uint8_t *data = W_CacheLumpNum(lump, PU_STATIC);
        int size = W_LumpLength(lump);
        if (data == NULL) continue;
        if (size <= 8) {
            // Too short to hold the header plus any samples.
            W_ReleaseLumpNum(lump);
            continue;
        }

        uint16_t rate = (uint16_t)(data[2] | (data[3] << 8));
        uint32_t samples = (uint32_t)data[4] | ((uint32_t)data[5] << 8) |
                           ((uint32_t)data[6] << 16) | ((uint32_t)data[7] << 24);

        // Never trust the header beyond what the lump actually contains.
        uint32_t available = (uint32_t)size - 8;
        if (samples > available) {
            samples = available;
        }

        // Convert unsigned to signed for SCSP. This rewrites the cached lump
        // in place.
        uint8_t *pcm = data + 8;
        for (uint32_t j = 0; j < samples; j++) {
            pcm[j] = pcm[j] ^ 0x80;
        }

        if (sound_upload_sample(i, pcm, samples, rate, 8) == 0) {
            sfx_loaded[i] = i;
        }
        W_ReleaseLumpNum(lump);
    }
}
// Starts playback of one effect. The effect's position in S_sfx selects its
// entry in sfx_loaded; a negative entry means the effect is unavailable.
// Returns `channel` unchanged on success, or -1 if the effect is not loaded.
// `pitch` is currently ignored (pitch shifting is still a TODO).
int I_StartSound(sfxinfo_t *sfx, int channel, int vol, int sep, int pitch)
{
    (void)pitch;

    const int sample = sfx_loaded[sfx - S_sfx];
    if (sample < 0) {
        return -1;
    }

    sound_play_sfx(sample, vol, sep);
    return channel;
}
void I_StopSound(int handle)
{
// Could implement per-channel stopping
(void)handle;
}
int I_SoundIsPlaying(int handle)
{
// Could query 68000 for channel status
(void)handle;
return 0;
}
void I_UpdateSound(void)
{
// Nothing needed - 68000 handles mixing
}
void I_SubmitSound(void)
{
// Nothing needed
}
// Silences every channel when the sound system is shut down.
void I_ShutdownSound(void)
{
    // Declared here because this file's extern list covers sound_init,
    // sound_upload_sample and sound_play_sfx but not sound_stop_all, and
    // calling an undeclared function is an error in C99 and later.
    extern void sound_stop_all(void);

    sound_stop_all();
}
void I_SetChannels(void)
{
// Nothing needed
}
WAD File Format:
┌────────────────────────────────────────┐
│ Header (12 bytes) │
│ - "IWAD" or "PWAD" (4 bytes) │
│ - Lump count (4 bytes) │
│ - Directory offset (4 bytes) │
├────────────────────────────────────────┤
│ Lump Data │
│ - Textures, sprites, sounds... │
│ - Maps, etc. │
├────────────────────────────────────────┤
│ Directory (16 bytes per entry) │
│ - File offset (4 bytes) │
│ - Size (4 bytes) │
│ - Name (8 bytes, null-padded) │
└────────────────────────────────────────┘
// src/cd_wad.c
#include <yaul.h>
#include <cd-block.h>
// WAD directory entry
struct wad_dirent {
uint32_t offset; // Offset in WAD file
uint32_t size; // Size in bytes
char name[8]; // Lump name
};
// WAD state
struct wad_state {
uint32_t cd_start_sector; // Starting sector of WAD on CD
uint32_t numlumps; // Number of lumps
struct wad_dirent *directory; // In RAM
// Cache
uint8_t *cache; // Lump cache buffer
uint32_t cache_size; // Size of cache
int cached_lump; // Currently cached lump (-1 = none)
};
static struct wad_state wad;
// CD sector size
#define SECTOR_SIZE 2048
// Read sectors from CD
// Starts a read of `count` sectors beginning at `sector` into `buffer`, then
// blocks (calling vdp2_sync_wait between polls) until the CD block is idle.
// `buffer` must have room for count * SECTOR_SIZE bytes.
static void cd_read_sectors(uint32_t sector, uint32_t count, void *buffer)
{
    cd_block_sector_read(sector, count, buffer);

    for (;;) {
        if (!cd_block_busy()) {
            break;
        }
        vdp2_sync_wait();
    }
}
// Initialize WAD from CD
int W_InitWAD(const char *filename)
{
// Find file on CD (ISO9660)
cd_block_file_t file;
if (cd_block_file_open(&file, filename) != 0) {
return -1; // File not found
}
wad.cd_start_sector = file.lba;
// Read header
uint8_t header[12];
cd_read_sectors(wad.cd_start_sector, 1, header);
// Verify WAD signature
if (header[0] != 'I' && header[0] != 'P') return -1;
if (header[1] != 'W') return -1;
if (header[2] != 'A') return -1;
if (header[3] != 'D') return -1;
wad.numlumps = header[4] | (header[5] << 8) |
(header[6] << 16) | (header[7] << 24);
uint32_t dir_offset = header[8] | (header[9] << 8) |
(header[10] << 16) | (header[11] << 24);
// Allocate directory in RAM
// ~2800 lumps * 16 bytes ≈ 45KB for DOOM1.WAD
wad.directory = malloc(wad.numlumps * sizeof(struct wad_dirent));
if (!wad.directory) return -1;
// Read directory from CD
uint32_t dir_sector = wad.cd_start_sector + (dir_offset / SECTOR_SIZE);
uint32_t dir_sectors = ((wad.numlumps * 16) + SECTOR_SIZE - 1) / SECTOR_SIZE;
// Temporary buffer for reading
uint8_t *temp = malloc(dir_sectors * SECTOR_SIZE);
if (!temp) return -1;
cd_read_sectors(dir_sector, dir_sectors, temp);
// Parse directory
uint8_t *ptr = temp + (dir_offset % SECTOR_SIZE);
for (uint32_t i = 0; i < wad.numlumps; i++) {
wad.directory[i].offset = ptr[0] | (ptr[1] << 8) |
(ptr[2] << 16) | (ptr[3] << 24);
wad.directory[i].size = ptr[4] | (ptr[5] << 8) |
(ptr[6] << 16) | (ptr[7] << 24);
memcpy(wad.directory[i].name, ptr + 8, 8);
ptr += 16;
}
free(temp);
// Allocate lump cache (128KB)
wad.cache_size = 128 * 1024;
wad.cache = malloc(wad.cache_size);
wad.cached_lump = -1;
return 0;
}
// Look up a lump by name.
// `name` is upper-cased (ASCII only) and zero-padded to 8 bytes, then
// compared against each directory entry as a raw 8-byte key; characters
// beyond the eighth are ignored.
// Returns the index of the LAST matching entry, so that later lumps
// override earlier ones, or -1 when there is no match.
int W_CheckNumForName(const char *name)
{
    char key[8];
    int len = 0;

    memset(key, 0, sizeof key);
    while (len < 8 && name[len]) {
        char ch = name[len];
        if (ch >= 'a' && ch <= 'z') {
            ch -= 32;  // ASCII lower -> upper
        }
        key[len] = ch;
        len++;
    }

    // Walk the directory from the end towards the start.
    int idx = wad.numlumps - 1;
    while (idx >= 0) {
        if (memcmp(wad.directory[idx].name, key, 8) == 0) {
            return idx;
        }
        idx--;
    }
    return -1;
}
// Like W_CheckNumForName, but a missing lump is reported through I_Error.
// Returns the lump index; if I_Error returns, the negative lookup result is
// passed back unchanged.
int W_GetNumForName(const char *name)
{
    const int lump = W_CheckNumForName(name);

    if (lump >= 0) {
        return lump;
    }

    I_Error("W_GetNumForName: %s not found!", name);
    return lump;
}
// Size in bytes of lump number `lump`, as recorded in the directory.
// Out-of-range lump numbers yield 0 rather than an error.
int W_LumpLength(int lump)
{
    const int count = (int)wad.numlumps;

    if (lump >= 0 && lump < count) {
        return wad.directory[lump].size;
    }
    return 0;
}
// Read lump from CD
void *W_CacheLumpNum(int lump, int tag)
{
if (lump < 0 || lump >= (int)wad.numlumps) return NULL;
struct wad_dirent *entry = &wad.directory[lump];
// Check cache
if (wad.cached_lump == lump) {
return wad.cache;
}
// Allocate from zone if larger than cache
uint8_t *buffer;
if (entry->size > wad.cache_size) {
buffer = Z_Malloc(entry->size, tag, NULL);
} else {
buffer = wad.cache;
wad.cached_lump = lump;
}
// Calculate CD sectors
uint32_t file_offset = entry->offset;
uint32_t sector = wad.cd_start_sector + (file_offset / SECTOR_SIZE);
uint32_t sector_offset = file_offset % SECTOR_SIZE;
// Read sectors
uint32_t bytes_remaining = entry->size;
uint8_t *dest = buffer;
uint8_t sector_buf[SECTOR_SIZE];
// Handle first partial sector
if (sector_offset != 0) {
cd_read_sectors(sector, 1, sector_buf);
uint32_t copy = SECTOR_SIZE - sector_offset;
if (copy > bytes_remaining) copy = bytes_remaining;
memcpy(dest, sector_buf + sector_offset, copy);
dest += copy;
bytes_remaining -= copy;
sector++;
}
// Read full sectors directly
if (bytes_remaining >= SECTOR_SIZE) {
uint32_t full_sectors = bytes_remaining / SECTOR_SIZE;
cd_read_sectors(sector, full_sectors, dest);
dest += full_sectors * SECTOR_SIZE;
bytes_remaining -= full_sectors * SECTOR_SIZE;
sector += full_sectors;
}
// Handle last partial sector
if (bytes_remaining > 0) {
cd_read_sectors(sector, 1, sector_buf);
memcpy(dest, sector_buf, bytes_remaining);
}
return buffer;
}
// Name-based convenience wrapper: resolves `name` with W_GetNumForName and
// loads the resulting lump through W_CacheLumpNum using `tag`.
void *W_CacheLumpName(const char *name, int tag)
{
    const int lump = W_GetNumForName(name);

    return W_CacheLumpNum(lump, tag);
}
// src/texture_cache.c
// Streaming texture cache for limited RAM.
// Textures are copied into one fixed-size buffer and tracked by a bounded
// table of entries; both limits are set here.
#define TEXTURE_CACHE_SIZE (256 * 1024) // Capacity of the texture buffer in bytes (256KB)
#define MAX_CACHED_TEXTURES 64 // Maximum number of textures tracked at once
// Bookkeeping for one texture currently held in the cache buffer.
struct cached_texture {
int lump; // WAD lump number this entry was loaded from (lookup key)
uint8_t *data; // Location of the texture bytes inside the cache buffer; updated when the cache is compacted
uint32_t size; // Size in bytes
uint32_t last_used; // Value of the cache's frame counter when last requested
uint8_t locked; // Non-zero: skipped when choosing an eviction victim
};
// The texture cache itself: one contiguous buffer plus the table
// describing which textures occupy it.
struct texture_cache {
uint8_t *buffer; // Heap block of TEXTURE_CACHE_SIZE bytes holding all texture data
uint32_t used; // Bytes of `buffer` currently occupied, counted from its start
struct cached_texture entries[MAX_CACHED_TEXTURES]; // Only the first `num_entries` are valid
int num_entries; // Number of valid entries
uint32_t frame_count; // Frame counter, advanced by texture_cache_frame_end()
};
// Single cache instance used by every texture_cache_* function.
static struct texture_cache tex_cache;
// Put the texture cache into its empty state and allocate its backing
// buffer of TEXTURE_CACHE_SIZE bytes. The allocation result is stored
// without being checked.
void texture_cache_init(void)
{
    tex_cache.frame_count = 0;
    tex_cache.num_entries = 0;
    tex_cache.used = 0;
    tex_cache.buffer = malloc(TEXTURE_CACHE_SIZE);
}
// Find oldest unlocked texture to evict
static int find_eviction_candidate(uint32_t needed_size)
{
int oldest_idx = -1;
uint32_t oldest_frame = 0xFFFFFFFF;
for (int i = 0; i < tex_cache.num_entries; i++) {
if (tex_cache.entries[i].locked) continue;
if (tex_cache.entries[i].last_used < oldest_frame) {
oldest_frame = tex_cache.entries[i].last_used;
oldest_idx = i;
}
}
return oldest_idx;
}
// Compact cache (defragment)
static void compact_cache(void)
{
uint8_t *write_ptr = tex_cache.buffer;
for (int i = 0; i < tex_cache.num_entries; i++) {
if (tex_cache.entries[i].data != write_ptr) {
memmove(write_ptr, tex_cache.entries[i].data,
tex_cache.entries[i].size);
tex_cache.entries[i].data = write_ptr;
}
write_ptr += tex_cache.entries[i].size;
}
tex_cache.used = write_ptr - tex_cache.buffer;
}
// Remove entry `index` from the table and repack the buffer.
// The remaining entries are shifted down rather than swapped so the table
// stays in ascending address order, which compact_cache() depends on.
static void texture_cache_remove(int index)
{
    const int tail = tex_cache.num_entries - index - 1;

    if (tail > 0) {
        memmove(&tex_cache.entries[index], &tex_cache.entries[index + 1],
                (size_t)tail * sizeof tex_cache.entries[0]);
    }
    tex_cache.num_entries--;
    compact_cache();  // also recomputes tex_cache.used
}

// Get texture, loading from CD if needed.
//
// lump:     WAD lump number of the texture.
// out_size: optional; when non-NULL receives the lump size in bytes.
//
// Returns a pointer to the texture bytes inside the cache buffer, or NULL
// when the texture cannot be cached: the lump does not fit in the cache at
// all, everything that would have to be evicted is locked, the cache buffer
// was never allocated, or the lump could not be read.
//
// Loading a texture may evict others and compact the buffer, which moves
// the data of entries that stay cached (locked ones included). Pointers
// returned by earlier calls must therefore be treated as invalid after any
// call that misses.
uint8_t *texture_cache_get(int lump, int *out_size)
{
    // Check if already cached
    for (int i = 0; i < tex_cache.num_entries; i++) {
        struct cached_texture *hit = &tex_cache.entries[i];
        if (hit->lump == lump) {
            hit->last_used = tex_cache.frame_count;
            if (out_size) {
                *out_size = hit->size;
            }
            return hit->data;
        }
    }

    // Need to load from CD
    const uint32_t size = W_LumpLength(lump);
    if (out_size) {
        *out_size = size;
    }

    // Bail out before touching the cache if this request can never succeed;
    // otherwise the eviction loop would throw away every unlocked texture
    // first and still fail.
    if (tex_cache.buffer == NULL || size > TEXTURE_CACHE_SIZE) {
        return NULL;
    }

    // Make room: both a free table slot and enough free bytes are required.
    while (tex_cache.num_entries >= MAX_CACHED_TEXTURES ||
           tex_cache.used + size > TEXTURE_CACHE_SIZE) {
        const int victim = find_eviction_candidate(size);
        if (victim < 0) {
            // Can't evict anything, cache is all locked
            return NULL;
        }
        texture_cache_remove(victim);
    }

    // Load from WAD before registering the entry, so a failed read leaves
    // the table untouched.
    const uint8_t *staged = W_CacheLumpNum(lump, PU_STATIC);
    if (staged == NULL) {
        return NULL;
    }

    struct cached_texture *entry = &tex_cache.entries[tex_cache.num_entries];
    entry->lump = lump;
    entry->data = tex_cache.buffer + tex_cache.used;
    entry->size = size;
    entry->last_used = tex_cache.frame_count;
    entry->locked = 0;
    memcpy(entry->data, staged, size);

    tex_cache.num_entries++;
    tex_cache.used += size;

    // `staged` is intentionally not passed to Z_Free: the W_CacheLumpNum in
    // this codebase returns its shared read buffer for every lump that fits
    // in it, and that buffer is not a zone allocation.
    // NOTE(review): lumps larger than that read buffer come back as a
    // PU_STATIC zone block, which is not released here. Fixing that properly
    // needs a read API with unambiguous ownership (e.g. read-into-buffer).
    return entry->data;
}
// Advance the cache's frame counter by one. The counter is the timestamp
// recorded in an entry's last_used field whenever that texture is requested.
void texture_cache_frame_end(void)
{
    tex_cache.frame_count += 1;
}
// Mark the first cached entry for `lump` as locked so it is passed over
// when an eviction victim is chosen. Does nothing if the lump is not
// currently cached.
void texture_cache_lock(int lump)
{
    int idx = 0;

    while (idx < tex_cache.num_entries && tex_cache.entries[idx].lump != lump) {
        idx++;
    }
    if (idx < tex_cache.num_entries) {
        tex_cache.entries[idx].locked = 1;
    }
}
// Clear the locked flag on every cached entry. The textures themselves stay
// in the cache; they merely become eligible for eviction again.
void texture_cache_unlock_all(void)
{
    for (int idx = tex_cache.num_entries; idx-- > 0; ) {
        tex_cache.entries[idx].locked = 0;
    }
}
// src/level_load.c
// Optimized level loading with preloading
// Defined further down in this file; it is static, so it has to be declared
// before its first use here.
static void prefetch_level_textures(void);

// Load one level from the WAD.
//
// episode, map: level to load. Commercial game mode uses "MAPxx" marker
//               lumps and ignores `episode`; otherwise the marker is "ExMy".
//
// Draws the loading screen, releases the previous level's texture locks,
// runs the P_Load* steps on the lumps that follow the level marker, and
// finally prefetches textures for the new level. A missing marker lump is
// reported by W_GetNumForName.
void P_SetupLevel_Saturn(int episode, int map)
{
    // 8 name characters plus the terminator; snprintf keeps an out-of-range
    // episode/map number from writing past the buffer.
    char lumpname[9];

    // Show loading screen
    V_DrawPatch(0, 0, loading_screen);
    I_FinishUpdate();

    // Build level lump name
    if (gamemode == commercial) {
        snprintf(lumpname, sizeof lumpname, "MAP%02d", map);
    } else {
        snprintf(lumpname, sizeof lumpname, "E%dM%d", episode, map);
    }
    const int lump = W_GetNumForName(lumpname);

    // Release the previous level's texture locks. Cached textures are kept,
    // but all of them become evictable again.
    texture_cache_unlock_all();

    // Level lumps are sequential after the marker:
    //   +1 THINGS, +2 LINEDEFS, +3 SIDEDEFS, +4 VERTEXES, +5 SEGS,
    //   +6 SSECTORS, +7 NODES, +8 SECTORS, +9 REJECT, +10 BLOCKMAP
    // The load order below differs from the lump order on purpose and is
    // preserved as-is.
    P_LoadVertexes(lump + 4);
    P_LoadSectors(lump + 8);
    P_LoadSideDefs(lump + 3);
    P_LoadLineDefs(lump + 2);
    P_LoadSubsectors(lump + 6);
    P_LoadNodes(lump + 7);
    P_LoadSegs(lump + 5);
    P_LoadBlockMap(lump + 10);
    P_LoadReject(lump + 9);
    P_LoadThings(lump + 1);

    // Prefetch commonly used textures
    prefetch_level_textures();
}
// Warm the texture cache for the level being loaded and lock the textures
// that should stay resident. Currently this only covers the sky lump.
// Prefetching is purely an optimisation, so a missing lump or a full cache
// is skipped silently instead of being treated as an error.
static void prefetch_level_textures(void)
{
    // Lock key textures in cache:
    // - Sky texture
    // - Common wall textures (analyze sidedefs)
    // - Floor/ceiling textures (analyze sectors)

    // Lock sky. The non-fatal lookup is used because not every WAD is
    // guaranteed to contain a lump with this exact name.
    const int sky_lump = W_CheckNumForName("SKY1");
    if (sky_lump >= 0) {
        // texture_cache_get writes the lump size through its second
        // argument, so hand it real storage even though the value is unused.
        int sky_size = 0;
        if (texture_cache_get(sky_lump, &sky_size) != NULL) {
            texture_cache_lock(sky_lump);
        }
    }

    // TODO: Analyze level for common textures and preload
}
2 MB Main RAM Allocation:
├── Code + Static Data: ~400 KB
├── WAD Directory: ~50 KB (kept in RAM)
├── Level Geometry: ~100 KB (varies per level)
├── Texture Cache: ~256 KB
├── Lump Read Cache: ~128 KB
├── DOOM Heap (Z_Zone): ~800 KB
├── Framebuffer (if in RAM): ~64 KB (or use VDP2 VRAM)
├── Stack: ~16 KB
└── Misc/Reserve: ~234 KB (remainder of the 2,048 KB total)
Sound RAM (512 KB):
├── 68000 Driver: ~8 KB
├── Command Buffer: ~256 bytes
├── SFX Samples: ~384 KB
└── Music: ~96 KB (if using sampled music)
Want me to detail music playback (MUS format synthesis) or the VDP2 scaling modes for resolution flexibility?