Skip to content

Instantly share code, notes, and snippets.

@7flash
Created September 28, 2025 01:16
Show Gist options
  • Select an option

  • Save 7flash/ea0977e6fa4eff53805ea25e60dde932 to your computer and use it in GitHub Desktop.

Select an option

Save 7flash/ea0977e6fa4eff53805ea25e60dde932 to your computer and use it in GitHub Desktop.
sept28-ttt-llm-verilog
// Testbench for tictactoe_predictor
//
// Drives three 32-bit stimulus vectors into the DUT, prints the input and the
// predicted position after each one, and dumps all signals to a VCD file for
// waveform viewing.
//
// Checks use the case-inequality operator (!==) rather than (!=): with (!=),
// an unknown (X/Z) DUT output makes the comparison evaluate to X, the `if`
// is not taken, and the test would silently "pass".
//
// NOTE(review): the expected values below (4 and 8) only make sense once the
// DUT holds trained weights -- confirm against the weight set actually loaded.
`timescale 1ns / 1ps
module tb_tictactoe_predictor;
    reg  [31:0] input_bits;   // stimulus driven into the DUT
    wire [3:0]  output_pos;   // predicted position reported by the DUT

    // Instantiate the DUT (Design Under Test)
    tictactoe_predictor dut (
        .input_bits(input_bits),
        .output_pos(output_pos)
    );

    // NOTE(review): the mapping from the textual board encoding
    // ("0000 0010 0100") onto the 32-bit vector (MSB-first vs LSB-first,
    // where the padding goes) is not established anywhere in this file.
    // The literals below are kept exactly as originally written; confirm
    // them against the encoding used during training.
    initial begin
        $dumpfile("tictactoe_sim.vcd");        // VCD for waveform viewer
        $dumpvars(0, tb_tictactoe_predictor);  // Dump every signal below this module

        // Test 1: expects position 4 (0100).
        // NOTE(review): originally labelled "empty board", but the vector is
        // 0x00000014, not all-zero -- the all-zero vector is Test 3. Confirm.
        input_bits = 32'b00000000000000000000000000010100;
        #10; // Let the combinational logic settle
        $display("Input: %b, Output pos: %b (dec: %d)", input_bits, output_pos, output_pos);
        if (output_pos !== 4'd4) $error("Test 1 failed!");

        // Test 2: example "0000 0010 0100" -> expects position 8 (1000).
        // NOTE(review): bit order of the flattened vector is unverified.
        input_bits = 32'b00000000000000000000010010000000;
        #10;
        $display("Input: %b, Output pos: %b (dec: %d)", input_bits, output_pos, output_pos);
        if (output_pos !== 4'd8) $error("Test 2 failed!");

        // Test 3: all-zero input. No specific move is expected, so only check
        // that the output is fully known and is a legal board position (0..8).
        input_bits = 32'b00000000000000000000000000000000;
        #10;
        $display("Input: %b, Output pos: %b (dec: %d)", input_bits, output_pos, output_pos);
        if (((^output_pos) === 1'bx) || (output_pos > 4'd8)) $error("Test 3 failed!");

        $finish; // End sim
    end
endmodule
// Tic-Tac-Toe O-Move Predictor: Verilog Hardware Inference Engine
//
// Simplified MLP, purely combinational (no clock; add clk/rst and pipeline
// registers for a real FPGA/ASIC implementation):
//
//   32-bit input -> FC1 (32x64) -> ReLU -> FC2 (64x9) -> argmax -> 4-bit index
//
// Arithmetic:
//   - Weights and biases are 8-bit signed integers. The intended workflow is
//     to scale trained float weights (e.g. *128 and round) into [-128, 127].
//   - Each layer accumulates in a wider signed register, arithmetic-shifts the
//     sum right by 7 to undo the weight scaling, adds the bias, and saturates
//     the result to 8 bits.
//   - FC1 accumulator: 16 bits (32 terms of at most |128| fit easily).
//   - FC2 accumulator: 22 bits (64 products of up to 127*128 need 21 bits;
//     a 16-bit accumulator would overflow).
//   - ReLU: max(0, x).
//   - Argmax: index (0..8) of the largest logit; ties resolve to the lowest
//     index because the comparison is strict.
//
// Weights:
//   - Stored in 1-D reg arrays (row-major: index = row * columns + column),
//     which keeps them compatible with $readmemh-style loading.
//   - Only a handful of EXAMPLE values are set below; every other weight and
//     all biases are zero-filled so simulation produces defined (non-X)
//     outputs. Replace with the trained values before use.
//
// Ports:
//   input_bits : 32-bit board encoding; bit i feeds FC1 input i.
//                NOTE(review): the original comments describe this as
//                "MSB first, pos0" -- confirm the bit ordering against the
//                encoder used for training.
//   output_pos : 4-bit binary position 0..8 (e.g. 1000 for pos 8).
//
// Synthesize with tools like Vivado; simulate with ModelSim.
module tictactoe_predictor (
    input  [31:0] input_bits, // 32-bit input vector (bits as 0/1)
    output [3:0]  output_pos  // 4-bit binary position (0-8)
);
    localparam N_IN  = 32; // FC1 inputs
    localparam N_HID = 64; // hidden neurons
    localparam N_OUT = 9;  // output logits (board positions)

    // ------------------------------------------------------------------
    // Weight / bias storage (regs, because they are written procedurally)
    // ------------------------------------------------------------------
    reg signed [7:0] fc1_w [0:N_IN*N_HID-1];  // fc1_w[i*N_HID + h]
    reg signed [7:0] fc1_b [0:N_HID-1];
    reg signed [7:0] fc2_w [0:N_HID*N_OUT-1]; // fc2_w[h*N_OUT + p]
    reg signed [7:0] fc2_b [0:N_OUT-1];

    integer k;
    initial begin
        // Zero-fill everything first so no weight is left at X.
        for (k = 0; k < N_IN*N_HID;  k = k + 1) fc1_w[k] = 8'sd0;
        for (k = 0; k < N_HID;       k = k + 1) fc1_b[k] = 8'sd0;
        for (k = 0; k < N_HID*N_OUT; k = k + 1) fc2_w[k] = 8'sd0;
        for (k = 0; k < N_OUT;       k = k + 1) fc2_b[k] = 8'sd0;

        // Example values only. TODO: load the real trained weights here
        // (32*64 = 2048 FC1 weights, 64*9 = 576 FC2 weights).
        fc1_w[0*N_HID + 0] =  8'sd5;
        fc1_w[0*N_HID + 1] = -8'sd12;
        fc2_w[0*N_OUT + 0] =  8'sd34;
        fc2_w[0*N_OUT + 1] = -8'sd21;
    end

    // Saturate a wide signed value into the 8-bit signed range [-128, 127].
    function signed [7:0] sat8;
        input signed [23:0] v;
        begin
            if (v > 24'sd127)
                sat8 = 8'sd127;
            else if (v < -24'sd128)
                sat8 = -8'sd128;
            else
                sat8 = v[7:0];
        end
    endfunction

    // ------------------------------------------------------------------
    // Inter-layer signals
    // ------------------------------------------------------------------
    wire signed [7:0] hidden [0:N_HID-1]; // post-ReLU hidden activations
    // Logits are packed into one bus (logit p occupies bits [p*8 +: 8]) so the
    // argmax always-block below is sensitive to every logit.
    wire [N_OUT*8-1:0] logits_flat;

    genvar i, h, p;

    // ------------------------------------------------------------------
    // FC1 + ReLU:
    //   hidden[h] = relu(sat8((sum_i input_bits[i] * fc1_w[i][h]) >>> 7 + fc1_b[h]))
    // Inputs are 0/1, so each term is either the weight or nothing.
    // The sum is built as a chain of partial sums (acc[0] .. acc[N_IN]) so
    // every net has exactly one driver.
    // ------------------------------------------------------------------
    generate
        for (h = 0; h < N_HID; h = h + 1) begin : fc1_layer
            wire signed [15:0] acc [0:N_IN]; // acc[n] = sum of the first n terms
            wire signed [23:0] pre_act;      // scaled sum + bias, before saturation
            wire signed [7:0]  pre_relu;     // saturated pre-activation

            assign acc[0] = 16'sd0;
            for (i = 0; i < N_IN; i = i + 1) begin : fc1_mac
                assign acc[i+1] = input_bits[i] ? (acc[i] + fc1_w[i*N_HID + h])
                                                : acc[i];
            end

            assign pre_act   = (acc[N_IN] >>> 7) + fc1_b[h];
            assign pre_relu  = sat8(pre_act);
            assign hidden[h] = (pre_relu > 8'sd0) ? pre_relu : 8'sd0; // ReLU
        end
    endgenerate

    // ------------------------------------------------------------------
    // FC2:
    //   logit[p] = sat8((sum_h hidden[h] * fc2_w[h][p]) >>> 7 + fc2_b[p])
    // ------------------------------------------------------------------
    generate
        for (p = 0; p < N_OUT; p = p + 1) begin : fc2_layer
            wire signed [21:0] acc [0:N_HID]; // wide enough for 64 8x8 products
            wire signed [23:0] pre_act;

            assign acc[0] = 22'sd0;
            for (h = 0; h < N_HID; h = h + 1) begin : fc2_mac
                assign acc[h+1] = acc[h] + hidden[h] * fc2_w[h*N_OUT + p];
            end

            assign pre_act               = (acc[N_HID] >>> 7) + fc2_b[p];
            assign logits_flat[p*8 +: 8] = sat8(pre_act);
        end
    endgenerate

    // ------------------------------------------------------------------
    // Argmax: index of the largest logit (first one wins on ties).
    // Uses an integer loop variable; genvars are not legal in procedural code.
    // ------------------------------------------------------------------
    reg        [3:0] argmax_idx;
    reg signed [7:0] max_logit;
    reg signed [7:0] cand;
    integer          n;

    always @(*) begin
        argmax_idx = 4'd0;
        max_logit  = $signed(logits_flat[7:0]);
        for (n = 1; n < N_OUT; n = n + 1) begin
            cand = $signed(logits_flat[n*8 +: 8]);
            if (cand > max_logit) begin
                max_logit  = cand;
                argmax_idx = n[3:0];
            end
        end
    end

    assign output_pos = argmax_idx; // Direct 4-bit output (pos 0-8 in binary)
endmodule
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment