Skip to content

Instantly share code, notes, and snippets.

@falseywinchnet
Last active December 24, 2025 13:11
Show Gist options
  • Select an option

  • Save falseywinchnet/28125372913f4f770fd09f9e961a2a41 to your computer and use it in GitHub Desktop.

Select an option

Save falseywinchnet/28125372913f4f770fd09f9e961a2a41 to your computer and use it in GitHub Desktop.
import numpy as np
import numba
# Precompute twiddle factors for a 512-point FFT
tw = [np.exp(-1.0 * 1.0j * np.pi * np.arange(((2**i)/2), dtype=np.complex128) / ((2**i)/2)) for i in range(1, 10)]
# Flatten and prepare for Numba-friendly 2D array [N, 1]
twiddlefactors = np.concatenate([arr.reshape(-1, 1) for arr in tw]).astype(np.complex128)
@numba.jit(numba.complex128[:](numba.float64[:], numba.complex128[:,:]), fastmath=True, nopython=True)
def unrolled_numba_rfft(input_data: np.ndarray, twiddlefactors: np.ndarray):
# Butterfly matrix for the base case (Stage 0)
M = np.array([[1.+0.0j, 1.+0.0j], [1.+0.0j, -1.-1.2246468e-16j]], dtype=numba.complex128)
# Intermediate buffers
X_stage_0 = np.zeros((2, 256), dtype=numba.complex128)
X_stage_1 = np.zeros((4, 128), dtype=numba.complex128)
X_stage_2 = np.zeros((8, 64), dtype=numba.complex128)
X_stage_3 = np.zeros((16, 32), dtype=numba.complex128)
X_stage_4 = np.zeros((32, 16), dtype=numba.complex128)
X_stage_5 = np.zeros((64, 8), dtype=numba.complex128)
X_stage_6 = np.zeros((128, 4), dtype=numba.complex128)
X_stage_7 = np.zeros((256, 2), dtype=numba.complex128)
X_stage_8 = np.zeros((512, 1), dtype=numba.complex128)
# Stage-0: Base Butterfly
X_stage_0[0, :] = input_data[:256] + input_data[256:]
X_stage_0[1, :] = input_data[:256] - input_data[256:]
# Stage 1
e, q = 2, 128
temp = twiddlefactors[(e-1):(2*e)-1] * X_stage_0[:e, q:2*q]
X_stage_1[:e, :q] = X_stage_0[:e, :q] + temp
X_stage_1[e:e*2, :q] = X_stage_0[:e, :q] - temp
# Stage 2
e, q = 4, 64
temp = twiddlefactors[(e-1):(2*e)-1] * X_stage_1[:e, q:2*q]
X_stage_2[:e, :q] = X_stage_1[:e, :q] + temp
X_stage_2[e:e*2, :q] = X_stage_1[:e, :q] - temp
# Stage 3
e, q = 8, 32
temp = twiddlefactors[(e-1):(2*e)-1] * X_stage_2[:e, q:2*q]
X_stage_3[:e, :q] = X_stage_2[:e, :q] + temp
X_stage_3[e:e*2, :q] = X_stage_2[:e, :q] - temp
# Stage 4
e, q = 16, 16
temp = twiddlefactors[(e-1):(2*e)-1] * X_stage_3[:e, q:2*q]
X_stage_4[:e, :q] = X_stage_3[:e, :q] + temp
X_stage_4[e:e*2, :q] = X_stage_3[:e, :q] - temp
# Stage 5
e, q = 32, 8
temp = twiddlefactors[(e-1):(2*e)-1] * X_stage_4[:e, q:2*q]
X_stage_5[:e, :q] = X_stage_4[:e, :q] + temp
X_stage_5[e:e*2, :q] = X_stage_4[:e, :q] - temp
# Stage 6
e, q = 64, 4
temp = twiddlefactors[(e-1):(2*e)-1] * X_stage_5[:e, q:2*q]
X_stage_6[:e, :q] = X_stage_5[:e, :q] + temp
X_stage_6[e:e*2, :q] = X_stage_5[:e, :q] - temp
# Stage 7
e, q = 128, 2
temp = twiddlefactors[(e-1):(2*e)-1] * X_stage_6[:e, q:2*q]
X_stage_7[:e, :q] = X_stage_6[:e, :q] + temp
X_stage_7[e:e*2, :q] = X_stage_6[:e, :q] - temp
# Stage 8
e, q = 256, 1
temp = twiddlefactors[(e-1):(2*e)-1] * X_stage_7[:e, q:2*q]
X_stage_8[:e, :q] = X_stage_7[:e, :q] + temp
X_stage_8[e:e*2, :q] = X_stage_7[:e, :q] - temp
# Ensure Nyquist frequency is purely real
X_stage_8[256, 0] = X_stage_8[256, 0].real + 0.0j
return X_stage_8[:257, 0]
# --- Verification Logic ---
# Generate random signal
np.random.seed(42)
test_input = np.random.rand(512).astype(np.float64)
# Execute custom RFFT
custom_result = unrolled_numba_rfft(test_input, twiddlefactors)
# Execute Numpy RFFT
numpy_result = np.fft.rfft(test_input)
# Comparison
is_close = np.allclose(custom_result, numpy_result)
max_diff = np.max(np.abs(custom_result - numpy_result))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment