Skip to content

Instantly share code, notes, and snippets.

@zoon
Created November 9, 2025 01:16
Show Gist options
  • Select an option

  • Save zoon/316fdd296b1bb8eb7a8cbf0f72cd1d69 to your computer and use it in GitHub Desktop.

Select an option

Save zoon/316fdd296b1bb8eb7a8cbf0f72cd1d69 to your computer and use it in GitHub Desktop.
--!strict
--!optimize 2
--intentionally not !native
-- Copyright (c) 2025, Andrei Zhilin (https://github.com/zoon)
-- SPDX-License-Identifier: MIT
-- Base64Chrome implements high-throughput Base64 codecs that mirror Chrome/WHATWG behavior.
-- Unsafe/strict decoders, MIME-aware encoder, and reusable lookup tables are laid out below for speed-critical code.
-- All routines assume little-endian buffers and stay within RFC 4648 alphabets to avoid expensive branching.
-- References:
-- * RFC 4648 (base64 / base64url): https://www.rfc-editor.org/rfc/rfc4648
-- * RFC 2045 (MIME 76-col lines): https://www.rfc-editor.org/rfc/rfc2045
-- * WHATWG Forgiving base64: https://infra.spec.whatwg.org/#forgiving-base64
-- Low Level API (internal):
-- Encoders:
-- - enc64 -- standard encoder; alphabet selectable; optional padding
-- - enc64_mime -- MIME encoder; STD alphabet; always padded; wraps at ≤76 cols (CRLF)
-- Decoders:
-- - dec64_unsafe -- very fast; does NOT validate input (*)
-- - dec64_safe -- validates input; ~30% slower; supports RFC 4648 + forgiving rules
-- Alphabets
-- - 'STD' — RFC 4648 §4: uses '+' and '/'.
-- - 'URL_SAFE' — RFC 4648 §5 (URL and Filename Safe Alphabet): uses '-' and '_'.
--
-- (*) As of 2025, no luau Base64 decoder (including HTTPService:JSONDecode for buffer) validates input data.
--[=[ Low-level API usage:
```luau
local chrome = require"./Base64Chrome"
local enc64 = chrome.internal.enc64
local dec64 = chrome.internal.dec64_unsafe
local enc_lut = chrome.internal.STD_ENC_LUT()
local dec_lut = chrome.internal.STD_DEC_LUT()
function fast_string_encoder(s: string) : string -- base64
local strlen = #s
local input = buffer.fromstring(s)
local output = buffer.create(chrome.internal.encoded_length(strlen))
local bytes_written = enc64(output, 0, input, 0, strlen, enc_lut, false)
return buffer.readstring(output, 0, bytes_written)
end
function fast_string_decoder(b64: string): string
local strlen = #b64
local inout = buffer.fromstring(b64)
local bytes_written = chrome.internal.dec64_unsafe(inout, 0, strlen, dec_lut)
return buffer.readstring(inout, 0, bytes_written)
end
function fast_buffer_encoder(b: buffer) : string -- base64
local len = buffer.len(b)
local output = buffer.create(chrome.internal.encoded_length(len))
local bytes_written = enc64(output, 0, b, 0, len, enc_lut, false)
return buffer.readstring(output, 0, bytes_written)
end
function fast_buffer_decoder(b64: string): buffer
local strlen = #b64
local inout = buffer.fromstring(b64)
local bytes_written = chrome.internal.dec64_unsafe(inout, 0, strlen, dec_lut)
local out = buffer.create(bytes_written)
buffer.copy(out, 0, inout, 0, bytes_written)
return out
end
-- If you know your input data and can mutate the input buffer, you can avoid extra allocations:
function fast_buffer_decoder_opt(input: buffer): (buffer, int)
local size = buffer.len(input)
local bytes_written = chrome.internal.dec64_unsafe(input, 0, size, dec_lut)
return input, bytes_written
end
```
--]=]
-- NOTE(review): this table is never read in the visible code; presumably it only keeps the
-- usage docblock above from attaching to the next declaration — confirm before removing.
local _ = {}
-- Performance notes for the future:
-- - 12-bit LUTs a la Alfred Klomp are 1.5x slower in luau (encode & decode), even with unrolling
-- - Sum/mult is faster than bor/lshift (benchmark it if in doubt)
-- - Any branching in a tight loop: up to -30% performance
-- - 4x buffer.readu8 is faster than 1x buffer.readu32 + unpacking
-- - 3x unrolling gives ~10%, slightly better for encode - not worth it, stay lean
-- Strictly little-endian: a native-endian 16-bit pack of 1 must come out low byte first,
-- otherwise the module refuses to load.
assert(string.pack("=i2", 1) == "\1\0", "system must be little-endian")
-- Short type aliases used in the signatures throughout this file.
type int = number
type buf = buffer
type str = string
type bool = boolean
-- Shared scratch space meant to avoid small buffer allocations: 1024 * 2048 bytes (2 MiB),
-- created once at load time. In the visible code it is referenced only by the commented-out dev test.
local _SCRATCH_BUFFER = buffer.create(1024 * 2048)
-- To be strictly RFC 2045-compliant the line separator must be CRLF, but most decoders will accept LF-only.
-- This flag selects which of the two `write_eol` emits and how `encoded_length` sizes line breaks.
local STRICT_MIME_COMPLIANCE = true
-- CR (13) in the low byte and LF (10) in the high byte, so a single 16-bit write stores "\r\n".
local CRLF_LE = 10 * 0x100 + 13
-- Byte value of a bare line feed.
local LF = 10
-- Upper bound applied to the requested wrap column by `line_width`.
local MAX_MIME_LINE_WIDTH = 76
--------------------------------
-- Utility
--------------------------------
--- Number of base64 characters produced by the trailing partial group.
--- @param ntail int Leftover input bytes; expected to be in 0..3
--- @param unpadded bool? When truthy, '=' padding characters are not counted
--- @return int 0 for an empty tail, 4 for a padded tail, otherwise `ntail + 1`
local function tail_output_length(ntail: int, unpadded: bool?): int
	if ntail == 0 then
		return 0
	elseif unpadded then
		-- n leftover bytes need n + 1 sextets when no padding is appended
		return ntail + 1
	end
	-- a padded tail always fills a complete 4-character group
	return 4
end
--- Effective maximum line width for the MIME encoder.
--- @param wrap int? Requested wrap column; nil (or false) means "never wrap"
--- @return int math.huge when wrapping is off; otherwise `wrap` clamped to
---   [4, MAX_MIME_LINE_WIDTH] and rounded down to a multiple of 4
local function line_width(wrap: int?): int
	if wrap then
		local clamped = math.clamp(wrap, 4, MAX_MIME_LINE_WIDTH)
		-- Round down to a multiple of 4 so lines only ever break after a full 4-char output group.
		return (clamped // 4) * 4
	end
	return math.huge
end
--- Write one line terminator into `b` at `offset`.
--- Emits CRLF (2 bytes) when STRICT_MIME_COMPLIANCE is set, otherwise a bare LF (1 byte).
--- @param b buf Buffer to write into
--- @param offset int Position of the terminator
--- @return int Offset immediately after the terminator
local function write_eol(b: buf, offset: int): int
	local eol_size: int
	if not STRICT_MIME_COMPLIANCE then
		buffer.writeu8(b, offset, LF)
		eol_size = 1
	else
		-- one 16-bit little-endian store puts CR first, then LF
		buffer.writeu16(b, offset, CRLF_LE)
		eol_size = 2
	end
	return offset + eol_size
end
--- Size in bytes of the base64 text produced for `count` input bytes.
--- @param count int Number of raw input bytes
--- @param unpadded bool? When truthy, trailing '=' padding is not counted
--- @param wrap int? Wrap column (normalised by `line_width`); nil means no line breaks
--- @return int Character count, plus the bytes of every line terminator when wrapping
local function encoded_length(count: int, unpadded: bool?, wrap: int?): int
	local groups = count // 3
	local leftover = count - groups * 3
	local chars = groups * 4 + tail_output_length(leftover, unpadded)
	if chars == 0 or not wrap then
		return chars
	end
	-- A terminator sits between lines only, never after the last one, hence (chars - 1).
	local eol_size = if STRICT_MIME_COMPLIANCE then 2 else 1
	local breaks = (chars - 1) // line_width(wrap)
	return chars + breaks * eol_size
end
--- Return a newly allocated buffer holding the same bytes as `b`.
--- @param b buf Buffer to duplicate; it is not modified
local function clone_buffer(b: buf)
	local size = buffer.len(b)
	local copy = buffer.create(size)
	buffer.copy(copy, 0, b, 0, size)
	return copy
end
-----------------------------
-- LUTS
-----------------------------
-- Sentinels stored in the decode tables. Real sextets are 0..63, so any entry >= 64 is "not a digit".
local STATE_INVALID = 0xff
local STATE_WHITESPACE = 0xfe
local STATE_PADDING = 0xfd
local BASE64_MASK = 0x3f
local PADDING_CHAR = string.byte "=" -- 0x3d
-- Two padding characters packed into one 16-bit value, so "==" can be stored with a single write.
local PADDING_CHAR_X2 = PADDING_CHAR * 0x100 + PADDING_CHAR
local WHITESPACE = buffer.fromstring " \n\r\t\f\v"
--- Build the 256-entry decode table matching an encode alphabet.
--- Every byte starts as STATE_INVALID; '=' maps to STATE_PADDING, each alphabet
--- character maps to its 6-bit value, and every byte of `skippable` (when given)
--- maps to STATE_WHITESPACE.
local function build_dec_lut(enc_lut: buffer, skippable: buffer?): buffer
	local dec_lut = buffer.create(256)
	buffer.fill(dec_lut, 0, STATE_INVALID)
	buffer.writeu8(dec_lut, PADDING_CHAR, STATE_PADDING)
	for sextet = 0, BASE64_MASK do
		buffer.writeu8(dec_lut, buffer.readu8(enc_lut, sextet), sextet)
	end
	if skippable then
		for i = 0, buffer.len(skippable) - 1 do
			buffer.writeu8(dec_lut, buffer.readu8(skippable, i), STATE_WHITESPACE)
		end
	end
	return dec_lut
end
-- Standard alphabet; its decode table additionally tolerates whitespace.
local STD_ENC_LUT = buffer.fromstring "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
local STD_DEC_LUT = build_dec_lut(STD_ENC_LUT, WHITESPACE)
-- URL-safe alphabet; always strict, so whitespace stays invalid.
local URL_ENC_LUT = buffer.fromstring "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
local URL_DEC_LUT = build_dec_lut(URL_ENC_LUT)
-----------------------------
-- Low Level Codecs
-----------------------------
--- Base64-encode `count` bytes of `src` into `target` using the given alphabet table.
--- @param target buffer Buffer to write into
--- @param wo int Write offset inside `target`
--- @param src buffer Buffer to read from
--- @param ro int Read offset inside `src`
--- @param count int Number of bytes to read
--- @param lut buffer Encode lookup table (64 entries, sextet -> character)
--- @param nopad bool? If truthy, trailing '=' padding is omitted
--- @return int Bytes written
@native
local function _enc64(target: buf, wo: int, src: buf, ro: int, count: int, lut: buf, nopad: bool?): int
	local out_base = wo
	local src_end = ro + count
	local word = 0
	-- Bulk path: load 4 bytes, use only the first 3, then advance by 3. Requiring
	-- `ro + 4 <= src_end` keeps the 32-bit load inside the input range, which is why
	-- the scalar tail below may see up to 3 leftover bytes (not just 2).
	while ro + 4 <= src_end do
		-- after the swap the three payload bytes occupy bits 31..8
		word = bit32.byteswap(buffer.readu32(src, ro))
		ro += 3
		buffer.writeu8(target, wo + 3, buffer.readu8(lut, bit32.extract(word, 8, 6)))
		buffer.writeu8(target, wo + 2, buffer.readu8(lut, bit32.extract(word, 14, 6)))
		buffer.writeu8(target, wo + 1, buffer.readu8(lut, bit32.extract(word, 20, 6)))
		buffer.writeu8(target, wo, buffer.readu8(lut, bit32.extract(word, 26, 6)))
		wo += 4
	end
	-- Scalar tail: the remaining 0..3 bytes, handled one byte at a time.
	local leftover = src_end - ro
	if leftover >= 1 then
		if not nopad then
			-- Pre-fill "==" in the last two slots; data characters written below replace whatever is not padding.
			buffer.writeu16(target, wo + 2, PADDING_CHAR_X2)
		end
		word = 0
		for k = 0, leftover - 1 do
			word += bit32.lshift(buffer.readu8(src, ro + k), 24 - k * 8)
			buffer.writeu8(target, wo + k, buffer.readu8(lut, bit32.extract(word, 26 - k * 6, 6)))
		end
		-- last significant character: the low bits of the final input byte
		buffer.writeu8(target, wo + leftover, buffer.readu8(lut, bit32.extract(word, 26 - leftover * 6, 6)))
	end
	return (wo - out_base) + tail_output_length(leftover, nopad)
end
--- MIME-compliant base64 encoder: standard alphabet, always padded, with a line
--- terminator (see `write_eol`) inserted whenever a line reaches the wrap width.
--- @param target buffer Buffer to write into
--- @param wo int Write offset inside `target`
--- @param src buffer Buffer to read from
--- @param ro int Read offset inside `src`
--- @param count int Number of bytes to read
--- @param wrap int Requested line width; normalised by `line_width` (clamped to 4..76, multiple of 4)
--- @return int Bytes written, line terminators included
@native
local function _enc64_mime(target: buf, wo: int, src: buf, ro: int, count: int, wrap: int): int
	local out_base = wo
	local max_cols = line_width(wrap)
	local line_start = out_base -- offset at which the current output line began
	local lut = STD_ENC_LUT
	local src_end = ro + count
	local word = 0
	while ro + 4 <= src_end do
		-- readu32: [_, B3, B2, B1] -> byteswap: [B1, B2, B3, _]
		word = bit32.byteswap(buffer.readu32(src, ro))
		ro += 3
		-- stylua: ignore
		buffer.writeu8(target, wo + 3, buffer.readu8(lut, bit32.extract(word, 8, 6)))
		buffer.writeu8(target, wo + 2, buffer.readu8(lut, bit32.extract(word, 14, 6)))
		buffer.writeu8(target, wo + 1, buffer.readu8(lut, bit32.extract(word, 20, 6)))
		buffer.writeu8(target, wo, buffer.readu8(lut, bit32.extract(word, 26, 6)))
		wo += 4
		if wo - line_start >= max_cols then
			-- the line is full: terminate it and start counting a new one
			wo = write_eol(target, wo)
			line_start = wo
		end
	end
	-- Scalar tail: the remaining 0..3 bytes.
	local leftover = src_end - ro
	local tail_chars = tail_output_length(leftover)
	if wo - line_start + tail_chars > max_cols then
		wo = write_eol(target, wo)
	end
	if leftover > 0 then
		-- Pre-fill "==" in the last two slots; data characters written below replace whatever is not padding.
		buffer.writeu16(target, wo + 2, PADDING_CHAR_X2)
		word = 0
		for k = 0, leftover - 1 do
			word += bit32.lshift(buffer.readu8(src, ro + k), 24 - k * 8)
			buffer.writeu8(target, wo + k, buffer.readu8(lut, bit32.extract(word, 26 - k * 6, 6)))
		end
		-- last significant character: the low bits of the final input byte
		buffer.writeu8(target, wo + leftover, buffer.readu8(lut, bit32.extract(word, 26 - leftover * 6, 6)))
	end
	-- assert((wo - out_base) + tail_chars == encoded_length(count, false, max_cols))
	return (wo - out_base) + tail_chars
end
-- Note: every decoder reuses the input buffer for its output, for simplicity.
-- Base64 input is always longer than the bytes it decodes to, so it is safe to
-- store 4 bytes at once at the write cursor.
--- Scalar decode path: consumes characters one at a time starting at `ro` until
--- either one full quad has been decoded or `rend` is reached.
--- Whitespace entries are skipped; an invalid character, a padding character, or a
--- stream ending on a single dangling character raises an error (level 3).
--- @param inout buf Buffer holding the encoded text; decoded bytes are written back into it
--- @param wo int Write offset
--- @param ro int Read offset
--- @param rend int Exclusive end of the readable range
--- @param lut buf Decoder lookup table carrying STATE_* markers
--- @return int, int Updated write offset and read offset
@native
local function dec64_fallback(inout: buf, wo: int, ro: int, rend: int, lut: buf): (int, int) -- wo, ro
	-- `acc` gains 6 bits per accepted character; `taken` counts accepted characters (0..4)
	local acc, taken = 0, 0
	while ro < rend do
		local byte = buffer.readu8(inout, ro)
		ro += 1 -- the byte counts as consumed whatever it turns out to be
		local code = buffer.readu8(lut, byte)
		if code >= 64 then -- not a base64 digit
			if code == STATE_WHITESPACE then
				continue
			end
			if code == STATE_INVALID then
				error(`invalid base64 char at: {ro - 1}: '0x{string.format("%X", byte)}'`, 3)
			end
			-- trailing padding is stripped by the callers, so '=' here is misplaced
			assert(code == STATE_PADDING and byte == PADDING_CHAR, "sanity check failed")
			error(`misplaced padding char at: {ro - 1}: '{string.char(byte)}'`, 3)
		end
		acc = acc * 0x40 + code
		taken += 1
		if taken == 4 then -- full quad: emit 3 bytes (the 4th stored byte is a zero filler)
			buffer.writeu32(inout, wo, bit32.byteswap(acc * 0x100))
			return wo + 3, ro
		end
	end
	-- Input exhausted in the middle of a quad.
	if taken == 1 then
		-- a single sextet cannot encode a whole byte
		error("invalid base64 length", 3)
	elseif taken > 1 then
		-- left-align the collected bits in a 24-bit group, then emit 1 or 2 bytes
		acc = bit32.lshift(acc, 24 - taken * 6)
		buffer.writeu8(inout, wo, bit32.rshift(acc, 16))
		wo += 1
		if taken > 2 then
			buffer.writeu8(inout, wo, bit32.rshift(acc, 8))
			wo += 1
		end
	end
	return wo, ro
end
--- Decodes a base64 block in place without per-quad validation.
--- Trailing '=' padding is stripped here before decoding. Every complete quad is
--- decoded blindly through `lut`, so characters outside the alphabet (whitespace
--- included) silently produce garbage bytes. Only the final partial quad (fewer
--- than 4 characters) goes through `dec64_fallback`, which can raise on an invalid
--- character, misplaced padding, or a single dangling character.
--- @param inout buf Buffer that holds the encoded data and receives the decoded bytes
--- @param from int Offset inside `inout` where reading and writing starts
--- @param count int Number of bytes available to decode starting at `from`
--- @param lut buf Decoder lookup table mapping bytes to their 6-bit value
--- @return int Total decoded byte count produced starting at `from`
@native
local function _dec64_unsafe(inout: buf, from: int, count: int, lut: buf): int -- bytes written
	-- Declared `local` like every other codec in this file: the original definition
	-- leaked a global function named `_dec64_unsafe`.
	local ro, wo = from, from
	local rend = from + count
	-- strip padding
	while rend > from and buffer.readu8(inout, rend - 1) == PADDING_CHAR do
		rend -= 1
	end
	while ro <= rend - 4 do
		local b4 = buffer.readu8(lut, buffer.readu8(inout, ro + 3))
		local b3 = buffer.readu8(lut, buffer.readu8(inout, ro + 2))
		local b2 = buffer.readu8(lut, buffer.readu8(inout, ro + 1))
		local b1 = buffer.readu8(lut, buffer.readu8(inout, ro))
		ro += 4
		-- The expression below combines four 6-bit values (b1..b4) into a 32-bit integer:
		-- big-endian triplet: [B1, B2, B3, 0] = b1<<26|b2<<20|b3<<14|b4<<8
		-- We need a byteswap, because `buffer.writeu32` writes in little-endian order.
		-- The trailing zero is harmlessly overwritten in the next iteration.
		-- '*' and '+' are used for performance over bitwise operations.
		buffer.writeu32(inout, wo, bit32.byteswap(b1 * 0x40000_00 + b2 * 0x1000_00 + b3 * 0x40_00 + b4 * 0x100))
		wo += 3
	end
	if ro < rend then
		-- 1..3 characters left: the scalar path emits the last 1..2 bytes (or raises)
		wo, ro = dec64_fallback(inout, wo, ro, rend, lut)
	end
	assert(ro == rend, "sanity check failed: underconsume stream")
	return wo - from
end
--- Decodes base64 data in place, checking every quad against the supplied LUT.
--- A quad made only of alphabet characters takes the fast path; any quad containing
--- something else (whitespace, padding, invalid bytes) is handed to `dec64_fallback`,
--- which skips whitespace and raises on the rest.
--- @param inout buffer Buffer that holds the encoded data and receives the decoded bytes
--- @param from int Offset inside `inout` where reading/writing starts
--- @param count int Number of bytes to consume from `inout`
--- @param lut buffer Decoder lookup table with STATE_* metadata
--- @return int Total decoded byte count produced starting at `from`
@native
local function _dec64_safe(inout: buf, from: int, count: int, lut: buf): int -- bytes written
	local rend = from + count
	-- Drop all trailing '=' so the loops below only ever see payload characters.
	while rend > from and buffer.readu8(inout, rend - 1) == PADDING_CHAR do
		rend -= 1
	end
	local ro, wo = from, from
	while ro <= rend - 4 do
		local s4 = buffer.readu8(lut, buffer.readu8(inout, ro + 3))
		local s3 = buffer.readu8(lut, buffer.readu8(inout, ro + 2))
		local s2 = buffer.readu8(lut, buffer.readu8(inout, ro + 1))
		local s1 = buffer.readu8(lut, buffer.readu8(inout, ro))
		-- Valid sextets are < 64; OR-ing the four values exposes any STATE_* marker with one comparison.
		if bit32.bor(s1, s2, s3, s4) >= 64 then
			-- some char in the quad is not from the base64 alphabet
			wo, ro = dec64_fallback(inout, wo, ro, rend, lut)
			continue
		end
		-- [B1, B2, B3, 0] assembled big-endian, then swapped for the little-endian 32-bit store
		buffer.writeu32(inout, wo, bit32.byteswap(s1 * 0x4000000 + s2 * 0x100000 + s3 * 0x4000 + s4 * 0x100))
		ro += 4
		wo += 3
	end
	if ro < rend then -- fewer than 4 characters remain
		wo, ro = dec64_fallback(inout, wo, ro, rend, lut)
	end
	assert(ro == rend, "sanity check failed: underconsume stream")
	return wo - from
end
-----------------------------
-- Public API
-----------------------------
local module = {}
--- Encode a whole buffer as padded, standard-alphabet base64
--- (intended to mirror EncodeService:Base64Encode semantics).
--- @param src buffer Buffer containing the raw bytes to encode; it is not modified
--- @return buffer Base64-encoded buffer (always newly allocated)
function module.encode(src: buffer): buffer
	local input_size = buffer.len(src)
	local expected = encoded_length(input_size, false)
	local encoded = buffer.create(expected)
	-- the encoder must fill exactly the space that was sized for it
	assert(_enc64(encoded, 0, src, 0, input_size, STD_ENC_LUT, false) == expected, "sanity check failed")
	return encoded
end
--- Decode a standard-alphabet base64 buffer with the unsafe fast path
--- (intended to match EncodeService:Base64Decode).
--- The input is assumed to contain only valid base64 bytes; it is not validated.
--- Decoding runs on a private copy, so `src` itself is left untouched.
--- @param src buffer Buffer with the encoded data
--- @return buffer Newly allocated buffer with the decoded bytes
function module.decode(src: buffer): buffer
	-- the decoder works in place, so give it a scratch copy of the input
	local scratch = clone_buffer(src)
	local decoded_len = _dec64_unsafe(scratch, 0, buffer.len(scratch), STD_DEC_LUT)
	-- trim to the decoded size
	local decoded = buffer.create(decoded_len)
	buffer.copy(decoded, 0, scratch, 0, decoded_len)
	return decoded
end
-----------------------------
-- Low Level API
-----------------------------
--- Wrap a lookup table in a getter that returns a fresh copy on every call,
--- so callers never receive the module's own table.
local function lut_getter(lut: buf): () -> buf
	return function()
		return clone_buffer(lut)
	end
end
module.internal = table.freeze {
	-- lookup tables: each call returns a private copy
	STD_ENC_LUT = lut_getter(STD_ENC_LUT),
	STD_DEC_LUT = lut_getter(STD_DEC_LUT),
	URL_ENC_LUT = lut_getter(URL_ENC_LUT),
	URL_DEC_LUT = lut_getter(URL_DEC_LUT),
	-- codecs, exported under their names without the leading underscore
	enc64 = _enc64,
	enc64_mime = _enc64_mime,
	dec64_unsafe = _dec64_unsafe,
	dec64_safe = _dec64_safe,
	-- sizing helper
	encoded_length = encoded_length,
}
--[[ dev test
local function grow(b: buf, capacity: int): buf
if buffer.len(b) >= capacity then return b end
local fresh = buffer.create(capacity)
buffer.copy(fresh, 0, b, 0)
return fresh
end
local function encode(s: str): str
local count = #s
local unpadded = false
local enc_len = encoded_length(count, unpadded)
local inout = grow(_SCRATCH_BUFFER, #s + enc_len)
local ro = 0
local wo = count
buffer.writestring(inout, ro, s)
local bytes_written = _enc64(inout, wo, inout, ro, count, STD_ENC_LUT)
return buffer.readstring(inout, wo, bytes_written)
end
local _decode_helper = function(base64: str | buf, from: int?, count: int?): (buffer, int, int)
local start = from or 0
local len: int
local inout: buffer
if type(base64) == "string" then
len = count or #base64 - start
inout = grow(_SCRATCH_BUFFER, len)
buffer.writestring(inout, start, base64, len)
else
len = count or buffer.len(base64) - start
inout = base64
end
return inout, start, len
end
local function decode(base64: str | buf): str
local inout, from, count = _decode_helper(base64)
local dec_count = _dec64_safe(inout, from, count, STD_DEC_LUT)
return buffer.readstring(inout, 0, dec_count)
end
local function decode_unsafe(base64: str | buf): str
local inout, from, count = _decode_helper(base64)
local dec_count = _dec64_unsafe(inout, from, count, STD_DEC_LUT)
return buffer.readstring(inout, 0, dec_count)
end
-- stylua: ignore start
local test_cases = {
{ "", "" },
{ "f", "Zg==" },
{ "fo", "Zm8=" },
{ "foo", "Zm9v" },
{ "foobar", "Zm9vYmFy" },
{ "hello world", "aGVsbG8gd29ybGQ=" },
{ "\0\1\2\3", "AAECAw==" },
{ "Man is distinguished", "TWFuIGlzIGRpc3Rpbmd1aXNoZWQ=" },
{ "test\n\r\t", "dGVzdAoNCQ==" },
}
for i, case in test_cases do
local raw, expected = case[1], case[2]
assert(encode(raw) == expected, string.format("(#%d) err encode: %q -> %q", i, raw, encode(raw)))
assert(decode(expected) == raw, string.format("(#%d) err decode: %q -> %q", i, raw, decode(expected)))
assert(decode_unsafe(expected) == raw, string.format("(#%d) err decode: %q -> %q", i, raw, decode_unsafe(expected)))
end
do -- test wrapping
for i = 1, 64 do
local WRAP_AT = 16
local big_str = string.rep("a", i)
local count = #big_str
local unpadded = false
local enc_len = encoded_length(count, unpadded)
local inout = grow(_SCRATCH_BUFFER, #big_str + enc_len)
local ro = 0
local wo = count
buffer.writestring(inout, ro, big_str)
local ec = _enc64_mime(inout, wo, inout, ro, count, WRAP_AT)
local estr = buffer.readstring(inout, wo, ec)
-- print(estr)
local dstr = decode(estr)
assert(dstr == big_str, string.format("(#%d) err decode: %q -> %q", i, big_str, dstr))
end
end
do -- partial buffer
for i, case in test_cases do
local b = _SCRATCH_BUFFER
local raw, expected = case[1], case[2]
-- write from i
buffer.writestring(b, i, raw, #raw)
local write_from = i + #raw
local ec = _enc64(b, write_from, b, i, #raw, STD_ENC_LUT)
local es = buffer.readstring(b, write_from, ec)
local b_copy = buffer.create(write_from + #es)
buffer.writestring(b_copy, write_from, es, #es)
local dc = _dec64_unsafe(b, write_from, #es, STD_DEC_LUT)
local ds = buffer.readstring(b, write_from, dc)
-- b overridden, use copy for second decoder
local dcs = _dec64_safe(b_copy, write_from, #es, STD_DEC_LUT)
local dss = buffer.readstring(b_copy, write_from, dcs)
assert(es == expected, string.format("(#%d) err encode: %q -> %q", i, raw, es))
assert(ds == raw, string.format("(#%d) err decode: %q -> %q", i, raw, ds))
assert(dss == raw, string.format("(#%d) err decode: %q -> %q", i, raw, dss))
end
end
do -- These should produce errors in safe mode:
assert(not pcall(decode, "SGVsbG8\0"), "unhandled invalid input 2") -- Null bytes
assert(not pcall(decode, "SGVsbG8\255"), "unhandled invalid input 3") -- High bytes
assert(not pcall(decode, "Hell@oWorld!"), "unhandled invalid input 4") -- @ is invalid
assert(not pcall(decode, "Test1#23"), "unhandled invalid input 5") -- # is invalid
assert(not pcall(decode, "DataMon$ey"), "unhandled invalid input 6") -- $ is invalid
assert(not pcall(decode, "SGVsbG8-V29ybGQ_"), "unhandled invalid input 7") -- URL-safe Base64 for Std decoder
assert(not pcall(decode, "SGVs=bG8="), "unhandled invalid input 8") -- Padding in middle
end
do -- These should produce errors in any mode:
assert(not pcall(decode_unsafe, "SGVsbG8==A"), "unhandled invalid input 9") -- Characters after final padding
assert(not pcall(decode_unsafe, "A"), "unhandled invalid input 10") -- Single character
end
print "dev-test: ok"
-- stylua: ignore end
--]]
return table.freeze(module)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment