Skip to content

Instantly share code, notes, and snippets.

@ArvidSilverlock
Last active March 6, 2024 08:56
Show Gist options
  • Select an option

  • Save ArvidSilverlock/0b6fb5fe2fa941773da04dd1fdfcfa8b to your computer and use it in GitHub Desktop.

Select an option

Save ArvidSilverlock/0b6fb5fe2fa941773da04dd1fdfcfa8b to your computer and use it in GitHub Desktop.
Buffer Bitpacking
--- Partitions a list of bit widths into groups that can each be packed into a single `chunk` number.
-- A group is closed as soon as it holds at least 32 bits (the widest single buffer access), and
-- never grows beyond 53 bits (the largest integer width a float64 represents exactly). Bits of a
-- group that do not fit into its buffer access are carried over and counted in the next group's
-- width.
-- @param format array of entries, each with an integer `width` field (number of bits, 0 to 53)
-- @return array of `{ groupStart, groupEnd, width }` tables; `groupStart`/`groupEnd` are inclusive
--         indices into `format`, `width` is the group's bit count including any carried-over bits.
--         An empty `format` yields an empty array.
local function calculateGroups(format)
	local groups = {}
	local formatLength = #format

	-- an empty format used to produce one zero-width group (groupStart = 1, groupEnd = 0), for
	-- which the write generator cannot build a value expression; there is simply nothing to group
	if formatLength == 0 then
		return groups
	end

	-- a width above 53 can never fit inside a chunk, and negative or fractional widths have no
	-- meaning; fail loudly instead of silently emitting empty groups and negative carries
	for index = 1, formatLength do
		local width = format[index].width
		if type(width) ~= "number" or width % 1 ~= 0 or width < 0 or width > 53 then
			error(`format[{index}].width must be an integer between 0 and 53, got {tostring(width)}`, 2)
		end
	end

	local currentIndex = 1
	local excess = 0

	repeat
		local groupStart = currentIndex
		-- bits carried over from the previous group occupy the start of this one
		local groupWidth = excess

		repeat
			local value = format[currentIndex]
			if not value then
				break
			end

			local width = value.width

			-- check for the float64 precision hardcap of 53
			if groupWidth + width > 53 then
				break
			end

			-- add the value to the group
			groupWidth += width
			currentIndex += 1
		until groupWidth >= 32 -- check for the softcap of 32 bits

		-- add the calculated group to the group list
		table.insert(groups, {
			groupStart = groupStart,
			groupEnd = currentIndex - 1,
			width = groupWidth,
		})

		-- calculate how many bits would need to be manipulated on a second buffer call
		-- which therefore contribute to the next group's width
		excess = groupWidth - math.clamp(groupWidth // 8, 1, 4) * 8
	until currentIndex > formatLength

	return groups
end
return calculateGroups
-- This specific scheme was u15, u25, u25, u31, u18, u29, u26, u26, u2, u22, u2, u8, u10, u12, u22, u22, u2, u1, u8, u6, u7, u32, u27, u23, u31, u15, u31, u22, u14, u13, u8, u22, u2, u14, u31, u8, u15, u16, u32, u2, u6, u23, u17, u2, u1, u13, u20
-- Each variable is named `r` or `w` (for reading or writing respectively) followed by the hex index of its value. This naming is not hardcoded.
-- Sample of generated writer code. It packs the values `w1` .. `w47` into `bufferObject`,
-- starting at `offset`; those three names are not defined in this block and are expected to be
-- in scope already.
-- Each value is moved to its bit position by multiplying with a power of two, and the terms are
-- summed into `chunk`. After a write, `chunk // 2^N` keeps the bits that did not fit into the
-- bytes the cursor advanced over, so they become the low bits of the next chunk.
-- No value is masked before being added, so the code relies on every `w…` already being within
-- its declared width.
do -- write
local chunk
chunk = w1 + w2 * 2^15
buffer.writeu32(bufferObject, offset, chunk)
offset += 4
chunk = chunk // 2^32 + w3 * 2^8
buffer.writeu32(bufferObject, offset, chunk)
offset += 4
-- the expression is passed straight to the write call instead of being stored in `chunk`:
-- the next assignment to `chunk` starts from scratch, so nothing is carried out of this write
buffer.writeu32(bufferObject, offset, chunk // 2^32 + w4 * 2^1)
offset += 4
chunk = w5 + w6 * 2^18
buffer.writeu32(bufferObject, offset, chunk)
offset += 4
chunk = chunk // 2^32 + w7 * 2^15
buffer.writeu32(bufferObject, offset, chunk)
offset += 4
chunk = chunk // 2^32 + w8 * 2^9
buffer.writeu32(bufferObject, offset, chunk)
offset += 4
chunk = chunk // 2^32 + w9 * 2^3 + w10 * 2^5 + w11 * 2^27 + w12 * 2^29
buffer.writeu32(bufferObject, offset, chunk)
offset += 4
chunk = chunk // 2^32 + w13 * 2^5 + w14 * 2^15 + w15 * 2^27
buffer.writeu32(bufferObject, offset, chunk)
offset += 4
chunk = chunk // 2^32 + w16 * 2^17
buffer.writeu32(bufferObject, offset, chunk)
offset += 4
chunk = chunk // 2^32 + w17 * 2^7 + w18 * 2^9 + w19 * 2^10 + w20 * 2^18 + w21 * 2^24
buffer.writeu32(bufferObject, offset, chunk)
-- the cursor only moves 3 bytes although 4 were written: the next write starts on the fourth
-- byte and rewrites it, which is why the carry below is `chunk // 2^24` rather than `2^32`
offset += 3
chunk = chunk // 2^24 + w22 * 2^7
buffer.writeu32(bufferObject, offset, chunk)
offset += 4
chunk = chunk // 2^32 + w23 * 2^7
buffer.writeu32(bufferObject, offset, chunk)
offset += 4
chunk = chunk // 2^32 + w24 * 2^2
buffer.writeu32(bufferObject, offset, chunk)
-- same 3-byte advance as above; the following write carries `chunk // 2^24`
offset += 3
buffer.writeu32(bufferObject, offset, chunk // 2^24 + w25 * 2^1)
offset += 4
chunk = w26 + w27 * 2^15
buffer.writeu32(bufferObject, offset, chunk)
offset += 4
chunk = chunk // 2^32 + w28 * 2^14
buffer.writeu32(bufferObject, offset, chunk)
offset += 4
chunk = chunk // 2^32 + w29 * 2^4 + w30 * 2^18 + w31 * 2^31
buffer.writeu32(bufferObject, offset, chunk)
offset += 4
chunk = chunk // 2^32 + w32 * 2^7 + w33 * 2^29 + w34 * 2^31
buffer.writeu32(bufferObject, offset, chunk)
offset += 4
chunk = chunk // 2^32 + w35 * 2^13
buffer.writeu32(bufferObject, offset, chunk)
offset += 4
chunk = chunk // 2^32 + w36 * 2^12 + w37 * 2^20
buffer.writeu32(bufferObject, offset, chunk)
offset += 4
chunk = chunk // 2^32 + w38 * 2^3 + w39 * 2^19
buffer.writeu32(bufferObject, offset, chunk)
offset += 4
chunk = chunk // 2^32 + w40 * 2^19 + w41 * 2^21 + w42 * 2^27
buffer.writeu32(bufferObject, offset, chunk)
offset += 4
chunk = chunk // 2^32 + w43 * 2^18
buffer.writeu32(bufferObject, offset, chunk)
offset += 4
chunk = chunk // 2^32 + w44 * 2^3 + w45 * 2^5 + w46 * 2^6 + w47 * 2^19
buffer.writeu32(bufferObject, offset, chunk)
offset += 4
-- flush the bits that were left over after the last 32-bit write
buffer.writeu8(bufferObject, offset, chunk // 2^32)
offset += 1
end
-- Sample of generated reader code. It unpacks values from `bitbufferObject`, starting at
-- `offset`; both names are expected to be in scope already.
-- `chunk // 2^a % 2^b` drops the lowest `a` bits of the chunk and keeps the next `b` bits. When
-- fewer than `b` bits remain in the chunk, the value is completed after the next read with
-- `name += chunk % 2^n * 2^m`, which places the next chunk's lowest `n` bits above the `m` bits
-- already extracted.
-- NOTE(review): the targets are assigned without `local` and use the `w` prefix; presumably they
-- are declared by the surrounding code - confirm.
-- NOTE(review): this sample does not line up with the write sample in this file (for example
-- `w1` takes 23 bits here, while the write sample places `w2` at bit 15, and the buffer is called
-- `bitbufferObject` instead of `bufferObject`); presumably it was generated from a different
-- format - confirm before treating the two as a round-trip pair.
do -- read
local chunk
chunk = buffer.readu32(bitbufferObject, offset)
offset += 4
w1 = chunk % 2^23
w2 = chunk // 2^23 % 2^3
w3 = chunk // 2^26 % 2^3
w4 = chunk // 2^29 % 2^24
chunk = buffer.readu32(bitbufferObject, offset)
offset += 4
w4 += chunk % 2^21 * 2^3
w5 = chunk // 2^21 % 2^5
w6 = chunk // 2^26 % 2^24
chunk = buffer.readu32(bitbufferObject, offset)
offset += 4
w6 += chunk % 2^18 * 2^6
w7 = chunk // 2^18 % 2^24
chunk = buffer.readu32(bitbufferObject, offset)
offset += 4
w7 += chunk % 2^10 * 2^14
w8 = chunk // 2^10 % 2^13
w9 = chunk // 2^23 % 2^13
chunk = buffer.readu32(bitbufferObject, offset)
offset += 4
w9 += chunk % 2^4 * 2^9
wa = chunk // 2^4 % 2^4
wb = chunk // 2^8 % 2^13
wc = chunk // 2^21 % 2^22
chunk = buffer.readu32(bitbufferObject, offset)
offset += 4
wc += chunk % 2^11 * 2^11
wd = chunk // 2^11 % 2^16
we = chunk // 2^27 % 2^16
chunk = buffer.readu32(bitbufferObject, offset)
offset += 4
we += chunk % 2^11 * 2^5
wf = chunk // 2^11 % 2^30
chunk = buffer.readu32(bitbufferObject, offset)
offset += 4
wf += chunk % 2^9 * 2^21
w10 = chunk // 2^9 % 2^11
w11 = chunk // 2^20 % 2^13
chunk = buffer.readu32(bitbufferObject, offset)
offset += 4
w11 += chunk % 2^1 * 2^12
w12 = chunk // 2^1 % 2^31
chunk = buffer.readu32(bitbufferObject, offset)
offset += 4
w13 = chunk % 2^19
w14 = chunk // 2^19 % 2^9
w15 = chunk // 2^28 % 2^14
chunk = buffer.readu32(bitbufferObject, offset)
offset += 4
w15 += chunk % 2^10 * 2^4
w16 = chunk // 2^10 % 2^2
w17 = chunk // 2^12 % 2^28
chunk = buffer.readu32(bitbufferObject, offset)
offset += 4
w17 += chunk % 2^8 * 2^20
w18 = chunk // 2^8 % 2^2
w19 = chunk // 2^10 % 2^6
w1a = chunk // 2^16 % 2^26
chunk = buffer.readu32(bitbufferObject, offset)
offset += 4
w1a += chunk % 2^10 * 2^16
w1b = chunk // 2^10 % 2^30
chunk = buffer.readu32(bitbufferObject, offset)
-- the cursor only moves 3 bytes although 4 were read, so the fourth byte of this chunk is read
-- again as the first byte of the next chunk
offset += 3
w1b += chunk % 2^8 * 2^22
w1c = chunk // 2^8 % 2^8
w1d = chunk // 2^16 % 2^9
w1e = chunk // 2^25 % 2^4
chunk = buffer.readu32(bitbufferObject, offset)
offset += 4
-- the lowest 5 bits of this chunk were already consumed through the previous chunk (bits 24
-- to 28 there), so extraction starts at bit 5
w1f = chunk // 2^5 % 2^31
chunk = buffer.readu32(bitbufferObject, offset)
offset += 4
w1f += chunk % 2^4 * 2^27
w20 = chunk // 2^4 % 2^4
w21 = chunk // 2^8 % 2^4
w22 = chunk // 2^12 % 2^4
w23 = chunk // 2^16 % 2^28
chunk = buffer.readu32(bitbufferObject, offset)
offset += 4
w23 += chunk % 2^12 * 2^16
w24 = chunk // 2^12 % 2^21
chunk = buffer.readu32(bitbufferObject, offset)
offset += 4
w24 += chunk % 2^1 * 2^20
w25 = chunk // 2^1 % 2^21
w26 = chunk // 2^22 % 2^13
chunk = buffer.readu32(bitbufferObject, offset)
offset += 4
w26 += chunk % 2^3 * 2^10
w27 = chunk // 2^3 % 2^22
w28 = chunk // 2^25 % 2^17
chunk = buffer.readu32(bitbufferObject, offset)
offset += 4
w28 += chunk % 2^10 * 2^7
w29 = chunk // 2^10 % 2^20
w2a = chunk // 2^30 % 2^18
chunk = buffer.readu32(bitbufferObject, offset)
offset += 4
w2a += chunk % 2^16 * 2^2
w2b = chunk // 2^16 % 2^23
chunk = buffer.readu32(bitbufferObject, offset)
offset += 4
w2b += chunk % 2^7 * 2^16
w2c = chunk // 2^7 % 2^32
chunk = buffer.readu32(bitbufferObject, offset)
-- NOTE(review): this last read consumes bits 0 to 28 of the chunk (7 bits for `w2c`, then 22
-- bits for `w2d`) but the cursor only advances 3 bytes, so `offset` ends inside the last byte
-- that holds data - confirm whether code after this block relies on the final `offset`.
offset += 3
w2c += chunk % 2^7 * 2^25
w2d = chunk // 2^7 % 2^22
end
-- shared by the 24 and 32 bit entries: the entries below offer no 24-bit accessor, so a 24-bit
-- chunk is fetched with the 32-bit one
local READ_U32_TEMPLATE = "buffer.readu32(%s, %s)"

-- `string.format` templates for the buffer read call, keyed by chunk width in bits; the two
-- placeholders are the buffer variable name and the offset variable name, in that order
local READ_CALLBACKS = {
	[32] = READ_U32_TEMPLATE,
	[24] = READ_U32_TEMPLATE,
	[16] = "buffer.readu16(%s, %s)",
	[8] = "buffer.readu8(%s, %s)",
}
local Snippet = require("../Snippet.luau")
--- Generates the source code that reads a bit-packed set of values back out of a buffer.
-- Every group is fetched into a `chunk` with one buffer read; each value is then extracted with
-- `chunk // 2^shift % 2^bits`, and a value that continues past the chunk is completed from the
-- following chunk with `name += chunk % 2^bits * 2^shift`.
-- @param configuration table with the fields:
--   `format`          array of entries with a `width` field (bit count of each value)
--   `variables`       array of variable names, parallel to `format`
--   `groups`          array of `{ groupStart, groupEnd, width }` entries (the shape returned by
--                     `calculateGroups`)
--   `bufferVariable`  optional name of the buffer variable in the generated code, default "b"
--   `offsetVariable`  optional name of the offset variable in the generated code, default "offset"
-- @return the generated code as a string
local function generateRead(configuration)
	local format = configuration.format
	local variables = configuration.variables
	local groups = configuration.groups
	local bufferVariable = configuration.bufferVariable or "b"
	local offsetVariable = configuration.offsetVariable or "offset"

	-- builds the expression that fetches `readWidth` bits at the current offset
	local function readExpression(readWidth, isFinalRead)
		-- a 24-bit chunk is normally fetched with a 32-bit read, which is harmless while more
		-- packed data follows; on the very last read that fourth byte lies past the packed data
		-- (and past the end of an exactly sized buffer), so fetch exactly three bytes instead
		if isFinalRead and readWidth == 24 then
			return `buffer.readu16({bufferVariable}, {offsetVariable})`
				.. ` + buffer.readu8({bufferVariable}, {offsetVariable} + 2) * 2^16`
		end

		return READ_CALLBACKS[readWidth]:format(bufferVariable, offsetVariable)
	end

	local output = Snippet.new()
	output:Push("local chunk") -- initialise chunk

	local groupCount = #groups
	local excess = 0
	local offsetDisparity = 0

	for groupIndex, group in groups do
		local isLastGroup = groupIndex == groupCount
		local groupWidth = group.width

		-- the readWidth is rounded up so the *current* group contains all the necessary bytes
		-- the offsetWidth is rounded down so the *next* group has all the necessary bytes
		local readWidth = math.clamp(math.ceil(groupWidth / 8), 1, 4) * 8
		local offsetWidth = math.clamp(groupWidth // 8, 1, 4) * 8

		-- no later read re-reads the partially used byte after the last group, so the offset has
		-- to step over every byte that was consumed; this keeps the final offset equal to the one
		-- the generated writer ends on (it used to end one byte short whenever the two differ)
		local advanceWidth = if isLastGroup then readWidth else offsetWidth

		-- add the chunk reading to the output; when the last group is read as 24 bits it has no
		-- bits left over, so that read is also the final read of the generated code
		output:Push(
			`chunk = {readExpression(readWidth, isLastGroup)}`,
			`{offsetVariable} += {advanceWidth // 8}`
		)

		-- add the excess bits to the last value of the previous group
		-- that weren't included in the previous read call
		if excess > 0 then
			local lastValue = group.groupStart - 1
			local offset = format[lastValue].width - excess
			local name = variables[lastValue]

			-- lowest `excess` bits of the chunk, placed above the `offset` bits already read
			output:Push(`{name} += chunk % 2^{excess} * 2^{offset}`)
		end

		-- `offsetDisparity` is either 0 or 8, it will shift `excess` out of the negatives
		-- if the previous group's `readWidth` and `offsetWidth` were different
		local offset = excess + offsetDisparity

		for index = group.groupStart, group.groupEnd do
			local name, width = variables[index], format[index].width

			-- shift `chunk` right by `offset` bits if need be
			local offsetString = if offset > 0 then ` // 2^{offset}` else ""

			-- keep `width` bits of the shifted chunk, or fewer when the chunk runs out first
			local boundString = ` % 2^{math.min(width, readWidth - offset)}`

			output:Push(`{name} = chunk{offsetString}{boundString}`)
			offset += width
		end

		-- consumed by the next group, see `local offset = excess + offsetDisparity`
		offsetDisparity = readWidth - offsetWidth

		-- how many bits are left over after reading
		excess = groupWidth - readWidth
	end

	-- if there are any unread bits left over
	if excess > 0 then
		local readWidth = math.clamp(math.ceil(excess / 8), 1, 4) * 8

		output:Push(
			`chunk = {readExpression(readWidth, true)}`,
			`{offsetVariable} += {readWidth // 8}`
		)

		-- the left over bits always belong to the last value in the format
		local lastValue = #format
		local offset = format[lastValue].width - excess
		local name = variables[lastValue]

		-- lowest `excess` bits of the chunk, placed above the `offset` bits already read
		output:Push(`{name} += chunk % 2^{excess} * 2^{offset}`)
	end

	return tostring(output)
end
return generateRead
-- shared by the 24 and 32 bit entries: the entries below offer no 24-bit accessor, so a 24-bit
-- chunk is stored with the 32-bit one
local WRITE_U32_TEMPLATE = "buffer.writeu32(%s, %s, %s)"

-- `string.format` templates for the buffer write call, keyed by chunk width in bits; the three
-- placeholders are the buffer variable name, the offset variable name and the value expression
local WRITE_CALLBACKS = {
	[32] = WRITE_U32_TEMPLATE,
	[24] = WRITE_U32_TEMPLATE,
	[16] = "buffer.writeu16(%s, %s, %s)",
	[8] = "buffer.writeu8(%s, %s, %s)",
}
local Snippet = require("../Snippet.luau")
--- Generates the source code that bit-packs a set of values into a buffer.
-- Every group is summed into a `chunk` (each value multiplied by a power of two to move it to
-- its bit position) and stored with one buffer write; bits that do not fit into that write are
-- carried into the next chunk with `chunk // 2^bits`.
-- The generated code does not mask the values, so a value wider than its declared width spills
-- into its neighbours.
-- @param configuration table with the fields:
--   `format`          array of entries with a `width` field (bit count of each value)
--   `variables`       array of variable names, parallel to `format`
--   `groups`          array of `{ groupStart, groupEnd, width }` entries (the shape returned by
--                     `calculateGroups`)
--   `bufferVariable`  optional name of the buffer variable in the generated code, default "b"
--   `offsetVariable`  optional name of the offset variable in the generated code, default "offset"
-- @return the generated code as a string
local function generateWrite(configuration)
	local format = configuration.format
	local variables = configuration.variables
	local groups = configuration.groups
	local bufferVariable = configuration.bufferVariable or "b"
	local offsetVariable = configuration.offsetVariable or "offset"

	-- pushes a write of exactly three bytes taken from `chunk`; the 24-bit entry of
	-- `WRITE_CALLBACKS` stores four bytes, which is harmless while a later write covers the
	-- fourth byte, but on the final write it would land past the packed data (and past the end
	-- of an exactly sized buffer)
	local function pushFinal24BitWrite(output)
		output:Push(
			`buffer.writeu16({bufferVariable}, {offsetVariable}, chunk)`,
			`buffer.writeu8({bufferVariable}, {offsetVariable} + 2, chunk // 2^16)`,
			`{offsetVariable} += 3`
		)
	end

	local output = Snippet.new()
	output:Push("local chunk") -- initialise chunk

	local groupCount = #groups
	local lastWriteWidth = 0
	local excess = 0

	for groupIndex, group in groups do
		local chunkCalculation = {}

		-- if there are any extra bits from the previous group, shift `chunk` right past the
		-- bits that were already written so the unwritten ones remain in the `chunk`
		if excess > 0 then
			table.insert(chunkCalculation, `chunk // 2^{lastWriteWidth}`)
		end

		local offset = excess

		for index = group.groupStart, group.groupEnd do
			local name, width = variables[index], format[index].width

			-- shift the value left by `offset` bits, there is no `%` for a performance
			-- optimisation but this would cause numbers outside of their required range to
			-- overflow into other values
			local offsetString = if offset > 0 then ` * 2^{offset}` else ""

			table.insert(chunkCalculation, `{name}{offsetString}`)
			offset += width
		end

		-- a group without carried bits and without values (a zero-width group) used to produce a
		-- write call with a missing value argument; write a zero so the output stays valid code
		if #chunkCalculation == 0 then
			table.insert(chunkCalculation, "0")
		end

		local chunkString = table.concat(chunkCalculation, " + ")
		local groupWidth = group.width
		local writeWidth = math.clamp(groupWidth // 8, 1, 4) * 8

		-- how many bits are left over after writing
		excess = groupWidth - writeWidth

		-- this is stored for the next group so it can retain access to any unwritten bits
		lastWriteWidth = writeWidth

		if excess > 0 then
			-- `chunk` has to be kept because a later write still needs its unwritten bits
			output:Push(
				`chunk = {chunkString}`,
				WRITE_CALLBACKS[writeWidth]:format(bufferVariable, offsetVariable, "chunk"),
				`{offsetVariable} += {writeWidth // 8}`
			)
		elseif writeWidth == 24 and groupIndex == groupCount then
			-- last write of the generated code and exactly 24 bits wide
			output:Push(`chunk = {chunkString}`)
			pushFinal24BitWrite(output)
		else
			-- nothing is carried over, so skip assigning `chunk` and pass the calculation
			-- directly into the write function
			output:Push(
				WRITE_CALLBACKS[writeWidth]:format(bufferVariable, offsetVariable, chunkString),
				`{offsetVariable} += {writeWidth // 8}`
			)
		end
	end

	-- if there are any unwritten bits left over
	if excess > 0 then
		local writeWidth = math.clamp(math.ceil(excess / 8), 1, 4) * 8

		-- shift away the already written bits prior to writing
		local remainder = `chunk // 2^{lastWriteWidth}`

		if writeWidth == 24 then
			output:Push(`chunk = {remainder}`)
			pushFinal24BitWrite(output)
		else
			output:Push(
				WRITE_CALLBACKS[writeWidth]:format(bufferVariable, offsetVariable, remainder),
				`{offsetVariable} += {writeWidth // 8}`
			)
		end
	end

	return tostring(output)
end
return generateWrite
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment