Skip to content

Instantly share code, notes, and snippets.

@pgaskin
Last active January 11, 2026 10:58
Show Gist options
  • Select an option

  • Save pgaskin/1d2f1c7f5002670432247dd5adf85557 to your computer and use it in GitHub Desktop.

Select an option

Save pgaskin/1d2f1c7f5002670432247dd5adf85557 to your computer and use it in GitHub Desktop.
Simple lzstring decompression in Go.
package lzstring
import (
"errors"
"math/bits"
"unicode/utf8"
"unsafe"
)
// Decompress decodes an lzstring-compressed stream held in src, which may be
// a slice of bytes or of utf-16 code units, and appends the decoded utf-16
// code units to dst. The extended slice is returned; on error it holds
// whatever was decoded before the problem was detected. To turn the result
// into a Go string, use string(utf16.Decode(dst)).
//
// src is consumed as a bitstream: each element is read from its most
// significant bit down to its least significant bit, while every multi-bit
// field inside the stream is stored least significant bit first.
func Decompress[T byte | uint16](dst []uint16, src []T) ([]uint16, error) {
	// Mask for the most significant bit of a single src element.
	var unit T
	topBit := uint32(1) << (uint(unsafe.Sizeof(unit))*8 - 1)

	var (
		word uint32 // src element currently being consumed
		mask uint32 // next bit of word to test; zero once word is used up
	)

	// readBits takes the next n bits from src and assembles them into an
	// integer, first bit read becoming bit 0. The second result is false if
	// src ran out before n bits were available.
	readBits := func(n int) (uint32, bool) {
		var val uint32
		for pos := 0; pos < n; pos++ {
			if mask == 0 {
				if len(src) == 0 {
					return val, false
				}
				word, src = uint32(src[0]), src[1:]
				mask = topBit
			}
			if word&mask != 0 {
				val |= 1 << pos
			}
			mask >>= 1
		}
		return val, true
	}

	var (
		prev    uint32      // index in dst where the previous chunk starts
		entries [][2]uint32 // dictionary, each entry a [start, end) range of dst
	)
	for {
		// Codes 0-2 are reserved, so the code width must be able to hold
		// every value up to and including 3+len(entries).
		n := uint32(len(entries))
		code, ok := readBits(bits.Len32(n + 3))
		if !ok {
			return dst, errors.New("unexpected end of stream")
		}
		if code == 2 {
			// End-of-stream marker.
			return dst, nil
		}

		start := uint32(len(dst)) // index in dst where this chunk starts
		switch {
		case code < 2:
			// Literal code unit: code 0 carries 8 bits, code 1 carries 16.
			width := 8
			if code == 1 {
				width = 16
			}
			lit, ok := readBits(width)
			if !ok {
				return dst, errors.New("unexpected end of stream")
			}
			dst = append(dst, uint16(lit))
			entries = append(entries, [2]uint32{start, start + 1})
		case n == 0:
			return dst, errors.New("first packet must be a literal")
		case code-3 > n:
			return dst, errors.New("dictionary index out of range")
		case code-3 == n:
			// Reference to the entry that is about to be created: the
			// previous chunk followed by its own first code unit.
			dst = append(dst, dst[prev:]...)
			dst = append(dst, dst[prev])
		default:
			ref := entries[code-3]
			dst = append(dst, dst[ref[0]:ref[1]]...)
		}

		// Every chunk but the first defines a new entry made of the previous
		// chunk plus the first code unit of this one; the two are adjacent in
		// dst, so a single range describes it.
		if n > 0 {
			entries = append(entries, [2]uint32{prev, start + 1})
		}
		prev = start
	}
}
// Unquote decodes the JSON string literal at the start of src into a series
// of UTF-16 code units, appending them to dst. Anything following the closing
// quote is ignored. This is roughly equivalent to the following JS:
//
//	function unquote(s) {
//		s = JSON.parse(s)
//		return Array(s.length).keys().map(i => s.charCodeAt(i)).toArray()
//	}
//
// Unlike the usual Go strings and JSON libraries, characters above U+FFFF are
// split into their UTF-16 surrogate pair, and \uXXXX escapes are appended
// verbatim, so unpaired or otherwise invalid surrogates survive as their raw
// values.
//
// This is intended for use when parsing and decompressing raw lzstrings
// serialized as JSON, i.e.:
//
//	JSON.stringify(LZString.compress("whatever"))
//
// On failure the returned slice holds the code units decoded before the
// error, together with an error describing the malformed input.
func Unquote[T string | []byte](dst []uint16, src T) ([]uint16, error) {
	if len(src) == 0 || src[0] != '"' {
		return dst, errors.New("json string missing start quote")
	}
	src = src[1:]
	for {
		if len(src) == 0 {
			return dst, errors.New("json string missing end quote")
		}
		// Only the first UTFMax bytes can belong to the next rune, so convert
		// just that prefix rather than the whole remaining input.
		r, rn := utf8.DecodeRuneInString(string(src[:min(len(src), utf8.UTFMax)]))
		// Malformed input is reported as (RuneError, 1). A wider RuneError is
		// a correctly encoded U+FFFD, which is ordinary data and must be
		// accepted (compressed lzstrings can contain any code unit).
		if r == utf8.RuneError && rn == 1 {
			return dst, errors.New("json is not valid utf-8")
		}
		src = src[rn:]
		switch {
		case r == '"':
			return dst, nil
		case r == '\\':
			if len(src) == 0 {
				return dst, errors.New("unexpected eof in json escape")
			}
			e := src[0]
			src = src[1:]
			switch e {
			case '"', '\\', '/':
				dst = append(dst, uint16(e))
			case 'b':
				dst = append(dst, '\b')
			case 'f':
				dst = append(dst, '\f')
			case 'n':
				dst = append(dst, '\n')
			case 'r':
				dst = append(dst, '\r')
			case 't':
				dst = append(dst, '\t')
			case 'u':
				// Exactly four hex digits, emitted as one raw code unit with
				// no surrogate validation.
				if len(src) < 4 {
					return dst, errors.New("invalid json unicode escape")
				}
				var v uint16
				for _, c := range []byte(src[:4]) {
					switch {
					case '0' <= c && c <= '9':
						c = c - '0'
					case 'a' <= c && c <= 'f':
						c = 10 + c - 'a'
					case 'A' <= c && c <= 'F':
						c = 10 + c - 'A'
					default:
						return dst, errors.New("invalid json unicode escape")
					}
					v = v*16 + uint16(c)
				}
				src = src[4:]
				dst = append(dst, v)
			default:
				return dst, errors.New("invalid json escape")
			}
		case r >= 0x10000:
			// Outside the BMP: JS would hold this as a surrogate pair, so
			// emit the high and low surrogates as two code units.
			off := r - 0x10000
			dst = append(dst, uint16(0xd800+(off>>10)), uint16(0xdc00+(off&0x3ff)))
		default:
			dst = append(dst, uint16(r))
		}
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment