pgaskin/lzstring.go

## lzstring.go
package lzstring

import (
	"errors"
	"math/bits"
	"unicode/utf8"
	"unsafe"
)

// Decompress decodes an lzstring-compressed stream held in src (either bytes or
// UTF-16 code units) and appends the decoded UTF-16 code units to dst,
// returning the extended slice. To turn the result into a Go string, use
// string(utf16.Decode(dst)).
//
// The stream is a sequence of codes. Code 0 is followed by an 8-bit literal,
// code 1 by a 16-bit literal, code 2 ends the stream, and any larger code
// refers to a previously seen phrase. If the stream is truncated or malformed,
// the code units decoded so far are returned along with a non-nil error.
func Decompress[T byte | uint16](dst []uint16, src []T) ([]uint16, error) {
	// span is a half-open [start, end) range of dst holding one phrase.
	type span struct{ start, end uint32 }

	// top selects the most significant bit of a single input unit.
	var unit T
	top := uint32(1) << (uint(unsafe.Sizeof(unit))*8 - 1)

	var (
		cur  uint32 // input unit currently being consumed
		mask uint32 // next bit of cur to test; zero means cur is used up
	)
	// readBits consumes n bits from src, most significant bit of each unit
	// first, and assembles them into a value least significant bit first.
	readBits := func(n int) (uint32, bool) {
		var v uint32
		for i := 0; i < n; i++ {
			if mask == 0 {
				if len(src) == 0 {
					return v, false
				}
				cur, src = uint32(src[0]), src[1:]
				mask = top
			}
			if cur&mask != 0 {
				v |= 1 << i
			}
			mask >>= 1
		}
		return v, true
	}

	var (
		prev  uint32 // start offset in dst of the previously emitted phrase
		table []span // phrases addressable by code-3
	)
	for {
		known := uint32(len(table))

		// The code is just wide enough to hold the largest legal value, which
		// is 3+known (the phrase that is about to be defined).
		code, ok := readBits(bits.Len32(known + 3))
		if !ok {
			return dst, errors.New("unexpected end of stream")
		}

		start := uint32(len(dst)) // where this packet's output begins
		switch {
		case code == 2:
			return dst, nil

		case code < 2:
			width := 8
			if code == 1 {
				width = 16
			}
			lit, ok := readBits(width)
			if !ok {
				return dst, errors.New("unexpected end of stream")
			}
			dst = append(dst, uint16(lit))
			table = append(table, span{start, start + 1})

		case known == 0:
			return dst, errors.New("first packet must be a literal")

		case code-3 > known:
			return dst, errors.New("dictionary index out of range")

		case code-3 == known:
			// The phrase being referenced is the one this packet defines:
			// the previous phrase followed by its own first code unit.
			dst = append(dst, dst[prev:]...)
			dst = append(dst, dst[prev])

		default:
			ref := table[code-3]
			dst = append(dst, dst[ref.start:ref.end]...)
		}

		// Every packet after the first defines a new phrase: the previous
		// phrase plus the first code unit of the current one. The two are
		// adjacent in dst, so a single range describes it.
		if known != 0 {
			table = append(table, span{prev, start + 1})
		}
		prev = start
	}
}

// Unquote decodes the JSON string literal at the start of src and appends its
// contents to dst as UTF-16 code units, returning the extended slice. Any junk
// after the closing quote is ignored. This is roughly equivalent to the
// following JS:
//
//	function unquote(s) {
//		s = JSON.parse(s)
//		return Array(s.length).keys().map(i => s.charCodeAt(i)).toArray()
//	}
//
// Unlike the usual Go strings and JSON libraries, characters above U+FFFF are
// split into their UTF-16 surrogate pair, and \uXXXX escapes are appended
// verbatim, so unpaired surrogates are preserved as their raw values.
//
// An error is returned, together with the code units decoded so far, if the
// opening or closing quote is missing, the input is not valid UTF-8, or an
// escape sequence is malformed. Unescaped control characters are not rejected.
//
// This is intended for use when parsing and decompressing raw lzstrings
// serialized as JSON, i.e.:
//
//	JSON.stringify(LZString.compress("whatever"))
func Unquote[T string | []byte](dst []uint16, src T) ([]uint16, error) {
	if len(src) == 0 || src[0] != '"' {
		return dst, errors.New("json string missing start quote")
	}
	src = src[1:]

	for {
		if len(src) == 0 {
			return dst, errors.New("json string missing end quote")
		}

		// Only the first UTFMax bytes are needed to decode one rune, so the
		// string conversion stays small regardless of the input size.
		r, rn := utf8.DecodeRuneInString(string(src[:min(len(src), utf8.UTFMax)]))
		if r == utf8.RuneError && rn == 1 {
			// A width of 1 marks a decoding failure; a correctly encoded
			// U+FFFD has a width of 3 and must be accepted.
			return dst, errors.New("json is not valid utf-8")
		}
		src = src[rn:]

		switch {
		case r == '"':
			return dst, nil

		case r == '\\':
			if len(src) == 0 {
				return dst, errors.New("unexpected eof in json escape")
			}

			e := src[0]
			src = src[1:]

			switch e {
			case '"', '\\', '/':
				dst = append(dst, uint16(e))
			case 'b':
				dst = append(dst, '\b')
			case 'f':
				dst = append(dst, '\f')
			case 'n':
				dst = append(dst, '\n')
			case 'r':
				dst = append(dst, '\r')
			case 't':
				dst = append(dst, '\t')

			case 'u':
				if len(src) < 4 {
					return dst, errors.New("invalid json unicode escape")
				}

				// Exactly four hex digits form one code unit, which is kept
				// as-is even when it is a lone surrogate.
				var v uint16
				for _, c := range []byte(src[:4]) {
					switch {
					case '0' <= c && c <= '9':
						c = c - '0'
					case 'a' <= c && c <= 'f':
						c = 10 + c - 'a'
					case 'A' <= c && c <= 'F':
						c = 10 + c - 'A'
					default:
						return dst, errors.New("invalid json unicode escape")
					}
					v = v*16 + uint16(c)
				}
				src = src[4:]

				dst = append(dst, v)

			default:
				return dst, errors.New("invalid json escape")
			}

		case r >= 0x10000:
			// Outside the BMP: emit the high and low surrogate, each carrying
			// ten bits of the offset from U+10000.
			off := r - 0x10000
			dst = append(dst, uint16(0xd800|(off>>10)), uint16(0xdc00|(off&0x3ff)))

		default:
			dst = append(dst, uint16(r))
		}
	}
}
	package lzstring

	import (
	"errors"
	"math/bits"
	"unicode/utf8"
	"unsafe"
	)

	// Decompress decodes an lzstring-compressed stream held in src (either bytes or
	// UTF-16 code units) and appends the decoded UTF-16 code units to dst,
	// returning the extended slice. To turn the result into a Go string, use
	// string(utf16.Decode(dst)).
	//
	// The stream is a sequence of codes. Code 0 is followed by an 8-bit literal,
	// code 1 by a 16-bit literal, code 2 ends the stream, and any larger code
	// refers to a previously seen phrase. If the stream is truncated or malformed,
	// the code units decoded so far are returned along with a non-nil error.
	func Decompress[T byte | uint16](dst []uint16, src []T) ([]uint16, error) {
		// span is a half-open [start, end) range of dst holding one phrase.
		type span struct{ start, end uint32 }

		// top selects the most significant bit of a single input unit.
		var unit T
		top := uint32(1) << (uint(unsafe.Sizeof(unit))*8 - 1)

		var (
			cur  uint32 // input unit currently being consumed
			mask uint32 // next bit of cur to test; zero means cur is used up
		)
		// readBits consumes n bits from src, most significant bit of each unit
		// first, and assembles them into a value least significant bit first.
		readBits := func(n int) (uint32, bool) {
			var v uint32
			for i := 0; i < n; i++ {
				if mask == 0 {
					if len(src) == 0 {
						return v, false
					}
					cur, src = uint32(src[0]), src[1:]
					mask = top
				}
				if cur&mask != 0 {
					v |= 1 << i
				}
				mask >>= 1
			}
			return v, true
		}

		var (
			prev  uint32 // start offset in dst of the previously emitted phrase
			table []span // phrases addressable by code-3
		)
		for {
			known := uint32(len(table))

			// The code is just wide enough to hold the largest legal value, which
			// is 3+known (the phrase that is about to be defined).
			code, ok := readBits(bits.Len32(known + 3))
			if !ok {
				return dst, errors.New("unexpected end of stream")
			}

			start := uint32(len(dst)) // where this packet's output begins
			switch {
			case code == 2:
				return dst, nil

			case code < 2:
				width := 8
				if code == 1 {
					width = 16
				}
				lit, ok := readBits(width)
				if !ok {
					return dst, errors.New("unexpected end of stream")
				}
				dst = append(dst, uint16(lit))
				table = append(table, span{start, start + 1})

			case known == 0:
				return dst, errors.New("first packet must be a literal")

			case code-3 > known:
				return dst, errors.New("dictionary index out of range")

			case code-3 == known:
				// The phrase being referenced is the one this packet defines:
				// the previous phrase followed by its own first code unit.
				dst = append(dst, dst[prev:]...)
				dst = append(dst, dst[prev])

			default:
				ref := table[code-3]
				dst = append(dst, dst[ref.start:ref.end]...)
			}

			// Every packet after the first defines a new phrase: the previous
			// phrase plus the first code unit of the current one. The two are
			// adjacent in dst, so a single range describes it.
			if known != 0 {
				table = append(table, span{prev, start + 1})
			}
			prev = start
		}
	}

	// Unquote decodes the JSON string literal at the start of src and appends its
	// contents to dst as UTF-16 code units, returning the extended slice. Any junk
	// after the closing quote is ignored. This is roughly equivalent to the
	// following JS:
	//
	//	function unquote(s) {
	//		s = JSON.parse(s)
	//		return Array(s.length).keys().map(i => s.charCodeAt(i)).toArray()
	//	}
	//
	// Unlike the usual Go strings and JSON libraries, characters above U+FFFF are
	// split into their UTF-16 surrogate pair, and \uXXXX escapes are appended
	// verbatim, so unpaired surrogates are preserved as their raw values.
	//
	// An error is returned, together with the code units decoded so far, if the
	// opening or closing quote is missing, the input is not valid UTF-8, or an
	// escape sequence is malformed. Unescaped control characters are not rejected.
	//
	// This is intended for use when parsing and decompressing raw lzstrings
	// serialized as JSON, i.e.:
	//
	//	JSON.stringify(LZString.compress("whatever"))
	func Unquote[T string | []byte](dst []uint16, src T) ([]uint16, error) {
		if len(src) == 0 || src[0] != '"' {
			return dst, errors.New("json string missing start quote")
		}
		src = src[1:]

		for {
			if len(src) == 0 {
				return dst, errors.New("json string missing end quote")
			}

			// Only the first UTFMax bytes are needed to decode one rune, so the
			// string conversion stays small regardless of the input size.
			r, rn := utf8.DecodeRuneInString(string(src[:min(len(src), utf8.UTFMax)]))
			if r == utf8.RuneError && rn == 1 {
				// A width of 1 marks a decoding failure; a correctly encoded
				// U+FFFD has a width of 3 and must be accepted.
				return dst, errors.New("json is not valid utf-8")
			}
			src = src[rn:]

			switch {
			case r == '"':
				return dst, nil

			case r == '\\':
				if len(src) == 0 {
					return dst, errors.New("unexpected eof in json escape")
				}

				e := src[0]
				src = src[1:]

				switch e {
				case '"', '\\', '/':
					dst = append(dst, uint16(e))
				case 'b':
					dst = append(dst, '\b')
				case 'f':
					dst = append(dst, '\f')
				case 'n':
					dst = append(dst, '\n')
				case 'r':
					dst = append(dst, '\r')
				case 't':
					dst = append(dst, '\t')

				case 'u':
					if len(src) < 4 {
						return dst, errors.New("invalid json unicode escape")
					}

					// Exactly four hex digits form one code unit, which is kept
					// as-is even when it is a lone surrogate.
					var v uint16
					for _, c := range []byte(src[:4]) {
						switch {
						case '0' <= c && c <= '9':
							c = c - '0'
						case 'a' <= c && c <= 'f':
							c = 10 + c - 'a'
						case 'A' <= c && c <= 'F':
							c = 10 + c - 'A'
						default:
							return dst, errors.New("invalid json unicode escape")
						}
						v = v*16 + uint16(c)
					}
					src = src[4:]

					dst = append(dst, v)

				default:
					return dst, errors.New("invalid json escape")
				}

			case r >= 0x10000:
				// Outside the BMP: emit the high and low surrogate, each carrying
				// ten bits of the offset from U+10000.
				off := r - 0x10000
				dst = append(dst, uint16(0xd800|(off>>10)), uint16(0xdc00|(off&0x3ff)))

			default:
				dst = append(dst, uint16(r))
			}
		}
	}
No results found