HaraldKorneliussen/brle.py

## brle.py
#!/usr/bin/python

# In the binary case, maybe the length of a b-run should be encoded bit-flipped.
# Then the bit-flip of the compressed string should decode into the bit-flip
# of the uncompressed string, which seems like a neat property.

# It would be even neater if we could also preserve reversal of the string, but then
# we have to rethink a lot. The special casing of the ending sabotages most meaning
# we can squeeze into reversal.

def bijEnc(num, symbols="ab"):
    """
    Encode a natural number as a word in the alphabet specified by the symbols string,
    so that every natural number has exactly one representation. This is often called
    the bijective base encoding of the number.
    """
    ret = ''
    slen = len(symbols)
    while num > 0:
        num -= 1
        ret = symbols[num % slen] + ret
        num = num // slen
    return ret

def bijDec(s, symbols="ab"):
    """
    Decode a string in the alphabet specified by the symbols string, into a natural number.
    Inverse function of bijEnc, provided you use the same symbols string.
    """
    acc = 0
    for c in s:
        acc *= len(symbols)
        acc += (symbols.index(c) + 1)
    return acc

def rrle(s):
    """
    Recursively run-length encode the runs in s
    """
    if len(s) < 2:
        return s

    i = 0
    ret = ""
    runLetter = s[i]
    runLength = 0
    while i < len(s):
        # How long is the run?
        while i < len(s) and s[i] == runLetter:
            runLength += 1
            i += 1

        # Are there more letters to encode? If so, just write out this run
        if i < len(s):
            toEncode = rrle(bijEnc(runLength))
            ret += runLetter + runLetter.join((c for c in toEncode))
            runLetter = s[i]
            runLength = 0

    # We've come to the end of the string. Write out the final run, which requires
    # special treatment to be bijective. We can't just write out the run length as
    # is, because then the output string would always be even in length, and then
    # how would the decoder interpret an odd-length string?
    # So instead we halve the run length value, and write out odd-length runs with an
    # extra runLetter at the end, and even-length runs without.

    oddRun = runLength % 2 == 1
    runLength //= 2
    if runLength > 0:
        toEncode = rrle(bijEnc(runLength))
        ret += runLetter + runLetter.join((c for c in toEncode))
    if oddRun:
        ret += runLetter
    return ret

def rrld(s):
    """
    Recursively run-length decode the string s
    """
    if len(s) < 2:
        return s

    i = 1
    ret = ""
    runLetter = s[0]
    runLengthString = ""
    repeats = 0
    while i < len(s):
        # extract the string encoding the run length
        while i < len(s) and s[i-1] == runLetter:
            runLengthString += s[i]
            i += 2

        # in all but the last run,
        if i < len(s):
            # recurse on the run length string
            # to decode the actual run length as a number
            repeats = bijDec(rrld(runLengthString))
            # append the run
            ret += runLetter * repeats
            # prepare the next run
            runLetter = s[i-1]
            runLengthString = ""

    # The final run is decoded as usual
    repeats = bijDec(rrld(runLengthString))

    # but there may be a final letter to handle:
    if i == len(s):

        # If it's the same letter as the run was,
        # we double the run length and add one to find
        # the actual run length
        if (runLetter == s[i-1]):
            ret += runLetter * (2 * repeats + 1)

        # If it's a different letter, it actually started
        # a new run of just that one letter. So we append
        # the next-to-final run the usual way, and then
        # the final "run" of length one.
        else:
            ret += runLetter * repeats
            ret += s[i-1]
    else:
        # If there was no final letter, we just double the decoded
        # run length to find the actual run length.
        ret += runLetter * (2 * repeats)
    return ret
	#!/usr/bin/python

	# In the binary case, maybe the length of a b-run should be encoded bit-flipped.
	# Then the bit-flip of the compressed string should decode into the bit-flip
	# of the uncompressed string, which seems like a neat property.

	# It would be even neater if we could also preserve reversal of the string, but then
	# we have to rethink a lot. The special casing of the ending sabotages most meaning
	# we can squeeze into reversal.

	def bijEnc(num, symbols="ab"):
	"""
	Encode a natural number as a word in the alphabet specified by the symbols string,
	so that every natural number has exactly one representation. This is often called
	the bijective base encoding of the number.
	"""
	ret = ''
	slen = len(symbols)
	while num > 0:
	num -= 1
	ret = symbols[num % slen] + ret
	num = num // slen
	return ret

	def bijDec(s, symbols="ab"):
	"""
	Decode a string in the alphabet specified by the symbols string, into a natural number.
	Inverse function of bijEnc, provided you use the same symbols string.
	"""
	acc = 0
	for c in s:
	acc *= len(symbols)
	acc += (symbols.index(c) + 1)
	return acc

	def rrle(s):
	"""
	Recursively run-length encode the runs in s
	"""
	if len(s) < 2:
	return s

	i = 0
	ret = ""
	runLetter = s[i]
	runLength = 0
	while i < len(s):
	# How long is the run?
	while i < len(s) and s[i] == runLetter:
	runLength += 1
	i += 1

	# Are there more letters to encode? If so, just write out this run
	if i < len(s):
	toEncode = rrle(bijEnc(runLength))
	ret += runLetter + runLetter.join((c for c in toEncode))
	runLetter = s[i]
	runLength = 0

	# We've come to the end of the string. Write out the final run, which requires
	# special treatment to be bijective. We can't just write out the run length as
	# is, because then the output string would always be even in length, and then
	# how would the decoder interpret an odd-length string?
	# So instead we halve the run length value, and write out odd-length runs with an
	# extra runLetter at the end, and even-length runs without.

	oddRun = runLength % 2 == 1
	runLength //= 2
	if runLength > 0:
	toEncode = rrle(bijEnc(runLength))
	ret += runLetter + runLetter.join((c for c in toEncode))
	if oddRun:
	ret += runLetter
	return ret

	def rrld(s):
	"""
	Recursively run-length decode the string s
	"""
	if len(s) < 2:
	return s

	i = 1
	ret = ""
	runLetter = s[0]
	runLengthString = ""
	repeats = 0
	while i < len(s):
	# extract the string encoding the run length
	while i < len(s) and s[i-1] == runLetter:
	runLengthString += s[i]
	i += 2

	# in all but the last run,
	if i < len(s):
	# recurse on the run length string
	# to decode the actual run length as a number
	repeats = bijDec(rrld(runLengthString))
	# append the run
	ret += runLetter * repeats
	# prepare the next run
	runLetter = s[i-1]
	runLengthString = ""

	# The final run is decoded as usual
	repeats = bijDec(rrld(runLengthString))

	# but there may be a final letter to handle:
	if i == len(s):

	# If it's the same letter as the run was,
	# we double the run length and add one to find
	# the actual run length
	if (runLetter == s[i-1]):
	ret += runLetter * (2 * repeats + 1)

	# If it's a different letter, it actually started
	# a new run of just that one letter. So we append
	# the next-to-final run the usual way, and then
	# the final "run" of length one.
	else:
	ret += runLetter * repeats
	ret += s[i-1]
	else:
	# If there was no final letter, we just double the decoded
	# run length to find the actual run length.
	ret += runLetter * (2 * repeats)
	return ret
No results found