-
-
Save pgodwin/7d66729444173146ad698d154f2b9b6c to your computer and use it in GitHub Desktop.
| using System.Text; | |
| /* This code provided by Bruce Jackson (brucejackson.info) and is | |
| * provided "AS IS" with no warranties, expressed or implied, as to | |
| * its effectiveness or use. | |
| * | |
| * User agrees to edit the code to meet their particular use and | |
| * understands that this code has not been fully | |
| * tested in a production environment. | |
| * | |
| * The author expressly waives any rights of copyright to the code | |
| * and so any user is welcome to use this in whatever way they want. | |
| * */ | |
namespace OcfLzw
{
    /// <summary>
    /// Alternate implementation of the OCF/LZW Decompression by Bruce Jackson (brucejackson.info).
    /// This code has not been tested in production and is provided for reference only.
    /// </summary>
    /// <remarks>
    /// The input is read as a stream of variable-width codes packed most-significant-bit first.
    /// Codes start at 9 bits and grow by one bit each time the lookup table reaches
    /// 511, 1023, 2047 and 4095 entries. Code 256 resets the lookup table, code 257 ends the stream,
    /// and codes from 258 upwards refer to lookup-table entries.
    /// An instance keeps its working state in fields, so a single instance must not be used
    /// from several threads at once; sequential reuse is supported.
    /// </remarks>
    public class DecompressBlob
    {
        /// <summary>
        /// One lookup-table entry: the code of the preceding string plus the value appended to it.
        /// </summary>
        private sealed class lzwItem
        {
            public lzwItem(uint prefix, uint suffix)
            {
                Prefix = prefix;
                Suffix = suffix;
            }

            public readonly uint Prefix;
            public readonly uint Suffix;
        }

        private const int MAX_CODES = 8192;

        /// <summary>Code that tells the decoder to discard the lookup table and restart at 9-bit codes.</summary>
        private const uint ClearCode = 256;

        /// <summary>Code that marks the end of the compressed stream.</summary>
        private const uint EndOfStreamCode = 257;

        /// <summary>Smallest code that is resolved through the lookup table instead of being emitted directly.</summary>
        private const uint FirstTableCode = 258;

        private uint[] tempDecompressBuffer = new uint[MAX_CODES];
        private lzwItem[] lzwLookupTable = new lzwItem[MAX_CODES];
        private int tempBufferIndex = 0;
        private int currentByteBufferIndex = 0;
        private int codeCount = 257;
        private byte[] finalByteBuffer = null;

        /// <summary>
        /// Decompresses an OCF/LZW compressed blob.
        /// </summary>
        /// <param name="rawbytes">The compressed bytes. The stream must contain the end-of-stream code (257).</param>
        /// <param name="stringSize">Size, in bytes, of the output buffer to allocate (the uncompressed length).</param>
        /// <returns>
        /// A newly allocated array of <paramref name="stringSize"/> bytes holding the decompressed data.
        /// If the stream ends before the buffer is full, the remaining bytes are zero.
        /// </returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="rawbytes"/> is null.</exception>
        /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="stringSize"/> is negative.</exception>
        /// <exception cref="System.IndexOutOfRangeException">
        /// The input ends before an end-of-stream code is read, or it expands to more than
        /// <paramref name="stringSize"/> bytes.
        /// </exception>
        public byte[] Decompress(byte[] rawbytes, int stringSize)
        {
            if (rawbytes == null)
            {
                throw new System.ArgumentNullException(nameof(rawbytes));
            }
            if (stringSize < 0)
            {
                throw new System.ArgumentOutOfRangeException(nameof(stringSize), "The output size cannot be negative.");
            }

            // Start every call from a clean slate. Without this a second call on the same
            // instance would keep writing at the previous output position and would resolve
            // codes against the previous blob's lookup table.
            finalByteBuffer = new byte[stringSize];
            currentByteBufferIndex = 0;
            ResetLookupTable();

            int byteArrayIndex = 0,
                shift = 1,          // code width minus 8, i.e. how far currentShift advances per code
                currentShift = 1;   // number of bits the current code takes from the byte(s) after firstCode
            uint prevCode = 0,
                 middleCode,
                 lookupIndex,
                 firstCode = rawbytes[byteArrayIndex]; // leading bits of the code being assembled

            while (true)
            {
                if (currentShift >= 9)
                {
                    // More than 8 bits are still missing, so the code spans two further bytes.
                    currentShift -= 8;
                    uint spannedByte = rawbytes[++byteArrayIndex];
                    middleCode = rawbytes[++byteArrayIndex];
                    lookupIndex = (firstCode << (currentShift + 8))
                                | (spannedByte << currentShift)
                                | (middleCode >> (8 - currentShift));
                }
                else
                {
                    middleCode = rawbytes[++byteArrayIndex];
                    lookupIndex = (firstCode << currentShift) | (middleCode >> (8 - currentShift));
                }

                if (lookupIndex == ClearCode) // time to move to a new lookup table
                {
                    shift = 1;
                    currentShift++;
                    // The byte just read becomes the leading bits of the next 9-bit code.
                    firstCode = rawbytes[byteArrayIndex];
                    ResetLookupTable();
                    continue;
                }
                if (lookupIndex == EndOfStreamCode) // EOF marker, better than using the string size
                {
                    return finalByteBuffer;
                }

                if (prevCode == 0)
                {
                    tempDecompressBuffer[0] = lookupIndex;
                }

                if (lookupIndex < codeCount)
                {
                    // Known code: emit its string, then record "previous string + first value of this string".
                    SaveItemToLookupTable(lookupIndex);
                    if (codeCount < MAX_CODES)
                    {
                        lzwLookupTable[codeCount++] = new lzwItem(prevCode, tempDecompressBuffer[tempBufferIndex]);
                    }
                }
                else
                {
                    // Code not in the table yet: create the entry from the previous string and
                    // its own first value (still held at tempBufferIndex), then emit it.
                    lzwLookupTable[codeCount++] = new lzwItem(prevCode, tempDecompressBuffer[tempBufferIndex]);
                    SaveItemToLookupTable(lookupIndex);
                }

                // Keep the bits of the last byte that were not consumed by this code.
                firstCode = (uint)(middleCode & (0xff >> currentShift));
                currentShift += shift;

                switch (codeCount) // use the lookup table size and not the current byte count
                {
                    case 511:
                    case 1023:
                    case 2047:
                    case 4095:
                        // The table is about to outgrow the current width: codes get one bit wider.
                        shift++;
                        currentShift++;
                        break;
                }

                prevCode = lookupIndex;
            }
        }

        /// <summary>
        /// Discards the lookup table and the scratch buffer so that the next code is resolved
        /// against an empty table.
        /// </summary>
        private void ResetLookupTable()
        {
            tempDecompressBuffer = new uint[MAX_CODES];
            tempBufferIndex = 0;
            lzwLookupTable = new lzwItem[MAX_CODES];
            codeCount = 257;
        }

        /// <summary>
        /// Expands <paramref name="compressedCode"/> into its values and appends them to the output buffer.
        /// </summary>
        /// <remarks>
        /// The chain of lookup-table entries is walked from the last value back to the first, so the
        /// values land in <c>tempDecompressBuffer</c> in reverse order and are copied out back to front.
        /// On return <c>tempDecompressBuffer[tempBufferIndex]</c> holds the first value of the expanded string.
        /// </remarks>
        /// <param name="compressedCode">The code to expand.</param>
        private void SaveItemToLookupTable(uint compressedCode)
        {
            tempBufferIndex = -1;
            while (compressedCode >= FirstTableCode)
            {
                lzwItem entry = lzwLookupTable[compressedCode];
                tempDecompressBuffer[++tempBufferIndex] = entry.Suffix;
                compressedCode = entry.Prefix;
            }
            tempDecompressBuffer[++tempBufferIndex] = compressedCode;

            for (int i = tempBufferIndex; i >= 0; i--)
            {
                finalByteBuffer[currentByteBufferIndex++] = (byte)tempDecompressBuffer[i];
            }
        }
    }
}
Thanks @plessbd, and thanks for sharing your port.
@pgodwin thank you for sharing this script. Have you any idea where I can get the stringSize from in Cerner db tables?
Thank you! Here's a clojure port, unprofiled just yet.
@thegoatherder: working on the CE_BLOB table, BLOB_LENGTH seems to return the uncompressed length when the blob is compressed (and the concatenated length when the blob is split into a sequence?). The docs aren't helpful unfortunately. blobgetlen() returns the actual length of BLOB_CONTENTS.
Thanks for sharing the script. It has been extremely helpful.
One issue we are having is joining split Blobs. ie where the Blob sequence number >1. Has anyone else tackled this?
@pgodwin Thank you for this! I also ported it to Java in case this is helpful to anyone else.
https://github.com/ARMoir/CernerBlobJava/blob/main/src/ocflzw/DecompressBlob.java
Does someone know how the uncompress is done by PowerChart at real-time? I have limited Oracle access, but I can't find anything like a function or procedure in the database ?!?
I assume PowerChart either has its own implementation or leans on the CCL functions that handle the decompression.
I should point out that there is a full library for this on https://github.com/pgodwin/OcfLzw
Thx and it was my assumption also but it is in none of the 24 functions :-(
Just wanted to say thank you, and provide a python port https://github.com/plessbd/ocflzw-decompress