iterations= 10
data.shape = (64, 512, 512) total size = 128.0 MB
---- testing pickle ----
pickle dumps time: 0.16670258045196534 0.007583755528625725
pickle loads time: 0.08387384414672852 0.0015653707744914705
---- testing cPickle ----
cPickle dumps time: 0.1355360269546509 0.02795624634235191
cPickle loads time: 0.05218687057495117 0.0003695142975977074
---- testing marshal ----
marshal dumps time: 0.05070292949676514 0.0007349747542887223
marshal loads time: 0.05259006023406983 0.000816788432272282
---- testing numpy save/load ----
numpy save time: 0.06062030792236328 0.003113365628325949
numpy load time: 0.028949522972106935 0.0029791145564422483
---- testing fast numpy io ----
fast numpy io dumps time: 0.05341365337371826 0.001540740588879568
fast numpy io loads time: 0.05321941375732422 0.0008756969021245359
Created
November 15, 2025 09:47
-
-
Save turtleizzy/4b3b96449cc5dd2f30a7108dee0b0f53 to your computer and use it in GitHub Desktop.
Performance comparison for marshalling large array blocks
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pickle | |
| import _pickle as cPickle | |
| import marshal | |
| import numpy as np | |
| import time | |
| import io | |
| # --------------------------------------------------------------------------------- | |
| # from fastnumpyio | |
| # https://github.com/divideconcept/fastnumpyio/blob/main/fastnumpyio.py | |
| import sys | |
| import numpy as np | |
| import numpy.lib.format | |
| import struct | |
def save(file, array):
    """Write ``array`` to ``file`` using a fixed 128-byte NPY v1.0 preamble.

    The output is the 10-byte magic/version/header-length prefix, a header
    dict padded with spaces and terminated by a newline so that prefix plus
    header occupy exactly 128 bytes, and then the raw element bytes in C
    order (the header always declares ``'fortran_order': False``).

    Args:
        file: Either a path (``str``), which is opened for binary writing and
            closed again before returning, or an already-open binary
            file-like object, which is left open for the caller.
        array: The ``numpy.ndarray`` to store. Arrays that are not
            C-contiguous are copied into C order before being written.

    Raises:
        ValueError: If the dtype/shape description does not fit in the fixed
            header, which would otherwise produce a corrupt file.
    """
    magic_string = b"\x93NUMPY\x01\x00v\x00"
    # One byte of the 128-byte preamble is reserved for the trailing newline.
    header_room = 127 - len(magic_string)
    header_text = (
        "{'descr': '" + array.dtype.descr[0][1]
        + "', 'fortran_order': False, 'shape': " + str(array.shape) + ", }"
    )
    if len(header_text) > header_room:
        raise ValueError(
            "array description does not fit in the fixed-size header: "
            + header_text
        )
    header = bytes(header_text.ljust(header_room) + "\n", 'utf-8')

    # ``array.data`` can only be written directly when the memory is laid out
    # in C order; the header promises C order, so copy when it is not.
    if not array.flags.c_contiguous:
        array = np.ascontiguousarray(array)

    opened_here = isinstance(file, str)
    if opened_here:
        file = open(file, "wb")
    try:
        file.write(magic_string)
        file.write(header)
        file.write(array.data)
    finally:
        # Only close handles created here; caller-owned streams stay open.
        if opened_here:
            file.close()
def pack(array):
    """Serialize ``array`` into a compact, self-describing byte string.

    Layout of the result:
        * byte 0: byte-order character of the dtype, with native order
          (``'='``) spelled out as ``'<'`` or ``'>'`` from ``sys.byteorder``
        * byte 1: dtype kind character
        * byte 2: dtype item size in bytes
        * byte 3: number of dimensions
        * next ``4 * ndim`` bytes: each dimension as little-endian uint32
        * remainder: the raw element bytes in C order

    Args:
        array: The ``numpy.ndarray`` to serialize. Arrays that are not
            C-contiguous (transposes, strided slices, Fortran order) are
            copied into C order first.

    Returns:
        bytes: The packed header followed by the element data.
    """
    # Concatenating ``array.data`` requires a C-contiguous buffer.
    if not array.flags.c_contiguous:
        array = np.ascontiguousarray(array)

    ndim = len(array.shape)
    native_order = '<' if sys.byteorder == 'little' else '>'
    order_char = array.dtype.byteorder.replace('=', native_order)

    header = (
        bytes(order_char + array.dtype.kind, 'utf-8')
        + array.dtype.itemsize.to_bytes(1, byteorder='little')
        + struct.pack(f'<B{ndim}I', ndim, *array.shape)
    )
    return header + array.data
def load(file):
    """Read one array from ``file``, assuming a fixed 128-byte NPY preamble.

    The dtype description and shape are taken from fixed byte offsets of the
    first 128 bytes (``[19:25]`` and ``[60:120]``) rather than by parsing the
    header dict, so the input must use that exact layout.

    Args:
        file: Either a path (``str``), which is opened for binary reading and
            closed again before returning, or an already-open binary
            file-like object, which is left open for the caller.

    Returns:
        A ``numpy.ndarray`` built on the bytes read after the preamble, or
        ``None`` when nothing could be read from ``file``.
    """
    opened_here = isinstance(file, str)
    if opened_here:
        file = open(file, "rb")
    try:
        header = file.read(128)
        if not header:
            return None

        descr = str(header[19:25], 'utf-8').replace("'", "").replace(" ", "")
        shape_text = (
            str(header[60:120], 'utf-8')
            .replace(',)', ')')
            .replace(', }', '')
            .replace('(', '')
            .replace(')', '')
        )
        # Skip blank tokens so that a 0-d array (shape ``()``) yields an
        # empty tuple instead of failing on ``int('')``.
        shape = tuple(
            int(token) for token in shape_text.split(',') if token.strip()
        )

        datasize = numpy.lib.format.descr_to_dtype(descr).itemsize
        for dimension in shape:
            datasize *= dimension
        return np.ndarray(shape, dtype=descr, buffer=file.read(datasize))
    finally:
        # Only close handles created here; caller-owned streams stay open.
        if opened_here:
            file.close()
def unpack(data):
    """Rebuild an array from the byte layout produced by ``pack``.

    Expected layout of ``data``:
        * bytes 0-1: text prefix of the dtype string
        * byte 2: item size in bytes (also appended to the dtype string)
        * byte 3: number of dimensions
        * from byte 4: each dimension as a little-endian uint32
        * afterwards: the raw element bytes

    Args:
        data: Bytes-like object holding the packed array.

    Returns:
        A ``numpy.ndarray`` of the encoded shape and dtype, built on the
        payload slice of ``data``.
    """
    type_code = str(data[:2], 'utf-8') + str(data[2])
    ndim = data[3]
    dims = struct.unpack_from(f'<{ndim}I', data, 4)

    # The payload begins right after the 4 fixed bytes and the dimension list.
    payload_start = 4 + ndim * 4
    payload_len = data[2]
    for extent in dims:
        payload_len *= extent

    payload = data[payload_start:payload_start + payload_len]
    return np.ndarray(dims, dtype=type_code, buffer=payload)
| # --------------------------------------------------------------------------------- | |
def _time_calls(label, func, iterations):
    """Time ``func`` over ``iterations`` calls and print mean and std.

    Args:
        label: Text printed in front of the mean and standard deviation.
        func: Zero-argument callable to measure.
        iterations: Number of timed calls.

    Returns:
        The value returned by the last call to ``func`` (``None`` if
        ``iterations`` is 0), so a serialized result can feed the matching
        deserialization benchmark.
    """
    durations = []
    result = None
    for _ in range(iterations):
        # perf_counter is the clock intended for measuring short intervals;
        # time.time() can jump when the system clock is adjusted.
        started = time.perf_counter()
        result = func()
        durations.append(time.perf_counter() - started)
    print(label, np.mean(durations), np.std(durations))
    return result


def main():
    """Benchmark several ways of (de)serializing one large float64 array."""
    iterations = 10
    print("iterations=", iterations)
    data = np.random.rand(64, 512, 512)
    print("data.shape =", data.shape, "total size =", data.size * data.itemsize / 1024 / 1024, "MB")

    print("---- testing pickle ----")
    pickled = _time_calls("pickle dumps time: ", lambda: pickle.dumps(data), iterations)
    _time_calls("pickle loads time: ", lambda: pickle.loads(pickled), iterations)

    print("---- testing cPickle ----")
    c_pickled = _time_calls("cPickle dumps time: ", lambda: cPickle.dumps(data), iterations)
    _time_calls("cPickle loads time: ", lambda: cPickle.loads(c_pickled), iterations)

    print("---- testing marshal ----")
    marshalled = _time_calls("marshal dumps time: ", lambda: marshal.dumps(data), iterations)
    _time_calls("marshal loads time: ", lambda: marshal.loads(marshalled), iterations)

    print("---- testing numpy save/load ----")
    buffer = io.BytesIO()

    def save_to_buffer():
        # Rewind so every iteration overwrites the same bytes; without this
        # each save appends and the buffer grows by one full array per call.
        buffer.seek(0)
        np.save(buffer, data)

    def load_from_buffer():
        buffer.seek(0)
        return np.load(buffer)

    _time_calls("numpy save time: ", save_to_buffer, iterations)
    _time_calls("numpy load time: ", load_from_buffer, iterations)

    print("---- testing fast numpy io ----")
    packed = _time_calls("fast numpy io dumps time: ", lambda: pack(data), iterations)
    _time_calls("fast numpy io loads time: ", lambda: unpack(packed), iterations)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment