Skip to content

Instantly share code, notes, and snippets.

@turtleizzy
Created November 15, 2025 09:47
Show Gist options
  • Select an option

  • Save turtleizzy/4b3b96449cc5dd2f30a7108dee0b0f53 to your computer and use it in GitHub Desktop.

Select an option

Save turtleizzy/4b3b96449cc5dd2f30a7108dee0b0f53 to your computer and use it in GitHub Desktop.
Performance comparison for marshalling large array blocks
import pickle
import _pickle as cPickle
import marshal
import numpy as np
import time
import io
# ---------------------------------------------------------------------------------
# from fastnumpyio
# https://github.com/divideconcept/fastnumpyio/blob/main/fastnumpyio.py
import sys
import numpy as np
import numpy.lib.format
import struct
def save(file, array):
    """Write *array* as a version-1.0 ``.npy`` stream with a fixed 128-byte preamble.

    The preamble is the 10-byte magic/version/length prefix followed by a
    118-byte header dictionary (space padded, newline terminated).  The data
    is always written in C order, matching the ``'fortran_order': False``
    entry in the header.

    Args:
        file: Either a path (``str``), which is opened in binary write mode
            and closed again before returning, or an already open binary
            file-like object with a ``write`` method, which is left open.
        array: The ``numpy.ndarray`` to store.  Non-contiguous input (for
            example a transposed view) is copied into C order first.

    Raises:
        ValueError: If the header dictionary does not fit into the fixed
            118-byte header (very long shapes); writing it anyway would
            produce a file whose declared header length is wrong.
    """
    magic_string = b"\x93NUMPY\x01\x00v\x00"
    header_text = (
        "{'descr': '" + array.dtype.descr[0][1]
        + "', 'fortran_order': False, 'shape': " + str(array.shape) + ", }"
    )
    # One byte of the fixed preamble is reserved for the trailing newline.
    room = 127 - len(magic_string)
    if len(header_text) > room:
        raise ValueError(
            "array header needs %d characters but the fixed-size header "
            "only has room for %d" % (len(header_text), room)
        )
    header = bytes(header_text.ljust(room) + "\n", 'utf-8')

    # A memoryview of a non-C-contiguous array cannot be written directly.
    payload = np.ascontiguousarray(array).data

    opened_here = isinstance(file, str)
    if opened_here:
        file = open(file, "wb")
    try:
        file.write(magic_string)
        file.write(header)
        file.write(payload)
    finally:
        # Only close handles this function created; callers own theirs.
        if opened_here:
            file.close()
def pack(array):
    """Serialize *array* into a compact, self-describing byte string.

    Layout of the result:
        byte 0        byte-order character of the dtype ('=' is replaced by
                      the explicit native order, '<' or '>')
        byte 1        dtype kind character (``array.dtype.kind``)
        byte 2        element size in bytes
        byte 3        number of dimensions
        4 * ndim      the shape, one little-endian uint32 per dimension
        remainder     the raw element data in C order

    Args:
        array: The ``numpy.ndarray`` to serialize.  Non-contiguous input
            (for example a transposed view) is copied into C order first.

    Returns:
        bytes: The packed representation.

    Raises:
        OverflowError: If the element size does not fit into one byte.
        struct.error: If a dimension does not fit into an unsigned 32-bit
            integer.
    """
    ndim = len(array.shape)
    native_order = '<' if sys.byteorder == 'little' else '>'
    order_char = array.dtype.byteorder.replace('=', native_order)
    return b"".join((
        bytes(order_char + array.dtype.kind, 'utf-8'),
        array.dtype.itemsize.to_bytes(1, byteorder='little'),
        struct.pack(f'<B{ndim}I', ndim, *array.shape),
        # The memoryview of a non-C-contiguous array cannot be joined into
        # bytes, so make sure the payload is C-contiguous.
        np.ascontiguousarray(array).data,
    ))
def load(file):
    """Read one array from a ``.npy`` stream that has a fixed 128-byte preamble.

    The header is parsed by fixed offsets rather than evaluated: the dtype
    string is taken from bytes 19-24 (so at most four characters, e.g.
    ``<f8`` or ``<c16``) and the shape from bytes 60-119.  The header's
    ``fortran_order`` entry is not consulted; the data is always interpreted
    in C order.

    Args:
        file: Either a path (``str``), which is opened in binary read mode
            and closed again before returning, or an already open binary
            file-like object with a ``read`` method, which is left open so
            further arrays can be read from it.

    Returns:
        numpy.ndarray | None: The array, backed by the bytes that were read
        (and therefore read-only), or ``None`` when the stream is already
        at end-of-file.
    """
    opened_here = isinstance(file, str)
    if opened_here:
        file = open(file, "rb")
    try:
        header = file.read(128)
        if not header:
            return None
        descr = str(header[19:25], 'utf-8').replace("'", "").replace(" ", "")
        shape_text = (
            str(header[60:120], 'utf-8')
            .replace(',)', ')')
            .replace(', }', '')
            .replace('(', '')
            .replace(')', '')
        )
        # A 0-d array has shape "()", which leaves only padding behind;
        # skip blank fields instead of feeding them to int().
        shape = tuple(int(num) for num in shape_text.split(',') if num.strip())
        dtype = np.dtype(descr)
        datasize = dtype.itemsize
        for dimension in shape:
            datasize *= dimension
        return np.ndarray(shape, dtype=dtype, buffer=file.read(datasize))
    finally:
        # Only close handles this function created; callers own theirs.
        if opened_here:
            file.close()
def unpack(data):
    """Rebuild an array from a byte string laid out as follows.

    Expected layout of *data*:
        bytes 0-1     byte-order character and dtype kind character
        byte 2        element size in bytes
        byte 3        number of dimensions
        4 * ndim      the shape, one little-endian uint32 per dimension
        remainder     the raw element data in C order

    Args:
        data: A bytes-like, sliceable buffer in the layout above.  Bytes
            after the array payload are ignored.

    Returns:
        numpy.ndarray: An array built on a slice of *data* holding exactly
        the payload bytes.
    """
    order_and_kind = str(data[:2], 'utf-8')
    itemsize = data[2]
    ndim = data[3]
    shape = struct.unpack_from(f'<{ndim}I', data, 4)

    # The header stores the element size in bytes, but NumPy's 'U' type
    # string counts UCS-4 characters (4 bytes each).  Using the byte count
    # there would request elements four times too large.
    if order_and_kind[1:] == 'U':
        type_length = itemsize // 4
    else:
        type_length = itemsize
    dtype = order_and_kind + str(type_length)

    start = 4 + ndim * 4
    datasize = itemsize
    for dimension in shape:
        datasize *= dimension
    return np.ndarray(shape, dtype=dtype, buffer=data[start:start + datasize])
# ---------------------------------------------------------------------------------
def _time_calls(label, func, repeats, keep_result=False):
    """Time *func* over *repeats* calls and print the mean and std deviation.

    Args:
        label: Text printed in front of the two statistics.
        func: Zero-argument callable to measure.
        repeats: Number of timed calls.
        keep_result: When true, the value of the last call is returned so a
            following benchmark can consume it.  When false, each result is
            dropped immediately so it does not stay alive during the next
            call.

    Returns:
        The result of the last call if *keep_result* is true, else ``None``.
    """
    durations = []
    result = None
    for _ in range(repeats):
        # perf_counter is the monotonic, high-resolution clock intended for
        # measuring intervals; time.time() can jump with wall-clock changes.
        started = time.perf_counter()
        if keep_result:
            result = func()
        else:
            func()
        durations.append(time.perf_counter() - started)
    print(label, np.mean(durations), np.std(durations))
    return result


if __name__ == "__main__":
    # Benchmark several ways of serializing and deserializing one large array.
    iterations = 10
    print("iterations=", iterations)
    data = np.random.rand(64, 512, 512)
    print("data.shape =", data.shape, "total size =", data.size * data.itemsize / 1024 / 1024, "MB")

    print("---- testing pickle ----")
    res = _time_calls("pickle dumps time: ", lambda: pickle.dumps(data), iterations, keep_result=True)
    _time_calls("pickle loads time: ", lambda: pickle.loads(res), iterations)

    print("---- testing cPickle ----")
    res = _time_calls("cPickle dumps time: ", lambda: cPickle.dumps(data), iterations, keep_result=True)
    _time_calls("cPickle loads time: ", lambda: cPickle.loads(res), iterations)

    print("---- testing marshal ----")
    res = _time_calls("marshal dumps time: ", lambda: marshal.dumps(data), iterations, keep_result=True)
    _time_calls("marshal loads time: ", lambda: marshal.loads(res), iterations)

    print("---- testing numpy save/load ----")
    buffer = io.BytesIO()

    def _numpy_save():
        # Rewind first: without this every iteration appends another copy
        # of the array, so the buffer grows to iterations * array size and
        # the measurement includes that growth.
        buffer.seek(0)
        np.save(buffer, data)

    def _numpy_load():
        buffer.seek(0)
        np.load(buffer)

    _time_calls("numpy save time: ", _numpy_save, iterations)
    _time_calls("numpy load time: ", _numpy_load, iterations)

    print("---- testing fast numpy io ----")
    res = _time_calls("fast numpy io dumps time: ", lambda: pack(data), iterations, keep_result=True)
    _time_calls("fast numpy io loads time: ", lambda: unpack(res), iterations)

Performance comparison for marshalling large array blocks in Python

iterations= 10
data.shape = (64, 512, 512) total size = 128.0 MB
---- testing pickle ----
pickle dumps time:  0.16670258045196534 0.007583755528625725
pickle loads time:  0.08387384414672852 0.0015653707744914705
---- testing cPickle ----
cPickle dumps time:  0.1355360269546509 0.02795624634235191
cPickle loads time:  0.05218687057495117 0.0003695142975977074
---- testing marshal ----
marshal dumps time:  0.05070292949676514 0.0007349747542887223
marshal loads time:  0.05259006023406983 0.000816788432272282
---- testing numpy save/load ----
numpy save time:  0.06062030792236328 0.003113365628325949
numpy load time:  0.028949522972106935 0.0029791145564422483
---- testing fast numpy io ----
fast numpy io dumps time:  0.05341365337371826 0.001540740588879568
fast numpy io loads time:  0.05321941375732422 0.0008756969021245359
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment