Skip to content

Instantly share code, notes, and snippets.

@naiaden
Last active March 17, 2022 22:46
Show Gist options
  • Select an option

  • Save naiaden/f33f63beacf0a27a535428870cf3c949 to your computer and use it in GitHub Desktop.

Select an option

Save naiaden/f33f63beacf0a27a535428870cf3c949 to your computer and use it in GitHub Desktop.
Two chunkers for lists
import sys

# Repetition counts for the sample strings; the spread is deliberately wide
# (from 1 up to 100 million repeats) so the resulting sizes vary a lot.
s = [1, 5, 10, 5, 10, 2000, 5, 1, 2, 100_000_000, 5, 100_000, 10]

# One string per count: "asd" repeated that many times.
d = ["asd" * repeat for repeat in s]

# Shallow size of every sample string, as reported by sys.getsizeof.
xx = list(map(sys.getsizeof, d))
print(xx)
import more_itertools
def LengthChunker(iterable, length=5):
    """Chunk *iterable* by item count.

    Thin wrapper: both arguments are handed straight to
    ``more_itertools.ichunked`` and its result is returned unchanged.

    Args:
        iterable: The source of items to split up.
        length: Number of items requested per chunk (passed as the second
            positional argument of ``ichunked``).

    Returns:
        Whatever ``more_itertools.ichunked(iterable, length)`` returns.
    """
    chunks = more_itertools.ichunked(iterable, length)
    return chunks
# Demo: split the size list into groups of three and print each group.
# Each chunk is materialised right away so it is printed as a plain list.
for y in LengthChunker(xx, 3):
    print([*y])
##
# Sentinel object kept at module level for backward compatibility; the
# chunker below no longer needs it because it iterates with a plain for-loop.
_marker = object()


def SizeChunker(iterable, size=25, *, yield_items=False):
    """Group consecutive items so each chunk's total size stays within *size*.

    Item size is measured with ``sys.getsizeof``, which is shallow: it does
    not follow references into contained objects.

    A chunk is closed as soon as adding the next item would push its total
    above *size*. An item that is larger than *size* on its own is never
    dropped or split; it is emitted as a single-item chunk. Empty chunks are
    never produced, so an empty *iterable* yields nothing at all.

    Args:
        iterable: Source of items; consumed lazily, one item at a time.
        size: Upper bound for the summed ``sys.getsizeof`` of a chunk.
        yield_items: When false (the default), each chunk contains the
            measured sizes of its items rather than the items themselves,
            which keeps output printable for very large items. When true,
            each chunk contains the original items.

    Yields:
        An iterator over one chunk. Every chunk is backed by its own list,
        so an earlier chunk stays valid after the generator is advanced.
    """
    chunk = []
    chunk_size = 0
    for item in iterable:
        item_size = sys.getsizeof(item)
        # Close the running chunk before it would overflow. The `chunk`
        # guard guarantees an oversized item still lands in a chunk of its
        # own instead of triggering an empty one.
        if chunk and chunk_size + item_size > size:
            yield iter(chunk)
            # Rebind instead of clearing so the iterator just handed out
            # keeps seeing its own data.
            chunk = []
            chunk_size = 0
        chunk.append(item if yield_items else item_size)
        chunk_size += item_size
    # Flush the remainder; skipped when nothing is pending (empty input).
    if chunk:
        yield iter(chunk)
# Demo for SizeChunker: rebuild the sample strings, then print every chunk
# produced with a 250-byte budget together with its running index.
s = [1, 5, 10, 5, 10, 2000, 5, 1, 2, 100_000_000, 5, 100_000, 10]
d = ["asd" * repeat for repeat in s]
for i, y in enumerate(SizeChunker(d, 250)):
    print(i, [*y])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment