Last active
March 17, 2022 22:46
-
-
Save naiaden/f33f63beacf0a27a535428870cf3c949 to your computer and use it in GitHub Desktop.
Two chunkers for lists
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys

# Repeat counts for the demo strings. The 100_000_000 entry deliberately
# produces one very large string (300 million characters).
s = [1,5,10,5,10,2000,5,1,2,100_000_000,5,100_000,10]

# One string per repeat count: "asd" concatenated with itself that many times.
d = ["asd" * repeat for repeat in s]

# Shallow size of every demo string as reported by sys.getsizeof, in the
# same order as `d`.
xx = list(map(sys.getsizeof, d))
print(xx)
| import more_itertools | |
def LengthChunker(iterable, length=5):
    """Split *iterable* into consecutive chunks by item count.

    This is a thin wrapper that hands both arguments straight to
    ``more_itertools.ichunked`` and returns whatever that call produces.

    Args:
        iterable: The iterable to split.
        length: Chunk length passed on to ``ichunked`` (defaults to 5).

    Returns:
        The result of ``more_itertools.ichunked(iterable, length)``.
    """
    chunks = more_itertools.ichunked(iterable, length)
    return chunks
# Demo: split the list of sizes into groups of three and print each group.
# Every chunk is materialised with list() so its contents show up in the
# output.
for chunk in LengthChunker(xx, 3):
    members = list(chunk)
    print(members)
| ## | |
# Unique sentinel returned by next() once the source iterator is exhausted.
# A dedicated object is used so that None or other falsy items in the input
# are never mistaken for the end of the data.
_marker = object()


def SizeChunker(iterable, size=25):
    """Group consecutive items into chunks bounded by their summed size.

    Every item is measured with ``sys.getsizeof``. The running chunk is
    closed as soon as adding the next item would push its total above
    ``size``. An item that is larger than ``size`` on its own is not
    dropped; it ends up alone in a chunk of its own.

    Each chunk holds the measured *sizes* of its items, not the items
    themselves, which keeps the output small when the items are very large
    strings.

    Args:
        iterable: Any iterable of objects to measure and group.
        size: Upper bound for the summed ``sys.getsizeof`` value of one
            chunk (only exceeded by a single oversized item).

    Yields:
        An iterator over the item sizes of one chunk, in input order.
        Nothing is yielded for an empty ``iterable``; no chunk is ever empty.
    """
    source = iter(iterable)
    chunk_sizes = []
    chunk_total = 0

    while True:
        item = next(source, _marker)
        if item is _marker:
            break

        item_size = sys.getsizeof(item)

        # Close the running chunk first if this item would overflow it. The
        # emptiness check lets an oversized item open its own chunk instead
        # of triggering an empty one.
        if chunk_sizes and chunk_total + item_size > size:
            yield iter(chunk_sizes)
            # Rebind rather than clear(): the iterator handed out above still
            # refers to the old list and must keep its contents.
            chunk_sizes = []
            chunk_total = 0

        chunk_sizes.append(item_size)
        chunk_total += item_size

    # Flush whatever is left. Skipped when nothing was collected so that an
    # empty input does not produce a spurious empty chunk.
    if chunk_sizes:
        yield iter(chunk_sizes)
# Demo data for the size-based chunker: repeat counts and the strings built
# from them ("asd" repeated that many times).
s = [1,5,10,5,10,2000,5,1,2,100_000_000,5,100_000,10]
d = ["asd" * repeat for repeat in s]

# Chunk the strings with a size limit of 250 and print each chunk next to
# its running index.
for index, chunk in enumerate(SizeChunker(d, 250)):
    print(index, list(chunk))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment