Skip to content

Instantly share code, notes, and snippets.

@MrJeremyHobbs
Created February 29, 2020 01:55
Show Gist options
  • Select an option

  • Save MrJeremyHobbs/9ba7ddc724c7c6ee10b0c77016abc7fb to your computer and use it in GitHub Desktop.

Select an option

Save MrJeremyHobbs/9ba7ddc724c7c6ee10b0c77016abc7fb to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
import os
import logging
from pymarc import MARCReader
from tqdm import tqdm
def main(data_dir='data', log_file='testing.log', total_records=3715163):
    """Count the MARC records in every file of *data_dir*, logging per-file totals.

    Each file in *data_dir* is opened in binary mode and read with
    ``MARCReader``; records are only counted, not inspected. Errors raised
    while fetching a record are logged and skipped by ``wrapper``.

    Args:
        data_dir: Directory whose entries are read as MARC files.
        log_file: Log file path; any existing file is removed first so each
            run starts with a fresh log.
        total_records: Expected number of records across all files, used
            only to size the progress bar.
    """
    # clean-up: clear the console and drop the previous run's log.
    # os.system() reports failure through its return value, never by
    # raising, so no exception handler is needed around it.
    os.system('cls' if os.name == 'nt' else 'clear')
    try:
        os.remove(log_file)
    except FileNotFoundError:
        pass  # first run: nothing to delete
    except OSError as error:
        # Best effort, like the original shell 'del': report and carry on.
        print(f'Could not remove {log_file}: {error}')

    # logging
    logging.basicConfig(filename=log_file, level=logging.DEBUG)

    # Initialised up front so the final print works even when data_dir is
    # empty (otherwise the name would be unbound after the loop).
    counter = 0

    # progressbar; the context manager closes the bar even if a file fails.
    with tqdm(total=total_records) as pbar:
        # open marc files and read contents
        # slice the listing here to limit which files are read
        for filename in os.listdir(data_dir):
            counter = 0
            with open(os.path.join(data_dir, filename), 'rb') as fh:
                reader = MARCReader(fh, to_unicode=False, utf8_handling='ignore')
                pbar.set_description(f"Reading {filename}")

                # loop through records; wrapper() logs and skips read errors
                for _record in wrapper(reader):
                    counter += 1
                    pbar.update(1)

            # log totals
            logging.info(f'{filename} total: {counter}')

        # finish
        pbar.set_description("Finished")

    # Record count of the last file processed (0 if there were no files).
    print(counter)
# functions
def wrapper(gen):
    """Yield items from *gen*, logging and skipping any fetch that raises.

    Args:
        gen: Any iterable or iterator. An exception raised while fetching
            the next item is logged at INFO level and iteration continues
            with the following item.

    Yields:
        Each item that was fetched without raising.

    Note:
        A source that raises the same exception on every ``next()`` call
        without ever raising ``StopIteration`` will keep this loop running.
    """
    # iter() lets plain iterables (lists, tuples, ...) be passed in. Calling
    # next() directly on a non-iterator raises TypeError, which the broad
    # handler below would otherwise log and retry forever.
    iterator = iter(gen)
    while True:
        try:
            item = next(iterator)
        except StopIteration:
            return
        except Exception as error:
            logging.info(error)
            continue
        # Yield outside the try so only the fetch itself is guarded.
        yield item
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
@MrJeremyHobbs
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment