Last active
April 18, 2023 16:25
-
-
Save Garlando/4e5aa44a193a892f0bc5c13a14a094e7 to your computer and use it in GitHub Desktop.
Create a manifest file for Redshift from specific objects in S3
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import boto3 | |
| import json | |
def get_matching_objects(line, s3, *, bucket='my-bucket', key_prefix='prefix/'):
    """Collect manifest entries for the S3 objects belonging to one event name.

    Lists every object whose key starts with ``key_prefix + line`` in
    ``bucket``, following continuation tokens until the listing is no longer
    truncated. A key is kept when it contains ``'infix_text'`` and contains
    neither ``'errors'`` nor ``'manifests'``. Each kept key is printed.

    Args:
        line: Event name appended to ``key_prefix`` to form the listing prefix.
        s3: Object exposing ``list_objects_v2`` (e.g. a boto3 S3 client).
        bucket: Bucket to list and to use in the generated ``s3://`` URLs.
            Defaults to the value that used to be hard-coded.
        key_prefix: Leading part of the listing prefix. Defaults to the value
            that used to be hard-coded.

    Returns:
        A list of ``{'url': ..., 'mandatory': 'true'}`` dicts in listing order;
        empty when nothing matches.
    """
    # One request dict serves every page; only the token changes between calls.
    request = {'Bucket': bucket, 'Prefix': key_prefix + line}
    entries = []
    while True:
        response = s3.list_objects_v2(**request)
        # A response without 'Contents' is treated as an empty page.
        for obj in response.get('Contents', []):
            key = obj['Key']
            if 'infix_text' in key and 'errors' not in key and 'manifests' not in key:
                print(key)
                entries.append({
                    'url': 's3://' + bucket + '/' + key,
                    # NOTE(review): this is the string 'true', not a JSON
                    # boolean. Redshift's manifest examples use a boolean;
                    # confirm whether the string form is intentional.
                    'mandatory': 'true',
                })
        if not response.get('IsTruncated'):
            break
        request['ContinuationToken'] = response['NextContinuationToken']
    return entries
def write_manifest(line, entries):
    """Write ``entries`` as a JSON manifest to ``manifests/<line>.json``.

    The file content is ``{"entries": entries}``. The target directory is
    created when missing, so the script also works on a fresh checkout and
    with names that contain ``/``.

    Args:
        line: Event name; used verbatim as the manifest file's base name.
        entries: JSON-serialisable list of manifest entries.
    """
    import os  # local import keeps this function self-contained

    path = 'manifests/' + line + '.json'
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'w') as f:
        json.dump({'entries': entries}, f)
# Driver: build one manifest per event name listed in events.md (one per line).
with open('events.md', 'r') as f:
    # Skip blank lines: an empty name would list everything under 'prefix/'
    # and write the result to 'manifests/.json'.
    lines = [name for name in (raw.strip() for raw in f) if name]

s3 = boto3.client('s3')
for line in lines:
    print(line)
    entries = get_matching_objects(line, s3)
    write_manifest(line, entries)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| event_type_1 | |
| event_type_2 | |
| foo | |
| bar |
Author
Yes. This also assumes you have boto3 installed for whichever Python installation you're running, and that you have AWS credentials with access to S3.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Looks good, I think; I need to have a play with it. Do you need to be logged in to AWS first?