Created
January 5, 2026 23:59
-
-
Save sethryder/b96f8ba9b175c8e6ad74ae18f953d2e9 to your computer and use it in GitHub Desktop.
glue table version cleanup
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import boto3 | |
| import time | |
| from botocore.exceptions import ClientError | |
# --- CONFIGURATION ---
# Name of the Glue database whose tables are scanned by run_database_cleanup().
DATABASE_NAME = 'your_database_name'
# cleanup_table_versions() sorts versions by numeric VersionId, highest first,
# and keeps this many from the top of that list; the rest are deleted.
VERSIONS_TO_KEEP = 10  # Number of versions to keep per table
# While True, nothing is deleted: the script only prints how many versions
# would be removed per table.
DRY_RUN = True  # Set to False to perform actual deletions
# ---------------------
# Module-level Glue client shared by the functions below. It is created with no
# explicit arguments, so region and credentials come from boto3's defaults.
glue = boto3.client('glue')
def get_all_tables(database):
    """Return the names of all tables in *database*.

    Walks every page of the Glue ``get_tables`` paginator and collects the
    ``Name`` of each entry in the page's ``TableList``.

    Args:
        database: Name of the Glue database to list.

    Returns:
        A list of table names, in the order the paginator yields them.
    """
    pages = glue.get_paginator('get_tables').paginate(DatabaseName=database)
    return [entry['Name'] for page in pages for entry in page['TableList']]
def cleanup_table_versions(database, table, versions_to_keep=None, dry_run=None,
                           client=None):
    """Delete all but the newest versions of a single Glue table.

    Versions are ordered by their numeric ``VersionId`` (highest first); the
    first ``versions_to_keep`` are retained and the remainder are removed in
    batches of at most 100 ids per ``batch_delete_table_version`` call.

    Args:
        database: Name of the Glue database containing the table.
        table: Name of the table whose versions are pruned.
        versions_to_keep: How many of the highest-numbered versions to retain.
            Defaults to the module-level ``VERSIONS_TO_KEEP``.
        dry_run: When true, only report what would be deleted. Defaults to the
            module-level ``DRY_RUN``.
        client: Glue client to use. Defaults to the module-level ``glue``.

    Returns:
        In dry-run mode, the number of versions that would be deleted.
        Otherwise, the number of versions the service did not report an error
        for, including any deleted before a ``ClientError`` stopped the run.

    Raises:
        ValueError: If ``versions_to_keep`` is negative.
    """
    # Resolve defaults at call time so the module configuration remains the
    # single source of truth for callers that pass nothing.
    if versions_to_keep is None:
        versions_to_keep = VERSIONS_TO_KEEP
    if dry_run is None:
        dry_run = DRY_RUN
    if client is None:
        client = glue

    # A negative count would slice from the wrong end of the list and keep or
    # delete an unintended set of versions.
    if versions_to_keep < 0:
        raise ValueError(
            f"versions_to_keep must be >= 0, got {versions_to_keep}"
        )

    paginator = client.get_paginator('get_table_versions')
    deleted = 0
    try:
        version_ids = [
            version['VersionId']
            for page in paginator.paginate(DatabaseName=database, TableName=table)
            for version in page['TableVersions']
        ]
        # VersionId is a string; compare as integers so "10" sorts above "9".
        version_ids.sort(key=int, reverse=True)
        to_delete = version_ids[versions_to_keep:]

        if not to_delete:
            return 0

        if dry_run:
            print(f" [DRY RUN] Would delete {len(to_delete)} versions for table: {table}")
            return len(to_delete)

        # Batch delete in chunks of 100
        for start in range(0, len(to_delete), 100):
            batch = to_delete[start:start + 100]
            response = client.batch_delete_table_version(
                DatabaseName=database,
                TableName=table,
                VersionIds=batch,
            )
            # The call can succeed overall while individual versions fail;
            # those are listed under "Errors" and must not count as deleted.
            failures = response.get('Errors') or []
            for failure in failures:
                detail = failure.get('ErrorDetail') or {}
                print(
                    f" [ERROR] Could not delete version {failure.get('VersionId')} "
                    f"of {table}: {detail.get('ErrorMessage', 'unknown error')}"
                )
            deleted += len(batch) - len(failures)
            # Short sleep to prevent Throttling (Adjust if needed)
            time.sleep(0.2)
    except ClientError as e:
        # Best effort: report the failure and fall through so that versions
        # removed by earlier batches are still counted.
        print(f" [ERROR] Could not process {table}: {e}")
    return deleted
def run_database_cleanup():
    """Prune old table versions for every table in ``DATABASE_NAME``.

    Lists the database's tables with ``get_all_tables``, calls
    ``cleanup_table_versions`` on each one while printing progress, and
    finishes with a summary of the total count returned by those calls.
    """
    print(f"--- Starting Cleanup for Database: {DATABASE_NAME} ---")

    tables = get_all_tables(DATABASE_NAME)
    table_count = len(tables)
    print(f"Found {table_count} tables to process.")

    total_deleted = 0
    for position, table_name in enumerate(tables, start=1):
        print(f"[{position}/{table_count}] Processing table: {table_name}...")
        total_deleted += cleanup_table_versions(DATABASE_NAME, table_name)

    print("\n--- Finished ---")
    # Choose the whole phrase up front; interpolating an empty string here
    # left a double space in the non-dry-run summary.
    outcome = "would be deleted" if DRY_RUN else "deleted"
    print(f"Total versions {outcome}: {total_deleted}")
# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run_database_cleanup()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment