Skip to content

Instantly share code, notes, and snippets.

@sethryder
Created January 5, 2026 23:59
Show Gist options
  • Select an option

  • Save sethryder/b96f8ba9b175c8e6ad74ae18f953d2e9 to your computer and use it in GitHub Desktop.

Select an option

Save sethryder/b96f8ba9b175c8e6ad74ae18f953d2e9 to your computer and use it in GitHub Desktop.
glue table version cleanup
import boto3
import time
from botocore.exceptions import ClientError
# --- CONFIGURATION ---
# Name of the Glue database whose tables are processed; replace the
# placeholder value before running.
DATABASE_NAME = 'your_database_name'
# Versions are ordered by numeric VersionId, highest first; this many from the
# top of that ordering are kept and the rest are selected for deletion.
VERSIONS_TO_KEEP = 10 # Number of versions to keep per table
# While True the script only prints what it would delete and calls no delete API.
DRY_RUN = True # Set to False to perform actual deletions
# ---------------------
# Module-level Glue client used by every function in this script; it is
# created with no explicit region or credential arguments.
glue = boto3.client('glue')
def get_all_tables(database):
    """Return the names of all tables in ``database``.

    Walks every page produced by the Glue ``get_tables`` paginator and
    collects the ``Name`` field of each entry in the page's ``TableList``.

    Args:
        database: Name of the Glue database to list.

    Returns:
        A list of table names, in the order the paginator yields them.
    """
    pages = glue.get_paginator('get_tables').paginate(DatabaseName=database)
    return [entry['Name'] for page in pages for entry in page['TableList']]
def cleanup_table_versions(database, table):
    """Delete old versions of a single table, keeping the newest ones.

    All versions of ``table`` are fetched, ordered by numeric ``VersionId``
    (highest first), and everything after the first ``VERSIONS_TO_KEEP``
    entries is selected for deletion. When ``DRY_RUN`` is true nothing is
    deleted and the selection is only reported.

    Args:
        database: Name of the Glue database containing the table.
        table: Name of the table whose versions are pruned.

    Returns:
        In dry-run mode, the number of versions that would be deleted.
        Otherwise, the number of versions actually deleted: versions that the
        batch-delete response lists under ``Errors`` are not counted, and if
        a ``ClientError`` interrupts the run the count of versions deleted
        before the failure is returned (0 if none were).
    """
    paginator = glue.get_paginator('get_table_versions')
    all_versions = []
    deleted = 0
    try:
        for page in paginator.paginate(DatabaseName=database, TableName=table):
            all_versions.extend(page['TableVersions'])

        # VersionId is a string; compare as int so "10" sorts after "9".
        all_versions.sort(key=lambda v: int(v['VersionId']), reverse=True)
        to_delete = [v['VersionId'] for v in all_versions[VERSIONS_TO_KEEP:]]
        if not to_delete:
            return 0

        if DRY_RUN:
            print(f" [DRY RUN] Would delete {len(to_delete)} versions for table: {table}")
            return len(to_delete)

        # Batch delete in chunks of 100 version IDs per request.
        for start in range(0, len(to_delete), 100):
            batch = to_delete[start:start + 100]
            response = glue.batch_delete_table_version(
                DatabaseName=database,
                TableName=table,
                VersionIds=batch
            )
            # The call can succeed overall while individual versions fail;
            # those are reported in the response instead of being raised.
            errors = response.get('Errors') or []
            for err in errors:
                detail = err.get('ErrorDetail') or {}
                print(
                    f" [ERROR] Could not delete version {err.get('VersionId')} "
                    f"of {table}: {detail.get('ErrorMessage')}"
                )
            deleted += len(batch) - len(errors)
            # Short sleep to prevent Throttling (Adjust if needed)
            time.sleep(0.2)
    except ClientError as e:
        # Fall through so that versions removed by earlier batches are still
        # reflected in the returned count.
        print(f" [ERROR] Could not process {table}: {e}")
    return deleted
def run_database_cleanup():
    """Run the version cleanup over every table in ``DATABASE_NAME``.

    Lists the database's tables, calls ``cleanup_table_versions`` on each one
    while printing progress, and finally prints the total of the counts those
    calls returned. In dry-run mode that total is the number of versions that
    would be deleted.
    """
    print(f"--- Starting Cleanup for Database: {DATABASE_NAME} ---")
    tables = get_all_tables(DATABASE_NAME)
    print(f"Found {len(tables)} tables to process.")

    total_deleted = 0
    for position, table_name in enumerate(tables, start=1):
        print(f"[{position}/{len(tables)}] Processing table: {table_name}...")
        total_deleted += cleanup_table_versions(DATABASE_NAME, table_name)

    print("\n--- Finished ---")
    # Build the verb phrase as a whole so the non-dry-run message does not
    # end up with a doubled space where "would be" was omitted.
    outcome = "would be deleted" if DRY_RUN else "deleted"
    print(f"Total versions {outcome}: {total_deleted}")


# Only start the cleanup when the file is executed as a script, not on import.
if __name__ == "__main__":
    run_database_cleanup()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment