Skip to content

Instantly share code, notes, and snippets.

@moio
Created November 7, 2025 14:14
Show Gist options
  • Select an option

  • Save moio/effa110473a896b47832ca0d31a61b9e to your computer and use it in GitHub Desktop.

Select an option

Save moio/effa110473a896b47832ca0d31a61b9e to your computer and use it in GitHub Desktop.
This script analyzes Kubernetes watch event YAML files
#!/usr/bin/env -S uv run --with pyyaml python
import glob
import os
import yaml
import datetime
# This script analyzes Kubernetes watch event YAML files captured with the following snippet:
#
#   for kind in clusterroles roles rolebindings clusterrolebindings crds; do
#     {
#       kubectl get -A --show-managed-fields --chunk-size=0 -w --output-watch-events -o yaml $kind > $kind-watch-events.yaml &
#       pid=$!
#       sleep 120
#       kill $pid
#     } &
#   done ; wait
# The duration of your data capture script (must match the `sleep` in the
# capture snippet, otherwise the computed start time and rate are wrong).
CAPTURE_DURATION_SECONDS = 120
TIME_MINUTES = CAPTURE_DURATION_SECONDS / 60.0


def parse_creation_timestamp(value):
    """Return a ``creationTimestamp`` value as a timezone-aware UTC datetime.

    Accepts either the string form ("2024-08-16T00:10:36Z", optionally with
    fractional seconds or a numeric UTC offset) or a ``datetime`` object.
    The latter matters because PyYAML's safe loader resolves *unquoted*
    ISO-8601 scalars to ``datetime`` instances instead of strings.

    Naive datetimes are assumed to already be in UTC.

    Raises:
        TypeError: if *value* is neither a ``str`` nor a ``datetime``.
        ValueError: if a string cannot be parsed as a timestamp.
    """
    if isinstance(value, datetime.datetime):
        parsed = value
    elif isinstance(value, str):
        try:
            # Fast path: the canonical second-precision "Z" form.
            parsed = datetime.datetime.strptime(value, "%Y-%m-%dT%H:%M:%SZ")
        except ValueError:
            # Fallback for fractional seconds / explicit offsets. The "Z"
            # suffix is rewritten because fromisoformat() only accepts it
            # natively on newer Python versions.
            iso_text = value[:-1] + "+00:00" if value.endswith("Z") else value
            parsed = datetime.datetime.fromisoformat(iso_text)
    else:
        raise TypeError(
            f"unsupported creationTimestamp type: {type(value).__name__}"
        )
    if parsed.tzinfo is None:
        return parsed.replace(tzinfo=datetime.timezone.utc)
    return parsed.astimezone(datetime.timezone.utc)


def tally_events(docs, capture_start_utc, source="<stream>"):
    """Count watch events by category.

    Args:
        docs: iterable of parsed YAML documents (one per watch event);
            ``None`` entries (empty documents between "---") are skipped.
        capture_start_utc: timezone-aware datetime; ADDED events whose object
            was created before this instant are counted as initial load.
        source: label used in warning messages (normally the file path).

    Returns:
        dict with the integer counters ``initial_load_adds``,
        ``real_time_adds``, ``modified_changes``, ``deleted_changes`` and
        ``errors`` (documents/events that could not be classified).
    """
    counts = {
        "initial_load_adds": 0,
        "real_time_adds": 0,
        "modified_changes": 0,
        "deleted_changes": 0,
        "errors": 0,
    }
    for doc in docs:
        if doc is None:
            continue  # Skip empty documents between "---"
        if not isinstance(doc, dict):
            # A stray scalar/list must not abort the whole file.
            print(f" [Warning] Skipping non-mapping document in {source}")
            counts["errors"] += 1
            continue

        event_type = doc.get('type')
        if event_type == 'MODIFIED':
            counts["modified_changes"] += 1
        elif event_type == 'DELETED':
            counts["deleted_changes"] += 1
        elif event_type == 'ADDED':
            try:
                raw_timestamp = (
                    doc.get('object', {}).get('metadata', {}).get('creationTimestamp')
                )
                created_utc = (
                    parse_creation_timestamp(raw_timestamp) if raw_timestamp else None
                )
            except (AttributeError, TypeError, ValueError) as e:
                print(f" [Warning] Error parsing timestamp in {source}: {e}")
                counts["errors"] += 1
                continue
            # This is the core logic: an object created AFTER the capture
            # started is a real-time ADDED event; anything older is part of
            # the initial list the watch replays on connect. A missing
            # timestamp is treated as real-time (unlikely, but safe).
            if created_utc is None or created_utc >= capture_start_utc:
                counts["real_time_adds"] += 1
            else:
                counts["initial_load_adds"] += 1
        # Other event types such as ERROR or BOOKMARK are ignored.
    return counts


def analyze_file(file_path):
    """Parse one capture file and return ``(capture_start_utc, counts)``.

    The capture start is estimated as the file's modification time minus
    ``CAPTURE_DURATION_SECONDS``.
    NOTE: mtime is the time of the *last write*, which is only a best guess
    for when the `kill` ran; if the watch was idle before the kill, the
    estimated start is earlier than the real one.

    Raises:
        OSError: if the file cannot be stat'ed or opened.
        yaml.YAMLError: if the stream is not valid YAML.
    """
    file_mod_time_utc = datetime.datetime.fromtimestamp(
        os.path.getmtime(file_path), tz=datetime.timezone.utc
    )
    capture_start_utc = file_mod_time_utc - datetime.timedelta(
        seconds=CAPTURE_DURATION_SECONDS
    )
    with open(file_path, 'r', encoding='utf-8') as f:
        # safe_load_all lazily yields each document of the multi-doc stream.
        counts = tally_events(
            yaml.safe_load_all(f), capture_start_utc, source=file_path
        )
    return capture_start_utc, counts


def format_report(kind, capture_start_utc, counts, minutes=TIME_MINUTES):
    """Return the report for one resource kind as a list of output lines.

    The change rate is the number of real-time ADDED/MODIFIED/DELETED events
    divided by *minutes* (0 when *minutes* is not positive). A parse-error
    line is included only when at least one event had to be skipped.
    """
    total_real_changes = (
        counts["real_time_adds"]
        + counts["modified_changes"]
        + counts["deleted_changes"]
    )
    change_rate = total_real_changes / minutes if minutes > 0 else 0

    lines = [
        f"Resource Kind: {kind}",
        f" Script Start Time (calculated): {capture_start_utc.isoformat()}",
        f" Initial Load (ADDED events): {counts['initial_load_adds']}",
        f" Real-time ADDED events: {counts['real_time_adds']}",
        f" Real-time MODIFIED events: {counts['modified_changes']}",
        f" Real-time DELETED events: {counts['deleted_changes']}",
    ]
    if counts["errors"]:
        # Surface skipped events so an under-counted rate is not silent.
        lines.append(f" Skipped events (parse errors): {counts['errors']}")
    lines += [
        " -----------------------------------------------",
        f" Total Real-Time Changes: {total_real_changes}",
        f" Accurate Change Rate: {change_rate:.2f} changes/minute",
        "",
    ]
    return lines


def main():
    """Analyze every '*-watch-events.yaml' file in the current directory."""
    print(f"Calculating accurate change rates over a {TIME_MINUTES}-minute window...")
    print("This script uses 'creationTimestamp' to filter out the initial load.")
    print("---------------------------------------------------------------------")

    # Sorted so the report order is stable between runs.
    file_paths = sorted(glob.glob('*-watch-events.yaml'))
    if not file_paths:
        print("No '*-watch-events.yaml' files found in the current directory.")

    for file_path in file_paths:
        try:
            capture_start_utc, counts = analyze_file(file_path)
        except yaml.YAMLError as e:
            print(f"Error parsing YAML in file {file_path}: {e}\n")
            continue
        except (OSError, UnicodeDecodeError) as e:
            print(f"Error reading file {file_path}: {e}\n")
            continue

        kind = os.path.basename(file_path).replace('-watch-events.yaml', '')
        for line in format_report(kind, capture_start_utc, counts):
            print(line)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment