Created
November 18, 2025 23:56
-
-
Save jac18281828/884a57f1bf890cf3c900bb31dfbd8e20 to your computer and use it in GitHub Desktop.
Find time gaps in log files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import argparse | |
| import re | |
| from datetime import datetime | |
# Compiled pattern for ANSI "ESC [" escape sequences (colors, cursor moves, etc.).
ANSI_ESCAPE_RE = re.compile(r'\x1B\[[0-?]*[ -/]*[@-~]')

# strptime layout of the timestamp that starts each log line, e.g.:
#   2025-11-18T21:52:39.117949Z
TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"


def strip_ansi(s: str) -> str:
    """Return *s* with every ANSI escape sequence deleted."""
    return re.sub(ANSI_ESCAPE_RE, "", s)
def parse_timestamp_from_line(line: str):
    """
    Extract and parse the timestamp at the start of a log line.

    The line is stripped of ANSI escape sequences and surrounding
    whitespace, and its first whitespace-separated token is parsed as a
    timestamp of the form:

        2025-11-18T21:52:39.117949Z

    The fractional-seconds part may be missing or may contain more than six
    digits (e.g. nanosecond precision); digits beyond microseconds are
    truncated because ``datetime`` cannot represent them.

    Returns:
        A naive ``datetime`` on success, or ``None`` when the line is empty
        or does not start with a recognised timestamp.
    """
    clean = strip_ansi(line).strip()
    if not clean:
        return None

    ts_str = clean.split(None, 1)[0]

    # Fast path: the exact layout described by TIMESTAMP_FORMAT.
    try:
        return datetime.strptime(ts_str, TIMESTAMP_FORMAT)
    except ValueError:
        pass

    # Fallback: "%f" only accepts 1-6 digits and the format requires a
    # fraction, so normalise "no fraction" and "more than 6 digits" into a
    # form strptime understands.
    match = re.fullmatch(
        r"([0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2})"
        r"(?:\.([0-9]+))?Z",
        ts_str,
    )
    if match is None:
        # Not a timestamp line, or a different format
        return None

    fraction = (match.group(2) or "0")[:6]
    try:
        return datetime.strptime(f"{match.group(1)}.{fraction}Z", TIMESTAMP_FORMAT)
    except ValueError:
        # Well-formed shape but impossible values (e.g. month 13).
        return None
def find_longest_gaps(path: str, top_n: int = 5):
    """
    Scan a log file and return the largest time gaps between consecutive
    timestamped lines.

    Lines for which ``parse_timestamp_from_line`` returns ``None`` are
    skipped. Gaps are measured between each timestamped line and the
    previous timestamped line. Negative gaps (out-of-order timestamps) are
    discarded, but the out-of-order line still becomes the reference point
    for the next gap.

    The file is read as UTF-8 with undecodable bytes replaced, and gaps are
    streamed into a bounded selection so memory use depends on ``top_n``
    rather than on the number of lines in the file.

    Args:
        path: Path of the log file to scan.
        top_n: Maximum number of gaps to return. A non-positive value
            yields an empty list.

    Returns:
        A list of at most ``top_n`` tuples
        ``(gap_seconds, prev_line_number, curr_line_number)`` sorted by
        ``gap_seconds`` descending; equal gaps keep file order.
    """
    # Local import keeps this function self-contained.
    import heapq

    def iter_gaps():
        """Yield (gap_seconds, prev_line, curr_line) for each non-negative gap."""
        prev_time = None
        prev_line_num = None
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            for line_num, line in enumerate(f, start=1):
                ts = parse_timestamp_from_line(line)
                if ts is None:
                    # Ignore lines that don't start with a valid timestamp
                    continue

                if prev_time is not None:
                    gap_seconds = (ts - prev_time).total_seconds()
                    # Ignore negative gaps (out-of-order timestamps)
                    if gap_seconds >= 0:
                        yield gap_seconds, prev_line_num, line_num

                prev_time = ts
                prev_line_num = line_num

    # nlargest is equivalent to sorted(..., reverse=True)[:top_n], including
    # tie order, without holding every gap in memory.
    return heapq.nlargest(top_n, iter_gaps(), key=lambda gap: gap[0])
def _format_gap(gap_sec: float) -> str:
    """Render a non-negative duration in seconds as e.g. ``1h 2m 3.45s``."""
    # Round to whole centiseconds BEFORE splitting into h/m/s; otherwise a
    # value such as 59.999 would be displayed as "60.00s".
    total_centis = round(gap_sec * 100)
    hours, remainder = divmod(total_centis, 360000)
    minutes, centis = divmod(remainder, 6000)
    seconds = centis / 100

    if hours > 0:
        return f"{hours}h {minutes}m {seconds:.2f}s"
    if minutes > 0:
        return f"{minutes}m {seconds:.2f}s"
    return f"{seconds:.2f}s"


def main():
    """
    Command-line entry point.

    Parses the log file path and the optional ``-n/--top`` count, finds the
    longest gaps between timestamped lines, and prints them ranked from
    longest to shortest. Prints a notice instead when no gaps are found.
    """
    parser = argparse.ArgumentParser(
        description="Find the longest time gaps between timestamped log lines (ANSI-safe)."
    )
    parser.add_argument("logfile", help="Path to the log file")
    parser.add_argument(
        "-n", "--top",
        type=int,
        default=5,
        help="Number of longest gaps to show (default: 5)",
    )
    args = parser.parse_args()

    # A zero or negative count cannot produce a meaningful report; fail with
    # a usage error rather than claiming that no gaps exist.
    if args.top < 1:
        parser.error("--top must be a positive integer")

    longest = find_longest_gaps(args.logfile, args.top)
    if not longest:
        print("No valid timestamp gaps found.")
        return

    print(f"Top {len(longest)} longest gaps in {args.logfile}:")
    for rank, (gap_sec, prev_line, curr_line) in enumerate(longest, start=1):
        pretty_gap = _format_gap(gap_sec)
        print(
            f"{rank}. Gap {pretty_gap} "
            f"between line {prev_line} and line {curr_line}"
        )


if __name__ == "__main__":
    main()
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Works great for tracing logs in Rust.