Skip to content

Instantly share code, notes, and snippets.

@sunsided
Created November 2, 2025 10:37
Show Gist options
  • Select an option

  • Save sunsided/fa31a169639e3152c84041b497b46872 to your computer and use it in GitHub Desktop.

Select an option

Save sunsided/fa31a169639e3152c84041b497b46872 to your computer and use it in GitHub Desktop.
Create backdated git commits based on file change dates
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
git_historic_commit.py — import a tree and create backdated commits.
Behavior
- Scans --source (outside or inside repo).
- Copies files into repo under --dest (default: repo root '.').
- Commit order/dates follow source mtimes.
- Filters match *source-relative* POSIX paths (Unicode NFC).
"""
from __future__ import annotations
import argparse, fnmatch, os, shutil, subprocess, sys, unicodedata
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterable, List
@dataclass
class FileInfo:
    """Record describing one source file that is scheduled for import."""

    # Path of the file relative to the source root.
    src_rel: Path
    # Absolute path of the file in the source tree.
    src_abs: Path
    # Modification time of the source file, in nanoseconds.
    mtime_ns: int
    # Destination path inside the repo, relative to the repo root.
    dst_rel: Path
def run(cmd: List[str], check=True, capture=False, env=None, cwd: Path | None = None):
    """Execute *cmd* with subprocess.run in text mode and return its result.

    Args:
        cmd: Argument vector to execute (no shell involved).
        check: When true, a non-zero exit raises subprocess.CalledProcessError.
        capture: When true, stdout and stderr are captured on the result.
        env: Optional environment mapping for the child process.
        cwd: Optional working directory; passed on as a string.

    Returns:
        The subprocess.CompletedProcess produced by subprocess.run.
    """
    workdir = None
    if cwd:
        workdir = str(cwd)
    return subprocess.run(
        cmd,
        check=check,
        text=True,
        capture_output=capture,
        env=env,
        cwd=workdir,
    )
def git_root() -> Path:
    """Return the top-level directory of the current git work tree.

    Raises:
        subprocess.CalledProcessError: if ``git rev-parse`` exits non-zero
            (``run`` checks the exit status by default).
    """
    result = run(["git", "rev-parse", "--show-toplevel"], capture=True)
    toplevel = result.stdout.strip()
    return Path(toplevel)
def is_git_repo() -> bool:
    """Report whether the current directory lies inside a git work tree.

    The answer is derived from ``git_root()``: a CalledProcessError from it
    means "no", a normal return means "yes".
    """
    try:
        git_root()
    except subprocess.CalledProcessError:
        return False
    else:
        return True
def nfc(s: str) -> str:
    """Return *s* converted to Unicode Normalization Form C."""
    composed = unicodedata.normalize("NFC", s)
    return composed
def expand_globs(patterns: Iterable[str]) -> List[str]:
    """Normalise glob patterns and add root-level variants.

    Each pattern has leading ``./`` (and ``/``) path prefixes removed.  For a
    pattern of the form ``**/name.ext`` the bare ``name.ext`` is appended as
    well, so that files directly in the root match too.

    Only whole ``./`` or ``/`` prefixes are removed; a leading dot that is
    part of a name (``.gitignore``, ``.venv/*``, ``../x``) is preserved.

    Args:
        patterns: Glob patterns as supplied by the user.

    Returns:
        The normalised patterns, in input order, with root-level variants
        inserted directly after their ``**/`` originals.
    """
    out: List[str] = []
    for p in patterns:
        # str.lstrip("./") would strip a *character set* and eat the dot of
        # dot-files, so peel off whole prefixes instead.
        while True:
            if p.startswith("./"):
                p = p[2:]
            elif p.startswith("/"):
                p = p[1:]
            else:
                break
        out.append(p)
        if p.startswith("**/"):
            out.append(p[3:])  # also match root files
    return out
def match_any_glob(rel_posix: str, patterns: Iterable[str], case_insensitive: bool) -> bool:
    """Tell whether *rel_posix* matches at least one of *patterns*.

    Matching uses fnmatch.fnmatchcase; when *case_insensitive* is set, both
    the path and each pattern are lowercased before comparison.  A falsy
    *patterns* value never matches.
    """
    if not patterns:
        return False
    if case_insensitive:
        subject = rel_posix.lower()
        return any(fnmatch.fnmatchcase(subject, glob.lower()) for glob in patterns)
    return any(fnmatch.fnmatchcase(rel_posix, glob) for glob in patterns)
def iter_src_files(base: Path):
    """Lazily yield every path below *base* for which ``is_file()`` is true."""
    yield from (entry for entry in base.rglob("*") if entry.is_file())
def to_iso8601_z(ts_ns: int) -> str:
    """Format a nanosecond timestamp as an ISO 8601 string with UTC offset.

    The sub-second part is discarded.  The instant is built as a UTC datetime
    and then converted with ``astimezone()``, so the text carries the local
    timezone's offset rather than a literal ``Z``.
    """
    whole_seconds, _ = divmod(ts_ns, 1_000_000_000)
    utc_moment = datetime.fromtimestamp(whole_seconds, tz=timezone.utc)
    local_moment = utc_moment.astimezone()
    return local_moment.isoformat(timespec="seconds")
def group_by_mtime(files: List[FileInfo], window_seconds: int) -> List[List[FileInfo]]:
    """Partition *files* into batches of nearby modification times.

    *files* is sorted in place by ``mtime_ns``.  A batch is opened by its
    earliest file (the anchor); later files join it while their mtime is at
    most *window_seconds* after the anchor.  The first file beyond that
    window opens the next batch and becomes the new anchor.

    Returns:
        The batches in ascending time order; an empty list for empty input.
    """
    if not files:
        return []
    files.sort(key=lambda f: f.mtime_ns)
    window_ns = window_seconds * 1_000_000_000
    batches: List[List[FileInfo]] = []
    anchor_ns = None
    for info in files:
        if anchor_ns is not None and info.mtime_ns - anchor_ns <= window_ns:
            batches[-1].append(info)
        else:
            # Outside the current window (or very first file): open a new batch.
            batches.append([info])
            anchor_ns = info.mtime_ns
    return batches
def plan(src_root: Path, repo_root: Path, dest_root_rel: Path,
         excludes: List[str], respect_gitignore: bool,
         exclude_dirs: List[str], include_only: List[str],
         includes: List[str],  # force-include overrides exclude
         ignore_case: bool, debug_filters: bool) -> List[FileInfo]:
    """Scan the source tree and decide which files get imported.

    Filters are applied per file, in this order, against the source-relative
    POSIX path (NFC-normalised, lowercased when *ignore_case* is set):

    1. *exclude_dirs*: dropped if the path or any of its parents matches.
    2. *include_only*: if non-empty, the path must match one of them.
    3. *excludes*: dropped if it matches ...
    4. ... unless it also matches one of *includes*.

    Patterns are fnmatch-style globs, normalised the same way as the paths
    (so case-insensitive matching also works for patterns containing
    uppercase letters).

    Args:
        src_root: Directory to scan.
        repo_root: Repository root; working directory for ``git check-ignore``.
        dest_root_rel: Destination directory relative to the repo root.
        excludes: File-level exclude globs.
        respect_gitignore: Drop files whose destination path git would ignore.
        exclude_dirs: Globs excluding whole subtrees.
        include_only: Whitelist globs (empty means "everything").
        includes: Globs that re-include files matched by *excludes*.
        ignore_case: Match globs case-insensitively.
        debug_filters: Print the reason for each skip / re-include.

    Returns:
        One FileInfo per surviving file, in scan order.
    """
    src_root = src_root.resolve()
    dest_root_rel = Path(dest_root_rel.as_posix().rstrip("/")) if dest_root_rel else Path(".")

    def fold(text: str) -> str:
        # Apply the SAME normalisation to paths and patterns, otherwise a
        # lowercased path can never match a pattern with uppercase letters.
        text = nfc(text)
        return text.lower() if ignore_case else text

    def prepare(patterns: List[str]) -> List[str]:
        # Expand first (so '**/*.ext' also matches '*.ext'), then normalise.
        return [fold(p) for p in expand_globs(patterns)]

    ex_files = prepare(excludes)
    ex_dirs = prepare(exclude_dirs)
    inc_only = prepare(include_only)
    inc_override = prepare(includes)

    items: List[FileInfo] = []
    for src_abs in iter_src_files(src_root):
        try:
            src_rel = src_abs.relative_to(src_root)
        except ValueError:
            # relative_to() raises ValueError when src_abs is not below src_root.
            src_rel = Path(os.path.relpath(src_abs, src_root))
        rel_posix = nfc(src_rel.as_posix())
        key = fold(rel_posix)

        # 1) directory-level exclude (the path itself and every parent)
        candidates = [key] + [fold(parent.as_posix()) for parent in src_rel.parents]
        if any(match_any_glob(c, ex_dirs, False) for c in candidates):
            if debug_filters:
                print(f"[skip:dir] {rel_posix}")
            continue

        # 2) include-only gate (whitelist)
        if inc_only and not match_any_glob(key, inc_only, False):
            if debug_filters:
                print(f"[skip:inc] {rel_posix}")
            continue

        # 3) file-level excludes (blacklist) ...
        excluded = match_any_glob(key, ex_files, False)
        # 4) ... with include override to re-include specific matches
        if excluded and match_any_glob(key, inc_override, False):
            excluded = False
            if debug_filters:
                print(f"[reinc] {rel_posix}")
        if excluded:
            if debug_filters:
                print(f"[skip:exc] {rel_posix}")
            continue

        try:
            st = src_abs.stat()
        except FileNotFoundError:
            # File vanished between the scan and the stat; ignore it.
            continue
        items.append(FileInfo(src_rel=src_rel, src_abs=src_abs,
                              mtime_ns=st.st_mtime_ns, dst_rel=dest_root_rel / src_rel))

    if not items:
        return []

    if respect_gitignore:
        # -z makes input and output NUL-delimited and disables git's C-style
        # quoting, so paths with tabs, quotes or non-ASCII characters come
        # back exactly as they were sent.
        proc = subprocess.run(
            ["git", "check-ignore", "--stdin", "-z"],
            cwd=repo_root,
            input="".join(f"{i.dst_rel.as_posix()}\0" for i in items),
            stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True,
        )
        ignored = {p for p in proc.stdout.split("\0") if p}
        kept = []
        for i in items:
            if i.dst_rel.as_posix() in ignored:
                if debug_filters:
                    print(f"[skip:.gi] {i.dst_rel.as_posix()}")
            else:
                kept.append(i)
        items = kept
    return items
def has_pending_changes(repo: Path) -> bool:
    """Report whether ``git status --porcelain`` prints anything for *repo*."""
    status = run(["git", "status", "--porcelain"], capture=True, cwd=repo)
    return status.stdout.strip() != ""
def ensure_parent(dst_abs: Path):
    """Create the directory that will contain *dst_abs*, if it is missing.

    Intermediate directories are created as needed; an already existing
    directory is not an error.
    """
    parent_dir = dst_abs.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
def copy_preserve_mtime(src: Path, dst: Path):
    """Copy *src* to *dst*, creating the destination directory first.

    The copy is done with ``shutil.copy2``, which also attempts to carry over
    file metadata such as the modification time.
    """
    # The target directory must exist before copy2 can write into it.
    ensure_parent(dst)
    shutil.copy2(src, dst)
def git_check_ignored(repo: Path, rel_paths: List[str]) -> List[str]:
    """Return the subset of *rel_paths* that git would ignore in *repo*.

    The paths are fed to ``git check-ignore --stdin -z``.  NUL-delimited mode
    is used so that git does not C-quote the output; paths containing tabs,
    quotes, non-ASCII characters or surrounding whitespace come back exactly
    as they were passed in and can be compared with the input.

    Args:
        repo: Directory in which git is run.
        rel_paths: Paths to test, relative to *repo*.

    Returns:
        The ignored paths as reported by git.  Empty when *rel_paths* is
        empty (git is not invoked) or when git reports nothing; git's stderr
        and exit status are not inspected.
    """
    if not rel_paths:
        return []
    proc = subprocess.run(
        ["git", "check-ignore", "--stdin", "-z"],
        cwd=repo,
        input="".join(f"{p}\0" for p in rel_paths),
        stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True,
    )
    # Output records are NUL-terminated; drop the empty tail after the last one.
    return [p for p in proc.stdout.split("\0") if p]
def main() -> int:
    """Command-line entry point: plan the import, then copy, stage and commit.

    Files from the source directory are filtered, grouped by modification
    time, and each group is copied into the repository and committed with
    author and committer dates set to the newest mtime in the group.

    Returns:
        0 on success (including dry runs and "nothing to import"), 2 when
        the environment or arguments are unusable (not in a git repo, bad
        source, absolute --dest, dirty working tree).

    Raises:
        subprocess.CalledProcessError: if a checked git command fails.
    """
    ap = argparse.ArgumentParser(description="Create backdated commits by importing a directory tree.")
    ap.add_argument("source", type=Path, help="Path to the *source* directory to import (may be outside the repo).")
    ap.add_argument("--dest", type=Path, default=None,
                    help="Destination subdir *inside the repo* (default: repo root '.').")
    ap.add_argument("--exclude", action="append", default=[], help="Glob (source-relative) to exclude; repeatable.")
    ap.add_argument("--exclude-dir", action="append", default=[], help="Directory globs to exclude whole subtrees.")
    ap.add_argument("--include", action="append", default=[],
                    help="Glob to *force-include* files even if they match an exclude; repeatable.")
    ap.add_argument("--include-only", action="append", default=[], help="If set, only files matching any of these globs pass.")
    ap.add_argument("--ignore-case", action="store_true", help="Make include/exclude matching case-insensitive.")
    ap.add_argument("--debug-filters", action="store_true", help="Print why files are included/excluded.")
    ap.add_argument("--group-seconds", type=int, default=0,
                    help="Group files with mtimes within N seconds into a single commit.")
    ap.add_argument("--respect-gitignore", action="store_true",
                    help="Skip files whose *destination* would be ignored by .gitignore.")
    ap.add_argument("--force-add-ignored", action="store_true",
                    help="Force-add files Git would normally ignore (uses 'git add -f').")
    ap.add_argument("--message", default="historical import",
                    help="Commit message prefix (timestamp appended).")
    ap.add_argument("--dry-run", action="store_true", help="Plan only; do not copy or commit.")
    ap.add_argument("--verbose", action="store_true", help="Print copy/add/commit operations as they happen.")
    args = ap.parse_args()

    if not is_git_repo():
        print("Error: not inside a git repository.", file=sys.stderr)
        return 2
    # Resolve the source BEFORE changing directory so that a relative
    # source path is interpreted against the directory the user invoked us from.
    src_root = args.source.resolve()
    repo = git_root()
    os.chdir(repo)  # run all git ops at repo root
    if not src_root.exists() or not src_root.is_dir():
        print(f"Error: source does not exist or is not a directory: {src_root}", file=sys.stderr)
        return 2

    # DEFAULT DEST: repo root ('.') unless explicitly provided
    dest_rel = args.dest if args.dest is not None else Path(".")
    if dest_rel.is_absolute():
        print("--dest must be a path inside the repo (relative).", file=sys.stderr)
        return 2

    items = plan(
        src_root, repo,
        dest_rel,
        args.exclude,
        args.respect_gitignore,
        args.exclude_dir,
        args.include_only,
        args.include,  # include overrides
        args.ignore_case,
        args.debug_filters,
    )
    if not items:
        print("Nothing to import (after filtering).")
        return 0

    groups = group_by_mtime(items, args.group_seconds)
    total = sum(len(g) for g in groups)
    display_prefix = "" if dest_rel.as_posix() in (".", "") else (dest_rel.as_posix() + "/")
    print(f"Planned commits: {len(groups)} (total files: {total})")
    for idx, grp in enumerate(groups, 1):
        ts_iso = to_iso8601_z(max(f.mtime_ns for f in grp))
        print(f"[{idx:04d}] {len(grp):4d} file(s) @ {ts_iso}")
        for f in grp[:5]:
            print(f" - {display_prefix}{f.src_rel.as_posix()}")
        if len(grp) > 5:
            print(f" ... +{len(grp)-5} more")
    if args.dry_run:
        return 0

    if has_pending_changes(repo):
        print("Error: working tree not clean. Commit or stash your changes first.", file=sys.stderr)
        return 2

    for i, grp in enumerate(groups, 1):
        # The commit is dated with the newest mtime in the group.
        ts_ns = max(f.mtime_ns for f in grp)
        ts_iso = to_iso8601_z(ts_ns)
        for f in grp:
            dst_abs = (repo / f.dst_rel)
            if args.verbose:
                print(f"[copy] {f.src_abs} -> {dst_abs}")
            copy_preserve_mtime(f.src_abs, dst_abs)

        # POSIX-style paths: accepted by git on every platform and directly
        # comparable with what 'git check-ignore' echoes back.
        paths = [f.dst_rel.as_posix() for f in grp]
        ignored_now = git_check_ignored(repo, paths)
        if ignored_now and not args.force_add_ignored:
            if args.verbose:
                for p in ignored_now:
                    print(f"[info] '{p}' is ignored by .gitignore; not forcing add (use --force-add-ignored to include).")
            # 'git add' exits non-zero when handed an ignored path without -f,
            # which would abort the whole import; leave those paths out.
            ignored_set = set(ignored_now)
            paths = [p for p in paths if p not in ignored_set]
        if not paths:
            if args.verbose:
                print(f"[skip] every path in group #{i} is ignored; nothing to add")
            continue

        add_cmd = ["git", "add"]
        if args.force_add_ignored:
            add_cmd.append("-f")
        add_cmd += ["--"] + paths
        if args.verbose:
            print(f"[add ] {add_cmd}")
        run(add_cmd, cwd=repo)

        # Exit status 0 from 'diff --cached --quiet' means the index equals HEAD.
        proc = run(["git", "diff", "--cached", "--quiet"], check=False, cwd=repo)
        if proc.returncode == 0:
            if args.verbose:
                print(f"[skip] nothing staged changed for group #{i}, unstage {len(paths)} path(s)")
            run(["git", "reset", "--"] + paths, check=False, cwd=repo)
            continue

        env = os.environ.copy()
        env["GIT_AUTHOR_DATE"] = ts_iso
        env["GIT_COMMITTER_DATE"] = ts_iso
        msg = f"{args.message}: {len(grp)} file(s) up to {ts_iso}"
        if args.verbose:
            print(f"[commit] {msg}")
        run(["git", "commit", "-m", msg, "--no-gpg-sign", "--date", ts_iso], env=env, cwd=repo)
        print(f"Committed [{i}/{len(groups)}]: {len(grp)} file(s) @ {ts_iso}")
    print("Done.")
    return 0
if __name__ == "__main__":
    # Hand main()'s integer status to the interpreter as the exit code.
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment