Skip to content

Instantly share code, notes, and snippets.

@acunniffe
Created December 1, 2025 01:49
Show Gist options
  • Select an option

  • Save acunniffe/8dbc2804abea056eb8044ea9f2da952c to your computer and use it in GitHub Desktop.

Select an option

Save acunniffe/8dbc2804abea056eb8044ea9f2da952c to your computer and use it in GitHub Desktop.
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::fs::File;
use std::io::{Read, Write};
use std::path::PathBuf;
use crate::authorship::post_commit::filter_untracked_files;
use crate::authorship::stats::{
CommitStats, get_git_diff_stats, stats_for_commit_stats, stats_from_authorship_log,
};
use crate::authorship::virtual_attribution::VirtualAttributions;
use crate::error::GitAiError;
use crate::git::refs::get_authorship;
use crate::git::repository::{Repository, exec_git};
/// Entry point for the stats-delta command.
///
/// Scans the commit range since the last indexed commit (capped at 50
/// commits), records `Editing` / `Landed*` entries in the on-disk
/// stats-delta log, then prints any newly landed entries as JSON on stdout.
///
/// Exits the process with status 1 when no repository is available, HEAD
/// cannot be resolved, or the log cannot be opened or saved.
pub fn handle_stats_delta(repository_option: &Option<Repository>, _args: &[String]) {
    let Some(repository) = repository_option else {
        eprintln!("No repository found from current directory");
        std::process::exit(1);
    };
    let stats_delta_log_path = repository
        .storage
        .repo_path
        .join("ai")
        .join("stats_delta.log");
    // A corrupt/unreadable log file is an error, not a panic.
    let mut stats_delta_log = match StatsDeltaLog::new(stats_delta_log_path) {
        Ok(log) => log,
        Err(e) => {
            eprintln!("Failed to open stats_delta log: {}", e);
            std::process::exit(1);
        }
    };
    // Entries newly marked as landed during this run; printed at the end.
    let mut new_landed_entries: Vec<StatsDeltaLogEntry> = Vec::new();
    // Step 1: Resolve the HEAD commit.
    let head_commit = match repository.head().and_then(|h| h.peel_to_commit()) {
        Ok(commit) => commit,
        Err(e) => {
            eprintln!("Failed to get HEAD commit: {}", e);
            std::process::exit(1);
        }
    };
    let head_sha = head_commit.id();
    // Step 2: Determine the commit range to process.
    let range_spec = if let Some(last_indexed) = stats_delta_log.last_indexed_commit() {
        // Verify the last indexed commit still exists.
        match repository.find_commit(last_indexed.to_string()) {
            Ok(_) => format!("{}..{}", last_indexed, head_sha),
            Err(_) => {
                // Commit doesn't exist anymore (squashed/rebased); fall back
                // to HEAD~1..HEAD.
                format!("{}~1..{}", head_sha, head_sha)
            }
        }
    } else {
        // Initial run: only look at the most recent commit.
        format!("{}~1..{}", head_sha, head_sha)
    };
    // List the commits in the range via `git rev-list`, newest first.
    let mut args_list = repository.global_args_for_exec();
    args_list.push("rev-list".to_string());
    args_list.push("--max-count=50".to_string());
    args_list.push(range_spec);
    let commit_shas = match exec_git(&args_list) {
        Ok(output) => {
            let stdout = String::from_utf8(output.stdout).unwrap_or_default();
            stdout
                .lines()
                .map(|s| s.trim().to_string())
                .filter(|s| !s.is_empty())
                .collect::<Vec<String>>()
        }
        // Only expected failure mode is <2 commits in the range, in which
        // case stats will simply appear one commit later.
        Err(_) => Vec::new(),
    };
    // Step 3: Record active editing at HEAD (a working log exists for it).
    let head_working_log_dir = repository.storage.working_logs.join(&head_sha);
    if head_working_log_dir.exists() {
        // `touch` refreshes last_seen and reports whether any entry with
        // this base SHA already existed (single lookup instead of
        // find-then-touch).
        if !stats_delta_log.touch(&head_sha) {
            // No entry yet: create a fresh Editing entry.
            let now = Utc::now();
            stats_delta_log.add(StatsDeltaLogEntry::Editing {
                working_log_base_sha: head_sha.clone(),
                first_seen: now,
                last_seen: now,
            });
        }
    }
    // Step 4: Process each commit from rev-list.
    for commit_sha in commit_shas {
        let commit = match repository.find_commit(commit_sha.clone()) {
            Ok(c) => c,
            Err(_) => continue,
        };
        // Skip commits already recorded as Landed*; Editing entries may be
        // upgraded to Landed*, so they do not block processing.
        // NOTE(review): Landed* entries are keyed by their *parent* SHA
        // (working_log_base_sha), so this lookup only matches when this
        // commit was itself the working-log base of a later landed commit —
        // confirm this is the intended dedup key.
        if let Some(entry) = stats_delta_log.find_by_sha(&commit_sha) {
            if !matches!(entry, StatsDeltaLogEntry::Editing { .. }) {
                // Already processed as Landed* - skip.
                continue;
            }
        }
        // a) First, check for LandedGitAIPostCommit (priority): authorship
        // notes exist for the commit.
        if get_authorship(repository, &commit_sha).is_some() {
            if let Ok(parent) = commit.parent(0) {
                let parent_sha = parent.id();
                // Remove any Editing entry for this parent SHA (consolidation).
                stats_delta_log.delete(&parent_sha);
                let stats = stats_for_commit_stats(repository, &commit_sha).unwrap_or_else(|e| {
                    eprintln!("Failed to compute stats for commit {}: {}", commit_sha, e);
                    CommitStats::default()
                });
                let entry = StatsDeltaLogEntry::LandedGitAIPostCommit {
                    working_log_base_sha: parent_sha,
                    commit_sha: commit_sha.clone(),
                    stats,
                    processed_at: Utc::now(),
                };
                new_landed_entries.push(entry.clone());
                stats_delta_log.add(entry);
            }
            continue; // Skip heuristic check
        }
        // b) Otherwise, check for LandedCommitHueristic (fallback): a
        // single-parent commit whose parent has a working log.
        if let Ok(parent_count) = commit.parent_count() {
            if parent_count == 1 {
                if let Ok(parent) = commit.parent(0) {
                    let parent_sha = parent.id();
                    let parent_working_log_dir =
                        repository.storage.working_logs.join(&parent_sha);
                    if parent_working_log_dir.exists() {
                        // Remove any Editing entry for this parent SHA (consolidation).
                        stats_delta_log.delete(&parent_sha);
                        let stats = simulate_post_commit(repository, &parent_sha, &commit_sha)
                            .unwrap_or_else(|e| {
                                eprintln!(
                                    "Failed to simulate stats for commit {}: {}",
                                    commit_sha, e
                                );
                                CommitStats::default()
                            });
                        // This commit has a working log parent - mark as heuristic.
                        let entry = StatsDeltaLogEntry::LandedCommitHueristic {
                            working_log_base_sha: parent_sha,
                            stats,
                            processed_at: Utc::now(),
                        };
                        new_landed_entries.push(entry.clone());
                        stats_delta_log.add(entry);
                    }
                }
            }
        }
    }
    // Step 5: Update tracking metadata.
    stats_delta_log.set_last_indexed_commit_at(head_sha.to_string(), Utc::now());
    // Step 6: Save and trim the log (this may also delete stale working logs).
    if let Err(e) = stats_delta_log.save_and_clean(repository) {
        eprintln!("Failed to save stats_delta log: {}", e);
        std::process::exit(1);
    }
    // Report the newly landed entries as JSON on stdout.
    match serde_json::to_string_pretty(&new_landed_entries) {
        Ok(json) => println!("{}", json),
        Err(e) => eprintln!("Failed to serialize new landed entries: {}", e),
    }
}
/// One record in the stats-delta log.
///
/// Serialized as JSON with an internal `"type"` tag naming the variant
/// (see `#[serde(tag = "type")]`), so variant names are part of the
/// on-disk format.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum StatsDeltaLogEntry {
    /// An in-progress editing session: a working log exists for this base
    /// commit but no commit derived from it has been recorded as landed yet.
    Editing {
        working_log_base_sha: String,
        first_seen: DateTime<Utc>,
        last_seen: DateTime<Utc>,
    },
    /// A commit detected as landed via the working-log heuristic (no git-ai
    /// authorship notes were found for it).
    /// NOTE(review): variant name misspells "Heuristic"; it is serialized
    /// into the `"type"` tag, so renaming it would break existing log files.
    LandedCommitHueristic {
        working_log_base_sha: String,
        stats: CommitStats,
        processed_at: DateTime<Utc>,
    },
    /// A commit landed with git-ai authorship notes present (the post-commit
    /// path).
    LandedGitAIPostCommit {
        working_log_base_sha: String, // parent sha
        commit_sha: String,           // sha of the landed commit itself
        stats: CommitStats,
        processed_at: DateTime<Utc>,
    },
}
impl StatsDeltaLogEntry {
    /// Get the working_log_base_sha for this entry — every variant carries
    /// this field, so a single or-pattern covers them all.
    pub fn working_log_base_sha(&self) -> &str {
        match self {
            StatsDeltaLogEntry::Editing {
                working_log_base_sha,
                ..
            }
            | StatsDeltaLogEntry::LandedCommitHueristic {
                working_log_base_sha,
                ..
            }
            | StatsDeltaLogEntry::LandedGitAIPostCommit {
                working_log_base_sha,
                ..
            } => working_log_base_sha,
        }
    }

    /// Return a copy of this entry with `last_seen` refreshed to now.
    ///
    /// Only meaningful for `Editing` entries; other variants are returned
    /// as unchanged clones.
    pub fn touch(&self) -> Self {
        if let StatsDeltaLogEntry::Editing {
            working_log_base_sha,
            first_seen,
            ..
        } = self
        {
            StatsDeltaLogEntry::Editing {
                working_log_base_sha: working_log_base_sha.clone(),
                first_seen: *first_seen,
                last_seen: Utc::now(),
            }
        } else {
            self.clone()
        }
    }
}
/// The on-disk format for the stats delta log (serialized as JSON).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StatsDeltaLogData {
    /// SHA of the newest commit that has been indexed, if any.
    pub last_indexed_commit: Option<String>,
    /// When `last_indexed_commit` was recorded.
    pub last_indexed_timestamp: Option<DateTime<Utc>>,
    /// Log entries, oldest first (entries are appended; trimming drains
    /// from the front).
    pub log: Vec<StatsDeltaLogEntry>,
}
/// In-memory handle for the stats delta log: the backing file path plus the
/// deserialized contents of `StatsDeltaLogData`.
pub struct StatsDeltaLog {
    // Filesystem location of the JSON log file.
    path: PathBuf,
    // Mirrors StatsDeltaLogData::last_indexed_commit.
    last_indexed_commit: Option<String>,
    // Mirrors StatsDeltaLogData::last_indexed_timestamp.
    last_indexed_timestamp: Option<DateTime<Utc>>,
    // Mirrors StatsDeltaLogData::log.
    entries: Vec<StatsDeltaLogEntry>,
}
impl StatsDeltaLog {
    /// Create a new StatsDeltaLog with the given path.
    ///
    /// If the file exists on disk it is loaded automatically; otherwise a
    /// fresh empty log is created. Loading is attempted directly rather
    /// than checking `path.exists()` first, which avoids a check-then-open
    /// race if the file disappears between the two calls.
    pub fn new(path: PathBuf) -> Result<Self, std::io::Error> {
        match Self::load(path.clone()) {
            Ok(log) => Ok(log),
            // No file yet: start with an empty log.
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(Self {
                path,
                last_indexed_commit: None,
                last_indexed_timestamp: None,
                entries: Vec::new(),
            }),
            Err(e) => Err(e),
        }
    }

    /// Load the log from disk (JSON format).
    ///
    /// Malformed JSON is surfaced as `ErrorKind::InvalidData`.
    fn load(path: PathBuf) -> Result<Self, std::io::Error> {
        let mut file = File::open(&path)?;
        let mut contents = String::new();
        file.read_to_string(&mut contents)?;
        let data: StatsDeltaLogData = serde_json::from_str(&contents)
            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
        Ok(Self {
            path,
            last_indexed_commit: data.last_indexed_commit,
            last_indexed_timestamp: data.last_indexed_timestamp,
            entries: data.log,
        })
    }

    /// Save the log to disk as pretty-printed JSON, creating parent
    /// directories as needed.
    pub fn save(&self) -> Result<(), std::io::Error> {
        // Ensure the parent directory exists before writing.
        if let Some(parent) = self.path.parent() {
            std::fs::create_dir_all(parent)?;
        }
        let data = StatsDeltaLogData {
            last_indexed_commit: self.last_indexed_commit.clone(),
            last_indexed_timestamp: self.last_indexed_timestamp,
            log: self.entries.clone(),
        };
        let json = serde_json::to_string_pretty(&data)
            .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
        let mut file = File::create(&self.path)?;
        file.write_all(json.as_bytes())?;
        Ok(())
    }

    /// Save the log to disk and clean up old entries (keeps max 200 items, FIFO).
    ///
    /// For each deleted item:
    /// - LandedCommitHueristic: delete the working log
    /// - LandedGitAIPostCommit: no side effects
    /// - Editing: if last_seen > 5 days ago, delete the working log
    pub fn save_and_clean(&mut self, repository: &Repository) -> Result<(), std::io::Error> {
        const MAX_ENTRIES: usize = 200;
        const EDITING_STALE_DAYS: i64 = 5;
        if self.entries.len() > MAX_ENTRIES {
            // FIFO: oldest entries sit at the front of the vec.
            let num_to_delete = self.entries.len() - MAX_ENTRIES;
            // Collect the entries we're about to delete for cleanup.
            let deleted_entries: Vec<StatsDeltaLogEntry> =
                self.entries.drain(0..num_to_delete).collect();
            // Perform cleanup side effects for the evicted entries.
            let now = Utc::now();
            for entry in deleted_entries {
                match entry {
                    StatsDeltaLogEntry::LandedCommitHueristic {
                        working_log_base_sha,
                        ..
                    } => {
                        // Best-effort removal of the heuristic's working log;
                        // errors are deliberately ignored.
                        let _ = repository
                            .storage
                            .delete_working_log_for_base_commit(&working_log_base_sha);
                    }
                    StatsDeltaLogEntry::LandedGitAIPostCommit { .. } => {
                        // No side effects for GitAI post-commit entries.
                    }
                    StatsDeltaLogEntry::Editing {
                        working_log_base_sha,
                        last_seen,
                        ..
                    } => {
                        // Editing sessions idle beyond the stale window lose
                        // their working log as well (best-effort).
                        let age = now.signed_duration_since(last_seen);
                        if age.num_days() > EDITING_STALE_DAYS {
                            let _ = repository
                                .storage
                                .delete_working_log_for_base_commit(&working_log_base_sha);
                        }
                    }
                }
            }
        }
        // Persist the (possibly trimmed) log.
        self.save()
    }

    /// Get the last indexed commit SHA.
    pub fn last_indexed_commit(&self) -> Option<&str> {
        self.last_indexed_commit.as_deref()
    }

    /// Get the last indexed timestamp.
    pub fn last_indexed_timestamp(&self) -> Option<DateTime<Utc>> {
        self.last_indexed_timestamp
    }

    /// Set the last indexed commit SHA with a specific timestamp.
    pub fn set_last_indexed_commit_at(&mut self, commit_sha: String, timestamp: DateTime<Utc>) {
        self.last_indexed_commit = Some(commit_sha);
        self.last_indexed_timestamp = Some(timestamp);
    }

    /// Get all entries.
    pub fn entries(&self) -> &[StatsDeltaLogEntry] {
        &self.entries
    }

    /// Get mutable access to all entries.
    pub fn entries_mut(&mut self) -> &mut Vec<StatsDeltaLogEntry> {
        &mut self.entries
    }

    /// Find an entry by working_log_base_sha (first match wins).
    pub fn find_by_sha(&self, working_log_base_sha: &str) -> Option<&StatsDeltaLogEntry> {
        self.entries
            .iter()
            .find(|e| e.working_log_base_sha() == working_log_base_sha)
    }

    /// Find a mutable entry by working_log_base_sha (first match wins).
    pub fn find_by_sha_mut(
        &mut self,
        working_log_base_sha: &str,
    ) -> Option<&mut StatsDeltaLogEntry> {
        self.entries
            .iter_mut()
            .find(|e| e.working_log_base_sha() == working_log_base_sha)
    }

    /// Append a new entry to the log.
    pub fn add(&mut self, entry: StatsDeltaLogEntry) {
        self.entries.push(entry);
    }

    /// Delete the first entry matching working_log_base_sha.
    ///
    /// Returns true if an entry was removed.
    pub fn delete(&mut self, working_log_base_sha: &str) -> bool {
        if let Some(pos) = self
            .entries
            .iter()
            .position(|e| e.working_log_base_sha() == working_log_base_sha)
        {
            self.entries.remove(pos);
            true
        } else {
            false
        }
    }

    /// Refresh last_seen on the entry for working_log_base_sha.
    ///
    /// Returns true if a matching entry exists. Non-Editing matches are
    /// left unchanged (see `StatsDeltaLogEntry::touch`).
    pub fn touch(&mut self, working_log_base_sha: &str) -> bool {
        if let Some(entry) = self.find_by_sha_mut(working_log_base_sha) {
            *entry = entry.touch();
            true
        } else {
            false
        }
    }
}
/// Simulate post_commit stats generation from a working log (read-only)
///
/// Used for the LandedCommitHueristic case: a working log exists for the
/// parent commit, but the commit itself was made without git-ai's
/// post-commit hook. This reconstructs the stats that hook would have
/// produced, without writing anything back.
fn simulate_post_commit(
    repository: &Repository,
    working_log_base_sha: &str,
    commit_sha: &str,
) -> Result<CommitStats, GitAiError> {
    // Bail out early when no working log was recorded for the base commit.
    let log_dir = repository.storage.working_logs.join(working_log_base_sha);
    if !log_dir.exists() {
        return Err(GitAiError::Generic(format!(
            "Working log directory does not exist for base SHA: {}",
            working_log_base_sha
        )));
    }
    // Read every checkpoint recorded against the base commit, then drop
    // entries for files git never tracked.
    let checkpoints = repository
        .storage
        .working_log_for_base_commit(working_log_base_sha)
        .read_all_checkpoints()?;
    let tracked_checkpoints =
        filter_untracked_files(repository, &checkpoints, commit_sha, None)?;
    // Pathspecs: every file mentioned by any surviving checkpoint entry.
    let pathspecs: HashSet<String> = tracked_checkpoints
        .iter()
        .flat_map(|cp| cp.entries.iter().map(|e| e.file.clone()))
        .collect();
    // Build VirtualAttributions from the working log alone, with a stubbed
    // human author.
    let attributions = VirtualAttributions::from_just_working_log(
        repository.clone(),
        working_log_base_sha.to_string(),
        Some("example@usegitai.com".to_string()),
    )?;
    // Index-only conversion: the commit has already landed, so only
    // committed hunks are considered, never the working copy.
    let authorship_log = attributions.to_authorship_log_index_only(
        repository,
        working_log_base_sha,
        commit_sha,
        Some(&pathspecs),
    )?;
    // Combine the authorship log with the commit's raw diff line counts.
    let (added_lines, deleted_lines) = get_git_diff_stats(repository, commit_sha)?;
    Ok(stats_from_authorship_log(
        Some(&authorship_log),
        added_lines,
        deleted_lines,
    ))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment