hrishioa/process_commits.ts

## process_commits.ts
import { GoogleGenAI } from "@google/genai";
import fs from "fs";
import path from "path";

/**
 * Top-level shape of the commits JSON document this script reads from
 * `data/commits-data-enriched.json` and writes back (with analyses attached)
 * to `data/commits-data-double-enriched.json`.
 */
interface CommitData {
  repository: string;
  extracted_at: string;
  total_commits: number;
  // The commit records; main() walks this array in order.
  commits: Commit[];
}

/**
 * One commit record from the input JSON. The whole object is serialized with
 * JSON.stringify into both the system prompt and the user prompt.
 */
interface Commit {
  // The first 8 characters are shown in the progress line.
  hash: string;
  author: string;
  date: string;
  committer: string;
  commitDate: string;
  // The first 50 characters are shown in the progress line.
  subject: string;
  body: string;
  diffStats: string;
  fileChanges: FileChange[];
  githubComments: GitHubComment[];
}

/** One entry of `Commit.fileChanges`. This script only forwards it to the model as JSON. */
interface FileChange {
  status: string;
  path: string;
  change_type: string;
  // Optional; may be absent from a record.
  patch?: string;
}

/** One entry of `Commit.githubComments`. This script only forwards it to the model as JSON. */
interface GitHubComment {
  id: number;
  user: string;
  body: string;
  created_at: string;
  updated_at: string;
  html_url: string;
}

/**
 * Structured form of the model's `<commitAnalysis>` XML, as produced by
 * parseXMLToJSON. Field names mirror the XML tag names, including their casing.
 */
interface CommitAnalysis {
  // Taken from the `counter` attribute of the <commitAnalysis> tag.
  counter: number;
  // Taken from the `hash` attribute of the <commitAnalysis> tag.
  hash: string;
  description: string;
  // One entry per <point> element.
  playByPlay: string[];
  broaderContext: string;
  // The three scores are requested as 0-100 in the prompt; the parser does not enforce the range.
  AIInvolvement: number;
  Size: number;
  Importance: number;
  Observations: string;
  Prompt: string;
  title: string;
}

/**
 * A commit plus the model's analysis. Both extra fields are absent until the
 * commit has been processed successfully.
 */
interface DoubleEnrichedCommit extends Commit {
  // Parsed analysis; main() treats a commit without it as not yet processed.
  analysis?: CommitAnalysis;
  // Raw <commitAnalysis> XML; replayed inside <commitsSoFar> in later system prompts.
  analysisXml?: string;
}

// Identifiers interpolated into the Cloudflare AI Gateway base URL used by callGemini.
// NOTE(review): "XXXX" looks like a redacted placeholder — supply real values before running.
const CLOUDFLARE_ACCOUNT_ID = "XXXX";
const CLOUDFLARE_GATEWAY_ID = "XXXX";

// Project brief that buildSystemPrompt embeds verbatim between <project_info> tags
// in every system prompt. The text is prompt input: edit it only deliberately.
const PROJECT_INFO = `OAuth 2.1 Provider Framework for Cloudflare Workers
This is a TypeScript library that implements the provider side of the OAuth 2.1 protocol with PKCE support. The library is intended to be used on Cloudflare Workers.

Beta
As of March, 2025, this library is very new, prerelease software. The API is still subject to change.

Benefits of this library
The library acts as a wrapper around your Worker code, which adds authorization for your API endpoints.
All token management is handled automatically.
Your API handler is written like a regular fetch handler, but receives the already-authenticated user details as a parameter. No need to perform any checks of your own.
The library is agnostic to how you manage and authenticate users.
The library is agnostic to how you build your UI. Your authorization flow can be implemented using whatever UI framework you use for everything else.
The library's storage does not store any secrets, only hashes of them.

Written using Claude
This library (including the schema documentation) was largely written with the help of Claude, the AI model by Anthropic. Claude's output was thoroughly reviewed by Cloudflare engineers with careful attention paid to security and compliance with standards. Many improvements were made on the initial output, mostly again by prompting Claude (and reviewing the results). Check out the commit history to see how Claude was prompted and what code it produced.

"NOOOOOOOO!!!! You can't just use an LLM to write an auth library!"

"haha gpus go brrr"

In all seriousness, two months ago (January 2025), I (@kentonv) would have agreed. I was an AI skeptic. I thought LLMs were glorified Markov chain generators that didn't actually understand code and couldn't produce anything novel. I started this project on a lark, fully expecting the AI to produce terrible code for me to laugh at. And then, uh... the code actually looked pretty good. Not perfect, but I just told the AI to fix things, and it did. I was shocked.

To emphasize, this is not "vibe coded". Every line was thoroughly reviewed and cross-referenced with relevant RFCs, by security experts with previous experience with those RFCs. I was trying to validate my skepticism. I ended up proving myself wrong.

Again, please check out the commit history -- especially early commits -- to understand how this went.`;

/**
 * Locates the first `<commitAnalysis ...>...</commitAnalysis>` element in a
 * model response, ignoring any text around it.
 *
 * @param response Raw text returned by the model.
 * @returns The matched element including its tags, or `null` when none is present.
 */
function extractXML(response: string): string | null {
  // Lazy body match so that only the first complete element is captured.
  const analysisElement = /<commitAnalysis[^>]*>[\s\S]*?<\/commitAnalysis>/;
  const found = analysisElement.exec(response);
  if (found === null) {
    return null;
  }
  return found[0];
}

/**
 * Converts the model's `<commitAnalysis>` XML into a CommitAnalysis object using
 * lightweight regex extraction (this is not a real XML parser; entities are not decoded).
 *
 * - `counter` and `hash` are read independently from the opening tag, so the
 *   attribute order does not matter; a missing or non-numeric counter becomes 0.
 * - Text fields are trimmed; a missing tag yields "".
 * - Numeric fields are parsed with parseInt; missing or non-numeric values become 0.
 * - `<point>` items are taken from inside `<playByPlay>` when that element exists,
 *   otherwise from the whole document.
 *
 * @param xml The `<commitAnalysis>` element text.
 * @returns The parsed analysis, or `null` if extraction throws.
 */
function parseXMLToJSON(xml: string): CommitAnalysis | null {
  try {
    // Inner text of the first <tag ...>...</tag>, trimmed; "" when absent.
    const getValue = (tag: string): string => {
      const match = xml.match(new RegExp(`<${tag}[^>]*>([\\s\\S]*?)</${tag}>`));
      return match ? match[1].trim() : "";
    };

    const getNumericValue = (tag: string): number => {
      const value = getValue(tag);
      return value ? parseInt(value, 10) || 0 : 0;
    };

    // Restrict <point> extraction to the <playByPlay> section so that a literal
    // "<point>" quoted in another field is not mistaken for a list item.
    const playByPlaySection = xml.match(/<playByPlay[^>]*>([\s\S]*?)<\/playByPlay>/);
    const pointSource = playByPlaySection ? playByPlaySection[1] : xml;
    const playByPlayMatches = pointSource.match(/<point>([\s\S]*?)<\/point>/g);
    const playByPlay = playByPlayMatches
      ? playByPlayMatches.map(match => match.replace(/<\/?point>/g, "").trim())
      : [];

    // Read each attribute on its own: the model may emit hash before counter.
    const openingTagMatch = xml.match(/<commitAnalysis\b[^>]*>/);
    const openingTag = openingTagMatch ? openingTagMatch[0] : "";
    const getAttribute = (name: string): string => {
      const match = openingTag.match(new RegExp(`\\b${name}\\s*=\\s*(?:"([^"]*)"|'([^']*)')`));
      if (!match) {
        return "";
      }
      return match[1] !== undefined ? match[1] : (match[2] !== undefined ? match[2] : "");
    };

    // counter="" would parse to NaN, which JSON.stringify turns into null.
    const parsedCounter = parseInt(getAttribute("counter"), 10);
    const counter = Number.isNaN(parsedCounter) ? 0 : parsedCounter;
    const hash = getAttribute("hash");

    return {
      counter,
      hash,
      description: getValue("description"),
      playByPlay,
      broaderContext: getValue("broaderContext"),
      AIInvolvement: getNumericValue("AIInvolvement"),
      Size: getNumericValue("Size"),
      Importance: getNumericValue("Importance"),
      Observations: getValue("Observations"),
      Prompt: getValue("Prompt"),
      title: getValue("title")
    };
  } catch (error) {
    console.error("Failed to parse XML:", error);
    return null;
  }
}

/**
 * Assembles the system prompt for one commit.
 *
 * The result contains, in order: an instruction line, a `<commitN>` section
 * holding the literal token `{COMMIT_DATA_PLACEHOLDER}` (the caller substitutes
 * the commit JSON), the project brief in `<project_info>`, and, when
 * `processedCommits` is non-empty, a `<commitsSoFar>` section listing each
 * earlier commit's stored analysis XML.
 *
 * @param processedCommits Earlier commits; entries without `analysisXml` are skipped.
 * @param currentCommitIndex Index used in the "(i/total)" label and the `<commitN>` tag name.
 * @param totalCommits Total number of commits, used in the "(i/total)" label.
 * @returns The system prompt text, still containing the placeholder token.
 */
function buildSystemPrompt(processedCommits: DoubleEnrichedCommit[], currentCommitIndex: number, totalCommits: number): string {
  const commitTag = `commit${currentCommitIndex}`;

  const header = [
    `You are being provided with a single commit (${currentCommitIndex}/${totalCommits}) from a project - the full commit info as well as a brief about the project is provided. PAY CLOSE ATTENTION TO THE COMMIT DATA ITSELF AND TREAT EVERYTHING ELSE AS SUPPLEMENTAL.`,
    "",
    `<${commitTag}>`,
    "{COMMIT_DATA_PLACEHOLDER}",
    `</${commitTag}>`,
    "",
    "<project_info>",
    PROJECT_INFO,
    "</project_info>",
  ].join("\n");

  // No history yet: the prompt ends right after the project brief.
  if (processedCommits.length === 0) {
    return header;
  }

  // Each stored analysis goes on its own line inside <commitsSoFar>.
  const priorAnalyses = processedCommits
    .filter(prior => prior.analysisXml)
    .map(prior => "\n" + prior.analysisXml)
    .join("");

  return header + "\n\n<commitsSoFar>" + priorAnalyses + "\n</commitsSoFar>";
}

/**
 * Builds the user message for one commit: the commit serialized as
 * pretty-printed JSON inside a `<commitN>` tag, followed by the XML response
 * template the model is asked to fill in.
 *
 * @param commit The commit to analyse.
 * @param commitIndex Index used in the `<commitN>` tag name.
 * @returns The complete user prompt text.
 */
function buildUserPrompt(commit: Commit, commitIndex: number): string {
  const tag = `commit${commitIndex}`;
  const serializedCommit = JSON.stringify(commit, null, 2);

  // Response template shown to the model; the "//" notes are instructions to the model.
  const responseFormat = `<commitAnalysis counter="" hash=""> // Re-list the commit hash and the counter for the commit you're analysing.
<description> // medium-length, two paragraph description of what is happening in this commit specifically. What is being done?
</description>
<playByPlay> // Bullet point list all the things that happened in this commit. Could include files changed and why, functionality introduced, frustrations had, AI involvement, interesting points, etc.
<point></point>
<point></point>
</playByPlay>
<broaderContext> // medium-length. How does this commit fit into the broader development work in this repo? Is progress being made and in which direction?
</broaderContext>
<AIInvolvement></AIInvolvement> // numeric, 0-100 score on how much AI was involved in this commit, 100 being 100% AI written and 0% being completely human and manual.
<Size></Size> // numeric, 0-100 score based on your impression on how big the commit is in general size. The full codebase is about 2-3K lines of code.
<Importance></Importance> // numeric, 0-100 score based on your impression of how important - not relative to size - this commit is to the overall development and the repo.
<Observations></Observations> // text, large - can just be observations you have, or the devs have had, or the AIs have had.
<Prompt></Prompt> // can be empty, are there any prompts in this commit? Feel free to multiple and cover all the messages being sent to the AI. Make sure it's verbatim - only include messages to the actual AI, not thoughts.
<title></title> // Give the commit a clean, 20-30 word title covering what actually happened here, so that when the titles are placed together there's a clean idea of what's happening. Be funny or interesting if you can.
</commitAnalysis>`;

  return [
    "Commit to Analyze:",
    `<${tag}>`,
    serializedCommit,
    `</${tag}>`,
    "",
    "From the commit, provide the following analysis in XML, in this format.",
    responseFormat,
  ].join("\n");
}

/**
 * Sends one request to the Gemini model through the Cloudflare AI Gateway and
 * returns the text of the first candidate.
 *
 * All text parts of the first candidate are concatenated. Parts flagged as
 * model "thought" are skipped, so a multi-part answer is not truncated to its
 * first part.
 *
 * @param systemPrompt Text passed as the system instruction.
 * @param userPrompt Text passed as the single user turn.
 * @param temperature Sampling temperature (defaults to 0).
 * @returns The response text, or "" when the response carries no text.
 * @throws Error when GOOGLE_API_KEY is not set; SDK/network errors propagate unchanged.
 */
async function callGemini(systemPrompt: string, userPrompt: string, temperature: number = 0): Promise<string> {
  // Fail with a clear message instead of handing `undefined` to the SDK.
  const apiKey = process.env.GOOGLE_API_KEY;
  if (!apiKey) {
    throw new Error("GOOGLE_API_KEY environment variable is required");
  }

  const ai = new GoogleGenAI({ apiKey });

  const model = "gemini-2.5-flash-preview-05-20";

  const contents = [
    {
      role: "user",
      parts: [{ text: userPrompt }],
    },
  ];

  const response = await ai.models.generateContent({
    config: {
      // Route the request through the Cloudflare AI Gateway rather than calling Google directly.
      httpOptions: {
        baseUrl: `https://gateway.ai.cloudflare.com/v1/${CLOUDFLARE_ACCOUNT_ID}/${CLOUDFLARE_GATEWAY_ID}/google-ai-studio`,
      },
      temperature,
      responseMimeType: "text/plain",
      systemInstruction: [{ text: systemPrompt }],
    },
    contents,
    model,
  });

  // Join every text part; reading only parts[0] drops the rest of a multi-part answer.
  const parts = response.candidates?.[0]?.content?.parts ?? [];
  let text = "";
  for (const part of parts) {
    if (part.thought) {
      continue;
    }
    text += part.text ?? "";
  }
  return text;
}

/**
 * Redraws a single-line progress indicator on stdout. The line starts with a
 * carriage return and has no trailing newline, so each call overwrites the last.
 *
 * Layout: `[bar] pct% (current/total) - <hash, 8 chars>: <subject, up to 50 chars>`.
 * An ellipsis is appended only when the subject was actually truncated.
 *
 * @param current 1-based position of the commit being processed.
 * @param total Total number of commits; values <= 0 render as 0%.
 * @param commit The commit whose hash and subject are shown.
 */
function updateProgress(current: number, total: number, commit: Commit): void {
  const BAR_WIDTH = 50;
  const MAX_SUBJECT_LENGTH = 50;

  // Clamp to [0, 1]: an out-of-range ratio would make repeat() throw a RangeError,
  // and total === 0 would otherwise print "NaN%".
  const ratio = total > 0 ? Math.min(Math.max(current / total, 0), 1) : 0;
  const percentage = Math.round(ratio * 100);
  const filled = Math.floor(percentage / 2);
  const progressBar = "█".repeat(filled) + "░".repeat(BAR_WIDTH - filled);

  const subject = commit.subject.length > MAX_SUBJECT_LENGTH
    ? `${commit.subject.substring(0, MAX_SUBJECT_LENGTH)}...`
    : commit.subject;

  process.stdout.write(`\r[${progressBar}] ${percentage}% (${current}/${total}) - ${commit.hash.substring(0, 8)}: ${subject}`);
}

/**
 * Entry point. Reads `data/commits-data-enriched.json`, asks the model for an
 * analysis of every commit that does not have one yet, and writes the result to
 * `data/commits-data-double-enriched.json` after each processed commit so an
 * interrupted run can be resumed.
 *
 * Resume: analyses from an existing output file are matched to the input
 * commits by hash, so a shorter, longer or reordered output file cannot shift
 * analyses onto the wrong commit or hide commits that still need work.
 *
 * Exits the process with status 1 when the API key is missing, the input cannot
 * be read, a commit fails twice, or progress cannot be saved.
 */
async function main() {
  console.log("Starting double enrichment process...");

  // Check for required environment variable
  if (!process.env.GOOGLE_API_KEY) {
    console.error("Error: GOOGLE_API_KEY environment variable is required");
    process.exit(1);
  }

  // Input and output both live under ./data relative to the working directory
  const enrichedDataPath = path.join(process.cwd(), "data", "commits-data-enriched.json");
  const doubleEnrichedDataPath = path.join(process.cwd(), "data", "commits-data-double-enriched.json");

  console.log(`Reading enriched commits from: ${enrichedDataPath}`);

  let commitData: CommitData;
  try {
    const enrichedData = fs.readFileSync(enrichedDataPath, "utf-8");
    commitData = JSON.parse(enrichedData);
    // JSON.parse is unchecked: fail here rather than deep inside the loop.
    if (!commitData || !Array.isArray(commitData.commits)) {
      throw new Error('expected a top-level "commits" array');
    }
  } catch (error) {
    console.error(`Error reading enriched commits data: ${error}`);
    process.exit(1);
  }

  const commits = commitData.commits;
  const totalCommits = commits.length;
  console.log(`Found ${totalCommits} commits to process`);

  // Start from the input commits; analyses from a previous run are merged in below.
  let doubleEnrichedCommits: DoubleEnrichedCommit[] = [...commits];

  if (fs.existsSync(doubleEnrichedDataPath)) {
    console.log("Found existing double enriched file, loading progress...");
    try {
      const existingData = fs.readFileSync(doubleEnrichedDataPath, "utf-8");
      const existingCommitData: { commits?: DoubleEnrichedCommit[] } = JSON.parse(existingData);
      if (!existingCommitData || !Array.isArray(existingCommitData.commits)) {
        throw new Error('expected a top-level "commits" array');
      }

      // Index finished analyses by hash so they land on the right input commit
      // even if the saved array differs in length or order.
      const analysedByHash = new Map<string, DoubleEnrichedCommit>();
      for (const previous of existingCommitData.commits) {
        if (previous && previous.analysis) {
          analysedByHash.set(previous.hash, previous);
        }
      }

      // Reassigned only after the saved file validated, so a bad file leaves the fresh copy intact.
      doubleEnrichedCommits = commits.map((commit): DoubleEnrichedCommit => {
        const previous = analysedByHash.get(commit.hash);
        return previous
          ? { ...commit, analysis: previous.analysis, analysisXml: previous.analysisXml }
          : commit;
      });
    } catch (error) {
      console.error(`Error reading existing double enriched file: ${error}`);
      console.log("Starting fresh...");
    }
  }

  // Find where to resume
  const startIndex = doubleEnrichedCommits.findIndex(commit => !commit.analysis);
  if (startIndex === -1) {
    console.log("All commits already processed!");
    return;
  }
  if (startIndex > 0) {
    console.log(`Resuming from commit ${startIndex + 1}/${totalCommits}`);
  }

  // Process each commit starting from the resume point
  for (let i = startIndex; i < totalCommits; i++) {
    // Commits that already carry an analysis from a previous run are not re-sent.
    if (doubleEnrichedCommits[i].analysis) {
      continue;
    }

    const commit = commits[i];
    updateProgress(i + 1, totalCommits, commit);

    // Get processed commits so far for context
    const processedCommits = doubleEnrichedCommits.slice(0, i).filter(c => c.analysis);

    // Build prompts. The replacement is given as a function because a plain
    // replacement string would interpret "$&", "$'", "$$" etc. found in patches.
    const commitJson = JSON.stringify(commit, null, 2);
    const systemPrompt = buildSystemPrompt(processedCommits, i, totalCommits)
      .replace("{COMMIT_DATA_PLACEHOLDER}", () => commitJson);
    const userPrompt = buildUserPrompt(commit, i);

    let success = false;
    let temperature = 0;
    // Last raw model output, kept so a failure can be diagnosed without another API call.
    let lastResponse: string | null = null;

    // Try with temperature 0 first, then 0.5 if it fails
    for (let attempt = 0; attempt < 2; attempt++) {
      try {
        const response = await callGemini(systemPrompt, userPrompt, temperature);
        lastResponse = response;

        // Extract XML
        const xmlResponse = extractXML(response);
        if (!xmlResponse) {
          throw new Error("No XML found in response");
        }

        // Parse XML to JSON
        const analysis = parseXMLToJSON(xmlResponse);
        if (!analysis) {
          throw new Error("Failed to parse XML to JSON");
        }

        // Save to double enriched commit
        doubleEnrichedCommits[i] = {
          ...commit,
          analysis,
          analysisXml: xmlResponse
        };

        success = true;
        break;
      } catch (error) {
        console.error(`\nAttempt ${attempt + 1} failed for commit ${i}: ${error}`);
        if (attempt === 0) {
          console.log("Retrying with temperature 0.5...");
          temperature = 0.5;
        }
      }
    }

    if (!success) {
      console.error(`\nFailed to process commit ${i} after 2 attempts. Exiting.`);

      // Show the output that actually failed to parse, not a fresh sample.
      if (lastResponse !== null) {
        console.log("\nRaw response for debugging:");
        console.log(lastResponse);
      } else {
        console.error("Could not get raw response for debugging: no response was received from the API");
      }

      process.exit(1);
    }

    // Save progress incrementally
    const outputData = {
      ...commitData,
      commits: doubleEnrichedCommits
    };

    try {
      // Write to a sibling temp file and rename over the target, so an interrupt
      // during the write cannot leave a truncated progress file behind.
      const temporaryPath = `${doubleEnrichedDataPath}.tmp`;
      fs.writeFileSync(temporaryPath, JSON.stringify(outputData, null, 2));
      fs.renameSync(temporaryPath, doubleEnrichedDataPath);
    } catch (error) {
      console.error(`\nError saving progress: ${error}`);
      process.exit(1);
    }

    // Small delay to be nice to the API
    await new Promise(resolve => setTimeout(resolve, 1000));
  }

  console.log("\n\nDouble enrichment completed successfully!");
  console.log(`Results saved to: ${doubleEnrichedDataPath}`);
}

// Handle termination signals.
// Nothing is saved here: main() already writes the output file after every
// processed commit, so the handlers only report the signal and stop. The exit
// status follows the 128 + signal-number convention so a wrapper script does
// not mistake an interrupted run for a completed one.
process.on('SIGINT', () => {
  console.log("\n\nReceived SIGINT, exiting (progress is saved after each processed commit)...");
  process.exit(130);
});

process.on('SIGTERM', () => {
  console.log("\n\nReceived SIGTERM, exiting (progress is saved after each processed commit)...");
  process.exit(143);
});

// Start the run; any error that escapes main() is fatal.
main().catch(error => {
  console.error("Fatal error:", error);
  process.exit(1);
});
	import { GoogleGenAI } from "@google/genai";
	import fs from "fs";
	import path from "path";

	// Top-level shape of the commits JSON document this script loads and rewrites.
	interface CommitData {
	repository: string;
	extracted_at: string;
	total_commits: number;
	// Commit records; main() iterates them in array order.
	commits: Commit[];
	}

	// One commit record from the input JSON; serialized whole (JSON.stringify) into the prompts.
	interface Commit {
	// First 8 characters are shown in the progress line.
	hash: string;
	author: string;
	date: string;
	committer: string;
	commitDate: string;
	// First 50 characters are shown in the progress line.
	subject: string;
	body: string;
	diffStats: string;
	fileChanges: FileChange[];
	githubComments: GitHubComment[];
	}

	// One entry of Commit.fileChanges; only forwarded to the model as JSON.
	interface FileChange {
	status: string;
	path: string;
	change_type: string;
	// Optional; may be absent from a record.
	patch?: string;
	}

	// One entry of Commit.githubComments; only forwarded to the model as JSON.
	interface GitHubComment {
	id: number;
	user: string;
	body: string;
	created_at: string;
	updated_at: string;
	html_url: string;
	}

	// Structured form of the model's <commitAnalysis> XML, as built by parseXMLToJSON.
	// Field names mirror the XML tag names, including their casing.
	interface CommitAnalysis {
	// counter and hash come from attributes of the <commitAnalysis> tag.
	counter: number;
	hash: string;
	description: string;
	// One entry per <point> element.
	playByPlay: string[];
	broaderContext: string;
	// Scores are requested as 0-100 in the prompt; the parser does not enforce the range.
	AIInvolvement: number;
	Size: number;
	Importance: number;
	Observations: string;
	Prompt: string;
	title: string;
	}

	// A commit plus the model's analysis; both extra fields are absent until it has been processed.
	interface DoubleEnrichedCommit extends Commit {
	// Parsed analysis; main() treats a commit without it as not yet processed.
	analysis?: CommitAnalysis;
	// Raw <commitAnalysis> XML, replayed inside <commitsSoFar> in later system prompts.
	analysisXml?: string;
	}

	// Identifiers interpolated into the Cloudflare AI Gateway base URL used by callGemini.
	// NOTE(review): "XXXX" looks like a redacted placeholder — supply real values before running.
	const CLOUDFLARE_ACCOUNT_ID = "XXXX";
	const CLOUDFLARE_GATEWAY_ID = "XXXX";

	// Project brief that buildSystemPrompt embeds verbatim between <project_info> tags.
	// NOTE(review): in this copy every non-blank line of the literal starts with a tab,
	// which becomes part of the prompt text — confirm that is intended.
	const PROJECT_INFO = `OAuth 2.1 Provider Framework for Cloudflare Workers
	This is a TypeScript library that implements the provider side of the OAuth 2.1 protocol with PKCE support. The library is intended to be used on Cloudflare Workers.

	Beta
	As of March, 2025, this library is very new, prerelease software. The API is still subject to change.

	Benefits of this library
	The library acts as a wrapper around your Worker code, which adds authorization for your API endpoints.
	All token management is handled automatically.
	Your API handler is written like a regular fetch handler, but receives the already-authenticated user details as a parameter. No need to perform any checks of your own.
	The library is agnostic to how you manage and authenticate users.
	The library is agnostic to how you build your UI. Your authorization flow can be implemented using whatever UI framework you use for everything else.
	The library's storage does not store any secrets, only hashes of them.

	Written using Claude
	This library (including the schema documentation) was largely written with the help of Claude, the AI model by Anthropic. Claude's output was thoroughly reviewed by Cloudflare engineers with careful attention paid to security and compliance with standards. Many improvements were made on the initial output, mostly again by prompting Claude (and reviewing the results). Check out the commit history to see how Claude was prompted and what code it produced.

	"NOOOOOOOO!!!! You can't just use an LLM to write an auth library!"

	"haha gpus go brrr"

	In all seriousness, two months ago (January 2025), I (@kentonv) would have agreed. I was an AI skeptic. I thought LLMs were glorified Markov chain generators that didn't actually understand code and couldn't produce anything novel. I started this project on a lark, fully expecting the AI to produce terrible code for me to laugh at. And then, uh... the code actually looked pretty good. Not perfect, but I just told the AI to fix things, and it did. I was shocked.

	To emphasize, this is not "vibe coded". Every line was thoroughly reviewed and cross-referenced with relevant RFCs, by security experts with previous experience with those RFCs. I was trying to validate my skepticism. I ended up proving myself wrong.

	Again, please check out the commit history -- especially early commits -- to understand how this went.`;

	/**
	 * Returns the first `<commitAnalysis ...>...</commitAnalysis>` element found in
	 * a model response, or `null` when the response contains none.
	 *
	 * @param response Raw text returned by the model.
	 * @returns The matched element including its tags, or `null`.
	 */
	function extractXML(response: string): string | null {
	  // `[^>]*` allows attributes on the opening tag; `[\s\S]*?` lazily spans a
	  // multi-line body up to the first closing tag.
	  const match = response.match(/<commitAnalysis[^>]*>[\s\S]*?<\/commitAnalysis>/);
	  return match ? match[0] : null;
	}

	/**
	 * Converts the model's `<commitAnalysis>` XML into a CommitAnalysis object using
	 * lightweight regex extraction (not a real XML parser; entities are not decoded).
	 *
	 * - `counter` and `hash` are read independently from the opening tag, so the
	 *   attribute order does not matter; a missing or non-numeric counter becomes 0.
	 * - Text fields are trimmed; a missing tag yields "".
	 * - Numeric fields are parsed with parseInt; missing or non-numeric values become 0.
	 *
	 * @param xml The `<commitAnalysis>` element text.
	 * @returns The parsed analysis, or `null` if extraction throws.
	 */
	function parseXMLToJSON(xml: string): CommitAnalysis | null {
	  try {
	    // Inner text of the first <tag ...>...</tag>, trimmed; "" when absent.
	    const getValue = (tag: string): string => {
	      const match = xml.match(new RegExp(`<${tag}[^>]*>([\\s\\S]*?)</${tag}>`));
	      return match ? match[1].trim() : "";
	    };

	    const getNumericValue = (tag: string): number => {
	      const value = getValue(tag);
	      return value ? parseInt(value, 10) || 0 : 0;
	    };

	    // Extract play by play points
	    const playByPlayMatches = xml.match(/<point>([\s\S]*?)<\/point>/g);
	    const playByPlay = playByPlayMatches
	      ? playByPlayMatches.map(match => match.replace(/<\/?point>/g, "").trim())
	      : [];

	    // Read each attribute on its own: the model may emit hash before counter.
	    const openingTagMatch = xml.match(/<commitAnalysis\b[^>]*>/);
	    const openingTag = openingTagMatch ? openingTagMatch[0] : "";
	    const getAttribute = (name: string): string => {
	      const match = openingTag.match(new RegExp(`\\b${name}\\s*=\\s*"([^"]*)"`));
	      return match ? match[1] : "";
	    };

	    // counter="" would parse to NaN, which JSON.stringify turns into null.
	    const parsedCounter = parseInt(getAttribute("counter"), 10);
	    const counter = Number.isNaN(parsedCounter) ? 0 : parsedCounter;
	    const hash = getAttribute("hash");

	    return {
	      counter,
	      hash,
	      description: getValue("description"),
	      playByPlay,
	      broaderContext: getValue("broaderContext"),
	      AIInvolvement: getNumericValue("AIInvolvement"),
	      Size: getNumericValue("Size"),
	      Importance: getNumericValue("Importance"),
	      Observations: getValue("Observations"),
	      Prompt: getValue("Prompt"),
	      title: getValue("title")
	    };
	  } catch (error) {
	    console.error("Failed to parse XML:", error);
	    return null;
	  }
	}

	/**
	 * Assembles the system prompt for one commit: an instruction line, a
	 * `<commitN>` section holding the literal token `{COMMIT_DATA_PLACEHOLDER}`
	 * (the caller substitutes the commit JSON), the project brief, and — when
	 * earlier commits exist — their stored analysis XML inside `<commitsSoFar>`.
	 *
	 * @param processedCommits Earlier commits; entries without `analysisXml` are skipped.
	 * @param currentCommitIndex Index used in the "(i/total)" label and the `<commitN>` tag name.
	 * @param totalCommits Total number of commits, used in the "(i/total)" label.
	 * @returns The system prompt text, still containing the placeholder token.
	 */
	function buildSystemPrompt(processedCommits: DoubleEnrichedCommit[], currentCommitIndex: number, totalCommits: number): string {
	// Whitespace inside this template literal is part of the prompt.
	let systemPrompt = `You are being provided with a single commit (${currentCommitIndex}/${totalCommits}) from a project - the full commit info as well as a brief about the project is provided. PAY CLOSE ATTENTION TO THE COMMIT DATA ITSELF AND TREAT EVERYTHING ELSE AS SUPPLEMENTAL.

	<commit${currentCommitIndex}>
	{COMMIT_DATA_PLACEHOLDER}
	</commit${currentCommitIndex}>

	<project_info>
	${PROJECT_INFO}
	</project_info>`;

	// The history section is emitted only when there is at least one earlier commit.
	if (processedCommits.length > 0) {
	systemPrompt += "\n\n<commitsSoFar>";

	for (const commit of processedCommits) {
	// Only commits with stored raw XML contribute a line.
	if (commit.analysisXml) {
	systemPrompt += "\n" + commit.analysisXml;
	}
	}

	systemPrompt += "\n</commitsSoFar>";
	}

	return systemPrompt;
	}

	/**
	 * Builds the user message for one commit: the commit serialized as
	 * pretty-printed JSON inside a `<commitN>` tag, followed by the XML response
	 * template the model is asked to fill in. The "//" notes inside the template
	 * are instructions to the model, not code comments.
	 *
	 * @param commit The commit to analyse.
	 * @param commitIndex Index used in the `<commitN>` tag name.
	 * @returns The complete user prompt text.
	 */
	function buildUserPrompt(commit: Commit, commitIndex: number): string {
	// Whitespace inside this template literal is part of the prompt.
	return `Commit to Analyze:
	<commit${commitIndex}>
	${JSON.stringify(commit, null, 2)}
	</commit${commitIndex}>

	From the commit, provide the following analysis in XML, in this format.
	<commitAnalysis counter="" hash=""> // Re-list the commit hash and the counter for the commit you're analysing.
	<description> // medium-length, two paragraph description of what is happening in this commit specifically. What is being done?
	</description>
	<playByPlay> // Bullet point list all the things that happened in this commit. Could include files changed and why, functionality introduced, frustrations had, AI involvement, interesting points, etc.
	<point></point>
	<point></point>
	</playByPlay>
	<broaderContext> // medium-length. How does this commit fit into the broader development work in this repo? Is progress being made and in which direction?
	</broaderContext>
	<AIInvolvement></AIInvolvement> // numeric, 0-100 score on how much AI was involved in this commit, 100 being 100% AI written and 0% being completely human and manual.
	<Size></Size> // numeric, 0-100 score based on your impression on how big the commit is in general size. The full codebase is about 2-3K lines of code.
	<Importance></Importance> // numeric, 0-100 score based on your impression of how important - not relative to size - this commit is to the overall development and the repo.
	<Observations></Observations> // text, large - can just be observations you have, or the devs have had, or the AIs have had.
	<Prompt></Prompt> // can be empty, are there any prompts in this commit? Feel free to multiple and cover all the messages being sent to the AI. Make sure it's verbatim - only include messages to the actual AI, not thoughts.
	<title></title> // Give the commit a clean, 20-30 word title covering what actually happened here, so that when the titles are placed together there's a clean idea of what's happening. Be funny or interesting if you can.
	</commitAnalysis>`;
	}

	/**
	 * Sends one request to the Gemini model through the Cloudflare AI Gateway and
	 * returns the text of the first candidate, concatenating all of its text
	 * parts (parts flagged as model "thought" are skipped).
	 *
	 * @param systemPrompt Text passed as the system instruction.
	 * @param userPrompt Text passed as the single user turn.
	 * @param temperature Sampling temperature (defaults to 0).
	 * @returns The response text, or "" when the response carries no text.
	 * @throws Error when GOOGLE_API_KEY is not set; SDK/network errors propagate unchanged.
	 */
	async function callGemini(systemPrompt: string, userPrompt: string, temperature: number = 0): Promise<string> {
	  // Fail with a clear message instead of handing `undefined` to the SDK.
	  const apiKey = process.env.GOOGLE_API_KEY;
	  if (!apiKey) {
	    throw new Error("GOOGLE_API_KEY environment variable is required");
	  }

	  const ai = new GoogleGenAI({ apiKey });

	  const model = "gemini-2.5-flash-preview-05-20";

	  const contents = [
	    {
	      role: "user",
	      parts: [{ text: userPrompt }],
	    },
	  ];

	  const response = await ai.models.generateContent({
	    config: {
	      // Route the request through the Cloudflare AI Gateway rather than calling Google directly.
	      httpOptions: {
	        baseUrl: `https://gateway.ai.cloudflare.com/v1/${CLOUDFLARE_ACCOUNT_ID}/${CLOUDFLARE_GATEWAY_ID}/google-ai-studio`,
	      },
	      temperature,
	      responseMimeType: "text/plain",
	      systemInstruction: [{ text: systemPrompt }],
	    },
	    contents,
	    model,
	  });

	  // Join every text part; reading only parts[0] drops the rest of a multi-part answer.
	  const parts = response.candidates?.[0]?.content?.parts ?? [];
	  let text = "";
	  for (const part of parts) {
	    if (part.thought) {
	      continue;
	    }
	    text += part.text ?? "";
	  }
	  return text;
	}

	/**
	 * Redraws a one-line progress indicator on stdout. The line begins with a
	 * carriage return and has no trailing newline, so each call overwrites the
	 * previous one. It shows a 50-cell bar, the rounded percentage, the
	 * current/total counts, the first 8 characters of the commit hash and the
	 * first 50 characters of the subject followed by "...".
	 *
	 * @param current 1-based position of the commit being processed.
	 * @param total Total number of commits.
	 * @param commit The commit whose hash and subject are shown.
	 */
	function updateProgress(current: number, total: number, commit: Commit): void {
	  const percentage = Math.round((current / total) * 100);

	  // One bar cell per two percentage points.
	  const filledCells = Math.floor(percentage / 2);
	  const emptyCells = 50 - filledCells;
	  const bar = "█".repeat(filledCells) + "░".repeat(emptyCells);

	  const shortHash = commit.hash.substring(0, 8);
	  const shortSubject = commit.subject.substring(0, 50);

	  const line = [
	    "\r[", bar, "] ",
	    percentage, "% (", current, "/", total, ") - ",
	    shortHash, ": ", shortSubject, "...",
	  ].join("");

	  process.stdout.write(line);
	}

	/**
	 * Entry point. Reads `data/commits-data-enriched.json`, asks the model for an
	 * analysis of every commit that does not have one yet, and writes the result to
	 * `data/commits-data-double-enriched.json` after each processed commit so an
	 * interrupted run can be resumed.
	 *
	 * Resume: analyses from an existing output file are matched to the input
	 * commits by hash, so a shorter, longer or reordered output file cannot shift
	 * analyses onto the wrong commit or hide commits that still need work.
	 *
	 * Exits the process with status 1 when the API key is missing, the input cannot
	 * be read, a commit fails twice, or progress cannot be saved.
	 */
	async function main() {
	  console.log("Starting double enrichment process...");

	  // Check for required environment variable
	  if (!process.env.GOOGLE_API_KEY) {
	    console.error("Error: GOOGLE_API_KEY environment variable is required");
	    process.exit(1);
	  }

	  // Input and output both live under ./data relative to the working directory
	  const enrichedDataPath = path.join(process.cwd(), "data", "commits-data-enriched.json");
	  const doubleEnrichedDataPath = path.join(process.cwd(), "data", "commits-data-double-enriched.json");

	  console.log(`Reading enriched commits from: ${enrichedDataPath}`);

	  let commitData: CommitData;
	  try {
	    const enrichedData = fs.readFileSync(enrichedDataPath, "utf-8");
	    commitData = JSON.parse(enrichedData);
	    // JSON.parse is unchecked: fail here rather than deep inside the loop.
	    if (!commitData || !Array.isArray(commitData.commits)) {
	      throw new Error('expected a top-level "commits" array');
	    }
	  } catch (error) {
	    console.error(`Error reading enriched commits data: ${error}`);
	    process.exit(1);
	  }

	  const commits = commitData.commits;
	  const totalCommits = commits.length;
	  console.log(`Found ${totalCommits} commits to process`);

	  // Start from the input commits; analyses from a previous run are merged in below.
	  let doubleEnrichedCommits: DoubleEnrichedCommit[] = [...commits];

	  if (fs.existsSync(doubleEnrichedDataPath)) {
	    console.log("Found existing double enriched file, loading progress...");
	    try {
	      const existingData = fs.readFileSync(doubleEnrichedDataPath, "utf-8");
	      const existingCommitData: { commits?: DoubleEnrichedCommit[] } = JSON.parse(existingData);
	      if (!existingCommitData || !Array.isArray(existingCommitData.commits)) {
	        throw new Error('expected a top-level "commits" array');
	      }

	      // Index finished analyses by hash so they land on the right input commit
	      // even if the saved array differs in length or order.
	      const analysedByHash = new Map<string, DoubleEnrichedCommit>();
	      for (const previous of existingCommitData.commits) {
	        if (previous && previous.analysis) {
	          analysedByHash.set(previous.hash, previous);
	        }
	      }

	      // Reassigned only after the saved file validated, so a bad file leaves the fresh copy intact.
	      doubleEnrichedCommits = commits.map((commit): DoubleEnrichedCommit => {
	        const previous = analysedByHash.get(commit.hash);
	        return previous
	          ? { ...commit, analysis: previous.analysis, analysisXml: previous.analysisXml }
	          : commit;
	      });
	    } catch (error) {
	      console.error(`Error reading existing double enriched file: ${error}`);
	      console.log("Starting fresh...");
	    }
	  }

	  // Find where to resume
	  const startIndex = doubleEnrichedCommits.findIndex(commit => !commit.analysis);
	  if (startIndex === -1) {
	    console.log("All commits already processed!");
	    return;
	  }
	  if (startIndex > 0) {
	    console.log(`Resuming from commit ${startIndex + 1}/${totalCommits}`);
	  }

	  // Process each commit starting from the resume point
	  for (let i = startIndex; i < totalCommits; i++) {
	    // Commits that already carry an analysis from a previous run are not re-sent.
	    if (doubleEnrichedCommits[i].analysis) {
	      continue;
	    }

	    const commit = commits[i];
	    updateProgress(i + 1, totalCommits, commit);

	    // Get processed commits so far for context
	    const processedCommits = doubleEnrichedCommits.slice(0, i).filter(c => c.analysis);

	    // Build prompts. The replacement is given as a function because a plain
	    // replacement string would interpret "$&", "$'", "$$" etc. found in patches.
	    const commitJson = JSON.stringify(commit, null, 2);
	    const systemPrompt = buildSystemPrompt(processedCommits, i, totalCommits)
	      .replace("{COMMIT_DATA_PLACEHOLDER}", () => commitJson);
	    const userPrompt = buildUserPrompt(commit, i);

	    let success = false;
	    let temperature = 0;
	    // Last raw model output, kept so a failure can be diagnosed without another API call.
	    let lastResponse: string | null = null;

	    // Try with temperature 0 first, then 0.5 if it fails
	    for (let attempt = 0; attempt < 2; attempt++) {
	      try {
	        const response = await callGemini(systemPrompt, userPrompt, temperature);
	        lastResponse = response;

	        // Extract XML
	        const xmlResponse = extractXML(response);
	        if (!xmlResponse) {
	          throw new Error("No XML found in response");
	        }

	        // Parse XML to JSON
	        const analysis = parseXMLToJSON(xmlResponse);
	        if (!analysis) {
	          throw new Error("Failed to parse XML to JSON");
	        }

	        // Save to double enriched commit
	        doubleEnrichedCommits[i] = {
	          ...commit,
	          analysis,
	          analysisXml: xmlResponse
	        };

	        success = true;
	        break;
	      } catch (error) {
	        console.error(`\nAttempt ${attempt + 1} failed for commit ${i}: ${error}`);
	        if (attempt === 0) {
	          console.log("Retrying with temperature 0.5...");
	          temperature = 0.5;
	        }
	      }
	    }

	    if (!success) {
	      console.error(`\nFailed to process commit ${i} after 2 attempts. Exiting.`);

	      // Show the output that actually failed to parse, not a fresh sample.
	      if (lastResponse !== null) {
	        console.log("\nRaw response for debugging:");
	        console.log(lastResponse);
	      } else {
	        console.error("Could not get raw response for debugging: no response was received from the API");
	      }

	      process.exit(1);
	    }

	    // Save progress incrementally
	    const outputData = {
	      ...commitData,
	      commits: doubleEnrichedCommits
	    };

	    try {
	      // Write to a sibling temp file and rename over the target, so an interrupt
	      // during the write cannot leave a truncated progress file behind.
	      const temporaryPath = `${doubleEnrichedDataPath}.tmp`;
	      fs.writeFileSync(temporaryPath, JSON.stringify(outputData, null, 2));
	      fs.renameSync(temporaryPath, doubleEnrichedDataPath);
	    } catch (error) {
	      console.error(`\nError saving progress: ${error}`);
	      process.exit(1);
	    }

	    // Small delay to be nice to the API
	    await new Promise(resolve => setTimeout(resolve, 1000));
	  }

	  console.log("\n\nDouble enrichment completed successfully!");
	  console.log(`Results saved to: ${doubleEnrichedDataPath}`);
	}

	// Handle termination signals.
	// Nothing is saved here: main() already writes the output file after every
	// processed commit, so the handlers only report the signal and stop. The exit
	// status follows the 128 + signal-number convention so a wrapper script does
	// not mistake an interrupted run for a completed one.
	process.on('SIGINT', () => {
	  console.log("\n\nReceived SIGINT, exiting (progress is saved after each processed commit)...");
	  process.exit(130);
	});

	process.on('SIGTERM', () => {
	  console.log("\n\nReceived SIGTERM, exiting (progress is saved after each processed commit)...");
	  process.exit(143);
	});

	// Start the run; any error that escapes main() is fatal.
	main().catch(error => {
	  console.error("Fatal error:", error);
	  process.exit(1);
	});
No results found