@usernaamee
Created May 3, 2025 18:31
A better contextual search over a folder of Read the Docs .rst/.txt documentation files
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>RTD Search Agent (Standalone)</title>
<script src="https://cdn.tailwindcss.com"></script>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
<style type="text/tailwindcss">
/* type="text/tailwindcss" lets the Tailwind Play CDN process the @apply rules below */
/* Apply base font */
body {
font-family: 'Inter', sans-serif;
}
/* Style for file input button (using Tailwind @apply) */
input[type="file"]::file-selector-button {
@apply mr-4 py-2 px-4 rounded-md border border-gray-300 text-sm font-semibold bg-white text-gray-700 hover:bg-gray-50 cursor-pointer transition-colors duration-150;
}
/* Ensure text areas wrap text */
textarea {
white-space: pre-wrap;
word-wrap: break-word;
}
/* Style for LLM answer area */
#llmAnswer {
@apply bg-gray-50 p-4 rounded-md border border-gray-200 whitespace-pre-wrap break-words min-h-[100px]; /* Added min-height */
}
/* Scrollable log area */
#statusLog {
max-height: 250px; /* Control height */
overflow-y: auto; /* Enable vertical scroll */
@apply bg-gray-100 p-2 rounded border border-gray-300 text-xs text-gray-700 mb-4 font-mono; /* Use mono font for logs */
}
/* Progress bar styling */
#searchProgressContainer {
@apply w-full bg-gray-200 rounded-full h-2.5 dark:bg-gray-700 mb-2 mt-1 hidden; /* Hidden initially */
}
#searchProgressBar {
@apply bg-blue-600 h-2.5 rounded-full transition-all duration-300 ease-out;
width: 0%; /* Start at 0% */
}
/* Style for log entries */
.log-entry {
@apply text-gray-700;
}
.log-entry.error {
@apply text-red-600 font-semibold;
}
</style>
</head>
<body class="bg-gray-100 p-4 md:p-8">
<div class="max-w-4xl mx-auto bg-white p-6 rounded-lg shadow-md">
<h1 class="text-2xl font-bold mb-6 text-center text-gray-800">Documentation Query Agent</h1>
<div class="mb-6 p-4 border border-gray-200 rounded-md bg-gray-50">
<h2 class="text-lg font-semibold mb-3 text-gray-700">LLM Configuration</h2>
<div class="grid grid-cols-1 md:grid-cols-3 gap-4">
<div>
<label for="llmUrl" class="block text-sm font-medium text-gray-700 mb-1">Base URL</label>
<input type="url" id="llmUrl" value="http://localhost:8080/v1/chat/completions" class="w-full px-3 py-2 border border-gray-300 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm" placeholder="http://localhost:8080/v1">
</div>
<div>
<label for="llmApiKey" class="block text-sm font-medium text-gray-700 mb-1">API Key</label>
<input type="password" id="llmApiKey" value="DUMMY_KEY" class="w-full px-3 py-2 border border-gray-300 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm" placeholder="Enter API Key if required">
<p class="mt-1 text-xs text-red-600 font-semibold">Warning: Avoid entering sensitive keys in the browser.</p>
</div>
<div>
<label for="llmModel" class="block text-sm font-medium text-gray-700 mb-1">Model Name</label>
<input type="text" id="llmModel" value="local-model" class="w-full px-3 py-2 border border-gray-300 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm" placeholder="e.g., local-model">
</div>
</div>
<p class="mt-2 text-xs text-gray-500">Ensure the LLM endpoint URL has CORS configured to allow requests from this page.</p>
</div>
<div class="mb-6">
<label for="docFolder" class="block text-lg font-semibold mb-2 text-gray-700">1. Load Documentation Folder</label>
<input type="file" id="docFolder" webkitdirectory directory multiple class="block w-full text-sm text-gray-500 file:cursor-pointer">
<p class="mt-1 text-xs text-gray-500">Select the folder containing your .rst and .txt documentation files. Reading happens in your browser.</p>
<div id="readingStatus" class="mt-2 text-sm text-blue-600 font-medium"></div>
</div>
<div class="mb-6">
<label for="userQuery" class="block text-lg font-semibold mb-2 text-gray-700">2. Enter Your Query</label>
<textarea id="userQuery" rows="3" class="w-full px-3 py-2 border border-gray-300 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm" placeholder="e.g., How do I configure the database connection?"></textarea>
<div id="searchProgressContainer">
<div id="searchProgressBar"></div>
</div>
<button id="submitQuery" class="mt-3 w-full inline-flex justify-center py-2 px-4 border border-transparent shadow-sm text-sm font-medium rounded-md text-white bg-indigo-600 hover:bg-indigo-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500 disabled:opacity-50 disabled:cursor-not-allowed" disabled>
Load Docs First
</button>
</div>
<div class="mb-4">
<h3 class="text-md font-semibold mb-1 text-gray-700">Status Log</h3>
<div id="statusLog"></div>
</div>
<div>
<h3 class="text-lg font-semibold mb-2 text-gray-700">LLM Answer</h3>
<div id="llmAnswer">Waiting for query...</div>
</div>
</div>
<script>
/**
* Doc Query Agent (Refactored & Combined)
*
* This script implements a client-side documentation query agent.
* It allows users to load a folder of documentation files (.rst, .txt),
* performs keyword-based searches, interacts with an LLM for guidance
* and final answers, and displays the results. All in a single HTML file.
*/
// Create a namespace object to hold the application logic
const docQueryAgent = {
// --- Configuration ---
config: {
MAX_CONTEXT_CHARS: 3500, // Max characters of context to send to LLM
MAX_SNIPPET_LEN: 500, // Max length of a single context snippet (approx)
MAX_SNIPPETS_INITIAL: 10, // Max number of snippets to fetch initially
MAX_SNIPPETS_REFINED: 8, // Max snippets after LLM guidance
REQUEST_TIMEOUT_MS: 600 * 1000, // Timeout 600 seconds (10 minutes)
GUIDANCE_MAX_TOKENS: 1500, // Limit tokens for the guidance response (keep it short)
FINAL_ANSWER_MAX_TOKENS: 1500, // Limit tokens for the final answer
SEARCH_UPDATE_INTERVAL: 100, // Update progress bar every N paragraphs for responsiveness
// Reduced stop words list (can be expanded)
STOP_WORDS: new Set([
"a", "about", "above", "after", "again", "against", "all", "am", "an", "and",
"any", "are", "as", "at", "be", "because", "been", "before", "being",
"below", "between", "both", "but", "by", "can", "cannot", "could",
"did", "do", "does", "doing", "down", "during", "each", "few", "for",
"from", "further", "had", "has", "have", "having", "he", "her", "here",
"hers", "herself", "him", "himself", "his", "how", "i", "if", "in", "into",
"is", "it", "its", "itself", "let", "me", "more", "most", "my", "myself",
"no", "nor", "not", "of", "off", "on", "once", "only", "or", "other",
"ought", "our", "ours", "ourselves", "out", "over", "own", "same", "she",
"should", "so", "some", "such", "than", "that", "the", "their", "theirs",
"them", "themselves", "then", "there", "these", "they", "this", "those",
"through", "to", "too", "under", "until", "up", "very", "was", "we", "were",
"what", "when", "where", "which", "while", "who", "whom", "why", "with",
"would", "you", "your", "yours", "yourself", "yourselves",
// Domain-specific stop words (examples)
"using", "get", "set", "configure", "run", "install", "find", "tell", "me", "about",
"example", "examples", "file", "files", "folder", "directory", "doc", "docs",
"documentation", "page", "section", "chapter", "paragraph"
])
},
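// Rough sizing note (assumption: ~4 characters per token, a common rule of thumb):
// MAX_CONTEXT_CHARS of 3500 keeps the assembled context around 800-900 tokens,
// leaving room for the system prompt, the question, and the response within a
// typical local-model context window.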
// --- Application State ---
state: {
documentStore: {}, // Stores paragraphs: { id: { text: "...", path: "..." } }
nextDocId: 0,
isReadingFiles: false,
isQuerying: false,
isSearching: false,
currentReader: null, // To hold the stream reader for cancellation
},
// --- DOM Elements ---
// Cache DOM elements for performance
elements: {
docFolderInput: null,
readingStatusDiv: null,
userQueryInput: null,
submitQueryButton: null,
llmUrlInput: null,
llmApiKeyInput: null,
llmModelInput: null,
llmAnswerDiv: null,
statusLogDiv: null,
searchProgressContainer: null,
searchProgressBar: null,
},
// --- Initialization ---
/**
* Initializes the application by getting DOM elements and setting up event listeners.
*/
init() {
// Select all necessary DOM elements
this.elements.docFolderInput = document.getElementById('docFolder');
this.elements.readingStatusDiv = document.getElementById('readingStatus');
this.elements.userQueryInput = document.getElementById('userQuery');
this.elements.submitQueryButton = document.getElementById('submitQuery');
this.elements.llmUrlInput = document.getElementById('llmUrl');
this.elements.llmApiKeyInput = document.getElementById('llmApiKey');
this.elements.llmModelInput = document.getElementById('llmModel');
this.elements.llmAnswerDiv = document.getElementById('llmAnswer');
this.elements.statusLogDiv = document.getElementById('statusLog');
this.elements.searchProgressContainer = document.getElementById('searchProgressContainer');
this.elements.searchProgressBar = document.getElementById('searchProgressBar');
// Add event listeners
this.elements.docFolderInput.addEventListener('change', (event) => this.handleFolderSelect(event));
this.elements.submitQueryButton.addEventListener('click', () => this.handleSubmitQuery());
// Set initial state
this.updateSubmitButtonState();
this.logStatus("Application loaded. Please select documentation folder.");
},
// --- Utility Functions ---
/**
* Logs a message to the console and the status log div in the UI.
* @param {string} message - The message to log.
* @param {boolean} [isError=false] - Whether the message represents an error.
*/
logStatus(message, isError = false) {
if (isError) {
console.error(message);
} else {
console.log(message);
}
const entry = document.createElement('div');
// Sanitize message to prevent basic HTML injection in the log
const cleanMessage = String(message).replace(/</g, "&lt;").replace(/>/g, "&gt;");
const timestamp = new Date().toLocaleTimeString();
entry.innerHTML = `[${timestamp}] ${cleanMessage}`;
entry.classList.add('log-entry'); // Base class
if (isError) {
entry.classList.add('error'); // Error specific class
}
// Prepend the new log entry
if (this.elements.statusLogDiv) { // Ensure element exists
this.elements.statusLogDiv.insertBefore(entry, this.elements.statusLogDiv.firstChild);
} else {
console.warn("Status log element not found when trying to log:", message);
}
},
/**
* Clears all entries from the status log div.
*/
clearLog() {
if (this.elements.statusLogDiv) {
this.elements.statusLogDiv.innerHTML = '';
}
},
/**
* Updates the text and disabled state of the submit button based on application state.
*/
updateSubmitButtonState() {
const btn = this.elements.submitQueryButton;
// Ensure button exists before modifying
if (!btn) return;
const docsLoaded = Object.keys(this.state.documentStore).length > 0;
btn.disabled = this.state.isReadingFiles || this.state.isSearching || this.state.isQuerying || !docsLoaded;
if (this.state.isReadingFiles) btn.textContent = 'Reading Files...';
else if (this.state.isSearching) btn.textContent = 'Searching Docs...';
else if (this.state.isQuerying) btn.textContent = 'Querying LLM...';
else if (!docsLoaded) btn.textContent = 'Load Docs First';
else btn.textContent = 'Ask LLM';
},
/**
* Extracts potential keywords from a query string.
* Removes stop words and short words.
* @param {string} query - The user's query.
* @returns {string[]} An array of keywords.
*/
extractKeywords(query) {
if (!query) return [];
// Match words, convert to lowercase
const words = query.toLowerCase().match(/\b\w+\b/g) || [];
// Filter out stop words and words shorter than 3 characters
return words.filter(word => !this.config.STOP_WORDS.has(word) && word.length > 2);
},
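// Illustrative example (not executed): for the sample query shown in the UI placeholder,
//   this.extractKeywords("How do I configure the database connection?")
// drops the stop words ("how", "do", "i", "configure", "the") and any word shorter than
// three characters, returning ["database", "connection"].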
/**
* Splits text into paragraphs based on double line breaks.
* @param {string} text - The text content of a file.
* @returns {string[]} An array of paragraphs (non-empty strings).
*/
splitIntoParagraphs(text) {
if (!text) return [];
// Split by one or more empty lines (handles different line ending combinations)
return text.split(/\r?\n\s*\r?\n/)
.map(p => p.trim()) // Remove leading/trailing whitespace from each paragraph
.filter(p => p.length > 5); // Filter out very short paragraphs/empty strings
},
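// Illustrative example (not executed), using a hypothetical package name: a file body such as
//   "Installation\n============\n\npip install mypkg\n\nOK"
// splits on the blank line boundaries into ["Installation\n============", "pip install mypkg"];
// the trailing "OK" is dropped by the length > 5 filter.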
/**
* Removes <think>...</think> blocks from a string, including multi-line blocks.
* Also trims whitespace around the removed blocks and the final result.
* @param {string} text - The input text potentially containing think blocks.
* @returns {string} - The text with think blocks removed.
*/
stripThinkTags(text) {
if (!text) return "";
const trimmedText = text.trim();
// Regex: \s*<think>.*?<\/think>\s*
// \s* - Matches optional whitespace before the opening tag
// <think> - Matches the opening tag
// .*? - Matches any character (including newline) non-greedily
// </think> - Matches the closing tag
// \s* - Matches optional whitespace after the closing tag
// gs flags - g: global (all occurrences), s: dotall (. matches newline)
return trimmedText.replace(/\s*<think>.*?<\/think>\s*/gs, '').trim();
},
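// Illustrative example (not executed), relevant for reasoning-style models that emit think blocks;
// the keywords shown are hypothetical:
//   this.stripThinkTags("<think>scanning snippets...</think>SEARCH_FOR: database pooling")
// returns "SEARCH_FOR: database pooling".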
/**
* Updates the search progress bar.
* @param {number} current - The number of items processed.
* @param {number} total - The total number of items.
*/
updateSearchProgress(current, total) {
if (!this.elements.searchProgressBar) return; // Check if element exists
if (total <= 0) {
this.elements.searchProgressBar.style.width = '0%';
return;
}
const progress = Math.min(100, Math.round((current / total) * 100));
this.elements.searchProgressBar.style.width = `${progress}%`;
},
// --- File Reading and Storing ---
/**
* Resets the document store and related state variables.
*/
initializeDocumentStore() {
this.state.documentStore = {};
this.state.nextDocId = 0;
this.logStatus("Document store initialized.");
},
/**
* Handles the selection of a folder, reads relevant files, and stores paragraphs.
* @param {Event} event - The file input change event.
*/
async handleFolderSelect(event) {
const files = event.target.files;
if (!files || files.length === 0) {
this.logStatus("No folder selected or folder empty.", true);
return;
}
this.state.isReadingFiles = true;
this.updateSubmitButtonState();
this.initializeDocumentStore(); // Clear previous documents
if (this.elements.readingStatusDiv) {
this.elements.readingStatusDiv.textContent = `Reading ${files.length} file entries...`;
}
this.clearLog(); // Clear the log for the new session before logging progress
this.logStatus(`Starting to read ${files.length} file entries...`);
let processedFileCount = 0;
let storedParagraphCount = 0;
const promises = [];
const startTime = performance.now();
for (const file of files) {
// webkitRelativePath is non-standard but common for folder uploads
const filePath = file.webkitRelativePath || file.name;
// Process only .rst and .txt files
if (filePath && (file.name.endsWith('.rst') || file.name.endsWith('.txt'))) {
promises.push(
this.readFileContent(file).then(({ path, content }) => {
if (content) {
const paragraphs = this.splitIntoParagraphs(content);
if (paragraphs.length === 0) {
this.logStatus(`Warning: No paragraphs found in ${path} after splitting.`);
}
paragraphs.forEach(para => {
const docId = this.state.nextDocId++;
this.state.documentStore[docId] = { text: para, path: path };
storedParagraphCount++;
});
processedFileCount++;
} else {
this.logStatus(`Warning: Empty content read from ${path}`);
}
}).catch(error => this.logStatus(`Error reading file ${filePath}: ${error}`, true))
);
} else if (!file.type && file.size === 0 && filePath) {
// Skip likely directory entries (no type, size 0)
} else if (filePath && !(file.name.endsWith('.rst') || file.name.endsWith('.txt'))) {
// Skip other file types explicitly
// this.logStatus(`Skipping non .rst/.txt file: ${filePath}`); // Optional: Log skipped files
} else if (!filePath) {
this.logStatus(`Skipping item with no path: ${file.name}`);
}
// Provide feedback during processing large folders (yield to UI thread)
if (promises.length > 0 && promises.length % 100 === 0) {
if (this.elements.readingStatusDiv) {
this.elements.readingStatusDiv.textContent = `Reading Files... (${processedFileCount} relevant files processed, ${storedParagraphCount} paragraphs stored)`;
}
this.logStatus(`Reading progress: ${processedFileCount} files processed...`);
await new Promise(resolve => setTimeout(resolve, 0)); // Allow UI updates
}
}
// Wait for all file reading promises to complete
await Promise.all(promises);
const duration = ((performance.now() - startTime) / 1000).toFixed(2);
if (this.elements.readingStatusDiv) {
this.elements.readingStatusDiv.textContent = `Reading complete! Found ${processedFileCount} relevant files, stored ${storedParagraphCount} paragraphs in ${duration}s. Ready to query.`;
}
this.logStatus(`File reading finished. Files: ${processedFileCount}, Paragraphs: ${storedParagraphCount}, Time: ${duration}s.`);
this.state.isReadingFiles = false;
this.updateSubmitButtonState();
},
/**
* Reads the content of a single file as text.
* Attempts UTF-8 first, then Latin-1 as a fallback.
* @param {File} file - The file object to read.
* @returns {Promise<{path: string, content: string|null}>} A promise resolving with the file path and content.
*/
readFileContent(file) {
return new Promise((resolve, reject) => {
const reader = new FileReader();
const filePath = file.webkitRelativePath || file.name;
reader.onload = (event) => resolve({ path: filePath, content: event.target.result });
reader.onerror = (event) => {
reader.abort(); // Ensure reader is stopped
reject(new Error(`FileReader error for ${filePath}: ${event.target.error}`));
};
try {
// Try reading as UTF-8 first
reader.readAsText(file, 'utf-8');
} catch (e) {
this.logStatus(`UTF-8 read failed for ${filePath}, trying Latin-1...`, true);
try {
// Fall back to ISO-8859-1 (Latin-1) if the UTF-8 read throws synchronously
reader.readAsText(file, 'iso-8859-1');
} catch (e2) {
reject(new Error(`Could not read file ${filePath} with UTF-8 or Latin-1: ${e2}`));
}
}
});
},
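// Note on the encoding fallback (assumption about FileReader behavior): readAsText()
// rarely throws synchronously; bytes that are not valid UTF-8 are typically decoded as
// U+FFFD replacement characters rather than raising an error, so the ISO-8859-1 branch
// above acts only as a safety net for synchronous read failures.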
// --- Manual Searching and Context Assembly ---
/**
* Performs a keyword-based search over the stored document paragraphs.
* Scores paragraphs based on keyword matches and proximity.
* @param {string[]} keywords - An array of keywords to search for.
* @param {number} maxSnippets - The maximum number of snippets to return.
* @returns {Promise<Array<{score: number, path: string, snippet: string, docId: number}>>} A promise resolving with an array of scored snippets.
*/
async searchDocsManual(keywords, maxSnippets) {
// Ensure progress bar elements exist
if (!this.elements.searchProgressContainer || !this.elements.searchProgressBar) {
this.logStatus("Search progress elements not found.", true);
return [];
}
this.elements.searchProgressContainer.style.display = 'block';
this.updateSearchProgress(0, 1); // Show 0% initially
const docIds = Object.keys(this.state.documentStore);
const totalDocs = docIds.length;
if (totalDocs === 0) {
this.logStatus("Document store is empty. Cannot search.", true);
this.elements.searchProgressContainer.style.display = 'none';
return [];
}
// Allow the search to run even with no keywords; the scoring loop below handles an empty keyword set
if (!keywords || keywords.length === 0) {
this.logStatus("No keywords extracted for search. With the current scoring, no snippets will match.");
// keywordSet stays empty; every paragraph scores 0 and is skipped
keywords = [];
}
this.logStatus(`Starting manual search for keywords: [${keywords.join(', ')}] across ${totalDocs} paragraphs.`);
const startTime = performance.now();
const scoredSnippets = [];
const keywordSet = new Set(keywords.map(kw => kw.toLowerCase())); // Ensure keywords are lowercase
let processedCount = 0;
for (const docIdStr of docIds) {
const docId = parseInt(docIdStr, 10);
const docInfo = this.state.documentStore[docId];
if (!docInfo || !docInfo.text) continue; // Skip if data is missing
const paraText = docInfo.text;
const paraLower = paraText.toLowerCase();
const path = docInfo.path;
let score = 0;
const foundKeywords = new Set();
const keywordIndices = [];
// Find occurrences of each keyword only if keywords exist
if (keywordSet.size > 0) {
for (const kw of keywordSet) {
let index = paraLower.indexOf(kw);
if (index !== -1) {
foundKeywords.add(kw);
keywordIndices.push(index);
}
}
score = foundKeywords.size; // Base score on unique keywords found
} else {
score = 0; // Default score if no keywords to search for
// Optionally, could add a base score or length-based score here if desired
}
// Only proceed with snippet generation etc. if score > 0 (or if desired for keyword-less search)
// For now, only include snippets that match keywords.
if (score > 0) {
// Bonus for multiple keywords found close together
if (score > 1 && keywordIndices.length > 1) {
keywordIndices.sort((a, b) => a - b);
const span = keywordIndices[keywordIndices.length - 1] - keywordIndices[0];
if (span < this.config.MAX_SNIPPET_LEN) {
score += 1; // Simple proximity bonus
}
}
// Generate a snippet around the first found keyword
let snippet = paraText; // Default to full paragraph
try {
let firstKwIndex = Math.min(...keywordIndices);
if (firstKwIndex !== Infinity && firstKwIndex >= 0) {
const snippetHalfLen = Math.floor(this.config.MAX_SNIPPET_LEN / 2);
let start = Math.max(0, firstKwIndex - snippetHalfLen);
let end = Math.min(paraText.length, firstKwIndex + snippetHalfLen);
snippet = paraText.substring(start, end);
const prefix = start > 0 ? "..." : "";
const suffix = end < paraText.length ? "..." : "";
snippet = prefix + snippet + suffix;
if (snippet.length > this.config.MAX_SNIPPET_LEN + 6) {
snippet = snippet.substring(0, this.config.MAX_SNIPPET_LEN + 3) + "...";
}
} else {
snippet = paraText.substring(0, this.config.MAX_SNIPPET_LEN) + (paraText.length > this.config.MAX_SNIPPET_LEN ? "..." : "");
}
} catch (e) {
this.logStatus(`Error extracting snippet for doc ID ${docId}: ${e}`, true);
snippet = paraText.substring(0, this.config.MAX_SNIPPET_LEN) + (paraText.length > this.config.MAX_SNIPPET_LEN ? "..." : "");
}
scoredSnippets.push({ score: score, path: path, snippet: snippet, docId: docId });
}
// If keyword-less search should include all paragraphs, push here with score 0
processedCount++;
// Update progress bar periodically or at the end
if (processedCount % this.config.SEARCH_UPDATE_INTERVAL === 0 || processedCount === totalDocs) {
this.updateSearchProgress(processedCount, totalDocs);
await new Promise(resolve => setTimeout(resolve, 0));
}
}
const duration = ((performance.now() - startTime) / 1000).toFixed(2);
this.logStatus(`Manual search completed in ${duration}s. Found ${scoredSnippets.length} potential snippets matching keywords.`);
// Sort snippets by score (descending), then path (ascending)
scoredSnippets.sort((a, b) => {
if (b.score !== a.score) return b.score - a.score;
return a.path.localeCompare(b.path);
});
const finalSnippets = scoredSnippets.slice(0, maxSnippets);
this.logStatus(`Returning ${finalSnippets.length} top snippets after scoring and limiting.`);
this.elements.searchProgressContainer.style.display = 'none'; // Hide progress bar
return finalSnippets;
},
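// Illustrative scoring example (not executed), using hypothetical keywords ["database", "connection"]:
// a paragraph containing both terms within MAX_SNIPPET_LEN characters of each other scores
// 2 (unique keywords) + 1 (proximity bonus) = 3; a paragraph mentioning only "database"
// scores 1; a paragraph containing neither is skipped entirely.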
/**
* Assembles the final context string from scored snippets.
* Avoids duplicate paragraphs and respects character/snippet limits.
* @param {Array<{score: number, path: string, snippet: string, docId: number}>} scoredSnippets - Sorted snippets from search.
* @param {number} maxSnippets - Max number of snippets to include.
* @param {number} maxChars - Max total characters for the context.
* @returns {string} The assembled context string.
*/
assembleContext(scoredSnippets, maxSnippets, maxChars) {
let finalContext = "";
let totalChars = 0;
let snippetCount = 0;
const addedDocIds = new Set(); // Track included paragraph IDs to avoid duplicates
const includedFiles = new Set(); // Track files already marked with "--- Context from: ..."
this.logStatus(`Assembling context from ${scoredSnippets.length} candidates (max ${maxSnippets} snippets, ${maxChars} chars)`);
for (const { path, snippet, docId } of scoredSnippets) {
if (snippetCount >= maxSnippets) {
this.logStatus(`Reached max snippet count (${maxSnippets}).`);
break;
}
if (addedDocIds.has(docId)) {
continue;
}
const displayPath = String(path).replace(/</g, "&lt;").replace(/>/g, "&gt;");
const fileMarker = `--- Context from: ${displayPath} ---\n`;
const snippetText = snippet + "\n";
let contextToAdd = "";
let estimatedLen = 0;
if (!includedFiles.has(path)) {
contextToAdd += fileMarker;
estimatedLen += fileMarker.length;
// Don't add to includedFiles until we successfully add the snippet
}
contextToAdd += snippetText;
estimatedLen += snippetText.length;
if (totalChars + estimatedLen <= maxChars) {
finalContext += contextToAdd;
totalChars += contextToAdd.length;
snippetCount++;
addedDocIds.add(docId);
if (!includedFiles.has(path)) { // Mark file included only if snippet was added
includedFiles.add(path);
}
} else {
this.logStatus(`Reached context character limit (${totalChars} + ${estimatedLen} > ${maxChars}), stopping snippet inclusion.`);
break;
}
}
this.logStatus(`Assembled context: ${snippetCount} unique snippets from ${includedFiles.size} files, ${totalChars} chars.`);
return finalContext.trim();
},
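// Illustrative output (not executed): with a hypothetical file docs/config.rst contributing
// two snippets, the assembled context looks like:
//   --- Context from: docs/config.rst ---
//   ...first snippet text...
//   ...second snippet text...
// The file marker is emitted once per file, and duplicate paragraphs are skipped via docId.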
// --- LLM Interaction ---
/**
* Makes a fetch request to the configured LLM API.
* Handles headers, body, timeout, and basic error checking.
* @param {object[]} messages - The array of message objects for the LLM.
* @param {number} maxTokens - The maximum number of tokens for the response.
* @param {boolean} [isStreaming=false] - Whether to request a streaming response.
* @returns {Promise<Response>} A promise resolving with the fetch Response object.
* @throws {Error} If the request fails or times out.
*/
async callLLM(messages, maxTokens, isStreaming = false) {
const url = this.elements.llmUrlInput.value;
const apiKey = this.elements.llmApiKeyInput.value;
const model = this.elements.llmModelInput.value;
if (!url || !model) {
throw new Error("LLM Base URL and Model Name must be provided in configuration.");
}
const headers = { 'Content-Type': 'application/json' };
if (apiKey && apiKey !== 'DUMMY_KEY') {
headers['Authorization'] = `Bearer ${apiKey}`;
}
const body = JSON.stringify({
model: model,
messages: messages,
max_tokens: maxTokens,
stream: isStreaming,
temperature: isStreaming ? 0.5 : 0.1
});
const controller = new AbortController();
const timeoutValue = this.config.REQUEST_TIMEOUT_MS || 180000; // Default to 180s if not set
const timeoutId = setTimeout(() => {
this.logStatus(`LLM request timed out after ${timeoutValue / 1000}s. Aborting.`, true);
controller.abort();
}, timeoutValue);
this.logStatus(`Attempting LLM call to ${url} (Model: ${model}, Streaming: ${isStreaming}, Timeout: ${timeoutValue/1000}s)`);
try {
const response = await fetch(url, {
method: 'POST',
headers: headers,
body: body,
signal: controller.signal
});
clearTimeout(timeoutId);
if (!response.ok) {
let errorBody = 'Could not read error body';
try {
errorBody = await response.clone().text();
} catch (e) {
this.logStatus(`Failed to read error body: ${e}`, true);
}
throw new Error(`LLM API request failed: ${response.status} ${response.statusText}. Body: ${errorBody}`);
}
this.logStatus(`LLM API request successful (Status: ${response.status})`);
return response;
} catch (error) {
clearTimeout(timeoutId);
if (error.name === 'AbortError') {
throw new Error(`LLM request timed out after ${timeoutValue / 1000}s.`);
}
this.logStatus(`Error during LLM fetch: ${error}`, true);
throw error;
}
},
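// Shape assumption: the callers below expect an OpenAI-compatible response. A typical
// non-streaming reply looks like
//   { "choices": [ { "message": { "content": "..." }, "finish_reason": "stop" } ] }
// while streaming replies arrive as Server-Sent Events (see askLlmForFinalAnswer).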
/**
* Asks the LLM if the current context is sufficient or needs refinement.
* @param {string} userQuery - The original user query.
* @param {string} context - The currently assembled context.
* @returns {Promise<string|null>} A promise resolving with refinement keywords string, or null if context is sufficient/error occurs.
*/
async askLlmForGuidance(userQuery, context) {
const system_prompt =
"You are a search assistant. Analyze the user's QUESTION and the provided CONTEXT snippets. " +
"Determine if the context is likely sufficient or if a more focused search is needed. " +
"Respond ONLY with 'CONTEXT_SUFFICIENT' or 'SEARCH_FOR: keyword1 keyword2 ...'. " +
"Provide 1-5 specific, relevant keywords if search refinement is needed. " +
"Do not add explanation, apologies, or any other text outside the required format.";
const messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": `CONTEXT:\n${context || '[No context found]'}\n\nQUESTION:\n${userQuery}`}
];
this.logStatus("Asking LLM for Search Guidance...");
if (this.elements.llmAnswerDiv) {
this.elements.llmAnswerDiv.textContent = 'Asking LLM for search guidance...';
}
try {
const response = await this.callLLM(messages, this.config.GUIDANCE_MAX_TOKENS, false);
const data = await response.json();
let rawContent = '';
if (data.choices && data.choices[0] && data.choices[0].message && data.choices[0].message.content) {
rawContent = data.choices[0].message.content;
} else if (typeof data.content === 'string') {
rawContent = data.content;
} else {
this.logStatus("LLM guidance response format unexpected.", true);
console.error("Unexpected guidance response structure:", data);
return null;
}
this.logStatus(`LLM Raw Guidance Response: '${rawContent}'`);
const content = this.stripThinkTags(rawContent);
this.logStatus(`LLM Guidance after stripping <think> tags: '${content}'`);
if (content.startsWith("SEARCH_FOR:")) {
let keywordsStr = content.substring("SEARCH_FOR:".length).trim();
keywordsStr = keywordsStr.replace(/[.,;`]+$/, '').trim();
if (keywordsStr) {
this.logStatus(`LLM suggests refining search with: '${keywordsStr}'`);
return keywordsStr;
} else {
this.logStatus("LLM guidance 'SEARCH_FOR:' but no keywords followed. Assuming sufficient.", true);
return null;
}
} else if (content.includes("CONTEXT_SUFFICIENT")) {
this.logStatus("LLM indicates context is sufficient.");
return null;
} else {
if (!content) {
this.logStatus("LLM guidance contained only <think> tags or was empty after stripping, assuming context sufficient.", true);
} else {
this.logStatus(`LLM guidance format unexpected or unclear (after stripping): '${content}'. Assuming sufficient.`, true);
}
return null;
}
} catch (error) {
// Error already logged in callLLM, just update UI and return null
this.logStatus(`Error getting LLM guidance: ${error.message}`, true); // Log specific message
if (this.elements.llmAnswerDiv) {
this.elements.llmAnswerDiv.textContent = `Error getting LLM guidance: ${error.message || error}. Proceeding with initial context.`;
}
return null;
}
},
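// Per the system prompt above, the guidance reply is expected to be exactly one of:
//   "CONTEXT_SUFFICIENT"
//   "SEARCH_FOR: connection pooling timeout"   (keywords shown here are hypothetical)
// Anything else (or an empty reply after stripping <think> tags) is treated as sufficient.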
/**
* Asks the LLM for the final answer based on the query and context, streaming the response.
* @param {string} userQuery - The original user query.
* @param {string} context - The final assembled context.
* @returns {Promise<boolean>} A promise resolving with true if streaming completed (even if empty), false on error.
*/
async askLlmForFinalAnswer(userQuery, context) {
const system_prompt =
"You are a specialist assistant knowledgeable about a specific software package. " +
"Answer the user's question based *only* on the provided CONTEXT from the documentation. " +
"If the context doesn't contain the answer or is empty/irrelevant, state clearly that the information is not available in the provided snippets. " +
"Be concise and direct. Do not include apologies or introductory phrases like 'Based on the context...' unless essential for clarity. " +
"Format code examples appropriately using markdown (e.g., ```python ... ```)." +
"If providing an answer, make it helpful and complete based *solely* on the given context.";
const messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": `CONTEXT:\n${context || '[No relevant context found]'}\n\nQUESTION:\n${userQuery}`}
];
if (!context) {
this.logStatus("Warning: No relevant context found for final query.", true);
}
this.logStatus("Sending Final Query to LLM (Streaming)...");
if (!this.elements.llmAnswerDiv) {
this.logStatus("LLM Answer element not found.", true);
return false;
}
this.elements.llmAnswerDiv.innerHTML = '<p class="text-sm italic text-gray-600 mb-2">--- LLM Answer Stream ---</p>';
const contentArea = document.createElement('div');
// Apply whitespace style to the content area to respect spaces and newlines
contentArea.style.whiteSpace = 'pre-wrap';
this.elements.llmAnswerDiv.appendChild(contentArea);
this.state.currentReader = null;
try {
const response = await this.callLLM(messages, this.config.FINAL_ANSWER_MAX_TOKENS, true);
if (!response.body) {
throw new Error("Response body is not readable (stream not available).");
}
this.state.currentReader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
let fullResponse = '';
let firstChunkProcessed = false;
while (true) {
const { done, value } = await this.state.currentReader.read();
if (done) {
this.logStatus("LLM stream finished.");
break;
}
buffer += decoder.decode(value, { stream: true });
let lines = buffer.split('\n');
buffer = lines.pop();
for (const line of lines) {
if (line.startsWith('data: ')) {
const jsonData = line.substring(6).trim();
if (jsonData === '[DONE]') {
this.logStatus("LLM stream sent [DONE] signal.");
continue;
}
try {
const chunk = JSON.parse(jsonData);
let contentPiece = chunk.choices?.[0]?.delta?.content;
if (contentPiece) {
const cleanedContentPiece = this.stripThinkTags(contentPiece);
if (cleanedContentPiece) {
if (!firstChunkProcessed) {
this.logStatus("LLM stream started receiving data...");
firstChunkProcessed = true;
}
// Append the streamed text to textContent so the answer accumulates as plain, escaped text
contentArea.textContent += cleanedContentPiece;
fullResponse += cleanedContentPiece;
this.elements.llmAnswerDiv.scrollTop = this.elements.llmAnswerDiv.scrollHeight;
}
}
const finishReason = chunk.choices?.[0]?.finish_reason;
if (finishReason) {
this.logStatus(`LLM stream indicated finish reason: ${finishReason}`);
}
} catch (e) {
this.logStatus(`Error parsing LLM stream JSON chunk: ${e}. Chunk: ${jsonData}`, true);
}
}
}
}
if (buffer.trim()) {
this.logStatus(`Warning: Non-empty buffer remaining after stream end: ${buffer}`, true);
}
if (!firstChunkProcessed) {
this.logStatus("LLM stream provided no content.", true);
contentArea.appendChild(document.createTextNode("[No content received from LLM stream]"));
}
return true;
} catch (error) {
// Error already logged in callLLM
this.logStatus(`Error getting LLM final answer: ${error.message}`, true);
const errorMsg = `<p class="text-red-600 font-semibold mt-2">Error during LLM request: ${error.message || error}</p>`;
if (this.elements.llmAnswerDiv.innerHTML.includes('--- LLM Answer Stream ---')) {
this.elements.llmAnswerDiv.innerHTML += errorMsg;
} else {
this.elements.llmAnswerDiv.innerHTML = errorMsg;
}
return false;
} finally {
if (this.state.currentReader) {
try {
await this.state.currentReader.cancel();
this.logStatus("Stream reader cancelled.");
} catch (cancelError) {
this.logStatus(`Error cancelling stream reader (ignored): ${cancelError}`, true);
}
this.state.currentReader = null;
}
}
},
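// Streaming format assumption (OpenAI-style SSE, matching the parser above): each event line
// looks like
//   data: {"choices":[{"delta":{"content":"partial text"}}]}
// and the stream ends with
//   data: [DONE]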
// --- Main Query Logic ---
/**
* Handles the submission of the user query.
* Orchestrates the search, context assembly, LLM guidance, and final answer steps.
*/
async handleSubmitQuery() {
// Ensure elements are available
if (!this.elements.userQueryInput || !this.elements.llmAnswerDiv) {
console.error("Required UI elements not found.");
alert("Initialization error. Please refresh the page.");
return;
}
const userQuery = this.elements.userQueryInput.value.trim();
if (!userQuery) {
this.logStatus("Query cannot be empty.", true);
alert("Please enter a query.");
return;
}
const docsLoaded = Object.keys(this.state.documentStore).length > 0;
if (!docsLoaded) {
this.logStatus("Documentation not loaded yet.", true);
alert("Please load the documentation folder first.");
return;
}
if (this.state.isQuerying || this.state.isReadingFiles || this.state.isSearching) {
this.logStatus("Please wait for the current operation to complete.", true);
alert("Please wait for the current operation (reading, searching, or querying) to complete.");
return;
}
this.state.isQuerying = true;
this.state.isSearching = true;
this.updateSubmitButtonState();
this.elements.llmAnswerDiv.textContent = 'Processing query...';
this.logStatus(`--- New Query Start ---`);
this.logStatus(`User Query: "${userQuery}"`);
try {
// Step 1: Initial Search
const initialKeywords = this.extractKeywords(userQuery);
this.logStatus(`Initial keywords for search: ${initialKeywords.join(', ') || '(None extracted)'}`);
let initialSnippets = await this.searchDocsManual(initialKeywords, this.config.MAX_SNIPPETS_INITIAL);
this.state.isSearching = false;
this.updateSubmitButtonState();
let context = this.assembleContext(initialSnippets, this.config.MAX_SNIPPETS_INITIAL, this.config.MAX_CONTEXT_CHARS);
// Step 2: Ask LLM for Guidance
const refinedKeywordsStr = await this.askLlmForGuidance(userQuery, context);
// Step 3: Refined Search (if needed)
if (refinedKeywordsStr) {
const refinedKeywordsList = this.extractKeywords(refinedKeywordsStr);
if (refinedKeywordsList.length > 0) {
this.logStatus(`Refining search with LLM suggested keywords: ${refinedKeywordsList.join(', ')}`);
this.state.isSearching = true;
this.updateSubmitButtonState();
const refinedSnippets = await this.searchDocsManual(refinedKeywordsList, this.config.MAX_SNIPPETS_REFINED);
this.state.isSearching = false;
this.updateSubmitButtonState();
context = this.assembleContext(refinedSnippets, this.config.MAX_SNIPPETS_REFINED, this.config.MAX_CONTEXT_CHARS);
} else {
this.logStatus("LLM suggested refinement but no valid keywords extracted. Using previous context.", true);
}
} else {
this.logStatus("Proceeding with initial context (LLM indicated sufficient or guidance failed).");
}
// Step 4: Query LLM for Final Answer
await this.askLlmForFinalAnswer(userQuery, context);
} catch (error) {
this.logStatus(`Error during query processing pipeline: ${error}`, true);
if (this.elements.llmAnswerDiv) { // Check element exists before update
this.elements.llmAnswerDiv.textContent = `An unexpected error occurred: ${error.message || error}`;
}
this.state.isSearching = false; // Ensure flag is reset on error
} finally {
this.logStatus(`--- Query End ---`);
this.state.isQuerying = false;
this.state.isSearching = false;
this.updateSubmitButtonState();
}
}
};
// --- Global Initialization ---
// Initialize once the DOM is ready. If the document is already parsed (e.g. the script
// is evaluated after DOMContentLoaded has fired), initialize immediately instead of
// waiting for an event that will not fire again.
if (document.readyState === 'loading') {
document.addEventListener('DOMContentLoaded', () => docQueryAgent.init());
} else {
console.log("DOM ready, initializing agent.");
docQueryAgent.init();
}
</script>
</body>
</html>