Created
November 17, 2025 04:27
-
-
Save mrmaheshrajput/0b8c12ab5d80bd4f6557fd38903c144b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import anthropic | |
| from pathlib import Path | |
| import tiktoken | |
| import numpy as np | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import faiss | |
| import openai | |
class CodeAssistant:
    """AI coding assistant that answers with retrieval-augmented context.

    Source files are split into line-based chunks, embedded with an OpenAI
    embedding model and stored in a FAISS inner-product index. For a
    completion, the code around the cursor plus the most similar indexed
    chunks are sent to Claude together with the caller's instruction.
    """

    # Indexing and querying must use the same embedding model, otherwise the
    # similarity scores between query and chunk vectors are meaningless.
    _EMBEDDING_MODEL = "text-embedding-3-small"
    _COMPLETION_MODEL = "claude-sonnet-4-5-20250929"
    # File name suffixes that build_index() treats as source code.
    _SOURCE_SUFFIXES = ('.py', '.js', '.ts', '.tsx')
    _SYSTEM_PROMPT = (
        "You are an expert coding assistant. Provide precise,\n"
        "production-quality code that follows best practices. Focus on:\n"
        "- Writing clean, maintainable code\n"
        "- Following the project's existing patterns\n"
        "- Adding helpful comments only where needed\n"
        "- Considering edge cases and error handling"
    )

    def __init__(self, anthropic_key: str, openai_key: str):
        """Create the API clients and an empty index.

        Args:
            anthropic_key: API key passed to the Anthropic client.
            openai_key: API key passed to the OpenAI client (embeddings).
        """
        self.anthropic = anthropic.Anthropic(api_key=anthropic_key)
        self.openai_client = openai.OpenAI(api_key=openai_key)
        # Tokenizer handle; no method of this class currently uses it.
        self.encoder = tiktoken.get_encoding("cl100k_base")

        # FAISS index for vector search; stays None until build_index() has
        # embedded at least one chunk.
        self.index = None
        # One dict per indexed chunk ('filepath', 'content', 'embedding'),
        # in the same order as the rows added to self.index.
        self.chunks = []

    def chunk_code(self, code: str, chunk_size: int = 500, overlap: int = 50):
        """Split code into overlapping, line-based chunks.

        Args:
            code: Source text to split.
            chunk_size: Maximum number of lines per chunk; must be positive.
            overlap: Number of lines shared between consecutive chunks; must
                satisfy ``0 <= overlap < chunk_size``.

        Returns:
            A list of chunk strings in file order. Chunks that contain only
            whitespace are omitted.

        Raises:
            ValueError: If ``chunk_size`` or ``overlap`` is out of range.
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be positive")
        if not 0 <= overlap < chunk_size:
            # A step of zero or less would never advance through the file.
            raise ValueError("overlap must be >= 0 and smaller than chunk_size")

        lines = code.split('\n')
        step = chunk_size - overlap
        chunks = []
        for start in range(0, len(lines), step):
            chunk = '\n'.join(lines[start:start + chunk_size])
            if chunk.strip():
                chunks.append(chunk)
            if start + chunk_size >= len(lines):
                # This window already reaches the last line; another window
                # would only repeat the overlap tail of this chunk.
                break
        return chunks

    def _embed(self, text: str):
        """Request an embedding for ``text`` and return the first vector."""
        response = self.openai_client.embeddings.create(
            model=self._EMBEDDING_MODEL,
            input=text,
        )
        return response.data[0].embedding

    def build_index(self, project_dir: str):
        """Embed the source files under ``project_dir`` and build the index.

        Files whose names end in .py, .js, .ts or .tsx are read as UTF-8,
        split with chunk_code() and embedded one chunk per request. A file
        that cannot be read or embedded is reported on stdout and skipped;
        chunks of that file embedded before the failure are kept.

        New chunks are appended to ``self.chunks`` and the FAISS index is
        rebuilt from all of them, so a second call adds to what is already
        indexed rather than replacing it.

        Args:
            project_dir: Root directory that is walked recursively.
        """
        print("Building codebase index...")

        for root, _, filenames in os.walk(project_dir):
            for filename in filenames:
                if not filename.endswith(self._SOURCE_SUFFIXES):
                    continue

                filepath = os.path.join(root, filename)
                try:
                    with open(filepath, 'r', encoding='utf-8') as f:
                        code = f.read()

                    for chunk in self.chunk_code(code):
                        embedding = self._embed(chunk)
                        self.chunks.append({
                            'filepath': filepath,
                            'content': chunk,
                            'embedding': embedding,
                        })
                except Exception as e:
                    # Best-effort indexing: one bad file or failed request
                    # must not abort the walk over the rest of the project.
                    print(f"Error processing {filepath}: {e}")

        if self.chunks:
            embeddings = np.array(
                [c['embedding'] for c in self.chunks], dtype='float32'
            )
            # Inner product over L2-normalised vectors is cosine similarity,
            # so the vectors are normalised in place before being added.
            faiss.normalize_L2(embeddings)
            self.index = faiss.IndexFlatIP(embeddings.shape[1])
            self.index.add(embeddings)

        print(f"✓ Indexed {len(self.chunks)} code chunks")

    def retrieve_context(self, query: str, k: int = 5):
        """Return the indexed chunks most similar to ``query``.

        Args:
            query: Free text (code and/or instruction) to search for.
            k: Maximum number of chunks to return.

        Returns:
            A list of copies of the matching chunk dicts, best match first,
            each with an added ``'relevance_score'`` float. The list is empty
            when no index has been built, when ``k`` is not positive, or
            when ``query`` is blank.
        """
        if self.index is None or k <= 0 or not query.strip():
            return []

        # The query is embedded with the same model and normalisation as the
        # indexed chunks so the scores are comparable.
        query_embedding = np.array([self._embed(query)], dtype='float32')
        faiss.normalize_L2(query_embedding)

        distances, indices = self.index.search(query_embedding, k)

        results = []
        for idx, score in zip(indices[0], distances[0]):
            # FAISS fills missing neighbours with -1 when the index holds
            # fewer than k vectors; the range check drops those slots.
            if 0 <= idx < len(self.chunks):
                chunk = self.chunks[idx].copy()
                chunk['relevance_score'] = float(score)
                results.append(chunk)
        return results

    def get_file_context(self, file_path: str, cursor_line: int, window: int = 30):
        """Return the lines surrounding a cursor position in a file.

        Args:
            file_path: Path of the file to read (decoded as UTF-8, matching
                build_index()).
            cursor_line: Zero-based index of the line the cursor is on.
                Values outside the file are clamped to its first/last line.
            window: Maximum number of lines to return on each side.

        Returns:
            A dict with ``'before'`` (up to ``window`` lines preceding the
            cursor), ``'after'`` (up to ``window`` lines starting at the
            cursor line) and ``'total_lines'``, or ``None`` when the file
            cannot be opened or decoded.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                lines = f.readlines()
        except (OSError, ValueError):
            # ValueError also covers UnicodeDecodeError for non-UTF-8 files.
            return None

        # Clamp so that a negative cursor is not interpreted as a
        # from-the-end slice index and a too-large one still yields the tail.
        cursor_line = max(0, min(cursor_line, len(lines)))
        start = max(0, cursor_line - window)
        end = min(len(lines), cursor_line + window)

        return {
            'before': ''.join(lines[start:cursor_line]),
            'after': ''.join(lines[cursor_line:end]),
            'total_lines': len(lines),
        }

    def generate_completion(self, file_path: str, cursor_line: int,
                            instruction: str = ""):
        """Generate code for ``instruction`` at a cursor position.

        The prompt contains up to 400 characters of code on each side of the
        cursor and, when an index exists, the first 300 characters of the
        three most relevant indexed chunks.

        Args:
            file_path: File being edited.
            cursor_line: Zero-based line index of the cursor in that file.
            instruction: What the model should do at the cursor.

        Returns:
            The text of the model's reply, or the string
            ``"Error: Could not read file"`` when ``file_path`` is unreadable.
        """
        file_context = self.get_file_context(file_path, cursor_line)
        if not file_context:
            return "Error: Could not read file"

        # Search with the code just before the cursor plus the instruction.
        query = f"{file_context['before'][-300:]} {instruction}"
        relevant_chunks = self.retrieve_context(query, k=3)

        context_parts = [
            f"=== CURRENT FILE: {file_path} (Line {cursor_line}) ===",
            f"\nCODE BEFORE CURSOR:\n{file_context['before'][-400:]}",
            f"\nCODE AFTER CURSOR:\n{file_context['after'][:400]}",
        ]

        if relevant_chunks:
            context_parts.append("\n=== RELEVANT CODE FROM PROJECT ===")
            for i, chunk in enumerate(relevant_chunks, 1):
                context_parts.append(
                    f"\n--- Chunk {i} from {chunk['filepath']} "
                    f"(relevance: {chunk['relevance_score']:.3f}) ---\n"
                    f"{chunk['content'][:300]}"
                )

        full_context = '\n'.join(context_parts)

        response = self.anthropic.messages.create(
            model=self._COMPLETION_MODEL,
            max_tokens=2000,
            system=self._SYSTEM_PROMPT,
            messages=[{
                "role": "user",
                "content": f"{full_context}\n\nTask: {instruction}\n\nProvide the code:"
            }]
        )

        # The reply is a list of content blocks; join the text ones rather
        # than assuming the first block exists and is text.
        return "".join(
            block.text for block in response.content
            if getattr(block, "type", None) == "text"
        )
if __name__ == "__main__":
    # Example run: index a project once, then request one completion.
    # Both API keys are read from the environment; fail fast with a readable
    # message instead of handing None to the API clients.
    anthropic_key = os.getenv("ANTHROPIC_API_KEY")
    openai_key = os.getenv("OPENAI_API_KEY")
    missing = [
        name
        for name, value in (
            ("ANTHROPIC_API_KEY", anthropic_key),
            ("OPENAI_API_KEY", openai_key),
        )
        if not value
    ]
    if missing:
        raise SystemExit(
            f"Missing required environment variable(s): {', '.join(missing)}"
        )

    assistant = CodeAssistant(
        anthropic_key=anthropic_key,
        openai_key=openai_key,
    )

    # Indexing embeds every chunk through the API, so do this once per run.
    assistant.build_index("./your_project")

    # cursor_line is a zero-based line index into the file.
    result = assistant.generate_completion(
        file_path="./src/api/users.py",
        cursor_line=45,
        instruction="Add input validation and error handling",
    )
    print(result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment