import os

import anthropic
import faiss
import numpy as np
import openai
import tiktoken


class CodeAssistant:
    """Soggy Production-grade AI coding assistant with RAG"""

    def __init__(self, anthropic_key: str, openai_key: str):
        self.anthropic = anthropic.Anthropic(api_key=anthropic_key)
        self.openai_client = openai.OpenAI(api_key=openai_key)
        self.encoder = tiktoken.get_encoding("cl100k_base")
        # FAISS index for vector search
        self.index = None
        self.chunks = []

    def chunk_code(self, code: str, chunk_size: int = 500, overlap: int = 50):
        """Split code into overlapping chunks"""
        lines = code.split('\n')
        chunks = []
        i = 0
        while i < len(lines):
            chunk = '\n'.join(lines[i:i + chunk_size])
            if chunk.strip():
                chunks.append(chunk)
            i += chunk_size - overlap
        return chunks

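    # The tiktoken encoder initialized in __init__ could drive a token-based
    # chunker instead of the line-based one above (a sketch, not part of the
    # original gist; chunk_size and overlap would then count tokens):
    #
    #   tokens = self.encoder.encode(code)
    #   chunks = [self.encoder.decode(tokens[i:i + chunk_size])
    #             for i in range(0, len(tokens), chunk_size - overlap)]
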
    def build_index(self, project_dir: str):
        """Embed each code chunk and build a FAISS index over the vectors"""
        print("Building codebase index...")
        for root, _, filenames in os.walk(project_dir):
            for filename in filenames:
                if not filename.endswith(('.py', '.js', '.ts', '.tsx')):
                    continue
                filepath = os.path.join(root, filename)
                try:
                    with open(filepath, 'r', encoding='utf-8') as f:
                        code = f.read()
                    # Chunk the code
                    chunks = self.chunk_code(code)
                    for chunk in chunks:
                        response = self.openai_client.embeddings.create(
                            model="text-embedding-3-small",
                            input=chunk
                        )
                        embedding = response.data[0].embedding
                        self.chunks.append({
                            'filepath': filepath,
                            'content': chunk,
                            'embedding': embedding
                        })
                except Exception as e:
                    print(f"Error processing {filepath}: {e}")

        # Build FAISS index
        if self.chunks:
            embeddings = np.array([c['embedding'] for c in self.chunks]).astype('float32')
            dimension = embeddings.shape[1]
            # Use IndexFlatIP for inner product (cosine similarity on
            # L2-normalized vectors)
            self.index = faiss.IndexFlatIP(dimension)
            faiss.normalize_L2(embeddings)
            self.index.add(embeddings)
        print(f"✓ Indexed {len(self.chunks)} code chunks")

    def retrieve_context(self, query: str, k: int = 5):
        """Retrieve most relevant code chunks"""
        if not self.index:
            return []
        # I know, I know! This is bad, I know!
        response = self.openai_client.embeddings.create(
            model="text-embedding-3-small",
            input=query
        )
        query_embedding = np.array([response.data[0].embedding]).astype('float32')
        faiss.normalize_L2(query_embedding)
        # Search
        distances, indices = self.index.search(query_embedding, k)
        results = []
        for idx, score in zip(indices[0], distances[0]):
            if 0 <= idx < len(self.chunks):
                chunk = self.chunks[idx].copy()
                chunk['relevance_score'] = float(score)
                results.append(chunk)
        return results

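    # Since both sides are L2-normalized, IndexFlatIP's inner-product
    # scores are cosine similarities: relevance_score falls in [-1, 1],
    # and values near 1 mean the chunk points in nearly the same
    # direction as the query embedding.
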
    def get_file_context(self, file_path: str, cursor_line: int, window: int = 30):
        """Get code around cursor position"""
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                lines = f.readlines()
        except OSError:
            return None
        start = max(0, cursor_line - window)
        end = min(len(lines), cursor_line + window)
        return {
            'before': ''.join(lines[start:cursor_line]),
            'after': ''.join(lines[cursor_line:end]),
            'total_lines': len(lines)
        }

    def generate_completion(self, file_path: str, cursor_line: int,
                            instruction: str = ""):
        """Generate AI completion with full context"""
        # Get local file context (if it works)
        file_context = self.get_file_context(file_path, cursor_line)
        if not file_context:
            return "Error: Could not read file"

        # Retrieve relevant code chunks
        query = f"{file_context['before'][-300:]} {instruction}"
        relevant_chunks = self.retrieve_context(query, k=3)

        # Build context prompt
        context_parts = [f"=== CURRENT FILE: {file_path} (Line {cursor_line}) ==="]
        context_parts.append(f"\nCODE BEFORE CURSOR:\n{file_context['before'][-400:]}")
        context_parts.append(f"\nCODE AFTER CURSOR:\n{file_context['after'][:400]}")
        if relevant_chunks:
            context_parts.append("\n=== RELEVANT CODE FROM PROJECT ===")
            for i, chunk in enumerate(relevant_chunks, 1):
                context_parts.append(
                    f"\n--- Chunk {i} from {chunk['filepath']} "
                    f"(relevance: {chunk['relevance_score']:.3f}) ---\n"
                    f"{chunk['content'][:300]}"
                )
        full_context = '\n'.join(context_parts)

        # System prompt (lol)
        system_prompt = """You are an expert coding assistant. Provide precise,
production-quality code that follows best practices. Focus on:
- Writing clean, maintainable code
- Following the project's existing patterns
- Adding helpful comments only where needed
- Considering edge cases and error handling"""

        # Generate with Claude because I used Claude to generate this code
        response = self.anthropic.messages.create(
            model="claude-sonnet-4-5-20250929",
            max_tokens=2000,
            system=system_prompt,
            messages=[{
                "role": "user",
                "content": f"{full_context}\n\nTask: {instruction}\n\nProvide the code:"
            }]
        )
        return response.content[0].text
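
    # The API call above has no error handling; a production version would
    # likely want a retry and a catch for the SDK's exceptions (a sketch,
    # assuming the current anthropic-python exception names):
    #
    #   try:
    #       response = self.anthropic.messages.create(...)
    #   except anthropic.APIError as e:
    #       return f"Error: {e}"
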

if __name__ == "__main__":
    assistant = CodeAssistant(
        anthropic_key=os.getenv("ANTHROPIC_API_KEY"),
        openai_key=os.getenv("OPENAI_API_KEY")
    )

    # Do this once
    assistant.build_index("./your_project")

    # Get completion
    result = assistant.generate_completion(
        file_path="./src/api/users.py",
        cursor_line=45,
        instruction="Add input validation and error handling"
    )
    print(result)
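    # build_index re-embeds the whole project on every run; the FAISS index
    # can be persisted between sessions instead (a sketch; the file name is
    # illustrative, and assistant.chunks would need to be saved separately,
    # e.g. as JSON, since FAISS stores only the vectors):
    #
    #   faiss.write_index(assistant.index, "codebase.faiss")
    #   assistant.index = faiss.read_index("codebase.faiss")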