kausmeows/custom_retriever_with_run_context.py

## custom_retriever_with_run_context.py
"""
Custom Retriever with RunContext
================================

Demonstrates how to pass application-controlled data (like a project_id
or file_name) into a custom retriever using RunContext.dependencies.

This is useful when:
- Your retriever needs scoping (e.g., per-project, per-tenant)
- You want to pass runtime filters that the LLM shouldn't control
- You need access to the agent instance inside the retriever
"""

from functools import partial
from typing import Dict, List, Optional

from agno.agent import Agent
from agno.models.openai import OpenAIChat
from agno.run.base import RunContext

# ---------------------------------------------------------------------------
# Sample Data (simulating a multi-project document store)
# ---------------------------------------------------------------------------
# Each project maps to a list of document dicts; the rows below are
# (id, filename, content) tuples expanded into those dicts.
PROJECTS: Dict[str, List[dict]] = {
    project_name: [
        {"id": doc_id, "filename": filename, "content": content}
        for doc_id, filename, content in rows
    ]
    for project_name, rows in (
        (
            "project-alpha",
            (
                ("1", "architecture.md", "The system uses a microservices architecture with gRPC."),
                ("2", "architecture.md", "Each service has its own PostgreSQL database."),
                ("3", "onboarding.md", "New engineers should start by reading the architecture doc."),
                ("4", "api-guide.md", "All API endpoints require Bearer token authentication."),
            ),
        ),
        (
            "project-beta",
            (
                ("5", "setup.md", "Run docker-compose up to start the development environment."),
                ("6", "testing.md", "Use pytest with the --cov flag for coverage reports."),
            ),
        ),
    )
}


# ---------------------------------------------------------------------------
# Custom Retriever with RunContext
# ---------------------------------------------------------------------------
def project_scoped_retriever(
    agent: Agent,
    query: str,
    project_id: str,
    run_context: Optional[RunContext] = None,
    num_documents: Optional[int] = None,
    **kwargs,
) -> Optional[List[dict]]:
    """Retrieve documents scoped to a project, with optional file_name filtering via run_context.

    A document matches when any query word (lower-cased, with surrounding
    punctuation removed) occurs as a substring of its lower-cased content.

    Args:
        agent: The Agent instance (auto-injected by the framework). Not used here.
        query: The search query string.
        project_id: The project to search within (bound via functools.partial).
            An unknown project_id is treated as a project with no documents.
        run_context: Runtime context with dependencies (auto-injected by the framework).
            If its dependencies contain a truthy "file_name", only documents
            with that filename are searched.
        num_documents: Max results to return. None means no limit; zero or a
            negative value yields no results.
        **kwargs: Additional keyword arguments (accepted and ignored).

    Returns:
        The matching documents in their stored order, or None when nothing matches.
    """
    # Function-scope import so this block does not depend on a file-level import.
    import string

    # Extract file_name from run_context.dependencies (set by the application)
    file_name = None
    if run_context and run_context.dependencies:
        file_name = run_context.dependencies.get("file_name")

    print(f"file_name: {file_name}")

    # Get documents for this project
    project_docs = PROJECTS.get(project_id, [])

    # Filter by file_name if provided
    if file_name:
        project_docs = [doc for doc in project_docs if doc["filename"] == file_name]

    # Simple keyword search (match any word from the query). Surrounding
    # punctuation is stripped so that "API?" still matches "API"; tokens that
    # were punctuation only are dropped.
    query_words = [
        word
        for word in (raw.strip(string.punctuation) for raw in query.lower().split())
        if word
    ]
    results = [
        doc for doc in project_docs
        if any(word in doc["content"].lower() for word in query_words)
    ]

    # Compare against None explicitly: a limit of 0 must not mean "no limit",
    # and a negative limit must not slice from the end of the list.
    if num_documents is not None:
        results = results[: max(num_documents, 0)]

    return results or None


# ---------------------------------------------------------------------------
# Create Agent (project_id is bound at creation time via partial)
# ---------------------------------------------------------------------------
# Bind project_id up front: the retriever handed to the agent always searches
# "project-alpha", while the remaining arguments are supplied at call time.
_alpha_retriever = partial(project_scoped_retriever, project_id="project-alpha")

agent = Agent(
    markdown=True,
    knowledge_retriever=_alpha_retriever,
    model=OpenAIChat(id="gpt-5.2"),
)


# ---------------------------------------------------------------------------
# Run Agent
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # First run: no file_name dependency is supplied, so the retriever
    # applies no per-file filter.
    auth_question = "What authentication is required for the API?"
    agent.print_response(auth_question, stream=True)

    print("\n--- Now searching only within architecture.md ---\n")

    # Second run: the application passes file_name through dependencies,
    # which the retriever reads from run_context.dependencies.
    file_scope = {"file_name": "architecture.md"}
    agent.print_response(
        "Tell me about the database setup.",
        stream=True,
        dependencies=file_scope,
    )
	"""
	Custom Retriever with RunContext
	=============================

	Demonstrates how to pass application-controlled data (like a project_id
	or file_name) into a custom retriever using RunContext.dependencies.

	This is useful when:
	- Your retriever needs scoping (e.g., per-project, per-tenant)
	- You want to pass runtime filters that the LLM shouldn't control
	- You need access to the agent instance inside the retriever
	"""

	from functools import partial
	from typing import Dict, List, Optional

	from agno.agent import Agent
	from agno.models.openai import OpenAIChat
	from agno.run.base import RunContext

	# ---------------------------------------------------------------------------
	# Sample Data (simulating a multi-project document store)
	# ---------------------------------------------------------------------------
	PROJECTS: Dict[str, List[dict]] = {
	"project-alpha": [
	{"id": "1", "filename": "architecture.md", "content": "The system uses a microservices architecture with gRPC."},
	{"id": "2", "filename": "architecture.md", "content": "Each service has its own PostgreSQL database."},
	{"id": "3", "filename": "onboarding.md", "content": "New engineers should start by reading the architecture doc."},
	{"id": "4", "filename": "api-guide.md", "content": "All API endpoints require Bearer token authentication."},
	],
	"project-beta": [
	{"id": "5", "filename": "setup.md", "content": "Run docker-compose up to start the development environment."},
	{"id": "6", "filename": "testing.md", "content": "Use pytest with the --cov flag for coverage reports."},
	],
	}


	# ---------------------------------------------------------------------------
	# Custom Retriever with RunContext
	# ---------------------------------------------------------------------------
	def project_scoped_retriever(
	agent: Agent,
	query: str,
	project_id: str,
	run_context: Optional[RunContext] = None,
	num_documents: Optional[int] = None,
	**kwargs,
	) -> Optional[List[dict]]:
	"""Retrieve documents scoped to a project, with optional file_name filtering via run_context.

	Args:
	agent: The Agent instance (auto-injected by the framework).
	query: The search query string.
	project_id: The project to search within (bound via functools.partial).
	run_context: Runtime context with dependencies (auto-injected by the framework).
	num_documents: Max results to return.
	**kwargs: Additional keyword arguments.
	"""
	# Extract file_name from run_context.dependencies (set by the application)
	file_name = None
	if run_context and run_context.dependencies:
	file_name = run_context.dependencies.get("file_name")

	print(f"file_name: {file_name}")

	# Get documents for this project
	project_docs = PROJECTS.get(project_id, [])

	# Filter by file_name if provided
	if file_name:
	project_docs = [doc for doc in project_docs if doc["filename"] == file_name]

	# Simple keyword search (match any word from the query)
	query_words = query.lower().split()
	results = [
	doc for doc in project_docs
	if any(word in doc["content"].lower() for word in query_words)
	]

	if num_documents:
	results = results[:num_documents]

	return results if results else None


	# ---------------------------------------------------------------------------
	# Create Agent (project_id is bound at creation time via partial)
	# ---------------------------------------------------------------------------
	agent = Agent(
	model=OpenAIChat(id="gpt-5.2"),
	knowledge_retriever=partial(project_scoped_retriever, project_id="project-alpha"),
	markdown=True,
	)


	# ---------------------------------------------------------------------------
	# Run Agent
	# ---------------------------------------------------------------------------
	if __name__ == "__main__":
	# Search across all files in the project
	agent.print_response(
	"What authentication is required for the API?",
	stream=True,
	)

	print("\n--- Now searching only within architecture.md ---\n")

	# Scope the search to a specific file using run_context.dependencies
	agent.print_response(
	"Tell me about the database setup.",
	stream=True,
	dependencies={"file_name": "architecture.md"},
	)
No results found