DSPy Blog Post: Deep Research comparison
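
Three side-by-side implementations of the same minimal deep-research agent: plan 3-5 sub-questions, run a DuckDuckGo search (via ddgs) for each, then synthesize a single cited answer. The DSPy version comes first:
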
import dspy
from ddgs import DDGS
from dotenv import load_dotenv

load_dotenv()

QUESTION = "What is the projected future demand for high bandwidth memory?"


def search_web(query: str) -> tuple[str, list[str]]:
    with DDGS() as ddgs:
        results = list(ddgs.text(query, max_results=3))
    formatted_results = "\n\n".join(
        [f"{r['title']}: {r['href']} - {r['body']}" for r in results]
    )
    sources = [r["href"] for r in results]
    return formatted_results, sources


class Plan(dspy.Signature):
    """Break the research goal into 3-5 specific sub-questions."""

    goal: str = dspy.InputField(desc="The research goal to break down")
    plan: list[str] = dspy.OutputField(desc="List of sub-questions to research")


class Synthesize(dspy.Signature):
    """Synthesize all research findings into a comprehensive response.
    Include the URLs of the sources you used in your response."""

    goal: str = dspy.InputField(desc="The original research goal")
    docs: str = dspy.InputField(desc="The research findings from web searches")
    sources: str = dspy.InputField(desc="Available source URLs")
    response: str = dspy.OutputField(desc="The synthesized research response")
    relevant_sources: list[str] = dspy.OutputField(
        desc="List of all relevant source URLs"
    )


lm = dspy.LM("openai/gpt-4o-mini", temperature=0)
planner = dspy.Predict(Plan)
synthesiser = dspy.Predict(Synthesize)


class DeepResearch(dspy.Module):
    def forward(self, goal: str):
        p = planner(goal=goal, lm=lm)
        docs, all_sources = [], []
        for sq in p.plan:
            doc_text, sources = search_web(sq)
            docs.append(doc_text)
            all_sources.extend(sources)
        final_response = synthesiser(
            goal=goal,
            docs="\n\n---\n\n".join(docs),
            sources="\n".join([f"- {src}" for src in all_sources]),
            lm=lm,
        )
        return final_response.response, final_response.relevant_sources


response, sources = DeepResearch()(goal=QUESTION)
print(f"Response: {response}")
print(f"\nRelevant sources: {sources}")
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from ddgs import DDGS
from dotenv import load_dotenv
from pydantic import BaseModel, Field

load_dotenv()

QUESTION = "What is the projected future demand for high bandwidth memory?"


def search_web(query: str) -> tuple[str, list[str]]:
    with DDGS() as ddgs:
        results = list(ddgs.text(query, max_results=3))
    formatted_results = "\n\n".join(
        [f"{r['title']}: {r['href']} - {r['body']}" for r in results]
    )
    sources = [r["href"] for r in results]
    return formatted_results, sources


class PlanResponse(BaseModel):
    plan: list[str] = Field(description="List of sub-questions to research")


class DeepResearchResponse(BaseModel):
    response: str = Field(description="The synthesized research response")
    relevant_sources: list[str] = Field(description="List of all source URLs")


class DeepResearch:
    def __init__(self):
        self.llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

    def run(self, goal: str):
        plan_result = self.llm.with_structured_output(PlanResponse).invoke(
            ChatPromptTemplate.from_messages(
                [
                    (
                        "system",
                        "Break the research goal into 3-5 specific sub-questions.",
                    ),
                    ("user", f"Research goal: {goal}"),
                ]
            ).format_messages()
        )
        docs, all_sources = [], []
        for sq in plan_result.plan:
            doc_text, sources = search_web(sq)
            docs.append(doc_text)
            all_sources.extend(sources)
        final_result = self.llm.with_structured_output(DeepResearchResponse).invoke(
            ChatPromptTemplate.from_messages(
                [
                    (
                        "system",
                        "Synthesize all research findings into a comprehensive "
                        "response. Include the URLs of the sources you used in "
                        "your response.",
                    ),
                    (
                        "user",
                        f"Goal: {goal}\n\nResearch findings:\n\n"
                        + "\n\n---\n\n".join(docs)
                        + "\n\nAvailable sources:\n"
                        + "\n".join([f"- {src}" for src in all_sources]),
                    ),
                ]
            ).format_messages()
        )
        return final_result.response, final_result.relevant_sources


response, sources = DeepResearch().run(QUESTION)
print(f"Response: {response}")
print(f"\nRelevant sources: {sources}")
# Linter will ignore these directories
IGNORE_LINT_DIRS = .venv|venv
LINE_LENGTH = 88

# ANSI color codes for echo output (assumed standard values; the full
# Makefile presumably defines these outside this excerpt)
YELLOW = \033[33m
GREEN = \033[32m
RESET = \033[0m

install_tools:
	@echo "$(YELLOW)🔧Installing tools...$(RESET)"
	@uv tool install black --force
	@uv tool install ruff --force
	@uv tool install ty --force
	@uv tool install vulture --force
	@echo "$(GREEN)✅Tools installed.$(RESET)"

fmt: install_tools
	@echo "$(YELLOW)✨Formatting project with Black...$(RESET)"
	@uv tool run black --exclude '/($(IGNORE_LINT_DIRS))/' . --line-length $(LINE_LENGTH)
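
Since fmt depends on install_tools, a single make fmt (re)installs Black, Ruff, ty and vulture via uv and then formats the tree with Black at the 88-character line length, skipping the .venv and venv directories. Finally, the same agent on the OpenAI Agents SDK:
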
from agents import Agent, Runner, ModelSettings
from ddgs import DDGS
from dotenv import load_dotenv
from pydantic import BaseModel, Field

load_dotenv()

QUESTION = "What is the projected future demand for high bandwidth memory?"


def search_web(query: str) -> tuple[str, list[str]]:
    with DDGS() as ddgs:
        results = list(ddgs.text(query, max_results=3))
    formatted_results = "\n\n".join(
        [f"{r['title']}: {r['href']} - {r['body']}" for r in results]
    )
    sources = [r["href"] for r in results]
    return formatted_results, sources


class PlanResponse(BaseModel):
    plan: list[str] = Field(description="List of sub-questions to research")


class DeepResearchResponse(BaseModel):
    response: str = Field(description="The synthesized research response")
    relevant_sources: list[str] = Field(description="List of all source URLs")


class DeepResearch:
    def __init__(self):
        self.planner = Agent(
            name="Planner",
            instructions="Break the research goal into 3-5 specific sub-questions.",
            output_type=PlanResponse,
            model="gpt-4o-mini",
            model_settings=ModelSettings(temperature=0),
        )
        self.synthesizer = Agent(
            name="Synthesizer",
            instructions=(
                "Synthesize all research findings into a comprehensive response. "
                "Include the URLs of the sources you used in your response."
            ),
            output_type=DeepResearchResponse,
            model="gpt-4o-mini",
            model_settings=ModelSettings(temperature=0),
        )

    def run(self, goal: str):
        plan_result = Runner.run_sync(self.planner, f"Research goal: {goal}")
        docs, all_sources = [], []
        for sq in plan_result.final_output.plan:
            doc_text, sources = search_web(sq)
            docs.append(doc_text)
            all_sources.extend(sources)
        final_result = Runner.run_sync(
            self.synthesizer,
            f"Goal: {goal}\n\nResearch findings:\n\n"
            + "\n\n---\n\n".join(docs)
            + "\n\nAvailable sources:\n"
            + "\n".join([f"- {src}" for src in all_sources]),
        )
        return (
            final_result.final_output.response,
            final_result.final_output.relevant_sources,
        )


response, sources = DeepResearch().run(QUESTION)
print(f"Response: {response}")
print(f"\nRelevant sources: {sources}")