DSPy Blog Post: Deep Research comparison
DSPy implementation:
import dspy
from ddgs import DDGS
from dotenv import load_dotenv

load_dotenv()

QUESTION = "What is the projected future demand for high bandwidth memory?"


def search_web(query: str) -> tuple[str, list[str]]:
    # Run a DuckDuckGo text search; return formatted snippets plus the source URLs.
    with DDGS() as ddgs:
        results = list(ddgs.text(query, max_results=3))
    formatted_results = "\n\n".join(
        [f"{r['title']}: {r['href']} - {r['body']}" for r in results]
    )
    sources = [r["href"] for r in results]
    return formatted_results, sources


class Plan(dspy.Signature):
    goal: str = dspy.InputField(desc="The research goal to break down")
    plan: list[str] = dspy.OutputField(desc="List of sub-questions to research")


class Synthesize(dspy.Signature):
    goal: str = dspy.InputField(desc="The original research goal")
    docs: str = dspy.InputField(desc="The research findings from web searches")
    sources: str = dspy.InputField(desc="Available source URLs")
    response: str = dspy.OutputField(desc="The synthesized research response")
    relevant_sources: list[str] = dspy.OutputField(
        desc="List of all relevant source URLs"
    )


lm = dspy.LM("openai/gpt-4o-mini", temperature=0)
planner = dspy.Predict(Plan)
synthesiser = dspy.Predict(Synthesize)


class DeepResearch(dspy.Module):
    def forward(self, goal: str):
        # Plan: break the goal into sub-questions.
        p = planner(goal=goal, lm=lm)
        # Search: gather findings and source URLs for each sub-question.
        docs, all_sources = [], []
        for sq in p.plan:
            doc_text, sources = search_web(sq)
            docs.append(doc_text)
            all_sources.extend(sources)
        # Synthesize: combine the findings into a single cited response.
        final_response = synthesiser(
            goal=goal,
            docs="\n\n---\n\n".join(docs),
            sources="\n".join([f"- {src}" for src in all_sources]),
            lm=lm,
        )
        return final_response.response, final_response.relevant_sources


response, sources = DeepResearch()(goal=QUESTION)
print(f"Response: {response}")
print(f"\nRelevant sources: {sources}")
LangChain implementation:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from ddgs import DDGS
from dotenv import load_dotenv
from pydantic import BaseModel, Field

load_dotenv()

QUESTION = "What is the projected future demand for high bandwidth memory?"


def search_web(query: str) -> tuple[str, list[str]]:
    # Run a DuckDuckGo text search; return formatted snippets plus the source URLs.
    with DDGS() as ddgs:
        results = list(ddgs.text(query, max_results=3))
    formatted_results = "\n\n".join(
        [f"{r['title']}: {r['href']} - {r['body']}" for r in results]
    )
    sources = [r["href"] for r in results]
    return formatted_results, sources


class PlanResponse(BaseModel):
    plan: list[str] = Field(description="List of sub-questions to research")


class DeepResearchResponse(BaseModel):
    response: str = Field(description="The synthesized research response")
    relevant_sources: list[str] = Field(description="List of all source URLs")


class DeepResearch:
    def __init__(self):
        self.llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

    def run(self, goal: str):
        # Plan: break the goal into sub-questions via structured output.
        plan_result = self.llm.with_structured_output(PlanResponse).invoke(
            ChatPromptTemplate.from_messages(
                [
                    (
                        "system",
                        "Break the research goal into 3-5 specific sub-questions.",
                    ),
                    ("user", f"Research goal: {goal}"),
                ]
            ).format_messages()
        )
        # Search: gather findings and source URLs for each sub-question.
        docs, all_sources = [], []
        for sq in plan_result.plan:
            doc_text, sources = search_web(sq)
            docs.append(doc_text)
            all_sources.extend(sources)
        # Synthesize: combine the findings into a single cited response.
        final_result = self.llm.with_structured_output(DeepResearchResponse).invoke(
            ChatPromptTemplate.from_messages(
                [
                    (
                        "system",
                        "Synthesize all research findings into a comprehensive "
                        "response. Include the URLs of the sources you used in "
                        "your response.",
                    ),
                    (
                        "user",
                        f"Goal: {goal}\n\nResearch findings:\n\n"
                        + "\n\n---\n\n".join(docs)
                        + "\n\nAvailable sources:\n"
                        + "\n".join([f"- {src}" for src in all_sources]),
                    ),
                ]
            ).format_messages()
        )
        return final_result.response, final_result.relevant_sources


response, sources = DeepResearch().run(QUESTION)
print(f"Response: {response}")
print(f"\nRelevant sources: {sources}")
Makefile excerpt (formatting and lint tooling):
# Linter will ignore these directories
IGNORE_LINT_DIRS = .venv|venv
LINE_LENGTH = 88

install_tools:
	@echo "$(YELLOW)🔧Installing tools...$(RESET)"
	@uv tool install black --force
	@uv tool install ruff --force
	@uv tool install ty --force
	@uv tool install vulture --force
	@echo "$(GREEN)✅Tools installed.$(RESET)"

fmt: install_tools
	@echo "$(YELLOW)✨Formatting project with Black...$(RESET)"
	@uv tool run black --exclude '/($(IGNORE_LINT_DIRS))/' . --line-length $(LINE_LENGTH)
OpenAI Agents SDK implementation:
from agents import Agent, Runner, ModelSettings
from ddgs import DDGS
from dotenv import load_dotenv
from pydantic import BaseModel, Field

load_dotenv()

QUESTION = "What is the projected future demand for high bandwidth memory?"


def search_web(query: str) -> tuple[str, list[str]]:
    # Run a DuckDuckGo text search; return formatted snippets plus the source URLs.
    with DDGS() as ddgs:
        results = list(ddgs.text(query, max_results=3))
    formatted_results = "\n\n".join(
        [f"{r['title']}: {r['href']} - {r['body']}" for r in results]
    )
    sources = [r["href"] for r in results]
    return formatted_results, sources


class PlanResponse(BaseModel):
    plan: list[str] = Field(description="List of sub-questions to research")


class DeepResearchResponse(BaseModel):
    response: str = Field(description="The synthesized research response")
    relevant_sources: list[str] = Field(description="List of all source URLs")


class DeepResearch:
    def __init__(self):
        self.planner = Agent(
            name="Planner",
            instructions="Break the research goal into 3-5 specific sub-questions.",
            output_type=PlanResponse,
            model="gpt-4o-mini",
            model_settings=ModelSettings(temperature=0),
        )
        self.synthesizer = Agent(
            name="Synthesizer",
            instructions=(
                "Synthesize all research findings into a comprehensive response. "
                "Include the URLs of the sources you used in your response."
            ),
            output_type=DeepResearchResponse,
            model="gpt-4o-mini",
            model_settings=ModelSettings(temperature=0),
        )

    def run(self, goal: str):
        # Plan: break the goal into sub-questions.
        plan_result = Runner.run_sync(self.planner, f"Research goal: {goal}")
        # Search: gather findings and source URLs for each sub-question.
        docs, all_sources = [], []
        for sq in plan_result.final_output.plan:
            doc_text, sources = search_web(sq)
            docs.append(doc_text)
            all_sources.extend(sources)
        # Synthesize: combine the findings into a single cited response.
        final_result = Runner.run_sync(
            self.synthesizer,
            f"Goal: {goal}\n\nResearch findings:\n\n"
            + "\n\n---\n\n".join(docs)
            + "\n\nAvailable sources:\n"
            + "\n".join([f"- {src}" for src in all_sources]),
        )
        return (
            final_result.final_output.response,
            final_result.final_output.relevant_sources,
        )


response, sources = DeepResearch().run(QUESTION)
print(f"Response: {response}")
print(f"\nRelevant sources: {sources}")