DSPy Blog Post: Deep Research comparison
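
Three side-by-side implementations of the same minimal deep-research agent: plan 3-5 sub-questions, run a DuckDuckGo search (via ddgs) for each, then synthesize a single cited answer. The DSPy version comes first:
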
import dspy
from ddgs import DDGS
from dotenv import load_dotenv

load_dotenv()

QUESTION = "What is the projected future demand for high bandwidth memory?"


def search_web(query: str) -> tuple[str, list[str]]:
    with DDGS() as ddgs:
        results = list(ddgs.text(query, max_results=3))
    formatted_results = "\n\n".join(
        [f"{r['title']}: {r['href']} - {r['body']}" for r in results]
    )
    sources = [r["href"] for r in results]
    return formatted_results, sources


class Plan(dspy.Signature):
    """Break the research goal into 3-5 specific sub-questions."""

    goal: str = dspy.InputField(desc="The research goal to break down")
    plan: list[str] = dspy.OutputField(desc="List of sub-questions to research")


class Synthesize(dspy.Signature):
    """Synthesize all research findings into a comprehensive response.
    Include the URLs of the sources you used in your response."""

    goal: str = dspy.InputField(desc="The original research goal")
    docs: str = dspy.InputField(desc="The research findings from web searches")
    sources: str = dspy.InputField(desc="Available source URLs")
    response: str = dspy.OutputField(desc="The synthesized research response")
    relevant_sources: list[str] = dspy.OutputField(
        desc="List of all relevant source URLs"
    )


lm = dspy.LM("openai/gpt-4o-mini", temperature=0)
planner = dspy.Predict(Plan)
synthesiser = dspy.Predict(Synthesize)


class DeepResearch(dspy.Module):
    def forward(self, goal: str):
        p = planner(goal=goal, lm=lm)
        docs, all_sources = [], []
        for sq in p.plan:
            doc_text, sources = search_web(sq)
            docs.append(doc_text)
            all_sources.extend(sources)
        final_response = synthesiser(
            goal=goal,
            docs="\n\n---\n\n".join(docs),
            sources="\n".join([f"- {src}" for src in all_sources]),
            lm=lm,
        )
        return final_response.response, final_response.relevant_sources


response, sources = DeepResearch()(goal=QUESTION)
print(f"Response: {response}")
print(f"\nRelevant sources: {sources}")
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from ddgs import DDGS
from dotenv import load_dotenv
from pydantic import BaseModel, Field

load_dotenv()

QUESTION = "What is the projected future demand for high bandwidth memory?"


def search_web(query: str) -> tuple[str, list[str]]:
    with DDGS() as ddgs:
        results = list(ddgs.text(query, max_results=3))
    formatted_results = "\n\n".join(
        [f"{r['title']}: {r['href']} - {r['body']}" for r in results]
    )
    sources = [r["href"] for r in results]
    return formatted_results, sources


class PlanResponse(BaseModel):
    plan: list[str] = Field(description="List of sub-questions to research")


class DeepResearchResponse(BaseModel):
    response: str = Field(description="The synthesized research response")
    relevant_sources: list[str] = Field(description="List of all source URLs")


class DeepResearch:
    def __init__(self):
        self.llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

    def run(self, goal: str):
        plan_result = self.llm.with_structured_output(PlanResponse).invoke(
            ChatPromptTemplate.from_messages(
                [
                    (
                        "system",
                        "Break the research goal into 3-5 specific sub-questions.",
                    ),
                    ("user", f"Research goal: {goal}"),
                ]
            ).format_messages()
        )
        docs, all_sources = [], []
        for sq in plan_result.plan:
            doc_text, sources = search_web(sq)
            docs.append(doc_text)
            all_sources.extend(sources)
        final_result = self.llm.with_structured_output(DeepResearchResponse).invoke(
            ChatPromptTemplate.from_messages(
                [
                    (
                        "system",
                        "Synthesize all research findings into a comprehensive "
                        "response. Include the URLs of the sources you used in "
                        "your response.",
                    ),
                    (
                        "user",
                        f"Goal: {goal}\n\nResearch findings:\n\n"
                        + "\n\n---\n\n".join(docs)
                        + "\n\nAvailable sources:\n"
                        + "\n".join([f"- {src}" for src in all_sources]),
                    ),
                ]
            ).format_messages()
        )
        return final_result.response, final_result.relevant_sources


response, sources = DeepResearch().run(QUESTION)
print(f"Response: {response}")
print(f"\nRelevant sources: {sources}")
# Linter will ignore these directories
IGNORE_LINT_DIRS = .venv|venv
LINE_LENGTH = 88

# ANSI color codes for echo output (assumed standard values; the full
# Makefile presumably defines these outside this excerpt)
YELLOW = \033[33m
GREEN = \033[32m
RESET = \033[0m

install_tools:
	@echo "$(YELLOW)🔧Installing tools...$(RESET)"
	@uv tool install black --force
	@uv tool install ruff --force
	@uv tool install ty --force
	@uv tool install vulture --force
	@echo "$(GREEN)✅Tools installed.$(RESET)"

fmt: install_tools
	@echo "$(YELLOW)✨Formatting project with Black...$(RESET)"
	@uv tool run black --exclude '/($(IGNORE_LINT_DIRS))/' . --line-length $(LINE_LENGTH)
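
Since fmt depends on install_tools, a single make fmt (re)installs Black, Ruff, ty and vulture via uv and then formats the tree with Black at the 88-character line length, skipping the .venv and venv directories. Finally, the same agent on the OpenAI Agents SDK:
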
from agents import Agent, Runner, ModelSettings
from ddgs import DDGS
from dotenv import load_dotenv
from pydantic import BaseModel, Field

load_dotenv()

QUESTION = "What is the projected future demand for high bandwidth memory?"


def search_web(query: str) -> tuple[str, list[str]]:
    with DDGS() as ddgs:
        results = list(ddgs.text(query, max_results=3))
    formatted_results = "\n\n".join(
        [f"{r['title']}: {r['href']} - {r['body']}" for r in results]
    )
    sources = [r["href"] for r in results]
    return formatted_results, sources


class PlanResponse(BaseModel):
    plan: list[str] = Field(description="List of sub-questions to research")


class DeepResearchResponse(BaseModel):
    response: str = Field(description="The synthesized research response")
    relevant_sources: list[str] = Field(description="List of all source URLs")


class DeepResearch:
    def __init__(self):
        self.planner = Agent(
            name="Planner",
            instructions="Break the research goal into 3-5 specific sub-questions.",
            output_type=PlanResponse,
            model="gpt-4o-mini",
            model_settings=ModelSettings(temperature=0),
        )
        self.synthesizer = Agent(
            name="Synthesizer",
            instructions=(
                "Synthesize all research findings into a comprehensive response. "
                "Include the URLs of the sources you used in your response."
            ),
            output_type=DeepResearchResponse,
            model="gpt-4o-mini",
            model_settings=ModelSettings(temperature=0),
        )

    def run(self, goal: str):
        plan_result = Runner.run_sync(self.planner, f"Research goal: {goal}")
        docs, all_sources = [], []
        for sq in plan_result.final_output.plan:
            doc_text, sources = search_web(sq)
            docs.append(doc_text)
            all_sources.extend(sources)
        final_result = Runner.run_sync(
            self.synthesizer,
            f"Goal: {goal}\n\nResearch findings:\n\n"
            + "\n\n---\n\n".join(docs)
            + "\n\nAvailable sources:\n"
            + "\n".join([f"- {src}" for src in all_sources]),
        )
        return (
            final_result.final_output.response,
            final_result.final_output.relevant_sources,
        )


response, sources = DeepResearch().run(QUESTION)
print(f"Response: {response}")
print(f"\nRelevant sources: {sources}")