# amosgyamfi/gemini3flash_video_understanding.py

## gemini3flash_video_understanding.py
import logging

from vision_agents.core import User, Agent, cli
from vision_agents.core.agents import AgentLauncher
from vision_agents.plugins import getstream, gemini, inworld, smart_turn, deepgram

logger = logging.getLogger(__name__)

async def create_agent(**kwargs) -> Agent:
    """Build and return the "Video Understanding Agent".

    The agent is assembled from a GetStream edge, a Gemini LLM
    ("gemini-3-flash-preview"), Inworld TTS, Deepgram STT and the
    ``smart_turn`` turn detector.

    Args:
        **kwargs: Accepted so the launcher may pass extra options; none of
            them are read here.

    Returns:
        The configured ``Agent`` instance.
    """
    # Components are created in the same order the keyword arguments were
    # originally evaluated.
    # Low latency edge; clients for React, iOS, Android, RN, Flutter etc.
    transport = getstream.Edge()
    identity = User(name="Video Understanding Agent", id="agent")
    model = gemini.LLM("gemini-3-flash-preview")
    speech_out = inworld.TTS()
    speech_in = deepgram.STT()
    # NOTE(review): the original comment called this "vogent" turn detection,
    # but the plugin used is smart_turn — confirm which is intended. It can
    # reportedly be omitted because some STT providers handle turn-taking.
    turn_detector = smart_turn.TurnDetection()

    return Agent(
        edge=transport,
        agent_user=identity,
        instructions="Read @gemini_flash_instructions.md",
        llm=model,
        tts=speech_out,
        stt=speech_in,
        turn_detection=turn_detector,
    )


async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None:
    """Join a call with *agent*, send an opening prompt and wait for the end.

    Args:
        agent: The agent that creates and joins the call.
        call_type: Forwarded to ``agent.create_call``.
        call_id: Forwarded to ``agent.create_call``.
        **kwargs: Accepted for launcher compatibility; not read here.
    """
    opening_prompt = "What do you see in my video? Tell me about the objects in the video."

    room = await agent.create_call(call_type, call_id)

    # Joining is awaited first; the awaited result is then used as a regular
    # (synchronous) context manager for the duration of the session.
    session = await agent.join(room)
    with session:
        await agent.simple_response(opening_prompt)

        # Per the original author's note, this runs until the call ends.
        await agent.finish()


if __name__ == "__main__":
    # Script entry point: wrap the two module-level coroutines in an
    # AgentLauncher and hand it to the Vision Agents CLI.
    cli(AgentLauncher(create_agent=create_agent, join_call=join_call))
# No results found