Skip to content

Instantly share code, notes, and snippets.

@amosgyamfi
Created December 18, 2025 21:18
Show Gist options
  • Select an option

  • Save amosgyamfi/c27a6cc72e110b7e08af5630127bb7bf to your computer and use it in GitHub Desktop.

Select an option

Save amosgyamfi/c27a6cc72e110b7e08af5630127bb7bf to your computer and use it in GitHub Desktop.
import logging
from vision_agents.core import User, Agent, cli
from vision_agents.core.agents import AgentLauncher
from vision_agents.plugins import getstream, gemini, inworld, smart_turn, deepgram
logger = logging.getLogger(__name__)
async def create_agent(**kwargs) -> Agent:
    """Build the video-understanding agent handed to the launcher.

    The agent is assembled from a Stream edge transport, a Gemini LLM,
    Inworld text-to-speech, Deepgram speech-to-text and the ``smart_turn``
    turn-detection plugin. It is only constructed here; joining a call is
    done separately.

    Args:
        **kwargs: Accepted so the caller may pass extra options; none are
            read by this function.

    Returns:
        The configured ``Agent`` instance.
    """
    return Agent(
        edge=getstream.Edge(),  # low latency edge. clients for React, iOS, Android, RN, Flutter etc.
        agent_user=User(name="Video Understanding Agent", id="agent"),
        instructions="Read @gemini_flash_instructions.md",
        llm=gemini.LLM("gemini-3-flash-preview"),
        tts=inworld.TTS(),
        stt=deepgram.STT(),
        # turn detection from the smart_turn plugin
        # (or omit; some STT providers handle turn-taking)
        turn_detection=smart_turn.TurnDetection(),
    )
async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None:
    """Create a call, have the agent join it, and stay until the call ends.

    Args:
        agent: The agent that creates and joins the call.
        call_type: Call type forwarded to ``agent.create_call``.
        call_id: Call identifier forwarded to ``agent.create_call``.
        **kwargs: Accepted so the caller may pass extra options; none are
            read by this function.
    """
    call = await agent.create_call(call_type, call_id)

    # Have the agent join the call/room
    # NOTE(review): the original indentation was lost; both awaits below are
    # placed inside the `with` so the join stays open until the call ends --
    # confirm against the library's examples.
    with await agent.join(call):
        await agent.simple_response(
            "What do you see in my video? Tell me about the objects in the video."
        )
        # run till the call ends
        await agent.finish()
if __name__ == "__main__":
    # Only start the launcher when run as a script, so importing this module
    # has no side effects beyond defining the two callbacks.
    cli(AgentLauncher(create_agent=create_agent, join_call=join_call))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment