Created
December 18, 2025 21:18
-
-
Save amosgyamfi/c27a6cc72e110b7e08af5630127bb7bf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import logging | |
| from vision_agents.core import User, Agent, cli | |
| from vision_agents.core.agents import AgentLauncher | |
| from vision_agents.plugins import getstream, gemini, inworld, smart_turn, deepgram | |
| logger = logging.getLogger(__name__) | |
async def create_agent(**kwargs) -> Agent:
    """Build the "Video Understanding Agent" from its pluggable components.

    Args:
        **kwargs: Accepted for launcher compatibility; not used here.

    Returns:
        An ``Agent`` wired with a GetStream edge, a Gemini LLM, Inworld TTS,
        Deepgram STT and smart-turn turn detection.
    """
    # Low-latency edge transport; clients exist for React, iOS, Android,
    # RN, Flutter etc.
    transport = getstream.Edge()
    identity = User(name="Video Understanding Agent", id="agent")

    language_model = gemini.LLM("gemini-3-flash-preview")
    speech_out = inworld.TTS()
    speech_in = deepgram.STT()
    # Turn detection is optional: some STT providers handle turn-taking
    # themselves, in which case it can be omitted.
    turn_taking = smart_turn.TurnDetection()

    return Agent(
        edge=transport,
        agent_user=identity,
        # Presumably the framework resolves the "@file" reference to the
        # markdown file's contents -- confirm against the Agent docs.
        instructions="Read @gemini_flash_instructions.md",
        llm=language_model,
        tts=speech_out,
        stt=speech_in,
        turn_detection=turn_taking,
    )
async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None:
    """Create a call, join it, prompt the agent once and wait for it to finish.

    Args:
        agent: The agent that should take part in the call.
        call_type: Call type forwarded to ``agent.create_call``.
        call_id: Call identifier forwarded to ``agent.create_call``.
        **kwargs: Accepted for launcher compatibility; not used here.
    """
    call = await agent.create_call(call_type, call_id)

    # Have the agent join the call/room; the awaited result is used as a
    # (synchronous) context manager for the duration of the session.
    session = await agent.join(call)
    with session:
        opening_prompt = (
            "What do you see in my video? Tell me about the objects in the video."
        )
        await agent.simple_response(opening_prompt)

        # Stay inside the session until the call ends.
        await agent.finish()
if __name__ == "__main__":
    # Hand the agent factory and the join routine to the launcher, then let
    # the CLI entry point drive it.
    launcher = AgentLauncher(create_agent=create_agent, join_call=join_call)
    cli(launcher)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment