Kwindla Hultman Kramer kwindla

## sonic-3-launch-day.py
import asyncio
import datetime
import os
import wave
from pathlib import Path

from dotenv import load_dotenv
from loguru import logger

from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams

## bot.py
import asyncio
import os
from datetime import datetime

from dotenv import load_dotenv
from loguru import logger

from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner

## gpt-5-ttft.py
# export OPENAI_API_KEY=sk_proj-...
# uv run test-gpt5.py "Write a four-line poem about LLMs."

# For a complete, single-file conversational voice agent
# example using GPT-5, see this gist:
# https://gist.github.com/kwindla/678ea297d12e24b928b636db928226fb

# /// script
# dependencies = ["openai"]
# ///

## gpt-5-voice.agent.py
# export OPENAI_API_KEY=sk_proj-...
# uv run gpt-5-voice-agent.py

# /// script
# dependencies = [
#   "numba==0.61.2",
#   "openai",
#   "python-dotenv",
#   "fastapi[all]",
#   "uvicorn",

## voice-agents.md

      
              1 file
            
          
              0 forks
            
          
                1 comment
              
            
              24 stars
            
          
                kwindla
                / voice-agents.md
            
            
              Created
              June 23, 2025 23:43
            
              
                Advice on Voice Agents - June 2025
              
          
    Advice on Voice AI, June 2025

My top three pieces of advice for people getting started with voice agents.


Spend time up front understanding why latency and instruction following accuracy drive voice AI tech choices.


You will need to add significant tooling complexity as you go from proof of concept to production. Prepare for that. Especially important: build lightweight evals as early as you can.


The right path is: start with a proven, "best practices" tech stack -> get everything working one piece at a time -> deploy to real-world users and collect data -> then think about optimizing cost/latency/etc.


## video-inference-result.md

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                kwindla
                / video-inference-result.md
            
            
              Created
              June 13, 2025 16:40
            
              
                Gemini Pro video understanding
              
          
    Task

I've been experimenting as much as I can with Gemini video understanding. Multiple videos, multi-step prompts, etc.
Prompt

Analyze this YouTube video.
https://www.youtube.com/watch?v=PgyJs0jfp_o

  
## inference-note.md

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                kwindla
                / inference-note.md
            
            
              Created
              June 11, 2025 17:30
            
              
                Funny inference result captured while recording demo traces
              
          
    Funny GPT-4o inference result.

Audible in this video: https://youtu.be/PgyJs0jfp_o?si=43CJgmk954kulmgl&t=863
Output

It sounds like we're on an intriguing mission! I'm going to scan through the grand chandeliers and ornate carpets of the hotel for traces. Be right back with the results!


## gemini-talk-transcript.py
from google import genai

import os

client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))

# filename_for_upload = "/Users/khkramer/Downloads/maven-lightning-trimmed.mp4"
# myfile = client.files.upload(file=filename_for_upload)
#
# print("My files:")

## daily-transport-double-transcription.py
# double transcription events
# pip install 'pipecat-ai[daily,silero,openai,cartesia]'==0.0.59 dotenv
#
# transcription events as expected
# pip install 'pipecat-ai[daily,silero,openai,cartesia]'==0.0.58 dotenv

import asyncio
import sys
import os

## gladia-tagalog-mixed.py
#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

import argparse
import asyncio
import os
	import asyncio
	import datetime
	import os
	import wave
	from pathlib import Path

	from dotenv import load_dotenv
	from loguru import logger

	from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
	import asyncio
	import os
	from datetime import datetime

	from dotenv import load_dotenv
	from loguru import logger

	from pipecat.audio.vad.silero import SileroVADAnalyzer
	from pipecat.pipeline.pipeline import Pipeline
	from pipecat.pipeline.runner import PipelineRunner
	# export OPENAI_API_KEY=sk_proj-...
	# uv run test-gpt5.py "Write a four-line poem about LLMs."

	# For a complete, single-file conversational voice agent
	# example using GPT-5, see this gist:
	# https://gist.github.com/kwindla/678ea297d12e24b928b636db928226fb

	# /// script
	# dependencies = ["openai"]
	# ///
	# export OPENAI_API_KEY=sk_proj-...
	# uv run gpt-5-voice-agent.py

	# /// script
	# dependencies = [
	# "numba==0.61.2",
	# "openai",
	# "python-dotenv",
	# "fastapi[all]",
	# "uvicorn",
	from google import genai

	import os

	client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))

	# filename_for_upload = "/Users/khkramer/Downloads/maven-lightning-trimmed.mp4"
	# myfile = client.files.upload(file=filename_for_upload)
	#
	# print("My files:")
	# double transcription events
	# pip install 'pipecat-ai[daily,silero,openai,cartesia]'==0.0.59 dotenv
	#
	# transcription events as expected
	# pip install 'pipecat-ai[daily,silero,openai,cartesia]'==0.0.58 dotenv

	import asyncio
	import sys
	import os
	#
	# Copyright (c) 2024–2025, Daily
	#
	# SPDX-License-Identifier: BSD 2-Clause License
	#

	import argparse
	import asyncio
	import os