Sesame+OpenAI API
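A minimal voice chat loop: replies are generated with the OpenAI Chat Completions API and spoken aloud with Sesame's CSM-1B text-to-speech model, played back through sounddevice.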
from huggingface_hub import hf_hub_download
from huggingface_hub import login
from generator import load_csm_1b
import sounddevice as sd
from openai import OpenAI
import os

# Authenticate with Hugging Face so the gated CSM checkpoint can be downloaded
login("__your_huggingface_access_token_here__")

# Create the OpenAI client only if the key is already set; otherwise it is
# created in the __main__ block after prompting the user for a key
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key) if api_key else None

# Download the CSM 1B checkpoint and load the generator on the GPU
model_path = hf_hub_download(repo_id="sesame/csm-1b", filename="ckpt.pt")
generator = load_csm_1b(model_path, "cuda")
class ConversationalAssistant:
    def __init__(self):
        print("Initializing CSM Generator...")
        print("CSM Generator ready!")
        # Initialize with a system message to set the tone
        self.conversation_history = [
            {"role": "system", "content": "You are a friendly, warm, and casual chat buddy. Use a conversational tone, casual language, some slang, and occasional emojis. Keep responses brief and engaging like texting with a good friend. Show enthusiasm, ask follow-up questions sometimes, and share personal-feeling opinions. Use humor when appropriate and don't be too formal or technical unless specifically asked. no emoji"}
        ]
        self.sample_rate = 24000  # CSM default sample rate

    def add_to_history(self, role, content):
        """Add a message to the conversation history."""
        self.conversation_history.append({"role": role, "content": content})
    def generate_response(self, user_input):
        """Generate a text response using the ChatGPT API."""
        # Add user input to conversation history
        self.add_to_history("user", user_input)
        try:
            # Call the ChatGPT API
            response = client.chat.completions.create(
                model="gpt-3.5-turbo",  # You can use gpt-4 if you have access
                messages=self.conversation_history,
                max_tokens=150,         # Adjust as needed
                temperature=0.7,        # Adjust for creativity vs. consistency
                presence_penalty=0.6,   # Encourage diverse topics
                frequency_penalty=0.2   # Slightly discourage repetition
            )
            # Extract the response text
            assistant_response = response.choices[0].message.content
            # Add assistant response to history
            self.add_to_history("assistant", assistant_response)
            return assistant_response
        except Exception as e:
            print(f"Error in ChatGPT API call: {e}")
            return "I'm sorry, I couldn't generate a response at this time."
    def text_to_speech(self, text):
        """Convert text to speech using CSM."""
        try:
            print(f"Generating audio for: '{text}'")
            audio = generator.generate(
                text=text,
                speaker=0,
                context=[],
                max_audio_length_ms=10_000,
            )
            # Move tensor from GPU to CPU if needed
            if hasattr(audio, 'device') and str(audio.device).startswith('cuda'):
                audio = audio.cpu()
            # Convert to numpy array if it's still a tensor
            if hasattr(audio, 'numpy'):
                audio = audio.numpy()
            return audio
        except Exception as e:
            print(f"Error in text-to-speech generation: {e}")
            return None
    def play_audio(self, audio):
        """Play the audio using sounddevice."""
        if audio is not None:
            print("Playing audio...")
            sd.play(audio, self.sample_rate)
            sd.wait()
            print("Audio playback complete")
        else:
            print("No audio to play")
    def chat(self):
        """Run an interactive chat session."""
        print("\nWelcome to the conversational assistant!")
        print("Type 'exit', 'quit', or 'bye' to end the conversation.\n")
        while True:
            user_input = input("You: ").strip()
            # Check for exit commands
            if user_input.lower() in ['exit', 'quit', 'bye']:
                print("Assistant: Goodbye!")
                break
            # Generate text response
            response_text = self.generate_response(user_input)
            print(f"Assistant: {response_text}")
            # Convert to speech and play
            audio = self.text_to_speech(response_text)
            self.play_audio(audio)
if __name__ == "__main__":
    if not os.getenv("OPENAI_API_KEY"):
        print("Warning: OPENAI_API_KEY environment variable not set.")
        print("Set it with: export OPENAI_API_KEY='your-api-key-here'")
        api_key = input("Enter your OpenAI API key to continue: ")
        os.environ["OPENAI_API_KEY"] = api_key
        client = OpenAI(api_key=api_key)

    # Create and run the assistant
    assistant = ConversationalAssistant()
    assistant.chat()
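# Rough run sketch (assumptions: generator.py from Sesame's CSM repo is importable
# from this directory, a CUDA GPU is available, and the huggingface_hub, openai,
# and sounddevice packages are installed):
#
#   export OPENAI_API_KEY='your-api-key-here'
#   python sesame_openai_chat.py   # placeholder filename; use whatever this script is saved as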