Skip to content

Instantly share code, notes, and snippets.

@nonchris
Last active August 8, 2025 19:48
Show Gist options
  • Select an option

  • Save nonchris/d987ed199a307b2fc7fd1d9d63097965 to your computer and use it in GitHub Desktop.

Select an option

Save nonchris/d987ed199a307b2fc7fd1d9d63097965 to your computer and use it in GitHub Desktop.
Simple Text to Speech GUI for OpenAI's API written in Python
"""
This is a very unclean, quick hack for educational purposes.
It allows you to interact with OpenAI's TTS (text-to-speech) backend via the API.
You can choose between all their voices.
Note: This code has a few known bugs/issues:
* It uses a deprecated function for writing the audio
* The filename field for the user is just ignored.
* The API key is HARDCODED in the software and can be extracted easily
* You can't paste using right click (ctrl+v works though)
* You can't change the language in the UI
...and probably a few more.
The UI part was mostly AI generated and isn't audited so far.
It just works well enough, for now.
You can use it if you like to, but keep these limitations in mind.
I had less than 25 minutes to build this.
And the key we're using was handed to everyone anyway so there was no security issue
(it had a hard limit and was invalidated afterwards)
"""
# first replace the API key (the `os.environ["OPENAI_API_KEY"] = ...` line right below the imports) with your actual token
# you can find or create a key here: https://platform.openai.com/api-keys
# to GET STARTED execute these commands in your terminal or inside your IDE:
# python3 -m venv venv (note: the command for python may vary on windows, it could be py or python too)
# source venv/bin/activate (note: you might need activate.bat or activate.ps1 on windows)
# python3 -m pip install openai
# (you can skip the next one if you wanna build an .exe)
# python3 app.py
# BUILD as .exe for windows:
# the line you're looking for is: `os.environ["OPENAI_API_KEY"] = "YOUR API KEY"`
# pip install pyinstaller~=5.13
# pyinstaller .\text-to-speech-gui-openai.py --onefile --name text-zu-sprache.exe
import ctypes
import datetime as dt
import os
import subprocess
import sys
import tkinter as tk
import tkinter.font as tkfont
from os.path import expanduser
from pathlib import Path
from tkinter import filedialog
from tkinter import ttk

from openai import OpenAI
# NOTE: Pasting a key into a script or executable should NEVER be done in
# real software. It is tolerated here only because a whole class of kids
# shares the same key and has access to it in plain text anyway.
# Your API key should look something like this: sk-veryManyRandomLettersAndNumbers
# Paste it in below and keep the quotation marks.
os.environ["OPENAI_API_KEY"] = "YOUR API KEY"
# The client is created without an explicit key; presumably it picks up
# OPENAI_API_KEY from the environment variable set on the line above.
client = OpenAI()
def call_voice(text: str, output_path: Path, voice: str) -> None:
    """Synthesize *text* with OpenAI's ``tts-1`` model and save the audio.

    Args:
        text: The text to be spoken.
        output_path: File the audio is written to.
        voice: Name of the voice to use (e.g. ``"alloy"``).

    Raises:
        Whatever the OpenAI client or the file system raises; nothing is
        caught here so the caller can report the error.
    """
    # Calling ``stream_to_file`` on the plain (non-streaming) response is
    # deprecated in the OpenAI library. The streaming-response context
    # manager is the supported replacement and also makes sure the HTTP
    # response is closed once the file has been written.
    with client.audio.speech.with_streaming_response.create(
        model="tts-1",
        voice=voice,
        input=text,
    ) as response:
        response.stream_to_file(output_path)
class App:
    """Tkinter window that turns typed text into an MP3 file via ``call_voice``.

    Generated files are stored in an ``audio`` folder inside the user's home
    directory. Creating an instance builds the window and enters the Tk main
    loop, so the constructor only returns once the window has been closed.
    """

    def __init__(self):
        # Path of the most recently generated file; None until one exists.
        self.file_path = None
        self.window = tk.Tk()
        if os.name == "nt":
            # Purely cosmetic (sharper rendering on high-DPI screens), so a
            # failure here is deliberately ignored: the call is unavailable
            # when the system has no shcore library.
            try:
                ctypes.windll.shcore.SetProcessDpiAwareness(1)
            except (AttributeError, OSError):
                pass
        self.window.title("Text-zu-Sprache Generator")
        self.custom_font = tkfont.Font(size=11)
        self.window.geometry("700x600")
        self.init_ui()
        self.window.mainloop()

    def init_ui(self):
        """Create and pack all widgets, top to bottom."""
        self.whitespace()
        tk.Label(self.window, text="Dateiname:", font=self.custom_font).pack()
        self.output_file_entry = tk.Entry(self.window, font=self.custom_font)
        self.output_file_entry.pack()
        tk.Label(
            self.window,
            text="(Optional. Standard ist speech_0.mp3, speech_1.mp3 usw.)",
            font=self.custom_font,
        ).pack()
        self.whitespace(2)

        tk.Label(self.window, text="Stimme:", font=self.custom_font).pack()
        self.voice_var = tk.StringVar(value="alloy")
        self.voice_dropdown = ttk.Combobox(
            self.window,
            textvariable=self.voice_var,
            values=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
            font=self.custom_font,
        )
        self.voice_dropdown.pack()
        self.whitespace(2)

        tk.Label(self.window, text="Text:", font=self.custom_font).pack()
        self.text_entry = tk.Text(
            self.window, font=self.custom_font, wrap=tk.WORD, height=10
        )
        self.text_entry.pack(expand=True, fill=tk.BOTH)
        self.whitespace(2)

        # Status line used for both success and error messages.
        self.result_label = tk.Label(self.window, text="", font=self.custom_font)
        self.result_label.pack()

        button_frame = tk.Frame(self.window)
        button_frame.pack()
        tk.Button(
            button_frame,
            text="Generieren",
            command=self.generate_speech,
            font=self.custom_font,
        ).pack(side=tk.LEFT)
        tk.Button(
            button_frame,
            text="Ordner öffnen",
            command=self.open_folder,
            font=self.custom_font,
        ).pack(side=tk.LEFT)

    @staticmethod
    def _resolve_output_path(directory: Path, requested_name: str = "") -> Path:
        """Return a not-yet-existing ``.mp3`` path inside *directory*.

        Args:
            directory: Folder the file will be created in.
            requested_name: File name typed by the user; may be empty.

        Returns:
            ``speech_0.mp3``, ``speech_1.mp3``, ... when no name was given.
            Otherwise the requested name (with ``.mp3`` appended if missing),
            or ``<stem>_1.mp3``, ``<stem>_2.mp3``, ... if that file already
            exists, so earlier recordings are never overwritten.
        """
        # Keep only the last path component so that input such as
        # "../somewhere/x" cannot place the file outside of *directory*.
        name = Path(requested_name.strip()).name
        if name:
            if Path(name).suffix.lower() != ".mp3":
                name += ".mp3"
            stem = Path(name).stem
            candidate = directory / name
        else:
            stem = "speech"
            candidate = directory / "speech_0.mp3"

        index = 0
        while candidate.exists():
            index += 1
            candidate = directory / f"{stem}_{index}.mp3"
        return candidate

    def _show_message(self, message: str) -> None:
        """Display *message* in the status line below the text box."""
        self.result_label.config(text=message, font=self.custom_font)

    def generate_speech(self):
        """Send the entered text to the TTS backend and store the audio file.

        The result (success with the file path, or the error text) is shown
        in the status line. On success ``self.file_path`` is updated.
        """
        text = self.text_entry.get("1.0", tk.END).strip()
        if not text:
            self._show_message("Fehler: Text darf nicht leer sein.")
            return
        voice = self.voice_var.get()
        audio_dir = Path(expanduser("~")) / "audio"

        # This is the UI boundary: any failure (folder creation, network,
        # API, disk) is reported to the user instead of crashing the callback.
        try:
            audio_dir.mkdir(parents=True, exist_ok=True)
            file = self._resolve_output_path(audio_dir, self.output_file_entry.get())
            print(file)
            start = dt.datetime.now()
            print(f"Starting to generate at {start}")
            call_voice(text, file, voice)
            now = dt.datetime.now()
            print(f"Done generating at {now}, this took: {(now - start).total_seconds()} seconds")
        except Exception as e:
            self._show_message(f"Fehler: {e}")
            return

        self.file_path = file
        self._show_message(f"Erfolgreich! Datei gespeichert unter:\n{file}")

    def open_folder(self):
        """Open the folder of the last generated file in the file manager."""
        if not self.file_path:
            self._show_message("Fehler: Keine Datei vorhanden.")
            return

        folder = self.file_path.parent
        try:
            if os.name == "nt":
                os.startfile(folder)
            elif sys.platform == "darwin":
                # macOS also reports os.name == "posix" but ships no
                # xdg-open, so it has to be checked before the generic branch.
                subprocess.call(["open", str(folder)])
            elif os.name == "posix":
                subprocess.call(["xdg-open", str(folder)])
            else:
                self._show_message("Fehler: Plattform nicht unterstützt.")
        except OSError as e:
            # E.g. the launcher program is not installed.
            self._show_message(f"Fehler: {e}")

    def whitespace(self, rows=1):
        """Insert *rows* empty labels as vertical spacing."""
        for _ in range(rows):
            tk.Label(self.window, text="", font=self.custom_font).pack()
# Start the GUI only when this file is run as a script, not when imported.
if __name__ == "__main__":
    App()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment