Skip to content

Instantly share code, notes, and snippets.

@edemekong
Created September 30, 2025 10:16
Show Gist options
  • Select an option

  • Save edemekong/b30041c7a99e9c73d8726608b2450944 to your computer and use it in GitHub Desktop.

Select an option

Save edemekong/b30041c7a99e9c73d8726608b2450944 to your computer and use it in GitHub Desktop.
Speech-to-text | Text-to-speech - OpenAI
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import { pipeline } from 'stream/promises';
import axios from 'axios';
import { parseBuffer } from 'music-metadata';
import * as randomstring from 'randomstring';
import { createOpenAIInstance } from '../configs/open-ai';
import { storage } from '../configs/firebase';
/**
 * Converts the audio behind an `https://` URL into text using the OpenAI
 * audio translations endpoint with the `whisper-1` model.
 *
 * Values that do not start with `https://` are treated as already being text
 * and are returned unchanged, without creating an OpenAI client.
 *
 * The audio is downloaded into a freshly created directory under the OS temp
 * dir, so concurrent calls never share a file and the working directory does
 * not need to be writable. That directory is removed whether or not the
 * translation succeeds.
 *
 * @param value - Either plain text or an `https://` URL of an audio file.
 * @returns The translated text. Falls back to `value` itself when it is not
 *   an `https://` URL, or when the download/translation fails (the error is
 *   logged, not thrown).
 */
export async function convertSpeechToTextFromUrl(value: string): Promise<string> {
  if (!value.startsWith("https://")) {
    return value;
  }

  // Created outside the try block on purpose: a failure to build the client
  // still propagates to the caller for URL inputs, as it did before.
  const openai = createOpenAIInstance();
  let tempDir: string | undefined;

  try {
    console.log(value, "START PROCESSING");

    const response = await axios({
      method: "GET",
      url: value,
      responseType: "stream",
    });

    tempDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "speech-to-text-"));
    const localFilePath = path.join(tempDir, "temp_audio.mp3");

    // pipeline() rejects if EITHER the download stream or the file writer
    // fails, and destroys both streams, so a broken download cannot hang us.
    await pipeline(response.data, fs.createWriteStream(localFilePath));

    const translation = await openai.audio.translations.create({
      file: fs.createReadStream(localFilePath),
      model: "whisper-1",
    });

    console.log("Translated Text:", translation.text);
    return translation.text;
  } catch (error) {
    console.error("Error processing speech translation:", error);
  } finally {
    // Runs on success and failure alike so temp audio never accumulates.
    if (tempDir !== undefined) {
      try {
        await fs.promises.rm(tempDir, { recursive: true, force: true });
      } catch (cleanupError) {
        console.error("Error removing temporary audio directory:", cleanupError);
      }
    }
  }

  return value;
}
/**
 * Synthesizes `text` into AAC audio with the OpenAI speech endpoint
 * (`tts-1` model, `nova` voice), stores it in the storage bucket under
 * `priorities/<priorityId>/audios/`, makes the object public, and returns a
 * descriptor of the stored media.
 *
 * @param text - The text to synthesize.
 * @param priorityId - Identifier used as the folder segment of the storage path.
 * @returns An object with `contentType`, `fileLink`, `filePath`, `id` and
 *   `duration` (seconds; `0` when the duration cannot be determined), or
 *   `null` when any step other than the duration probe fails (the error is
 *   logged, not thrown).
 */
export async function convertTextToSpeech(text: string, priorityId: string) {
  try {
    const client = createOpenAIInstance();
    const randomSuffix = randomstring.generate({ charset: "alphanumeric", length: 8 });
    const fileId = `file_${randomSuffix}`;

    const speech = await client.audio.speech.create({
      model: "tts-1",
      voice: "nova",
      response_format: "aac",
      input: text,
    });
    const audioBytes = Buffer.from(await speech.arrayBuffer());

    const filePath = `priorities/${priorityId}/audios/${fileId}.aac`;
    const storedFile = storage.bucket().file(filePath);
    await storedFile.save(audioBytes, {
      metadata: { contentType: "audio/aac" },
    });
    await storedFile.makePublic();
    const fileLink = storedFile.publicUrl();

    // The duration probe is best-effort: a parse failure is logged and the
    // duration simply stays at 0 instead of failing the whole conversion.
    let duration = 0;
    try {
      const parsed = await parseBuffer(audioBytes, { mimeType: 'audio/aac' });
      duration = parsed.format.duration || 0;
    } catch (err) {
      console.error('Error analyzing audio metadata:', err);
    }

    return {
      contentType: 'audio',
      fileLink,
      filePath,
      id: fileId,
      duration,
    };
  } catch (error) {
    console.error("Error converting text to speech:", error);
    return null;
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment