@muchanem
Created March 27, 2025 05:32
# /// script
# requires-python = ">=3.13"
# dependencies = [
# "audioop-lts",
# "llm",
# "llm-gemini",
# "pyaudio",
# "pydub",
# ]
# ///
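# The inline metadata block above (PEP 723) lets this script be run directly with
# `uv run <this file>` (assuming uv is installed); uv resolves the listed dependencies
# into a temporary environment. The llm-gemini plugin also needs a Gemini API key
# configured, e.g. via `llm keys set gemini`.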
import llm
from pydub import AudioSegment
import io
import json


def split_audio_into_chunks(audio_path, chunk_length_minutes=5):
    """
    Split an m4a audio file into N-minute chunks and return each chunk as binary data.
    """
    # Load the audio file
    audio = AudioSegment.from_file(audio_path, format="m4a")
    # Calculate chunk length in milliseconds
    chunk_length_ms = chunk_length_minutes * 60 * 1000
    # Initialize list for binary chunks
    binary_chunks = []
    # Total length of the audio in milliseconds
    total_length_ms = len(audio)
    # Split the audio into chunks
    for start_ms in range(0, total_length_ms, chunk_length_ms):
        end_ms = min(start_ms + chunk_length_ms, total_length_ms)
        chunk = audio[start_ms:end_ms]
        # Convert chunk to binary data in mp3 format
        buffer = io.BytesIO()
        chunk.export(buffer, format="mp3")
        binary_chunks.append(buffer.getvalue())
    return binary_chunks

# Example usage:
audio_path = "31.m4a"
model = llm.get_model("gemini-2.0-flash")
schema = llm.schema_dsl("timestamp str: mm:ss,text,speaker: SPK_0 or SPK_1,language: two letter code", multi=True)
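# With multi=True, the model is asked to return an object whose "items" key holds an
# array of entries with the fields above (timestamp, text, speaker, language); that is
# what gets unwrapped when each response is parsed below.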
prompt = "transcribe"
chunk_length_minutes = 5
chunks = split_audio_into_chunks(audio_path, chunk_length_minutes=chunk_length_minutes)
output_data = []
for i, chunk in enumerate(chunks):
    try:
        attachment = llm.Attachment(type="audio/mp3", content=chunk)
        response = model.prompt(prompt, attachments=[attachment], schema=schema)
        # Parse JSON for the current chunk
        response_json = json.loads(response.text())["items"]

        # --- COMPUTE AND APPLY OFFSET ---
        # How many milliseconds in each chunk
        chunk_length_ms = chunk_length_minutes * 60 * 1000
        # The start of the current chunk (in ms)
        chunk_start_ms = i * chunk_length_ms
        # Convert that to total seconds for easy arithmetic
        chunk_start_seconds = chunk_start_ms // 1000

        for item in response_json:
            # item["timestamp"] is something like "mm:ss"
            mm_str, ss_str = item["timestamp"].split(":")
            mm = int(mm_str)
            ss = int(ss_str)
            original_seconds = mm * 60 + ss
            # Add the offset
            updated_seconds = original_seconds + chunk_start_seconds
            # Convert back to mm:ss
            new_mm = updated_seconds // 60
            new_ss = updated_seconds % 60
            item["timestamp"] = f"{new_mm:02d}:{new_ss:02d}"

        # Now that timestamps are offset, extend the main list
        output_data.extend(response_json)
    except Exception as e:
        print(f"Error: {e}")
        break

    # Optionally break early
    # if i >= 2:
    #     break

# Finally, write out to JSON
with open("31.json", 'w', encoding='utf-8') as file:
    json.dump(output_data, file, indent=2, ensure_ascii=False)
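
# The resulting 31.json is a flat list of transcript segments with timestamps offset
# onto the full recording's timeline. An illustrative (not actual) entry:
# {"timestamp": "07:15", "text": "...", "speaker": "SPK_1", "language": "en"}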