Skip to content

Instantly share code, notes, and snippets.

@LucasAbijmil
Last active May 14, 2024 10:09
Show Gist options
  • Select an option

  • Save LucasAbijmil/2bf482438d29f4a1a74fff4c4d84622a to your computer and use it in GitHub Desktop.

Select an option

Save LucasAbijmil/2bf482438d29f4a1a74fff4c4d84622a to your computer and use it in GitHub Desktop.
Audio Transcription
//
// Copyright © 2024 Lucas Abijmil. All rights reserved.
//
import Speech
/// A service that can ask for transcription permission and transcribe an audio file.
protocol TranscriptionService {
    /// Object that receives the `TranscriptionServiceDelegate` callbacks.
    var delegate: (any TranscriptionServiceDelegate)? { get set }

    /// Requests permission and reports the outcome.
    /// - Returns: The authorization status produced by the request.
    func requestPermission() async -> AuthorizationStatus

    /// Starts transcribing the audio located at `audioURL`.
    ///
    /// The method returns nothing; presumably progress and results are delivered
    /// through `delegate` (this is how `DefaultTranscriptionService` behaves).
    /// - Parameter audioURL: Location of the audio to transcribe.
    func startTranscription(for audioURL: URL)
}
/// Callbacks describing the life cycle of a transcription.
///
/// The protocol is `@MainActor`-isolated, so every requirement is expected to be
/// invoked on the main actor. It is class-bound (`AnyObject`) so that it can be
/// held through a `weak` reference.
@MainActor
protocol TranscriptionServiceDelegate: AnyObject {
    /// The transcription has started.
    func didStartTranscription()

    /// A transcript has been produced.
    /// - Parameter transcript: The recognized text together with its per-line details.
    func didTranscribe(_ transcript: Transcript)

    /// The transcription was cancelled.
    func didCancelTranscription()

    /// The transcription has ended.
    /// - Parameter successfully: `true` when it ended without failure.
    func didFinishTranscription(successfully: Bool)
}
/// `TranscriptionService` implementation built on the Speech framework.
///
/// Events are forwarded to `delegate` from this class's
/// `SFSpeechRecognitionTaskDelegate` conformance.
final class DefaultTranscriptionService: NSObject, TranscriptionService {
    /// Receiver of the transcription callbacks; held weakly.
    weak var delegate: TranscriptionServiceDelegate?

    // Strong references to the objects of the most recently started transcription.
    // They are released together by `stop()`.
    private var speechRecognizer: SFSpeechRecognizer?
    private var recognitionRequest: SFSpeechURLRecognitionRequest?
    private var recognitionTask: SFSpeechRecognitionTask?

    /// Requests speech-recognition authorization and converts the framework status
    /// into an `AuthorizationStatus`.
    /// - Returns: The status built from the framework's answer.
    func requestPermission() async -> AuthorizationStatus {
        return await AuthorizationStatus(from: SFSpeechRecognizer.requestAuthorization())
    }

    /// Starts recognizing the audio file at `audioURL`.
    ///
    /// The request asks for final results only, with punctuation, and does not
    /// require on-device recognition.
    ///
    /// NOTE(review): when no recognizer can be created or it is unavailable, this
    /// returns silently and the delegate is not informed.
    /// NOTE(review): a transcription that is already in flight is not cancelled;
    /// its stored references are simply overwritten.
    /// - Parameter audioURL: Location of the audio file to transcribe.
    func startTranscription(for audioURL: URL) {
        guard let speechRecognizer = SFSpeechRecognizer(), speechRecognizer.isAvailable else { return }

        // The task-delegate callbacks forward events with `MainActor.assumeIsolated`,
        // which traps when not running on the main actor. Pin the callback queue to
        // the main queue explicitly instead of relying on the framework default.
        speechRecognizer.queue = .main

        let recognitionRequest = SFSpeechURLRecognitionRequest(url: audioURL)
        recognitionRequest.shouldReportPartialResults = false
        recognitionRequest.addsPunctuation = true
        recognitionRequest.requiresOnDeviceRecognition = false

        self.speechRecognizer = speechRecognizer
        self.recognitionRequest = recognitionRequest
        recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest, delegate: self)
    }

    /// Releases the recognizer, request and task.
    ///
    /// Does nothing unless the stored task reports `isCancelled` or `isFinishing`
    /// (which also covers the case where no task is stored).
    private func stop() {
        guard recognitionTask?.isCancelled == true || recognitionTask?.isFinishing == true else { return }
        speechRecognizer = nil
        recognitionRequest = nil
        recognitionTask = nil
    }
}
// MARK: - SFSpeechRecognitionTaskDelegate

/// Translates recognition-task callbacks into `TranscriptionServiceDelegate` events.
///
/// Each event is delivered inside `MainActor.assumeIsolated`, so these callbacks
/// must arrive on the main actor.
extension DefaultTranscriptionService: SFSpeechRecognitionTaskDelegate {
    /// Reports detected speech to the delegate as the start of the transcription.
    func speechRecognitionDidDetectSpeech(_ task: SFSpeechRecognitionTask) {
        MainActor.assumeIsolated {
            delegate?.didStartTranscription()
        }
    }

    /// Converts a final, non-empty recognition result into a `Transcript`, hands it
    /// to the delegate, then calls `stop()`.
    ///
    /// Results that are not final, or whose formatted text is empty, are ignored.
    func speechRecognitionTask(_ task: SFSpeechRecognitionTask, didFinishRecognition recognitionResult: SFSpeechRecognitionResult) {
        guard recognitionResult.isFinal else { return }

        let best = recognitionResult.bestTranscription
        let text = best.formattedString
        guard !text.isEmpty else { return }

        // One `Transcript.Line` per segment; the end time is the segment's
        // timestamp plus its duration.
        let lines = best.segments.map { segment in
            Transcript.Line(
                words: segment.substring,
                startTime: segment.timestamp,
                endTime: segment.timestamp + segment.duration,
                confidence: Double(segment.confidence)
            )
        }

        MainActor.assumeIsolated {
            let transcript = Transcript(transcript: text, lines: lines)
            delegate?.didTranscribe(transcript)
        }
        stop()
    }

    /// Reports the cancellation to the delegate, then calls `stop()`.
    func speechRecognitionTaskWasCancelled(_ task: SFSpeechRecognitionTask) {
        MainActor.assumeIsolated {
            delegate?.didCancelTranscription()
        }
        stop()
    }

    /// Reports the end of the task, with its success flag, to the delegate, then
    /// calls `stop()`.
    func speechRecognitionTask(_ task: SFSpeechRecognitionTask, didFinishSuccessfully successfully: Bool) {
        MainActor.assumeIsolated {
            delegate?.didFinishTranscription(successfully: successfully)
        }
        stop()
    }
}
private extension SFSpeechRecognizer {
    /// `async` wrapper around the callback-based `requestAuthorization(_:)`.
    /// - Returns: The status passed to the authorization callback.
    class func requestAuthorization() async -> SFSpeechRecognizerAuthorizationStatus {
        await withCheckedContinuation { continuation in
            // The continuation is resumed from the callback with the status it receives.
            requestAuthorization { status in
                continuation.resume(returning: status)
            }
        }
    }
}
/// The outcome of a transcription: the full text plus a breakdown into lines.
struct Transcript {
    /// One timed piece of the transcript.
    struct Line {
        /// Text of this piece.
        let words: String
        /// Time at which this piece begins.
        let startTime: TimeInterval
        /// Time at which this piece ends.
        let endTime: TimeInterval
        /// Confidence attached to this piece.
        let confidence: Double
    }

    /// The complete transcribed text.
    let transcript: String
    /// The timed pieces that make up the transcript.
    let lines: [Line]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment