Last active
May 14, 2024 10:09
-
-
Save LucasAbijmil/2bf482438d29f4a1a74fff4c4d84622a to your computer and use it in GitHub Desktop.
Audio Transcription
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // | |
| // Copyright © 2024 Lucas Abijmil. All rights reserved. | |
| // | |
| import Speech | |
/// A service that can ask for transcription permission and transcribe an audio file,
/// reporting what happens through its `delegate`.
protocol TranscriptionService {
    /// Object that receives the transcription callbacks.
    var delegate: (any TranscriptionServiceDelegate)? { get set }

    /// Asynchronously requests permission and returns the resulting status.
    func requestPermission() async -> AuthorizationStatus

    /// Begins transcribing the audio located at `audioURL`.
    ///
    /// The method returns nothing; outcomes are expected to be reported via `delegate`.
    func startTranscription(for audioURL: URL)
}
/// Callbacks describing the progress of a transcription.
///
/// The protocol is class-bound (`AnyObject`) and all requirements are isolated to the main actor.
@MainActor protocol TranscriptionServiceDelegate: AnyObject {
    /// The transcription has started.
    func didStartTranscription()

    /// A transcript has been produced.
    /// - Parameter transcript: The transcribed text together with its timed parts.
    func didTranscribe(_ transcript: Transcript)

    /// The transcription was cancelled.
    func didCancelTranscription()

    /// The transcription has ended.
    /// - Parameter successfully: `true` when it ended without failure.
    func didFinishTranscription(successfully: Bool)
}
/// `TranscriptionService` implementation built on the Speech framework's `SFSpeechRecognizer`.
///
/// Progress is forwarded to `delegate` from the `SFSpeechRecognitionTaskDelegate` callbacks.
final class DefaultTranscriptionService: NSObject, TranscriptionService {
    /// Receiver of the transcription callbacks; held weakly.
    weak var delegate: TranscriptionServiceDelegate?

    // Objects belonging to the most recently started transcription; released by `stop()`.
    private var speechRecognizer: SFSpeechRecognizer?
    private var recognitionRequest: SFSpeechURLRecognitionRequest?
    private var recognitionTask: SFSpeechRecognitionTask?

    /// Requests speech-recognition authorization and converts the framework status
    /// into the app's `AuthorizationStatus`.
    func requestPermission() async -> AuthorizationStatus {
        await AuthorizationStatus(from: SFSpeechRecognizer.requestAuthorization())
    }

    /// Starts a recognition task for the audio file at `audioURL`, with this object as the task delegate.
    ///
    /// Returns without doing anything (and without notifying `delegate`) when a recognizer
    /// cannot be created or is not currently available.
    ///
    /// NOTE(review): a task that is still running from an earlier call is not cancelled here,
    /// only its reference is overwritten — confirm callers never start two transcriptions at once.
    func startTranscription(for audioURL: URL) {
        guard let recognizer = SFSpeechRecognizer() else { return }
        guard recognizer.isAvailable else { return }

        // Only the final result is wanted, punctuated, and server-side recognition is allowed.
        let request = SFSpeechURLRecognitionRequest(url: audioURL)
        request.requiresOnDeviceRecognition = false
        request.addsPunctuation = true
        request.shouldReportPartialResults = false

        // Keep references to the recognizer and request before starting the task.
        speechRecognizer = recognizer
        recognitionRequest = request
        recognitionTask = recognizer.recognitionTask(with: request, delegate: self)
    }

    /// Releases the recognizer, request and task, but only when the stored task reports
    /// itself as cancelled or finishing; otherwise nothing is changed.
    ///
    /// NOTE(review): verify that `isFinishing` is still `true` once a task has fully completed,
    /// otherwise the references are kept after a successful run.
    private func stop() {
        guard let task = recognitionTask, task.isCancelled || task.isFinishing else { return }
        speechRecognizer = nil
        recognitionRequest = nil
        recognitionTask = nil
    }
}
// MARK: - SFSpeechRecognitionTaskDelegate

/// Translates recognition-task callbacks into `TranscriptionServiceDelegate` calls.
///
/// Every callback uses `MainActor.assumeIsolated`, which requires the callback to already be
/// running on the main actor. NOTE(review): presumably the recognizer delivers its callbacks on
/// the main queue (no custom queue is configured in this file) — confirm.
extension DefaultTranscriptionService: SFSpeechRecognitionTaskDelegate {
    /// Speech was detected: report the start of the transcription.
    func speechRecognitionDidDetectSpeech(_ task: SFSpeechRecognitionTask) {
        MainActor.assumeIsolated {
            delegate?.didStartTranscription()
        }
    }

    /// A recognition result arrived. Results that are not final, or whose text is empty, are ignored;
    /// otherwise the result is converted into a `Transcript`, delivered to the delegate,
    /// and `stop()` is called.
    func speechRecognitionTask(_ task: SFSpeechRecognitionTask, didFinishRecognition recognitionResult: SFSpeechRecognitionResult) {
        guard recognitionResult.isFinal else { return }
        guard !recognitionResult.bestTranscription.formattedString.isEmpty else { return }

        MainActor.assumeIsolated {
            let best = recognitionResult.bestTranscription
            let fullText = best.formattedString
            // One `Transcript.Line` per segment; the end time is the segment's timestamp plus its duration.
            let lines = best.segments.map { segment in
                Transcript.Line(
                    words: segment.substring,
                    startTime: segment.timestamp,
                    endTime: segment.timestamp + segment.duration,
                    confidence: Double(segment.confidence)
                )
            }
            delegate?.didTranscribe(Transcript(transcript: fullText, lines: lines))
        }
        stop()
    }

    /// The task was cancelled: notify the delegate, then call `stop()`.
    func speechRecognitionTaskWasCancelled(_ task: SFSpeechRecognitionTask) {
        MainActor.assumeIsolated {
            delegate?.didCancelTranscription()
        }
        stop()
    }

    /// The task ended: forward the success flag to the delegate, then call `stop()`.
    func speechRecognitionTask(_ task: SFSpeechRecognitionTask, didFinishSuccessfully successfully: Bool) {
        MainActor.assumeIsolated {
            delegate?.didFinishTranscription(successfully: successfully)
        }
        stop()
    }
}
private extension SFSpeechRecognizer {
    /// `async` wrapper around the handler-based `requestAuthorization(_:)`.
    /// - Returns: The status passed to the handler.
    class func requestAuthorization() async -> SFSpeechRecognizerAuthorizationStatus {
        await withCheckedContinuation { continuation in
            // Resume the continuation with whatever status the handler receives.
            requestAuthorization { status in
                continuation.resume(returning: status)
            }
        }
    }
}
/// The text of a transcription together with its timed parts.
struct Transcript {
    /// One timed part of a transcript.
    struct Line {
        /// Text of this part.
        let words: String
        /// Time at which this part starts.
        let startTime: TimeInterval
        /// Time at which this part ends.
        let endTime: TimeInterval
        /// Confidence value attached to this part.
        let confidence: Double
    }

    /// The complete transcribed text.
    let transcript: String
    /// The timed parts that make up the transcript.
    let lines: [Line]
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment