Last active
October 12, 2025 05:34
-
-
Save theoknock/13d2dd1d0df96347b6a2ec9b72c900aa to your computer and use it in GitHub Desktop.
A SwiftUI text-to-speech app that parses SSML-like markup (such as <prosody> tags) and reads the text aloud with AVSpeechSynthesizer. It provides a play/pause control and sliders for adjusting speech rate, pitch, and volume.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // | |
| // ContentView.swift | |
| // DesiderataReader | |
| // | |
| // Created by Xcode Developer on 10/11/25. | |
| // | |
| import SwiftUI | |
| import AVFoundation | |
| import Combine | |
| import AVFAudio | |
| import Observation | |
/// Main screen of the reader.
///
/// Shows the tag-stripped text, a play/pause button, and sliders for the
/// speech rate, pitch, and volume held by `SpeechManager`.
struct ContentView: View {
    /// Owns the synthesizer and the adjustable speech settings.
    @State private var speechManager = SpeechManager()

    /// Drives the play/pause icon. Toggled on every button tap and reset once
    /// the synthesizer reports that nothing is left to speak.
    @State private var isSpeaking: Bool = false

    var body: some View {
        NavigationStack {
            VStack(spacing: 0) {
                Text(speechManager.displayText)
                    .font(.system(.body, design: .serif).italic())
                    .padding()
                    .background(Color(.systemGray6))
                    .cornerRadius(10)

                HStack(spacing: 40) {
                    Button(action: togglePlayback) {
                        Image(systemName: isSpeaking ? "pause.circle.fill" : "play.circle.fill")
                            .font(.system(size: 50))
                            .foregroundColor(.blue)
                    }
                }
                .padding()

                settingsPanel

                Spacer()
            }
            .navigationTitle("SSML + AVSpeechSynthesizer Sample")
            .navigationBarTitleDisplayMode(.inline)
            .navigationSubtitle("James Alan Bush (Build #fe96301)")
        }
        // The notification is posted once per finished utterance. Hopping to the
        // main run loop keeps the state mutation on the main thread and lets the
        // synthesizer update `isSpeaking` before it is read here.
        .onReceive(
            NotificationCenter.default
                .publisher(for: .AVSpeechSynthesizerDidFinish)
                .receive(on: RunLoop.main)
        ) { _ in
            print("AVSpeechSynthesizerDidFinish")
            // Only flip the button back to "play" when the queue is drained;
            // intermediate utterances leave the synthesizer in a speaking state.
            if !speechManager.speaking {
                isSpeaking = false
            }
        }
    }

    /// Rate, pitch, and volume sliders grouped in a rounded panel.
    private var settingsPanel: some View {
        VStack {
            Text("Speech Settings")
                .font(.headline)

            HStack {
                Text("Rate:")
                // The rate is a discrete step (0, 1, or 2). It is only applied to
                // newly created utterances, so playback is restarted when the
                // user lets go of the slider while speech is running.
                Slider(value: $speechManager.rate, in: 0 ... 2, step: 1, onEditingChanged: { editing in
                    guard !editing, isSpeaking else { return }
                    speechManager.stop()
                    speechManager.play()
                })
                Text(String(format: "%.0f", speechManager.rate))
                    .frame(width: 50)
            }

            HStack {
                Text("Pitch:")
                Slider(value: $speechManager.pitch, in: 0.8 ... 1.2)
                Text(String(format: "%.2f", speechManager.pitch))
                    .frame(width: 50)
            }

            HStack {
                Text("Volume:")
                Slider(value: $speechManager.volume, in: 0.0 ... 1.0)
                Text(String(format: "%.2f", speechManager.volume))
                    .frame(width: 50)
            }
        }
        .padding()
        .background(Color(.systemGray6))
        .cornerRadius(10)
        .padding(.horizontal)
    }

    /// Asks the speech manager to play, pause, or resume, then flips the icon.
    private func togglePlayback() {
        Task(priority: .high) {
            do {
                try await speechManager.togglePlay()
                isSpeaking.toggle()
                print("isSpeaking\t==\t\(speechManager.speaking)\t&&\t\(isSpeaking)")
            } catch {
                // Report the underlying error instead of silently dropping it.
                print("ERROR: isSpeaking\t==\t\(speechManager.speaking)\t&&\t\(isSpeaking)\t\(error)")
            }
        }
    }
}
/// Parses the embedded SSML-like text into utterances and drives an
/// `AVSpeechSynthesizer`, exposing rate, pitch, and volume settings to the UI.
///
/// `AVSpeechSynthesizer` does not accept this markup directly, so `<prosody>`
/// attributes are translated into per-utterance properties instead.
@Observable class SpeechManager: NSObject, AVSpeechSynthesizerDelegate {

    // MARK: - Types and constants

    /// Prosody state carried from one parsed line to the next.
    private struct Prosody {
        /// Speed step: 0 = slow, 1 = medium, 2 = fast.
        var rate: Float = 1.0
        var pitch: Float = 1.0
        var volume: Float = 0.8
    }

    /// `<prosody>` attribute values understood by the parser, in match order.
    private static let rateSteps: [(name: String, value: Float)] = [("slow", 0.0), ("medium", 1.0), ("fast", 2.0)]
    private static let pitchLevels: [(name: String, value: Float)] = [("low", 0.9), ("medium", 1.0), ("high", 1.1)]
    private static let volumeLevels: [(name: String, value: Float)] = [("soft", 0.6), ("loud", 1.0)]

    /// Matches the first run of text enclosed between two tags on one line.
    /// Compiled once instead of once per parsed line.
    private static let textBetweenTags = try? NSRegularExpression(pattern: ">([^<]+)<")

    // MARK: - State

    private let synthesizer = AVSpeechSynthesizer()

    /// Speed step chosen in the UI: 0 = slow, 1 = medium, 2 = fast.
    /// Converted to a real `AVSpeechUtterance.rate` when utterances are built.
    var rate: Float = 1.0

    /// Pitch multiplier applied to every utterance.
    var pitch: Float = 1.0

    /// Volume applied to every utterance.
    var volume: Float = 0.8

    /// Mirrors `AVSpeechSynthesizer.isSpeaking`.
    var speaking: Bool {
        synthesizer.isSpeaking
    }

    // SSML-like markup for Desiderata.
    // Note: AVSpeechSynthesizer doesn't support true SSML, so it is simulated with prosody controls.
    let ssmlText = """
        <speak>
        <prosody rate="medium" pitch="high" volume="loud">
        Go placidly amid the noise and the haste, and remember what peace there may be in silence. As far as possible, without surrender, be on good terms with all persons.
        Speak your truth quietly and clearly; and listen to others, even to the dull and the ignorant; they too have their story.
        Avoid loud and aggressive persons; they are vexatious to the spirit. If you compare yourself with others, you may become vain or bitter, for always there will be greater and lesser persons than yourself.
        Enjoy your achievements as well as your plans. Keep interested in your own career, however humble; it is a real possession in the changing fortunes of time.
        Exercise caution in your business affairs, for the world is full of trickery. But let this not blind you to what virtue there is; many persons strive for high ideals, and everywhere life is full of heroism.
        Be yourself. Especially do not feign affection. Neither be cynical about love; for in the face of all aridity and disenchantment, it is as perennial as the grass.
        Take kindly the counsel of the years, gracefully surrendering the things of youth.
        Nurture strength of spirit to shield you in sudden misfortune. But do not distress yourself with dark imaginings. Many fears are born of fatigue and loneliness.
        Beyond a wholesome discipline, be gentle with yourself. You are a child of the universe no less than the trees and the stars; you have a right to be here.
        And whether or not it is clear to you, no doubt the universe is unfolding as it should. Therefore be at peace with God, whatever you conceive Him to be. And whatever your labors and aspirations, in the noisy confusion of life, keep peace in your soul. With all its sham, drudgery and broken dreams, it is still a beautiful world. Be cheerful. Strive to be happy.
        </prosody>
        </speak>
        """

    /// `ssmlText` with every `<…>` tag removed and surrounding whitespace trimmed.
    var displayText: String {
        ssmlText
            .replacingOccurrences(of: "<[^>]+>", with: "", options: .regularExpression)
            .trimmingCharacters(in: .whitespacesAndNewlines)
    }

    // MARK: - Lifecycle

    override init() {
        super.init()
        synthesizer.delegate = self
    }

    // MARK: - Playback

    /// Parses `ssmlText` and enqueues every resulting utterance using the
    /// current `rate`, `pitch`, and `volume` settings.
    func play() {
        let utterances = parseSSML(ssmlText)

        // Configure the audio session for spoken audio. Failure is logged and
        // playback is still attempted with whatever session is active.
        do {
            let session = AVAudioSession.sharedInstance()
            try session.setCategory(.playback, mode: .spokenAudio, options: .duckOthers)
            try session.setActive(true)
        } catch {
            print("Failed to set up audio session: \(error)")
        }

        // The UI settings take precedence over the prosody parsed from the markup.
        let speechRate = Self.speechRate(forStep: rate)
        for utterance in utterances {
            utterance.rate = speechRate
            utterance.pitchMultiplier = pitch
            utterance.volume = volume
            synthesizer.speak(utterance)
        }
    }

    /// Pauses speech at the next word boundary.
    func pause() {
        synthesizer.pauseSpeaking(at: .word)
    }

    /// Stops speech at the next word boundary.
    func stop() {
        synthesizer.stopSpeaking(at: .word)
    }

    /// Resumes when paused, pauses when speaking, and starts playback otherwise.
    ///
    /// The work is done before the function returns, so a caller that awaits it
    /// observes the updated synthesizer state.
    func togglePlay() async throws -> Void {
        if synthesizer.isPaused {
            synthesizer.continueSpeaking()
        } else if synthesizer.isSpeaking {
            synthesizer.pauseSpeaking(at: .word)
        } else {
            play()
        }
    }

    // MARK: - Parsing

    /// Converts a 0…2 speed step into a value inside the range that
    /// `AVSpeechUtterance.rate` accepts: 0 → minimum, 1 → default, 2 → maximum.
    /// Steps outside 0…2 are clamped; intermediate values are interpolated.
    private static func speechRate(forStep step: Float) -> Float {
        let clamped = min(max(step, 0), 2)
        if clamped <= 1 {
            let span = AVSpeechUtteranceDefaultSpeechRate - AVSpeechUtteranceMinimumSpeechRate
            return AVSpeechUtteranceMinimumSpeechRate + clamped * span
        }
        let span = AVSpeechUtteranceMaximumSpeechRate - AVSpeechUtteranceDefaultSpeechRate
        return AVSpeechUtteranceDefaultSpeechRate + (clamped - 1) * span
    }

    /// Returns the value of the first table entry whose `attribute="name"`
    /// text occurs in `line`, or `nil` when none does.
    private static func level(of attribute: String,
                              in line: String,
                              from table: [(name: String, value: Float)]) -> Float? {
        table.first { line.contains("\(attribute)=\"\($0.name)\"") }?.value
    }

    /// Builds an en-US utterance for `text` carrying the given prosody.
    private static func makeUtterance(_ text: String,
                                      prosody: Prosody,
                                      voice: AVSpeechSynthesisVoice?) -> AVSpeechUtterance {
        let utterance = AVSpeechUtterance(string: text)
        utterance.rate = speechRate(forStep: prosody.rate)
        utterance.pitchMultiplier = prosody.pitch
        utterance.volume = prosody.volume
        utterance.voice = voice
        return utterance
    }

    /// Walks the markup line by line and produces one utterance per line of text.
    ///
    /// - A line containing `<prosody` updates the prosody used for the lines that follow.
    /// - Text enclosed between two tags on the same line becomes an utterance,
    ///   followed by a 0.5 s delay when that line also contains `<break`.
    /// - A non-empty line without any angle bracket becomes an utterance as is.
    private func parseSSML(_ ssml: String) -> [AVSpeechUtterance] {
        var utterances: [AVSpeechUtterance] = []
        var prosody = Prosody()
        let voice = AVSpeechSynthesisVoice(language: "en-US")

        for line in ssml.components(separatedBy: .newlines) {
            let trimmed = line.trimmingCharacters(in: .whitespacesAndNewlines)

            if trimmed.contains("<prosody") {
                // Attributes that are absent or unrecognized keep their previous value.
                prosody.rate = Self.level(of: "rate", in: trimmed, from: Self.rateSteps) ?? prosody.rate
                prosody.pitch = Self.level(of: "pitch", in: trimmed, from: Self.pitchLevels) ?? prosody.pitch
                prosody.volume = Self.level(of: "volume", in: trimmed, from: Self.volumeLevels) ?? prosody.volume
            }

            // Text wrapped in tags on the same line.
            if let match = Self.textBetweenTags?.firstMatch(in: trimmed,
                                                            range: NSRange(trimmed.startIndex..., in: trimmed)),
               let range = Range(match.range(at: 1), in: trimmed) {
                let text = String(trimmed[range]).trimmingCharacters(in: .whitespacesAndNewlines)
                if !text.isEmpty {
                    let utterance = Self.makeUtterance(text, prosody: prosody, voice: voice)
                    if trimmed.contains("<break") {
                        utterance.postUtteranceDelay = 0.5
                    }
                    utterances.append(utterance)
                }
            }

            // Plain text without any tag.
            if !trimmed.isEmpty && !trimmed.contains("<") && !trimmed.contains(">") {
                utterances.append(Self.makeUtterance(trimmed, prosody: prosody, voice: voice))
            }
        }
        return utterances
    }

    // MARK: - AVSpeechSynthesizerDelegate

    /// Rebroadcasts the end of each utterance as a notification.
    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance) {
        NotificationCenter.default.post(name: .AVSpeechSynthesizerDidFinish, object: nil)
    }
}
extension Notification.Name {
    /// Name of the notification posted from the speech synthesizer's
    /// `didFinish` delegate callback.
    static let AVSpeechSynthesizerDidFinish = Self(rawValue: "AVSpeechSynthesizerDidFinish")
}
// Xcode canvas preview: renders ContentView with a dark color scheme.
#Preview {
    ContentView().preferredColorScheme(.dark)
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // | |
| // DesiderataReaderApp.swift | |
| // DesiderataReader | |
| // | |
| // Created by Xcode Developer on 10/11/25. | |
| // | |
| import SwiftUI | |
/// Application entry point: a single window group hosting `ContentView`.
@main
struct DesiderataReaderApp: App {
    /// The app's only scene.
    var body: some Scene {
        WindowGroup { ContentView() }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment

