Books — read-aloud mode with active-word highlight and tap-to-define

Adds a TTS read-along to the book reader. Tap the play button in the
toolbar; AVSpeechSynthesizer reads the chapter paragraph-by-paragraph
with the current word highlighted in yellow, auto-scrolling the active
paragraph to centre. Tap any word during read-along to pause and open
the definition sheet; reading resumes when the sheet dismisses.

Behavior per spec:
- Tap-to-define interrupts the synth (pauseSpeaking at: .immediate) and
  resumes on sheet dismiss.
- Voice picker sheet (waveform.circle toolbar button) lists installed
  Spanish voices grouped by Premium / Enhanced / Default quality, with
  a "Download more voices…" row that opens iOS Settings (no public
  deep-link to Accessibility → Spoken Content exists; the footer spells
  out the path).
- Speed picker (Slow / Normal / Fast) drives AVSpeechUtterance.rate.
- Stops at chapter end, no auto-advance to the next chapter.
- Vocabulary lines shaped `palabra = meaning` are skipped — the synth
  would otherwise say "palabra equals meaning" and they're reference
  material, not prose.

Audio session uses .playback + .spokenAudio mode and is properly
deactivated with .notifyOthersOnDeactivation on stop() so music apps
resume cleanly after reading ends.

Voice/rate persisted via @AppStorage; controller picks them up
onAppear and writes back through Bindings the picker mutates.

Word-index space in BookSpeechController.wordRanges(in:) matches
BookReaderView's split(separator: " ") rendering exactly — both split
on ASCII U+0020 only, so willSpeakRange callbacks resolve to the right
visible word.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Trey T
2026-05-11 21:42:09 -05:00
parent 70d8299df8
commit a416233a2d
4 changed files with 472 additions and 21 deletions
@@ -24,6 +24,8 @@
28D2F489F1927BCCC2B56086 /* IrregularHighlightText.swift in Sources */ = {isa = PBXBuildFile; fileRef = 42ADC600530309A9B147A663 /* IrregularHighlightText.swift */; };
2B5B2D63DC9C290F66890A4A /* course_data.json in Resources */ = {isa = PBXBuildFile; fileRef = BC273716CD14A99EFF8206CA /* course_data.json */; };
2C7ABAB4D88E3E3B0EAD1EF7 /* PracticeHeaderView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BF946245110C92F087D81E8 /* PracticeHeaderView.swift */; };
2CB1E7454C1C04C2A9A06D57 /* BookVoicePickerSheet.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1C5F851F5C2C71C293DD9938 /* BookVoicePickerSheet.swift */; };
33BFEC0F0DEFC8A0E1FD8009 /* BookSpeechController.swift in Sources */ = {isa = PBXBuildFile; fileRef = A661ADF1141176EE96774138 /* BookSpeechController.swift */; };
33E885EB38C3BB0CB058871A /* HandwritingView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1F842EB5E566C74658D918BB /* HandwritingView.swift */; };
352A5BAA6E406AA5850653A4 /* PracticeSessionService.swift in Sources */ = {isa = PBXBuildFile; fileRef = 842DB48F8570C39CDCFF2F57 /* PracticeSessionService.swift */; };
354631F309E625046A3A436B /* TextbookExerciseView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 854EA2A8D6CF203958BA3C24 /* TextbookExerciseView.swift */; };
@@ -162,6 +164,7 @@
1C3E36BDC2540AF2A67AEEB1 /* FeatureReferenceView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FeatureReferenceView.swift; sourceTree = "<group>"; };
1C42EA0EBD4CB1E10A82BA25 /* AchievementService.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AchievementService.swift; sourceTree = "<group>"; };
1C4B5204F6B8647C816814F0 /* SyncToast.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SyncToast.swift; sourceTree = "<group>"; };
1C5F851F5C2C71C293DD9938 /* BookVoicePickerSheet.swift */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = sourcecode.swift; path = BookVoicePickerSheet.swift; sourceTree = "<group>"; };
1EA0FA4F9149B9D8E197ADE9 /* PracticeView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PracticeView.swift; sourceTree = "<group>"; };
1EB4830F9289AACC82D753F8 /* ConjugaApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConjugaApp.swift; sourceTree = "<group>"; };
1F842EB5E566C74658D918BB /* HandwritingView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HandwritingView.swift; sourceTree = "<group>"; };
@@ -229,6 +232,7 @@
A014EEC3EE08E945FBBA5335 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
A4B95B276C054DBFE508C4D1 /* StartupCoordinator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StartupCoordinator.swift; sourceTree = "<group>"; };
A63061BBC8998DF33E3DCA2B /* VerbListView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VerbListView.swift; sourceTree = "<group>"; };
A661ADF1141176EE96774138 /* BookSpeechController.swift */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = sourcecode.swift; path = BookSpeechController.swift; sourceTree = "<group>"; };
A6EC7C278E4287D91A0DB355 /* youtube_videos.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = youtube_videos.md; sourceTree = "<group>"; };
A7CDC5F2660A3009A3ADF048 /* StoryQuizView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StoryQuizView.swift; sourceTree = "<group>"; };
AC34396050805693AA4AC582 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist; path = Info.plist; sourceTree = "<group>"; };
@@ -353,6 +357,7 @@
D570252DA3DCDD9217C71863 /* WidgetDataService.swift */,
AFF65B05E7CEC386F121973E /* YouTubeVideoStore.swift */,
221920B9BD6DC6F084093975 /* ExtraStudyStore.swift */,
A661ADF1141176EE96774138 /* BookSpeechController.swift */,
);
path = Services;
sourceTree = "<group>";
@@ -457,6 +462,7 @@
340B1F22929DC7C1DEB0EA8A /* BookLibraryView.swift */,
FF3475931F1AD16054741E65 /* BookChapterListView.swift */,
EDD4AF96186662567525F8C4 /* BookReaderView.swift */,
1C5F851F5C2C71C293DD9938 /* BookVoicePickerSheet.swift */,
);
name = Books;
path = Books;
@@ -766,6 +772,8 @@
65382875879BD537F5358381 /* BookLibraryView.swift in Sources */,
4E00225D668FDFA3026B7627 /* BookChapterListView.swift in Sources */,
64E08FBC4B188B332F8039FD /* BookReaderView.swift in Sources */,
33BFEC0F0DEFC8A0E1FD8009 /* BookSpeechController.swift in Sources */,
2CB1E7454C1C04C2A9A06D57 /* BookVoicePickerSheet.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@@ -0,0 +1,214 @@
import AVFoundation
import Foundation
import Observation
/// Drives "read aloud" mode for `BookReaderView`. Wraps an
/// `AVSpeechSynthesizer` with a queue of paragraph utterances and exposes the
/// current paragraph/word index so the view can highlight the active word.
///
/// Skips vocabulary lines (`palabra = meaning`) since the synth pronounces the
/// `=` awkwardly and the bilingual gloss is reference material, not prose.
@MainActor
@Observable
final class BookSpeechController: NSObject, AVSpeechSynthesizerDelegate {
    // MARK: - Observable state

    /// `true` from a successful `start(paragraphs:from:)` until `stop()` runs
    /// (explicitly, or automatically once the queue is exhausted).
    private(set) var isReading: Bool = false
    /// `true` while the synthesizer has been paused via `pause()`.
    private(set) var isPaused: Bool = false
    /// Index, in the caller's original `paragraphs` array, of the paragraph
    /// currently being spoken; `nil` when nothing is queued.
    private(set) var currentParagraphIndex: Int? = nil
    /// Index into the space-split words of the current paragraph; `nil` until
    /// the first word-range callback for that paragraph arrives.
    private(set) var currentWordIndex: Int? = nil

    // MARK: - Configuration

    /// Speech rate applied to each utterance when it is created. Values
    /// outside the range AVFoundation accepts are clamped at that point.
    var rate: Float = 0.45
    /// Identifier of the preferred voice. When `nil` or not resolvable on this
    /// device, an `es-ES` voice is requested instead.
    var voiceIdentifier: String? = nil

    // MARK: - Internals

    private let synthesizer = AVSpeechSynthesizer()
    private var queue: [QueueEntry] = []
    private var queueCursor: Int = 0
    private var audioSessionConfigured = false

    /// One speakable paragraph plus the ranges of its space-separated words.
    private struct QueueEntry {
        let paragraphIndex: Int
        let text: String
        let wordRanges: [Range<String.Index>]
    }

    override init() {
        super.init()
        synthesizer.delegate = self
    }

    // MARK: - Public control

    /// Start (or restart) reading the given paragraphs.
    ///
    /// Indexes published through `currentParagraphIndex` are positions in the
    /// original `paragraphs` array — vocab lines and paragraphs with no words
    /// are skipped internally, but the visible index space matches what the
    /// caller passed.
    ///
    /// - Parameters:
    ///   - paragraphs: The chapter's paragraphs, in display order.
    ///   - startIndex: First paragraph index eligible for reading.
    func start(paragraphs: [String], from startIndex: Int = 0) {
        stop()

        var entries: [QueueEntry] = []
        for (idx, paragraph) in paragraphs.enumerated() where idx >= startIndex {
            if Self.isVocabLine(paragraph) { continue }
            let ranges = Self.wordRanges(in: paragraph)
            // Empty or whitespace-only paragraphs have nothing to speak or
            // highlight; queueing them would only add a silent utterance.
            guard !ranges.isEmpty else { continue }
            entries.append(QueueEntry(
                paragraphIndex: idx,
                text: paragraph,
                wordRanges: ranges
            ))
        }
        guard !entries.isEmpty else { return }

        // Claim the audio session only once we know something will be spoken.
        // Activating it earlier would interrupt other apps' audio and leave
        // the session held even though reading never starts.
        configureAudioSession()

        queue = entries
        queueCursor = 0
        isReading = true
        isPaused = false
        speakCurrent()
    }

    /// Pause immediately (no word boundary). Use this for tap-to-define so the
    /// audio stops the moment the user taps.
    ///
    /// `isPaused` only flips when the synthesizer reports that it actually
    /// paused, so the flag never claims a pause that did not happen.
    func pause() {
        guard isReading, !isPaused else { return }
        guard synthesizer.pauseSpeaking(at: .immediate) else { return }
        isPaused = true
    }

    /// Continue after `pause()`. No-op unless reading is currently paused.
    func resume() {
        guard isReading, isPaused else { return }
        synthesizer.continueSpeaking()
        isPaused = false
    }

    /// Stop speaking, clear the queue and all published state, and release
    /// the audio session.
    func stop() {
        synthesizer.stopSpeaking(at: .immediate)
        queue.removeAll()
        queueCursor = 0
        isReading = false
        isPaused = false
        currentParagraphIndex = nil
        currentWordIndex = nil
        deactivateAudioSession()
    }

    // MARK: - Vocab detection + word ranges

    /// Vocabulary entries in the book are formatted `palabra = meaning`.
    /// Reading them aloud says "palabra equals meaning" which is awkward, and
    /// they're reference material, so the read-along skips them.
    ///
    /// - Returns: `true` when the paragraph contains `" = "`.
    nonisolated static func isVocabLine(_ paragraph: String) -> Bool {
        paragraph.contains(" = ")
    }

    /// Word ranges that match the BookReaderView's space-split rendering —
    /// the visible word index N in a paragraph corresponds to `wordRanges[N]`.
    ///
    /// Uses the same `split(separator: " ")` call as the view, so the two
    /// tokenisations cannot drift apart. The resulting `Substring` bounds are
    /// valid indices into `text`.
    nonisolated static func wordRanges(in text: String) -> [Range<String.Index>] {
        text.split(separator: " ", omittingEmptySubsequences: true)
            .map { $0.startIndex..<$0.endIndex }
    }

    // MARK: - Private

    /// Speaks the entry at `queueCursor`, or stops when the queue is exhausted.
    private func speakCurrent() {
        guard queueCursor < queue.count else {
            stop()
            return
        }
        let entry = queue[queueCursor]
        currentParagraphIndex = entry.paragraphIndex
        currentWordIndex = nil

        let utterance = AVSpeechUtterance(string: entry.text)
        utterance.voice = resolveVoice()
        // Keep a persisted or hand-set value inside the supported range.
        utterance.rate = min(
            max(rate, AVSpeechUtteranceMinimumSpeechRate),
            AVSpeechUtteranceMaximumSpeechRate
        )
        utterance.pitchMultiplier = 1.0
        utterance.postUtteranceDelay = 0.20
        synthesizer.speak(utterance)
    }

    /// Preferred voice if it resolves on this device, otherwise an `es-ES` voice.
    private func resolveVoice() -> AVSpeechSynthesisVoice? {
        if let id = voiceIdentifier, let voice = AVSpeechSynthesisVoice(identifier: id) {
            return voice
        }
        return AVSpeechSynthesisVoice(language: "es-ES")
    }

    /// Activates a `.playback` / `.spokenAudio` session once per reading run.
    private func configureAudioSession() {
        guard !audioSessionConfigured else { return }
        do {
            let session = AVAudioSession.sharedInstance()
            try session.setCategory(.playback, mode: .spokenAudio, options: [])
            try session.setActive(true)
            audioSessionConfigured = true
        } catch {
            print("[BookSpeech] audio session failed: \(error)")
        }
    }

    /// Release audio focus on stop so the OS hands control back to whatever
    /// app was playing before (music, podcast, etc.). Without this the
    /// session stays "active" until the app is killed.
    private func deactivateAudioSession() {
        guard audioSessionConfigured else { return }
        do {
            try AVAudioSession.sharedInstance().setActive(false, options: [.notifyOthersOnDeactivation])
        } catch {
            print("[BookSpeech] audio session deactivation failed: \(error)")
        }
        audioSessionConfigured = false
    }

    /// Maps the synthesizer's UTF-16 `NSRange` onto the current entry's word
    /// ranges and publishes the matching word index when it changes.
    private func handleWillSpeakRange(_ range: NSRange) {
        guard queueCursor < queue.count else { return }
        let entry = queue[queueCursor]
        guard let stringRange = Range(range, in: entry.text) else { return }
        let lower = stringRange.lowerBound
        let idx = entry.wordRanges.firstIndex {
            $0.lowerBound <= lower && lower < $0.upperBound
        }
        if let idx, idx != currentWordIndex {
            currentWordIndex = idx
        }
    }

    /// Advances to the next queued paragraph; `speakCurrent()` stops the
    /// session once the cursor runs past the end of the queue.
    private func handleDidFinish() {
        queueCursor += 1
        speakCurrent()
    }

    // MARK: - AVSpeechSynthesizerDelegate

    // The delegate methods are nonisolated, so each hops to the main actor
    // before touching controller state.

    nonisolated func speechSynthesizer(
        _ synthesizer: AVSpeechSynthesizer,
        willSpeakRangeOfSpeechString characterRange: NSRange,
        utterance: AVSpeechUtterance
    ) {
        Task { @MainActor in self.handleWillSpeakRange(characterRange) }
    }

    nonisolated func speechSynthesizer(
        _ synthesizer: AVSpeechSynthesizer,
        didFinish utterance: AVSpeechUtterance
    ) {
        Task { @MainActor in self.handleDidFinish() }
    }
}
@@ -6,39 +6,64 @@ struct BookReaderView: View {
let chapter: BookChapter
@Environment(DictionaryService.self) private var dictionary
@State private var speech = BookSpeechController()
@State private var selectedWord: WordAnnotation?
@State private var showEnglish = false
@State private var showVoicePicker = false
@State private var wasReadingBeforeTap = false
@State private var lookupCache: [String: WordAnnotation] = [:]
@AppStorage("bookReaderVoiceId") private var storedVoiceId: String = ""
@AppStorage("bookReaderRate") private var storedRate: Double = 0.45
private var paragraphsES: [String] { chapter.paragraphsES() }
private var paragraphsEN: [String] { chapter.paragraphsEN() }
var body: some View {
ScrollView {
LazyVStack(alignment: .leading, spacing: 18) {
Text(chapter.title)
.font(.title2.bold())
.padding(.bottom, 4)
ScrollViewReader { proxy in
ScrollView {
LazyVStack(alignment: .leading, spacing: 18) {
Text(chapter.title)
.font(.title2.bold())
.padding(.bottom, 4)
.id(-1)
ForEach(Array(paragraphsES.enumerated()), id: \.offset) { index, paragraph in
if showEnglish {
Text(translation(for: index))
.font(.body)
.foregroundStyle(.secondary)
} else {
TappableParagraph(text: paragraph, cache: lookupCache) { word in
handleTap(word: word, paragraph: paragraph)
}
ForEach(Array(paragraphsES.enumerated()), id: \.offset) { index, paragraph in
paragraphView(index: index, paragraph: paragraph)
.id(index)
}
}
.padding()
.adaptiveContainer(maxWidth: 800)
}
.onChange(of: speech.currentParagraphIndex) { _, newIndex in
guard let newIndex else { return }
withAnimation(.easeInOut(duration: 0.25)) {
proxy.scrollTo(newIndex, anchor: .center)
}
}
.padding()
.adaptiveContainer(maxWidth: 800)
}
.navigationTitle("Chapter \(chapter.number)")
.navigationBarTitleDisplayMode(.inline)
.toolbar {
ToolbarItem(placement: .topBarTrailing) {
ToolbarItemGroup(placement: .topBarTrailing) {
Button {
showVoicePicker = true
} label: {
Image(systemName: "waveform.circle")
.symbolRenderingMode(.hierarchical)
}
.accessibilityLabel("Voice & speed")
Button {
toggleReadAloud()
} label: {
Image(systemName: speech.isReading ? "stop.circle.fill" : "play.circle.fill")
.symbolRenderingMode(.hierarchical)
.foregroundStyle(.indigo)
}
.accessibilityLabel(speech.isReading ? "Stop reading" : "Read aloud")
Button {
withAnimation { showEnglish.toggle() }
} label: {
@@ -48,10 +73,36 @@ struct BookReaderView: View {
.accessibilityLabel(showEnglish ? "Show Spanish" : "Show English")
}
}
.sheet(item: $selectedWord) { word in
.sheet(item: $selectedWord, onDismiss: handleSheetDismiss) { word in
WordDetailSheet(word: word)
.presentationDetents([.height(220)])
}
.sheet(isPresented: $showVoicePicker) {
BookVoicePickerSheet(voiceIdentifier: voiceBinding, rate: rateBinding)
}
.onAppear {
speech.voiceIdentifier = storedVoiceId.isEmpty ? nil : storedVoiceId
speech.rate = Float(storedRate)
}
.onDisappear {
speech.stop()
}
}
/// Renders one paragraph: the English translation when `showEnglish` is on,
/// otherwise the tappable Spanish text. Only the paragraph the speech
/// controller is currently on receives a highlighted word index.
@ViewBuilder
private func paragraphView(index: Int, paragraph: String) -> some View {
    if showEnglish {
        Text(translation(for: index))
            .font(.body)
            .foregroundStyle(.secondary)
    } else {
        // Every other paragraph gets `nil`, i.e. no highlight.
        let activeWord = speech.currentParagraphIndex == index
            ? speech.currentWordIndex
            : nil
        TappableParagraph(text: paragraph, highlightedWordIndex: activeWord) { tapped in
            handleTap(word: tapped, paragraph: paragraph)
        }
    }
}
private func translation(for index: Int) -> String {
@@ -60,9 +111,53 @@ struct BookReaderView: View {
return en.isEmpty ? "[translation unavailable]" : en
}
// MARK: - Read-along controls
/// Toolbar play/stop action: stops an active read-along, otherwise starts
/// one over the chapter's Spanish paragraphs.
private func toggleReadAloud() {
    guard !speech.isReading else {
        speech.stop()
        return
    }
    // V1 always starts from the top of the chapter. Starting from the
    // topmost visible paragraph would need a scroll-position observer,
    // which isn't worth the complexity yet.
    speech.start(paragraphs: paragraphsES)
}
/// Binding handed to the voice picker. Reads the persisted voice id (an
/// empty string means "no explicit voice" and surfaces as `nil`); writes go
/// to both the persisted value and the live speech controller.
private var voiceBinding: Binding<String?> {
    Binding<String?>(
        get: {
            guard !storedVoiceId.isEmpty else { return nil }
            return storedVoiceId
        },
        set: { selection in
            speech.voiceIdentifier = selection
            storedVoiceId = selection ?? ""
        }
    )
}
/// Binding handed to the speed picker. The rate is persisted as `Double`
/// and exposed as `Float`; writes go to both the persisted value and the
/// live speech controller.
private var rateBinding: Binding<Float> {
    Binding<Float>(
        get: { Float(storedRate) },
        set: { selectedRate in
            speech.rate = selectedRate
            storedRate = Double(selectedRate)
        }
    )
}
// MARK: - Word tap definition
private func handleTap(word: String, paragraph: String) {
let cleaned = cleanWord(word)
if cleaned.isEmpty { return }
// If reading aloud, pause immediately. Remember so we can resume when
// the user dismisses the definition sheet.
if speech.isReading, !speech.isPaused {
speech.pause()
wasReadingBeforeTap = true
}
if let cached = lookupCache[cleaned] {
selectedWord = cached
return
@@ -90,6 +185,12 @@ struct BookReaderView: View {
}
}
/// Called when the definition sheet closes. Resumes the read-along only if
/// a word tap was what paused it, and clears that flag either way.
private func handleSheetDismiss() {
    if wasReadingBeforeTap {
        wasReadingBeforeTap = false
        speech.resume()
    }
}
private func cleanWord(_ word: String) -> String {
word.lowercased()
.trimmingCharacters(in: .punctuationCharacters)
@@ -101,14 +202,14 @@ struct BookReaderView: View {
private struct TappableParagraph: View {
let text: String
let cache: [String: WordAnnotation]
let highlightedWordIndex: Int?
let onTap: (String) -> Void
var body: some View {
let words = text.split(separator: " ", omittingEmptySubsequences: true).map(String.init)
FlowLayout(spacing: 0) {
ForEach(Array(words.enumerated()), id: \.offset) { _, word in
WordButton(word: word, onTap: onTap)
ForEach(Array(words.enumerated()), id: \.offset) { idx, word in
WordButton(word: word, isHighlighted: idx == highlightedWordIndex, onTap: onTap)
}
}
.accessibilityElement(children: .combine)
@@ -117,6 +218,7 @@ private struct TappableParagraph: View {
private struct WordButton: View {
let word: String
let isHighlighted: Bool
let onTap: (String) -> Void
var body: some View {
@@ -126,6 +228,15 @@ private struct WordButton: View {
Text(word + " ")
.font(.body)
.foregroundStyle(.primary)
.padding(.horizontal, isHighlighted ? 2 : 0)
.padding(.vertical, 1)
.background(
isHighlighted
? Color.yellow.opacity(0.35)
: Color.clear,
in: RoundedRectangle(cornerRadius: 4)
)
.animation(.easeInOut(duration: 0.15), value: isHighlighted)
}
.buttonStyle(.plain)
}
@@ -0,0 +1,118 @@
import SwiftUI
import AVFoundation
import UIKit
/// Voice + speed picker shown from the book reader's toolbar. Lists Spanish
/// voices currently installed on the device grouped by quality, and offers a
/// shortcut to the iOS Settings app where the user can download premium voices
/// (no public deep-link to the Accessibility section exists, so we open the
/// app's own Settings page with a hint).
struct BookVoicePickerSheet: View {
    /// Identifier of the selected voice; `nil` when none has been chosen.
    @Binding var voiceIdentifier: String?
    /// Speech rate. May hold a value that is not one of the presets.
    @Binding var rate: Float
    @Environment(\.dismiss) private var dismiss

    /// The three rates offered by the segmented speed control.
    private enum SpeedPreset {
        static let slow: Float = 0.40
        static let normal: Float = 0.50
        static let fast: Float = 0.55
        static let all: [Float] = [slow, normal, fast]
    }

    private struct VoiceGroup: Identifiable {
        let id: String
        let title: String
        let voices: [AVSpeechSynthesisVoice]
    }

    /// Selection binding for the speed picker.
    ///
    /// The stored rate may not equal any preset, and a `Picker` whose
    /// selection matches no tag shows nothing selected. Reads therefore snap
    /// to the nearest preset; writes pass the chosen preset through unchanged.
    private var presetRate: Binding<Float> {
        Binding(
            get: {
                let current = rate
                return SpeedPreset.all.min { abs($0 - current) < abs($1 - current) } ?? current
            },
            set: { rate = $0 }
        )
    }

    /// Installed voices whose language code starts with `es`, bucketed by
    /// quality (Premium, Enhanced, Default). Empty buckets are omitted; within
    /// a bucket voices are ordered by language code, then name.
    private var groups: [VoiceGroup] {
        let all = AVSpeechSynthesisVoice.speechVoices()
            .filter { $0.language.hasPrefix("es") }
        let buckets: [(String, AVSpeechSynthesisVoiceQuality)] = [
            ("Premium", .premium),
            ("Enhanced", .enhanced),
            ("Default", .default),
        ]
        return buckets.compactMap { (title, quality) in
            let voices = all
                .filter { $0.quality == quality }
                .sorted { lhs, rhs in
                    if lhs.language != rhs.language { return lhs.language < rhs.language }
                    return lhs.name < rhs.name
                }
            return voices.isEmpty ? nil : VoiceGroup(id: title, title: title, voices: voices)
        }
    }

    var body: some View {
        // `groups` re-queries the installed voices on every access, so read
        // it once per render.
        let voiceGroups = groups
        NavigationStack {
            Form {
                Section("Speed") {
                    Picker("Speed", selection: presetRate) {
                        Text("Slow").tag(SpeedPreset.slow)
                        Text("Normal").tag(SpeedPreset.normal)
                        Text("Fast").tag(SpeedPreset.fast)
                    }
                    .pickerStyle(.segmented)
                }
                if voiceGroups.isEmpty {
                    Section {
                        ContentUnavailableView(
                            "No Spanish voices",
                            systemImage: "person.wave.2",
                            description: Text("Install a Spanish voice in Settings → Accessibility → Spoken Content → Voices.")
                        )
                    }
                } else {
                    ForEach(voiceGroups) { group in
                        Section(group.title) {
                            ForEach(group.voices, id: \.identifier) { voice in
                                voiceRow(voice)
                            }
                        }
                    }
                }
                Section {
                    Button {
                        openSettings()
                    } label: {
                        Label("Download more voices…", systemImage: "arrow.down.circle")
                    }
                } footer: {
                    Text("Opens Settings. Navigate to Accessibility → Spoken Content → Voices → Spanish to install premium or enhanced voices.")
                }
            }
            .navigationTitle("Read aloud")
            .navigationBarTitleDisplayMode(.inline)
            .toolbar {
                ToolbarItem(placement: .confirmationAction) {
                    Button("Done") { dismiss() }
                }
            }
        }
    }

    /// A selectable row for one voice; shows a checkmark when it is the
    /// currently selected voice.
    private func voiceRow(_ voice: AVSpeechSynthesisVoice) -> some View {
        Button {
            voiceIdentifier = voice.identifier
        } label: {
            HStack(spacing: 12) {
                VStack(alignment: .leading, spacing: 2) {
                    Text(voice.name)
                        .font(.body)
                        .foregroundStyle(.primary)
                    Text(voice.language)
                        .font(.caption)
                        .foregroundStyle(.secondary)
                }
                Spacer()
                if voice.identifier == voiceIdentifier {
                    Image(systemName: "checkmark")
                        .foregroundStyle(.indigo)
                }
            }
        }
        .tint(.primary)
    }

    /// Opens this app's page in the Settings app.
    private func openSettings() {
        if let url = URL(string: UIApplication.openSettingsURLString) {
            UIApplication.shared.open(url)
        }
    }
}