Add Books — read EPUB-imported books in Practice with tap-to-define

New "Books" row in the Practice tab opens a library of bundled bilingual
books. Each chapter renders Spanish paragraph-by-paragraph; tap any
word for a definition sheet (DictionaryService with on-device AI
fallback), or toggle the toolbar button to swap to the pre-computed
English translation inline.

Local-only Book + BookChapter SwiftData models added to the local
container schema (reset version bumped to 5). DataLoader.seedBooks
walks the bundle for `book_*.json` resources, so future books drop in
without touching app code — just bundle a new JSON and bump
bookDataVersion.

First book: Olly Richards' "Spanish Short Stories For Beginners
Vol 2" — 13 chapters, 2,646 paragraphs, bilingual.

Scripts/books/ is the repeatable pipeline for future EPUBs:
extract_epub.py → translate_chapters.py (per-chapter resumable jobs) →
bundle_book.py. Translation is done by parallel Claude Code subagents
reading per-job input files and writing output files — no API key
required, matching the pattern used for the textbook vocab vision
pass. See Scripts/books/README.md for the full how-to.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Trey T
2026-05-11 09:21:44 -05:00
parent ade091f108
commit 09e49bda2c
17 changed files with 6782 additions and 1 deletions
+25
View File
@@ -38,6 +38,7 @@
48967E05C65E32F7082716CD /* AnswerChecker.swift in Sources */ = {isa = PBXBuildFile; fileRef = B3EFFA19D0AB2528A868E8ED /* AnswerChecker.swift */; }; 48967E05C65E32F7082716CD /* AnswerChecker.swift in Sources */ = {isa = PBXBuildFile; fileRef = B3EFFA19D0AB2528A868E8ED /* AnswerChecker.swift */; };
4C3484403FD96E37DA4BEA66 /* NewWordIntent.swift in Sources */ = {isa = PBXBuildFile; fileRef = 72CB5F95DF256DF7CD73269D /* NewWordIntent.swift */; }; 4C3484403FD96E37DA4BEA66 /* NewWordIntent.swift in Sources */ = {isa = PBXBuildFile; fileRef = 72CB5F95DF256DF7CD73269D /* NewWordIntent.swift */; };
4C577CF6B137D0A32759A169 /* VerbExampleGenerator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 02EB3F9305349775E0EB28B9 /* VerbExampleGenerator.swift */; }; 4C577CF6B137D0A32759A169 /* VerbExampleGenerator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 02EB3F9305349775E0EB28B9 /* VerbExampleGenerator.swift */; };
4E00225D668FDFA3026B7627 /* BookChapterListView.swift in Sources */ = {isa = PBXBuildFile; fileRef = FF3475931F1AD16054741E65 /* BookChapterListView.swift */; };
50E0095A23E119D1AB561232 /* VerbDetailView.swift in Sources */ = {isa = PBXBuildFile; fileRef = E1DBE662F89F02A0282F5BEE /* VerbDetailView.swift */; }; 50E0095A23E119D1AB561232 /* VerbDetailView.swift in Sources */ = {isa = PBXBuildFile; fileRef = E1DBE662F89F02A0282F5BEE /* VerbDetailView.swift */; };
519E68D2DF4C80AB96058C0D /* LyricsConfirmationView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3EA01795655C444795577A22 /* LyricsConfirmationView.swift */; }; 519E68D2DF4C80AB96058C0D /* LyricsConfirmationView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3EA01795655C444795577A22 /* LyricsConfirmationView.swift */; };
51D072AF30F4B12CD3E8F918 /* SRSEngine.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5C0E6EAFC0D24928BA956FA5 /* SRSEngine.swift */; }; 51D072AF30F4B12CD3E8F918 /* SRSEngine.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5C0E6EAFC0D24928BA956FA5 /* SRSEngine.swift */; };
@@ -48,6 +49,8 @@
60E86BABE2735E2052B99DF3 /* SettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = BCCC95A95581458E068E0484 /* SettingsView.swift */; }; 60E86BABE2735E2052B99DF3 /* SettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = BCCC95A95581458E068E0484 /* SettingsView.swift */; };
61328552866DE185B15011A9 /* StoryLibraryView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 15AC27B1E3D332709657F20B /* StoryLibraryView.swift */; }; 61328552866DE185B15011A9 /* StoryLibraryView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 15AC27B1E3D332709657F20B /* StoryLibraryView.swift */; };
615D3128ED6E84EF59BB5AA3 /* LyricsReaderView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 58394296923991E56BAC2B02 /* LyricsReaderView.swift */; }; 615D3128ED6E84EF59BB5AA3 /* LyricsReaderView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 58394296923991E56BAC2B02 /* LyricsReaderView.swift */; };
64E08FBC4B188B332F8039FD /* BookReaderView.swift in Sources */ = {isa = PBXBuildFile; fileRef = EDD4AF96186662567525F8C4 /* BookReaderView.swift */; };
65382875879BD537F5358381 /* BookLibraryView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 340B1F22929DC7C1DEB0EA8A /* BookLibraryView.swift */; };
6BB4B0A655E6CB6F82D81B5A /* WeekTestView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5E7EF4161C73AAC67B3A0004 /* WeekTestView.swift */; }; 6BB4B0A655E6CB6F82D81B5A /* WeekTestView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5E7EF4161C73AAC67B3A0004 /* WeekTestView.swift */; };
6D4A29280FDD99B8E18AF264 /* WidgetDataReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2889F2F81673AFF3A58A07A8 /* WidgetDataReader.swift */; }; 6D4A29280FDD99B8E18AF264 /* WidgetDataReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2889F2F81673AFF3A58A07A8 /* WidgetDataReader.swift */; };
6ED2AC2CAA54688161D4B920 /* SyncStatusMonitor.swift in Sources */ = {isa = PBXBuildFile; fileRef = 18CCD69C14D1B0CFBD03C92F /* SyncStatusMonitor.swift */; }; 6ED2AC2CAA54688161D4B920 /* SyncStatusMonitor.swift in Sources */ = {isa = PBXBuildFile; fileRef = 18CCD69C14D1B0CFBD03C92F /* SyncStatusMonitor.swift */; };
@@ -103,6 +106,7 @@
DB73836F751BB2751439E826 /* LyricsSearchService.swift in Sources */ = {isa = PBXBuildFile; fileRef = 43B8AED76C14A05AF2339C27 /* LyricsSearchService.swift */; }; DB73836F751BB2751439E826 /* LyricsSearchService.swift in Sources */ = {isa = PBXBuildFile; fileRef = 43B8AED76C14A05AF2339C27 /* LyricsSearchService.swift */; };
DF06034A4B2C11BA0C0A84CB /* ConjugaWidgetExtension.appex in Embed Foundation Extensions */ = {isa = PBXBuildFile; fileRef = 9708FF3CF33E4765DB225F93 /* ConjugaWidgetExtension.appex */; settings = {ATTRIBUTES = (RemoveHeadersOnCopy, ); }; }; DF06034A4B2C11BA0C0A84CB /* ConjugaWidgetExtension.appex in Embed Foundation Extensions */ = {isa = PBXBuildFile; fileRef = 9708FF3CF33E4765DB225F93 /* ConjugaWidgetExtension.appex */; settings = {ATTRIBUTES = (RemoveHeadersOnCopy, ); }; };
DF82C2579F9889DDB06362CC /* ReferenceStore.swift in Sources */ = {isa = PBXBuildFile; fileRef = 777C696A841803D5B775B678 /* ReferenceStore.swift */; }; DF82C2579F9889DDB06362CC /* ReferenceStore.swift in Sources */ = {isa = PBXBuildFile; fileRef = 777C696A841803D5B775B678 /* ReferenceStore.swift */; };
E3D9D82E54E37F9D38103FB9 /* book_olly-vol2.json in Resources */ = {isa = PBXBuildFile; fileRef = EBC046A3733791C29DAA6AC3 /* book_olly-vol2.json */; };
E7BFEE9A90E1300EFF5B1F32 /* HandwritingRecognizer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3695075616689E72DBB26D4C /* HandwritingRecognizer.swift */; }; E7BFEE9A90E1300EFF5B1F32 /* HandwritingRecognizer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3695075616689E72DBB26D4C /* HandwritingRecognizer.swift */; };
E814A9CF1067313F74B509C6 /* StoreInspector.swift in Sources */ = {isa = PBXBuildFile; fileRef = E8E9833868EB73AF9EB3A611 /* StoreInspector.swift */; }; E814A9CF1067313F74B509C6 /* StoreInspector.swift in Sources */ = {isa = PBXBuildFile; fileRef = E8E9833868EB73AF9EB3A611 /* StoreInspector.swift */; };
E99473B7DF9BCAE150E9D1E1 /* WidgetDataService.swift in Sources */ = {isa = PBXBuildFile; fileRef = D570252DA3DCDD9217C71863 /* WidgetDataService.swift */; }; E99473B7DF9BCAE150E9D1E1 /* WidgetDataService.swift in Sources */ = {isa = PBXBuildFile; fileRef = D570252DA3DCDD9217C71863 /* WidgetDataService.swift */; };
@@ -166,6 +170,7 @@
2889F2F81673AFF3A58A07A8 /* WidgetDataReader.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WidgetDataReader.swift; sourceTree = "<group>"; }; 2889F2F81673AFF3A58A07A8 /* WidgetDataReader.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WidgetDataReader.swift; sourceTree = "<group>"; };
2931634BEB33B93429CE254F /* VocabFlashcardView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VocabFlashcardView.swift; sourceTree = "<group>"; }; 2931634BEB33B93429CE254F /* VocabFlashcardView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VocabFlashcardView.swift; sourceTree = "<group>"; };
30EF2362D9FFF9B07A45CE6D /* StreakCalendarView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StreakCalendarView.swift; sourceTree = "<group>"; }; 30EF2362D9FFF9B07A45CE6D /* StreakCalendarView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StreakCalendarView.swift; sourceTree = "<group>"; };
340B1F22929DC7C1DEB0EA8A /* BookLibraryView.swift */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = sourcecode.swift; path = BookLibraryView.swift; sourceTree = "<group>"; };
34C67DD1A1CB9B8B5A2BDCED /* CheckpointExamView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CheckpointExamView.swift; sourceTree = "<group>"; }; 34C67DD1A1CB9B8B5A2BDCED /* CheckpointExamView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CheckpointExamView.swift; sourceTree = "<group>"; };
3540936F058728CFD87B1A1E /* textbook_vocab.json */ = {isa = PBXFileReference; lastKnownFileType = text.json; path = textbook_vocab.json; sourceTree = "<group>"; }; 3540936F058728CFD87B1A1E /* textbook_vocab.json */ = {isa = PBXFileReference; lastKnownFileType = text.json; path = textbook_vocab.json; sourceTree = "<group>"; };
3644B5ED77F29A65877D926A /* reflexive_verbs.json */ = {isa = PBXFileReference; lastKnownFileType = text.json; path = reflexive_verbs.json; sourceTree = "<group>"; }; 3644B5ED77F29A65877D926A /* reflexive_verbs.json */ = {isa = PBXFileReference; lastKnownFileType = text.json; path = reflexive_verbs.json; sourceTree = "<group>"; };
@@ -246,11 +251,14 @@
E8D95887B18216FCA71643D6 /* VocabReviewView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VocabReviewView.swift; sourceTree = "<group>"; }; E8D95887B18216FCA71643D6 /* VocabReviewView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VocabReviewView.swift; sourceTree = "<group>"; };
E8E9833868EB73AF9EB3A611 /* StoreInspector.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StoreInspector.swift; sourceTree = "<group>"; }; E8E9833868EB73AF9EB3A611 /* StoreInspector.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StoreInspector.swift; sourceTree = "<group>"; };
E972AA745F44586EF0B1B0C8 /* OnboardingView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OnboardingView.swift; sourceTree = "<group>"; }; E972AA745F44586EF0B1B0C8 /* OnboardingView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OnboardingView.swift; sourceTree = "<group>"; };
EBC046A3733791C29DAA6AC3 /* book_olly-vol2.json */ = {isa = PBXFileReference; includeInIndex = 1; path = "book_olly-vol2.json"; sourceTree = "<group>"; };
EBEEC9CC9A8C502AF5F42914 /* VerbExampleCache.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VerbExampleCache.swift; sourceTree = "<group>"; }; EBEEC9CC9A8C502AF5F42914 /* VerbExampleCache.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VerbExampleCache.swift; sourceTree = "<group>"; };
EDD4AF96186662567525F8C4 /* BookReaderView.swift */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = sourcecode.swift; path = BookReaderView.swift; sourceTree = "<group>"; };
F0A3099BE24A56F9B1F179E0 /* GrammarExercise.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GrammarExercise.swift; sourceTree = "<group>"; }; F0A3099BE24A56F9B1F179E0 /* GrammarExercise.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GrammarExercise.swift; sourceTree = "<group>"; };
F92BCE1A6720E47FCD26BADC /* StemChangeConjugationView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StemChangeConjugationView.swift; sourceTree = "<group>"; }; F92BCE1A6720E47FCD26BADC /* StemChangeConjugationView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StemChangeConjugationView.swift; sourceTree = "<group>"; };
FB5F16AFB9FAF6617FDFA35D /* DownloadedVideosView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DownloadedVideosView.swift; sourceTree = "<group>"; }; FB5F16AFB9FAF6617FDFA35D /* DownloadedVideosView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DownloadedVideosView.swift; sourceTree = "<group>"; };
FC2B1F646394D7C03493F1BF /* LyricsLibraryView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LyricsLibraryView.swift; sourceTree = "<group>"; }; FC2B1F646394D7C03493F1BF /* LyricsLibraryView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LyricsLibraryView.swift; sourceTree = "<group>"; };
FF3475931F1AD16054741E65 /* BookChapterListView.swift */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = sourcecode.swift; path = BookChapterListView.swift; sourceTree = "<group>"; };
/* End PBXFileReference section */ /* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */ /* Begin PBXFrameworksBuildPhase section */
@@ -292,6 +300,7 @@
1994867BC8E985795A172854 /* Services */, 1994867BC8E985795A172854 /* Services */,
3C75490F53C34A37084FF478 /* ViewModels */, 3C75490F53C34A37084FF478 /* ViewModels */,
A81CA75762B08D35D5B7A44D /* Views */, A81CA75762B08D35D5B7A44D /* Views */,
EBC046A3733791C29DAA6AC3 /* book_olly-vol2.json */,
); );
path = Conjuga; path = Conjuga;
sourceTree = "<group>"; sourceTree = "<group>";
@@ -437,10 +446,22 @@
8FB89F19B33894DDF27C8EC2 /* Chat */, 8FB89F19B33894DDF27C8EC2 /* Chat */,
895E547BEFB5D0FBF676BE33 /* Lyrics */, 895E547BEFB5D0FBF676BE33 /* Lyrics */,
43E4D263B0AF47E401A51601 /* Stories */, 43E4D263B0AF47E401A51601 /* Stories */,
74AC8A0D381958D2A14316C3 /* Books */,
); );
path = Practice; path = Practice;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
74AC8A0D381958D2A14316C3 /* Books */ = {
isa = PBXGroup;
children = (
340B1F22929DC7C1DEB0EA8A /* BookLibraryView.swift */,
FF3475931F1AD16054741E65 /* BookChapterListView.swift */,
EDD4AF96186662567525F8C4 /* BookReaderView.swift */,
);
name = Books;
path = Books;
sourceTree = "<group>";
};
8102F7FA5BFE6D38B2212AD3 /* Guide */ = { 8102F7FA5BFE6D38B2212AD3 /* Guide */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
@@ -643,6 +664,7 @@
A651D3E1584A34472BCE53B5 /* textbook_vocab.json in Resources */, A651D3E1584A34472BCE53B5 /* textbook_vocab.json in Resources */,
F26F3BF58CF557D5A65EE901 /* youtube_videos.json in Resources */, F26F3BF58CF557D5A65EE901 /* youtube_videos.json in Resources */,
983988CE911C0FC5D869C516 /* youtube_videos.md in Resources */, 983988CE911C0FC5D869C516 /* youtube_videos.md in Resources */,
E3D9D82E54E37F9D38103FB9 /* book_olly-vol2.json in Resources */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
@@ -741,6 +763,9 @@
05D825674F6508D6D12D2156 /* YouTubeVideoStore.swift in Sources */, 05D825674F6508D6D12D2156 /* YouTubeVideoStore.swift in Sources */,
AE156A84B4ECDB1A4A38CE88 /* ExtraStudyStore.swift in Sources */, AE156A84B4ECDB1A4A38CE88 /* ExtraStudyStore.swift in Sources */,
6F7BE3533FAF4DE1D514AA7C /* ExtraStudyView.swift in Sources */, 6F7BE3533FAF4DE1D514AA7C /* ExtraStudyView.swift in Sources */,
65382875879BD537F5358381 /* BookLibraryView.swift in Sources */,
4E00225D668FDFA3026B7627 /* BookChapterListView.swift in Sources */,
64E08FBC4B188B332F8039FD /* BookReaderView.swift in Sources */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
+3 -1
View File
@@ -224,6 +224,7 @@ struct ConjugaApp: App {
TenseGuide.self, CourseDeck.self, VocabCard.self, TenseGuide.self, CourseDeck.self, VocabCard.self,
TextbookChapter.self, TextbookChapter.self,
DownloadedVideo.self, DownloadedVideo.self,
Book.self, BookChapter.self,
]), ]),
url: url, url: url,
cloudKitDatabase: .none cloudKitDatabase: .none
@@ -233,6 +234,7 @@ struct ConjugaApp: App {
TenseGuide.self, CourseDeck.self, VocabCard.self, TenseGuide.self, CourseDeck.self, VocabCard.self,
TextbookChapter.self, TextbookChapter.self,
DownloadedVideo.self, DownloadedVideo.self,
Book.self, BookChapter.self,
configurations: localConfig configurations: localConfig
) )
} }
@@ -261,7 +263,7 @@ struct ConjugaApp: App {
/// Clears accumulated stale schema metadata from previous container configurations. /// Clears accumulated stale schema metadata from previous container configurations.
/// Bump the version number to force another reset if the schema changes again. /// Bump the version number to force another reset if the schema changes again.
private static func performOneTimeLocalStoreResetIfNeeded(at url: URL) { private static func performOneTimeLocalStoreResetIfNeeded(at url: URL) {
let resetVersion = 4 // bump: DownloadedVideo added to local container (Issue #21) let resetVersion = 5 // bump: Book/BookChapter added to local container
let key = "localStoreResetVersion" let key = "localStoreResetVersion"
let defaults = UserDefaults.standard let defaults = UserDefaults.standard
+140
View File
@@ -9,6 +9,9 @@ actor DataLoader {
static let textbookDataVersion = 14 static let textbookDataVersion = 14
static let textbookDataKey = "textbookDataVersion" static let textbookDataKey = "textbookDataVersion"
/// Version of the bundled book data. It is compared against the integer stored
/// in `UserDefaults` under `bookDataKey` to decide whether books need seeding.
static let bookDataVersion = 1
/// `UserDefaults` key under which the last successfully seeded
/// `bookDataVersion` is recorded.
static let bookDataKey = "bookDataVersion"
/// Quick check: does the DB need seeding or course data refresh? /// Quick check: does the DB need seeding or course data refresh?
static func needsSeeding(container: ModelContainer) async -> Bool { static func needsSeeding(container: ModelContainer) async -> Bool {
let context = ModelContext(container) let context = ModelContext(container)
@@ -21,6 +24,9 @@ actor DataLoader {
let textbookVersion = UserDefaults.standard.integer(forKey: textbookDataKey) let textbookVersion = UserDefaults.standard.integer(forKey: textbookDataKey)
if textbookVersion < textbookDataVersion { return true } if textbookVersion < textbookDataVersion { return true }
let bookVersion = UserDefaults.standard.integer(forKey: bookDataKey)
if bookVersion < bookDataVersion { return true }
return false return false
} }
@@ -146,6 +152,38 @@ actor DataLoader {
if seedTextbookData(context: context) { if seedTextbookData(context: context) {
UserDefaults.standard.set(textbookDataVersion, forKey: textbookDataKey) UserDefaults.standard.set(textbookDataVersion, forKey: textbookDataKey)
} }
if seedBooks(context: context) {
UserDefaults.standard.set(bookDataVersion, forKey: bookDataKey)
}
}
/// Re-seeds the book tables when the recorded data version is older than
/// `bookDataVersion` or when no `Book` rows exist.
///
/// All existing `Book` and `BookChapter` rows are deleted and saved before
/// seeding. The version stored under `bookDataKey` is only updated when
/// `seedBooks(context:)` reports success.
///
/// - Parameter container: The model container to create a working context from.
static func refreshBooksDataIfNeeded(container: ModelContainer) async {
    let defaults = UserDefaults.standard
    let modelContext = ModelContext(container)

    let bookRowCount = (try? modelContext.fetchCount(FetchDescriptor<Book>())) ?? 0
    let storedVersion = defaults.integer(forKey: bookDataKey)

    // Nothing to do when the stored data is current and rows are present.
    guard storedVersion < bookDataVersion || bookRowCount <= 0 else { return }

    // Wipe both tables. A failed fetch is tolerated and simply skips that table.
    (try? modelContext.fetch(FetchDescriptor<Book>()))?.forEach { staleBook in
        modelContext.delete(staleBook)
    }
    (try? modelContext.fetch(FetchDescriptor<BookChapter>()))?.forEach { staleChapter in
        modelContext.delete(staleChapter)
    }

    do {
        try modelContext.save()
    } catch {
        print("[DataLoader] ERROR: book wipe save failed: \(error)")
        return
    }

    guard seedBooks(context: modelContext) else { return }
    defaults.set(bookDataVersion, forKey: bookDataKey)
    print("Book data re-seeded to version \(bookDataVersion)")
}
/// Re-seed textbook data if the version has changed OR if the rows are /// Re-seed textbook data if the version has changed OR if the rows are
@@ -523,6 +561,108 @@ actor DataLoader {
return true return true
} }
// MARK: - Books seeding
/// Walks the bundle for `book_*.json` resources and seeds one `Book` row plus
/// its `BookChapter` rows from each file.
///
/// A file is skipped (with a warning) when it cannot be read, is not a JSON
/// object, or lacks `slug`, `title`, or `chapters`. A chapter entry is skipped
/// when it lacks an integer `number` or a string `title`. `Book.chapterCount`
/// is the number of chapters actually stored, so it always matches the rows
/// inserted for that book.
///
/// - Parameter context: The model context that rows are inserted into and saved on.
/// - Returns: `true` when the save succeeded and at least one `Book` row is
///   persisted afterwards; `false` when nothing is bundled, the save failed,
///   or no `Book` rows exist after saving.
@discardableResult
private static func seedBooks(context: ModelContext) -> Bool {
    let bookURLs = bundledBookJSONURLs()
    guard !bookURLs.isEmpty else {
        print("[DataLoader] no book_*.json bundled — skipping book seed")
        return false
    }

    // One encoder is enough for every paragraph array.
    let encoder = JSONEncoder()
    var insertedBooks = 0

    for url in bookURLs {
        guard let data = try? Data(contentsOf: url),
              let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any] else {
            print("[DataLoader] WARN: could not read \(url.lastPathComponent)")
            continue
        }
        guard let slug = json["slug"] as? String,
              let title = json["title"] as? String,
              let chaptersRaw = json["chapters"] as? [[String: Any]] else {
            print("[DataLoader] WARN: \(url.lastPathComponent) missing required fields")
            continue
        }
        let author = (json["author"] as? String) ?? ""
        let language = (json["language"] as? String) ?? "es"

        // Build the chapter rows first so the book's chapterCount reflects the
        // chapters that are really stored, not the raw (possibly malformed) entries.
        let chapters: [BookChapter] = chaptersRaw.compactMap { raw in
            guard let number = raw["number"] as? Int,
                  let chapterTitle = raw["title"] as? String else { return nil }
            let paragraphsES = (raw["paragraphsES"] as? [String]) ?? []
            let paragraphsEN = (raw["paragraphsEN"] as? [String]) ?? []
            return BookChapter(
                id: "\(slug)-ch\(number)",
                bookSlug: slug,
                number: number,
                title: chapterTitle,
                paragraphsESJSON: (try? encoder.encode(paragraphsES)) ?? Data(),
                paragraphsENJSON: (try? encoder.encode(paragraphsEN)) ?? Data()
            )
        }

        let book = Book(
            slug: slug,
            title: title,
            author: author,
            language: language,
            chapterCount: chapters.count,
            accentColorHex: accentHex(forSlug: slug)
        )
        context.insert(book)
        insertedBooks += 1
        for chapter in chapters {
            context.insert(chapter)
        }
    }

    do {
        try context.save()
    } catch {
        print("[DataLoader] ERROR: book save failed: \(error)")
        return false
    }

    // Verify against the store rather than trusting the in-memory counter.
    let persistedBooks = (try? context.fetchCount(FetchDescriptor<Book>())) ?? 0
    let persistedChapters = (try? context.fetchCount(FetchDescriptor<BookChapter>())) ?? 0
    guard persistedBooks > 0 else {
        print("[DataLoader] ERROR: seeded \(insertedBooks) books but persisted count is 0")
        return false
    }
    print("Book seeding complete: \(persistedBooks) books, \(persistedChapters) chapters")
    return true
}
/// Collects the main bundle's top-level `.json` resources whose file name
/// starts with `book_`.
///
/// - Returns: The matching URLs, de-duplicated by file name (first occurrence
///   wins) and sorted by file name in ascending order.
private static func bundledBookJSONURLs() -> [URL] {
    let jsonURLs = Bundle.main.urls(forResourcesWithExtension: "json", subdirectory: nil) ?? []

    var knownNames = Set<String>()
    let bookURLs = jsonURLs.filter { candidate in
        let fileName = candidate.lastPathComponent
        guard fileName.hasPrefix("book_") else { return false }
        // `insert` reports whether the name was new; repeats are dropped.
        return knownNames.insert(fileName).inserted
    }

    return bookURLs.sorted { lhs, rhs in
        lhs.lastPathComponent < rhs.lastPathComponent
    }
}
/// Picks an accent colour for a book from a fixed palette, derived only from
/// its slug so the same slug always maps to the same colour.
///
/// - Parameter slug: The book's slug; an empty slug maps to the first palette entry.
/// - Returns: A `#RRGGBB` hex string from the palette.
private static func accentHex(forSlug slug: String) -> String {
    let palette = [
        "#7B6CF6", "#E07A5F", "#3D5A80", "#81B29A",
        "#F2CC8F", "#D4A5A5", "#5B8A72", "#A06CD5",
    ]
    // Polynomial hash with wrapping arithmetic; the result may be negative.
    let hash = slug.unicodeScalars.reduce(0) { ($0 &* 31) &+ Int($1.value) }
    // `abs(_:)` traps when the wrapped hash equals `Int.min`. `magnitude` is
    // defined for every value and matches `abs` everywhere else, so existing
    // slugs keep their colour.
    let index = Int(hash.magnitude % UInt(palette.count))
    return palette[index]
}
private static func seedTextbookVocabDecks(context: ModelContext, courseName: String) { private static func seedTextbookVocabDecks(context: ModelContext, courseName: String) {
let url = Bundle.main.url(forResource: "textbook_vocab", withExtension: "json") let url = Bundle.main.url(forResource: "textbook_vocab", withExtension: "json")
?? Bundle.main.bundleURL.appendingPathComponent("textbook_vocab.json") ?? Bundle.main.bundleURL.appendingPathComponent("textbook_vocab.json")
@@ -10,6 +10,7 @@ enum StartupCoordinator {
await DataLoader.seedIfNeeded(container: localContainer) await DataLoader.seedIfNeeded(container: localContainer)
await DataLoader.refreshCourseDataIfNeeded(container: localContainer) await DataLoader.refreshCourseDataIfNeeded(container: localContainer)
await DataLoader.refreshTextbookDataIfNeeded(container: localContainer) await DataLoader.refreshTextbookDataIfNeeded(container: localContainer)
await DataLoader.refreshBooksDataIfNeeded(container: localContainer)
} }
/// Recurring maintenance: legacy migrations, identity repair, cloud dedup. /// Recurring maintenance: legacy migrations, identity repair, cloud dedup.
@@ -0,0 +1,43 @@
import SwiftUI
import SharedModels
import SwiftData
/// Lists the chapters of one book, ordered by chapter number.
///
/// Each row is a `NavigationLink` carrying the `BookChapter` as its value; the
/// matching `navigationDestination` must be registered by an enclosing view.
struct BookChapterListView: View {
    let book: Book
    @Query private var allChapters: [BookChapter]

    /// - Parameter book: The book whose chapters are shown. Chapters are matched
    ///   on `bookSlug == book.slug`.
    init(book: Book) {
        self.book = book
        // Copy the slug into a local so the predicate captures a plain value.
        let slug = book.slug
        _allChapters = Query(
            filter: #Predicate<BookChapter> { $0.bookSlug == slug },
            sort: \BookChapter.number
        )
    }

    var body: some View {
        List {
            ForEach(allChapters) { chapter in
                // Read the paragraph list once per row instead of once for the
                // number and again for the plural check.
                let paragraphCount = chapter.paragraphsES().count
                NavigationLink(value: chapter) {
                    HStack(spacing: 12) {
                        Text("\(chapter.number)")
                            .font(.subheadline.weight(.bold).monospacedDigit())
                            .foregroundStyle(.secondary)
                            .frame(width: 32, alignment: .trailing)
                        VStack(alignment: .leading, spacing: 2) {
                            Text(chapter.title)
                                .font(.subheadline.weight(.medium))
                            Text("\(paragraphCount) paragraph\(paragraphCount == 1 ? "" : "s")")
                                .font(.caption2)
                                .foregroundStyle(.tertiary)
                        }
                    }
                }
            }
        }
        // Show only the part of the title before the first colon.
        .navigationTitle(book.title.prefix(while: { $0 != ":" }).description)
        .navigationBarTitleDisplayMode(.inline)
    }
}
@@ -0,0 +1,103 @@
import SwiftUI
import SharedModels
import SwiftData
/// Library screen listing every stored `Book`, sorted by title.
///
/// Registers the navigation destinations for both `Book` (chapter list) and
/// `BookChapter` (reader), so pushes from this screen and from the chapter
/// list resolve here.
struct BookLibraryView: View {
    @Query(sort: \Book.title) private var books: [Book]

    var body: some View {
        Group {
            if books.isEmpty {
                emptyState
            } else {
                bookList
            }
        }
        .navigationTitle("Books")
        .navigationBarTitleDisplayMode(.inline)
        .navigationDestination(for: Book.self) { book in
            BookChapterListView(book: book)
        }
        .navigationDestination(for: BookChapter.self) { chapter in
            BookReaderView(chapter: chapter)
        }
    }

    /// Placeholder shown when no books are stored.
    private var emptyState: some View {
        ContentUnavailableView(
            "No Books",
            systemImage: "books.vertical",
            description: Text("Books bundled with the app will appear here.")
        )
    }

    /// Scrollable stack of book cards, each pushing its `Book` value.
    private var bookList: some View {
        ScrollView {
            LazyVStack(spacing: 12) {
                ForEach(books) { book in
                    NavigationLink(value: book) {
                        BookCard(book: book)
                    }
                    .tint(.primary)
                }
            }
            .padding()
        }
    }
}
/// Row-style card for one book: a coloured cover tile, the shortened title,
/// the author (when present), the chapter count, and a trailing chevron.
private struct BookCard: View {
    let book: Book

    /// Cover colour parsed from the book's stored hex string; indigo when the
    /// string cannot be parsed.
    private var accentColor: Color {
        Color(hex: book.accentColorHex) ?? .indigo
    }

    /// Title up to (not including) the first colon — most book titles are far
    /// too long to show in full. Titles without a colon are shown unchanged.
    private var shortTitle: String {
        String(book.title.prefix(while: { $0 != ":" }))
    }

    var body: some View {
        HStack(spacing: 14) {
            coverTile
            details
            Spacer()
            Image(systemName: "chevron.right")
                .font(.caption)
                .foregroundStyle(.tertiary)
        }
        .padding(.horizontal, 16)
        .padding(.vertical, 12)
        .glassEffect(in: RoundedRectangle(cornerRadius: 14))
    }

    /// Gradient-filled rectangle with a closed-book glyph on top.
    private var coverTile: some View {
        RoundedRectangle(cornerRadius: 6)
            .fill(accentColor.gradient)
            .frame(width: 48, height: 64)
            .overlay {
                Image(systemName: "book.closed.fill")
                    .font(.title3)
                    .foregroundStyle(.white.opacity(0.9))
            }
    }

    /// Title, optional author line, and pluralised chapter count.
    private var details: some View {
        VStack(alignment: .leading, spacing: 4) {
            Text(shortTitle)
                .font(.subheadline.weight(.semibold))
                .multilineTextAlignment(.leading)
            if !book.author.isEmpty {
                Text(book.author)
                    .font(.caption)
                    .foregroundStyle(.secondary)
            }
            Text("\(book.chapterCount) chapter\(book.chapterCount == 1 ? "" : "s")")
                .font(.caption2)
                .foregroundStyle(.tertiary)
        }
    }
}
private extension Color {
    /// Creates a colour from a six-digit `RRGGBB` hex string.
    ///
    /// Surrounding whitespace and one leading `#` are ignored.
    ///
    /// - Parameter hex: The hex string, e.g. `"#7B6CF6"` or `"7b6cf6"`.
    /// - Returns: `nil` unless exactly six hexadecimal digits remain after trimming.
    init?(hex: String) {
        var digits = hex.trimmingCharacters(in: .whitespacesAndNewlines)
        if digits.hasPrefix("#") { digits.removeFirst() }

        // `UInt32(_:radix:)` accepts a leading sign, so a six-character string
        // such as "+12345" would otherwise be read as a colour. Require hex
        // digits in every position before parsing.
        guard digits.count == 6,
              digits.allSatisfy(\.isHexDigit),
              let value = UInt32(digits, radix: 16) else { return nil }

        let red = Double((value >> 16) & 0xFF) / 255.0
        let green = Double((value >> 8) & 0xFF) / 255.0
        let blue = Double(value & 0xFF) / 255.0
        self.init(red: red, green: green, blue: blue)
    }
}
@@ -0,0 +1,275 @@
import SwiftUI
import SharedModels
import FoundationModels
/// Reader for a single chapter.
///
/// Shows the Spanish paragraphs as tappable words. A toolbar button switches
/// every paragraph to its English translation. Tapping a word opens a sheet
/// with its definition: the session cache is checked first, then
/// `DictionaryService`, then the asynchronous `WordLookup`.
struct BookReaderView: View {
    let chapter: BookChapter

    @Environment(DictionaryService.self) private var dictionary
    @State private var selectedWord: WordAnnotation?
    @State private var showEnglish = false
    @State private var lookupCache: [String: WordAnnotation] = [:]

    private var paragraphsES: [String] { chapter.paragraphsES() }
    private var paragraphsEN: [String] { chapter.paragraphsEN() }

    var body: some View {
        // Read each paragraph list once per body evaluation. The previous code
        // re-read the English list twice for every paragraph row.
        let spanish = paragraphsES
        let english = showEnglish ? paragraphsEN : []
        ScrollView {
            LazyVStack(alignment: .leading, spacing: 18) {
                Text(chapter.title)
                    .font(.title2.bold())
                    .padding(.bottom, 4)
                ForEach(Array(spanish.enumerated()), id: \.offset) { index, paragraph in
                    if showEnglish {
                        Text(translation(at: index, in: english))
                            .font(.body)
                            .foregroundStyle(.secondary)
                    } else {
                        TappableParagraph(text: paragraph, cache: lookupCache) { word in
                            handleTap(word: word, paragraph: paragraph)
                        }
                    }
                }
            }
            .padding()
            .adaptiveContainer(maxWidth: 800)
        }
        .navigationTitle("Chapter \(chapter.number)")
        .navigationBarTitleDisplayMode(.inline)
        .toolbar {
            ToolbarItem(placement: .topBarTrailing) {
                Button {
                    withAnimation { showEnglish.toggle() }
                } label: {
                    Image(systemName: showEnglish ? "character.book.closed.fill.he" : "character.book.closed")
                        .symbolRenderingMode(.hierarchical)
                }
                .accessibilityLabel(showEnglish ? "Show Spanish" : "Show English")
            }
        }
        .sheet(item: $selectedWord) { word in
            WordDetailSheet(word: word)
                .presentationDetents([.height(220)])
        }
    }

    /// Returns the English text for the paragraph at `index`.
    ///
    /// - Parameters:
    ///   - index: Position of the Spanish paragraph.
    ///   - english: The chapter's English paragraphs.
    /// - Returns: An empty string when `english` has no entry at `index`,
    ///   a placeholder when the entry is empty, otherwise the entry itself.
    private func translation(at index: Int, in english: [String]) -> String {
        guard english.indices.contains(index) else { return "" }
        let text = english[index]
        return text.isEmpty ? "[translation unavailable]" : text
    }

    /// Resolves a tapped word and presents it in the detail sheet.
    ///
    /// - Parameters:
    ///   - word: The raw token as displayed, possibly with punctuation attached.
    ///   - paragraph: The paragraph containing the token, passed to the
    ///     asynchronous lookup as context.
    private func handleTap(word: String, paragraph: String) {
        let cleaned = cleanWord(word)
        guard !cleaned.isEmpty else { return }

        if let cached = lookupCache[cleaned] {
            selectedWord = cached
            return
        }

        if let entry = dictionary.lookup(cleaned) {
            let annotation = WordAnnotation(
                word: cleaned,
                baseForm: entry.baseForm,
                english: entry.english,
                partOfSpeech: entry.partOfSpeech
            )
            lookupCache[cleaned] = annotation
            selectedWord = annotation
            return
        }

        // Show a placeholder immediately; WordDetailSheet renders a spinner for it.
        selectedWord = WordAnnotation(word: cleaned, baseForm: cleaned, english: "Looking up...", partOfSpeech: "")

        Task {
            do {
                let annotation = try await WordLookup.lookup(word: cleaned, inContext: paragraph)
                lookupCache[cleaned] = annotation
                // Only surface the result if the sheet is still showing this
                // word. Otherwise a late result would re-open a dismissed sheet
                // or replace a word the user tapped afterwards.
                if selectedWord?.word == cleaned {
                    selectedWord = annotation
                }
            } catch {
                if selectedWord?.word == cleaned {
                    selectedWord = WordAnnotation(word: cleaned, baseForm: cleaned, english: "Lookup unavailable", partOfSpeech: "")
                }
            }
        }
    }

    /// Lowercases the token and strips leading/trailing punctuation, then
    /// leading/trailing whitespace.
    private func cleanWord(_ word: String) -> String {
        word.lowercased()
            .trimmingCharacters(in: .punctuationCharacters)
            .trimmingCharacters(in: .whitespaces)
    }
}
// MARK: - Tappable paragraph
/// Renders one paragraph as a wrapping run of individually tappable words.
///
/// The text is split on single spaces only and empty pieces are dropped.
/// `cache` is accepted from the caller but is not read by this view's body.
private struct TappableParagraph: View {
    let text: String
    let cache: [String: WordAnnotation]
    let onTap: (String) -> Void

    /// Space-separated tokens of `text`, in order.
    private var tokens: [String] {
        text.split(separator: " ", omittingEmptySubsequences: true).map { String($0) }
    }

    var body: some View {
        FlowLayout(spacing: 0) {
            // Identify tokens by position because the same word can repeat.
            ForEach(Array(tokens.enumerated()), id: \.offset) { entry in
                WordButton(word: entry.element, onTap: onTap)
            }
        }
        .accessibilityElement(children: .combine)
    }
}
/// A single word rendered as a plain-styled button that reports taps.
private struct WordButton: View {
    let word: String
    let onTap: (String) -> Void

    /// The word followed by one space, which separates it from the next word
    /// in the flow layout.
    private var label: String { word + " " }

    var body: some View {
        Button(action: { onTap(word) }) {
            Text(label)
                .font(.body)
                .foregroundStyle(.primary)
        }
        .buttonStyle(.plain)
    }
}
// MARK: - Flow layout
/// Left-to-right layout that wraps subviews onto a new row when the next
/// subview would exceed the proposed width.
///
/// Subviews are measured at their unspecified-proposal size. `spacing` is
/// applied between rows only; subviews within a row are placed edge to edge.
private struct FlowLayout: Layout {
    var spacing: CGFloat = 0

    /// Reports the proposed width (when it is finite) and the summed row heights
    /// plus inter-row spacing. With no width proposal, or an infinite one, the
    /// width of the widest row is reported instead of `0` / infinity.
    func sizeThatFits(proposal: ProposedViewSize, subviews: Subviews, cache: inout ()) -> CGSize {
        let rows = computeRows(proposal: proposal, subviews: subviews)

        var height: CGFloat = 0
        var widestRow: CGFloat = 0
        for row in rows {
            height += row.map { $0.height }.max() ?? 0
            widestRow = max(widestRow, row.reduce(0) { partial, size in partial + size.width })
        }
        height += CGFloat(max(0, rows.count - 1)) * spacing

        let width: CGFloat
        if let proposed = proposal.width, proposed.isFinite {
            width = proposed
        } else {
            // Nothing usable was proposed, so report what the content needs.
            width = widestRow
        }
        return CGSize(width: width, height: height)
    }

    /// Places subviews row by row from the top-leading corner of `bounds`,
    /// using the same row breakdown as `sizeThatFits`.
    func placeSubviews(in bounds: CGRect, proposal: ProposedViewSize, subviews: Subviews, cache: inout ()) {
        let rows = computeRows(proposal: proposal, subviews: subviews)
        var y = bounds.minY
        // Rows hold sizes in subview order, so a running index maps each size
        // back to its subview.
        var subviewIndex = 0
        for row in rows {
            var x = bounds.minX
            let rowHeight = row.map { $0.height }.max() ?? 0
            for size in row {
                subviews[subviewIndex].place(at: CGPoint(x: x, y: y), proposal: ProposedViewSize(size))
                x += size.width
                subviewIndex += 1
            }
            y += rowHeight + spacing
        }
    }

    /// Groups subview sizes into rows that fit the proposed width.
    ///
    /// A row always receives at least one subview, even if that subview alone
    /// is wider than the available width. With no proposed width everything
    /// lands in a single row.
    private func computeRows(proposal: ProposedViewSize, subviews: Subviews) -> [[CGSize]] {
        let maxWidth = proposal.width ?? .infinity
        var rows: [[CGSize]] = [[]]
        var currentWidth: CGFloat = 0
        for subview in subviews {
            let size = subview.sizeThatFits(.unspecified)
            if currentWidth + size.width > maxWidth && !rows[rows.count - 1].isEmpty {
                rows.append([])
                currentWidth = 0
            }
            rows[rows.count - 1].append(size)
            currentWidth += size.width
        }
        return rows
    }
}
// MARK: - Word detail sheet
/// Sheet content showing the details of one looked-up word: the word itself,
/// an optional part-of-speech capsule, and either a progress row or the
/// base form / English translation.
private struct WordDetailSheet: View {
    let word: WordAnnotation

    var body: some View {
        VStack(spacing: 16) {
            header
            Divider()
            if isLoading {
                loadingRow
            } else {
                details
            }
            Spacer()
        }
        .padding()
    }

    /// True while `english` holds the "Looking up..." placeholder text.
    /// NOTE(review): presumably set by the caller before the lookup finishes — confirm.
    private var isLoading: Bool {
        word.english == "Looking up..."
    }

    /// The base form is only worth showing when it is present and differs from the word.
    private var showsBaseForm: Bool {
        !(word.baseForm.isEmpty || word.baseForm == word.word)
    }

    private var header: some View {
        HStack {
            Text(word.word)
                .font(.title2.bold())
            Spacer()
            if !word.partOfSpeech.isEmpty {
                Text(word.partOfSpeech)
                    .font(.caption.weight(.medium))
                    .foregroundStyle(.secondary)
                    .padding(.horizontal, 8)
                    .padding(.vertical, 4)
                    .background(.fill.tertiary, in: Capsule())
            }
        }
    }

    private var loadingRow: some View {
        HStack(spacing: 8) {
            ProgressView()
            Text("Looking up word...")
                .font(.subheadline)
                .foregroundStyle(.secondary)
        }
        .frame(maxWidth: .infinity)
    }

    private var details: some View {
        VStack(alignment: .leading, spacing: 8) {
            if showsBaseForm {
                HStack {
                    Text("Base form:")
                        .font(.subheadline)
                        .foregroundStyle(.secondary)
                    Text(word.baseForm)
                        .font(.subheadline.weight(.semibold))
                        .italic()
                }
            }
            if !word.english.isEmpty {
                HStack {
                    Text("English:")
                        .font(.subheadline)
                        .foregroundStyle(.secondary)
                    Text(word.english)
                        .font(.subheadline.weight(.semibold))
                }
            }
        }
        .frame(maxWidth: .infinity, alignment: .leading)
    }
}
// MARK: - On-demand word lookup (matches StoryReaderView's WordLookup)
/// Namespace for the on-demand, model-backed word lookup.
@MainActor
private enum WordLookup {
    /// Structured answer requested from the language model for a single word.
    @Generable
    struct WordInfo {
        @Guide(description: "The dictionary base form (infinitive for verbs, singular for nouns)")
        var baseForm: String

        @Guide(description: "English translation")
        var english: String

        @Guide(description: "Part of speech: verb, noun, adjective, adverb, preposition, conjunction, article, pronoun, or other")
        var partOfSpeech: String
    }

    /// Asks a fresh `LanguageModelSession` to describe `word` as used in `sentence`.
    ///
    /// - Parameters:
    ///   - word: The word to look up; echoed back unchanged in the result.
    ///   - sentence: The surrounding text, passed to the model as context.
    /// - Returns: A `WordAnnotation` built from the model's structured response.
    /// - Throws: Whatever `LanguageModelSession.respond` throws.
    static func lookup(word: String, inContext sentence: String) async throws -> WordAnnotation {
        let dictionarySession = LanguageModelSession(instructions: """
            You are a Spanish dictionary. Given a word and the sentence it appears in, \
            provide its base form, English translation, and part of speech.
            """)
        let generated = try await dictionarySession.respond(
            to: "Word: \"\(word)\" in sentence: \"\(sentence)\"",
            generating: WordInfo.self
        ).content
        return WordAnnotation(
            word: word,
            baseForm: generated.baseForm,
            english: generated.english,
            partOfSpeech: generated.partOfSpeech
        )
    }
}
@@ -253,6 +253,37 @@ struct PracticeView: View {
.glassEffect(in: RoundedRectangle(cornerRadius: 14)) .glassEffect(in: RoundedRectangle(cornerRadius: 14))
.padding(.horizontal) .padding(.horizontal)
// Books
NavigationLink {
BookLibraryView()
} label: {
HStack(spacing: 14) {
Image(systemName: "books.vertical.fill")
.font(.title3)
.frame(width: 36)
.foregroundStyle(.indigo)
VStack(alignment: .leading, spacing: 2) {
Text("Books")
.font(.subheadline.weight(.semibold))
Text("Read full-length books with tap-to-define")
.font(.caption)
.foregroundStyle(.secondary)
}
Spacer()
Image(systemName: "chevron.right")
.font(.caption)
.foregroundStyle(.tertiary)
}
.padding(.horizontal, 16)
.padding(.vertical, 14)
}
.tint(.primary)
.glassEffect(in: RoundedRectangle(cornerRadius: 14))
.padding(.horizontal)
// Quick Actions // Quick Actions
VStack(spacing: 12) { VStack(spacing: 12) {
Text("Quick Actions") Text("Quick Actions")
File diff suppressed because it is too large Load Diff
+1
View File
@@ -0,0 +1 @@
build/
+85
View File
@@ -0,0 +1,85 @@
# Books pipeline
Turns any EPUB into a chapter-structured JSON file the app bundles and reads.
## TL;DR
```bash
cd Conjuga/Scripts/books
./run.sh /path/to/book.epub --slug my-book-slug
```
This runs Phase 1 (extract) and Phase 2 (manifest jobs), reports how many translation jobs are still pending, and bundles a preview file with empty English placeholders for any untranslated paragraphs. Run the pending jobs via Claude Code subagents (Phase 2.5 below), then re-run `./run.sh` to bundle the final file.
## Phases
| Phase | Script | What it does | Output |
|---|---|---|---|
| 1 | `extract_epub.py` | Unzip the EPUB, walk `content.opf` spine + `toc.ncx` navMap, group HTML files into chapters, strip HTML→text. | `build/<slug>/chapters.json` |
| 2 | `translate_chapters.py` | Split each chapter into ~30-paragraph translation batches. Each batch becomes a job with its own input/output file. **Resumable**: jobs whose output file already exists are skipped. | `build/<slug>/jobs/<jobid>.input.json` + `_pending.txt` |
| 2.5 | Claude Code subagents | Read each job's `.input.json`, translate Spanish→English, write `<jobid>.output.json`. See "Running translations" below. | `build/<slug>/jobs/<jobid>.output.json` |
| 3 | `bundle_book.py` | Merge `chapters.json` + every `*.output.json` into the final bundled JSON the app reads. | `Conjuga/Conjuga/book_<slug>.json` |
`run.sh` chains 1 → 2 → 3. If Phase 2 produces pending jobs, Phase 3 still runs but bundles with empty `paragraphsEN` placeholders so you can preview app structure before translation completes. Re-running `run.sh` after subagents fill in the outputs gives you the real bundled file.
## Adding a new book
1. **Drop the EPUB** anywhere on disk.
2. **Run Phase 1+2**:
```bash
cd Conjuga/Scripts/books
./run.sh /path/to/book.epub --slug my-book
```
Sanity-check the chapter list it prints. If chapter grouping looks wrong (e.g. an EPUB without a usable `toc.ncx`), `extract_epub.py` will need a fallback heuristic — see "Open assumptions" below.
3. **Run translations** (Phase 2.5). The default approach is to spawn Claude Code subagents from inside a Claude Code session pointed at this repo:
For each pending job ID listed in `build/<slug>/jobs/_pending.txt`, hand a subagent the prompt at `build/<slug>/jobs/_prompt_template.md` with `<JOB_INPUT_PATH>` / `<JOB_OUTPUT_PATH>` filled in. The subagent reads the input, translates, and writes the output. Resumable — interrupted runs just leave the missing job IDs in `_pending.txt`.
Cluster jobs into agent batches of ~5–10 jobs each to keep per-agent context manageable. ~5 parallel agents is a good throughput target.
4. **Bundle**:
```bash
./run.sh /path/to/book.epub --slug my-book # re-running pulls in the new outputs
# or directly:
python3 bundle_book.py my-book --require-all
```
`--require-all` will fail loudly if any job is still missing.
5. **Bump `bookDataVersion`** in `DataLoader.swift` so the in-app store re-seeds the new book on next launch (or any time you re-run with new translations).
6. **Verify the file is bundled** in `Conjuga.xcodeproj`. The script writes `book_<slug>.json` into `Conjuga/Conjuga/` (the app target root, see "File layout" below); if that folder is part of a recursive group reference, Xcode picks it up automatically. Otherwise, add it manually or via the `xcodeproj` ruby gem.
## File layout
```
Conjuga/Scripts/books/
├── extract_epub.py # Phase 1
├── translate_chapters.py # Phase 2
├── bundle_book.py # Phase 3
├── run.sh # Orchestrator
└── build/ # gitignored
└── <slug>/
├── chapters.json
└── jobs/
├── _pending.txt
├── _prompt_template.md
├── ch01_b00.input.json
├── ch01_b00.output.json
└── ...
```
The final output (`book_<slug>.json`) lives at `Conjuga/Conjuga/book_<slug>.json` so the iOS app bundle includes it. (Existing `textbook_data.json` / `conjuga_data.json` use the same layout — files in the app target root rather than a Resources subgroup.)
## Open assumptions
- **TOC drives chapter boundaries.** If an EPUB ships without a usable `toc.ncx`, or the navMap is too granular (e.g. one navPoint per page), `extract_epub.py` will need a fallback that groups by `<h1>` headings in spine order.
- **Spanish bold tags = inline emphasis.** The Olly Richards books bold vocab hints inside paragraphs. We strip the bold and let the in-app dictionary lookup handle definitions instead. If a future book uses bold for something else (titles, etc.), revisit.
- **Translation is per-paragraph 1:1.** Subagents must preserve paragraph count and order. `bundle_book.py` will warn + pad/truncate if a job's output array length doesn't match its input — but that's a sign the subagent misbehaved.
## Out of scope (intentional)
- OCR of vocab image tables (use `Scripts/textbook/` if your book is image-heavy).
- Exercise extraction (textbook pipeline).
- Pre-computed per-word annotations (the app uses `DictionaryService.lookup()` at runtime).
- Cover image extraction (covers are derived from a color hash in the app for now).
+128
View File
@@ -0,0 +1,128 @@
#!/usr/bin/env python3
"""Merge chapters.json + per-job translation outputs into the final bundled
book_<slug>.json that the iOS app reads from its bundle.
Usage:
python3 bundle_book.py <slug> [--build BUILD_DIR] [--dest DEST_DIR] [--require-all]
Inputs:
BUILD_DIR/<slug>/chapters.json
BUILD_DIR/<slug>/jobs/*.output.json (from translation subagents)
Output:
DEST_DIR/book_<slug>.json
{
"slug": "...",
"title": "...",
"author": "...",
"language": "...",
"chapters": [
{"id": "ch1", "number": 1, "title": "Preface",
"paragraphsES": ["...", ...],
"paragraphsEN": ["...", ...]},
...
]
}
If --require-all is passed, the script fails if any job is missing its output.
Otherwise it fills missing translations with empty strings and warns.
"""
from __future__ import annotations
import argparse
import json
import sys
from pathlib import Path
# Default output directory for the bundled JSON, used when --dest is not given.
# It is a relative path, so it is resolved against the current working
# directory at run time (not against this script's location).
DEFAULT_DEST = Path("../../Conjuga")
def main() -> None:
    """Merge ``chapters.json`` with the translation job outputs into ``book_<slug>.json``.

    Reads ``<build>/<slug>/chapters.json`` and every ``*.input.json`` job under
    ``<build>/<slug>/jobs``. For each job with a matching ``.output.json`` the
    English paragraphs are written into the chapter at the job's ``rangeStart``
    offset; paragraphs without a translation stay as empty strings.

    Exits with status 1 when ``--require-all`` is passed and any job has no
    output file. Length mismatches between a job's input and output are
    reported on stderr and padded/truncated.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("slug")
    parser.add_argument("--build", type=Path, default=Path("build"))
    parser.add_argument("--dest", type=Path, default=None)
    parser.add_argument("--require-all", action="store_true")
    args = parser.parse_args()

    base = args.build / args.slug
    chapters = json.loads((base / "chapters.json").read_text(encoding="utf-8"))
    jobs_dir = base / "jobs"

    # Index translation jobs by chapter -> list of (offset, paragraphsEN).
    chapter_translations: dict[int, list[tuple[int, list[str]]]] = {}
    missing: list[str] = []
    for input_path in sorted(jobs_dir.glob("*.input.json")):
        job_id = input_path.stem.removesuffix(".input")
        input_data = json.loads(input_path.read_text(encoding="utf-8"))
        output_path = jobs_dir / f"{job_id}.output.json"
        if not output_path.exists():
            missing.append(job_id)
            continue
        output_data = json.loads(output_path.read_text(encoding="utf-8"))
        paragraphs_en = output_data.get("paragraphsEN", [])
        expected = len(input_data["paragraphsES"])
        if len(paragraphs_en) != expected:
            print(
                f"WARN: {job_id} length mismatch — got {len(paragraphs_en)}, "
                f"expected {expected}. Padding/truncating.",
                file=sys.stderr,
            )
            if len(paragraphs_en) < expected:
                paragraphs_en = paragraphs_en + [""] * (expected - len(paragraphs_en))
            else:
                paragraphs_en = paragraphs_en[:expected]
        chapter_translations.setdefault(input_data["chapter"], []).append(
            (input_data["rangeStart"], paragraphs_en)
        )

    if missing:
        msg = f"{len(missing)} translation job(s) missing output: {missing[:5]}{'...' if len(missing) > 5 else ''}"
        if args.require_all:
            print(f"ERROR: {msg}", file=sys.stderr)
            sys.exit(1)
        print(f"WARN: {msg} — using empty strings for those paragraphs.", file=sys.stderr)

    bundled_chapters: list[dict] = []
    for ch in chapters["chapters"]:
        total = len(ch["paragraphsES"])
        # Start from all-empty placeholders and drop each chunk in at its own
        # offset. Concatenating chunks in order would shift every translation
        # that follows a missing job onto the wrong Spanish paragraph.
        paragraphs_en: list[str] = [""] * total
        for start, en_chunk in sorted(chapter_translations.get(ch["number"], [])):
            if start < 0 or start >= total:
                continue
            end = min(start + len(en_chunk), total)
            paragraphs_en[start:end] = en_chunk[: end - start]
        bundled_chapters.append(
            {
                "id": ch["id"],
                "number": ch["number"],
                "title": ch["title"],
                "paragraphsES": ch["paragraphsES"],
                "paragraphsEN": paragraphs_en,
            }
        )

    payload = {
        "slug": chapters["slug"],
        "title": chapters["title"],
        "author": chapters["author"],
        "language": chapters["language"],
        "chapters": bundled_chapters,
    }

    dest_dir = (args.dest or DEFAULT_DEST).resolve()
    dest_dir.mkdir(parents=True, exist_ok=True)
    out_path = dest_dir / f"book_{args.slug}.json"
    out_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")

    translated = sum(len(v) for v in chapter_translations.values())
    print(f"Wrote {out_path}")
    print(f" Chapters: {len(bundled_chapters)}")
    print(f" Translated jobs: {translated} / {translated + len(missing)}")


if __name__ == "__main__":
    main()
+258
View File
@@ -0,0 +1,258 @@
#!/usr/bin/env python3
"""Parse an EPUB into chapters.json for the in-app Books feature.
Usage:
python3 extract_epub.py <epub_path> [--slug SLUG] [--out OUT_DIR]
Defaults:
SLUG derived from the EPUB filename (lowercased, dashed)
OUT_DIR ./build/<slug>
Output:
OUT_DIR/chapters.json
{
"title": "...",
"author": "...",
"language": "...",
"slug": "...",
"chapters": [
{"id": "ch1", "number": 1, "title": "Preface",
"paragraphsES": ["...", "..."]},
...
]
}
How chapter grouping works:
1. Read content.opf manifest (id -> href) and spine (ordered idrefs).
2. Read toc.ncx navMap to get the ordered list of chapter (title, first-href).
3. For each chapter, claim every spine file from its first href up to (but
not including) the next chapter's first href.
4. For each file in the chapter's range, parse <p> elements, strip tags,
normalise whitespace + smart quotes, drop empties.
"""
from __future__ import annotations
import argparse
import json
import re
import sys
import unicodedata
import warnings
import zipfile
from pathlib import Path
from typing import Iterable
from xml.etree import ElementTree as ET
from bs4 import BeautifulSoup, XMLParsedAsHTMLWarning
# Silence BeautifulSoup's XMLParsedAsHTMLWarning for the rest of the process.
warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)
# Namespace prefix -> URI map passed to ElementTree find/findall/findtext calls
# so that paths can be written with short prefixes such as "opf:" and "ncx:".
NS = {
    "opf": "http://www.idpf.org/2007/opf",
    "dc": "http://purl.org/dc/elements/1.1/",
    "ncx": "http://www.daisy.org/z3986/2005/ncx/",
    "xhtml": "http://www.w3.org/1999/xhtml",
}
def _slugify(s: str) -> str:
s = unicodedata.normalize("NFKD", s).encode("ascii", "ignore").decode("ascii")
s = re.sub(r"[^a-zA-Z0-9]+", "-", s).strip("-").lower()
return s or "book"
def _normalise(text: str) -> str:
# Collapse runs of whitespace, normalise smart quotes to plain ones.
text = text.replace(" ", " ")
text = re.sub(r"\s+", " ", text).strip()
text = re.sub(r"\s+([.,;:!?…])", r"\1", text)
text = re.sub(r"([¡¿])\s+", r"\1", text)
return text
def _read_zip_text(zf: zipfile.ZipFile, path: str) -> str:
return zf.read(path).decode("utf-8")
def _container_root(zf: zipfile.ZipFile) -> str:
    """Return the package (OPF) path declared in ``META-INF/container.xml``.

    Raises:
        RuntimeError: if the container file has no ``rootfile`` element.
    """
    container_xml = _read_zip_text(zf, "META-INF/container.xml")
    rootfile_tag = "{urn:oasis:names:tc:opendocument:xmlns:container}rootfile"
    rootfile = ET.fromstring(container_xml).find(f".//{rootfile_tag}")
    if rootfile is not None:
        return rootfile.attrib["full-path"]
    raise RuntimeError("Missing rootfile entry in META-INF/container.xml")
def _parse_opf(zf: zipfile.ZipFile, opf_path: str):
    """Parse the OPF package document into a plain dict.

    Returned keys:
        title, author, language: stripped Dublin Core metadata ("" when absent)
        manifest: item id -> href
        spine: ordered list of itemref idrefs
        ncx_href: manifest href of the item named by the spine's ``toc``
            attribute, or ``None`` when there is none
        opf_dir: directory part of ``opf_path`` ("" when the OPF is at the root)
    """
    package = ET.fromstring(_read_zip_text(zf, opf_path))

    def _dc(tag: str) -> str:
        # findtext can hand back None for an empty element, hence the `or ""`.
        return (package.findtext(f".//dc:{tag}", default="", namespaces=NS) or "").strip()

    metadata = {"title": _dc("title"), "author": _dc("creator"), "language": _dc("language")}

    manifest: dict[str, str] = {
        item.attrib["id"]: item.attrib["href"]
        for item in package.findall("opf:manifest/opf:item", NS)
    }
    spine: list[str] = [
        itemref.attrib["idref"] for itemref in package.findall("opf:spine/opf:itemref", NS)
    ]

    spine_element = package.find("opf:spine", NS)
    ncx_id = None if spine_element is None else spine_element.attrib.get("toc")
    ncx_href = manifest.get(ncx_id) if ncx_id else None

    opf_dir = str(Path(opf_path).parent) if "/" in opf_path else ""
    return {
        "title": metadata["title"],
        "author": metadata["author"],
        "language": metadata["language"],
        "manifest": manifest,
        "spine": spine,
        "ncx_href": ncx_href,
        "opf_dir": opf_dir,
    }
def _parse_ncx(zf: zipfile.ZipFile, ncx_path: str) -> list[dict]:
    """Read the top-level ``navPoint`` entries of an NCX table of contents.

    Returns one ``{"title": ..., "href": ...}`` dict per direct child of
    ``navMap``, in document order. ``href`` is the ``content`` ``src`` with any
    ``#fragment`` removed ("" when the entry has no ``content`` element).
    """
    ncx_root = ET.fromstring(_read_zip_text(zf, ncx_path))

    def _entry(nav_point) -> dict:
        label = nav_point.findtext("ncx:navLabel/ncx:text", default="", namespaces=NS) or ""
        content = nav_point.find("ncx:content", NS)
        src = "" if content is None else content.attrib.get("src", "")
        # Only the file matters for chapter boundaries, so drop the anchor.
        return {"title": label.strip(), "href": src.split("#", 1)[0]}

    return [_entry(nav_point) for nav_point in ncx_root.findall("ncx:navMap/ncx:navPoint", NS)]
def _resolve_zip_path(base_dir: str, href: str) -> str:
if not base_dir:
return href
return f"{base_dir}/{href}".lstrip("/")
def _extract_paragraphs(zf: zipfile.ZipFile, zip_path: str) -> list[str]:
    """Return the normalised text of every non-empty ``<p>`` in one archive file.

    Tags are stripped with a space inserted between inline fragments, then the
    text goes through ``_normalise``. Paragraphs that end up empty (for example
    wrappers that only hold an anchor) are dropped.

    If ``zip_path`` is not in the archive, a warning is printed to stderr and an
    empty list is returned so that the rest of the book is still extracted.
    """
    try:
        html = _read_zip_text(zf, zip_path)
    except KeyError:
        # Best-effort, but say which file was skipped instead of hiding it.
        print(f"WARN: {zip_path!r} not found in the EPUB archive; skipping.", file=sys.stderr)
        return []
    soup = BeautifulSoup(html, "lxml")
    candidates = (_normalise(p.get_text(" ", strip=True)) for p in soup.find_all("p"))
    return [text for text in candidates if text]
def _chapter_files(
spine_files: list[str], chapter_hrefs: list[str]
) -> list[list[str]]:
"""Slice the spine into one list of files per chapter, using the chapter's
first href as the chapter boundary. Files before the first chapter (e.g.
cover, titlepage) are dropped."""
boundaries: list[int] = []
for href in chapter_hrefs:
try:
idx = spine_files.index(href)
except ValueError:
boundaries.append(-1)
continue
boundaries.append(idx)
ranges: list[list[str]] = []
for i, start in enumerate(boundaries):
if start < 0:
ranges.append([])
continue
end = len(spine_files)
for next_start in boundaries[i + 1:]:
if next_start >= 0:
end = next_start
break
ranges.append(spine_files[start:end])
return ranges
def main() -> None:
    """Command-line entry point: extract an EPUB into ``chapters.json``.

    Exit codes: 2 when the EPUB path does not exist, 3 when the package has no
    NCX table of contents to derive chapters from.

    The slug is ``--slug`` when given, otherwise derived from the book title,
    and from the EPUB file name when the title is empty.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("epub", type=Path)
    parser.add_argument("--slug", default=None)
    parser.add_argument("--out", type=Path, default=None)
    args = parser.parse_args()

    if not args.epub.exists():
        print(f"EPUB not found: {args.epub}", file=sys.stderr)
        sys.exit(2)

    with zipfile.ZipFile(args.epub) as zf:
        opf_path = _container_root(zf)
        opf = _parse_opf(zf, opf_path)
        if not opf["ncx_href"]:
            print("No NCX found in spine; cannot derive chapter structure.", file=sys.stderr)
            sys.exit(3)
        ncx_path = _resolve_zip_path(opf["opf_dir"], opf["ncx_href"])
        toc = _parse_ncx(zf, ncx_path)

        spine_files = [
            _resolve_zip_path(opf["opf_dir"], opf["manifest"].get(idref, ""))
            for idref in opf["spine"]
        ]
        chapter_hrefs = [_resolve_zip_path(opf["opf_dir"], c["href"]) for c in toc]
        chapter_file_ranges = _chapter_files(spine_files, chapter_hrefs)

        chapters_out: list[dict] = []
        for i, (meta, files) in enumerate(zip(toc, chapter_file_ranges), start=1):
            paragraphs: list[str] = []
            for f in files:
                paragraphs.extend(_extract_paragraphs(zf, f))
            # Drop leading paragraph(s) that just echo the chapter title — the
            # title is already stored separately.
            title_norm = _normalise(meta["title"]).lower()
            while paragraphs and _normalise(paragraphs[0]).lower() == title_norm:
                paragraphs.pop(0)
            chapters_out.append(
                {
                    "id": f"ch{i}",
                    "number": i,
                    "title": meta["title"],
                    "paragraphsES": paragraphs,
                }
            )

    # _slugify never returns an empty string (it falls back to "book"), so the
    # file-name fallback has to be chosen explicitly when the title is empty.
    if args.slug:
        slug = args.slug
    elif opf["title"]:
        slug = _slugify(opf["title"])
    else:
        slug = _slugify(args.epub.stem)

    out_dir = args.out or (Path("build") / slug)
    out_dir.mkdir(parents=True, exist_ok=True)
    out_path = out_dir / "chapters.json"
    payload = {
        "title": opf["title"],
        "author": opf["author"],
        "language": opf["language"],
        "slug": slug,
        "chapters": chapters_out,
    }
    out_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")

    total_paragraphs = sum(len(c["paragraphsES"]) for c in chapters_out)
    print(f"Wrote {out_path}")
    print(f" Title: {opf['title']}")
    print(f" Author: {opf['author']}")
    print(f" Chapters: {len(chapters_out)}")
    print(f" Paragraphs: {total_paragraphs}")
    for ch in chapters_out:
        print(f" ch{ch['number']:02d} {len(ch['paragraphsES']):4d}{ch['title']}")


if __name__ == "__main__":
    main()
+65
View File
@@ -0,0 +1,65 @@
#!/usr/bin/env bash
# Orchestrate the books pipeline: EPUB -> chapters.json -> per-chapter job
# manifest -> (translation by Claude Code subagents) -> bundled book_<slug>.json.
#
# This script DOES NOT run the LLM translation pass. After Phase 2 it stops
# and prints how many jobs are pending. Use Claude Code subagents (or a fresh
# session per the README) to fill in build/<slug>/jobs/*.output.json, then
# re-run this script — it will pick up where it left off via Phase 3.
#
# Usage:
# ./run.sh <epub_path> [--slug SLUG] [--batch-size N]
set -euo pipefail

# Directory containing this script; the Python phases are run from here so
# their relative "build/" and output paths resolve consistently.
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

if [[ $# -lt 1 ]]; then
  echo "usage: $0 <epub_path> [--slug SLUG] [--batch-size N]"
  exit 2
fi

EPUB="$1"; shift
SLUG=""
BATCH_SIZE="30"
while [[ $# -gt 0 ]]; do
  case "$1" in
    # ${2:?...} gives a readable error instead of "unbound variable" when the
    # option is the last argument.
    --slug) SLUG="${2:?--slug requires a value}"; shift 2 ;;
    --batch-size) BATCH_SIZE="${2:?--batch-size requires a value}"; shift 2 ;;
    *) echo "unknown option: $1" >&2; exit 2 ;;
  esac
done

if [[ ! -f "$EPUB" ]]; then
  echo "EPUB not found: $EPUB" >&2
  exit 2
fi

# Resolve the EPUB path against the caller's working directory BEFORE changing
# into the script directory; otherwise a relative path would be looked up
# next to this script instead of where the user ran the command.
EPUB_ABS="$(cd "$(dirname "$EPUB")" && pwd)/$(basename "$EPUB")"

cd "$HERE"

echo "=== Phase 1: extract_epub.py ==="
if [[ -n "$SLUG" ]]; then
  python3 extract_epub.py "$EPUB_ABS" --slug "$SLUG"
else
  python3 extract_epub.py "$EPUB_ABS"
fi

# If --slug wasn't passed, recover the slug from the chapters file just written
# (the most recently modified build/*/chapters.json).
if [[ -z "$SLUG" ]]; then
  SLUG=$(python3 -c "import json,glob; p=sorted(glob.glob('build/*/chapters.json'), key=lambda x: -__import__('os').path.getmtime(x))[0]; print(json.load(open(p))['slug'])")
fi

echo
echo "=== Phase 2: translate_chapters.py ==="
python3 translate_chapters.py "$SLUG" --batch-size "$BATCH_SIZE"

PENDING_FILE="build/$SLUG/jobs/_pending.txt"
PENDING_COUNT=$(wc -l < "$PENDING_FILE" | tr -d ' ')

echo
echo "=== Phase 3: bundle_book.py ==="
if [[ "$PENDING_COUNT" -gt 0 ]]; then
  echo " $PENDING_COUNT translation job(s) still pending."
  echo " Run the Claude Code subagent translation step (see README.md), then re-run this script."
  echo " Bundling with empty placeholders so you can preview app structure now."
  python3 bundle_book.py "$SLUG"
else
  python3 bundle_book.py "$SLUG" --require-all
fi
+136
View File
@@ -0,0 +1,136 @@
#!/usr/bin/env python3
"""Split chapters.json into translation jobs that Claude Code subagents can
process in parallel. Resumable: jobs whose output file already exists are
skipped.
Usage:
python3 translate_chapters.py <slug> [--batch-size N] [--build BUILD_DIR]
Inputs:
BUILD_DIR/<slug>/chapters.json (from extract_epub.py)
Outputs:
BUILD_DIR/<slug>/jobs/<jobid>.input.json (one per batch — read by subagents)
BUILD_DIR/<slug>/jobs/_pending.txt (list of job IDs still missing output)
BUILD_DIR/<slug>/jobs/_prompt_template.md (prompt the orchestrator hands each subagent)
Job layout (.input.json):
{
"jobId": "ch06_b00",
"chapter": 6,
"chapterTitle": "1. El Castillo",
"rangeStart": 0,
"rangeEnd": 30,
"paragraphsES": ["...", "..."]
}
Subagents must write `<jobid>.output.json` with shape:
{"jobId": "ch06_b00", "paragraphsEN": ["...", "..."]}
The output array MUST have the same length as paragraphsES, in the same order.
"""
from __future__ import annotations
import argparse
import json
from pathlib import Path
PROMPT_TEMPLATE = """\
You are translating a chunk of a Spanish-language book into English for a
language-learning app.
Input file: {input_path}
Output file: {output_path}
Read the input file. It contains a JSON object with a `paragraphsES` array.
Translate each paragraph into natural English. Preserve meaning, tone, and
dialogue markers (—, , ¡, ¿) as appropriate for the English output. Keep
the same number of paragraphs in the same order.
Notes for translation quality:
- This is a beginner Spanish reader, so prefer plain natural English over
literary flourish.
- Preserve proper nouns (character names, place names) verbatim.
- Convert Spanish dialogue dashes (, —) to English-style quotation marks
ONLY if it reads more naturally; otherwise keep them as em-dashes.
- Do NOT add explanatory parentheticals; the in-app dictionary handles
per-word lookup.
Write the output as JSON with shape:
{{"jobId": "<the jobId from the input>", "paragraphsEN": [...]}}
The `paragraphsEN` array MUST be the same length and order as `paragraphsES`
in the input. Write nothing else to disk and produce no other output.
"""
def main() -> None:
    """Split ``chapters.json`` into per-batch translation job files.

    For every chapter with at least one paragraph, writes one
    ``<jobid>.input.json`` per ``--batch-size`` paragraphs under
    ``<build>/<slug>/jobs``. A job counts as completed when its
    ``<jobid>.output.json`` already exists; all other job IDs are listed in
    ``_pending.txt`` (one per line). The prompt template is (re)written on
    every run.

    Exits through ``argparse`` with status 2 when ``--batch-size`` is not a
    positive integer.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("slug")
    parser.add_argument("--batch-size", type=int, default=30)
    parser.add_argument("--build", type=Path, default=Path("build"))
    args = parser.parse_args()

    # range() rejects a step of 0 and a negative step would silently produce
    # no jobs at all, so refuse both up front with a clear message.
    if args.batch_size < 1:
        parser.error("--batch-size must be a positive integer")

    base = args.build / args.slug
    chapters_path = base / "chapters.json"
    jobs_dir = base / "jobs"
    jobs_dir.mkdir(parents=True, exist_ok=True)

    data = json.loads(chapters_path.read_text(encoding="utf-8"))

    pending: list[str] = []
    completed: list[str] = []
    total_jobs = 0
    for ch in data["chapters"]:
        paragraphs = ch["paragraphsES"]
        if not paragraphs:
            continue
        for offset in range(0, len(paragraphs), args.batch_size):
            chunk = paragraphs[offset : offset + args.batch_size]
            job_id = f"ch{ch['number']:02d}_b{offset // args.batch_size:02d}"
            input_path = jobs_dir / f"{job_id}.input.json"
            output_path = jobs_dir / f"{job_id}.output.json"
            job = {
                "jobId": job_id,
                "chapter": ch["number"],
                "chapterTitle": ch["title"],
                "rangeStart": offset,
                "rangeEnd": offset + len(chunk),
                "paragraphsES": chunk,
            }
            input_path.write_text(
                json.dumps(job, ensure_ascii=False, indent=2),
                encoding="utf-8",
            )
            total_jobs += 1
            # Resumability: an existing output file means the job is done.
            if output_path.exists():
                completed.append(job_id)
            else:
                pending.append(job_id)

    (jobs_dir / "_pending.txt").write_text(
        "\n".join(pending) + ("\n" if pending else ""),
        encoding="utf-8",
    )
    (jobs_dir / "_prompt_template.md").write_text(
        PROMPT_TEMPLATE.format(
            input_path="<JOB_INPUT_PATH>",
            output_path="<JOB_OUTPUT_PATH>",
        ),
        encoding="utf-8",
    )

    print(f"Total translation jobs: {total_jobs}")
    print(f" Completed: {len(completed)}")
    print(f" Pending: {len(pending)}")
    print(f"Manifest at: {jobs_dir / '_pending.txt'}")
    print(f"Prompt template at: {jobs_dir / '_prompt_template.md'}")


if __name__ == "__main__":
    main()
@@ -0,0 +1,32 @@
import Foundation
import SwiftData
/// A long-form bilingual book bundled with the app. Chapter content lives in
/// `BookChapter` rows; this model carries the per-book metadata.
///
/// Every stored property has a default value; the initializer overwrites all of them.
@Model
public final class Book {
    /// Unique identifier; the initializer sets it to the same value as `slug`.
    @Attribute(.unique) public var id: String = ""
    /// Short identifier of the book (also used as `id`).
    public var slug: String = ""
    /// Display title of the book.
    public var title: String = ""
    /// Author name as provided at creation time.
    public var author: String = ""
    /// Language string as provided at creation time.
    /// NOTE(review): presumably the language code from the book's JSON — confirm against the loader.
    public var language: String = ""
    /// Number of chapters recorded for this book.
    public var chapterCount: Int = 0
    /// Accent colour for the book as a hex string.
    /// NOTE(review): exact format (e.g. with or without "#") is not visible here — confirm.
    public var accentColorHex: String = ""

    /// Creates a book record; `id` is derived from `slug`.
    public init(
        slug: String,
        title: String,
        author: String,
        language: String,
        chapterCount: Int,
        accentColorHex: String
    ) {
        self.id = slug
        self.slug = slug
        self.title = title
        self.author = author
        self.language = language
        self.chapterCount = chapterCount
        self.accentColorHex = accentColorHex
    }
}
@@ -0,0 +1,39 @@
import Foundation
import SwiftData
/// A single chapter belonging to a `Book`.
///
/// The Spanish and English paragraph lists are persisted as JSON-encoded
/// `[String]` blobs rather than as array attributes; use `paragraphsES()` and
/// `paragraphsEN()` to read them back.
@Model
public final class BookChapter {
    /// Unique identifier, expected in the form "<bookSlug>-ch<number>".
    @Attribute(.unique) public var id: String = ""
    /// Slug of the book this chapter belongs to.
    public var bookSlug: String = ""
    /// Chapter number within the book.
    public var number: Int = 0
    /// Chapter title.
    public var title: String = ""
    /// JSON-encoded `[String]` of Spanish paragraphs.
    public var paragraphsESJSON: Data = Data()
    /// JSON-encoded `[String]` of English paragraphs.
    public var paragraphsENJSON: Data = Data()

    public init(
        id: String,
        bookSlug: String,
        number: Int,
        title: String,
        paragraphsESJSON: Data,
        paragraphsENJSON: Data
    ) {
        self.id = id
        self.bookSlug = bookSlug
        self.number = number
        self.title = title
        self.paragraphsESJSON = paragraphsESJSON
        self.paragraphsENJSON = paragraphsENJSON
    }

    /// Decoded Spanish paragraphs; empty when the stored data cannot be decoded.
    public func paragraphsES() -> [String] {
        Self.decodeParagraphs(from: paragraphsESJSON)
    }

    /// Decoded English paragraphs; empty when the stored data cannot be decoded.
    public func paragraphsEN() -> [String] {
        Self.decodeParagraphs(from: paragraphsENJSON)
    }

    /// Decodes a JSON `[String]` payload, mapping any decoding failure to `[]`.
    private static func decodeParagraphs(from payload: Data) -> [String] {
        guard let decoded = try? JSONDecoder().decode([String].self, from: payload) else {
            return []
        }
        return decoded
    }
}