Major changes: - Textbook UI: chapter list, reader, and interactive exercise view (keyboard + Apple Pencil) surfaced under the Course tab. 30 chapters, 251 exercises. - Stem-change conjugation toggle on Week 4 flashcard decks (E-IE, E-I, O-UE). Uses existing VerbForm + IrregularSpan data to render highlighted present tense conjugations inline. - Deterministic on-device answer grader with partial credit (correct / close for accent-stripped or single-char-typo / wrong). 11 unit tests cover it. - SharedModels: TextbookChapter (local), TextbookExerciseAttempt (cloud-synced), AnswerGrader helpers. Bumped schema. - DataLoader: textbook seeder (version 8) + refresh helpers that preserve LanGo course decks when textbook data is re-seeded. - Local extraction pipeline in Conjuga/Scripts/textbook/ — XHTML chapter parser, answer-key parser, macOS Vision image OCR + PDF page OCR, merger, NSSpellChecker validator, language-aware auto-fixer, and repair pass that re-pairs quarantined vocab rows using bounding-box coordinates. - UI test target (ConjugaUITests) with three tests: end-to-end textbook flow, all-chapters screenshot audit, and stem-change toggle verification. Generated textbook content (textbook_data.json, textbook_vocab.json) and third-party source files are gitignored — re-run Scripts/textbook/run_pipeline.sh locally to regenerate. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
518 lines
21 KiB
Swift
518 lines
21 KiB
Swift
import SwiftData
|
|
import SharedModels
|
|
import Foundation
|
|
|
|
actor DataLoader {
|
|
/// Version of the bundled course data. Compared against the integer stored in
/// `UserDefaults` under `courseDataKey`; a lower stored value triggers a re-seed.
static let courseDataVersion = 7
|
|
/// `UserDefaults` key under which the last seeded course data version is stored.
static let courseDataKey = "courseDataVersion"
|
|
|
|
/// Version of the bundled textbook data. Compared against the integer stored in
/// `UserDefaults` under `textbookDataKey`; a lower stored value triggers a re-seed.
static let textbookDataVersion = 8
|
|
/// `UserDefaults` key under which the last seeded textbook data version is stored.
static let textbookDataKey = "textbookDataVersion"
|
|
|
|
/// Reports whether any seeding or refresh work is pending.
///
/// Returns `true` when the store holds no `Verb` rows (a failed count is treated
/// as zero), or when either the stored course or textbook data version is lower
/// than the version compiled into this build.
///
/// - Parameter container: Container used to create a throwaway `ModelContext` for the count.
/// - Returns: `true` if seeding or a data refresh is required.
static func needsSeeding(container: ModelContainer) async -> Bool {
    let context = ModelContext(container)
    let storedVerbs = (try? context.fetchCount(FetchDescriptor<Verb>())) ?? 0
    guard storedVerbs != 0 else { return true }

    let defaults = UserDefaults.standard
    let courseIsCurrent = defaults.integer(forKey: courseDataKey) >= courseDataVersion
    let textbookIsCurrent = defaults.integer(forKey: textbookDataKey) >= textbookDataVersion
    return !(courseIsCurrent && textbookIsCurrent)
}
|
|
|
|
/// Seeds an empty store from the bundled JSON resources.
///
/// Returns immediately when the store already holds at least one `Verb`.
/// Otherwise loads `conjuga_data.json` and inserts tense guides, verbs, verb
/// forms and irregular spans, then seeds course and textbook data and records
/// both data versions in `UserDefaults`.
///
/// Saves are best-effort: a failed save is logged and seeding continues.
///
/// - Parameter container: Container used to create the `ModelContext` that every insert goes through.
static func seedIfNeeded(container: ModelContainer) async {
    let context = ModelContext(container)

    // Best-effort save that reports failures instead of discarding them.
    func save(after stage: String) {
        do {
            try context.save()
        } catch {
            print("[DataLoader] ⚠️ save failed after \(stage): \(error)")
        }
    }

    let existingVerbCount: Int
    do {
        existingVerbCount = try context.fetchCount(FetchDescriptor<Verb>())
        print("[DataLoader] seedIfNeeded: existing verb count = \(existingVerbCount)")
    } catch {
        // A failed count is treated as an empty store, so seeding proceeds.
        print("[DataLoader] ⚠️ seedIfNeeded fetchCount threw: \(error)")
        existingVerbCount = 0
    }
    if existingVerbCount > 0 { return }

    print("Seeding database...")

    // Try direct bundle lookup first, then the Resources subdirectory, then a raw path.
    let url = Bundle.main.url(forResource: "conjuga_data", withExtension: "json")
        ?? Bundle.main.url(forResource: "conjuga_data", withExtension: "json", subdirectory: "Resources")
        ?? Bundle.main.bundleURL.appendingPathComponent("Resources/conjuga_data.json")

    guard let data = try? Data(contentsOf: url) else {
        print("ERROR: Could not load conjuga_data.json from bundle at \(url)")
        return
    }

    guard let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any] else {
        print("ERROR: Could not parse conjuga_data")
        return
    }

    // Seed tense guides; entries missing a required field are skipped.
    if let guides = json["tenseGuides"] as? [[String: Any]] {
        for g in guides {
            guard let tenseId = g["tenseId"] as? String,
                  let title = g["title"] as? String,
                  let body = g["body"] as? String else { continue }
            context.insert(TenseGuide(tenseId: tenseId, title: title, body: body))
        }
    }

    // Seed verbs; entries missing a required field are skipped.
    if let verbs = json["verbs"] as? [[String: Any]] {
        for v in verbs {
            guard let id = v["id"] as? Int,
                  let infinitive = v["infinitive"] as? String,
                  let english = v["english"] as? String,
                  let rank = v["rank"] as? Int,
                  let ending = v["ending"] as? String,
                  let reflexive = v["reflexive"] as? Int,
                  let level = v["level"] as? String else { continue }
            let verb = Verb(id: id, infinitive: infinitive, english: english, rank: rank, ending: ending, reflexive: reflexive, level: level)
            context.insert(verb)
        }
        print("Inserted \(verbs.count) verbs")
    }

    save(after: "tense guides and verbs")

    // Seed verb forms — bulk insert, no relationship assignment (use verbId for queries).
    // Each chunk is saved inside its own autorelease pool.
    let chunkSize = 20000
    if let forms = json["verbForms"] as? [[String: Any]] {
        for chunkStart in stride(from: 0, to: forms.count, by: chunkSize) {
            autoreleasepool {
                let chunkEnd = min(chunkStart + chunkSize, forms.count)
                for f in forms[chunkStart..<chunkEnd] {
                    guard let verbId = f["verbId"] as? Int,
                          let tenseId = f["tenseId"] as? String,
                          let personIndex = f["personIndex"] as? Int,
                          let form = f["form"] as? String,
                          let regularity = f["regularity"] as? String else { continue }
                    context.insert(VerbForm(verbId: verbId, tenseId: tenseId, personIndex: personIndex, form: form, regularity: regularity))
                }
                save(after: "verb forms chunk starting at \(chunkStart)")
            }
        }
        print("Inserted \(forms.count) verb forms")
    }

    // Seed irregular spans — bulk insert, chunked the same way as verb forms.
    if let spans = json["irregularSpans"] as? [[String: Any]] {
        for chunkStart in stride(from: 0, to: spans.count, by: chunkSize) {
            autoreleasepool {
                let chunkEnd = min(chunkStart + chunkSize, spans.count)
                for s in spans[chunkStart..<chunkEnd] {
                    guard let verbId = s["verbId"] as? Int,
                          let tenseId = s["tenseId"] as? String,
                          let personIndex = s["personIndex"] as? Int,
                          let spanType = s["type"] as? Int,
                          let pattern = s["pattern"] as? Int,
                          let spanStart = s["start"] as? Int,
                          let spanEnd = s["end"] as? Int else { continue }
                    context.insert(IrregularSpan(verbId: verbId, tenseId: tenseId, personIndex: personIndex, spanType: spanType, pattern: pattern, start: spanStart, end: spanEnd))
                }
                save(after: "irregular spans chunk starting at \(chunkStart)")
            }
        }
        print("Inserted \(spans.count) irregular spans")
    }

    do {
        try context.save()
    } catch {
        print("[DataLoader] 🔥 Final verb save error: \(error)")
    }
    print("Verb seeding complete")

    // Seed course data through the same context used for the verb data above.
    seedCourseData(context: context)
    // Record the course version like the textbook version below, so the
    // version-gated course refresh does not immediately wipe and re-seed
    // the data that was just inserted.
    UserDefaults.standard.set(courseDataVersion, forKey: courseDataKey)

    // Seed textbook data
    seedTextbookData(context: context)
    UserDefaults.standard.set(textbookDataVersion, forKey: textbookDataKey)
}
|
|
|
|
/// Re-seeds textbook data when the stored textbook version is older than
/// `textbookDataVersion`.
///
/// Deletes all `TextbookChapter` rows, the textbook-scoped `CourseDeck` rows
/// and the `VocabCard` rows belonging to those decks, then re-runs the textbook
/// seeder and stores the new version. Decks of other courses are left untouched.
/// Deletion failures are logged and the re-seed still proceeds (best-effort).
///
/// - Parameter container: Container used to create the `ModelContext` for the refresh.
static func refreshTextbookDataIfNeeded(container: ModelContainer) async {
    let shared = UserDefaults.standard
    if shared.integer(forKey: textbookDataKey) >= textbookDataVersion { return }

    print("Textbook data version outdated — re-seeding...")
    let context = ModelContext(container)

    // Runs one step and logs (rather than silently drops) any failure.
    func attempt(_ what: String, _ work: () throws -> Void) {
        do {
            try work()
        } catch {
            print("[DataLoader] ⚠️ \(what) failed: \(error)")
        }
    }

    // Only wipe textbook chapters and our textbook-scoped CourseDecks
    // (not the LanGo decks, which live in the same tables).
    attempt("delete TextbookChapter") { try context.delete(model: TextbookChapter.self) }

    let textbookCourseName = "Complete Spanish Step-by-Step"

    // Remove the cards of the textbook decks explicitly. Whether deleting a
    // CourseDeck cascades to its VocabCards depends on the model's delete rule,
    // which is not visible here; leftover cards would be duplicated by the
    // re-seed below. The prefix mirrors the deck id format built in
    // seedTextbookVocabDecks ("textbook_<slug>_ch<N>").
    let deckIdPrefix = "textbook_"
        + textbookCourseName.lowercased().replacingOccurrences(of: " ", with: "-")
        + "_ch"
    let cardDescriptor = FetchDescriptor<VocabCard>(
        predicate: #Predicate<VocabCard> { $0.deckId.starts(with: deckIdPrefix) }
    )
    attempt("delete textbook vocab cards") {
        for card in try context.fetch(cardDescriptor) { context.delete(card) }
    }

    let deckDescriptor = FetchDescriptor<CourseDeck>(
        predicate: #Predicate<CourseDeck> { $0.courseName == textbookCourseName }
    )
    attempt("delete textbook decks") {
        for deck in try context.fetch(deckDescriptor) { context.delete(deck) }
    }
    attempt("save after textbook wipe") { try context.save() }

    seedTextbookData(context: context)
    shared.set(textbookDataVersion, forKey: textbookDataKey)
    print("Textbook data re-seeded to version \(textbookDataVersion)")
}
|
|
|
|
/// Re-seeds course data when the stored course version is older than
/// `courseDataVersion` (e.g. examples were added).
///
/// Call this on every launch — it checks a version key and only re-seeds when needed.
/// Wipes all `VocabCard` and `CourseDeck` rows, replaces the tense guides with
/// the ones from the bundled `conjuga_data.json`, re-seeds the course decks and
/// the textbook vocab decks, then stores the new version.
///
/// Existing tense guides are only deleted when replacement guides could be
/// loaded, so an unreadable bundle file does not leave the store without guides.
///
/// - Parameter container: Container used to create the `ModelContext` for the refresh.
static func refreshCourseDataIfNeeded(container: ModelContainer) async {
    let shared = UserDefaults.standard

    if shared.integer(forKey: courseDataKey) >= courseDataVersion { return }

    print("Course data version outdated — re-seeding...")
    let context = ModelContext(container)

    // Runs one step and logs (rather than silently drops) any failure.
    func attempt(_ what: String, _ work: () throws -> Void) {
        do {
            try work()
        } catch {
            print("[DataLoader] ⚠️ \(what) failed: \(error)")
        }
    }

    // Load the replacement tense guides first, using the same lookup chain as
    // the initial seeding (direct, Resources subdirectory, raw path).
    let guidesURL = Bundle.main.url(forResource: "conjuga_data", withExtension: "json")
        ?? Bundle.main.url(forResource: "conjuga_data", withExtension: "json", subdirectory: "Resources")
        ?? Bundle.main.bundleURL.appendingPathComponent("Resources/conjuga_data.json")
    var replacementGuides: [[String: Any]]?
    if let data = try? Data(contentsOf: guidesURL),
       let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any] {
        replacementGuides = json["tenseGuides"] as? [[String: Any]]
    }
    if replacementGuides == nil {
        print("[DataLoader] ⚠️ Could not load tense guides from conjuga_data.json — keeping existing guides")
    }

    // Delete existing course data (and tense guides, when replacements exist)
    // so they can be re-seeded from the bundled JSON.
    attempt("delete VocabCard") { try context.delete(model: VocabCard.self) }
    attempt("delete CourseDeck") { try context.delete(model: CourseDeck.self) }
    if replacementGuides != nil {
        attempt("delete TenseGuide") { try context.delete(model: TenseGuide.self) }
    }
    attempt("save after course wipe") { try context.save() }

    // Re-seed tense guides; entries missing a required field are skipped.
    if let guides = replacementGuides {
        for g in guides {
            guard let tenseId = g["tenseId"] as? String,
                  let title = g["title"] as? String,
                  let body = g["body"] as? String else { continue }
            context.insert(TenseGuide(tenseId: tenseId, title: title, body: body))
        }
        attempt("save tense guides") { try context.save() }
        print("Re-seeded \(guides.count) tense guides")
    }

    // Re-seed course data
    seedCourseData(context: context)

    // Textbook's vocab decks/cards share the same CourseDeck/VocabCard
    // entities, so they were just wiped above. Reseed them.
    seedTextbookVocabDecks(context: context, courseName: "Complete Spanish Step-by-Step")

    shared.set(courseDataVersion, forKey: courseDataKey)
    print("Course data re-seeded to version \(courseDataVersion)")
}
|
|
|
|
/// Copies legacy review progress stored on local `VocabCard` rows into
/// `CourseReviewCard` rows in the cloud store, once per migration version.
///
/// A card is migrated when `hasLegacyCourseProgress` reports progress on it,
/// unless the matching review card already has a `lastReviewDate` that is not
/// older than the legacy one. The migration version is only recorded after the
/// local fetch and the cloud save have succeeded, so a failed run is retried
/// on a later call instead of being marked as done.
///
/// - Parameters:
///   - localContainer: Container holding the `VocabCard` rows to read from.
///   - cloudContainer: Container receiving the `CourseReviewCard` rows.
static func migrateCourseProgressIfNeeded(
    localContainer: ModelContainer,
    cloudContainer: ModelContainer
) async {
    let migrationVersion = 2
    let key = "courseProgressMigrationVersion"
    let shared = UserDefaults.standard

    if shared.integer(forKey: key) >= migrationVersion { return }

    let localContext = ModelContext(localContainer)
    let cloudContext = ModelContext(cloudContainer)

    let allCards: [VocabCard]
    do {
        allCards = try localContext.fetch(FetchDescriptor<VocabCard>())
    } catch {
        // Leave the version untouched so the migration is attempted again.
        print("[DataLoader] ⚠️ course progress migration could not fetch cards: \(error)")
        return
    }

    var migratedCount = 0

    for card in allCards where hasLegacyCourseProgress(card) {
        let reviewKey = CourseCardStore.reviewKey(for: card)
        let reviewCard = findOrCreateCourseReviewCard(
            id: reviewKey,
            deckId: card.deckId,
            front: card.front,
            back: card.back,
            context: cloudContext
        )

        // Keep the cloud copy when it was reviewed at or after the legacy date.
        if let reviewDate = reviewCard.lastReviewDate,
           let legacyDate = card.lastReviewDate,
           reviewDate >= legacyDate {
            continue
        }

        reviewCard.easeFactor = card.easeFactor
        reviewCard.interval = card.interval
        reviewCard.repetitions = card.repetitions
        reviewCard.dueDate = card.dueDate
        reviewCard.lastReviewDate = card.lastReviewDate
        migratedCount += 1
    }

    if migratedCount > 0 {
        do {
            try cloudContext.save()
            print("Migrated \(migratedCount) course progress cards to cloud store")
        } catch {
            // Do not stamp the version: the progress has not been persisted.
            print("[DataLoader] ⚠️ course progress migration save failed: \(error)")
            return
        }
    }

    shared.set(migrationVersion, forKey: key)
}
|
|
|
|
/// Inserts course decks and their vocab cards from the bundled `course_data.json`.
///
/// Accepts two file layouts: `{"courses": [...]}` and the older single-course
/// `{"course": "...", "weeks": [...]}`. Entries missing a required field are
/// skipped. The context is saved (best-effort) after each week.
///
/// - Parameter context: Context that receives the inserted `CourseDeck` and `VocabCard` rows.
private static func seedCourseData(context: ModelContext) {
    let bundledURL = Bundle.main.url(forResource: "course_data", withExtension: "json")
    let fileURL = bundledURL ?? Bundle.main.bundleURL.appendingPathComponent("course_data.json")

    guard let raw = try? Data(contentsOf: fileURL) else {
        print("No course_data.json found — skipping course seeding")
        return
    }

    guard let root = try? JSONSerialization.jsonObject(with: raw) as? [String: Any] else {
        print("ERROR: Could not parse course_data.json")
        return
    }

    // New layout wraps courses in an array; the old layout is a single course object.
    let courseEntries: [[String: Any]]
    if let wrapped = root["courses"] as? [[String: Any]] {
        courseEntries = wrapped
    } else if root["weeks"] != nil {
        courseEntries = [root]
    } else {
        courseEntries = []
    }

    var insertedDecks = 0
    var insertedCards = 0

    for entry in courseEntries {
        guard let weekEntries = entry["weeks"] as? [[String: Any]],
              let courseName = entry["course"] as? String else { continue }

        let slug = courseName.lowercased()
            .replacingOccurrences(of: " ", with: "-")
            .replacingOccurrences(of: "|", with: "")

        for weekEntry in weekEntries {
            guard let weekNumber = weekEntry["week"] as? Int,
                  let deckEntries = weekEntry["decks"] as? [[String: Any]] else { continue }

            for (position, deckEntry) in deckEntries.enumerated() {
                guard let title = deckEntry["title"] as? String,
                      let rawCards = deckEntry["cards"] as? [Any] else { continue }

                let isReversed = (deckEntry["isReversed"] as? Bool) ?? false
                let direction = isReversed ? "rev" : "fwd"
                let deckId = "\(slug)_w\(weekNumber)_\(position)_\(direction)"

                let deck = CourseDeck(
                    id: deckId,
                    weekNumber: weekNumber,
                    title: title,
                    cardCount: rawCards.count,
                    courseName: courseName,
                    isReversed: isReversed
                )
                context.insert(deck)
                insertedDecks += 1

                for rawCard in rawCards {
                    guard let fields = rawCard as? [String: Any],
                          let front = fields["front"] as? String,
                          let back = fields["back"] as? String else { continue }

                    // Only examples that carry a Spanish sentence are kept;
                    // missing translations/blanks become empty strings.
                    let examples = (fields["examples"] as? [[String: String]]) ?? []
                    let usable = examples.filter { $0["es"] != nil }
                    let spanish = usable.compactMap { $0["es"] }
                    let english = usable.map { $0["en"] ?? "" }
                    let blanks = usable.map { $0["blank"] ?? "" }

                    let card = VocabCard(front: front, back: back, deckId: deckId, examplesES: spanish, examplesEN: english, examplesBlanks: blanks)
                    card.deck = deck
                    context.insert(card)
                    insertedCards += 1
                }
            }

            try? context.save()
        }
    }

    print("Course seeding complete: \(insertedDecks) decks, \(insertedCards) cards")
}
|
|
|
|
/// Reports whether a vocab card carries any review progress.
///
/// A card counts as having progress when it has repetitions, a positive
/// interval, a recorded review date, or an ease factor that differs from 2.5
/// by more than 0.0001 (2.5 is presumably the model's default — confirm
/// against `VocabCard`).
private static func hasLegacyCourseProgress(_ card: VocabCard) -> Bool {
    if card.repetitions > 0 || card.interval > 0 { return true }
    if card.lastReviewDate != nil { return true }
    let easeDrift = abs(card.easeFactor - 2.5)
    return easeDrift > 0.0001
}
|
|
|
|
/// Returns the `CourseReviewCard` with the given id, inserting a new one when
/// none is found.
///
/// A failed fetch is treated the same as "not found", so a new card is
/// inserted in that case as well. The context is not saved here.
///
/// - Parameters:
///   - id: Identifier to look up and to assign to a newly created card.
///   - deckId: Deck identifier used when creating a card.
///   - front: Front text used when creating a card.
///   - back: Back text used when creating a card.
///   - context: Context to search and insert into.
/// - Returns: The existing or newly inserted review card.
private static func findOrCreateCourseReviewCard(
    id: String,
    deckId: String,
    front: String,
    back: String,
    context: ModelContext
) -> CourseReviewCard {
    let lookup = FetchDescriptor<CourseReviewCard>(
        predicate: #Predicate<CourseReviewCard> { $0.id == id }
    )
    let matches = try? context.fetch(lookup)

    guard let found = matches?.first else {
        let created = CourseReviewCard(id: id, deckId: deckId, front: front, back: back)
        context.insert(created)
        return created
    }
    return found
}
|
|
|
|
// MARK: - Textbook seeding
|
|
|
|
/// Inserts one `TextbookChapter` per chapter in the bundled `textbook_data.json`,
/// then seeds the textbook vocab decks.
///
/// Each chapter's blocks are reduced to a fixed set of keys per block kind and
/// re-encoded as JSON for the chapter's `bodyJSON`. Chapters missing a required
/// field, or whose blocks cannot be encoded, are skipped. Returns early (after
/// logging) when the file is absent, unparsable or has no `chapters` array.
///
/// - Parameter context: Context that receives the inserted chapters.
private static func seedTextbookData(context: ModelContext) {
    let bundledURL = Bundle.main.url(forResource: "textbook_data", withExtension: "json")
    let fileURL = bundledURL ?? Bundle.main.bundleURL.appendingPathComponent("textbook_data.json")
    guard let raw = try? Data(contentsOf: fileURL) else {
        print("[DataLoader] textbook_data.json not bundled — skipping textbook seed")
        return
    }
    guard let root = try? JSONSerialization.jsonObject(with: raw) as? [String: Any] else {
        print("[DataLoader] ERROR: Could not parse textbook_data.json")
        return
    }
    let courseName = (root["courseName"] as? String) ?? "Textbook"
    guard let chapterEntries = root["chapters"] as? [[String: Any]] else {
        print("[DataLoader] ERROR: textbook_data.json missing chapters")
        return
    }

    // For every block kind: which source keys are carried over, and under
    // which key they are stored. Kinds not listed keep only "index" and "kind".
    let carriedKeys: [String: [(String, String)]] = [
        "heading": [("level", "level"), ("text", "text")],
        "paragraph": [("text", "text")],
        "vocab_table": [
            ("sourceImage", "sourceImage"),
            ("ocrLines", "ocrLines"),
            ("ocrConfidence", "ocrConfidence"),
        ],
        "exercise": [
            ("id", "exerciseId"),
            ("instruction", "instruction"),
            ("extra", "extra"),
            ("prompts", "prompts"),
            ("answerItems", "answerItems"),
            ("freeform", "freeform"),
        ],
    ]

    var insertedChapters = 0
    for entry in chapterEntries {
        guard let id = entry["id"] as? String,
              let number = entry["number"] as? Int,
              let title = entry["title"] as? String,
              let rawBlocks = entry["blocks"] as? [[String: Any]] else { continue }

        let part = (entry["part"] as? Int) ?? 0

        // Normalize each block to canonical keys expected by TextbookBlock decoder.
        let kinds = rawBlocks.map { ($0["kind"] as? String) ?? "" }
        let exerciseCount = kinds.filter { $0 == "exercise" }.count
        let vocabTableCount = kinds.filter { $0 == "vocab_table" }.count

        let normalizedBlocks: [[String: Any]] = rawBlocks.enumerated().map { position, block in
            let kind = kinds[position]
            var canonical: [String: Any] = ["index": position, "kind": kind]
            for (sourceKey, targetKey) in carriedKeys[kind] ?? [] {
                if let value = block[sourceKey] {
                    canonical[targetKey] = value
                }
            }
            return canonical
        }

        let bodyJSON: Data
        do {
            bodyJSON = try JSONSerialization.data(withJSONObject: normalizedBlocks, options: [])
        } catch {
            print("[DataLoader] failed to encode chapter \(number) blocks: \(error)")
            continue
        }

        let chapter = TextbookChapter(
            id: id,
            number: number,
            title: title,
            part: part,
            courseName: courseName,
            bodyJSON: bodyJSON,
            exerciseCount: exerciseCount,
            vocabTableCount: vocabTableCount
        )
        context.insert(chapter)
        insertedChapters += 1
    }

    try? context.save()

    // Seed textbook-derived vocabulary flashcards as CourseDecks so the
    // existing Course UI can surface them alongside LanGo decks.
    seedTextbookVocabDecks(context: context, courseName: courseName)

    print("Textbook seeding complete: \(insertedChapters) chapters")
}
|
|
|
|
/// Inserts one `CourseDeck` per textbook chapter that has vocab cards in the
/// bundled `textbook_vocab.json`, together with a `VocabCard` per card entry.
///
/// Returns silently when the file is absent, unparsable or has no `chapters`
/// array. Chapters without a chapter number or with an empty card list are
/// skipped, as are card entries lacking `front` or `back`. The deck's
/// `cardCount` is the number of card entries in the file, including skipped ones.
///
/// - Parameters:
///   - context: Context that receives the inserted decks and cards.
///   - courseName: Course name stored on each deck and used to build the deck ids.
private static func seedTextbookVocabDecks(context: ModelContext, courseName: String) {
    let bundledURL = Bundle.main.url(forResource: "textbook_vocab", withExtension: "json")
    let fileURL = bundledURL ?? Bundle.main.bundleURL.appendingPathComponent("textbook_vocab.json")

    guard let raw = try? Data(contentsOf: fileURL) else { return }
    guard let root = try? JSONSerialization.jsonObject(with: raw) as? [String: Any] else { return }
    guard let chapterEntries = root["chapters"] as? [[String: Any]] else { return }

    let slug = courseName.lowercased().replacingOccurrences(of: " ", with: "-")

    var insertedDecks = 0
    var insertedCards = 0
    for entry in chapterEntries {
        guard let chapterNumber = entry["chapter"] as? Int,
              let cardEntries = entry["cards"] as? [[String: Any]],
              !cardEntries.isEmpty else { continue }

        let deckId = "textbook_\(slug)_ch\(chapterNumber)"
        let deck = CourseDeck(
            id: deckId,
            weekNumber: chapterNumber,
            title: "Chapter \(chapterNumber) vocabulary",
            cardCount: cardEntries.count,
            courseName: courseName,
            isReversed: false
        )
        context.insert(deck)
        insertedDecks += 1

        for cardEntry in cardEntries {
            guard let front = cardEntry["front"] as? String,
                  let back = cardEntry["back"] as? String else { continue }
            let card = VocabCard(front: front, back: back, deckId: deckId)
            card.deck = deck
            context.insert(card)
            insertedCards += 1
        }
    }
    try? context.save()
    print("Textbook vocab seeding complete: \(insertedDecks) decks, \(insertedCards) cards")
}
|
|
}
|