Files
Spanish/Conjuga/Conjuga/Services/DataLoader.swift
T
Trey T 7da98d786c Vocab study — noun & adjective flashcards with CEFR level toggles
Add SRS-driven noun and adjective flashcards modeled on the existing verb
flashcard flow:

- SharedModels/Lexeme — catalog of non-verb vocab, frequency-ranked, with
  gender for nouns and optional example sentences. Seeded from a bundled
  vocab_lexemes.json built by Scripts/vocab/build_lexemes.py, which joins
  frequency.csv + es-en.data from a pinned doozan/spanish_data commit
  (CC-BY-SA: hermitdave/FrequencyWords + Wiktionary). 1,449 nouns and 600
  adjectives, each with Wiktionary-sourced gender and (where available)
  an example sentence with English translation.
- LexemeReviewCard + LexemeReviewStore — cloud-synced SM-2 SRS, keyed by
  partOfSpeech + lexemeId + drillMode so future drill modes can coexist.
- LexemeSessionQueue + LexemePool — parallel to VocabSessionQueue; fresh
  cards sort by frequency rank.
- LexemeStudyGroup — cloud-synced resumable session per
  (partOfSpeech, drillMode).
- NounFlashcardPracticeView + AdjectiveFlashcardPracticeView — same flow
  as VocabFlashcardPracticeView: English prompt → tap to reveal Spanish
  → Again/Hard/Good/Easy. Nouns reveal with their article (la taza, el
  problema) so gender is taught alongside meaning, not as a separate
  quiz. Example sentence shown when present.

CEFR-style level toggles:
- LexemeLevel enum (A1/A2/B1/B2/C1+) derived from frequencyRank with
  standard Spanish-frequency-dictionary cutoffs (250/500/1000/2000).
- UserProgress.selectedLexemeLevels — cloud-synced multi-select, defaults
  to A1+A2 on first launch.
- SettingsView gains a "Vocabulary Levels" section with five toggles; the
  existing "Levels" section is renamed "Verb Levels" for clarity.
- Due SRS cards always surface regardless of toggles. Disabling a level
  only stops new cards from that band entering the pool.

PracticeView gets "Nouns" and "Adjectives" rows under "Books".

DataLoader: new lexemeDataVersion gate that re-seeds the Lexeme table
from vocab_lexemes.json independent of book seeding. project.yml lists
the new JSON resource and the existing book_olly-vol2.json (which the
previous build was silently excluding because xcodegen rewrote the
project from project.yml).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-19 20:16:55 -05:00

855 lines
36 KiB
Swift

import SwiftData
import SharedModels
import Foundation
actor DataLoader {
/// Target version of the bundled course data. Compared against the integer
/// stored in UserDefaults under `courseDataKey`; a lower stored value
/// triggers a course re-seed.
static let courseDataVersion = 9 // bump: all 19 tense guides + 36 grammar notes enriched to teacher-handout depth
/// UserDefaults key holding the last course data version that was seeded.
static let courseDataKey = "courseDataVersion"
/// Target version of the bundled textbook data (see `textbookDataKey`).
static let textbookDataVersion = 14
/// UserDefaults key holding the last textbook data version that was seeded.
static let textbookDataKey = "textbookDataVersion"
/// Target version of the bundled book data (see `bookDataKey`).
static let bookDataVersion = 7 // Lexeme table + WordGloss.gender added
/// UserDefaults key holding the last book data version that was seeded.
static let bookDataKey = "bookDataVersion"
/// Target version of the bundled lexeme catalog (see `lexemeDataKey`).
static let lexemeDataVersion = 1 // initial seed from vocab_lexemes.json
/// UserDefaults key holding the last lexeme catalog version that was seeded.
static let lexemeDataKey = "lexemeDataVersion"
/// Quick check: does the store need its initial seed, or does any of the
/// course / textbook / book data sets need a refresh?
///
/// Returns `true` when the `Verb` table is empty (a failed count is treated
/// as empty) or when any stored data version in UserDefaults is lower than
/// its target constant.
///
/// NOTE(review): `lexemeDataVersion` is not consulted here — confirm that
/// lexeme refreshes are meant to run without this gate.
static func needsSeeding(container: ModelContainer) async -> Bool {
    // An empty verb table means the initial seed never ran.
    let verbTotal = (try? ModelContext(container).fetchCount(FetchDescriptor<Verb>())) ?? 0
    guard verbTotal != 0 else { return true }

    // Each gate pairs a UserDefaults key with the version the app expects.
    let defaults = UserDefaults.standard
    let gates: [(key: String, target: Int)] = [
        (courseDataKey, courseDataVersion),
        (textbookDataKey, textbookDataVersion),
        (bookDataKey, bookDataVersion),
    ]
    return gates.contains { defaults.integer(forKey: $0.key) < $0.target }
}
/// Performs the initial seed of the store from the bundled JSON resources.
///
/// Does nothing when the store already holds at least one `Verb`. If the
/// verb count cannot be read, the count is treated as 0 and seeding proceeds.
/// Seeds, in order: tense guides, verbs, verb forms, irregular spans (all
/// from `conjuga_data.json`), then course data, textbook data and books.
/// The textbook and book version keys are only written when their seed
/// reports success.
///
/// - Parameter container: The model container whose store is seeded.
static func seedIfNeeded(container: ModelContainer) async {
    let context = ModelContext(container)
    let count: Int
    do {
        count = try context.fetchCount(FetchDescriptor<Verb>())
        print("[DataLoader] seedIfNeeded: existing verb count = \(count)")
    } catch {
        print("[DataLoader] ⚠️ seedIfNeeded fetchCount threw: \(error)")
        count = 0
    }
    if count > 0 { return }
    print("Seeding database...")

    // Try direct bundle lookup first, then the "Resources" subdirectory,
    // then a raw path under the bundle root.
    let url = Bundle.main.url(forResource: "conjuga_data", withExtension: "json")
        ?? Bundle.main.url(forResource: "conjuga_data", withExtension: "json", subdirectory: "Resources")
        ?? Bundle.main.bundleURL.appendingPathComponent("Resources/conjuga_data.json")
    guard let data = try? Data(contentsOf: url) else {
        print("ERROR: Could not load conjuga_data.json from bundle at \(url)")
        return
    }
    guard let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any] else {
        print("ERROR: Could not parse conjuga_data")
        return
    }

    // Seed tense guides
    if let guides = json["tenseGuides"] as? [[String: Any]] {
        for g in guides {
            guard let tenseId = g["tenseId"] as? String,
                  let title = g["title"] as? String,
                  let body = g["body"] as? String else { continue }
            context.insert(TenseGuide(tenseId: tenseId, title: title, body: body))
        }
    }

    // Seed verbs
    if let verbs = json["verbs"] as? [[String: Any]] {
        for v in verbs {
            guard let id = v["id"] as? Int,
                  let infinitive = v["infinitive"] as? String,
                  let english = v["english"] as? String,
                  let rank = v["rank"] as? Int,
                  let ending = v["ending"] as? String,
                  let reflexive = v["reflexive"] as? Int,
                  let level = v["level"] as? String else { continue }
            context.insert(Verb(id: id, infinitive: infinitive, english: english, rank: rank, ending: ending, reflexive: reflexive, level: level))
        }
        print("Inserted \(verbs.count) verbs")
    }
    // Best-effort intermediate save: log a failure instead of hiding it, but
    // keep going so the final save below still gets a chance.
    do {
        try context.save()
    } catch {
        print("[DataLoader] ⚠️ save after tense guides/verbs failed: \(error)")
    }

    // Seed verb forms — bulk insert, no relationship assignment (use verbId for queries)
    let chunkSize = 20000
    if let forms = json["verbForms"] as? [[String: Any]] {
        for i in stride(from: 0, to: forms.count, by: chunkSize) {
            autoreleasepool {
                let chunkEnd = min(i + chunkSize, forms.count)
                for j in i..<chunkEnd {
                    let f = forms[j]
                    guard let verbId = f["verbId"] as? Int,
                          let tenseId = f["tenseId"] as? String,
                          let personIndex = f["personIndex"] as? Int,
                          let form = f["form"] as? String,
                          let regularity = f["regularity"] as? String else { continue }
                    context.insert(VerbForm(verbId: verbId, tenseId: tenseId, personIndex: personIndex, form: form, regularity: regularity))
                }
                // Save once per chunk.
                do {
                    try context.save()
                } catch {
                    print("[DataLoader] ⚠️ verb form chunk save failed at offset \(i): \(error)")
                }
            }
        }
        print("Inserted \(forms.count) verb forms")
    }

    // Seed irregular spans — bulk insert
    if let spans = json["irregularSpans"] as? [[String: Any]] {
        for i in stride(from: 0, to: spans.count, by: chunkSize) {
            autoreleasepool {
                let chunkEnd = min(i + chunkSize, spans.count)
                for j in i..<chunkEnd {
                    let s = spans[j]
                    guard let verbId = s["verbId"] as? Int,
                          let tenseId = s["tenseId"] as? String,
                          let personIndex = s["personIndex"] as? Int,
                          let spanType = s["type"] as? Int,
                          let pattern = s["pattern"] as? Int,
                          let start = s["start"] as? Int,
                          let end = s["end"] as? Int else { continue }
                    context.insert(IrregularSpan(verbId: verbId, tenseId: tenseId, personIndex: personIndex, spanType: spanType, pattern: pattern, start: start, end: end))
                }
                do {
                    try context.save()
                } catch {
                    print("[DataLoader] ⚠️ irregular span chunk save failed at offset \(i): \(error)")
                }
            }
        }
        print("Inserted \(spans.count) irregular spans")
    }

    do {
        try context.save()
    } catch {
        print("[DataLoader] 🔥 Final verb save error: \(error)")
    }
    print("Verb seeding complete")

    // Seed course data with the same context used for the verb seed.
    seedCourseData(context: context)

    // Seed textbook data — only bump the version key if the seed
    // actually inserted rows, so a missing/unparseable bundle doesn't
    // permanently lock us out of future re-seeds.
    if seedTextbookData(context: context) {
        UserDefaults.standard.set(textbookDataVersion, forKey: textbookDataKey)
    }
    if seedBooks(context: context) {
        UserDefaults.standard.set(bookDataVersion, forKey: bookDataKey)
    }
}
/// Re-seeds `Book` / `BookChapter` rows when the stored book data version is
/// behind `bookDataVersion` or when no `Book` rows exist.
///
/// Existing books and chapters are deleted and saved first; if that save
/// fails the function returns without seeding. The version key is written
/// only when `seedBooks` reports success.
static func refreshBooksDataIfNeeded(container: ModelContainer) async {
    let defaults = UserDefaults.standard
    let ctx = ModelContext(container)
    let bookRows = (try? ctx.fetchCount(FetchDescriptor<Book>())) ?? 0
    let savedVersion = defaults.integer(forKey: bookDataKey)
    let isUpToDate = savedVersion >= bookDataVersion
    print("[DataLoader] refreshBooksDataIfNeeded: existing=\(bookRows) stored=\(savedVersion) target=\(bookDataVersion) versionCurrent=\(isUpToDate)")

    // Nothing to do only when the version is current AND rows are present.
    guard !isUpToDate || bookRows <= 0 else { return }

    // Wipe row by row before re-seeding.
    ((try? ctx.fetch(FetchDescriptor<Book>())) ?? []).forEach { ctx.delete($0) }
    ((try? ctx.fetch(FetchDescriptor<BookChapter>())) ?? []).forEach { ctx.delete($0) }
    do {
        try ctx.save()
    } catch {
        print("[DataLoader] ERROR: book wipe save failed: \(error)")
        return
    }

    guard seedBooks(context: ctx) else {
        print("[DataLoader] Book reseed produced no rows — leaving version key untouched")
        return
    }
    defaults.set(bookDataVersion, forKey: bookDataKey)
    print("[DataLoader] Book data re-seeded to version \(bookDataVersion)")
}
/// Re-seeds textbook data when the stored version is behind
/// `textbookDataVersion` OR when no `TextbookChapter` rows are on disk.
///
/// The row-count check exists because anything opening this store with a
/// subset schema (e.g. an out-of-date widget extension) can destructively
/// drop the rows without touching UserDefaults — so a pure version-flag
/// trigger would leave us permanently empty.
static func refreshTextbookDataIfNeeded(container: ModelContainer) async {
    let defaults = UserDefaults.standard
    let ctx = ModelContext(container)
    let chapterRows = (try? ctx.fetchCount(FetchDescriptor<TextbookChapter>())) ?? 0
    let isUpToDate = defaults.integer(forKey: textbookDataKey) >= textbookDataVersion

    switch (isUpToDate, chapterRows > 0) {
    case (true, true):
        return
    case (true, false):
        print("Textbook data version current but store has \(chapterRows) chapters — re-seeding...")
    case (false, _):
        print("Textbook data version outdated — re-seeding...")
    }

    // Fetch + delete individually instead of batch delete. SwiftData's
    // context.delete(model:) hits the store directly and doesn't always
    // clear the unique-constraint index before the reseed's save runs,
    // so re-inserting rows with the same .unique id can throw.
    for staleChapter in (try? ctx.fetch(FetchDescriptor<TextbookChapter>())) ?? [] {
        ctx.delete(staleChapter)
    }
    // Only the textbook's own decks are removed; the predicate captures
    // this local constant.
    let textbookCourseName = "Complete Spanish Step-by-Step"
    let textbookDecks = FetchDescriptor<CourseDeck>(
        predicate: #Predicate<CourseDeck> { $0.courseName == textbookCourseName }
    )
    for staleDeck in (try? ctx.fetch(textbookDecks)) ?? [] {
        ctx.delete(staleDeck)
    }

    do {
        try ctx.save()
    } catch {
        print("[DataLoader] ERROR: textbook wipe save failed: \(error)")
        return
    }

    guard seedTextbookData(context: ctx) else {
        print("Textbook re-seed failed — leaving version key untouched so next launch retries")
        return
    }
    defaults.set(textbookDataVersion, forKey: textbookDataKey)
    print("Textbook data re-seeded to version \(textbookDataVersion)")
}
/// Re-seed course data if the version has changed (e.g. examples were added).
/// Call this on every launch — it checks a version key and only re-seeds
/// when the stored version is behind `courseDataVersion`.
///
/// Wipes all `VocabCard` and `CourseDeck` rows, then re-seeds course decks
/// and the textbook vocab decks. `TenseGuide` rows are replaced only when
/// the replacement guides were loaded from the bundled `conjuga_data.json`;
/// if that file cannot be read or parsed, the existing guides are kept
/// rather than deleted with nothing to put back.
static func refreshCourseDataIfNeeded(container: ModelContainer) async {
    let shared = UserDefaults.standard
    if shared.integer(forKey: courseDataKey) >= courseDataVersion { return }
    print("Course data version outdated — re-seeding...")
    let context = ModelContext(container)

    // Load the replacement guides BEFORE deleting anything. Uses the same
    // lookup chain as the initial seed so a file that only resolves through
    // the "Resources" fallbacks is still found.
    let guidesURL = Bundle.main.url(forResource: "conjuga_data", withExtension: "json")
        ?? Bundle.main.url(forResource: "conjuga_data", withExtension: "json", subdirectory: "Resources")
        ?? Bundle.main.bundleURL.appendingPathComponent("Resources/conjuga_data.json")
    var replacementGuides: [[String: Any]]?
    if let data = try? Data(contentsOf: guidesURL),
       let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any] {
        replacementGuides = json["tenseGuides"] as? [[String: Any]]
    }

    // Delete existing course data so it can be re-seeded. Tense guides are
    // only deleted when replacements are in hand.
    try? context.delete(model: VocabCard.self)
    try? context.delete(model: CourseDeck.self)
    if replacementGuides != nil {
        try? context.delete(model: TenseGuide.self)
    }
    try? context.save()

    // Re-seed tense guides from the bundled JSON
    if let guides = replacementGuides {
        for g in guides {
            guard let tenseId = g["tenseId"] as? String,
                  let title = g["title"] as? String,
                  let body = g["body"] as? String else { continue }
            context.insert(TenseGuide(tenseId: tenseId, title: title, body: body))
        }
        try? context.save()
        print("Re-seeded \(guides.count) tense guides")
    } else {
        print("[DataLoader] WARN: could not load tense guides from conjuga_data.json — keeping existing tense guides")
    }

    // Re-seed course data
    seedCourseData(context: context)
    // Textbook's vocab decks/cards share the same CourseDeck/VocabCard
    // entities, so they were just wiped above. Reseed them.
    seedTextbookVocabDecks(context: context, courseName: "Complete Spanish Step-by-Step")
    shared.set(courseDataVersion, forKey: courseDataKey)
    print("Course data re-seeded to version \(courseDataVersion)")
}
/// One-time migration that copies legacy review progress stored on local
/// `VocabCard` rows onto `CourseReviewCard` rows in the cloud store.
///
/// For every local card that shows legacy progress, the matching review card
/// is found or created and its `easeFactor`, `interval`, `repetitions`,
/// `dueDate` and `lastReviewDate` are overwritten — unless the review card
/// already has a `lastReviewDate` at or after the legacy one.
///
/// The migration version key is written only when the run completed: if the
/// local fetch or the cloud save throws, the key is left untouched so the
/// next launch retries instead of silently dropping the progress.
///
/// - Parameters:
///   - localContainer: Container holding the legacy `VocabCard` rows.
///   - cloudContainer: Container that receives the `CourseReviewCard` rows.
static func migrateCourseProgressIfNeeded(
    localContainer: ModelContainer,
    cloudContainer: ModelContainer
) async {
    let migrationVersion = 2
    let key = "courseProgressMigrationVersion"
    let shared = UserDefaults.standard
    if shared.integer(forKey: key) >= migrationVersion { return }

    let localContext = ModelContext(localContainer)
    let cloudContext = ModelContext(cloudContainer)

    let allCards: [VocabCard]
    do {
        allCards = try localContext.fetch(FetchDescriptor<VocabCard>())
    } catch {
        // A failed fetch is not "nothing to migrate" — retry next launch.
        print("[DataLoader] ERROR: course progress migration fetch failed: \(error) — will retry next launch")
        return
    }

    var migratedCount = 0
    for card in allCards where hasLegacyCourseProgress(card) {
        let reviewKey = CourseCardStore.reviewKey(for: card)
        let reviewCard = findOrCreateCourseReviewCard(
            id: reviewKey,
            deckId: card.deckId,
            front: card.front,
            back: card.back,
            context: cloudContext
        )
        // Keep the cloud copy when it is at least as recent as the legacy one.
        if let reviewDate = reviewCard.lastReviewDate,
           let legacyDate = card.lastReviewDate,
           reviewDate >= legacyDate {
            continue
        }
        reviewCard.easeFactor = card.easeFactor
        reviewCard.interval = card.interval
        reviewCard.repetitions = card.repetitions
        reviewCard.dueDate = card.dueDate
        reviewCard.lastReviewDate = card.lastReviewDate
        migratedCount += 1
    }

    if migratedCount > 0 {
        do {
            try cloudContext.save()
        } catch {
            // Do not mark the migration done when nothing was persisted.
            print("[DataLoader] ERROR: course progress migration save failed: \(error) — will retry next launch")
            return
        }
        print("Migrated \(migratedCount) course progress cards to cloud store")
    }
    shared.set(migrationVersion, forKey: key)
}
/// Inserts `CourseDeck` and `VocabCard` rows from the bundled
/// `course_data.json`, saving after each week.
///
/// Accepts two file layouts: `{"courses": [...]}` and the older single-course
/// `{"course": "...", "weeks": [...]}`. Entries missing required fields are
/// skipped. Save errors are ignored.
private static func seedCourseData(context: ModelContext) {
    let bundle = Bundle.main
    let fileURL = bundle.url(forResource: "course_data", withExtension: "json")
        ?? bundle.bundleURL.appendingPathComponent("course_data.json")
    guard let raw = try? Data(contentsOf: fileURL) else {
        print("No course_data.json found — skipping course seeding")
        return
    }
    guard let root = try? JSONSerialization.jsonObject(with: raw) as? [String: Any] else {
        print("ERROR: Could not parse course_data.json")
        return
    }

    // Normalize both supported layouts into a list of course dictionaries.
    let courses: [[String: Any]]
    if let list = root["courses"] as? [[String: Any]] {
        courses = list
    } else if root["weeks"] != nil {
        courses = [root]
    } else {
        courses = []
    }

    var decksSeeded = 0
    var cardsSeeded = 0
    for course in courses {
        guard let weeks = course["weeks"] as? [[String: Any]],
              let courseName = course["course"] as? String else { continue }
        let slug = courseName.lowercased()
            .replacingOccurrences(of: " ", with: "-")
            .replacingOccurrences(of: "|", with: "")

        for week in weeks {
            guard let weekNumber = week["week"] as? Int,
                  let deckList = week["decks"] as? [[String: Any]] else { continue }

            for (position, deckJSON) in deckList.enumerated() {
                guard let title = deckJSON["title"] as? String,
                      let rawCards = deckJSON["cards"] as? [Any] else { continue }
                let reversed = (deckJSON["isReversed"] as? Bool) ?? false
                let direction = reversed ? "rev" : "fwd"
                let deckId = "\(slug)_w\(weekNumber)_\(position)_\(direction)"

                let deck = CourseDeck(
                    id: deckId,
                    weekNumber: weekNumber,
                    title: title,
                    cardCount: rawCards.count,
                    courseName: courseName,
                    isReversed: reversed
                )
                context.insert(deck)
                decksSeeded += 1

                for case let cardJSON as [String: Any] in rawCards {
                    guard let front = cardJSON["front"] as? String,
                          let back = cardJSON["back"] as? String else { continue }

                    // Only examples that carry a Spanish sentence are kept;
                    // missing English / blank fields become empty strings.
                    let examples = ((cardJSON["examples"] as? [[String: String]]) ?? [])
                        .filter { $0["es"] != nil }
                    let card = VocabCard(
                        front: front,
                        back: back,
                        deckId: deckId,
                        examplesES: examples.compactMap { $0["es"] },
                        examplesEN: examples.map { $0["en"] ?? "" },
                        examplesBlanks: examples.map { $0["blank"] ?? "" }
                    )
                    card.deck = deck
                    context.insert(card)
                    cardsSeeded += 1
                }
            }
            try? context.save()
        }
    }
    print("Course seeding complete: \(decksSeeded) decks, \(cardsSeeded) cards")
}
/// Reports whether a local `VocabCard` carries any review progress: a
/// positive repetition count or interval, an ease factor that differs from
/// 2.5 by more than 0.0001, or a recorded last review date.
/// (2.5 is presumably the untouched default ease — confirm against the model.)
private static func hasLegacyCourseProgress(_ card: VocabCard) -> Bool {
    if card.repetitions > 0 { return true }
    if card.interval > 0 { return true }
    if abs(card.easeFactor - 2.5) > 0.0001 { return true }
    return card.lastReviewDate != nil
}
/// Returns the `CourseReviewCard` whose `id` matches, or inserts and returns
/// a new one built from the given fields when none is found (a failed fetch
/// counts as "not found").
private static func findOrCreateCourseReviewCard(
    id: String,
    deckId: String,
    front: String,
    back: String,
    context: ModelContext
) -> CourseReviewCard {
    let byId = FetchDescriptor<CourseReviewCard>(
        predicate: #Predicate<CourseReviewCard> { $0.id == id }
    )
    let matches = try? context.fetch(byId)
    guard let found = matches?.first else {
        let created = CourseReviewCard(id: id, deckId: deckId, front: front, back: back)
        context.insert(created)
        return created
    }
    return found
}
// MARK: - Textbook seeding
/// Inserts one `TextbookChapter` per chapter in the bundled
/// `textbook_data.json`, then seeds the textbook vocab decks.
///
/// Each chapter's blocks are normalized to a fixed key set and stored as
/// JSON in `bodyJSON`. Chapters missing required fields are skipped.
///
/// - Returns: `true` when the save succeeded and at least one
///   `TextbookChapter` row is in the store afterwards; `false` when the file
///   is missing or unparseable, the save throws, or no rows persisted.
@discardableResult
private static func seedTextbookData(context: ModelContext) -> Bool {
    let fileURL = Bundle.main.url(forResource: "textbook_data", withExtension: "json")
        ?? Bundle.main.bundleURL.appendingPathComponent("textbook_data.json")
    guard let raw = try? Data(contentsOf: fileURL) else {
        print("[DataLoader] textbook_data.json not bundled — skipping textbook seed")
        return false
    }
    guard let root = try? JSONSerialization.jsonObject(with: raw) as? [String: Any] else {
        print("[DataLoader] ERROR: Could not parse textbook_data.json")
        return false
    }
    let courseName = (root["courseName"] as? String) ?? "Textbook"
    guard let chapterList = root["chapters"] as? [[String: Any]] else {
        print("[DataLoader] ERROR: textbook_data.json missing chapters")
        return false
    }

    // Copies each listed key from `source` into `target` when it is present.
    func carry(_ keys: [String], from source: [String: Any], into target: inout [String: Any]) {
        for key in keys {
            if let value = source[key] { target[key] = value }
        }
    }

    var inserted = 0
    for chapterJSON in chapterList {
        guard let id = chapterJSON["id"] as? String,
              let number = chapterJSON["number"] as? Int,
              let title = chapterJSON["title"] as? String,
              let rawBlocks = chapterJSON["blocks"] as? [[String: Any]] else { continue }
        let part = (chapterJSON["part"] as? Int) ?? 0

        // Normalize each block to canonical keys expected by TextbookBlock decoder.
        var blocks: [[String: Any]] = []
        var exerciseCount = 0
        var vocabTableCount = 0
        for (index, source) in rawBlocks.enumerated() {
            let kind = (source["kind"] as? String) ?? ""
            var block: [String: Any] = [:]
            block["index"] = index
            block["kind"] = kind

            switch kind {
            case "heading":
                carry(["level", "text"], from: source, into: &block)
            case "paragraph":
                carry(["text"], from: source, into: &block)
            case "vocab_table":
                vocabTableCount += 1
                carry(["sourceImage", "ocrLines", "ocrConfidence"], from: source, into: &block)
                // Paired Spanish/English cards from the bounding-box extractor.
                // Cards lacking a string front or back are dropped; the key
                // is omitted entirely when nothing usable remains.
                let pairs = ((source["cards"] as? [[String: Any]]) ?? []).compactMap { card -> [String: Any]? in
                    guard let front = card["front"] as? String,
                          let back = card["back"] as? String else { return nil }
                    return ["front": front, "back": back]
                }
                if !pairs.isEmpty {
                    block["cards"] = pairs
                }
            case "exercise":
                exerciseCount += 1
                // The source "id" is stored under "exerciseId".
                if let exerciseId = source["id"] { block["exerciseId"] = exerciseId }
                carry(["instruction", "extra", "prompts", "answerItems", "freeform"], from: source, into: &block)
            default:
                // "key_vocab_header" and any other kind carry only index + kind.
                break
            }
            blocks.append(block)
        }

        let bodyJSON: Data
        do {
            bodyJSON = try JSONSerialization.data(withJSONObject: blocks, options: [])
        } catch {
            print("[DataLoader] failed to encode chapter \(number) blocks: \(error)")
            continue
        }
        context.insert(
            TextbookChapter(
                id: id,
                number: number,
                title: title,
                part: part,
                courseName: courseName,
                bodyJSON: bodyJSON,
                exerciseCount: exerciseCount,
                vocabTableCount: vocabTableCount
            )
        )
        inserted += 1
    }

    do {
        try context.save()
    } catch {
        print("[DataLoader] ERROR: textbook chapter save failed: \(error)")
        return false
    }

    // Verify rows actually hit the store — guards against the case where
    // save returned cleanly but no rows were persisted.
    let persisted = (try? context.fetchCount(FetchDescriptor<TextbookChapter>())) ?? 0
    if persisted <= 0 {
        print("[DataLoader] ERROR: textbook seeded \(inserted) chapters but persisted count is 0")
        return false
    }

    // Seed textbook-derived vocabulary flashcards as CourseDecks so the
    // existing Course UI can surface them alongside LanGo decks.
    seedTextbookVocabDecks(context: context, courseName: courseName)
    print("Textbook seeding complete: \(inserted) chapters inserted, \(persisted) persisted")
    return true
}
// MARK: - Books seeding
/// Seeds `Book` + `BookChapter` rows from every bundled book JSON resolved
/// by `bundledBookJSONURLs()`.
///
/// Files that cannot be read, or that lack `slug`, `title` or `chapters`,
/// are skipped with a warning. Chapters lacking `number` or `title` are
/// skipped. Glossary entries are read field by field, so one entry with a
/// missing or non-string field (e.g. a null `gender`) no longer causes the
/// whole glossary to be dropped.
///
/// - Returns: `true` when the save succeeded and at least one `Book` row is
///   in the store afterwards (mirrors `seedTextbookData`'s contract).
@discardableResult
private static func seedBooks(context: ModelContext) -> Bool {
    let bookURLs = bundledBookJSONURLs()
    guard !bookURLs.isEmpty else {
        print("[DataLoader] no book_*.json bundled — skipping book seed")
        return false
    }
    var insertedBooks = 0
    for url in bookURLs {
        guard let data = try? Data(contentsOf: url),
              let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any] else {
            print("[DataLoader] WARN: could not read \(url.lastPathComponent)")
            continue
        }
        guard let slug = json["slug"] as? String,
              let title = json["title"] as? String,
              let chaptersRaw = json["chapters"] as? [[String: Any]] else {
            print("[DataLoader] WARN: \(url.lastPathComponent) missing required fields")
            continue
        }
        let author = (json["author"] as? String) ?? ""
        let language = (json["language"] as? String) ?? "es"

        // Pre-computed per-book glossary, keyed by cleaned word. Cast each
        // entry and field individually: a blanket `[String: [String: String]]`
        // cast fails for the ENTIRE glossary if any single value is not a
        // string.
        var glossary: [String: WordGloss] = [:]
        if let glossaryRaw = json["glossary"] as? [String: Any] {
            for (word, rawFields) in glossaryRaw {
                guard let fields = rawFields as? [String: Any] else { continue }
                glossary[word] = WordGloss(
                    baseForm: (fields["baseForm"] as? String) ?? word,
                    english: (fields["english"] as? String) ?? "",
                    partOfSpeech: (fields["partOfSpeech"] as? String) ?? "",
                    gender: fields["gender"] as? String
                )
            }
        }
        let glossaryData = (try? JSONEncoder().encode(glossary)) ?? Data()

        let book = Book(
            slug: slug,
            title: title,
            author: author,
            language: language,
            chapterCount: chaptersRaw.count,
            accentColorHex: accentHex(forSlug: slug),
            glossaryJSON: glossaryData
        )
        context.insert(book)
        insertedBooks += 1

        for ch in chaptersRaw {
            guard let number = ch["number"] as? Int,
                  let chTitle = ch["title"] as? String else { continue }
            let paragraphsES = (ch["paragraphsES"] as? [String]) ?? []
            let paragraphsEN = (ch["paragraphsEN"] as? [String]) ?? []
            let esData = (try? JSONEncoder().encode(paragraphsES)) ?? Data()
            let enData = (try? JSONEncoder().encode(paragraphsEN)) ?? Data()
            let chapter = BookChapter(
                id: "\(slug)-ch\(number)",
                bookSlug: slug,
                number: number,
                title: chTitle,
                paragraphCount: paragraphsES.count,
                paragraphsESJSON: esData,
                paragraphsENJSON: enData
            )
            context.insert(chapter)
        }
    }

    do {
        try context.save()
    } catch {
        print("[DataLoader] ERROR: book save failed: \(error)")
        return false
    }

    // Confirm rows actually reached the store before reporting success.
    let persistedBooks = (try? context.fetchCount(FetchDescriptor<Book>())) ?? 0
    let persistedChapters = (try? context.fetchCount(FetchDescriptor<BookChapter>())) ?? 0
    guard persistedBooks > 0 else {
        print("[DataLoader] ERROR: seeded \(insertedBooks) books but persisted count is 0")
        return false
    }
    print("Book seeding complete: \(persistedBooks) books, \(persistedChapters) chapters")
    return true
}
// MARK: - Lexeme catalog (Phase 3 of vocab study)
/// Re-seed the `Lexeme` catalog if the version has changed or the rows
/// are missing. The catalog is sourced from the bundled
/// `vocab_lexemes.json` (built by `Scripts/vocab/build_lexemes.py` from
/// doozan/spanish_data) — independent from book seeding so a catalog
/// refresh doesn't require touching books.
///
/// Existing `Lexeme` rows are deleted and saved first; if that save fails
/// the function returns without seeding. The version key is written only
/// when the seed reports success.
static func refreshLexemesIfNeeded(container: ModelContainer) async {
    let defaults = UserDefaults.standard
    let ctx = ModelContext(container)
    let lexemeRows = (try? ctx.fetchCount(FetchDescriptor<Lexeme>())) ?? 0
    let savedVersion = defaults.integer(forKey: lexemeDataKey)
    let isUpToDate = savedVersion >= lexemeDataVersion
    print("[DataLoader] refreshLexemesIfNeeded: existing=\(lexemeRows) stored=\(savedVersion) target=\(lexemeDataVersion) versionCurrent=\(isUpToDate)")

    // Skip only when the version is current AND rows are present.
    guard !isUpToDate || lexemeRows <= 0 else { return }

    ((try? ctx.fetch(FetchDescriptor<Lexeme>())) ?? []).forEach { ctx.delete($0) }
    do {
        try ctx.save()
    } catch {
        print("[DataLoader] ERROR: lexeme wipe save failed: \(error)")
        return
    }

    guard seedLexemesFromCatalog(context: ctx) else {
        print("[DataLoader] Lexeme reseed produced no rows — leaving version key untouched")
        return
    }
    defaults.set(lexemeDataVersion, forKey: lexemeDataKey)
    print("[DataLoader] Lexeme data re-seeded to version \(lexemeDataVersion)")
}
/// Read `vocab_lexemes.json` from the app bundle and insert one `Lexeme`
/// per entry, all tagged with the source slug "catalog".
///
/// Entries with an empty or missing `baseForm`, `english` or `partOfSpeech`
/// are skipped, as are repeats of the same partOfSpeech + baseForm pair.
/// A missing `frequencyRank` is stored as 0.
///
/// - Returns: `true` when the save succeeded and at least one `Lexeme` row
///   is in the store afterwards.
private static func seedLexemesFromCatalog(context: ModelContext) -> Bool {
    guard let catalogURL = Bundle.main.url(forResource: "vocab_lexemes", withExtension: "json") else {
        print("[DataLoader] no vocab_lexemes.json bundled — skipping lexeme seed")
        return false
    }
    guard let raw = try? Data(contentsOf: catalogURL),
          let entries = try? JSONSerialization.jsonObject(with: raw) as? [[String: Any]] else {
        print("[DataLoader] ERROR: vocab_lexemes.json malformed")
        return false
    }

    // Defensive: the build script already dedupes, but skip any stray
    // dupes so we never throw on the unique-constraint save.
    var claimedKeys: Set<String> = []
    var inserted = 0
    let catalogSlug = "catalog"
    for entry in entries {
        guard let baseForm = entry["baseForm"] as? String, !baseForm.isEmpty,
              let english = entry["english"] as? String, !english.isEmpty,
              let pos = entry["partOfSpeech"] as? String, !pos.isEmpty,
              claimedKeys.insert("\(pos):\(baseForm)").inserted else {
            continue
        }
        context.insert(
            Lexeme(
                id: Lexeme.makeID(sourceBookSlug: catalogSlug, partOfSpeech: pos, baseForm: baseForm),
                partOfSpeech: pos,
                baseForm: baseForm,
                english: english,
                gender: entry["gender"] as? String,
                sourceBookSlug: catalogSlug,
                frequencyRank: (entry["frequencyRank"] as? Int) ?? 0,
                exampleES: entry["exampleES"] as? String,
                exampleEN: entry["exampleEN"] as? String
            )
        )
        inserted += 1
    }

    do {
        try context.save()
    } catch {
        print("[DataLoader] ERROR: lexeme save failed: \(error)")
        return false
    }

    // Confirm rows actually reached the store before reporting success.
    let persisted = (try? context.fetchCount(FetchDescriptor<Lexeme>())) ?? 0
    if persisted <= 0 {
        print("[DataLoader] ERROR: seeded \(inserted) lexemes but persisted count is 0")
        return false
    }
    print("Lexeme seeding complete: \(persisted) lexemes from catalog")
    return true
}
/// Slugs of books bundled with the app. Kept explicit so device installs
/// don't depend on `Bundle.urls(forResourcesWithExtension:subdirectory:)`
/// successfully enumerating the bundle — that API has been observed to
/// return empty for some iOS configurations even when the resource is
/// present. Each slug is resolved with the same
/// `bundleURL.appendingPathComponent` fallback used by the textbook seed.
private static let bundledBookSlugs: [String] = [
"olly-vol2",
]
/// Resolve URLs for every bundled book. Tries the explicit slugs in
/// `bundledBookSlugs` first (same lookup pattern as `seedTextbookData`),
/// then enumerates the bundle's JSON resources for any other `book_*` file
/// so newly-bundled books are picked up without a code change.
///
/// - Returns: The unique book URLs, sorted by file name.
private static func bundledBookJSONURLs() -> [URL] {
    let mainBundle = Bundle.main
    var claimedNames: Set<String> = []
    var found: [URL] = []

    // Fast path: explicit slugs, accepted only when the file really exists.
    for slug in bundledBookSlugs {
        let filename = "book_\(slug).json"
        let candidate = mainBundle.url(forResource: "book_\(slug)", withExtension: "json")
            ?? mainBundle.bundleURL.appendingPathComponent(filename)
        guard FileManager.default.fileExists(atPath: candidate.path) else { continue }
        if claimedNames.insert(filename).inserted {
            found.append(candidate)
        }
    }

    // Fallback: anything else in the bundle root named book_*.json.
    let enumerated = mainBundle.urls(forResourcesWithExtension: "json", subdirectory: nil) ?? []
    for url in enumerated {
        let name = url.lastPathComponent
        guard name.hasPrefix("book_"), claimedNames.insert(name).inserted else { continue }
        found.append(url)
    }

    // The log lists files in discovery order; the result is sorted by name.
    let names = found.map { $0.lastPathComponent }.joined(separator: ", ")
    print("[DataLoader] bundledBookJSONURLs found \(found.count) files: [\(names)]")
    return found.sorted { lhs, rhs in lhs.lastPathComponent < rhs.lastPathComponent }
}
/// Deterministic accent colour for a book, derived from its slug so the
/// cover tile has a stable colour across launches.
///
/// - Parameter slug: The book slug; any string, including empty, is accepted.
/// - Returns: One of the eight palette hex strings.
private static func accentHex(forSlug slug: String) -> String {
    let palette = [
        "#7B6CF6", "#E07A5F", "#3D5A80", "#81B29A",
        "#F2CC8F", "#D4A5A5", "#5B8A72", "#A06CD5",
    ]
    // Wrapping multiply/add: the hash may overflow into any Int value.
    let hash = slug.unicodeScalars.reduce(0) { ($0 &* 31) &+ Int($1.value) }
    // `abs(Int.min)` traps, and the wrapping hash can land on Int.min.
    // `magnitude` yields the same value as `abs` for every other input and
    // is defined for Int.min as well.
    let index = Int(hash.magnitude % UInt(palette.count))
    return palette[index]
}
/// Inserts one `CourseDeck` (plus its `VocabCard` rows) per chapter listed
/// in the bundled `textbook_vocab.json`, then saves once at the end.
///
/// Returns silently when the file is missing or unparseable. Chapters with
/// no cards, and cards lacking a string `front` or `back`, are skipped.
/// Save errors are ignored.
///
/// - Parameters:
///   - context: The context that receives the inserted rows.
///   - courseName: Stored on each deck and used (lowercased, spaces replaced
///     by dashes) inside the deck id.
private static func seedTextbookVocabDecks(context: ModelContext, courseName: String) {
    let fileURL = Bundle.main.url(forResource: "textbook_vocab", withExtension: "json")
        ?? Bundle.main.bundleURL.appendingPathComponent("textbook_vocab.json")
    guard let raw = try? Data(contentsOf: fileURL),
          let root = try? JSONSerialization.jsonObject(with: raw) as? [String: Any],
          let chapters = root["chapters"] as? [[String: Any]]
    else { return }

    let slug = courseName.lowercased().replacingOccurrences(of: " ", with: "-")
    var decksSeeded = 0
    var cardsSeeded = 0

    for chapterJSON in chapters {
        guard let chapterNumber = chapterJSON["chapter"] as? Int,
              let cardList = chapterJSON["cards"] as? [[String: Any]],
              !cardList.isEmpty else { continue }

        let deckId = "textbook_\(slug)_ch\(chapterNumber)"
        let deck = CourseDeck(
            id: deckId,
            weekNumber: chapterNumber,
            title: "Chapter \(chapterNumber) vocabulary",
            cardCount: cardList.count,
            courseName: courseName,
            isReversed: false
        )
        context.insert(deck)
        decksSeeded += 1

        for cardJSON in cardList {
            guard let front = cardJSON["front"] as? String,
                  let back = cardJSON["back"] as? String else { continue }
            let card = VocabCard(front: front, back: back, deckId: deckId)
            card.deck = deck
            context.insert(card)
            cardsSeeded += 1
        }
    }

    try? context.save()
    print("Textbook vocab seeding complete: \(decksSeeded) decks, \(cardsSeeded) cards")
}
}