7da98d786c
Add SRS-driven noun and adjective flashcards modeled on the existing verb flashcard flow: - SharedModels/Lexeme — catalog of non-verb vocab, frequency-ranked, with gender for nouns and optional example sentences. Seeded from a bundled vocab_lexemes.json built by Scripts/vocab/build_lexemes.py, which joins frequency.csv + es-en.data from a pinned doozan/spanish_data commit (CC-BY-SA: hermitdave/FrequencyWords + Wiktionary). 1,449 nouns and 600 adjectives, each with Wiktionary-sourced gender and (where available) an example sentence with English translation. - LexemeReviewCard + LexemeReviewStore — cloud-synced SM-2 SRS, keyed by partOfSpeech + lexemeId + drillMode so future drill modes can coexist. - LexemeSessionQueue + LexemePool — parallel to VocabSessionQueue; fresh cards sort by frequency rank. - LexemeStudyGroup — cloud-synced resumable session per (partOfSpeech, drillMode). - NounFlashcardPracticeView + AdjectiveFlashcardPracticeView — same flow as VocabFlashcardPracticeView: English prompt → tap to reveal Spanish → Again/Hard/Good/Easy. Nouns reveal with their article (la taza, el problema) so gender is taught alongside meaning, not as a separate quiz. Example sentence shown when present. CEFR-style level toggles: - LexemeLevel enum (A1/A2/B1/B2/C1+) derived from frequencyRank with standard Spanish-frequency-dictionary cutoffs (250/500/1000/2000). - UserProgress.selectedLexemeLevels — cloud-synced multi-select, defaults to A1+A2 on first launch. - SettingsView gains a "Vocabulary Levels" section with five toggles; the existing "Levels" section is renamed "Verb Levels" for clarity. - Due SRS cards always surface regardless of toggles. Disabling a level only stops new cards from that band entering the pool. PracticeView gets "Nouns" and "Adjectives" rows under "Books". DataLoader: new lexemeDataVersion gate that re-seeds the Lexeme table from vocab_lexemes.json independent of book seeding. 
project.yml lists the new JSON resource and the existing book_olly-vol2.json (which the previous build was silently excluding because xcodegen rewrote the project from project.yml). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
92 lines
3.8 KiB
Swift
92 lines
3.8 KiB
Swift
import Foundation
|
|
import SwiftData
|
|
|
|
/// One non-verb vocabulary entry (noun, adjective, …) harvested from the
/// books pipeline's per-book glossary.
///
/// Verbs are modelled separately by the richer `Verb` type; `Lexeme` covers
/// the remaining parts of speech so the flashcard study modes can drill the
/// grammar that is particular to each of them.
///
/// The `id` has the shape `"<sourceBookSlug>:<partOfSpeech>:<baseForm>"`
/// (see `makeID`). The seeder de-duplicates on `(partOfSpeech, baseForm)`
/// across books and keeps whichever source it encountered first. Instances
/// are stored in the LOCAL reference-data store (alongside
/// `Book`/`BookChapter`), not in the cloud container.
@Model
public final class Lexeme {
    /// Unique identity string; build it with `makeID`.
    @Attribute(.unique) public var id: String = ""

    /// Part-of-speech tag for this entry.
    public var partOfSpeech: String = ""

    /// The Spanish base form of the word.
    public var baseForm: String = ""

    /// English translation of `baseForm`.
    public var english: String = ""

    /// Noun gender: `"m"`, `"f"`, or `"m/f"`. `nil` for non-nouns or when the
    /// gender is unknown. The curated catalog (`vocab_lexemes.json` from
    /// doozan/spanish_data) supplies Wiktionary-sourced gender;
    /// `Lexeme.inferGender` is the morphology-based fallback should another
    /// seeder ever produce a noun without one.
    public var gender: String? = nil

    /// Origin tag: `"catalog"` for entries from `vocab_lexemes.json`, or a
    /// book slug for legacy entries derived from a book glossary. Kept so
    /// that a catalog refresh does not wipe book-personal additions later.
    public var sourceBookSlug: String = ""

    /// 1-based position in the source frequency list (lower = more common);
    /// `0` means unknown/unranked. `LexemePool` orders fresh cards by this
    /// value so the most useful words surface first.
    public var frequencyRank: Int = 0

    /// Spanish half of the optional example sentence pair shown below the
    /// answer in Recall mode. Sourced from Wiktionary's `ex:`/`eng:` lines
    /// when available.
    public var exampleES: String? = nil

    /// English half of the optional example sentence pair.
    public var exampleEN: String? = nil

    /// Creates a lexeme, storing every argument in the property of the same
    /// name.
    ///
    /// - Parameters:
    ///   - id: Unique identity string (see `makeID`).
    ///   - partOfSpeech: Part-of-speech tag.
    ///   - baseForm: Spanish base form.
    ///   - english: English translation.
    ///   - gender: `"m"`, `"f"`, `"m/f"`, or `nil`. Defaults to `nil`.
    ///   - sourceBookSlug: Origin tag. Defaults to the empty string.
    ///   - frequencyRank: 1-based frequency rank; `0` (the default) means
    ///     unranked.
    ///   - exampleES: Optional Spanish example sentence.
    ///   - exampleEN: Optional English translation of the example.
    public init(
        id: String,
        partOfSpeech: String,
        baseForm: String,
        english: String,
        gender: String? = nil,
        sourceBookSlug: String = "",
        frequencyRank: Int = 0,
        exampleES: String? = nil,
        exampleEN: String? = nil
    ) {
        // Identity and core vocabulary fields.
        self.id = id
        self.partOfSpeech = partOfSpeech
        self.baseForm = baseForm
        self.english = english

        // Optional / supplementary metadata.
        self.gender = gender
        self.sourceBookSlug = sourceBookSlug
        self.frequencyRank = frequencyRank
        self.exampleES = exampleES
        self.exampleEN = exampleEN
    }

    /// Builds the identity string `"<sourceBookSlug>:<partOfSpeech>:<baseForm>"`.
    ///
    /// - Returns: The three components joined with `":"`, in that order.
    public static func makeID(sourceBookSlug: String, partOfSpeech: String, baseForm: String) -> String {
        [sourceBookSlug, partOfSpeech, baseForm].joined(separator: ":")
    }

    /// Guesses a noun's gender from its ending, for use when the glossary
    /// pipeline has not supplied a `gender` value. Deliberately conservative:
    /// an ambiguous ending yields `nil` instead of a possibly wrong guess.
    ///
    /// Rules are applied to the lowercased word, first match wins:
    ///
    /// 1. `-ción/-sión/-dad/-tad/-tud/-umbre/-ez/-anza` → `"f"`
    /// 2. `-aje/-or` → `"m"`
    /// 3. `-ma/-pa/-ta` → `nil` (Greek-origin masculines mix with regular
    ///    `-a` feminines)
    /// 4. any other `-a` → `"f"`
    /// 5. `-o` → `"m"`
    /// 6. anything else → `nil`
    ///
    /// - Parameter baseForm: The word to inspect; letter case is ignored.
    /// - Returns: `"f"`, `"m"`, or `nil` when no rule gives a confident answer.
    public static func inferGender(forBaseForm baseForm: String) -> String? {
        let word = baseForm.lowercased()

        // True when `word` ends with at least one of the given endings.
        func endsWithAny(_ endings: [String]) -> Bool {
            endings.contains { word.hasSuffix($0) }
        }

        let feminineEndings = ["ción", "sión", "dad", "tad", "tud", "umbre", "ez", "anza"]
        let masculineEndings = ["aje", "or"]
        let ambiguousEndings = ["ma", "pa", "ta"]

        if endsWithAny(feminineEndings) { return "f" }
        if endsWithAny(masculineEndings) { return "m" }

        // Must be tested before the generic "-a" rule so that words such as
        // "problema" are not reported as feminine.
        guard !endsWithAny(ambiguousEndings) else { return nil }

        if word.hasSuffix("a") { return "f" }
        return word.hasSuffix("o") ? "m" : nil
    }
}
|