import Foundation
import SwiftData

/// A non-verb vocabulary item harvested from the books pipeline's per-book
/// glossary. Verbs keep their own richer `Verb` model — `Lexeme` covers
/// nouns, adjectives, etc. so the flashcard study modes can drill the grammar
/// that's specific to each part of speech.
///
/// Identity is `"<sourceBookSlug>:<partOfSpeech>:<baseForm>"` (built by
/// `makeID(sourceBookSlug:partOfSpeech:baseForm:)`); the seeder dedupes on
/// `(partOfSpeech, baseForm)` across books and keeps the first-seen source.
/// Lives in the LOCAL reference-data store (same place as
/// `Book`/`BookChapter`), not the cloud container.
@Model
public final class Lexeme {
    /// Unique identity string; see `makeID(sourceBookSlug:partOfSpeech:baseForm:)`.
    @Attribute(.unique) public var id: String = ""

    /// Part-of-speech tag for this entry.
    public var partOfSpeech: String = ""

    /// Dictionary (base) form of the word.
    public var baseForm: String = ""

    /// English gloss.
    public var english: String = ""

    /// For nouns: "m", "f", or "m/f". Nil for non-nouns or when unknown.
    /// The curated catalog (`vocab_lexemes.json` from doozan/spanish_data)
    /// emits Wiktionary-sourced gender; `Lexeme.inferGender` provides a
    /// morphology fallback if a different seeder ever lands a noun without
    /// one.
    public var gender: String? = nil

    /// Source tag — `"catalog"` for entries from `vocab_lexemes.json`, or a
    /// book slug for legacy book-glossary-derived entries. Used to keep
    /// catalog refreshes from wiping book-personal additions later.
    public var sourceBookSlug: String = ""

    /// 1-based rank in the source frequency list (lower = more common).
    /// 0 means unknown/unranked. `LexemePool` sorts fresh cards by this so
    /// the most-useful words surface first.
    public var frequencyRank: Int = 0

    /// Optional example sentence pair, shown below the answer in Recall
    /// mode. Sourced from Wiktionary's `ex:`/`eng:` lines when available.
    public var exampleES: String? = nil
    public var exampleEN: String? = nil

    /// Creates a lexeme with every stored field supplied explicitly.
    ///
    /// - Parameters:
    ///   - id: Unique identity; normally produced by `makeID`.
    ///   - partOfSpeech: Part-of-speech tag.
    ///   - baseForm: Dictionary form of the word.
    ///   - english: English gloss.
    ///   - gender: "m", "f", "m/f", or nil when not applicable/unknown.
    ///   - sourceBookSlug: `"catalog"` or a book slug.
    ///   - frequencyRank: 1-based frequency rank; 0 means unranked.
    ///   - exampleES: Optional Spanish example sentence.
    ///   - exampleEN: Optional English translation of the example.
    public init(
        id: String,
        partOfSpeech: String,
        baseForm: String,
        english: String,
        gender: String? = nil,
        sourceBookSlug: String = "",
        frequencyRank: Int = 0,
        exampleES: String? = nil,
        exampleEN: String? = nil
    ) {
        self.id = id
        self.partOfSpeech = partOfSpeech
        self.baseForm = baseForm
        self.english = english
        self.gender = gender
        self.sourceBookSlug = sourceBookSlug
        self.frequencyRank = frequencyRank
        self.exampleES = exampleES
        self.exampleEN = exampleEN
    }

    /// Builds the identity string `"<sourceBookSlug>:<partOfSpeech>:<baseForm>"`.
    ///
    /// The components are joined verbatim with `:`; no case folding or
    /// escaping is applied.
    public static func makeID(sourceBookSlug: String, partOfSpeech: String, baseForm: String) -> String {
        "\(sourceBookSlug):\(partOfSpeech):\(baseForm)"
    }

    /// Best-effort gender from Spanish morphology. Used as a fallback when
    /// the glossary pipeline hasn't emitted a `gender` field yet. Conservative:
    /// returns nil for ambiguous endings rather than guessing wrong.
    ///
    /// The input is trimmed of surrounding whitespace and lowercased before
    /// matching. Rules are applied in this order:
    ///
    /// - a short list of common irregular nouns whose gender contradicts
    ///   their ending (`día` → masculine, `mano` → feminine, …)
    /// - `-ción/-sión/-dad/-tad/-tud/-umbre/-ez/-anza` → feminine
    /// - `-aje/-or` → masculine
    /// - `-ma/-pa/-ta` → nil (Greek-origin masculines mix with regular -a feminines)
    /// - `-a` (other) → feminine
    /// - `-o` → masculine
    /// - everything else → nil
    ///
    /// - Parameter baseForm: Dictionary form of the noun.
    /// - Returns: `"m"`, `"f"`, or nil when the gender can't be inferred.
    public static func inferGender(forBaseForm baseForm: String) -> String? {
        let word = baseForm
            .trimmingCharacters(in: .whitespacesAndNewlines)
            .lowercased()

        // High-frequency irregulars that the suffix rules below would get
        // wrong. Checked first so the ending never overrides a known answer.
        switch word {
        case "día", "tranvía", "ajedrez":
            return "m"
        case "mano", "foto", "moto", "flor", "coliflor", "labor":
            return "f"
        default:
            break
        }

        let feminineSuffixes = ["ción", "sión", "dad", "tad", "tud", "umbre", "ez", "anza"]
        if feminineSuffixes.contains(where: { word.hasSuffix($0) }) {
            return "f"
        }

        let masculineSuffixes = ["aje", "or"]
        if masculineSuffixes.contains(where: { word.hasSuffix($0) }) {
            return "m"
        }

        // Must precede the generic "-a" rule: these endings mix Greek-origin
        // masculines with regular feminines, so no guess is made.
        let ambiguousSuffixes = ["ma", "pa", "ta"]
        if ambiguousSuffixes.contains(where: { word.hasSuffix($0) }) {
            return nil
        }

        if word.hasSuffix("a") { return "f" }
        if word.hasSuffix("o") { return "m" }
        return nil
    }
}