Fixes #33 — verb examples must actually use the verb
The example sentences in VerbDetailView (and the new vocab practice modes) frequently used the wrong verb — a "tener" set would show "Él estaba leyendo un libro" (estar), "Nosotros vamos a viajar" (ir), "Tú debes estudiar" (deber). The model drifted off the target verb partway through generating the 6-example batch, and nothing checked the output.

Two defenses:

1. Prompt grounding — VerbExampleGenerator.generate now takes a formsByTense map (tenseId → conjugated forms, from the new ReferenceStore.conjugatedForms). Each tense line in the prompt lists the verb's exact conjugated forms and instructs the model to use one of them, so the model echoes a real form instead of recalling (and mis-recalling) the conjugation.
2. Output validation — every generated sentence is checked against the conjugation table via accent/case-folded whole-word matching. Any sentence that doesn't contain a real conjugated form of the verb is rejected. Failures trigger one regeneration pass; anything still wrong is dropped rather than displayed. Better to show 4 correct examples than 6 with 2 wrong.

Cache invalidation — VerbExampleCache now persists a versioned wrapper (version 2). Pre-fix cached example sets — which may contain wrong-verb sentences — fail the version check and are discarded, so they regenerate cleanly under the new path.

Callers updated: VerbDetailView, VocabFlashcardPracticeView, and VocabMultipleChoicePracticeView all build formsByTense from ReferenceStore and pass it through.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -94,6 +94,19 @@ struct ReferenceStore {
|
|||||||
return (try? context.fetch(descriptor)) ?? []
|
return (try? context.fetch(descriptor)) ?? []
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Builds a lookup from tenseId to the verb's conjugated forms in that tense.
/// The result is used to ground and validate LLM-generated example sentences.
///
/// Whitespace-only forms are discarded, and a tense left with no usable
/// forms is omitted from the returned map.
func conjugatedForms(verbId: Int, tenseIds: [String]) -> [String: [String]] {
    tenseIds.reduce(into: [String: [String]]()) { formsByTense, tenseId in
        let usable = fetchForms(verbId: verbId, tenseId: tenseId)
            .map(\.form)
            .filter { !$0.trimmingCharacters(in: .whitespaces).isEmpty }
        guard !usable.isEmpty else { return }
        formsByTense[tenseId] = usable
    }
}
|
||||||
|
|
||||||
func fetchForm(verbId: Int, tenseId: String, personIndex: Int) -> VerbForm? {
|
func fetchForm(verbId: Int, tenseId: String, personIndex: Int) -> VerbForm? {
|
||||||
let descriptor = FetchDescriptor<VerbForm>(
|
let descriptor = FetchDescriptor<VerbForm>(
|
||||||
predicate: #Predicate<VerbForm> { form in
|
predicate: #Predicate<VerbForm> { form in
|
||||||
|
|||||||
@@ -10,6 +10,16 @@ import SharedModels
|
|||||||
@Observable
|
@Observable
|
||||||
final class VerbExampleCache {
|
final class VerbExampleCache {
|
||||||
|
|
||||||
|
/// Bump to invalidate every cached example set. Raised to 2 for Issue #33
|
||||||
|
/// — examples generated before the verb-grounding/validation fix could
|
||||||
|
/// contain sentences built on the wrong verb.
|
||||||
|
private static let cacheVersion = 2
|
||||||
|
|
||||||
|
private struct CacheFile: Codable {
|
||||||
|
var version: Int
|
||||||
|
var entries: [String: [VerbExample]]
|
||||||
|
}
|
||||||
|
|
||||||
private var store: [Int: [VerbExample]] = [:]
|
private var store: [Int: [VerbExample]] = [:]
|
||||||
private var isLoaded = false
|
private var isLoaded = false
|
||||||
|
|
||||||
@@ -43,13 +53,18 @@ final class VerbExampleCache {
|
|||||||
defer { isLoaded = true }
|
defer { isLoaded = true }
|
||||||
|
|
||||||
guard let data = try? Data(contentsOf: Self.cacheURL),
|
guard let data = try? Data(contentsOf: Self.cacheURL),
|
||||||
let decoded = try? JSONDecoder().decode([String: [VerbExample]].self, from: data)
|
let decoded = try? JSONDecoder().decode(CacheFile.self, from: data),
|
||||||
else { return }
|
decoded.version == Self.cacheVersion
|
||||||
|
else {
|
||||||
|
// Missing, unreadable, old flat format, or stale version — start
|
||||||
|
// fresh so pre-fix examples don't linger.
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
// Persisted with String keys because JSON object keys are strings;
|
// Persisted with String keys because JSON object keys are strings;
|
||||||
// convert back to Int for in-memory lookup.
|
// convert back to Int for in-memory lookup.
|
||||||
var rebuilt: [Int: [VerbExample]] = [:]
|
var rebuilt: [Int: [VerbExample]] = [:]
|
||||||
for (key, value) in decoded {
|
for (key, value) in decoded.entries {
|
||||||
if let id = Int(key) {
|
if let id = Int(key) {
|
||||||
rebuilt[id] = value
|
rebuilt[id] = value
|
||||||
}
|
}
|
||||||
@@ -58,8 +73,9 @@ final class VerbExampleCache {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private func save() {
|
private func save() {
|
||||||
let serialized = Dictionary(uniqueKeysWithValues: store.map { (String($0.key), $0.value) })
|
let entries = Dictionary(uniqueKeysWithValues: store.map { (String($0.key), $0.value) })
|
||||||
guard let data = try? JSONEncoder().encode(serialized) else { return }
|
let file = CacheFile(version: Self.cacheVersion, entries: entries)
|
||||||
|
guard let data = try? JSONEncoder().encode(file) else { return }
|
||||||
try? data.write(to: Self.cacheURL)
|
try? data.write(to: Self.cacheURL)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,6 +5,15 @@ import SharedModels
|
|||||||
/// Generates a set of example sentences for a single verb, one per core tense
|
/// Generates a set of example sentences for a single verb, one per core tense
|
||||||
/// (Issue #27). Mirrors the StoryGenerator pattern: @Generable response types,
|
/// (Issue #27). Mirrors the StoryGenerator pattern: @Generable response types,
|
||||||
/// a static availability flag, and a single generate(...) entry point.
|
/// a static availability flag, and a single generate(...) entry point.
|
||||||
|
///
|
||||||
|
/// Issue #33: the model used to drift onto other verbs partway through the
|
||||||
|
/// 6-example batch (a "tener" set would contain sentences built on estar / ir
|
||||||
|
/// / deber). Two defenses now apply:
|
||||||
|
/// 1. The prompt embeds the verb's *exact* conjugated forms per tense, so
|
||||||
|
/// the model echoes a real form instead of recalling one.
|
||||||
|
/// 2. Every generated sentence is validated against those forms; failures
|
||||||
|
/// are regenerated once, and anything still wrong is dropped rather than
|
||||||
|
/// shown.
|
||||||
@MainActor
|
@MainActor
|
||||||
struct VerbExampleGenerator {
|
struct VerbExampleGenerator {
|
||||||
|
|
||||||
@@ -33,22 +42,82 @@ struct VerbExampleGenerator {
|
|||||||
|
|
||||||
// MARK: - Generation
|
// MARK: - Generation
|
||||||
|
|
||||||
/// Generate one example per tense in `tenseIds`. Returns the examples in the
|
/// Generate one validated example per tense in `tenseIds`.
|
||||||
/// same order as `tenseIds`, filling in placeholders for any the model skipped.
|
///
|
||||||
|
/// - Parameter formsByTense: the verb's conjugated forms keyed by tenseId
|
||||||
|
/// (from `ReferenceStore.conjugatedForms`). Used to ground the prompt
|
||||||
|
/// and validate the output. A tense with no forms here is accepted
|
||||||
|
/// without validation.
|
||||||
static func generate(
|
static func generate(
|
||||||
verbInfinitive: String,
|
verbInfinitive: String,
|
||||||
verbEnglish: String,
|
verbEnglish: String,
|
||||||
tenseIds: [String]
|
tenseIds: [String],
|
||||||
|
formsByTense: [String: [String]]
|
||||||
) async throws -> [VerbExample] {
|
) async throws -> [VerbExample] {
|
||||||
let tenseList = tenseIds
|
let firstPass = try await generateBatch(
|
||||||
.compactMap { id in TenseInfo.find(id).map { "\(id) (\($0.english))" } }
|
verbInfinitive: verbInfinitive,
|
||||||
.joined(separator: ", ")
|
verbEnglish: verbEnglish,
|
||||||
|
tenseIds: tenseIds,
|
||||||
|
formsByTense: formsByTense
|
||||||
|
)
|
||||||
|
|
||||||
|
var valid: [String: VerbExample] = [:]
|
||||||
|
var failedTenses: [String] = []
|
||||||
|
for id in tenseIds {
|
||||||
|
if let ex = firstPass[id], exampleUsesVerb(ex.spanish, forms: formsByTense[id] ?? []) {
|
||||||
|
valid[id] = ex
|
||||||
|
} else {
|
||||||
|
failedTenses.append(id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// One focused retry — regenerate the whole batch, but only adopt the
|
||||||
|
// results for tenses that failed the first pass.
|
||||||
|
if !failedTenses.isEmpty {
|
||||||
|
let retry = try? await generateBatch(
|
||||||
|
verbInfinitive: verbInfinitive,
|
||||||
|
verbEnglish: verbEnglish,
|
||||||
|
tenseIds: tenseIds,
|
||||||
|
formsByTense: formsByTense
|
||||||
|
)
|
||||||
|
if let retry {
|
||||||
|
for id in failedTenses {
|
||||||
|
if let ex = retry[id], exampleUsesVerb(ex.spanish, forms: formsByTense[id] ?? []) {
|
||||||
|
valid[id] = ex
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Requested order, dropping any tense that never produced a valid
|
||||||
|
// sentence — better to show fewer examples than wrong ones.
|
||||||
|
return tenseIds.compactMap { valid[$0] }
|
||||||
|
}
|
||||||
|
|
||||||
|
// MARK: - Single batch call
|
||||||
|
|
||||||
|
private static func generateBatch(
|
||||||
|
verbInfinitive: String,
|
||||||
|
verbEnglish: String,
|
||||||
|
tenseIds: [String],
|
||||||
|
formsByTense: [String: [String]]
|
||||||
|
) async throws -> [String: VerbExample] {
|
||||||
|
let tenseBlock = tenseIds.compactMap { id -> String? in
|
||||||
|
guard let info = TenseInfo.find(id) else { return nil }
|
||||||
|
let forms = formsByTense[id] ?? []
|
||||||
|
if forms.isEmpty {
|
||||||
|
return "- \(id) (\(info.english))"
|
||||||
|
}
|
||||||
|
return "- \(id) (\(info.english)): use one of these exact conjugated forms — \(forms.joined(separator: ", "))"
|
||||||
|
}.joined(separator: "\n")
|
||||||
|
|
||||||
let session = LanguageModelSession(instructions: """
|
let session = LanguageModelSession(instructions: """
|
||||||
You are a Spanish language teacher writing short example sentences for a learner.
|
You are a Spanish language teacher writing short example sentences for a learner.
|
||||||
The learner is studying the verb "\(verbInfinitive)" (to \(verbEnglish)).
|
The learner is studying the verb "\(verbInfinitive)" (to \(verbEnglish)).
|
||||||
Write one sentence per requested tense. Each sentence must:
|
EVERY sentence must use "\(verbInfinitive)" as its main verb, conjugated in the
|
||||||
- Actually conjugate "\(verbInfinitive)" in that tense (not just mention it).
|
requested tense — never substitute a different verb (no estar, ir, deber, etc.
|
||||||
|
unless the target verb itself is that verb). Each sentence must:
|
||||||
|
- Contain one of the exact conjugated forms listed for its tense.
|
||||||
- Be 6-14 words, natural and everyday.
|
- Be 6-14 words, natural and everyday.
|
||||||
- Use vocabulary appropriate for intermediate learners.
|
- Use vocabulary appropriate for intermediate learners.
|
||||||
- Vary subjects and contexts across the set; do not reuse the same subject twice.
|
- Vary subjects and contexts across the set; do not reuse the same subject twice.
|
||||||
@@ -56,27 +125,42 @@ struct VerbExampleGenerator {
|
|||||||
""")
|
""")
|
||||||
|
|
||||||
let prompt = """
|
let prompt = """
|
||||||
Write example sentences for "\(verbInfinitive)" in these tenses, in this order:
|
Write one example sentence for "\(verbInfinitive)" per tense below, in this order:
|
||||||
\(tenseList)
|
\(tenseBlock)
|
||||||
|
|
||||||
Return one GeneratedExample per tense with the matching tenseId, spanish, and english.
|
Return one GeneratedExample per tense with the matching tenseId, spanish, and english.
|
||||||
|
The Spanish sentence MUST contain one of the conjugated forms shown for that tense.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
let response = try await session.respond(to: prompt, generating: GeneratedExampleSet.self)
|
let response = try await session.respond(to: prompt, generating: GeneratedExampleSet.self)
|
||||||
|
|
||||||
// Map by tenseId and return in the caller's requested order so the UI
|
// `uniquingKeysWith` defensively — the schema forces 6 examples even
|
||||||
// renders a predictable sequence even if the model shuffles its output.
|
// when fewer tenses are requested, so the model may repeat a tenseId.
|
||||||
// Use `uniquingKeysWith` defensively — the @Generable schema requires
|
return Dictionary(
|
||||||
// exactly 6 examples, but if the model duplicates a tenseId (it does
|
|
||||||
// happen when the caller passes fewer than 6 distinct tenses), the
|
|
||||||
// strict `uniqueKeysWithValues:` initializer would trap.
|
|
||||||
let byTense = Dictionary(
|
|
||||||
response.content.examples.map {
|
response.content.examples.map {
|
||||||
($0.tenseId, VerbExample(tenseId: $0.tenseId, spanish: $0.spanish, english: $0.english))
|
($0.tenseId, VerbExample(tenseId: $0.tenseId, spanish: $0.spanish, english: $0.english))
|
||||||
},
|
},
|
||||||
uniquingKeysWith: { first, _ in first }
|
uniquingKeysWith: { first, _ in first }
|
||||||
)
|
)
|
||||||
return tenseIds.compactMap { byTense[$0] }
|
}
|
||||||
|
|
||||||
|
// MARK: - Validation
|
||||||
|
|
||||||
|
/// Returns whether `sentence` uses at least one of the verb's conjugated
/// `forms`.
///
/// Matching is whole-word, accent-insensitive and case-insensitive. A form
/// made of several words ("he tenido", "me levanto") only counts when *all*
/// of its words occur in the sentence. Pooling the words of every form into
/// one bag would let a sentence that merely contains the auxiliary or pronoun
/// ("Yo he comido…") pass, even though it is built on a different verb.
///
/// - Parameters:
///   - sentence: The generated Spanish sentence to check.
///   - forms: Conjugated forms of the target verb for one tense.
/// - Returns: `true` when some form matches, or when `forms` is empty
///   (nothing to validate against); `false` otherwise.
static func exampleUsesVerb(_ sentence: String, forms: [String]) -> Bool {
    guard !forms.isEmpty else { return true }
    let sentenceWords = foldedWords(sentence)
    return forms.contains { form in
        let formWords = foldedWords(form)
        // A blank or punctuation-only form folds to no words; the empty set
        // is a subset of everything, so it must not count as a match.
        return !formWords.isEmpty && formWords.isSubset(of: sentenceWords)
    }
}

/// Splits `text` into its set of words after removing diacritics and case
/// differences, so "Tenía" and "tenia" compare equal. Every non-letter
/// character (spaces, punctuation, digits) acts as a separator.
private static func foldedWords(_ text: String) -> Set<String> {
    let folded = text.folding(
        options: [.diacriticInsensitive, .caseInsensitive],
        locale: nil
    )
    return Set(folded.split { !$0.isLetter }.map(String.init))
}
|
||||||
|
|
||||||
static var isAvailable: Bool {
|
static var isAvailable: Bool {
|
||||||
|
|||||||
@@ -205,6 +205,8 @@ struct VocabFlashcardPracticeView: View {
|
|||||||
let verbId = verb.id
|
let verbId = verb.id
|
||||||
let infinitive = verb.infinitive
|
let infinitive = verb.infinitive
|
||||||
let english = verb.english
|
let english = verb.english
|
||||||
|
let formsByTense = ReferenceStore(context: localContext)
|
||||||
|
.conjugatedForms(verbId: verbId, tenseIds: VocabExampleTenseIds.canonical)
|
||||||
Task {
|
Task {
|
||||||
do {
|
do {
|
||||||
// The generator's @Generable schema requires exactly 6
|
// The generator's @Generable schema requires exactly 6
|
||||||
@@ -214,7 +216,8 @@ struct VocabFlashcardPracticeView: View {
|
|||||||
let examples = try await VerbExampleGenerator.generate(
|
let examples = try await VerbExampleGenerator.generate(
|
||||||
verbInfinitive: infinitive,
|
verbInfinitive: infinitive,
|
||||||
verbEnglish: english,
|
verbEnglish: english,
|
||||||
tenseIds: VocabExampleTenseIds.canonical
|
tenseIds: VocabExampleTenseIds.canonical,
|
||||||
|
formsByTense: formsByTense
|
||||||
)
|
)
|
||||||
exampleCache.setExamples(examples, for: verbId)
|
exampleCache.setExamples(examples, for: verbId)
|
||||||
let pick = examples.first { $0.tenseId == "ind_presente" } ?? examples.first
|
let pick = examples.first { $0.tenseId == "ind_presente" } ?? examples.first
|
||||||
|
|||||||
@@ -214,12 +214,15 @@ struct VocabMultipleChoicePracticeView: View {
|
|||||||
let verbId = verb.id
|
let verbId = verb.id
|
||||||
let infinitive = verb.infinitive
|
let infinitive = verb.infinitive
|
||||||
let english = verb.english
|
let english = verb.english
|
||||||
|
let formsByTense = ReferenceStore(context: localContext)
|
||||||
|
.conjugatedForms(verbId: verbId, tenseIds: VocabExampleTenseIds.canonical)
|
||||||
Task {
|
Task {
|
||||||
do {
|
do {
|
||||||
let examples = try await VerbExampleGenerator.generate(
|
let examples = try await VerbExampleGenerator.generate(
|
||||||
verbInfinitive: infinitive,
|
verbInfinitive: infinitive,
|
||||||
verbEnglish: english,
|
verbEnglish: english,
|
||||||
tenseIds: VocabExampleTenseIds.canonical
|
tenseIds: VocabExampleTenseIds.canonical,
|
||||||
|
formsByTense: formsByTense
|
||||||
)
|
)
|
||||||
exampleCache.setExamples(examples, for: verbId)
|
exampleCache.setExamples(examples, for: verbId)
|
||||||
let pick = examples.first { $0.tenseId == "ind_presente" } ?? examples.first
|
let pick = examples.first { $0.tenseId == "ind_presente" } ?? examples.first
|
||||||
|
|||||||
@@ -215,10 +215,13 @@ struct VerbDetailView: View {
|
|||||||
|
|
||||||
examplesState = .loading
|
examplesState = .loading
|
||||||
do {
|
do {
|
||||||
|
let formsByTense = ReferenceStore(context: modelContext)
|
||||||
|
.conjugatedForms(verbId: verb.id, tenseIds: Self.exampleTenseIds)
|
||||||
let generated = try await VerbExampleGenerator.generate(
|
let generated = try await VerbExampleGenerator.generate(
|
||||||
verbInfinitive: verb.infinitive,
|
verbInfinitive: verb.infinitive,
|
||||||
verbEnglish: verb.english,
|
verbEnglish: verb.english,
|
||||||
tenseIds: Self.exampleTenseIds
|
tenseIds: Self.exampleTenseIds,
|
||||||
|
formsByTense: formsByTense
|
||||||
)
|
)
|
||||||
guard !generated.isEmpty else {
|
guard !generated.isEmpty else {
|
||||||
examplesState = .failed("Could not generate examples.")
|
examplesState = .failed("Could not generate examples.")
|
||||||
|
|||||||
Reference in New Issue
Block a user