Add textbook reader, exercise grading, stem-change toggle, extraction pipeline
Major changes: - Textbook UI: chapter list, reader, and interactive exercise view (keyboard + Apple Pencil) surfaced under the Course tab. 30 chapters, 251 exercises. - Stem-change conjugation toggle on Week 4 flashcard decks (E-IE, E-I, O-UE). Uses existing VerbForm + IrregularSpan data to render highlighted present-tense conjugations inline. - Deterministic on-device answer grader with partial credit (correct / close for accent-stripped or single-char-typo / wrong). 11 unit tests cover it. - SharedModels: TextbookChapter (local), TextbookExerciseAttempt (cloud-synced), AnswerGrader helpers. Bumped schema. - DataLoader: textbook seeder (version 8) + refresh helpers that preserve LanGo course decks when textbook data is re-seeded. - Local extraction pipeline in Conjuga/Scripts/textbook/ — XHTML chapter parser, answer-key parser, macOS Vision image OCR + PDF page OCR, merger, NSSpellChecker validator, language-aware auto-fixer, and repair pass that re-pairs quarantined vocab rows using bounding-box coordinates. - UI test target (ConjugaUITests) with three tests: end-to-end textbook flow, all-chapters screenshot audit, and stem-change toggle verification. Generated textbook content (textbook_data.json, textbook_vocab.json) and third-party source files are gitignored — re-run Scripts/textbook/run_pipeline.sh locally to regenerate. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
68
Conjuga/SharedModels/Sources/SharedModels/AnswerGrader.swift
Normal file
68
Conjuga/SharedModels/Sources/SharedModels/AnswerGrader.swift
Normal file
@@ -0,0 +1,68 @@
|
||||
import Foundation
|
||||
|
||||
/// On-device deterministic answer grader with partial-credit support.
/// No network calls, no API keys. Handles accent stripping and single-char typos.
public enum AnswerGrader {

    /// Punctuation removed from both ends of an answer by `normalize(_:)`.
    private static let edgePunctuation = CharacterSet(charactersIn: ".,;:!?¿¡\"'()[]{}—–-")

    /// Evaluate `userText` against the canonical answer (plus alternates).
    ///
    /// - Parameters:
    ///   - userText: The raw text the user entered.
    ///   - canonical: The primary accepted answer.
    ///   - alternates: Additional accepted answers; defaults to none.
    /// - Returns: `.correct` for an exact match after normalization, `.close` for an
    ///   accent-stripped match or a Levenshtein distance of at most 1, `.wrong` otherwise.
    ///   An answer that normalizes to the empty string is always `.wrong`.
    public static func grade(userText: String, canonical: String, alternates: [String] = []) -> TextbookGrade {
        let normalizedUser = normalize(userText)
        if normalizedUser.isEmpty { return .wrong }

        // Normalize every candidate once up front. Candidates that normalize to the
        // empty string are dropped: they can never be matched exactly, and keeping them
        // would make every one-character answer "close" (edit distance 1 from "").
        let candidates = ([canonical] + alternates)
            .map { normalize($0) }
            .filter { !$0.isEmpty }

        // Pass 1: exact match on the normalized text.
        if candidates.contains(normalizedUser) { return .correct }

        // Pass 2: match once diacritics are ignored (e.g. "esta" vs "está").
        let strippedUser = stripAccents(normalizedUser)
        if candidates.contains(where: { stripAccents($0) == strippedUser }) { return .close }

        // Pass 3: at most one insertion, deletion, or substitution away.
        if candidates.contains(where: { levenshtein(normalizedUser, $0) <= 1 }) { return .close }

        return .wrong
    }

    /// Lowercase, collapse whitespace, strip leading/trailing punctuation.
    ///
    /// Whitespace and punctuation are trimmed from the ends together, so input such as
    /// `"hola !"` or `"¿ dónde ?"` does not keep a stray space after the punctuation is removed.
    /// Punctuation inside the text is left untouched.
    public static func normalize(_ s: String) -> String {
        let lowered = s.lowercased(with: Locale(identifier: "es"))
        let collapsed = lowered.replacingOccurrences(of: "\\s+", with: " ", options: .regularExpression)
        let edgeNoise = CharacterSet.whitespacesAndNewlines.union(edgePunctuation)
        return collapsed.trimmingCharacters(in: edgeNoise)
    }

    /// Remove combining diacritics (á→a, ñ→n, ü→u).
    public static func stripAccents(_ s: String) -> String {
        s.folding(options: .diacriticInsensitive, locale: Locale(identifier: "en"))
    }

    /// Standard Levenshtein edit distance.
    ///
    /// Compares the strings `Character` by `Character` and keeps only two rows of the
    /// dynamic-programming table at a time.
    ///
    /// - Returns: The minimum number of single-character insertions, deletions, and
    ///   substitutions needed to turn `a` into `b`.
    public static func levenshtein(_ a: String, _ b: String) -> Int {
        if a == b { return 0 }
        let source = Array(a)
        let target = Array(b)
        if source.isEmpty { return target.count }
        if target.isEmpty { return source.count }

        // `previous` is row i-1 of the table, `current` is row i.
        var previous = Array(0...target.count)
        var current = [Int](repeating: 0, count: target.count + 1)

        for i in 1...source.count {
            current[0] = i
            for j in 1...target.count {
                let substitutionCost = source[i - 1] == target[j - 1] ? 0 : 1
                current[j] = min(
                    previous[j] + 1,                    // deletion
                    current[j - 1] + 1,                 // insertion
                    previous[j - 1] + substitutionCost  // substitution / match
                )
            }
            swap(&previous, &current)
        }
        // After the final swap the last computed row lives in `previous`.
        return previous[target.count]
    }
}
|
||||
@@ -0,0 +1,86 @@
|
||||
import Foundation
|
||||
import SwiftData
|
||||
|
||||
/// A single textbook chapter persisted with SwiftData.
///
/// The chapter's ordered content is not modelled as a relationship: it is kept as
/// JSON in `bodyJSON` (an encoded `[TextbookBlock]`) since SwiftData @Model doesn't
/// support heterogeneous arrays. Use `blocks()` to read it back.
@Model
public final class TextbookChapter {
    @Attribute(.unique) public var id: String = ""
    public var number: Int = 0
    public var title: String = ""
    /// 0 = no part assignment.
    public var part: Int = 0
    public var courseName: String = ""
    /// Encoded `[TextbookBlock]`; decoded on demand by `blocks()`.
    public var bodyJSON: Data = Data()
    public var exerciseCount: Int = 0
    public var vocabTableCount: Int = 0

    /// Creates a chapter, storing every argument in the property of the same name.
    public init(
        id: String,
        number: Int,
        title: String,
        part: Int,
        courseName: String,
        bodyJSON: Data,
        exerciseCount: Int,
        vocabTableCount: Int
    ) {
        self.id = id
        self.number = number
        self.title = title
        self.part = part
        self.courseName = courseName
        self.bodyJSON = bodyJSON
        self.exerciseCount = exerciseCount
        self.vocabTableCount = vocabTableCount
    }

    /// Decodes `bodyJSON` into its content blocks.
    ///
    /// - Returns: The decoded blocks, or an empty array when `bodyJSON` cannot be
    ///   decoded as `[TextbookBlock]` (the decoding error is discarded).
    public func blocks() -> [TextbookBlock] {
        do {
            return try JSONDecoder().decode([TextbookBlock].self, from: bodyJSON)
        } catch {
            return []
        }
    }
}
|
||||
|
||||
/// A single piece of chapter content.
///
/// The struct is polymorphic via `kind`: every block carries `index` and `kind`, and the
/// optional fields below are grouped by the kind they belong to.
public struct TextbookBlock: Codable, Identifiable, Sendable {
    /// Block type tag. The raw value is the string used in the encoded form.
    public enum Kind: String, Codable, Sendable {
        case heading, paragraph
        case keyVocabHeader = "key_vocab_header"
        case vocabTable = "vocab_table"
        case exercise
    }

    /// Identity for `Identifiable`: the kind's raw value and the index joined by a colon.
    public var id: String { kind.rawValue + ":" + String(index) }

    public var index: Int
    public var kind: Kind

    // MARK: heading

    public var level: Int?

    // MARK: heading / paragraph

    public var text: String?

    // MARK: vocab_table

    public var sourceImage: String?
    public var ocrLines: [String]?
    public var ocrConfidence: Double?
    public var cards: [TextbookVocabPair]?

    // MARK: exercise

    public var exerciseId: String?
    public var instruction: String?
    public var extra: [String]?
    public var prompts: [String]?
    public var answerItems: [TextbookAnswerItem]?
    public var freeform: Bool?
}
|
||||
|
||||
/// A two-sided vocabulary entry (`front` / `back`), as carried in a vocab-table
/// block's `cards` array.
public struct TextbookVocabPair: Codable, Sendable {
    public var front: String
    public var back: String

    /// Creates a pair from its two sides.
    ///
    /// Declared explicitly because the compiler-synthesized memberwise initializer of a
    /// public struct is only internal; without this, code outside the module could
    /// obtain a pair only by decoding one.
    public init(front: String, back: String) {
        self.front = front
        self.back = back
    }
}
|
||||
|
||||
/// One entry of an exercise's answer key: the accepted answer for a numbered prompt,
/// plus any alternate answers.
public struct TextbookAnswerItem: Codable, Sendable {
    /// A/B/C subpart label or nil.
    public var label: String?
    public var number: Int
    public var answer: String
    public var alternates: [String]

    private enum CodingKeys: String, CodingKey {
        case label, number, answer, alternates
    }

    /// Creates an answer item.
    ///
    /// Declared explicitly because the compiler-synthesized memberwise initializer of a
    /// public struct is only internal.
    ///
    /// - Parameters:
    ///   - label: Optional subpart label; defaults to `nil`.
    ///   - number: The prompt number this answer belongs to.
    ///   - answer: The primary accepted answer.
    ///   - alternates: Other accepted answers; defaults to none.
    public init(label: String? = nil, number: Int, answer: String, alternates: [String] = []) {
        self.label = label
        self.number = number
        self.answer = answer
        self.alternates = alternates
    }

    /// Decodes an answer item, treating a missing or null `alternates` key as "no alternates".
    ///
    /// With the synthesized decoder a single item lacking `alternates` throws, which
    /// fails decoding of the whole enclosing array.
    ///
    /// - Throws: `DecodingError` when `number` or `answer` is missing or has the wrong type.
    public init(from decoder: Decoder) throws {
        let container = try decoder.container(keyedBy: CodingKeys.self)
        label = try container.decodeIfPresent(String.self, forKey: .label)
        number = try container.decode(Int.self, forKey: .number)
        answer = try container.decode(String.self, forKey: .answer)
        alternates = try container.decodeIfPresent([String].self, forKey: .alternates) ?? []
    }
}
|
||||
@@ -0,0 +1,83 @@
|
||||
import Foundation
|
||||
import SwiftData
|
||||
|
||||
/// Grade recorded for a single prompt once the user has submitted an exercise.
///
/// The integer raw values are spelled out explicitly because they are what gets
/// encoded (see the `"grade": Int` entry in the per-prompt state JSON).
public enum TextbookGrade: Int, Codable, Sendable {
    case wrong = 0, close = 1, correct = 2
}
|
||||
|
||||
/// The user's recorded attempt at one exercise. Stored in the cloud container so
/// progress syncs across devices.
@Model
public final class TextbookExerciseAttempt {
    /// Deterministic id of the form "<courseName>|<exerciseId>" (see `attemptId`).
    /// CloudKit-synced models can't use @Attribute(.unique), so uniqueness is not
    /// enforced here; code that writes attempts must fetch-or-create.
    public var id: String = ""
    public var courseName: String = ""
    public var chapterNumber: Int = 0
    public var exerciseId: String = ""

    /// JSON-encoded array of per-prompt state.
    /// Each entry: { "number": Int, "userText": String, "grade": Int }
    public var stateJSON: Data = Data()

    public var lastAttemptAt: Date = Date()

    // Tallies kept in step with `stateJSON` by `setPromptStates(_:)`.
    public var correctCount: Int = 0
    public var closeCount: Int = 0
    public var wrongCount: Int = 0
    public var totalCount: Int = 0

    /// Creates an attempt, storing every argument in the property of the same name.
    /// Everything after `exerciseId` defaults to an empty / zero / current-date value.
    public init(
        id: String,
        courseName: String,
        chapterNumber: Int,
        exerciseId: String,
        stateJSON: Data = Data(),
        lastAttemptAt: Date = Date(),
        correctCount: Int = 0,
        closeCount: Int = 0,
        wrongCount: Int = 0,
        totalCount: Int = 0
    ) {
        self.id = id
        self.courseName = courseName
        self.chapterNumber = chapterNumber
        self.exerciseId = exerciseId
        self.stateJSON = stateJSON
        self.lastAttemptAt = lastAttemptAt
        self.correctCount = correctCount
        self.closeCount = closeCount
        self.wrongCount = wrongCount
        self.totalCount = totalCount
    }

    /// Decodes `stateJSON` into per-prompt states.
    ///
    /// - Returns: The decoded states, or an empty array when `stateJSON` cannot be
    ///   decoded (the decoding error is discarded).
    public func promptStates() -> [TextbookPromptState] {
        do {
            return try JSONDecoder().decode([TextbookPromptState].self, from: stateJSON)
        } catch {
            return []
        }
    }

    /// Replaces the stored per-prompt states and recomputes the four tallies from them.
    ///
    /// If encoding fails, `stateJSON` is set to empty `Data`; the tallies are still updated.
    public func setPromptStates(_ states: [TextbookPromptState]) {
        stateJSON = (try? JSONEncoder().encode(states)) ?? Data()

        // Count every grade in one pass instead of filtering once per grade.
        var tally: [TextbookGrade: Int] = [:]
        for state in states {
            tally[state.grade, default: 0] += 1
        }
        correctCount = tally[.correct, default: 0]
        closeCount = tally[.close, default: 0]
        wrongCount = tally[.wrong, default: 0]
        totalCount = states.count
    }

    /// Builds the deterministic attempt id "<courseName>|<exerciseId>".
    public static func attemptId(courseName: String, exerciseId: String) -> String {
        [courseName, exerciseId].joined(separator: "|")
    }
}
|
||||
|
||||
/// State of one prompt within a submitted exercise: the prompt number, the text the
/// user entered, and the grade assigned to it. Arrays of these are what
/// `TextbookExerciseAttempt` encodes into its `stateJSON`.
public struct TextbookPromptState: Sendable, Codable {
    public var number: Int
    public var userText: String
    public var grade: TextbookGrade

    /// Creates a prompt state from its three fields.
    public init(
        number: Int,
        userText: String,
        grade: TextbookGrade
    ) {
        self.number = number
        self.userText = userText
        self.grade = grade
    }
}
|
||||
@@ -0,0 +1,80 @@
|
||||
import Testing
|
||||
@testable import SharedModels
|
||||
|
||||
/// Unit tests for `AnswerGrader`: the three grading outcomes plus the
/// `normalize`, `stripAccents`, and `levenshtein` helpers.
@Suite("AnswerGrader")
struct AnswerGraderTests {

    @Test("exact match is correct")
    func exact() {
        // Case and surrounding whitespace must not matter.
        for typed in ["tengo", "Tengo", " tengo "] {
            #expect(AnswerGrader.grade(userText: typed, canonical: "tengo") == .correct)
        }
    }

    @Test("missing accent is close")
    func missingAccent() {
        let cases = [("esta", "está"), ("nino", "niño"), ("asi", "así")]
        for (typed, expected) in cases {
            #expect(AnswerGrader.grade(userText: typed, canonical: expected) == .close)
        }
    }

    @Test("single-char typo is close")
    func singleCharTypo() {
        // In order: deletion, insertion, substitution.
        for typed in ["tngo", "tengoo", "tengu"] {
            #expect(AnswerGrader.grade(userText: typed, canonical: "tengo") == .close)
        }
    }

    @Test("two-char typo is wrong")
    func twoCharTypo() {
        let result = AnswerGrader.grade(userText: "tngu", canonical: "tengo")
        #expect(result == .wrong)
    }

    @Test("empty is wrong")
    func empty() {
        for typed in ["", " "] {
            #expect(AnswerGrader.grade(userText: typed, canonical: "tengo") == .wrong)
        }
    }

    @Test("alternates accepted")
    func alternates() {
        let accepted = ["flaca"]
        #expect(AnswerGrader.grade(userText: "flaca", canonical: "delgada", alternates: accepted) == .correct)
        #expect(AnswerGrader.grade(userText: "flacca", canonical: "delgada", alternates: accepted) == .close)
    }

    @Test("punctuation stripped")
    func punctuation() {
        let cases = [("el libro.", "el libro"), ("¿dónde?", "dónde")]
        for (typed, expected) in cases {
            #expect(AnswerGrader.grade(userText: typed, canonical: expected) == .correct)
        }
    }

    @Test("very different text is wrong")
    func wrong() {
        let cases = [("hola", "tengo"), ("casa", "perro")]
        for (typed, expected) in cases {
            #expect(AnswerGrader.grade(userText: typed, canonical: expected) == .wrong)
        }
    }

    @Test("normalize produces expected output")
    func normalize() {
        let cases = [(" Hola ", "hola"), ("ABC!", "abc")]
        for (input, expected) in cases {
            #expect(AnswerGrader.normalize(input) == expected)
        }
    }

    @Test("stripAccents handles common Spanish diacritics")
    func stripAccents() {
        let cases = [("niño", "nino"), ("está", "esta"), ("güero", "guero")]
        for (input, expected) in cases {
            #expect(AnswerGrader.stripAccents(input) == expected)
        }
    }

    @Test("levenshtein computes edit distance")
    func levenshtein() {
        let cases = [
            ("kitten", "sitting", 3),
            ("flaw", "lawn", 2),
            ("abc", "abc", 0),
            ("", "abc", 3),
        ]
        for (lhs, rhs, distance) in cases {
            #expect(AnswerGrader.levenshtein(lhs, rhs) == distance)
        }
    }
}
|
||||
Reference in New Issue
Block a user