Major changes: - Textbook UI: chapter list, reader, and interactive exercise view (keyboard + Apple Pencil) surfaced under the Course tab. 30 chapters, 251 exercises. - Stem-change conjugation toggle on Week 4 flashcard decks (E-IE, E-I, O-UE). Uses existing VerbForm + IrregularSpan data to render highlighted present tense conjugations inline. - Deterministic on-device answer grader with partial credit (correct / close for accent-stripped or single-char-typo / wrong). 11 unit tests cover it. - SharedModels: TextbookChapter (local), TextbookExerciseAttempt (cloud-synced), AnswerGrader helpers. Bumped schema. - DataLoader: textbook seeder (version 8) + refresh helpers that preserve LanGo course decks when textbook data is re-seeded. - Local extraction pipeline in Conjuga/Scripts/textbook/ — XHTML chapter parser, answer-key parser, macOS Vision image OCR + PDF page OCR, merger, NSSpellChecker validator, language-aware auto-fixer, and repair pass that re-pairs quarantined vocab rows using bounding-box coordinates. - UI test target (ConjugaUITests) with three tests: end-to-end textbook flow, all-chapters screenshot audit, and stem-change toggle verification. Generated textbook content (textbook_data.json, textbook_vocab.json) and third-party source files are gitignored — re-run Scripts/textbook/run_pipeline.sh locally to regenerate. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
127 lines
5.6 KiB
Python
127 lines
5.6 KiB
Python
#!/usr/bin/env python3
|
|
"""Render book.json + ocr.json into a static HTML review page.
|
|
|
|
The HTML surfaces low-confidence OCR results in red, and shows the parsed
|
|
exercise prompts/answers next to the original image. Designed for rapid
|
|
visual diffing against the source book.
|
|
"""
|
|
|
|
import html
|
|
import json
|
|
from pathlib import Path
|
|
|
|
# Directory that contains this script; the JSON inputs and the HTML output
# are all located directly inside it.
HERE = Path(__file__).resolve().parent
BOOK = HERE / "book.json"
OCR = HERE / "ocr.json"
OUT_HTML = HERE / "review.html"
# Root of the extracted EPUB images: three directory levels above HERE,
# then epub_extract/OEBPS.
EPUB_IMAGES = HERE.parents[2] / "epub_extract" / "OEBPS"
# Alias of EPUB_IMAGES, kept so that any code referring to IMAGE_REL still
# resolves to the same absolute image root.
IMAGE_REL = EPUB_IMAGES
|
|
|
|
|
|
def load(p: Path) -> dict:
    """Decode the UTF-8 encoded JSON document stored at *p* and return it."""
    with p.open(encoding="utf-8") as handle:
        return json.load(handle)
|
|
|
|
|
|
def esc(s: str) -> str:
    """HTML-escape *s* (quotes included); falsy input yields an empty string."""
    if not s:
        return ""
    return html.escape(s)
|
|
|
|
|
|
def img_tag(src: str) -> str:
    """Return an ``<img class="src">`` tag for the image *src*.

    *src* is joined onto ``EPUB_IMAGES`` and resolved to an absolute path,
    which becomes the tag's ``file://`` URL. The unmodified *src* string is
    HTML-escaped and used as the ``alt`` text.
    """
    full = (EPUB_IMAGES / src).resolve()
    # as_uri() produces a well-formed, percent-encoded file URL, so spaces,
    # quotes or non-ASCII characters in the path cannot corrupt the attribute
    # (simply pasting the path after "file://" would leave them raw).
    return f'<img src="{esc(full.as_uri())}" alt="{esc(src)}" class="src"/>'
|
|
|
|
|
|
# Static document head (doctype, title, inline stylesheet) emitted verbatim
# at the top of the review page.
_PAGE_HEAD = """<!DOCTYPE html>
<html><head><meta charset='utf-8'><title>Book review</title>
<style>
body { font-family: -apple-system, system-ui, sans-serif; margin: 2em; max-width: 1000px; color: #222; }
h1 { color: #c44; }
h2.chapter { background: #eee; padding: 0.5em; border-left: 4px solid #c44; }
h3.heading { color: #555; }
.para { margin: 0.5em 0; }
.vocab-table { background: #fafff0; padding: 0.5em; margin: 0.5em 0; border: 1px solid #bda; border-radius: 6px; }
.ocr-line { font-family: ui-monospace, monospace; font-size: 12px; }
.lowconf { color: #c44; background: #fee; }
.exercise { background: #fff8e8; padding: 0.5em; margin: 0.75em 0; border: 1px solid #cb9; border-radius: 6px; }
.prompt { font-family: ui-monospace, monospace; font-size: 13px; margin: 2px 0; }
.answer { color: #080; font-family: ui-monospace, monospace; font-size: 13px; }
img.src { max-width: 520px; border: 1px solid #ccc; margin: 4px 0; }
.kv { color: #04a; font-weight: bold; }
summary { cursor: pointer; font-weight: bold; color: #666; }
.card-pair { font-family: ui-monospace, monospace; font-size: 12px; }
.card-es { color: #04a; }
.card-en { color: #555; }
.counts { color: #888; font-size: 12px; }
</style></head><body>"""


def _render_vocab_table(b: dict) -> list:
    """Return the HTML fragments for one ``vocab_table`` block."""
    src = b["sourceImage"]
    conf = b["ocrConfidence"]
    # A table-level confidence below 0.85 flags every OCR line of the table
    # with the "lowconf" class (styled red in the page stylesheet).
    conf_class = "lowconf" if conf < 0.85 else ""
    parts = [
        "<div class='vocab-table'>",
        f"<details><summary>vocab {esc(src)} · confidence {conf:.2f} · {b['cardCount']} card(s)</summary>",
        img_tag(src),
        "<div>",
    ]
    parts.extend(
        f"<div class='ocr-line {conf_class}'>{esc(line)}</div>"
        for line in b.get("ocrLines", [])
    )
    parts.append("</div>")
    # NOTE: the derived card pairs are not rendered. They are not read from
    # the block here; showing them would require recomputing them from
    # ocrLines.
    parts.append("</details></div>")
    return parts


def _render_answer(a: dict) -> str:
    """Return the HTML line for one answer-key item of an exercise."""
    label_str = f"{a['label']}. " if a.get("label") else ""
    # Items without an "alternates" entry are treated as having none.
    alts = ", ".join(a.get("alternates") or [])
    alt_str = f" <span style='color:#999'>(also: {esc(alts)})</span>" if alts else ""
    # The number is stringified and escaped like every other interpolated field.
    number = esc(str(a["number"]))
    return f"<div class='answer'>{esc(label_str)}{number}. {esc(a['answer'])}{alt_str}</div>"


def _render_exercise(b: dict) -> list:
    """Return the HTML fragments for one ``exercise`` block."""
    parts = [
        "<div class='exercise'>",
        f"<b>Exercise {esc(b['id'])}</b> — <i>{esc(b['instruction'])}</i>",
    ]
    parts.extend(f"<div class='para'>{esc(e)}</div>" for e in b.get("extra") or [])

    ocr_lines = b.get("ocrLines")
    if ocr_lines:
        parts.append("<details><summary>OCR lines from image</summary>")
        parts.extend(f"<div class='ocr-line'>{esc(line)}</div>" for line in ocr_lines)
        parts.append("</details>")

    prompts = b.get("prompts")
    if prompts:
        parts.append("<div><b>Parsed prompts:</b></div>")
        parts.extend(f"<div class='prompt'>• {esc(p)}</div>" for p in prompts)

    answers = b.get("answerItems")
    if answers:
        parts.append("<div><b>Answer key:</b></div>")
        parts.extend(_render_answer(a) for a in answers)

    if b.get("freeform"):
        parts.append("<div style='color:#c44'>(Freeform — answers will vary)</div>")

    parts.extend(img_tag(img_src) for img_src in b.get("image_refs", []))
    parts.append("</div>")
    return parts


def _render_block(b: dict) -> list:
    """Return the HTML fragments for one chapter block, chosen by ``b["kind"]``.

    Kinds other than heading, paragraph, key_vocab_header, vocab_table and
    exercise produce no output.
    """
    kind = b["kind"]
    if kind == "heading":
        level = b["level"]
        return [f"<h{level} class='heading'>{esc(b['text'])}</h{level}>"]
    if kind == "paragraph":
        return [f"<p class='para'>{esc(b['text'])}</p>"]
    if kind == "key_vocab_header":
        return ["<p class='kv'>★ Key Vocabulary</p>"]
    if kind == "vocab_table":
        return _render_vocab_table(b)
    if kind == "exercise":
        return _render_exercise(b)
    return []


def render() -> None:
    """Render ``BOOK`` into the static HTML review page at ``OUT_HTML``.

    Only book.json is read; OCR lines and confidences are taken from its
    blocks. The page contains a summary line with the book totals, then one
    section per chapter with each of its blocks rendered in order. The
    output path is printed once the file has been written.

    Raises:
        KeyError: if a required key is missing from the book data.
    """
    book = load(BOOK)

    out: list = [
        _PAGE_HEAD,
        f"<h1>{esc(book['courseName'])} — review</h1>",
        f"<p>{book['totalChapters']} chapters · {book['totalExercises']} exercises · {book['totalVocabTables']} vocab tables · {book['totalVocabCards']} auto-derived cards</p>",
    ]

    for ch in book["chapters"]:
        part = ch.get("part")
        part_str = f" (Part {part})" if part else ""
        out.append(f"<h2 class='chapter'>Chapter {ch['number']}: {esc(ch['title'])}{esc(part_str)}</h2>")

        for b in ch["blocks"]:
            out.extend(_render_block(b))

    out.append("</body></html>")
    OUT_HTML.write_text("\n".join(out), encoding="utf-8")
    print(f"Wrote {OUT_HTML}")
|
|
|
|
|
|
# Build the review page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    render()
|