#!/usr/bin/env python3
"""Render book.json + ocr.json into a static HTML review page.
The HTML surfaces low-confidence OCR results in red, and shows the parsed
exercise prompts/answers next to the original image. Designed for rapid
visual diffing against the source book.
"""
import html
import json
from pathlib import Path
# Directory containing this script; the JSON inputs and the review page
# path below are all resolved relative to it.
HERE = Path(__file__).resolve().parent
BOOK = HERE / "book.json"
OCR = HERE / "ocr.json"
OUT_HTML = HERE / "review.html"

# The extracted EPUB content sits three directory levels above HERE.
# Chained ``.parent`` yields the same path as ``parents[2]`` but stops at
# the filesystem root instead of raising IndexError at import time when
# the script lives in a shallow directory.
EPUB_IMAGES = HERE.parent.parent.parent / "epub_extract" / "OEBPS"

# Same absolute path as EPUB_IMAGES. The previous ``relative_to(...) if
# False else ...`` expression could never take its first branch, so the
# dead code is dropped while the name is kept for any existing users.
IMAGE_REL = EPUB_IMAGES
def load(p: Path) -> dict:
    """Parse the UTF-8 encoded JSON file at *p* and return the decoded value."""
    with p.open(encoding="utf-8") as handle:
        return json.load(handle)
def esc(s: str) -> str:
    """HTML-escape *s*; a falsy value such as ``None`` or ``""`` yields ``""``."""
    text = s if s else ""
    return html.escape(text)
def img_tag(src: str) -> str:
    """Resolve *src* against ``EPUB_IMAGES``; currently returns an empty string.

    NOTE(review): ``full`` is computed but never used and the f-string below
    is empty. Presumably the literal once held ``<img ...>`` markup that
    referenced ``full`` and was lost -- confirm against the original file
    before relying on this function.
    """
    # Absolute path of the image, built from the EPUB image directory.
    full = (EPUB_IMAGES / src).resolve()
    return f''
def render() -> None:
book = load(BOOK)
ocr = load(OCR) if OCR.exists() else {}
out: list = []
out.append("""
{book['totalChapters']} chapters · {book['totalExercises']} exercises · {book['totalVocabTables']} vocab tables · {book['totalVocabCards']} auto-derived cards
") for ch in book["chapters"]: part = ch.get("part") part_str = f" (Part {part})" if part else "" out.append(f"{esc(b['text'])}
") elif kind == "key_vocab_header": out.append(f"★ Key Vocabulary
") elif kind == "vocab_table": src = b["sourceImage"] conf = b["ocrConfidence"] conf_class = "lowconf" if conf < 0.85 else "" out.append(f"