Add Books — read EPUB-imported books in Practice with tap-to-define

New "Books" row in the Practice tab opens a library of bundled bilingual books. Each chapter renders Spanish paragraph-by-paragraph; tap any word for a definition sheet (DictionaryService with on-device AI fallback), or toggle the toolbar button to swap to the pre-computed English translation inline. Local-only Book + BookChapter SwiftData models added to the local container schema (reset version bumped to 5). DataLoader.seedBooks walks the bundle for `book_*.json` resources, so future books drop in without touching app code — just bundle a new JSON and bump bookDataVersion. First book: Olly Richards' "Spanish Short Stories For Beginners Vol 2" — 13 chapters, 2,646 paragraphs, bilingual. Scripts/books/ is the repeatable pipeline for future EPUBs: extract_epub.py → translate_chapters.py (per-chapter resumable jobs) → bundle_book.py. Translation is done by parallel Claude Code subagents reading per-job input files and writing output files — no API key required, matching the pattern used for the textbook vocab vision pass. See Scripts/books/README.md for the full how-to. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-11 09:21:44 -05:00
parent ade091f108
commit 09e49bda2c
17 changed files with 6782 additions and 1 deletions
@@ -0,0 +1,128 @@
+#!/usr/bin/env python3
+"""Merge chapters.json + per-job translation outputs into the final bundled
+book_<slug>.json that the iOS app reads from its bundle.
+
+Usage:
+    python3 bundle_book.py <slug> [--build BUILD_DIR] [--dest DEST_DIR] [--require-all]
+
+Inputs:
+    BUILD_DIR/<slug>/chapters.json
+    BUILD_DIR/<slug>/jobs/*.output.json   (from translation subagents)
+
+Output:
+    DEST_DIR/book_<slug>.json
+        {
+          "slug": "...",
+          "title": "...",
+          "author": "...",
+          "language": "...",
+          "chapters": [
+            {"id": "ch1", "number": 1, "title": "Preface",
+             "paragraphsES": ["...", ...],
+             "paragraphsEN": ["...", ...]},
+            ...
+          ]
+        }
+
+If --require-all is passed, the script fails if any job is missing its output.
+Otherwise it fills missing translations with empty strings and warns.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+
+# Output directory used when --dest is not given. The path is relative, so
+# main() resolves it against the current working directory.
+DEFAULT_DEST = Path("../../Conjuga")
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("slug")
+    parser.add_argument("--build", type=Path, default=Path("build"))
+    parser.add_argument("--dest", type=Path, default=None)
+    parser.add_argument("--require-all", action="store_true")
+    args = parser.parse_args()
+
+    base = args.build / args.slug
+    chapters = json.loads((base / "chapters.json").read_text(encoding="utf-8"))
+    jobs_dir = base / "jobs"
+
+    # Index translation jobs by chapter -> ordered (offset, paragraphsEN).
+    chapter_translations: dict[int, list[tuple[int, list[str]]]] = {}
+    missing: list[str] = []
+
+    for input_path in sorted(jobs_dir.glob("*.input.json")):
+        job_id = input_path.stem.removesuffix(".input")
+        input_data = json.loads(input_path.read_text(encoding="utf-8"))
+        output_path = jobs_dir / f"{job_id}.output.json"
+        if not output_path.exists():
+            missing.append(job_id)
+            continue
+        output_data = json.loads(output_path.read_text(encoding="utf-8"))
+        paragraphs_en = output_data.get("paragraphsEN", [])
+        expected = len(input_data["paragraphsES"])
+        if len(paragraphs_en) != expected:
+            print(
+                f"WARN: {job_id} length mismatch — got {len(paragraphs_en)}, "
+                f"expected {expected}. Padding/truncating.",
+                file=sys.stderr,
+            )
+            if len(paragraphs_en) < expected:
+                paragraphs_en = paragraphs_en + [""] * (expected - len(paragraphs_en))
+            else:
+                paragraphs_en = paragraphs_en[:expected]
+        chapter_translations.setdefault(input_data["chapter"], []).append(
+            (input_data["rangeStart"], paragraphs_en)
+        )
+
+    if missing:
+        msg = f"{len(missing)} translation job(s) missing output: {missing[:5]}{'...' if len(missing) > 5 else ''}"
+        if args.require_all:
+            print(f"ERROR: {msg}", file=sys.stderr)
+            sys.exit(1)
+        print(f"WARN: {msg} — using empty strings for those paragraphs.", file=sys.stderr)
+
+    bundled_chapters: list[dict] = []
+    for ch in chapters["chapters"]:
+        translations = sorted(chapter_translations.get(ch["number"], []))
+        paragraphs_en: list[str] = []
+        for _, en_chunk in translations:
+            paragraphs_en.extend(en_chunk)
+        # Pad to match ES length if jobs were missing for parts of this chapter.
+        if len(paragraphs_en) < len(ch["paragraphsES"]):
+            paragraphs_en += [""] * (len(ch["paragraphsES"]) - len(paragraphs_en))
+        elif len(paragraphs_en) > len(ch["paragraphsES"]):
+            paragraphs_en = paragraphs_en[: len(ch["paragraphsES"])]
+        bundled_chapters.append(
+            {
+                "id": ch["id"],
+                "number": ch["number"],
+                "title": ch["title"],
+                "paragraphsES": ch["paragraphsES"],
+                "paragraphsEN": paragraphs_en,
+            }
+        )
+
+    payload = {
+        "slug": chapters["slug"],
+        "title": chapters["title"],
+        "author": chapters["author"],
+        "language": chapters["language"],
+        "chapters": bundled_chapters,
+    }
+
+    dest_dir = (args.dest or DEFAULT_DEST).resolve()
+    dest_dir.mkdir(parents=True, exist_ok=True)
+    out_path = dest_dir / f"book_{args.slug}.json"
+    out_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
+    print(f"Wrote {out_path}")
+    print(f"  Chapters:        {len(bundled_chapters)}")
+    print(f"  Translated jobs: {sum(len(v) for v in chapter_translations.values())} / {sum(len(v) for v in chapter_translations.values()) + len(missing)}")
+
+
+# Run the bundler only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()