Add Books — read EPUB-imported books in Practice with tap-to-define

New "Books" row in the Practice tab opens a library of bundled bilingual
books. Each chapter renders Spanish paragraph-by-paragraph; tap any
word for a definition sheet (DictionaryService with on-device AI
fallback), or toggle the toolbar button to swap to the pre-computed
English translation inline.

Local-only Book + BookChapter SwiftData models added to the local
container schema (reset version bumped to 5). DataLoader.seedBooks
walks the bundle for `book_*.json` resources, so future books drop in
without touching app code — just bundle a new JSON and bump
bookDataVersion.

First book: Olly Richards' "Spanish Short Stories For Beginners
Vol 2" — 13 chapters, 2,646 paragraphs, bilingual.

Scripts/books/ is the repeatable pipeline for future EPUBs:
extract_epub.py → translate_chapters.py (per-chapter resumable jobs) →
bundle_book.py. Translation is done by parallel Claude Code subagents
reading per-job input files and writing output files — no API key
required, matching the pattern used for the textbook vocab vision
pass. See Scripts/books/README.md for the full how-to.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Trey T
2026-05-11 09:21:44 -05:00
parent ade091f108
commit 09e49bda2c
17 changed files with 6782 additions and 1 deletions
+128
View File
@@ -0,0 +1,128 @@
#!/usr/bin/env python3
"""Merge chapters.json + per-job translation outputs into the final bundled
book_<slug>.json that the iOS app reads from its bundle.
Usage:
python3 bundle_book.py <slug> [--build BUILD_DIR] [--dest DEST_DIR] [--require-all]
Inputs:
BUILD_DIR/<slug>/chapters.json
BUILD_DIR/<slug>/jobs/*.output.json (from translation subagents)
Output:
DEST_DIR/book_<slug>.json
{
"slug": "...",
"title": "...",
"author": "...",
"language": "...",
"chapters": [
{"id": "ch1", "number": 1, "title": "Preface",
"paragraphsES": ["...", ...],
"paragraphsEN": ["...", ...]},
...
]
}
If --require-all is passed, the script fails if any job is missing its output.
Otherwise it fills missing translations with empty strings and warns.
"""
from __future__ import annotations
import argparse
import json
import sys
from pathlib import Path
# Default output directory used when --dest is omitted. The path is relative,
# and main() calls .resolve() on it, so it is interpreted against the current
# working directory at run time.
DEFAULT_DEST = Path("../../Conjuga")
def _fit_length(paragraphs: list[str], expected: int) -> list[str]:
    """Return a copy of *paragraphs* truncated or padded with "" to *expected* items."""
    # A negative repeat count yields an empty list, so one expression covers
    # both the too-long and the too-short case.
    return paragraphs[:expected] + [""] * (expected - len(paragraphs))


def main() -> None:
    """Merge chapters.json with the translation job outputs into book_<slug>.json.

    Reads ``<build>/<slug>/chapters.json`` and every ``*.input.json`` under
    ``<build>/<slug>/jobs``. For each job, the matching ``<job>.output.json``
    supplies ``paragraphsEN``; its length is forced to the length of the job's
    ``paragraphsES`` (with a warning on mismatch).

    A job without an output file contributes a chunk of empty strings of the
    job's own length, positioned by its ``rangeStart``. This keeps the English
    paragraphs of later jobs in the same chapter aligned with their Spanish
    counterparts instead of shifting them toward the start of the chapter.

    Command-line arguments (parsed from ``sys.argv``):
        slug            book identifier; selects the build sub-directory and
                        names the output file.
        --build         build directory (default: ``build``).
        --dest          output directory (default: ``DEFAULT_DEST``).
        --require-all   exit with status 1 if any job has no output file.

    Raises:
        SystemExit: with code 1 when ``--require-all`` is set and at least one
            job output is missing. Nothing is written in that case.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("slug")
    parser.add_argument("--build", type=Path, default=Path("build"))
    parser.add_argument("--dest", type=Path, default=None)
    parser.add_argument("--require-all", action="store_true")
    args = parser.parse_args()

    base = args.build / args.slug
    chapters = json.loads((base / "chapters.json").read_text(encoding="utf-8"))
    jobs_dir = base / "jobs"

    # Index translation jobs by chapter -> list of (offset, paragraphsEN).
    chapter_translations: dict[int, list[tuple[int, list[str]]]] = {}
    missing: list[str] = []
    translated_jobs = 0
    for input_path in sorted(jobs_dir.glob("*.input.json")):
        # "<job>.input.json" -> stem "<job>.input" -> "<job>"
        job_id = input_path.stem.removesuffix(".input")
        input_data = json.loads(input_path.read_text(encoding="utf-8"))
        expected = len(input_data["paragraphsES"])
        output_path = jobs_dir / f"{job_id}.output.json"
        if output_path.exists():
            output_data = json.loads(output_path.read_text(encoding="utf-8"))
            paragraphs_en = output_data.get("paragraphsEN", [])
            if len(paragraphs_en) != expected:
                print(
                    f"WARN: {job_id} length mismatch — got {len(paragraphs_en)}, "
                    f"expected {expected}. Padding/truncating.",
                    file=sys.stderr,
                )
                paragraphs_en = _fit_length(paragraphs_en, expected)
            translated_jobs += 1
        else:
            missing.append(job_id)
            # Reserve this job's slot with blanks so that chunks from later
            # jobs in the same chapter stay aligned with their ES paragraphs.
            paragraphs_en = [""] * expected
        chapter_translations.setdefault(input_data["chapter"], []).append(
            (input_data["rangeStart"], paragraphs_en)
        )

    if missing:
        msg = f"{len(missing)} translation job(s) missing output: {missing[:5]}{'...' if len(missing) > 5 else ''}"
        if args.require_all:
            print(f"ERROR: {msg}", file=sys.stderr)
            sys.exit(1)
        print(f"WARN: {msg} — using empty strings for those paragraphs.", file=sys.stderr)

    bundled_chapters: list[dict] = []
    for ch in chapters["chapters"]:
        # Order chunks by their start offset only; the sort is stable, so jobs
        # sharing an offset keep their file-name order.
        translations = sorted(
            chapter_translations.get(ch["number"], []),
            key=lambda item: item[0],
        )
        paragraphs_en: list[str] = []
        for _, en_chunk in translations:
            paragraphs_en.extend(en_chunk)
        # Final safety net: a chapter with no job files at all (or jobs that do
        # not cover it exactly) still gets exactly one EN slot per ES paragraph.
        paragraphs_en = _fit_length(paragraphs_en, len(ch["paragraphsES"]))
        bundled_chapters.append(
            {
                "id": ch["id"],
                "number": ch["number"],
                "title": ch["title"],
                "paragraphsES": ch["paragraphsES"],
                "paragraphsEN": paragraphs_en,
            }
        )

    payload = {
        "slug": chapters["slug"],
        "title": chapters["title"],
        "author": chapters["author"],
        "language": chapters["language"],
        "chapters": bundled_chapters,
    }

    # A relative destination is resolved against the current working directory.
    dest_dir = (args.dest or DEFAULT_DEST).resolve()
    dest_dir.mkdir(parents=True, exist_ok=True)
    out_path = dest_dir / f"book_{args.slug}.json"
    out_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")

    total_jobs = translated_jobs + len(missing)
    print(f"Wrote {out_path}")
    print(f"  Chapters: {len(bundled_chapters)}")
    print(f"  Translated jobs: {translated_jobs} / {total_jobs}")
# Run the bundler only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()