Add Books — read EPUB-imported books in Practice with tap-to-define

New "Books" row in the Practice tab opens a library of bundled bilingual books. Each chapter renders Spanish paragraph-by-paragraph; tap any word for a definition sheet (DictionaryService with on-device AI fallback), or toggle the toolbar button to swap to the pre-computed English translation inline.

Local-only Book + BookChapter SwiftData models added to the local container schema (reset version bumped to 5). DataLoader.seedBooks walks the bundle for `book_*.json` resources, so future books drop in without touching app code — just bundle a new JSON and bump bookDataVersion.

First book: Olly Richards' "Spanish Short Stories For Beginners Vol 2" — 13 chapters, 2,646 paragraphs, bilingual.

Scripts/books/ is the repeatable pipeline for future EPUBs: extract_epub.py → translate_chapters.py (per-chapter resumable jobs) → bundle_book.py. Translation is done by parallel Claude Code subagents reading per-job input files and writing output files — no API key required, matching the pattern used for the textbook vocab vision pass. See Scripts/books/README.md for the full how-to.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-11 09:21:44 -05:00
parent ade091f108
commit 09e49bda2c
17 changed files with 6782 additions and 1 deletions
@@ -0,0 +1,136 @@
+#!/usr/bin/env python3
+"""Split chapters.json into translation jobs that Claude Code subagents can
+process in parallel. Resumable: jobs whose output file already exists are
+skipped.
+
+Usage:
+    python3 translate_chapters.py <slug> [--batch-size N] [--build BUILD_DIR]
+
+Inputs:
+    BUILD_DIR/<slug>/chapters.json  (from extract_epub.py)
+
+Outputs:
+    BUILD_DIR/<slug>/jobs/<jobid>.input.json    (one per batch — read by subagents)
+    BUILD_DIR/<slug>/jobs/_pending.txt           (list of job IDs still missing output)
+    BUILD_DIR/<slug>/jobs/_prompt_template.md    (prompt the orchestrator hands each subagent)
+
+Job layout (.input.json):
+    {
+      "jobId": "ch06_b00",
+      "chapter": 6,
+      "chapterTitle": "1. El Castillo",
+      "rangeStart": 0,
+      "rangeEnd": 30,
+      "paragraphsES": ["...", "..."]
+    }
+
+Subagents must write `<jobid>.output.json` with shape:
+    {"jobId": "ch06_b00", "paragraphsEN": ["...", "..."]}
+
+The output array MUST have the same length as paragraphsES, in the same order.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+from pathlib import Path
+
+
+# Prompt text that main() writes to jobs/_prompt_template.md for the
+# translation subagents. It is rendered with str.format(), so `{input_path}`
+# and `{output_path}` are placeholders, and the literal JSON braces in the
+# output-shape example are doubled (`{{` / `}}`) to survive formatting.
+PROMPT_TEMPLATE = """\
+You are translating a chunk of a Spanish-language book into English for a
+language-learning app.
+
+Input file: {input_path}
+Output file: {output_path}
+
+Read the input file. It contains a JSON object with a `paragraphsES` array.
+Translate each paragraph into natural English. Preserve meaning, tone, and
+dialogue markers (—, –, ¡, ¿) as appropriate for the English output. Keep
+the same number of paragraphs in the same order.
+
+Notes for translation quality:
+- This is a beginner Spanish reader, so prefer plain natural English over
+  literary flourish.
+- Preserve proper nouns (character names, place names) verbatim.
+- Convert Spanish dialogue dashes (–, —) to English-style quotation marks
+  ONLY if it reads more naturally; otherwise keep them as em-dashes.
+- Do NOT add explanatory parentheticals; the in-app dictionary handles
+  per-word lookup.
+
+Write the output as JSON with shape:
+    {{"jobId": "<the jobId from the input>", "paragraphsEN": [...]}}
+
+The `paragraphsEN` array MUST be the same length and order as `paragraphsES`
+in the input. Write nothing else to disk and produce no other output.
+"""
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("slug")
+    parser.add_argument("--batch-size", type=int, default=30)
+    parser.add_argument("--build", type=Path, default=Path("build"))
+    args = parser.parse_args()
+
+    base = args.build / args.slug
+    chapters_path = base / "chapters.json"
+    jobs_dir = base / "jobs"
+    jobs_dir.mkdir(parents=True, exist_ok=True)
+
+    data = json.loads(chapters_path.read_text(encoding="utf-8"))
+
+    pending: list[str] = []
+    completed: list[str] = []
+    total_jobs = 0
+
+    for ch in data["chapters"]:
+        paragraphs = ch["paragraphsES"]
+        if not paragraphs:
+            continue
+        for offset in range(0, len(paragraphs), args.batch_size):
+            chunk = paragraphs[offset : offset + args.batch_size]
+            job_id = f"ch{ch['number']:02d}_b{offset // args.batch_size:02d}"
+            input_path = jobs_dir / f"{job_id}.input.json"
+            output_path = jobs_dir / f"{job_id}.output.json"
+
+            input_path.write_text(
+                json.dumps(
+                    {
+                        "jobId": job_id,
+                        "chapter": ch["number"],
+                        "chapterTitle": ch["title"],
+                        "rangeStart": offset,
+                        "rangeEnd": offset + len(chunk),
+                        "paragraphsES": chunk,
+                    },
+                    ensure_ascii=False,
+                    indent=2,
+                ),
+                encoding="utf-8",
+            )
+            total_jobs += 1
+            if output_path.exists():
+                completed.append(job_id)
+            else:
+                pending.append(job_id)
+
+    (jobs_dir / "_pending.txt").write_text("\n".join(pending) + ("\n" if pending else ""))
+
+    (jobs_dir / "_prompt_template.md").write_text(
+        PROMPT_TEMPLATE.format(
+            input_path="<JOB_INPUT_PATH>",
+            output_path="<JOB_OUTPUT_PATH>",
+        ),
+        encoding="utf-8",
+    )
+
+    print(f"Total translation jobs: {total_jobs}")
+    print(f"  Completed:            {len(completed)}")
+    print(f"  Pending:              {len(pending)}")
+    print(f"Manifest at:            {jobs_dir / '_pending.txt'}")
+    print(f"Prompt template at:     {jobs_dir / '_prompt_template.md'}")
+
+
+# Run the CLI only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()