#!/usr/bin/env python3
"""Merge chapters.json + per-job translation outputs into the final bundled
book_SLUG.json that the iOS app reads from its bundle.

Usage:
    python3 bundle_book.py SLUG [--build BUILD_DIR] [--dest DEST_DIR] [--require-all]

Inputs:
    BUILD_DIR/SLUG/chapters.json
    BUILD_DIR/SLUG/jobs/*.input.json       (one per translation job)
    BUILD_DIR/SLUG/jobs/*.output.json      (from translation subagents)
    BUILD_DIR/SLUG/glossary/*.input.json   (one per glossary job)
    BUILD_DIR/SLUG/glossary/*.output.json  (from glossary subagents, Phase 2b)

Output:
    DEST_DIR/book_SLUG.json
    {
      "slug": "...",
      "title": "...",
      "author": "...",
      "language": "...",
      "chapters": [
        {"id": "ch1", "number": 1, "title": "Preface",
         "paragraphsES": ["...", ...],
         "paragraphsEN": ["...", ...]},
        ...
      ],
      "glossary": {
        "taza": {"baseForm": "taza", "english": "cup", "partOfSpeech": "noun"},
        ...
      }
    }

If --require-all is passed, the script fails if any translation OR glossary
job is missing its output. Otherwise it fills missing translations with empty
strings, leaves missing glossary entries out, and warns.
"""

from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path

DEFAULT_DEST = Path("../../Conjuga")


def _fit_length(items: list[str], length: int) -> list[str]:
    """Return *items* truncated, or right-padded with "", to exactly *length*."""
    return items[:length] + [""] * max(0, length - len(items))


def _report_missing(kind: str, job_ids: list[str], require_all: bool, consequence: str) -> None:
    """Warn about jobs without output, or exit with status 1 under --require-all.

    Does nothing when *job_ids* is empty. Only the first five ids are shown.
    """
    if not job_ids:
        return
    msg = f"{len(job_ids)} {kind} job(s) missing output: {job_ids[:5]}{'...' if len(job_ids) > 5 else ''}"
    if require_all:
        print(f"ERROR: {msg}", file=sys.stderr)
        sys.exit(1)
    print(f"WARN: {msg} — {consequence}", file=sys.stderr)


def _load_translations(
    jobs_dir: Path,
) -> tuple[dict[int, list[tuple[int, list[str]]]], list[str], int]:
    """Read every translation job in *jobs_dir*.

    Returns a tuple ``(chunks, missing, translated)``:

    * ``chunks`` maps a job's ``chapter`` value to a list of
      ``(rangeStart, paragraphsEN)`` pairs, one per job input file.
    * ``missing`` lists the ids of jobs that have no ``.output.json``.
    * ``translated`` counts the jobs that do have an output file.
    """
    chunks: dict[int, list[tuple[int, list[str]]]] = {}
    missing: list[str] = []
    translated = 0

    for input_path in sorted(jobs_dir.glob("*.input.json")):
        job_id = input_path.stem.removesuffix(".input")
        input_data = json.loads(input_path.read_text(encoding="utf-8"))
        expected = len(input_data["paragraphsES"])
        output_path = jobs_dir / f"{job_id}.output.json"

        if output_path.exists():
            output_data = json.loads(output_path.read_text(encoding="utf-8"))
            # `or []` also covers an explicit JSON null for paragraphsEN.
            paragraphs_en = output_data.get("paragraphsEN") or []
            if len(paragraphs_en) != expected:
                print(
                    f"WARN: {job_id} length mismatch — got {len(paragraphs_en)}, "
                    f"expected {expected}. Padding/truncating.",
                    file=sys.stderr,
                )
                paragraphs_en = _fit_length(paragraphs_en, expected)
            translated += 1
        else:
            missing.append(job_id)
            # Keep a blank placeholder of the right size. Chunks are later
            # concatenated in rangeStart order, so dropping this job would
            # shift every following translation onto the wrong ES paragraph.
            paragraphs_en = [""] * expected

        chunks.setdefault(input_data["chapter"], []).append(
            (input_data["rangeStart"], paragraphs_en)
        )

    return chunks, missing, translated


def _load_glossary(glossary_dir: Path) -> tuple[dict[str, dict], list[str]]:
    """Merge every glossary job's entries into one map keyed by stripped word.

    Returns ``(glossary, missing)`` where ``missing`` lists the ids of jobs
    without an ``.output.json``. A non-existent directory yields two empty
    results. When the same word appears more than once, the last entry read
    wins.
    """
    glossary: dict[str, dict] = {}
    missing: list[str] = []
    if not glossary_dir.exists():
        return glossary, missing

    for input_path in sorted(glossary_dir.glob("*.input.json")):
        job_id = input_path.stem.removesuffix(".input")
        output_path = glossary_dir / f"{job_id}.output.json"
        if not output_path.exists():
            missing.append(job_id)
            continue
        output_data = json.loads(output_path.read_text(encoding="utf-8"))
        for entry in output_data.get("entries", []):
            word = (entry.get("word") or "").strip()
            if not word:
                continue
            gloss_entry: dict = {
                "baseForm": entry.get("baseForm") or word,
                "english": entry.get("english") or "",
                "partOfSpeech": entry.get("partOfSpeech") or "",
            }
            # gender is optional; only emitted when it is a non-blank string.
            gender = entry.get("gender")
            if isinstance(gender, str) and gender.strip():
                gloss_entry["gender"] = gender.strip()
            glossary[word] = gloss_entry

    return glossary, missing


def _bundle_chapters(
    chapters: list[dict],
    chapter_translations: dict[int, list[tuple[int, list[str]]]],
) -> list[dict]:
    """Attach the concatenated English paragraphs to each chapter record."""
    bundled: list[dict] = []
    for ch in chapters:
        # Order by offset only, so equal offsets never compare the lists.
        ordered = sorted(
            chapter_translations.get(ch["number"], []), key=lambda chunk: chunk[0]
        )
        paragraphs_en: list[str] = []
        for _, en_chunk in ordered:
            paragraphs_en.extend(en_chunk)
        bundled.append(
            {
                "id": ch["id"],
                "number": ch["number"],
                "title": ch["title"],
                "paragraphsES": ch["paragraphsES"],
                # Force EN to the ES length even if the jobs did not cover
                # the chapter exactly.
                "paragraphsEN": _fit_length(paragraphs_en, len(ch["paragraphsES"])),
            }
        )
    return bundled


def main() -> None:
    """Parse the command line, merge all job outputs and write book_SLUG.json.

    Exits with status 1 when --require-all is given and any translation or
    glossary job has no output file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("slug")
    parser.add_argument("--build", type=Path, default=Path("build"))
    parser.add_argument("--dest", type=Path, default=None)
    parser.add_argument("--require-all", action="store_true")
    args = parser.parse_args()

    base = args.build / args.slug
    chapters = json.loads((base / "chapters.json").read_text(encoding="utf-8"))

    chapter_translations, missing, translated = _load_translations(base / "jobs")
    _report_missing(
        "translation", missing, args.require_all, "using empty strings for those paragraphs."
    )

    # Glossary (Phase 2b).
    glossary, glossary_missing = _load_glossary(base / "glossary")
    _report_missing(
        "glossary", glossary_missing, args.require_all, "glossary will be incomplete."
    )

    bundled_chapters = _bundle_chapters(chapters["chapters"], chapter_translations)

    payload = {
        "slug": chapters["slug"],
        "title": chapters["title"],
        "author": chapters["author"],
        "language": chapters["language"],
        "chapters": bundled_chapters,
        "glossary": glossary,
    }

    dest_dir = (args.dest or DEFAULT_DEST).resolve()
    dest_dir.mkdir(parents=True, exist_ok=True)
    out_path = dest_dir / f"book_{args.slug}.json"
    out_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")

    print(f"Wrote {out_path}")
    print(f" Chapters: {len(bundled_chapters)}")
    print(f" Translated jobs: {translated} / {translated + len(missing)}")
    print(f" Glossary words: {len(glossary)}")


if __name__ == "__main__":
    main()