Spanish/Conjuga/Scripts/books/run.sh

#!/usr/bin/env bash
# Orchestrate the books pipeline: EPUB -> chapters.json -> per-chapter job
# manifest -> (translation by Claude Code subagents) -> bundled book_<slug>.json.
#
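# This script DOES NOT run the LLM translation pass. After Phases 2 and 2b it
# prints how many translation and glossary jobs are pending and, if any remain,
# bundles a preview with placeholders. Use Claude Code subagents (or a fresh
# session per the README) to fill in build/<slug>/jobs/*.output.json (and the
# glossary jobs listed in build/<slug>/glossary/_pending.txt), then re-run this
# script — once nothing is pending, Phase 3 bundles with --require-all.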
#
# Usage:
#     ./run.sh <epub_path> [--slug SLUG] [--batch-size N] [--glossary-batch-size N]

# Strict mode: abort on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Work from the directory that contains this script, so the sibling Python
# scripts and the build/ tree can be addressed with relative paths.
script_dir="$(dirname "${BASH_SOURCE[0]}")"
HERE="$(cd "$script_dir" && pwd)"
cd "$HERE"

# parse_args <epub_path> [--slug SLUG] [--batch-size N] [--glossary-batch-size N]
#
# Populate the globals EPUB, SLUG, BATCH_SIZE and GLOSSARY_BATCH_SIZE from the
# command line. Defaults: SLUG="", BATCH_SIZE=30, GLOSSARY_BATCH_SIZE=150.
#
# On a usage error (no EPUB path, unknown option, or an option given without
# its value) a diagnostic is written to stderr and the function returns 2.
parse_args() {
    if [[ $# -lt 1 ]]; then
        echo "usage: $0 <epub_path> [--slug SLUG] [--batch-size N] [--glossary-batch-size N]" >&2
        return 2
    fi

    EPUB="$1"; shift
    SLUG=""
    BATCH_SIZE="30"
    GLOSSARY_BATCH_SIZE="150"

    while [[ $# -gt 0 ]]; do
        case "$1" in
            --slug|--batch-size|--glossary-batch-size)
                # Every option takes exactly one value. Check for it here so a
                # trailing flag yields a usage error rather than an
                # "unbound variable" abort from `set -u`.
                if [[ $# -lt 2 ]]; then
                    echo "option $1 requires a value" >&2
                    return 2
                fi
                case "$1" in
                    --slug) SLUG="$2" ;;
                    --batch-size) BATCH_SIZE="$2" ;;
                    --glossary-batch-size) GLOSSARY_BATCH_SIZE="$2" ;;
                esac
                shift 2
                ;;
            *)
                echo "unknown option: $1" >&2
                return 2
                ;;
        esac
    done
}

# `set -e` is active (enabled at the top of this script), so a non-zero return
# from parse_args aborts the script with that status (2 for usage errors).
parse_args "$@"

# resolve_epub_path PATH
#
# Print the absolute form of PATH (its directory made absolute, joined with its
# base name). Returns 1 with a message on stderr when PATH is empty or does
# not exist.
#
# The script has already changed into its own directory by the time this runs,
# so a relative PATH is looked up there first (the historical behaviour). If it
# is not found there, it is retried against the directory the caller invoked
# the script from, which bash records in $OLDPWD when that `cd` happens.
resolve_epub_path() {
    local path="$1"
    local caller_dir="${OLDPWD:-$PWD}"
    local dir

    if [[ -z "$path" ]]; then
        echo "error: EPUB path is empty" >&2
        return 1
    fi

    if [[ "$path" != /* && ! -e "$path" && -e "$caller_dir/$path" ]]; then
        path="$caller_dir/$path"
    fi

    if [[ ! -e "$path" ]]; then
        echo "error: EPUB not found: $1" >&2
        return 1
    fi

    # Assign separately so a failing `cd` is reported instead of being masked
    # by a later successful command substitution. CDPATH is cleared so `cd`
    # neither searches elsewhere nor echoes the directory it chose.
    dir="$(CDPATH= cd -- "$(dirname -- "$path")" && pwd)" || return 1
    printf '%s/%s\n' "$dir" "$(basename -- "$path")"
}

EPUB_ABS="$(resolve_epub_path "$EPUB")"

printf '%s\n' "=== Phase 1: extract_epub.py ==="
if [[ -z "$SLUG" ]]; then
    # No --slug given: run the extractor without one, then read the slug back
    # from the most recently modified build/*/chapters.json.
    python3 extract_epub.py "$EPUB_ABS"
    SLUG=$(python3 -c "import json,glob; p=sorted(glob.glob('build/*/chapters.json'), key=lambda x: -__import__('os').path.getmtime(x))[0]; print(json.load(open(p))['slug'])")
else
    # Caller chose the slug explicitly; pass it straight through.
    python3 extract_epub.py "$EPUB_ABS" --slug "$SLUG"
fi

# count_pending FILE
#
# Print the number of non-blank lines in the manifest FILE. Unlike `wc -l`,
# which counts newline characters, this still counts a final entry that lacks a
# trailing newline and ignores blank lines. Returns 1 with a message on stderr
# if FILE does not exist.
count_pending() {
    local manifest="$1"
    if [[ ! -f "$manifest" ]]; then
        echo "error: pending manifest not found: $manifest" >&2
        return 1
    fi
    awk 'NF { n++ } END { print n + 0 }' "$manifest"
}

echo
echo "=== Phase 2: translate_chapters.py ==="
python3 translate_chapters.py "$SLUG" --batch-size "$BATCH_SIZE"

# Pending translation jobs, read from the manifest under build/<slug>/jobs/.
PENDING_FILE="build/$SLUG/jobs/_pending.txt"
PENDING_COUNT="$(count_pending "$PENDING_FILE")"

echo
echo "=== Phase 2b: build_glossary.py ==="
python3 build_glossary.py "$SLUG" --batch-size "$GLOSSARY_BATCH_SIZE"

# Pending glossary jobs, read from the manifest under build/<slug>/glossary/.
GLOSS_PENDING_FILE="build/$SLUG/glossary/_pending.txt"
GLOSS_PENDING_COUNT="$(count_pending "$GLOSS_PENDING_FILE")"
TOTAL_PENDING=$((PENDING_COUNT + GLOSS_PENDING_COUNT))

printf '\n%s\n' "=== Phase 3: bundle_book.py ==="
if (( TOTAL_PENDING <= 0 )); then
    # Nothing outstanding in either manifest: bundle with --require-all.
    python3 bundle_book.py "$SLUG" --require-all
else
    # Jobs remain: tell the user which manifests to work through, then bundle
    # anyway (without --require-all) so a preview is available.
    printf '%s\n' \
        "  $PENDING_COUNT translation job(s) and $GLOSS_PENDING_COUNT glossary job(s) still pending." \
        "  Run the Claude Code subagent step (see README.md) for BOTH manifests:" \
        "    build/$SLUG/jobs/_pending.txt       (translation)" \
        "    build/$SLUG/glossary/_pending.txt   (glossary)" \
        "  then re-run this script. Bundling with placeholders so you can preview now."
    python3 bundle_book.py "$SLUG"
fi