Files
Trey T 7da98d786c Vocab study — noun & adjective flashcards with CEFR level toggles
Add SRS-driven noun and adjective flashcards modeled on the existing verb
flashcard flow:

- SharedModels/Lexeme — catalog of non-verb vocab, frequency-ranked, with
  gender for nouns and optional example sentences. Seeded from a bundled
  vocab_lexemes.json built by Scripts/vocab/build_lexemes.py, which joins
  frequency.csv + es-en.data from a pinned doozan/spanish_data commit
  (CC-BY-SA: hermitdave/FrequencyWords + Wiktionary). 1,449 nouns and 600
  adjectives, each with Wiktionary-sourced gender and (where available)
  an example sentence with English translation.
- LexemeReviewCard + LexemeReviewStore — cloud-synced SM-2 SRS, keyed by
  partOfSpeech + lexemeId + drillMode so future drill modes can coexist.
- LexemeSessionQueue + LexemePool — parallel to VocabSessionQueue; fresh
  cards sort by frequency rank.
- LexemeStudyGroup — cloud-synced resumable session per
  (partOfSpeech, drillMode).
- NounFlashcardPracticeView + AdjectiveFlashcardPracticeView — same flow
  as VocabFlashcardPracticeView: English prompt → tap to reveal Spanish
  → Again/Hard/Good/Easy. Nouns reveal with their article (la taza, el
  problema) so gender is taught alongside meaning, not as a separate
  quiz. Example sentence shown when present.

CEFR-style level toggles:
- LexemeLevel enum (A1/A2/B1/B2/C1+) derived from frequencyRank with
  standard Spanish-frequency-dictionary cutoffs (250/500/1000/2000).
- UserProgress.selectedLexemeLevels — cloud-synced multi-select, defaults
  to A1+A2 on first launch.
- SettingsView gains a "Vocabulary Levels" section with five toggles; the
  existing "Levels" section is renamed "Verb Levels" for clarity.
- Due SRS cards always surface regardless of toggles. Disabling a level
  only stops new cards from that band entering the pool.

PracticeView gets "Nouns" and "Adjectives" rows under "Books".

DataLoader: new lexemeDataVersion gate that re-seeds the Lexeme table
from vocab_lexemes.json independent of book seeding. project.yml lists
the new JSON resource and the existing book_olly-vol2.json (which the
previous build was silently excluding because xcodegen rewrote the
project from project.yml).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-19 20:16:55 -05:00

170 lines
6.6 KiB
Python

#!/usr/bin/env python3
"""Merge chapters.json + per-job translation outputs into the final bundled
book_<slug>.json that the iOS app reads from its bundle.
Usage:
python3 bundle_book.py <slug> [--build BUILD_DIR] [--dest DEST_DIR] [--require-all]
Inputs:
BUILD_DIR/<slug>/chapters.json
BUILD_DIR/<slug>/jobs/*.output.json (from translation subagents)
BUILD_DIR/<slug>/glossary/*.output.json (from glossary subagents, Phase 2b)
Output:
DEST_DIR/book_<slug>.json
{
"slug": "...",
"title": "...",
"author": "...",
"language": "...",
"chapters": [
{"id": "ch1", "number": 1, "title": "Preface",
"paragraphsES": ["...", ...],
"paragraphsEN": ["...", ...]},
...
],
"glossary": {
"taza": {"baseForm": "taza", "english": "cup", "partOfSpeech": "noun"},
...
}
}
If --require-all is passed, the script fails if any translation OR glossary job
is missing its output. Otherwise it fills missing translations with empty
strings, leaves missing glossary entries out, and warns.
"""
from __future__ import annotations
import argparse
import json
import sys
from pathlib import Path
DEFAULT_DEST = Path("../../Conjuga")
def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("slug")
parser.add_argument("--build", type=Path, default=Path("build"))
parser.add_argument("--dest", type=Path, default=None)
parser.add_argument("--require-all", action="store_true")
args = parser.parse_args()
base = args.build / args.slug
chapters = json.loads((base / "chapters.json").read_text(encoding="utf-8"))
jobs_dir = base / "jobs"
# Index translation jobs by chapter -> ordered (offset, paragraphsEN).
chapter_translations: dict[int, list[tuple[int, list[str]]]] = {}
missing: list[str] = []
for input_path in sorted(jobs_dir.glob("*.input.json")):
job_id = input_path.stem.removesuffix(".input")
input_data = json.loads(input_path.read_text(encoding="utf-8"))
output_path = jobs_dir / f"{job_id}.output.json"
if not output_path.exists():
missing.append(job_id)
continue
output_data = json.loads(output_path.read_text(encoding="utf-8"))
paragraphs_en = output_data.get("paragraphsEN", [])
expected = len(input_data["paragraphsES"])
if len(paragraphs_en) != expected:
print(
f"WARN: {job_id} length mismatch — got {len(paragraphs_en)}, "
f"expected {expected}. Padding/truncating.",
file=sys.stderr,
)
if len(paragraphs_en) < expected:
paragraphs_en = paragraphs_en + [""] * (expected - len(paragraphs_en))
else:
paragraphs_en = paragraphs_en[:expected]
chapter_translations.setdefault(input_data["chapter"], []).append(
(input_data["rangeStart"], paragraphs_en)
)
if missing:
msg = f"{len(missing)} translation job(s) missing output: {missing[:5]}{'...' if len(missing) > 5 else ''}"
if args.require_all:
print(f"ERROR: {msg}", file=sys.stderr)
sys.exit(1)
print(f"WARN: {msg} — using empty strings for those paragraphs.", file=sys.stderr)
# Glossary (Phase 2b) — merge every glossary job's entries into one map
# keyed by the cleaned word the app looks up.
glossary_dir = base / "glossary"
glossary: dict[str, dict] = {}
glossary_missing: list[str] = []
if glossary_dir.exists():
for input_path in sorted(glossary_dir.glob("*.input.json")):
job_id = input_path.stem.removesuffix(".input")
output_path = glossary_dir / f"{job_id}.output.json"
if not output_path.exists():
glossary_missing.append(job_id)
continue
output_data = json.loads(output_path.read_text(encoding="utf-8"))
for entry in output_data.get("entries", []):
word = (entry.get("word") or "").strip()
if not word:
continue
gloss_entry: dict = {
"baseForm": entry.get("baseForm") or word,
"english": entry.get("english") or "",
"partOfSpeech": entry.get("partOfSpeech") or "",
}
gender = entry.get("gender")
if isinstance(gender, str) and gender.strip():
gloss_entry["gender"] = gender.strip()
glossary[word] = gloss_entry
if glossary_missing:
msg = f"{len(glossary_missing)} glossary job(s) missing output: {glossary_missing[:5]}{'...' if len(glossary_missing) > 5 else ''}"
if args.require_all:
print(f"ERROR: {msg}", file=sys.stderr)
sys.exit(1)
print(f"WARN: {msg} — glossary will be incomplete.", file=sys.stderr)
bundled_chapters: list[dict] = []
for ch in chapters["chapters"]:
translations = sorted(chapter_translations.get(ch["number"], []))
paragraphs_en: list[str] = []
for _, en_chunk in translations:
paragraphs_en.extend(en_chunk)
# Pad to match ES length if jobs were missing for parts of this chapter.
if len(paragraphs_en) < len(ch["paragraphsES"]):
paragraphs_en += [""] * (len(ch["paragraphsES"]) - len(paragraphs_en))
elif len(paragraphs_en) > len(ch["paragraphsES"]):
paragraphs_en = paragraphs_en[: len(ch["paragraphsES"])]
bundled_chapters.append(
{
"id": ch["id"],
"number": ch["number"],
"title": ch["title"],
"paragraphsES": ch["paragraphsES"],
"paragraphsEN": paragraphs_en,
}
)
payload = {
"slug": chapters["slug"],
"title": chapters["title"],
"author": chapters["author"],
"language": chapters["language"],
"chapters": bundled_chapters,
"glossary": glossary,
}
dest_dir = (args.dest or DEFAULT_DEST).resolve()
dest_dir.mkdir(parents=True, exist_ok=True)
out_path = dest_dir / f"book_{args.slug}.json"
out_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
print(f"Wrote {out_path}")
print(f" Chapters: {len(bundled_chapters)}")
print(f" Translated jobs: {sum(len(v) for v in chapter_translations.values())} / {sum(len(v) for v in chapter_translations.values()) + len(missing)}")
print(f" Glossary words: {len(glossary)}")
if __name__ == "__main__":
main()