7da98d786c
Add SRS-driven noun and adjective flashcards modeled on the existing verb flashcard flow: - SharedModels/Lexeme — catalog of non-verb vocab, frequency-ranked, with gender for nouns and optional example sentences. Seeded from a bundled vocab_lexemes.json built by Scripts/vocab/build_lexemes.py, which joins frequency.csv + es-en.data from a pinned doozan/spanish_data commit (CC-BY-SA: hermitdave/FrequencyWords + Wiktionary). 1,449 nouns and 600 adjectives, each with Wiktionary-sourced gender and (where available) an example sentence with English translation. - LexemeReviewCard + LexemeReviewStore — cloud-synced SM-2 SRS, keyed by partOfSpeech + lexemeId + drillMode so future drill modes can coexist. - LexemeSessionQueue + LexemePool — parallel to VocabSessionQueue; fresh cards sort by frequency rank. - LexemeStudyGroup — cloud-synced resumable session per (partOfSpeech, drillMode). - NounFlashcardPracticeView + AdjectiveFlashcardPracticeView — same flow as VocabFlashcardPracticeView: English prompt → tap to reveal Spanish → Again/Hard/Good/Easy. Nouns reveal with their article (la taza, el problema) so gender is taught alongside meaning, not as a separate quiz. Example sentence shown when present. CEFR-style level toggles: - LexemeLevel enum (A1/A2/B1/B2/C1+) derived from frequencyRank with standard Spanish-frequency-dictionary cutoffs (250/500/1000/2000). - UserProgress.selectedLexemeLevels — cloud-synced multi-select, defaults to A1+A2 on first launch. - SettingsView gains a "Vocabulary Levels" section with five toggles; the existing "Levels" section is renamed "Verb Levels" for clarity. - Due SRS cards always surface regardless of toggles. Disabling a level only stops new cards from that band entering the pool. PracticeView gets "Nouns" and "Adjectives" rows under "Books". DataLoader: new lexemeDataVersion gate that re-seeds the Lexeme table from vocab_lexemes.json independent of book seeding. 
project.yml lists the new JSON resource and the existing book_olly-vol2.json (which the previous build was silently excluding because xcodegen rewrote the project from project.yml). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
170 lines
6.6 KiB
Python
170 lines
6.6 KiB
Python
#!/usr/bin/env python3
|
|
"""Merge chapters.json + per-job translation outputs into the final bundled
|
|
book_<slug>.json that the iOS app reads from its bundle.
|
|
|
|
Usage:
|
|
python3 bundle_book.py <slug> [--build BUILD_DIR] [--dest DEST_DIR] [--require-all]
|
|
|
|
Inputs:
|
|
BUILD_DIR/<slug>/chapters.json
|
|
BUILD_DIR/<slug>/jobs/*.output.json (from translation subagents)
|
|
BUILD_DIR/<slug>/glossary/*.output.json (from glossary subagents, Phase 2b)
|
|
|
|
Output:
|
|
DEST_DIR/book_<slug>.json
|
|
{
|
|
"slug": "...",
|
|
"title": "...",
|
|
"author": "...",
|
|
"language": "...",
|
|
"chapters": [
|
|
{"id": "ch1", "number": 1, "title": "Preface",
|
|
"paragraphsES": ["...", ...],
|
|
"paragraphsEN": ["...", ...]},
|
|
...
|
|
],
|
|
"glossary": {
|
|
"taza": {"baseForm": "taza", "english": "cup", "partOfSpeech": "noun"},
|
|
...
|
|
}
|
|
}
|
|
|
|
If --require-all is passed, the script fails if any translation OR glossary job
|
|
is missing its output. Otherwise it fills missing translations with empty
|
|
strings, leaves missing glossary entries out, and warns.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
|
|
# Directory that receives book_<slug>.json when --dest is not supplied. The
# path is relative, so main() resolves it against the current working directory.
DEFAULT_DEST = Path("../../Conjuga")
|
|
|
|
|
|
def _read_json(path: Path) -> dict:
    """Parse the UTF-8 JSON file at *path* (expected to hold a JSON object)."""
    return json.loads(path.read_text(encoding="utf-8"))


def _fit_length(items: list[str], size: int) -> list[str]:
    """Return *items* truncated or right-padded with "" to exactly *size* entries."""
    return items[:size] + [""] * (size - len(items))


def _report_missing(
    kind: str, job_ids: list[str], consequence: str, require_all: bool
) -> None:
    """Warn about jobs without output, or exit with status 1 under --require-all."""
    if not job_ids:
        return
    msg = (
        f"{len(job_ids)} {kind} job(s) missing output: "
        f"{job_ids[:5]}{'...' if len(job_ids) > 5 else ''}"
    )
    if require_all:
        print(f"ERROR: {msg}", file=sys.stderr)
        sys.exit(1)
    print(f"WARN: {msg} — {consequence}", file=sys.stderr)


def _collect_translations(
    jobs_dir: Path,
) -> tuple[dict[int, list[tuple[int, list[str]]]], int, list[str]]:
    """Index translation jobs by chapter number.

    Returns ``(chunks_by_chapter, translated_count, missing_job_ids)`` where
    each chunk is ``(rangeStart, paragraphsEN)``. A job whose output file is
    absent still contributes a chunk of empty strings sized from its input, so
    the chunks that follow it in the same chapter stay lined up with their
    Spanish paragraphs instead of sliding into the gap.
    """
    chunks_by_chapter: dict[int, list[tuple[int, list[str]]]] = {}
    missing: list[str] = []
    translated = 0

    for input_path in sorted(jobs_dir.glob("*.input.json")):
        job_id = input_path.stem.removesuffix(".input")
        job = _read_json(input_path)
        expected = len(job["paragraphsES"])
        output_path = jobs_dir / f"{job_id}.output.json"

        if output_path.exists():
            # `or []` also covers an explicit JSON null for paragraphsEN.
            paragraphs_en = _read_json(output_path).get("paragraphsEN") or []
            if len(paragraphs_en) != expected:
                print(
                    f"WARN: {job_id} length mismatch — got {len(paragraphs_en)}, "
                    f"expected {expected}. Padding/truncating.",
                    file=sys.stderr,
                )
                paragraphs_en = _fit_length(paragraphs_en, expected)
            translated += 1
        else:
            missing.append(job_id)
            # Placeholder keeps this job's slot so later chunks are not shifted.
            paragraphs_en = [""] * expected

        chunks_by_chapter.setdefault(job["chapter"], []).append(
            (job["rangeStart"], paragraphs_en)
        )

    return chunks_by_chapter, translated, missing


def _collect_glossary(glossary_dir: Path) -> tuple[dict[str, dict], list[str]]:
    """Merge every glossary job's entries into one map keyed by the stripped word.

    Returns ``(glossary, missing_job_ids)``. Entries with a blank or absent
    "word" are skipped; when the same word appears more than once, the entry
    processed last (jobs are visited in sorted filename order) wins. A
    "gender" key is only emitted for non-blank string values.
    """
    glossary: dict[str, dict] = {}
    missing: list[str] = []
    if not glossary_dir.exists():
        return glossary, missing

    for input_path in sorted(glossary_dir.glob("*.input.json")):
        job_id = input_path.stem.removesuffix(".input")
        output_path = glossary_dir / f"{job_id}.output.json"
        if not output_path.exists():
            missing.append(job_id)
            continue
        for entry in _read_json(output_path).get("entries", []):
            word = (entry.get("word") or "").strip()
            if not word:
                continue
            gloss_entry: dict = {
                "baseForm": entry.get("baseForm") or word,
                "english": entry.get("english") or "",
                "partOfSpeech": entry.get("partOfSpeech") or "",
            }
            gender = entry.get("gender")
            if isinstance(gender, str) and gender.strip():
                gloss_entry["gender"] = gender.strip()
            glossary[word] = gloss_entry

    return glossary, missing


def _bundle_chapters(
    chapters: list[dict],
    chunks_by_chapter: dict[int, list[tuple[int, list[str]]]],
) -> list[dict]:
    """Pair each chapter's Spanish paragraphs with its concatenated English chunks."""
    bundled: list[dict] = []
    for ch in chapters:
        # Order chunks by their start offset only; chunk contents never
        # influence the ordering.
        ordered = sorted(
            chunks_by_chapter.get(ch["number"], []), key=lambda chunk: chunk[0]
        )
        paragraphs_en: list[str] = []
        for _, en_chunk in ordered:
            paragraphs_en.extend(en_chunk)
        bundled.append(
            {
                "id": ch["id"],
                "number": ch["number"],
                "title": ch["title"],
                "paragraphsES": ch["paragraphsES"],
                # Final safety net: force EN to the same length as ES.
                "paragraphsEN": _fit_length(paragraphs_en, len(ch["paragraphsES"])),
            }
        )
    return bundled


def main() -> None:
    """Bundle one book's chapters, translations and glossary into a single JSON file.

    Command line: ``<slug> [--build BUILD_DIR] [--dest DEST_DIR] [--require-all]``.

    Reads ``BUILD_DIR/<slug>/chapters.json`` plus the ``jobs`` and ``glossary``
    job directories beside it, and writes ``DEST_DIR/book_<slug>.json``
    (``DEST_DIR`` falls back to ``DEFAULT_DEST``). Paragraphs belonging to a
    translation job without output are emitted as empty strings in place.

    Exits with status 1 when ``--require-all`` is given and any translation or
    glossary job lacks an output file; otherwise a warning is printed to
    stderr and bundling continues.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("slug")
    parser.add_argument("--build", type=Path, default=Path("build"))
    parser.add_argument("--dest", type=Path, default=None)
    parser.add_argument("--require-all", action="store_true")
    args = parser.parse_args()

    base = args.build / args.slug
    chapters = _read_json(base / "chapters.json")

    chunks_by_chapter, translated, missing = _collect_translations(base / "jobs")
    _report_missing(
        "translation",
        missing,
        "using empty strings for those paragraphs.",
        args.require_all,
    )

    glossary, glossary_missing = _collect_glossary(base / "glossary")
    _report_missing(
        "glossary",
        glossary_missing,
        "glossary will be incomplete.",
        args.require_all,
    )

    bundled_chapters = _bundle_chapters(chapters["chapters"], chunks_by_chapter)

    payload = {
        "slug": chapters["slug"],
        "title": chapters["title"],
        "author": chapters["author"],
        "language": chapters["language"],
        "chapters": bundled_chapters,
        "glossary": glossary,
    }

    dest_dir = (args.dest or DEFAULT_DEST).resolve()
    dest_dir.mkdir(parents=True, exist_ok=True)
    out_path = dest_dir / f"book_{args.slug}.json"
    out_path.write_text(
        json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8"
    )

    print(f"Wrote {out_path}")
    print(f" Chapters: {len(bundled_chapters)}")
    print(f" Translated jobs: {translated} / {translated + len(missing)}")
    print(f" Glossary words: {len(glossary)}")


if __name__ == "__main__":
    main()
|