#!/usr/bin/env python3
"""Apply enriched bodies from drafts/out/ back into the live source files.

Tense guides  → Conjuga/Conjuga/conjuga_data.json (tenseGuides[].body)
Grammar notes → Conjuga/Conjuga/Models/GrammarNote.swift (body: \"\"\"...\"\"\")

Filename conventions in drafts/out/:
  tense__<tenseId>.md — body to drop into the matching tenseGuide
  note__<noteId>.md   — body to drop into the matching GrammarNote(...) declaration

Run from anywhere; uses absolute paths anchored at the repo root.
"""
from __future__ import annotations

import json
import re
import sys
from pathlib import Path

REPO = Path('/Users/m4mini/Desktop/code/Spanish')
OUT_DIR = REPO / 'Conjuga/Scripts/guide-enrichment/out'
TENSE_JSON = REPO / 'Conjuga/Conjuga/conjuga_data.json'
NOTES_SWIFT = REPO / 'Conjuga/Conjuga/Models/GrammarNote.swift'


def read_draft(path: Path) -> str:
    """Return the body of a draft file with its leading header removed.

    Drafts may start with comment blocks like `# Title: ...`,
    `# Category: ...`, and `# ENRICHED BODY` separated by blank lines.
    Every leading line that is blank or starts with `#` is dropped; the
    first other line and everything after it is kept (later `#` lines
    included). The result is stripped of surrounding whitespace.

    Raises:
        ValueError: if nothing but header/blank lines is present.
    """
    lines = path.read_text(encoding='utf-8').splitlines()
    # Index of the first real content line; len(lines) when there is none,
    # which makes the slice below empty and triggers the error.
    first_content = next(
        (
            index
            for index, line in enumerate(lines)
            if line.strip() and not line.strip().startswith('#')
        ),
        len(lines),
    )
    body = '\n'.join(lines[first_content:]).strip()
    if not body:
        raise ValueError(f'Empty body after stripping header in {path}')
    return body


def apply_tense_guides() -> int:
    """Copy every `tense__*.md` draft into the matching tenseGuides entry.

    Drafts whose id is not present in `tenseGuides` are reported on stderr
    and skipped. The JSON file is rewritten (compact separators, non-ASCII
    kept as-is) only when at least one body was applied, so a run that
    matches nothing leaves the file untouched.

    Returns:
        The number of tense guides whose body was replaced.
    """
    data = json.loads(TENSE_JSON.read_text(encoding='utf-8'))
    guides_by_id = {guide['tenseId']: guide for guide in data['tenseGuides']}

    applied = 0
    for path in sorted(OUT_DIR.glob('tense__*.md')):
        tense_id = path.stem.removeprefix('tense__')
        guide = guides_by_id.get(tense_id)
        if guide is None:
            print(f' SKIP {tense_id}: not in tenseGuides', file=sys.stderr)
            continue
        body = read_draft(path)
        guide['body'] = body
        applied += 1
        print(f' applied tense: {tense_id} ({len(body)} chars)')

    if applied:
        TENSE_JSON.write_text(
            json.dumps(data, ensure_ascii=False, separators=(',', ':')),
            encoding='utf-8'
        )
    return applied


# Match each GrammarNote(...) declaration. Body uses """...""" — may contain
# anything except a triple-quote.
# Groups: 1 = everything up to and including the opening triple-quote,
#         2 = the note id, 3 = the current body, 4 = the closing triple-quote.
NOTE_PATTERN = re.compile(
    r'(GrammarNote\(\s*id:\s*"([^"]+)",\s*'
    r'title:\s*"(?:[^"\\]|\\.)*",\s*'
    r'category:\s*"[^"]+",\s*'
    r'body:\s*""")(.*?)(""")',
    re.DOTALL
)


def apply_grammar_notes() -> int:
    """Copy every `note__*.md` draft into the matching GrammarNote body.

    Each declaration matched by NOTE_PATTERN whose id has a draft gets its
    body replaced; other declarations are left byte-identical. Drafts that
    matched no declaration are reported on stderr. The Swift file is
    rewritten only when at least one body was applied.

    Returns:
        The number of declarations whose body was replaced.

    Raises:
        ValueError: if a draft body is empty or contains a triple-quote,
            which would terminate the Swift string literal early. Raised
            before anything is written.
    """
    src = NOTES_SWIFT.read_text(encoding='utf-8')
    drafts_by_id = {
        path.stem.removeprefix('note__'): path
        for path in sorted(OUT_DIR.glob('note__*.md'))
    }
    applied: list[str] = []

    def replace_body(match: re.Match[str]) -> str:
        note_id = match.group(2)
        draft = drafts_by_id.get(note_id)
        if draft is None:
            return match.group(0)
        body = read_draft(draft)
        if '"""' in body:
            raise ValueError(f'Body for {note_id} contains triple-quote — would break Swift parser')
        # The body is written verbatim, with no re-indentation. The
        # whitespace that preceded the old closing delimiter belongs to the
        # replaced group, so the new closing delimiter lands at column 0;
        # Swift derives the indentation to strip from the closing
        # delimiter's line, so nothing is stripped from the body.
        # NOTE(review): backslashes in a draft are written as-is and would
        # be read by Swift as escapes — confirm drafts never contain them.
        applied.append(note_id)
        print(f' applied note: {note_id} ({len(body)} chars)')
        return f'{match.group(1)}\n{body}\n{match.group(4)}'

    new_src = NOTE_PATTERN.sub(replace_body, src)

    # Mirror the tense-guide behaviour: tell the user about drafts that
    # did not land anywhere instead of dropping them silently.
    applied_ids = set(applied)
    for note_id in drafts_by_id:
        if note_id not in applied_ids:
            print(
                f' SKIP {note_id}: no matching GrammarNote declaration',
                file=sys.stderr,
            )

    if applied:
        NOTES_SWIFT.write_text(new_src, encoding='utf-8')
    return len(applied)


def main() -> None:
    """Apply all drafts and print a summary.

    Exits with status 1 when OUT_DIR does not exist and with status 2 when
    no tense guide and no grammar note was applied.
    """
    if not OUT_DIR.exists():
        print(f'No drafts/out directory at {OUT_DIR}', file=sys.stderr)
        sys.exit(1)

    print('=== Tense guides ===')
    tense_count = apply_tense_guides()
    print('\n=== Grammar notes ===')
    note_count = apply_grammar_notes()
    print(f'\nTotal applied: {tense_count} tense guides + {note_count} grammar notes')

    if tense_count == 0 and note_count == 0:
        print('Nothing applied — drafts/out/ was empty.', file=sys.stderr)
        sys.exit(2)


if __name__ == '__main__':
    main()