Initial commit: Conjuga Spanish conjugation app
Includes SwiftData dual-store architecture (local reference + CloudKit user data), JSON-based data seeding, 20 tense guides, 20 grammar notes, SRS review system, course vocabulary, and widget support. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
47501
Conjuga/Scripts/all_courses_data.json
Normal file
47501
Conjuga/Scripts/all_courses_data.json
Normal file
File diff suppressed because it is too large
Load Diff
14
Conjuga/Scripts/build_store.swift
Normal file
14
Conjuga/Scripts/build_store.swift
Normal file
@@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env swift

// Run this script to generate a pre-built SwiftData store (default.store)
// that ships with the app bundle. No first-launch seeding needed.

import Foundation
import SwiftData

// We can't easily run this as a standalone script because it needs
// the @Model types compiled. Instead, we'll build it as part of the app.
// See DataLoader.buildPreloadedStore() below.
// NOTE(review): "below" presumably refers to DataLoader in the app target,
// not to this file — confirm against the app sources.

print("Use DataLoader.buildPreloadedStore() from within the app to generate the store.")
print("Then copy the .store file to the bundle.")
|
||||
1
Conjuga/Scripts/conjuga_data.json
Normal file
1
Conjuga/Scripts/conjuga_data.json
Normal file
File diff suppressed because one or more lines are too long
160
Conjuga/Scripts/conjuga_data_debug.json
Normal file
160
Conjuga/Scripts/conjuga_data_debug.json
Normal file
@@ -0,0 +1,160 @@
|
||||
{
|
||||
"stats": {
|
||||
"verbs": 1750,
|
||||
"verbForms": 209014,
|
||||
"irregularSpans": 14078,
|
||||
"tenseGuides": 20
|
||||
},
|
||||
"sampleVerb": {
|
||||
"id": 1,
|
||||
"infinitive": "ser",
|
||||
"english": "to be",
|
||||
"rank": 1,
|
||||
"ending": "er",
|
||||
"reflexive": 0,
|
||||
"level": "basic",
|
||||
"hasConjuuData": true
|
||||
},
|
||||
"sampleForms": [
|
||||
{
|
||||
"verbId": 1,
|
||||
"tenseId": "ind_presente",
|
||||
"personIndex": 0,
|
||||
"form": "soy",
|
||||
"regularity": "irregular"
|
||||
},
|
||||
{
|
||||
"verbId": 1,
|
||||
"tenseId": "ind_presente",
|
||||
"personIndex": 1,
|
||||
"form": "eres",
|
||||
"regularity": "irregular"
|
||||
},
|
||||
{
|
||||
"verbId": 1,
|
||||
"tenseId": "ind_presente",
|
||||
"personIndex": 2,
|
||||
"form": "es",
|
||||
"regularity": "irregular"
|
||||
},
|
||||
{
|
||||
"verbId": 1,
|
||||
"tenseId": "ind_presente",
|
||||
"personIndex": 3,
|
||||
"form": "somos",
|
||||
"regularity": "irregular"
|
||||
},
|
||||
{
|
||||
"verbId": 1,
|
||||
"tenseId": "ind_presente",
|
||||
"personIndex": 4,
|
||||
"form": "sois",
|
||||
"regularity": "irregular"
|
||||
},
|
||||
{
|
||||
"verbId": 1,
|
||||
"tenseId": "ind_presente",
|
||||
"personIndex": 5,
|
||||
"form": "son",
|
||||
"regularity": "irregular"
|
||||
},
|
||||
{
|
||||
"verbId": 1,
|
||||
"tenseId": "ind_preterito",
|
||||
"personIndex": 0,
|
||||
"form": "fui",
|
||||
"regularity": "irregular"
|
||||
},
|
||||
{
|
||||
"verbId": 1,
|
||||
"tenseId": "ind_preterito",
|
||||
"personIndex": 1,
|
||||
"form": "fuiste",
|
||||
"regularity": "irregular"
|
||||
},
|
||||
{
|
||||
"verbId": 1,
|
||||
"tenseId": "ind_preterito",
|
||||
"personIndex": 2,
|
||||
"form": "fue",
|
||||
"regularity": "irregular"
|
||||
},
|
||||
{
|
||||
"verbId": 1,
|
||||
"tenseId": "ind_preterito",
|
||||
"personIndex": 3,
|
||||
"form": "fuimos",
|
||||
"regularity": "irregular"
|
||||
},
|
||||
{
|
||||
"verbId": 1,
|
||||
"tenseId": "ind_preterito",
|
||||
"personIndex": 4,
|
||||
"form": "fuisteis",
|
||||
"regularity": "irregular"
|
||||
},
|
||||
{
|
||||
"verbId": 1,
|
||||
"tenseId": "ind_preterito",
|
||||
"personIndex": 5,
|
||||
"form": "fueron",
|
||||
"regularity": "irregular"
|
||||
},
|
||||
{
|
||||
"verbId": 1,
|
||||
"tenseId": "ind_imperfecto",
|
||||
"personIndex": 0,
|
||||
"form": "era",
|
||||
"regularity": "irregular"
|
||||
},
|
||||
{
|
||||
"verbId": 1,
|
||||
"tenseId": "ind_imperfecto",
|
||||
"personIndex": 1,
|
||||
"form": "eras",
|
||||
"regularity": "irregular"
|
||||
},
|
||||
{
|
||||
"verbId": 1,
|
||||
"tenseId": "ind_imperfecto",
|
||||
"personIndex": 2,
|
||||
"form": "era",
|
||||
"regularity": "irregular"
|
||||
},
|
||||
{
|
||||
"verbId": 1,
|
||||
"tenseId": "ind_imperfecto",
|
||||
"personIndex": 3,
|
||||
"form": "éramos",
|
||||
"regularity": "irregular"
|
||||
},
|
||||
{
|
||||
"verbId": 1,
|
||||
"tenseId": "ind_imperfecto",
|
||||
"personIndex": 4,
|
||||
"form": "erais",
|
||||
"regularity": "irregular"
|
||||
},
|
||||
{
|
||||
"verbId": 1,
|
||||
"tenseId": "ind_imperfecto",
|
||||
"personIndex": 5,
|
||||
"form": "eran",
|
||||
"regularity": "irregular"
|
||||
},
|
||||
{
|
||||
"verbId": 1,
|
||||
"tenseId": "ind_futuro",
|
||||
"personIndex": 0,
|
||||
"form": "seré",
|
||||
"regularity": "ordinary"
|
||||
},
|
||||
{
|
||||
"verbId": 1,
|
||||
"tenseId": "ind_futuro",
|
||||
"personIndex": 1,
|
||||
"form": "serás",
|
||||
"regularity": "ordinary"
|
||||
}
|
||||
]
|
||||
}
|
||||
14420
Conjuga/Scripts/course_data.json
Normal file
14420
Conjuga/Scripts/course_data.json
Normal file
File diff suppressed because it is too large
Load Diff
7276
Conjuga/Scripts/course_examples.json
Normal file
7276
Conjuga/Scripts/course_examples.json
Normal file
File diff suppressed because it is too large
Load Diff
550
Conjuga/Scripts/merge_data.py
Normal file
550
Conjuga/Scripts/merge_data.py
Normal file
@@ -0,0 +1,550 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Merge ConjuGato + Conjuu ES data into unified JSON for Conjuga app.
|
||||
|
||||
Sources:
|
||||
- ConjuGato: 1,750 verbs (verb.md), irregular forms, spans, irregularity bitmasks
|
||||
- Conjuu ES: 621 verbs with full conjugation tables, tense guides, conjugation rules
|
||||
|
||||
Output: conjuga_data.json with all verbs, forms, spans, guides
|
||||
"""
|
||||
|
||||
import csv
|
||||
import json
|
||||
import re
|
||||
import sqlite3
|
||||
import os
|
||||
import plistlib
|
||||
import subprocess
|
||||
|
||||
BASE = "/Users/treyt/Desktop/code/Spanish"
|
||||
CONJUGATO_DB = "/Applications/ConjuGato.app/WrappedBundle/Verbs.sqlite"
|
||||
CONJUU_VOCAB = "/Applications/Conjuu ES.app/Contents/Resources/Vocabulary.csv"
|
||||
CONJUU_GUIDE = "/Applications/Conjuu ES.app/Contents/Resources/en.lproj/Guide.strings"
|
||||
CONJUU_RULES = "/Applications/Conjuu ES.app/Contents/Resources/GuideTableEntries.plist"
|
||||
CONJUU_LEVELS = "/Applications/Conjuu ES.app/Contents/Resources"
|
||||
OUTPUT = os.path.join(BASE, "Conjuga", "Scripts", "conjuga_data.json")
|
||||
|
||||
# ─── Tense metadata ───
|
||||
TENSES = [
|
||||
{"id": "ind_presente", "spanish": "Indicativo Presente", "english": "Present", "mood": "Indicative", "order": 0},
|
||||
{"id": "ind_preterito", "spanish": "Indicativo Pretérito", "english": "Preterite", "mood": "Indicative", "order": 1},
|
||||
{"id": "ind_imperfecto", "spanish": "Indicativo Imperfecto", "english": "Imperfect", "mood": "Indicative", "order": 2},
|
||||
{"id": "ind_futuro", "spanish": "Indicativo Futuro", "english": "Future", "mood": "Indicative", "order": 3},
|
||||
{"id": "ind_perfecto", "spanish": "Indicativo Perfecto", "english": "Present Perfect", "mood": "Indicative", "order": 4},
|
||||
{"id": "ind_pluscuamperfecto", "spanish": "Indicativo Pluscuamperfecto", "english": "Pluperfect", "mood": "Indicative", "order": 5},
|
||||
{"id": "ind_futuro_perfecto", "spanish": "Indicativo Futuro Perfecto", "english": "Future Perfect", "mood": "Indicative", "order": 6},
|
||||
{"id": "ind_preterito_anterior", "spanish": "Indicativo Pretérito Anterior", "english": "Preterite Perfect", "mood": "Indicative", "order": 7},
|
||||
{"id": "cond_presente", "spanish": "Condicional Presente", "english": "Conditional", "mood": "Conditional", "order": 8},
|
||||
{"id": "cond_perfecto", "spanish": "Condicional Perfecto", "english": "Conditional Perfect", "mood": "Conditional", "order": 9},
|
||||
{"id": "subj_presente", "spanish": "Subjuntivo Presente", "english": "Present Subjunctive", "mood": "Subjunctive", "order": 10},
|
||||
{"id": "subj_imperfecto_1", "spanish": "Subjuntivo Imperfecto I", "english": "Past Subjunctive (ra)", "mood": "Subjunctive", "order": 11},
|
||||
{"id": "subj_imperfecto_2", "spanish": "Subjuntivo Imperfecto II", "english": "Past Subjunctive (se)", "mood": "Subjunctive", "order": 12},
|
||||
{"id": "subj_perfecto", "spanish": "Subjuntivo Perfecto", "english": "Subjunctive Perfect", "mood": "Subjunctive", "order": 13},
|
||||
{"id": "subj_pluscuamperfecto_1", "spanish": "Subjuntivo Pluscuamperfecto I", "english": "Subjunctive Pluperfect (ra)", "mood": "Subjunctive", "order": 14},
|
||||
{"id": "subj_pluscuamperfecto_2", "spanish": "Subjuntivo Pluscuamperfecto II", "english": "Subjunctive Pluperfect (se)", "mood": "Subjunctive", "order": 15},
|
||||
{"id": "subj_futuro", "spanish": "Subjuntivo Futuro", "english": "Subjunctive Future", "mood": "Subjunctive", "order": 16},
|
||||
{"id": "subj_futuro_perfecto", "spanish": "Subjuntivo Futuro Perfecto", "english": "Subjunctive Future Perfect", "mood": "Subjunctive", "order": 17},
|
||||
{"id": "imp_afirmativo", "spanish": "Imperativo Afirmativo", "english": "Imperative", "mood": "Imperative", "order": 18},
|
||||
{"id": "imp_negativo", "spanish": "Imperativo Negativo", "english": "Negative Imperative", "mood": "Imperative", "order": 19},
|
||||
]
|
||||
|
||||
TENSE_LOOKUP = {}
|
||||
for t in TENSES:
|
||||
TENSE_LOOKUP[t["spanish"]] = t["id"]
|
||||
|
||||
PERSONS = ["yo", "tú", "él/ella/Ud.", "nosotros", "vosotros", "ellos/ellas/Uds."]
|
||||
|
||||
ENDINGS = {
|
||||
"ar": {
|
||||
"ind_presente": ["o", "as", "a", "amos", "áis", "an"],
|
||||
"ind_preterito": ["é", "aste", "ó", "amos", "asteis", "aron"],
|
||||
"ind_imperfecto": ["aba", "abas", "aba", "ábamos", "abais", "aban"],
|
||||
"ind_futuro": ["aré", "arás", "ará", "aremos", "aréis", "arán"],
|
||||
"cond_presente": ["aría", "arías", "aría", "aríamos", "aríais", "arían"],
|
||||
"subj_presente": ["e", "es", "e", "emos", "éis", "en"],
|
||||
"subj_imperfecto_1": ["ara", "aras", "ara", "áramos", "arais", "aran"],
|
||||
"subj_imperfecto_2": ["ase", "ases", "ase", "ásemos", "aseis", "asen"],
|
||||
"subj_futuro": ["are", "ares", "are", "áremos", "areis", "aren"],
|
||||
"imp_afirmativo": ["", "a", "e", "emos", "ad", "en"],
|
||||
"imp_negativo": ["", "es", "e", "emos", "éis", "en"],
|
||||
},
|
||||
"er": {
|
||||
"ind_presente": ["o", "es", "e", "emos", "éis", "en"],
|
||||
"ind_preterito": ["í", "iste", "ió", "imos", "isteis", "ieron"],
|
||||
"ind_imperfecto": ["ía", "ías", "ía", "íamos", "íais", "ían"],
|
||||
"ind_futuro": ["eré", "erás", "erá", "eremos", "eréis", "erán"],
|
||||
"cond_presente": ["ería", "erías", "ería", "eríamos", "eríais", "erían"],
|
||||
"subj_presente": ["a", "as", "a", "amos", "áis", "an"],
|
||||
"subj_imperfecto_1": ["iera", "ieras", "iera", "iéramos", "ierais", "ieran"],
|
||||
"subj_imperfecto_2": ["iese", "ieses", "iese", "iésemos", "ieseis", "iesen"],
|
||||
"subj_futuro": ["iere", "ieres", "iere", "iéremos", "iereis", "ieren"],
|
||||
"imp_afirmativo": ["", "e", "a", "amos", "ed", "an"],
|
||||
"imp_negativo": ["", "as", "a", "amos", "áis", "an"],
|
||||
},
|
||||
"ir": {
|
||||
"ind_presente": ["o", "es", "e", "imos", "ís", "en"],
|
||||
"ind_preterito": ["í", "iste", "ió", "imos", "isteis", "ieron"],
|
||||
"ind_imperfecto": ["ía", "ías", "ía", "íamos", "íais", "ían"],
|
||||
"ind_futuro": ["iré", "irás", "irá", "iremos", "iréis", "irán"],
|
||||
"cond_presente": ["iría", "irías", "iría", "iríamos", "iríais", "irían"],
|
||||
"subj_presente": ["a", "as", "a", "amos", "áis", "an"],
|
||||
"subj_imperfecto_1": ["iera", "ieras", "iera", "iéramos", "ierais", "ieran"],
|
||||
"subj_imperfecto_2": ["iese", "ieses", "iese", "iésemos", "ieseis", "iesen"],
|
||||
"subj_futuro": ["iere", "ieres", "iere", "iéremos", "iereis", "ieren"],
|
||||
"imp_afirmativo": ["", "e", "a", "amos", "id", "an"],
|
||||
"imp_negativo": ["", "as", "a", "amos", "áis", "an"],
|
||||
},
|
||||
}
|
||||
|
||||
# Compound tenses: auxiliary haber forms
|
||||
HABER = {
|
||||
"ind_perfecto": ["he", "has", "ha", "hemos", "habéis", "han"],
|
||||
"ind_pluscuamperfecto": ["había", "habías", "había", "habíamos", "habíais", "habían"],
|
||||
"ind_futuro_perfecto": ["habré", "habrás", "habrá", "habremos", "habréis", "habrán"],
|
||||
"ind_preterito_anterior": ["hube", "hubiste", "hubo", "hubimos", "hubisteis", "hubieron"],
|
||||
"cond_perfecto": ["habría", "habrías", "habría", "habríamos", "habríais", "habrían"],
|
||||
"subj_perfecto": ["haya", "hayas", "haya", "hayamos", "hayáis", "hayan"],
|
||||
"subj_pluscuamperfecto_1": ["hubiera", "hubieras", "hubiera", "hubiéramos", "hubierais", "hubieran"],
|
||||
"subj_pluscuamperfecto_2": ["hubiese", "hubieses", "hubiese", "hubiésemos", "hubieseis", "hubiesen"],
|
||||
"subj_futuro_perfecto": ["hubiere", "hubieres", "hubiere", "hubiéremos", "hubiereis", "hubieren"],
|
||||
}
|
||||
|
||||
def get_ending_type(infinitive):
    """Classify *infinitive* as an "ar", "er", or "ir" verb.

    Reflexive forms ("-arse"/"-erse"/"-irse") are reduced to their plain
    infinitive first; anything unrecognized falls back to "ar".
    """
    lowered = infinitive.lower()
    core = lowered[:-2] if lowered.endswith(("arse", "erse", "irse")) else lowered
    for suffixes, kind in ((("ar",), "ar"), (("er",), "er"), (("ir", "ír"), "ir")):
        if core.endswith(suffixes):
            return kind
    return "ar"
|
||||
|
||||
def get_stem(infinitive, ending_type):
    """Return the verb stem: the lowercased infinitive minus its 2-letter ending.

    A trailing reflexive "se" is removed first.  Note that every branch of
    the original ending_type classification dropped the final two
    characters, so ending_type does not affect the result; it is kept for
    signature compatibility with the other conjugation helpers.
    """
    base = infinitive.lower()
    if base.endswith("se"):
        base = base[:-2]
    return base[:-2]
|
||||
|
||||
def get_participle(infinitive, ending_type):
    """Return the regular past participle: stem + "ado" for -ar verbs, "ido" otherwise."""
    suffix = "ado" if ending_type == "ar" else "ido"
    return get_stem(infinitive, ending_type) + suffix
|
||||
|
||||
def conjugate_regular(infinitive, tense_id, ending_type):
    """Conjugate *infinitive* regularly for *tense_id*.

    Returns six forms (yo, tú, él/ella, nosotros, vosotros, ellos), or six
    empty strings for tenses with no regular pattern in ENDINGS.

    Bug fix: the old future/conditional branch returned
    ``infinitive.rstrip("se") + ending.lstrip(...)``, which yields forms
    like "hablarré" (future) and "hablararía" (conditional); the corrected
    stem-based code placed after that return was unreachable.  The ENDINGS
    table already stores full stem-based endings for these tenses
    ("aré", "ería", ...), so no special case is needed at all — the
    generic stem + ending path produces e.g. "habl" + "aré" == "hablaré".
    """
    # Compound tenses: conjugated "haber" + invariant past participle.
    if tense_id in HABER:
        participle = get_participle(infinitive, ending_type)
        return [f"{aux} {participle}" for aux in HABER[tense_id]]

    # Simple tenses (including future/conditional): stem + table ending.
    if tense_id in ENDINGS[ending_type]:
        stem = get_stem(infinitive, ending_type)
        return [stem + ending for ending in ENDINGS[ending_type][tense_id]]

    # Tense not covered by the regular tables for this conjugation class.
    return [""] * 6
|
||||
|
||||
def conjugate_future_cond(infinitive, tense_id, ending_type):
    """Conjugate a regular future or conditional form.

    Future/conditional attach person endings to the full infinitive
    (minus any reflexive "se").  Returns a six-element list for
    "ind_futuro" / "cond_presente", or None for any other tense.
    The ending_type parameter is unused; it is kept for signature parity
    with conjugate_regular().
    """
    person_endings = {
        "ind_futuro": ["é", "ás", "á", "emos", "éis", "án"],
        "cond_presente": ["ía", "ías", "ía", "íamos", "íais", "ían"],
    }
    endings = person_endings.get(tense_id)
    if endings is None:
        return None
    stem = infinitive.lower()
    if stem.endswith("se"):
        stem = stem[:-2]
    return [stem + suffix for suffix in endings]
|
||||
|
||||
|
||||
# ─── Step 1: Load ConjuGato verbs ───
|
||||
print("Loading ConjuGato data...")
|
||||
conn = sqlite3.connect(CONJUGATO_DB)
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Verbs
|
||||
cursor.execute("SELECT Id, Rank, Ending, Reflexive, Spanish, English FROM Verb ORDER BY Rank")
|
||||
conjugato_verbs = {}
|
||||
for row in cursor.fetchall():
|
||||
vid, rank, ending, reflexive, spanish, english = row
|
||||
ending_map = {1: "ar", 2: "er", 4: "ir"}
|
||||
conjugato_verbs[vid] = {
|
||||
"id": vid,
|
||||
"rank": rank,
|
||||
"ending": ending_map.get(ending, "ar"),
|
||||
"reflexive": reflexive,
|
||||
"infinitive": spanish,
|
||||
"english": english,
|
||||
}
|
||||
|
||||
# Irregular verb forms
|
||||
cursor.execute("SELECT VerbFormId, Form FROM IrregularVerbForm ORDER BY VerbFormId")
|
||||
irregular_forms = {}
|
||||
for vfid, form in cursor.fetchall():
|
||||
irregular_forms[vfid] = form
|
||||
|
||||
# Irregular spans
|
||||
cursor.execute("SELECT Id, VerbFormId, Type, Pattern, Start, End FROM IrregularSpan ORDER BY Id")
|
||||
irregular_spans = []
|
||||
for sid, vfid, stype, pattern, start, end in cursor.fetchall():
|
||||
irregular_spans.append({
|
||||
"verbFormId": vfid,
|
||||
"type": stype,
|
||||
"pattern": pattern,
|
||||
"start": start,
|
||||
"end": end,
|
||||
})
|
||||
|
||||
# Irregularity bitmasks
|
||||
cursor.execute("SELECT * FROM Irregularity ORDER BY VerbId")
|
||||
irregularity_cols = [d[0] for d in cursor.description]
|
||||
irregularity_data = {}
|
||||
for row in cursor.fetchall():
|
||||
verb_id = row[0]
|
||||
irregularity_data[verb_id] = dict(zip(irregularity_cols[1:], row[1:]))
|
||||
|
||||
conn.close()
|
||||
print(f" {len(conjugato_verbs)} verbs, {len(irregular_forms)} irregular forms, {len(irregular_spans)} spans")
|
||||
|
||||
# ─── Step 2: Load Conjuu ES conjugations ───
|
||||
print("Loading Conjuu ES data...")
|
||||
conjuu_verbs = {}
|
||||
with open(CONJUU_VOCAB, 'r') as f:
|
||||
for row in csv.reader(f):
|
||||
verb_name = row[0]
|
||||
tense_spanish = row[2]
|
||||
tense_id = TENSE_LOOKUP.get(tense_spanish)
|
||||
if not tense_id:
|
||||
continue
|
||||
regularity = row[1]
|
||||
forms = row[3:9] # yo, tú, él, nosotros, vosotros, ellos
|
||||
english = row[9]
|
||||
rank = int(row[13]) if row[13] else 99999
|
||||
|
||||
key = verb_name.lower()
|
||||
if key not in conjuu_verbs:
|
||||
conjuu_verbs[key] = {
|
||||
"infinitive": verb_name,
|
||||
"english": english,
|
||||
"rank": rank,
|
||||
"tenses": {},
|
||||
}
|
||||
conjuu_verbs[key]["tenses"][tense_id] = {
|
||||
"regularity": regularity,
|
||||
"forms": forms,
|
||||
}
|
||||
|
||||
print(f" {len(conjuu_verbs)} verbs with conjugations")
|
||||
|
||||
# ─── Step 3: Load tense guides ───
|
||||
print("Loading tense guides...")
|
||||
result = subprocess.run(['plutil', '-convert', 'xml1', '-o', '-', CONJUU_GUIDE], capture_output=True)
|
||||
guide_data = plistlib.loads(result.stdout)
|
||||
|
||||
tense_guides = {}
|
||||
for key, value in guide_data.items():
|
||||
m = re.match(r'LL(.+)Guide(Top|Bottom)', key)
|
||||
if m:
|
||||
tense_name = m.group(1)
|
||||
part = m.group(2)
|
||||
if tense_name not in tense_guides:
|
||||
tense_guides[tense_name] = {}
|
||||
tense_guides[tense_name][part] = value
|
||||
|
||||
guides_output = []
|
||||
for t in TENSES:
|
||||
guide_key = t["spanish"].replace("Indicativo ", "").replace("Condicional ", "").replace("Subjuntivo ", "").replace("Imperativo ", "")
|
||||
# Try exact match first, then various key patterns
|
||||
guide = None
|
||||
for gk, gv in tense_guides.items():
|
||||
if gk == guide_key or gk == t["spanish"] or gk.replace(" ", "") == guide_key.replace(" ", ""):
|
||||
guide = gv
|
||||
break
|
||||
if not guide:
|
||||
# Try partial match
|
||||
for gk, gv in tense_guides.items():
|
||||
if guide_key.lower() in gk.lower() or gk.lower() in guide_key.lower():
|
||||
guide = gv
|
||||
break
|
||||
|
||||
guides_output.append({
|
||||
"tenseId": t["id"],
|
||||
"title": guide.get("Top", t["english"]) if guide else t["english"],
|
||||
"body": guide.get("Bottom", "") if guide else "",
|
||||
})
|
||||
|
||||
print(f" {len(guides_output)} tense guides")
|
||||
|
||||
# ─── Step 4: Load difficulty levels ───
|
||||
print("Loading difficulty levels...")
|
||||
level_files = [
|
||||
("basic", "Basic.csv"),
|
||||
("elementary_1", "Elementary-1.csv"),
|
||||
("elementary_2", "Elementary-2.csv"),
|
||||
("elementary_3", "Elementary-3.csv"),
|
||||
("intermediate_1", "Intermediate-1.csv"),
|
||||
("intermediate_2", "Intermediate-2.csv"),
|
||||
("intermediate_3", "Intermediate-3.csv"),
|
||||
("intermediate_4", "Intermediate-4.csv"),
|
||||
]
|
||||
|
||||
level_verbs = {}
|
||||
for level_id, filename in level_files:
|
||||
path = os.path.join(CONJUU_LEVELS, filename)
|
||||
with open(path, 'r') as f:
|
||||
for row in csv.reader(f):
|
||||
level_verbs[row[0].lower()] = level_id
|
||||
|
||||
print(f" {len(level_verbs)} verbs with curated levels")
|
||||
|
||||
# ─── Step 5: Merge everything ───
|
||||
print("Merging data...")
|
||||
|
||||
# Map ConjuGato VerbFormId encoding
# VerbFormId = (1000 + VerbId) * 10000 + MTPP
# M: 1=Indicative, 2=Subjunctive, 3=Imperative
# T: tense within mood
# PP: person (01-08)
#
# Maps the (mood digit, tense digit) pair to our canonical tense id.
# NOTE(review): decode_verb_form_id handles mood 3 (imperative) directly
# without consulting this table, so the (3, 0) entry appears unused there.
CONJUGATO_TENSE_MAP = {
    # (mood, tense) -> tense_id
    (1, 1): "ind_presente",
    (1, 2): "ind_preterito",
    (1, 3): "ind_imperfecto",
    (1, 6): "cond_presente",
    (1, 7): "ind_futuro",
    (2, 1): "subj_presente",
    (2, 3): "subj_imperfecto_1",
    (2, 4): "subj_imperfecto_2",
    (2, 7): "subj_futuro",
    (3, 0): "imp_afirmativo",  # person-specific
}
|
||||
|
||||
def decode_verb_form_id(vfid):
    """Decode a ConjuGato VerbFormId into (verb_id, tense_id, person_index).

    Encoding (see CONJUGATO_TENSE_MAP comment and the vfid builders in the
    merge loop): VerbFormId = (1000 + VerbId) * 10000 + M*1000 + T*100 + PP,
    where M is the mood digit (1=Indicative, 2=Subjunctive, 3=Imperative),
    T the tense within the mood, and PP the 1-based person (01-08).

    Bug fix: the two person digits can only hold 00-99, so the previous
    ``person >= 800`` test for the negative imperative could never fire and
    every imperative decoded as affirmative.  Negative imperatives are
    encoded with tense digit 8 (vfid = base + 3800 + person, see the
    imp_negativo builder), so mood 3 now distinguishes on the tense digit.

    Returns (None, None, None) for ids that are not 8 digits long.
    person_index is None for the vos/voseo persons (7-8) and anything out
    of the 1-6 range; tense_id is None for unmapped (mood, tense) pairs.
    """
    s = str(vfid)
    if len(s) != 8:
        return None, None, None
    verb_id = int(s[:4]) - 1000
    mood = int(s[4])
    tense_num = int(s[5])
    person = int(s[6:8])

    # Handle imperative: tense digit 8 marks the negative form (…+3800+PP).
    if mood == 3:
        tense_id = "imp_negativo" if tense_num == 8 else "imp_afirmativo"
    else:
        tense_id = CONJUGATO_TENSE_MAP.get((mood, tense_num))

    if 1 <= person <= 6:
        person_idx = person - 1
    else:
        # Persons 7-8 are vos/voseo variants — skipped for now.
        person_idx = None

    return verb_id, tense_id, person_idx
|
||||
|
||||
|
||||
def assign_level(rank):
    """Map a frequency rank to a difficulty tier.

    Used as the fallback when a verb has no curated level from the
    Conjuu ES level CSVs.  Lower rank == more frequent == easier tier.
    """
    tiers = (
        (25, "basic"),
        (100, "elementary"),
        (300, "intermediate"),
        (700, "advanced"),
    )
    for upper_bound, level in tiers:
        if rank <= upper_bound:
            return level
    return "expert"
|
||||
|
||||
|
||||
# Build unified verb list
|
||||
all_verbs = []
|
||||
verb_forms = []
|
||||
spans_output = []
|
||||
|
||||
for vid, cv in sorted(conjugato_verbs.items(), key=lambda x: x[1]["rank"]):
|
||||
infinitive = cv["infinitive"]
|
||||
inf_lower = infinitive.lower()
|
||||
ending = cv["ending"]
|
||||
rank = cv["rank"]
|
||||
|
||||
# Check Conjuu ES for this verb
|
||||
conjuu = conjuu_verbs.get(inf_lower)
|
||||
|
||||
# Determine level
|
||||
level = level_verbs.get(inf_lower, assign_level(rank))
|
||||
|
||||
verb_entry = {
|
||||
"id": vid,
|
||||
"infinitive": infinitive,
|
||||
"english": cv["english"],
|
||||
"rank": rank,
|
||||
"ending": ending,
|
||||
"reflexive": cv["reflexive"],
|
||||
"level": level,
|
||||
"hasConjuuData": conjuu is not None,
|
||||
}
|
||||
all_verbs.append(verb_entry)
|
||||
|
||||
# Generate forms for each tense
|
||||
for tense in TENSES:
|
||||
tid = tense["id"]
|
||||
|
||||
if conjuu and tid in conjuu["tenses"]:
|
||||
# Use Conjuu ES data (pre-computed)
|
||||
td = conjuu["tenses"][tid]
|
||||
forms = td["forms"]
|
||||
regularity = td["regularity"]
|
||||
else:
|
||||
# Generate from rules or ConjuGato irregular forms
|
||||
regularity = "ordinary"
|
||||
|
||||
# Check if we have irregular forms from ConjuGato
|
||||
has_irregular = vid in irregularity_data
|
||||
|
||||
if tid in HABER:
|
||||
# Compound tense
|
||||
participle = get_participle(infinitive, ending)
|
||||
# Check for irregular participle from ConjuGato
|
||||
forms = [f"{aux} {participle}" for aux in HABER[tid]]
|
||||
regularity = "ordinary"
|
||||
elif tid in ("ind_futuro", "cond_presente"):
|
||||
# Future/conditional use full infinitive as stem
|
||||
base = infinitive.lower()
|
||||
if base.endswith("se"):
|
||||
base = base[:-2]
|
||||
endings_map = {
|
||||
"ind_futuro": ["é", "ás", "á", "emos", "éis", "án"],
|
||||
"cond_presente": ["ía", "ías", "ía", "íamos", "íais", "ían"],
|
||||
}
|
||||
forms = [base + e for e in endings_map[tid]]
|
||||
# Check for irregular future/conditional stems from ConjuGato
|
||||
if has_irregular:
|
||||
# Try to find irregular forms
|
||||
for pi in range(6):
|
||||
mood_tense = (1, 7) if tid == "ind_futuro" else (1, 6)
|
||||
vfid = (1000 + vid) * 10000 + mood_tense[0] * 1000 + mood_tense[1] * 100 + (pi + 1)
|
||||
if vfid in irregular_forms:
|
||||
forms[pi] = irregular_forms[vfid]
|
||||
regularity = "irregular"
|
||||
else:
|
||||
# Simple tense
|
||||
stem = get_stem(infinitive, ending)
|
||||
if tid in ENDINGS.get(ending, {}):
|
||||
forms = [stem + e for e in ENDINGS[ending][tid]]
|
||||
else:
|
||||
forms = [""] * 6
|
||||
|
||||
# Override with ConjuGato irregular forms
|
||||
if has_irregular:
|
||||
mood_map = {
|
||||
"ind_presente": (1, 1), "ind_preterito": (1, 2),
|
||||
"ind_imperfecto": (1, 3),
|
||||
"subj_presente": (2, 1), "subj_imperfecto_1": (2, 3),
|
||||
"subj_imperfecto_2": (2, 4), "subj_futuro": (2, 7),
|
||||
}
|
||||
if tid in mood_map:
|
||||
mt = mood_map[tid]
|
||||
for pi in range(6):
|
||||
vfid = (1000 + vid) * 10000 + mt[0] * 1000 + mt[1] * 100 + (pi + 1)
|
||||
if vfid in irregular_forms:
|
||||
forms[pi] = irregular_forms[vfid]
|
||||
regularity = "irregular"
|
||||
elif tid == "imp_afirmativo":
|
||||
for pi in range(6):
|
||||
vfid = (1000 + vid) * 10000 + 3000 + (pi + 1)
|
||||
if vfid in irregular_forms:
|
||||
forms[pi] = irregular_forms[vfid]
|
||||
regularity = "irregular"
|
||||
elif tid == "imp_negativo":
|
||||
for pi in range(6):
|
||||
vfid = (1000 + vid) * 10000 + 3800 + (pi + 1)
|
||||
if vfid in irregular_forms:
|
||||
forms[pi] = irregular_forms[vfid]
|
||||
regularity = "irregular"
|
||||
|
||||
for pi, form in enumerate(forms):
|
||||
if form:
|
||||
verb_forms.append({
|
||||
"verbId": vid,
|
||||
"tenseId": tid,
|
||||
"personIndex": pi,
|
||||
"form": form,
|
||||
"regularity": regularity,
|
||||
})
|
||||
|
||||
# Build spans referencing verb forms
|
||||
print("Processing irregular spans...")
|
||||
for span in irregular_spans:
|
||||
vfid = span["verbFormId"]
|
||||
verb_id, tense_id, person_idx = decode_verb_form_id(vfid)
|
||||
if verb_id is None or tense_id is None or person_idx is None:
|
||||
continue
|
||||
if verb_id not in conjugato_verbs:
|
||||
continue
|
||||
spans_output.append({
|
||||
"verbId": verb_id,
|
||||
"tenseId": tense_id,
|
||||
"personIndex": person_idx,
|
||||
"type": span["type"],
|
||||
"pattern": span["pattern"],
|
||||
"start": span["start"],
|
||||
"end": span["end"],
|
||||
})
|
||||
|
||||
# ─── Step 6: Output ───
|
||||
print("Writing output...")
|
||||
output = {
|
||||
"tenses": TENSES,
|
||||
"persons": PERSONS,
|
||||
"verbs": all_verbs,
|
||||
"verbForms": verb_forms,
|
||||
"irregularSpans": spans_output,
|
||||
"tenseGuides": guides_output,
|
||||
}
|
||||
|
||||
with open(OUTPUT, 'w', encoding='utf-8') as f:
|
||||
json.dump(output, f, ensure_ascii=False, indent=None)
|
||||
|
||||
# Also write a pretty version for debugging
|
||||
with open(OUTPUT.replace('.json', '_debug.json'), 'w', encoding='utf-8') as f:
|
||||
json.dump({
|
||||
"stats": {
|
||||
"verbs": len(all_verbs),
|
||||
"verbForms": len(verb_forms),
|
||||
"irregularSpans": len(spans_output),
|
||||
"tenseGuides": len(guides_output),
|
||||
},
|
||||
"sampleVerb": all_verbs[0] if all_verbs else None,
|
||||
"sampleForms": verb_forms[:20],
|
||||
}, f, ensure_ascii=False, indent=2)
|
||||
|
||||
file_size = os.path.getsize(OUTPUT) / (1024 * 1024)
|
||||
print(f"\nDone!")
|
||||
print(f" Verbs: {len(all_verbs)}")
|
||||
print(f" Verb forms: {len(verb_forms)}")
|
||||
print(f" Irregular spans: {len(spans_output)}")
|
||||
print(f" Tense guides: {len(guides_output)}")
|
||||
print(f" Output: {OUTPUT} ({file_size:.1f} MB)")
|
||||
453
Conjuga/Scripts/scrape_all_courses.py
Normal file
453
Conjuga/Scripts/scrape_all_courses.py
Normal file
@@ -0,0 +1,453 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Scrape 7 LanGo Spanish course packs from Brainscape, plus example sentences
|
||||
from SpanishDict. Outputs all_courses_data.json with all courses, decks, cards,
|
||||
and examples organized by week.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
import os
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
BASE_URL = "https://www.brainscape.com"
|
||||
OUTPUT = "/Users/treyt/Desktop/code/Spanish/Conjuga/Scripts/all_courses_data.json"
|
||||
MAX_EXAMPLES = 3
|
||||
|
||||
PACK_URLS = [
|
||||
"https://www.brainscape.com/packs/lango-spanish-beginner-ii-16514996",
|
||||
"https://www.brainscape.com/packs/lango-spanish-beginner-iii-conversation-18477688",
|
||||
"https://www.brainscape.com/packs/lango-spanish-intermediate-i-21508666",
|
||||
"https://www.brainscape.com/packs/lango-spanish-intermediate-ii-21906841",
|
||||
"https://www.brainscape.com/packs/lango-spanish-intermediate-iii-spanish-through-stories-20677744",
|
||||
"https://www.brainscape.com/packs/lango-spanish-advanced-i-21511244",
|
||||
"https://www.brainscape.com/packs/lango-spanish-advanced-ii-21649461",
|
||||
]
|
||||
|
||||
USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Parsing helpers (copied from scrape_brainscape.py and scrape_examples.py)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def parse_title_and_week(text):
    """Split a deck heading like "Week 3: Greetings" into (week, title).

    Accepts English "Week" or Spanish "Semana" (case-insensitive), with a
    colon or whitespace after the number.  Text without a recognizable
    week prefix comes back as (0, stripped text).
    """
    match = re.match(r'(?:Week|Semana)\s+(\d+)[:\s]+(.+)', text, re.IGNORECASE)
    if match is None:
        return 0, text.strip()
    week_number, title = match.groups()
    return int(week_number), title.strip()
|
||||
|
||||
|
||||
def parse_cards(text):
    """Parse flashcard Q/A pairs from page text.

    Brainscape renders each card as a running card number followed by the
    question and answer text.  The parser walks the dump line by line: a
    digits-only line starts a card; subsequent non-boilerplate lines are
    collected until the next sequential card number or a section boundary,
    and the first two collected lines become the card's front/back.
    """
    cards = []
    lines = text.split('\n')

    # Site chrome / boilerplate lines that never belong to card content.
    skip = {'Q', 'A', 'Study These Flashcards', '', 'Brainscape', 'Find Flashcards',
            'Make Flashcards', 'How It Works', 'Educators', 'Businesses', 'Academy',
            'Log in', 'Get Started'}

    i = 0
    while i < len(lines):
        line = lines[i].strip()

        if re.match(r'^\d+$', line):
            # Card number found: gather up to 6 content lines for this card.
            num = int(line)
            parts = []
            j = i + 1
            while j < len(lines) and len(parts) < 6:
                nextline = lines[j].strip()

                # The next sequential card number ends this card.
                if re.match(r'^\d+$', nextline) and int(nextline) == num + 1:
                    break
                # Deck listings / section headings also end the card.
                if nextline.startswith('LanGo Spanish') or nextline.startswith('Decks in class'):
                    break
                if re.match(r'^(?:Week|Semana) \d+', nextline):
                    break
                if nextline in skip:
                    j += 1
                    continue

                parts.append(nextline)
                j += 1

            # First collected line is the front (question), second the back.
            if len(parts) >= 2:
                cards.append({
                    "front": parts[0],
                    "back": parts[1],
                })
            i = j
        else:
            i += 1

    # Drop pseudo-cards picked up from page navigation / deck headings.
    cards = [c for c in cards if not re.match(r'^(?:Week|Semana) \d+', c['front'])
             and c['front'] not in ('Decks in class (39)', '# Cards')
             and not c['front'].startswith('LanGo Spanish')
             and not c['front'].startswith('You may prefer')]
    return cards
|
||||
|
||||
|
||||
def extract_word_for_lookup(front):
    """Reduce a flashcard front to a single lowercase word for dictionary lookup.

    Strips leading Spanish articles (including "el/la"-style pairs) and
    keeps only the text before the first comma and before the first slash.
    """
    word = front.strip()
    for article_pattern in (r'^(el|la|los|las|un|una)\s+', r'^(el/la|los/las)\s+'):
        word = re.sub(article_pattern, '', word, flags=re.IGNORECASE)
    for separator in (',', '/'):
        if separator in word:
            word = word.split(separator, 1)[0].strip()
    return word.lower().strip()
|
||||
|
||||
|
||||
def parse_examples(text, lookup_word):
    """Parse example sentences from SpanishDict page text.

    Returns up to MAX_EXAMPLES dicts of the form {"es": ..., "en": ...}.
    Two page layouts are recognized:
      1. Spanish and English fused on one line with no space between the
         Spanish sentence's final period and the English capital
         (e.g. "Esta tienda es barata.This store is cheap.").
      2. A standalone Spanish line containing the word, followed within
         two lines by an English-looking line.
    """
    examples = []
    lines = text.split('\n')

    for i, line in enumerate(lines):
        l = line.strip()
        # Very short lines cannot hold a full sentence pair.
        if not l or len(l) < 15:
            continue

        # Layout 1: "Spanish sentence.English sentence" fused on one line.
        inline_match = re.match(r'^(.+?[.!?])([A-Z].+)$', l)
        if inline_match:
            es = inline_match.group(1).strip()
            en = inline_match.group(2).strip()
            # Only keep pairs whose Spanish half actually contains the word.
            if lookup_word.lower() in es.lower() and len(es) > 10 and len(en) > 5:
                examples.append({"es": es, "en": en})
                if len(examples) >= MAX_EXAMPLES:
                    break
            continue

        # Layout 2: Spanish line with the word; English on a following line.
        if lookup_word.lower() in l.lower() and len(l) > 15 and len(l) < 300:
            for j in range(i + 1, min(i + 3, len(lines))):
                next_l = lines[j].strip()
                if not next_l:
                    continue
                # Heuristic: an English line starts with a capital and has no
                # Spanish-only characters — TODO confirm against live pages.
                if (next_l[0].isupper() and
                        not any(c in next_l for c in ['á', 'é', 'í', 'ó', 'ú', 'ñ', '¿', '¡'])):
                    examples.append({"es": l, "en": next_l})
                    if len(examples) >= MAX_EXAMPLES:
                        break
                    break

        if len(examples) >= MAX_EXAMPLES:
            break

    return examples
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scraping logic
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def discover_deck_urls(page, pack_url):
    """Visit a pack page and discover all deck URLs within it.

    Returns a (course_name, deck_urls) tuple. Deck URLs are normalized to
    site-relative paths; the course name is parsed from the page body,
    falling back to a title derived from the URL slug.
    (Fix: removed an unused `pack_id` local and a dead `course_name = None`
    initialization.)
    """
    print(f"\nDiscovering decks in {pack_url}...")
    await page.goto(pack_url, wait_until="networkidle", timeout=30000)
    await page.wait_for_timeout(2000)

    # Scroll to load all content
    for _ in range(10):
        await page.evaluate("window.scrollBy(0, 1000)")
        await page.wait_for_timeout(300)

    # Find all deck links matching /flashcards/*/packs/*
    links = await page.eval_on_selector_all(
        'a[href*="/flashcards/"]',
        'els => els.map(e => e.getAttribute("href"))'
    )

    deck_urls = []
    seen = set()
    for href in links:
        if href and '/flashcards/' in href and '/packs/' in href:
            # Normalize absolute URLs down to site-relative paths.
            if href.startswith('http'):
                href = href.replace(BASE_URL, '')
            if href not in seen:
                seen.add(href)
                deck_urls.append(href)

    # Extract course name from the page.
    text = await page.inner_text("body")
    # Try to find "LanGo Spanish | ..." pattern
    m = re.search(r'(LanGo Spanish\s*\|\s*[^>\n]+)', text)
    if m:
        course_name = m.group(1).strip()
        # Clean trailing breadcrumb noise and a "Flashcards" suffix.
        course_name = re.sub(r'\s*>\s*$', '', course_name).strip()
        course_name = re.sub(r'\s*Flashcards\s*$', '', course_name).strip()
    else:
        # Fallback: derive a readable name from the URL slug.
        slug = pack_url.rstrip('/').split('/')[-1]
        slug = re.sub(r'-\d+$', '', slug)  # drop trailing numeric pack id
        course_name = slug.replace('-', ' ').title()

    print(f" Course: {course_name}")
    print(f" Found {len(deck_urls)} deck URLs")
    return course_name, deck_urls
|
||||
|
||||
|
||||
async def scrape_deck(page, url):
    """Scrape a single deck page for flashcard data.

    Returns a dict with the week number, cleaned title, reversed-deck
    flag, parsed cards, card count, and the source URL.
    """
    full_url = BASE_URL + url if url.startswith('/') else url
    await page.goto(full_url, wait_until="networkidle", timeout=30000)
    await page.wait_for_timeout(2000)

    # Scroll so lazily-loaded cards render into the DOM.
    for _ in range(5):
        await page.evaluate("window.scrollBy(0, 1000)")
        await page.wait_for_timeout(300)

    text = await page.inner_text("body")

    # Extract title — handle both "Week N:" and "Semana N" patterns.
    # Breadcrumb form: "... > Week N: Title > Flashcards"
    title_match = re.search(r'>\s*((?:Week|Semana)\s+\d+[:\s].+?)\s*>\s*Flashcards', text)
    if title_match:
        raw_title = title_match.group(1).strip()
    else:
        # Heading form without breadcrumbs: "Week N: Title ... Flashcards"
        heading_match = re.search(r'((?:Week|Semana)\s+\d+[:\s].+?)\s*Flashcards', text)
        if heading_match:
            raw_title = heading_match.group(1).strip()
        else:
            # Last resort: reconstruct a title from the URL slug.
            slug = url.split('/')[2] if len(url.split('/')) > 2 else url
            slug_clean = re.sub(r'-\d+$', '', slug)  # drop trailing numeric deck id
            slug_clean = re.sub(r'-al-rev(e|é)s$', ' AL REVÉS', slug_clean)
            raw_title = slug_clean.replace('-', ' ').title()
            # Slugs with no week number get a placeholder week 0 prefix.
            wm = re.match(r'Week\s+(\d+)', raw_title, re.IGNORECASE)
            if not wm:
                raw_title = "Week 0: " + raw_title

    week, title = parse_title_and_week(raw_title)
    cards = parse_cards(text)
    # "AL REVÉS" decks are the reversed (answer -> question) variants.
    is_reversed = "al rev" in url.lower() or "AL REVÉS" in raw_title.upper()

    return {
        "week": week,
        "title": title,
        "isReversed": is_reversed,
        "cardCount": len(cards),
        "cards": cards,
        "url": url,
    }
|
||||
|
||||
|
||||
async def scrape_examples_for_word(page, lookup):
    """Fetch the SpanishDict translate page for *lookup* and parse examples.

    Best-effort: any navigation or parsing failure yields an empty list so
    a single bad word never aborts the whole scrape.
    """
    translate_url = f"https://www.spanishdict.com/translate/{lookup}"
    try:
        await page.goto(translate_url, wait_until="domcontentloaded", timeout=15000)
        await page.wait_for_timeout(2000)
        body_text = await page.inner_text("body")
        return parse_examples(body_text, lookup)
    except Exception:
        return []
|
||||
|
||||
|
||||
def save_progress(data):
    """Serialize *data* to OUTPUT as human-readable UTF-8 JSON."""
    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    with open(OUTPUT, 'w', encoding='utf-8') as fh:
        fh.write(serialized)
|
||||
|
||||
|
||||
def load_progress():
    """Return previously saved progress from OUTPUT, or None.

    An unparseable progress file is treated the same as a missing one.
    """
    if not os.path.exists(OUTPUT):
        return None
    try:
        with open(OUTPUT) as fh:
            saved = json.load(fh)
    except (json.JSONDecodeError, KeyError):
        return None
    return saved
|
||||
|
||||
|
||||
async def main():
    """Two-phase, resumable scraper.

    Phase 1 discovers and scrapes all decks/cards for each course pack in
    PACK_URLS; Phase 2 scrapes SpanishDict example sentences for every
    unique word. Progress is written to OUTPUT after each course and every
    20 words so an interrupted run can resume.
    """
    # Check for existing progress
    existing = load_progress()
    completed_courses = set()
    examples_done = {}  # lookup -> examples list

    if existing and 'courses' in existing:
        for course in existing['courses']:
            if course.get('_examples_done'):
                completed_courses.add(course['course'])
            # Collect already-scraped examples
            for week in course.get('weeks', []):
                for deck in week.get('decks', []):
                    for card in deck.get('cards', []):
                        if card.get('examples'):
                            lookup = extract_word_for_lookup(card['front'])
                            examples_done[lookup] = card['examples']
        print(f"Loaded progress: {len(completed_courses)} completed courses, {len(examples_done)} words with examples")

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        context = await browser.new_context(user_agent=USER_AGENT)
        page = await context.new_page()

        all_courses = []

        # If we have existing data for completed courses, keep them
        if existing and 'courses' in existing:
            for course in existing['courses']:
                if course['course'] in completed_courses:
                    all_courses.append(course)

        # ---------------------------------------------------------------
        # Phase 1: Discover decks and scrape cards for each course pack
        # ---------------------------------------------------------------
        for pack_url in PACK_URLS:
            course_name, deck_urls = await discover_deck_urls(page, pack_url)

            # Skip if already completed
            if course_name in completed_courses:
                print(f" Skipping {course_name} (already completed)")
                continue

            await page.wait_for_timeout(300)

            all_decks = []
            total_cards = 0

            for i, deck_url in enumerate(deck_urls):
                slug = deck_url.split('/')[2] if len(deck_url.split('/')) > 2 else deck_url
                print(f" [{i+1}/{len(deck_urls)}] Scraping {slug[:60]}...")
                try:
                    deck = await scrape_deck(page, deck_url)
                    all_decks.append(deck)
                    total_cards += deck["cardCount"]
                    print(f" -> Week {deck['week']}: {deck['title']} ({deck['cardCount']} cards)")
                except Exception as e:
                    # One failed deck is logged and skipped; the rest still scrape.
                    print(f" ERROR: {e}")

                await page.wait_for_timeout(300)

            # Organize by week
            weeks = {}
            for deck in all_decks:
                w = deck["week"]
                if w not in weeks:
                    weeks[w] = []
                weeks[w].append({
                    "title": deck["title"],
                    "isReversed": deck["isReversed"],
                    "cardCount": deck["cardCount"],
                    "cards": deck["cards"],
                })

            course_data = {
                "course": course_name,
                "totalDecks": len(all_decks),
                "totalCards": total_cards,
                "_examples_done": False,  # internal resume flag; stripped before final save
                "weeks": [
                    {"week": w, "decks": weeks[w]}
                    for w in sorted(weeks.keys())
                ],
            }
            all_courses.append(course_data)

            # Save after each course
            save_progress({"courses": all_courses})
            print(f" Saved {course_name}: {len(all_decks)} decks, {total_cards} cards")

        # ---------------------------------------------------------------
        # Phase 2: Scrape example sentences from SpanishDict
        # ---------------------------------------------------------------
        print("\n" + "=" * 60)
        print("Phase 2: Scraping example sentences from SpanishDict")
        print("=" * 60)

        # Collect all unique words across all courses (non-reversed decks)
        unique_words = {}  # lookup -> original front
        for course in all_courses:
            for week in course['weeks']:
                for deck in week['decks']:
                    if deck.get('isReversed'):
                        continue
                    for card in deck['cards']:
                        front = card['front']
                        lookup = extract_word_for_lookup(front)
                        if lookup and lookup not in unique_words:
                            unique_words[lookup] = front

        print(f"Found {len(unique_words)} unique words to look up")
        print(f"Already have examples for {len(examples_done)} words")

        words_scraped = 0
        total_words = len(unique_words)

        for i, (lookup, original) in enumerate(unique_words.items()):
            # Words already scraped in a previous (or this) run are skipped.
            if lookup in examples_done:
                continue

            print(f"[{i+1}/{total_words}] {lookup}...", end=" ", flush=True)
            try:
                examples = await scrape_examples_for_word(page, lookup)
                examples_done[lookup] = examples
                if examples:
                    print(f"{len(examples)} examples")
                else:
                    print("no examples")
            except Exception as e:
                print(f"error: {e}")
                examples_done[lookup] = []

            words_scraped += 1

            # Save progress every 20 words
            if words_scraped % 20 == 0:
                # Attach examples to cards before saving
                _attach_examples(all_courses, examples_done)
                save_progress({"courses": all_courses})
                print(f" [saved progress - {len(examples_done)} words done]")

            await page.wait_for_timeout(300)

        await browser.close()

    # ---------------------------------------------------------------
    # Final: attach all examples to cards and save
    # ---------------------------------------------------------------
    _attach_examples(all_courses, examples_done)

    # Mark all courses as examples_done and remove internal flag
    for course in all_courses:
        course['_examples_done'] = True

    # Clean up internal flags before final save
    # NOTE(review): this pop undoes the flag set just above, so a re-run
    # against the final output file will NOT detect completed courses —
    # confirm whether the flag should survive in the saved file.
    for course in all_courses:
        course.pop('_examples_done', None)

    save_progress({"courses": all_courses})

    total_decks = sum(c['totalDecks'] for c in all_courses)
    total_cards = sum(c['totalCards'] for c in all_courses)
    print(f"\nDone! {len(all_courses)} courses, {total_decks} decks, {total_cards} cards")
    print(f"Examples scraped for {len(examples_done)} unique words")
    print(f"Output: {OUTPUT}")
|
||||
|
||||
|
||||
def _attach_examples(courses, examples_done):
    """Attach scraped example sentences to every card, mutating in place.

    Cards whose lookup word has non-empty examples get them assigned;
    otherwise the card receives an empty list unless it already has one.
    """
    every_card = (
        card
        for course in courses
        for week in course['weeks']
        for deck in week['decks']
        for card in deck['cards']
    )
    for card in every_card:
        found = examples_done.get(extract_word_for_lookup(card['front']))
        if found:
            card['examples'] = found
        else:
            card.setdefault('examples', [])
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the full two-phase async scrape.
    asyncio.run(main())
|
||||
238
Conjuga/Scripts/scrape_brainscape.py
Normal file
238
Conjuga/Scripts/scrape_brainscape.py
Normal file
@@ -0,0 +1,238 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Scrape all 39 LanGo Spanish Beginner I decks from Brainscape using Playwright.
|
||||
Outputs course_data.json with all decks and cards organized by week.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
BASE_URL = "https://www.brainscape.com"
|
||||
PACK_ID = "18164266"
|
||||
OUTPUT = "/Users/treyt/Desktop/code/Spanish/Conjuga/Scripts/course_data.json"
|
||||
|
||||
DECK_URLS = [
|
||||
"/flashcards/week-1-greetings-los-saludos-10176532/packs/18164266",
|
||||
"/flashcards/week-1-greetings-los-saludos-al-reves-12745728/packs/18164266",
|
||||
"/flashcards/week-2-adjectives-los-adjetivos-12745741/packs/18164266",
|
||||
"/flashcards/week-2-adjectives-los-adjetivos-al-reves-12745829/packs/18164266",
|
||||
"/flashcards/week-2-numbers-los-numeros-12797877/packs/18164266",
|
||||
"/flashcards/week-2-numbers-los-numeros-al-reves-13698219/packs/18164266",
|
||||
"/flashcards/week-2-professions-las-profesiones-12740531/packs/18164266",
|
||||
"/flashcards/week-2-professions-las-profesiones-al-re-12745832/packs/18164266",
|
||||
"/flashcards/week-3-house-la-casa-10216249/packs/18164266",
|
||||
"/flashcards/week-3-house-la-casa-al-reves-12745837/packs/18164266",
|
||||
"/flashcards/week-3-ar-verbs-10207117/packs/18164266",
|
||||
"/flashcards/week-3-ar-verbs-al-reves-12745833/packs/18164266",
|
||||
"/flashcards/week-3-er-verbs-12745857/packs/18164266",
|
||||
"/flashcards/week-3-er-verbs-al-reves-12745888/packs/18164266",
|
||||
"/flashcards/week-3-ir-verbs-10207120/packs/18164266",
|
||||
"/flashcards/week-3-ir-verbs-al-reves-12745835/packs/18164266",
|
||||
"/flashcards/week-4-family-la-familia-10266419/packs/18164266",
|
||||
"/flashcards/week-4-family-la-familia-al-reves-12745978/packs/18164266",
|
||||
"/flashcards/week-4-e-ie-stem-changing-verbs-10270069/packs/18164266",
|
||||
"/flashcards/week-4-e-ie-stem-changing-verbs-al-reves-12749152/packs/18164266",
|
||||
"/flashcards/week-4-e-i-stem-changing-verbs-10270070/packs/18164266",
|
||||
"/flashcards/week-4-e-i-stem-changing-verbs-al-reves-12749160/packs/18164266",
|
||||
"/flashcards/week-4-o-ue-stem-changing-verbs-10270071/packs/18164266",
|
||||
"/flashcards/week-4-o-ue-stem-changing-verbs-al-reves-12749172/packs/18164266",
|
||||
"/flashcards/week-4-exceptional-yo-forms-10286213/packs/18164266",
|
||||
"/flashcards/week-4-exceptional-yo-forms-al-reves-12749234/packs/18164266",
|
||||
"/flashcards/week-5-reflexive-verbs-los-verbos-reflex-10270072/packs/18164266",
|
||||
"/flashcards/week-5-reflexive-verbs-los-verbos-reflex-12745842/packs/18164266",
|
||||
"/flashcards/week-5-daily-routine-la-rutina-cotidiana-11869082/packs/18164266",
|
||||
"/flashcards/week-5-daily-routine-la-rutina-cotidiana-12745840/packs/18164266",
|
||||
"/flashcards/week-6-city-la-ciudad-10232784/packs/18164266",
|
||||
"/flashcards/week-6-city-la-ciudad-al-reves-12745942/packs/18164266",
|
||||
"/flashcards/week-6-time-expressions-las-expresiones-12797878/packs/18164266",
|
||||
"/flashcards/week-6-time-expressions-las-expresiones-13698220/packs/18164266",
|
||||
"/flashcards/week-7-idioms-with-the-verb-tener-los-mo-11951594/packs/18164266",
|
||||
"/flashcards/week-8-prepositions-and-negation-las-pre-11951441/packs/18164266",
|
||||
"/flashcards/week-8-prepositions-and-negation-las-pre-16094943/packs/18164266",
|
||||
"/flashcards/week-8-hobbies-los-pasatiempos-10232782/packs/18164266",
|
||||
"/flashcards/week-8-hobbies-los-pasatiempos-al-reves-12745838/packs/18164266",
|
||||
]
|
||||
|
||||
|
||||
def parse_title_and_week(text):
    """Split 'Week N: Title' text into (week_number, title).

    Text without a recognizable week prefix yields week 0 and the
    stripped text unchanged.
    """
    match = re.match(r'Week\s+(\d+):\s*(.+)', text, re.IGNORECASE)
    if match is None:
        return 0, text.strip()
    week_number, title = match.groups()
    return int(week_number), title.strip()
|
||||
|
||||
|
||||
def parse_cards(text):
    """Parse flashcard Q/A pairs from page text.

    Cards appear in the page body as a bare number line followed by
    content lines; the first two surviving content lines become the
    card's front and back. Returns a list of {"front", "back"} dicts.
    """
    cards = []
    lines = text.split('\n')

    # Filter out noise lines
    skip = {'Q', 'A', 'Study These Flashcards', '', 'Brainscape', 'Find Flashcards',
            'Make Flashcards', 'How It Works', 'Educators', 'Businesses', 'Academy',
            'Log in', 'Get Started'}

    i = 0
    while i < len(lines):
        line = lines[i].strip()

        # Look for a card number
        if re.match(r'^\d+$', line):
            num = int(line)
            # Collect content lines until the next card number or deck list
            parts = []
            j = i + 1
            while j < len(lines) and len(parts) < 6:
                nextline = lines[j].strip()

                # Stop at next card number (must be exactly num + 1)
                if re.match(r'^\d+$', nextline) and int(nextline) == num + 1:
                    break

                # Stop at deck list / footer
                if nextline.startswith('LanGo Spanish') or nextline.startswith('Decks in class'):
                    break

                # Stop at other deck titles leaking in
                if re.match(r'^Week \d+:', nextline):
                    break

                # Skip noise
                if nextline in skip:
                    j += 1
                    continue

                parts.append(nextline)
                j += 1

            # Need at least a front and a back to make a card.
            if len(parts) >= 2:
                cards.append({
                    "front": parts[0],
                    "back": parts[1],
                })
            i = j
        else:
            i += 1

    # Post-filter: remove any cards that are actually deck titles
    cards = [c for c in cards if not re.match(r'^Week \d+:', c['front'])
             and c['front'] not in ('Decks in class (39)', '# Cards')
             and not c['front'].startswith('LanGo Spanish')
             and not c['front'].startswith('You may prefer')]
    return cards
|
||||
|
||||
|
||||
async def scrape_deck(page, url):
    """Scrape a single deck page.

    Returns a dict with the week number, cleaned title, reversed-deck
    flag, parsed cards, card count, and the source URL.
    (Fix: removed a no-op `if wm: raw_title = raw_title` branch.)
    """
    full_url = BASE_URL + url
    await page.goto(full_url, wait_until="networkidle", timeout=30000)
    await page.wait_for_timeout(2000)
    # Scroll to load lazy content
    for _ in range(5):
        await page.evaluate("window.scrollBy(0, 1000)")
        await page.wait_for_timeout(300)

    text = await page.inner_text("body")

    # Extract title — try multiple patterns
    # Format: "LanGo Spanish | Beginner I > Week N: Title > Flashcards"
    title_match = re.search(r'>\s*(Week\s+\d+:.+?)\s*>\s*Flashcards', text)
    if title_match:
        raw_title = title_match.group(1).strip()
    else:
        # Try: "Week N: Title (Subtitle) Flashcards"
        heading_match = re.search(r'(Week\s+\d+:.+?)\s*Flashcards', text)
        if heading_match:
            raw_title = heading_match.group(1).strip()
        else:
            # Last resort: extract from URL slug
            slug = url.split('/')[2]
            # Convert "week-5-reflexive-verbs-los-verbos-reflex-10270072" to title
            slug_clean = re.sub(r'-\d+$', '', slug)  # remove trailing ID
            slug_clean = re.sub(r'-al-rev(e|é)s$', ' AL REVÉS', slug_clean)
            raw_title = slug_clean.replace('-', ' ').title()
            # Prefix a placeholder week when the slug carries no week number.
            if not re.match(r'Week\s+(\d+)', raw_title, re.IGNORECASE):
                raw_title = "Week 0: " + raw_title

    week, title = parse_title_and_week(raw_title)
    cards = parse_cards(text)

    # "AL REVÉS" decks are the reversed (answer -> question) variants.
    is_reversed = "al rev" in url.lower() or "AL REVÉS" in raw_title.upper()

    return {
        "week": week,
        "title": title,
        "isReversed": is_reversed,
        "cardCount": len(cards),
        "cards": cards,
        "url": url,
    }
|
||||
|
||||
|
||||
async def main():
    """Scrape every deck in DECK_URLS, group them by week, and write the
    aggregated course JSON to OUTPUT."""
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        context = await browser.new_context(
            user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
        )
        page = await context.new_page()

        all_decks = []
        total_cards = 0

        for i, url in enumerate(DECK_URLS):
            print(f"[{i+1}/{len(DECK_URLS)}] Scraping {url.split('/')[2][:50]}...")
            try:
                deck = await scrape_deck(page, url)
                all_decks.append(deck)
                total_cards += deck["cardCount"]
                print(f" → Week {deck['week']}: {deck['title']} ({deck['cardCount']} cards)")
            except Exception as e:
                # A failed deck is logged and skipped; the rest still scrape.
                print(f" ERROR: {e}")

            # Be polite
            await page.wait_for_timeout(500)

        await browser.close()

    # Organize by week
    weeks = {}
    for deck in all_decks:
        w = deck["week"]
        if w not in weeks:
            weeks[w] = []
        weeks[w].append({
            "title": deck["title"],
            "isReversed": deck["isReversed"],
            "cardCount": deck["cardCount"],
            "cards": deck["cards"],
        })

    output = {
        "course": "LanGo Spanish | Beginner I",
        "totalDecks": len(all_decks),
        "totalCards": total_cards,
        "weeks": [
            {
                "week": w,
                "decks": weeks[w],
            }
            for w in sorted(weeks.keys())
        ],
    }

    with open(OUTPUT, 'w', encoding='utf-8') as f:
        json.dump(output, f, ensure_ascii=False, indent=2)

    print(f"\nDone! {len(all_decks)} decks, {total_cards} cards → {OUTPUT}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: scrape all decks and write the course JSON.
    asyncio.run(main())
|
||||
166
Conjuga/Scripts/scrape_examples.py
Normal file
166
Conjuga/Scripts/scrape_examples.py
Normal file
@@ -0,0 +1,166 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Scrape 2-3 example sentences per vocab word from SpanishDict.
|
||||
Reads words from course_data.json, outputs examples to course_examples.json.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
import os
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
INPUT = "/Users/treyt/Desktop/code/Spanish/Conjuga/Scripts/course_data.json"
|
||||
OUTPUT = "/Users/treyt/Desktop/code/Spanish/Conjuga/Scripts/course_examples.json"
|
||||
MAX_EXAMPLES = 3
|
||||
|
||||
def extract_word_for_lookup(front):
    """Extract the best lookup word from a card front.

    e.g. 'barato, barata' -> 'barato'
    e.g. 'el/la periodista' -> 'periodista'

    Returns the normalized lowercase word used to build the SpanishDict
    translate URL.
    """
    word = front.strip()
    # Remove articles (plain form first, then the combined 'el/la' form)
    word = re.sub(r'^(el|la|los|las|un|una)\s+', '', word, flags=re.IGNORECASE)
    word = re.sub(r'^(el/la|los/las)\s+', '', word, flags=re.IGNORECASE)
    # Take first word if comma-separated (barato, barata -> barato)
    if ',' in word:
        word = word.split(',')[0].strip()
    # Take first word if slash-separated
    if '/' in word:
        word = word.split('/')[0].strip()
    return word.lower().strip()
|
||||
|
||||
|
||||
def parse_examples(text, lookup_word):
    """Parse example sentences from SpanishDict page text.

    Returns up to MAX_EXAMPLES dicts of the form {"es": ..., "en": ...}.
    """
    examples = []
    lines = text.split('\n')

    for i, line in enumerate(lines):
        l = line.strip()
        # Very short lines cannot hold a full sentence pair.
        if not l or len(l) < 15:
            continue

        # Pattern: "Spanish sentence.English sentence." (inline on one line)
        # SpanishDict puts them together with no space between period and capital
        # e.g. "Esta tienda es muy barata.This store is really cheap."
        inline_match = re.match(r'^(.+?[.!?])([A-Z].+)$', l)
        if inline_match:
            es = inline_match.group(1).strip()
            en = inline_match.group(2).strip()
            # Verify it contains our word (case-insensitive)
            if lookup_word.lower() in es.lower() and len(es) > 10 and len(en) > 5:
                examples.append({"es": es, "en": en})
                if len(examples) >= MAX_EXAMPLES:
                    break
            continue

        # Pattern: standalone Spanish sentence with word, followed by English on next line
        if lookup_word.lower() in l.lower() and len(l) > 15 and len(l) < 300:
            # Check if next non-empty line is English
            for j in range(i + 1, min(i + 3, len(lines))):
                next_l = lines[j].strip()
                if not next_l:
                    continue
                # Check if it looks English (starts with capital, no
                # Spanish-only characters) — heuristic, TODO confirm
                if (next_l[0].isupper() and
                        not any(c in next_l for c in ['á', 'é', 'í', 'ó', 'ú', 'ñ', '¿', '¡'])):
                    examples.append({"es": l, "en": next_l})
                    if len(examples) >= MAX_EXAMPLES:
                        break
                    break

        if len(examples) >= MAX_EXAMPLES:
            break

    return examples
|
||||
|
||||
|
||||
async def scrape_word(page, word, lookup):
    """Scrape example sentences for a single word from SpanishDict.

    Args:
        page: Playwright page used for navigation.
        word: Original card front (kept for interface compatibility; not
            used in the query itself).
        lookup: Normalized word placed in the SpanishDict translate URL.

    Returns a list of {"es", "en"} dicts (up to MAX_EXAMPLES); empty on
    any failure so one bad word never aborts the run.
    (Fix: removed an unused exception binding and a needless
    intermediate variable.)
    """
    url = f"https://www.spanishdict.com/translate/{lookup}"
    try:
        await page.goto(url, wait_until="domcontentloaded", timeout=15000)
        await page.wait_for_timeout(2000)
        text = await page.inner_text("body")
        return parse_examples(text, lookup)
    except Exception:
        # Best-effort: swallow navigation/parsing errors and report no examples.
        return []
|
||||
|
||||
|
||||
async def main():
    """Read course_data.json, scrape SpanishDict examples for each unique
    vocab word, and write the results to OUTPUT (resumable; progress is
    saved every 20 words)."""
    # Load course data
    with open(INPUT) as f:
        data = json.load(f)

    # Collect unique words (front values from non-reversed decks)
    words = {}  # lookup -> original front
    for week in data['weeks']:
        for deck in week['decks']:
            if deck.get('isReversed'):
                continue
            for card in deck['cards']:
                front = card['front']
                lookup = extract_word_for_lookup(front)
                if lookup and lookup not in words:
                    words[lookup] = front

    print(f"Found {len(words)} unique words to look up")

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        ctx = await browser.new_context(
            user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
        )
        page = await ctx.new_page()

        # Load existing progress if any
        results = {}
        if os.path.exists(OUTPUT):
            with open(OUTPUT) as f:
                results = json.load(f)
            print(f"Loaded {len(results)} existing results")

        # NOTE(review): `found` starts at len(results), which also counts
        # words previously saved with zero examples — confirm whether the
        # final "found" tally should exclude empty entries.
        found = len(results)
        total = len(words)

        for i, (lookup, original) in enumerate(words.items()):
            # Skip already scraped
            if original in results:
                continue

            print(f"[{i+1}/{total}] {lookup}...", end=" ", flush=True)
            try:
                examples = await scrape_word(page, original, lookup)
                if examples:
                    results[original] = examples
                    found += 1
                    print(f"{len(examples)} examples")
                else:
                    results[original] = []
                    print("no examples")
            except Exception as e:
                print(f"error: {e}")
                results[original] = []

            # Save progress every 20 words
            if (i + 1) % 20 == 0:
                with open(OUTPUT, 'w', encoding='utf-8') as f:
                    json.dump(results, f, ensure_ascii=False, indent=2)
                print(f" [saved {len(results)} results]")

            await page.wait_for_timeout(300)

        await browser.close()

    # Save results
    with open(OUTPUT, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

    print(f"\nDone! {found}/{total} words with examples → {OUTPUT}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: scrape example sentences for all vocab words.
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user