Sportstime/Scripts/cloudkit_import.py

#!/usr/bin/env python3
"""
CloudKit Import Script
======================
Imports canonical JSON data into CloudKit. Run after canonicalization pipeline.

Expected input files (from canonicalization pipeline):
  - stadiums_canonical.json
  - teams_canonical.json
  - games_canonical.json OR canonical/games/*.json (new structure)
  - stadium_aliases.json
  - league_structure.json
  - team_aliases.json

File Structure (Option B - by sport/season):
  data/
    games/              # Raw scraped games
      mlb_2025.json
      nba_2025.json
      ...
    canonical/          # Canonicalized data
      games/
        mlb_2025.json
        nba_2025.json
        ...
      stadiums.json
    games_canonical.json      # Combined (backward compatibility)
    stadiums_canonical.json
    teams_canonical.json

Setup:
  1. CloudKit Dashboard > Tokens & Keys > Server-to-Server Keys
  2. Create key with Read/Write access to public database
  3. Download .p8 file and note Key ID

Usage:
  python cloudkit_import.py                               # Interactive menu
  python cloudkit_import.py --dry-run                     # Preview first
  python cloudkit_import.py --key-id XX --key-file key.p8 # Import all
  python cloudkit_import.py --stadiums-only               # Stadiums first
  python cloudkit_import.py --games-only                  # All games
  python cloudkit_import.py --games-files mlb_2025.json   # Specific game file
  python cloudkit_import.py --games-files mlb_2025.json,nba_2025.json  # Multiple files
  python cloudkit_import.py --stadium-aliases-only        # Stadium aliases only
  python cloudkit_import.py --delete-all                  # Delete then import
  python cloudkit_import.py --delete-only                 # Delete only (no import)
"""

import argparse, json, time, os, sys, hashlib, base64, requests
from datetime import datetime, timezone
from pathlib import Path

try:
    from cryptography.hazmat.primitives import hashes, serialization
    from cryptography.hazmat.primitives.asymmetric import ec
    from cryptography.hazmat.backends import default_backend
    HAS_CRYPTO = True
except ImportError:
    HAS_CRYPTO = False

# CloudKit container identifier; used as the default for the --container option.
CONTAINER = "iCloud.com.sportstime.app"
# Base URL that every CloudKit request path is appended to.
HOST = "https://api.apple-cloudkit.com"
# Number of records sent per records/modify request by import_data().
BATCH_SIZE = 200

# Hardcoded credentials: defaults for the --key-id and --key-file options.
# NOTE(review): the key ID is committed in source; the private key itself is
# read from DEFAULT_KEY_FILE at runtime. Confirm that committing the ID is intended.
DEFAULT_KEY_ID = "152be0715e0276e31aaea5cbfe79dc872f298861a55c70fae14e5fe3e026cff9"
DEFAULT_KEY_FILE = "eckey.pem"


def show_game_files_menu(data_dir: Path) -> list[str]:
    """Show available game files and let the user select which to import.

    Lists every ``*.json`` file in ``<data_dir>/canonical/games`` together
    with the number of entries it contains, then prompts until a usable
    answer is given. Malformed input (non-numeric text, out-of-range numbers)
    re-prompts instead of cancelling.

    Args:
        data_dir: Directory that contains the ``canonical/games`` folder.

    Returns:
        The selected file names (not full paths) in the order entered, with
        repeated selections collapsed. An empty list when the directory or
        files are missing, when the user enters ``0``, or when input ends
        (EOF / Ctrl-C).
    """
    canonical_games_dir = data_dir / 'canonical' / 'games'

    if not canonical_games_dir.exists():
        print("\n  No canonical/games/ directory found.")
        return []

    game_files = sorted(canonical_games_dir.glob('*.json'))
    if not game_files:
        print("\n  No game files found in canonical/games/")
        return []

    print("\n" + "="*50)
    print("Select Game Files to Import")
    print("="*50)
    print("\n  Available files:")
    for i, f in enumerate(game_files, 1):
        # Count games in file (JSON is UTF-8; do not rely on the locale default)
        with open(f, encoding='utf-8') as fp:
            games = json.load(fp)
        print(f"    {i}. {f.name} ({len(games):,} games)")

    print("\n    a. All files")
    print("    0. Cancel")
    print()

    while True:
        # Only a closed/interrupted input stream cancels the menu.
        try:
            choice = input("Enter file numbers (comma-separated), 'a' for all, or 0 to cancel: ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            print("\nCancelled.")
            return []

        if choice == '0':
            return []
        if choice == 'a':
            return [f.name for f in game_files]

        # Parse comma-separated numbers; empty tokens ("1,") are ignored and
        # a typo asks again rather than aborting the whole selection.
        tokens = [t.strip() for t in choice.split(',') if t.strip()]
        try:
            indices = [int(t) for t in tokens]
        except ValueError:
            print("Invalid input. Enter numbers separated by commas.")
            continue

        selected = []
        for idx in indices:
            if not 1 <= idx <= len(game_files):
                print(f"Invalid selection: {idx}")
                continue
            file_name = game_files[idx - 1].name
            if file_name not in selected:  # avoid loading the same file twice
                selected.append(file_name)

        if selected:
            return selected
        print("No valid selections. Try again.")


def show_menu():
    """Print the action menu and prompt until a valid entry is typed.

    Returns:
        The chosen action number (1-11) as an int, or None when the user
        enters 0 or the input stream ends / is interrupted.
    """
    rule = "=" * 50
    menu_lines = (
        "\n" + rule,
        "CloudKit Import - Select Action",
        rule,
        "\n  1. Import all (stadiums, teams, games, league structure, team aliases, stadium aliases)",
        "  2. Stadiums only",
        "  3. Games only (all files)",
        "  4. Games - select specific files",
        "  5. League structure only",
        "  6. Team aliases only",
        "  7. Stadium aliases only",
        "  8. Canonical only (league structure + team aliases + stadium aliases)",
        "  9. Delete all then import",
        "  10. Delete only (no import)",
        "  11. Dry run (preview only)",
        "  0. Exit",
        "",  # trailing blank line before the prompt
    )
    print("\n".join(menu_lines))

    # Exact textual forms only: "01" or "+1" are rejected, as before.
    accepted = {str(number) for number in range(1, 12)}

    while True:
        try:
            answer = input("Enter choice [1-11, 0 to exit]: ").strip()
            if answer == '0':
                return None
            if answer in accepted:
                return int(answer)
            print("Invalid choice. Please enter 1-11 or 0.")
        except (EOFError, KeyboardInterrupt):
            print("\nExiting.")
            return None


def deterministic_uuid(string: str) -> str:
    """
    Derive a stable, upper-case UUID string from arbitrary text.

    The first 16 bytes of the SHA256 digest of the UTF-8 encoded input are
    stamped with the version-4 nibble and the variant bits, then rendered in
    the usual 8-4-4-4-12 grouping. Intended to match the
    StubDataProvider.deterministicUUID() implementation in Swift.
    """
    digest = hashlib.sha256(string.encode('utf-8')).digest()
    raw = bytearray(digest[:16])

    # High nibble of byte 6 carries the version, top two bits of byte 8 the variant.
    raw[6] = 0x40 | (raw[6] & 0x0F)
    raw[8] = 0x80 | (raw[8] & 0x3F)

    hex_digits = raw.hex().upper()
    groups = (
        hex_digits[:8],
        hex_digits[8:12],
        hex_digits[12:16],
        hex_digits[16:20],
        hex_digits[20:],
    )
    return "-".join(groups)


class CloudKit:
    """Small client for the public database of a CloudKit container.

    Every request is signed with a server-to-server key: the request carries
    the key ID, an ISO 8601 date, and an ECDSA/SHA-256 signature over
    ``date:base64(sha256(body)):path``.

    Both ``modify`` and ``query`` report failures by returning a dict with an
    ``'error'`` key rather than raising, so callers can keep processing
    remaining batches.
    """

    def __init__(self, key_id, private_key, container, env):
        """Store credentials and build the request path prefix.

        Args:
            key_id: Server-to-server key ID sent with every request.
            private_key: PEM-encoded private key bytes used for signing.
            container: CloudKit container identifier.
            env: Environment path segment (main() offers 'development' or
                'production').
        """
        self.key_id = key_id
        self.private_key = private_key
        self.path_base = f"/database/1/{container}/{env}/public"

    def _sign(self, date, body, path):
        """Return the base64 signature for ``date:body_hash:path``."""
        key = serialization.load_pem_private_key(self.private_key, None, default_backend())
        body_hash = base64.b64encode(hashlib.sha256(body.encode()).digest()).decode()
        sig = key.sign(f"{date}:{body_hash}:{path}".encode(), ec.ECDSA(hashes.SHA256()))
        return base64.b64encode(sig).decode()

    def _signed_headers(self, body, path):
        """Build the authentication headers for one request, dated now (UTC)."""
        date = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
        return {
            'Content-Type': 'application/json',
            'X-Apple-CloudKit-Request-KeyID': self.key_id,
            'X-Apple-CloudKit-Request-ISO8601Date': date,
            'X-Apple-CloudKit-Request-SignatureV1': self._sign(date, body, path),
        }

    def modify(self, operations):
        """Send a batch of record operations to ``records/modify``.

        Args:
            operations: List of CloudKit operation dicts.

        Returns:
            The decoded JSON response on HTTP 200, otherwise
            ``{'error': '<code>: <reason>'}``. Network failures are reported
            the same way (matching ``query``) instead of raising.
        """
        path = f"{self.path_base}/records/modify"
        body = json.dumps({'operations': operations})
        # Signing problems (e.g. a bad key) are configuration errors and still raise.
        headers = self._signed_headers(body, path)
        try:
            r = requests.post(f"{HOST}{path}", headers=headers, data=body, timeout=60)
        except requests.exceptions.Timeout:
            return {'error': 'Request timed out after 60s'}
        except requests.exceptions.RequestException as e:
            return {'error': f"Request failed: {e}"}

        if r.status_code == 200:
            return r.json()

        try:
            err = r.json()
            reason = err.get('reason', 'Unknown')
            code = err.get('serverErrorCode', r.status_code)
        except (ValueError, AttributeError):
            # Body was not JSON, or not a JSON object: fall back to raw text.
            return {'error': f"{r.status_code}: {r.text[:200]}"}
        return {'error': f"{code}: {reason}"}

    def query(self, record_type, limit=200, verbose=False):
        """Query records of a given type.

        Args:
            record_type: CloudKit record type name.
            limit: Value sent as ``resultsLimit``.
            verbose: When true, print request progress.

        Returns:
            The decoded JSON response on HTTP 200, otherwise a dict with an
            ``'error'`` key.
        """
        path = f"{self.path_base}/records/query"
        body = json.dumps({
            'query': {'recordType': record_type},
            'resultsLimit': limit
        })
        headers = self._signed_headers(body, path)
        if verbose:
            print(f"    Querying {record_type}...")
        try:
            r = requests.post(f"{HOST}{path}", headers=headers, data=body, timeout=30)
            if verbose:
                print(f"    Response: {r.status_code}")
            if r.status_code == 200:
                result = r.json()
                if verbose:
                    print(f"    Found {len(result.get('records', []))} records")
                return result
            return {'error': f"{r.status_code}: {r.text[:200]}"}
        except requests.exceptions.Timeout:
            return {'error': 'Request timed out after 30s'}
        except Exception as e:
            return {'error': f"Request failed: {e}"}

    def delete_all(self, record_type, verbose=False):
        """Delete all records of a given type.

        Repeatedly queries a page of records and deletes it until the query
        comes back empty, a request fails, or a pass deletes nothing.

        Args:
            record_type: CloudKit record type name.
            verbose: When true, print request/response details.

        Returns:
            Number of records the server reported as deleted.
        """
        total_deleted = 0
        while True:
            result = self.query(record_type, verbose=verbose)
            if 'error' in result:
                print(f"  Query error: {result['error']}")
                break

            records = result.get('records', [])
            if not records:
                break

            # Build delete operations (recordChangeTag required for delete)
            ops = [{
                'operationType': 'delete',
                'record': {
                    'recordName': r['recordName'],
                    'recordType': record_type,
                    'recordChangeTag': r.get('recordChangeTag', '')
                }
            } for r in records]

            if verbose:
                print(f"    Sending delete for {len(ops)} records...")

            delete_result = self.modify(ops)

            if verbose:
                print(f"    Delete response: {json.dumps(delete_result)[:500]}")

            if 'error' in delete_result:
                print(f"  Delete error: {delete_result['error']}")
                break

            # Check for individual record errors
            result_records = delete_result.get('records', [])
            successful = [r for r in result_records if 'serverErrorCode' not in r]
            failed = [r for r in result_records if 'serverErrorCode' in r]

            if failed and verbose:
                print(f"    Failed: {failed[0]}")

            total_deleted += len(successful)
            print(f"  Deleted {len(successful)} {record_type} records" + (f" ({len(failed)} failed)" if failed else ""))

            if not successful:
                # The next query would return the same records and fail the
                # same way, so stop instead of retrying forever.
                print(f"  No {record_type} records were deleted in this pass; stopping")
                break

            time.sleep(0.5)

        return total_deleted


def import_data(ck, records, name, dry_run, verbose):
    """Upload ``records`` to CloudKit in batches of ``BATCH_SIZE``.

    Each batch is sent as ``forceReplace`` operations through ``ck.modify``.
    With ``dry_run`` set nothing is sent and every record is counted.

    Args:
        ck: Client exposing ``modify(operations)``; not used in dry-run mode.
        records: CloudKit record dicts to upload.
        name: Plural label used in progress messages (e.g. 'stadiums').
        dry_run: When true, only report what would be created.
        verbose: When true, print per-batch details and truncated responses.

    Returns:
        Number of records the server accepted (or that would be sent in a
        dry run).
    """
    created = 0
    failed_batches = 0

    for batch_no, start in enumerate(range(0, len(records), BATCH_SIZE), start=1):
        chunk = records[start:start + BATCH_SIZE]
        operations = [{'operationType': 'forceReplace', 'record': rec} for rec in chunk]

        if verbose:
            print(f"  Batch {batch_no}: {len(chunk)} records, {len(operations)} ops")

        if not operations:
            print(f"  Warning: Empty batch at index {start}, skipping")
            continue

        if dry_run:
            print(f"  [DRY RUN] Would create {len(chunk)} {name}")
            created += len(chunk)
            continue

        response = ck.modify(operations)
        if 'error' in response:
            # Whole-batch failure: report only the first three to keep output readable.
            failed_batches += 1
            if failed_batches <= 3:
                print(f"  Error: {response['error']}")
                if verbose and chunk:
                    print(f"  Sample record: {json.dumps(chunk[0], indent=2)[:500]}")
            if failed_batches == 3:
                print("  (suppressing further errors...)")
        else:
            returned = response.get('records', [])
            # A returned record carrying serverErrorCode was rejected individually.
            rejected = [rec for rec in returned if 'serverErrorCode' in rec]
            accepted = len(returned) - len(rejected)
            created += accepted
            print(f"  Created {accepted} {name}")
            if rejected:
                first = rejected[0]
                print(f"  Failed {len(rejected)} records: {first.get('serverErrorCode')}: {first.get('reason')}")
            if verbose:
                print(f"  Response: {json.dumps(response, indent=2)[:1000]}")

        # Pause between real requests, whether or not the batch succeeded.
        time.sleep(0.5)

    if failed_batches > 0:
        print(f"  Total errors: {failed_batches}")
    return created


def main():
    p = argparse.ArgumentParser(description='Import JSON to CloudKit')
    p.add_argument('--key-id', default=DEFAULT_KEY_ID)
    p.add_argument('--key-file', default=DEFAULT_KEY_FILE)
    p.add_argument('--container', default=CONTAINER)
    p.add_argument('--env', choices=['development', 'production'], default='development')
    p.add_argument('--data-dir', default='./data')
    p.add_argument('--stadiums-only', action='store_true')
    p.add_argument('--games-only', action='store_true')
    p.add_argument('--games-files', type=str, help='Comma-separated list of game files to import (e.g., mlb_2025.json,nba_2025.json)')
    p.add_argument('--league-structure-only', action='store_true', help='Import only league structure')
    p.add_argument('--team-aliases-only', action='store_true', help='Import only team aliases')
    p.add_argument('--stadium-aliases-only', action='store_true', help='Import only stadium aliases')
    p.add_argument('--canonical-only', action='store_true', help='Import only canonical data (league structure + team aliases + stadium aliases)')
    p.add_argument('--delete-all', action='store_true', help='Delete all records before importing')
    p.add_argument('--delete-only', action='store_true', help='Only delete records, do not import')
    p.add_argument('--dry-run', action='store_true')
    p.add_argument('--verbose', '-v', action='store_true')
    p.add_argument('--interactive', '-i', action='store_true', help='Show interactive menu')
    args = p.parse_args()

    # Show interactive menu if no action flags provided or --interactive
    has_action_flag = any([
        args.stadiums_only, args.games_only, args.games_files, args.league_structure_only,
        args.team_aliases_only, args.stadium_aliases_only, args.canonical_only,
        args.delete_all, args.delete_only, args.dry_run
    ])

    # Track selected game files (for option 4 or --games-files)
    selected_game_files = None
    if args.games_files:
        # Parse comma-separated list from command line
        selected_game_files = [f.strip() for f in args.games_files.split(',')]
        args.games_only = True  # Imply --games-only

    if args.interactive or not has_action_flag:
        choice = show_menu()
        if choice is None:
            return

        # Map menu choice to flags
        if choice == 1:  # Import all
            pass  # Default behavior
        elif choice == 2:  # Stadiums only
            args.stadiums_only = True
        elif choice == 3:  # Games only (all files)
            args.games_only = True
        elif choice == 4:  # Games - select specific files
            args.games_only = True
            selected_game_files = show_game_files_menu(Path(args.data_dir))
            if not selected_game_files:
                print("No files selected. Exiting.")
                return
        elif choice == 5:  # League structure only
            args.league_structure_only = True
        elif choice == 6:  # Team aliases only
            args.team_aliases_only = True
        elif choice == 7:  # Stadium aliases only
            args.stadium_aliases_only = True
        elif choice == 8:  # Canonical only
            args.canonical_only = True
        elif choice == 9:  # Delete all then import
            args.delete_all = True
        elif choice == 10:  # Delete only
            args.delete_only = True
        elif choice == 11:  # Dry run
            args.dry_run = True

    print(f"\n{'='*50}")
    print(f"CloudKit Import {'(DRY RUN)' if args.dry_run else ''}")
    print(f"{'='*50}")
    print(f"Container: {args.container}")
    print(f"Environment: {args.env}\n")

    data_dir = Path(args.data_dir)

    # Load canonical format files (from canonicalization pipeline)
    # Fall back to legacy format for backward compatibility
    if (data_dir / 'stadiums_canonical.json').exists():
        stadiums = json.load(open(data_dir / 'stadiums_canonical.json'))
        use_canonical = True
    else:
        stadiums = json.load(open(data_dir / 'stadiums.json'))
        use_canonical = False

    if (data_dir / 'teams_canonical.json').exists():
        teams = json.load(open(data_dir / 'teams_canonical.json'))
    else:
        teams = []  # Legacy: extracted from stadiums

    # Load games: try new structure first (canonical/games/*.json), then fallback
    canonical_games_dir = data_dir / 'canonical' / 'games'
    games = []
    games_source = None

    if selected_game_files:
        # Load only the selected files
        for filename in selected_game_files:
            filepath = canonical_games_dir / filename
            if filepath.exists():
                with open(filepath) as f:
                    file_games = json.load(f)
                    games.extend(file_games)
                    print(f"  Loading {filename}: {len(file_games):,} games")
        games_source = f"selected files: {', '.join(selected_game_files)}"
    elif canonical_games_dir.exists() and any(canonical_games_dir.glob('*.json')):
        # New structure: load all sport/season files
        for games_file in sorted(canonical_games_dir.glob('*.json')):
            with open(games_file) as f:
                file_games = json.load(f)
                games.extend(file_games)
        games_source = "canonical/games/*.json"
    elif (data_dir / 'games_canonical.json').exists():
        games = json.load(open(data_dir / 'games_canonical.json'))
        games_source = "games_canonical.json"
    elif (data_dir / 'games.json').exists():
        games = json.load(open(data_dir / 'games.json'))
        games_source = "games.json (legacy)"

    league_structure = json.load(open(data_dir / 'league_structure.json')) if (data_dir / 'league_structure.json').exists() else []
    team_aliases = json.load(open(data_dir / 'team_aliases.json')) if (data_dir / 'team_aliases.json').exists() else []
    stadium_aliases = json.load(open(data_dir / 'stadium_aliases.json')) if (data_dir / 'stadium_aliases.json').exists() else []

    print(f"Using {'canonical' if use_canonical else 'legacy'} format")
    print(f"Loaded {len(stadiums)} stadiums, {len(teams)} teams, {len(games)} games")
    if games_source:
        print(f"  Games loaded from: {games_source}")
    print(f"Loaded {len(league_structure)} league structures, {len(team_aliases)} team aliases, {len(stadium_aliases)} stadium aliases\n")

    ck = None
    if not args.dry_run:
        if not HAS_CRYPTO:
            sys.exit("Error: pip install cryptography")
        if not os.path.exists(args.key_file):
            sys.exit(f"Error: Key file not found: {args.key_file}")
        ck = CloudKit(args.key_id, open(args.key_file, 'rb').read(), args.container, args.env)

    # Handle deletion
    if args.delete_all or args.delete_only:
        if not ck:
            sys.exit("Error: --key-id and --key-file required for deletion")

        print("--- Deleting Existing Records ---")
        # Delete in order: dependent records first, then base records
        for record_type in ['Game', 'TeamAlias', 'StadiumAlias', 'Team', 'LeagueStructure', 'Stadium']:
            print(f"  Deleting {record_type} records...")
            deleted = ck.delete_all(record_type, verbose=args.verbose)
            print(f"  Deleted {deleted} {record_type} records")

        if args.delete_only:
            print(f"\n{'='*50}")
            print("DELETE COMPLETE")
            print()
            return

    stats = {'stadiums': 0, 'teams': 0, 'games': 0, 'league_structures': 0, 'team_aliases': 0, 'stadium_aliases': 0}
    team_map = {}

    # Determine what to import based on flags
    import_stadiums = not args.games_only and not args.league_structure_only and not args.team_aliases_only and not args.stadium_aliases_only and not args.canonical_only
    import_teams = not args.games_only and not args.league_structure_only and not args.team_aliases_only and not args.stadium_aliases_only and not args.canonical_only
    import_games = not args.stadiums_only and not args.league_structure_only and not args.team_aliases_only and not args.stadium_aliases_only and not args.canonical_only
    import_league_structure = args.league_structure_only or args.canonical_only or (not args.stadiums_only and not args.games_only and not args.team_aliases_only and not args.stadium_aliases_only)
    import_team_aliases = args.team_aliases_only or args.canonical_only or (not args.stadiums_only and not args.games_only and not args.league_structure_only and not args.stadium_aliases_only)
    import_stadium_aliases = args.stadium_aliases_only or args.canonical_only or (not args.stadiums_only and not args.games_only and not args.league_structure_only and not args.team_aliases_only)

    # Build stadium ID lookup
    # Canonical format uses canonical_id, legacy uses id
    def get_stadium_id(s):
        return s.get('canonical_id', s.get('id', ''))

    def get_team_id(t):
        return t.get('canonical_id', '')

    stadium_id_map = {get_stadium_id(s): deterministic_uuid(get_stadium_id(s)) for s in stadiums}

    # Import stadiums
    if import_stadiums:
        print("--- Stadiums ---")
        recs = []
        for s in stadiums:
            stadium_id = get_stadium_id(s)
            record_name = deterministic_uuid(stadium_id)
            # Canonical format uses primary_team_abbrevs, legacy uses team_abbrevs
            team_abbrevs = s.get('primary_team_abbrevs', s.get('team_abbrevs', []))

            fields = {
                'stadiumId': {'value': record_name},
                'canonicalId': {'value': stadium_id},  # Store canonical_id as string
                'name': {'value': s['name']},
                'city': {'value': s['city']},
                'state': {'value': s.get('state', '')},
                'sport': {'value': s['sport']},
                'source': {'value': s.get('source', 'canonical')},
                'teamAbbrevs': {'value': team_abbrevs},
            }
            if s.get('latitude'):
                fields['location'] = {'value': {'latitude': s['latitude'], 'longitude': s['longitude']}}
            if s.get('capacity'):
                fields['capacity'] = {'value': s['capacity']}

            recs.append({'recordType': 'Stadium', 'recordName': record_name, 'fields': fields})
        stats['stadiums'] = import_data(ck, recs, 'stadiums', args.dry_run, args.verbose)

    # Import teams (canonical format has dedicated teams file)
    if import_teams:
        print("--- Teams ---")
        if teams:
            # Canonical format: use teams_canonical.json
            recs = []
            for t in teams:
                team_id = get_team_id(t)
                record_name = deterministic_uuid(team_id)
                team_map[(t['sport'], t['abbreviation'])] = record_name

                fields = {
                    'teamId': {'value': record_name},
                    'canonicalId': {'value': team_id},  # Store canonical_id as string
                    'abbreviation': {'value': t['abbreviation']},
                    'name': {'value': t['name']},
                    'city': {'value': t['city']},
                    'sport': {'value': t['sport']},
                    'stadiumCanonicalId': {'value': t.get('stadium_canonical_id', '')},
                }
                if t.get('conference_id'):
                    fields['conferenceId'] = {'value': t['conference_id']}
                if t.get('division_id'):
                    fields['divisionId'] = {'value': t['division_id']}

                recs.append({'recordType': 'Team', 'recordName': record_name, 'fields': fields})
            stats['teams'] = import_data(ck, recs, 'teams', args.dry_run, args.verbose)
        else:
            # Legacy format: extract teams from stadiums
            teams_dict = {}
            for s in stadiums:
                team_abbrevs = s.get('primary_team_abbrevs', s.get('team_abbrevs', []))
                for abbr in team_abbrevs:
                    team_key = f"{s['sport']}_{abbr}"
                    if team_key not in teams_dict:
                        teams_dict[team_key] = {'abbr': abbr, 'city': s['city'], 'sport': s['sport']}
                        team_uuid = deterministic_uuid(team_key)
                        team_map[(s['sport'], abbr)] = team_uuid

            recs = [{
                'recordType': 'Team', 'recordName': deterministic_uuid(team_key),
                'fields': {
                    'teamId': {'value': deterministic_uuid(team_key)},
                    'canonicalId': {'value': team_key},
                    'abbreviation': {'value': info['abbr']},
                    'name': {'value': info['abbr']},
                    'city': {'value': info['city']},
                    'sport': {'value': info['sport']},
                }
            } for team_key, info in teams_dict.items()]
            stats['teams'] = import_data(ck, recs, 'teams', args.dry_run, args.verbose)

    # Import games
    if import_games and games:
        # Detect canonical game format (has canonical_id field)
        use_canonical_games = games and 'canonical_id' in games[0]

        # Rebuild team_map if only importing games (--games-only flag)
        if not team_map:
            if teams:
                # Canonical format: use teams_canonical.json
                for t in teams:
                    team_id = get_team_id(t)
                    team_map[(t['sport'], t['abbreviation'])] = deterministic_uuid(team_id)
            else:
                # Legacy format: extract from stadiums
                for s in stadiums:
                    team_abbrevs = s.get('primary_team_abbrevs', s.get('team_abbrevs', []))
                    for abbr in team_abbrevs:
                        team_key = f"{s['sport']}_{abbr}"
                        team_map[(s['sport'], abbr)] = deterministic_uuid(team_key)

        # Build team -> stadium map for stadiumRef (legacy format needs this)
        team_stadium_map = {}
        for s in stadiums:
            stadium_id = get_stadium_id(s)
            stadium_uuid = stadium_id_map[stadium_id]
            team_abbrevs = s.get('primary_team_abbrevs', s.get('team_abbrevs', []))
            for abbr in team_abbrevs:
                team_stadium_map[(s['sport'], abbr)] = stadium_uuid

        print("--- Games ---")
        print(f"  Using {'canonical' if use_canonical_games else 'legacy'} game format")

        # Deduplicate games by ID (canonical_id or id)
        seen_ids = set()
        unique_games = []
        for g in games:
            game_id = g.get('canonical_id', g.get('id', ''))
            if game_id not in seen_ids:
                seen_ids.add(game_id)
                unique_games.append(g)

        if len(unique_games) < len(games):
            print(f"  Removed {len(games) - len(unique_games)} duplicate games")

        recs = []
        for g in unique_games:
            # Get game ID (canonical or legacy)
            game_id = g.get('canonical_id', g.get('id', ''))
            game_uuid = deterministic_uuid(game_id)
            sport = g['sport']

            fields = {
                'gameId': {'value': game_uuid},
                'canonicalId': {'value': game_id},  # Store canonical_id as string
                'sport': {'value': sport},
                'season': {'value': g.get('season', '')},
                'source': {'value': g.get('source', 'canonical' if use_canonical_games else '')},
            }

            # Parse date/time
            if g.get('date'):
                try:
                    # Parse time like "7:30p" or "10:00a"
                    time_str = g.get('time', '7:00p')
                    hour, minute = 19, 0
                    if time_str:
                        clean_time = time_str.lower().replace(' ', '')
                        is_pm = 'p' in clean_time
                        time_parts = clean_time.replace('p', '').replace('a', '').split(':')
                        if time_parts:
                            hour = int(time_parts[0])
                            if is_pm and hour != 12:
                                hour += 12
                            elif not is_pm and hour == 12:
                                hour = 0
                            if len(time_parts) > 1:
                                minute = int(time_parts[1])
                    dt = datetime.strptime(f"{g['date']} {hour:02d}:{minute:02d}", '%Y-%m-%d %H:%M')
                    # CloudKit expects TIMESTAMP type with milliseconds since epoch
                    fields['dateTime'] = {'value': int(dt.timestamp() * 1000), 'type': 'TIMESTAMP'}
                except Exception as e:
                    if args.verbose:
                        print(f"  Warning: Failed to parse date/time for {game_id}: {e}")

            # Team references
            if use_canonical_games:
                # Canonical format: extract team abbrev from canonical ID (team_nba_atl -> atl)
                home_team_canonical_id = g.get('home_team_canonical_id', '')
                away_team_canonical_id = g.get('away_team_canonical_id', '')
                home_team_uuid = deterministic_uuid(home_team_canonical_id)
                away_team_uuid = deterministic_uuid(away_team_canonical_id)
            else:
                # Legacy format: use abbreviations
                home_team_key = f"{sport}_{g.get('home_team_abbrev', '')}"
                away_team_key = f"{sport}_{g.get('away_team_abbrev', '')}"
                home_team_uuid = deterministic_uuid(home_team_key)
                away_team_uuid = deterministic_uuid(away_team_key)

            fields['homeTeamRef'] = {'value': {'recordName': home_team_uuid, 'action': 'NONE'}}
            fields['awayTeamRef'] = {'value': {'recordName': away_team_uuid, 'action': 'NONE'}}

            # Stadium reference
            if use_canonical_games and g.get('stadium_canonical_id'):
                # Canonical format: use stadium_canonical_id directly
                stadium_canonical_id = g['stadium_canonical_id']
                stadium_uuid = stadium_id_map.get(stadium_canonical_id)
                if stadium_uuid:
                    fields['stadiumRef'] = {'value': {'recordName': stadium_uuid, 'action': 'NONE'}}
                    fields['stadiumCanonicalId'] = {'value': stadium_canonical_id}
            else:
                # Legacy format: look up by home team abbrev
                stadium_uuid = team_stadium_map.get((sport, g.get('home_team_abbrev', '')))
                if stadium_uuid:
                    fields['stadiumRef'] = {'value': {'recordName': stadium_uuid, 'action': 'NONE'}}

            recs.append({'recordType': 'Game', 'recordName': game_uuid, 'fields': fields})

        stats['games'] = import_data(ck, recs, 'games', args.dry_run, args.verbose)

    # Import league structure
    if import_league_structure and league_structure:
        print("--- League Structure ---")
        now_ms = int(datetime.now(timezone.utc).timestamp() * 1000)
        recs = [{
            'recordType': 'LeagueStructure',
            'recordName': ls['id'],  # Use the id as recordName
            'fields': {
                'structureId': {'value': ls['id']},
                'sport': {'value': ls['sport']},
                'type': {'value': ls['type']},
                'name': {'value': ls['name']},
                'displayOrder': {'value': ls['display_order']},
                'schemaVersion': {'value': 1},
                'lastModified': {'value': now_ms, 'type': 'TIMESTAMP'},
                **({'abbreviation': {'value': ls['abbreviation']}} if ls.get('abbreviation') else {}),
                **({'parentId': {'value': ls['parent_id']}} if ls.get('parent_id') else {}),
            }
        } for ls in league_structure]
        stats['league_structures'] = import_data(ck, recs, 'league structures', args.dry_run, args.verbose)

    # Import team aliases
    if import_team_aliases and team_aliases:
        print("--- Team Aliases ---")
        now_ms = int(datetime.now(timezone.utc).timestamp() * 1000)
        recs = []
        for ta in team_aliases:
            fields = {
                'aliasId': {'value': ta['id']},
                'teamCanonicalId': {'value': ta['team_canonical_id']},
                'aliasType': {'value': ta['alias_type']},
                'aliasValue': {'value': ta['alias_value']},
                'schemaVersion': {'value': 1},
                'lastModified': {'value': now_ms, 'type': 'TIMESTAMP'},
            }
            # Add optional date fields
            if ta.get('valid_from'):
                try:
                    dt = datetime.strptime(ta['valid_from'], '%Y-%m-%d')
                    fields['validFrom'] = {'value': int(dt.timestamp() * 1000), 'type': 'TIMESTAMP'}
                except:
                    pass
            if ta.get('valid_until'):
                try:
                    dt = datetime.strptime(ta['valid_until'], '%Y-%m-%d')
                    fields['validUntil'] = {'value': int(dt.timestamp() * 1000), 'type': 'TIMESTAMP'}
                except:
                    pass
            recs.append({
                'recordType': 'TeamAlias',
                'recordName': ta['id'],  # Use the id as recordName
                'fields': fields
            })
        stats['team_aliases'] = import_data(ck, recs, 'team aliases', args.dry_run, args.verbose)

    # Import stadium aliases
    if import_stadium_aliases and stadium_aliases:
        print("--- Stadium Aliases ---")
        now_ms = int(datetime.now(timezone.utc).timestamp() * 1000)
        recs = []
        for sa in stadium_aliases:
            fields = {
                'aliasName': {'value': sa['alias_name'].lower()},  # Normalize to lowercase
                'stadiumCanonicalId': {'value': sa['stadium_canonical_id']},
                'schemaVersion': {'value': 1},
                'lastModified': {'value': now_ms, 'type': 'TIMESTAMP'},
            }
            # Add optional date fields
            if sa.get('valid_from'):
                try:
                    dt = datetime.strptime(sa['valid_from'], '%Y-%m-%d')
                    fields['validFrom'] = {'value': int(dt.timestamp() * 1000), 'type': 'TIMESTAMP'}
                except:
                    pass
            if sa.get('valid_until'):
                try:
                    dt = datetime.strptime(sa['valid_until'], '%Y-%m-%d')
                    fields['validUntil'] = {'value': int(dt.timestamp() * 1000), 'type': 'TIMESTAMP'}
                except:
                    pass
            # Extract sport from stadium_canonical_id (e.g., "stadium_nba_td_garden" -> "nba")
            # This makes record names unique for shared venues (TD Garden has NBA and NHL entries)
            stadium_id = sa['stadium_canonical_id']
            sport = stadium_id.split('_')[1] if '_' in stadium_id else 'unknown'
            record_name = f"{sport}_{sa['alias_name'].lower()}"
            recs.append({
                'recordType': 'StadiumAlias',
                'recordName': record_name,
                'fields': fields
            })
        stats['stadium_aliases'] = import_data(ck, recs, 'stadium aliases', args.dry_run, args.verbose)

    print(f"\n{'='*50}")
    print(f"COMPLETE: {stats['stadiums']} stadiums, {stats['teams']} teams, {stats['games']} games, {stats['league_structures']} league structures, {stats['team_aliases']} team aliases, {stats['stadium_aliases']} stadium aliases")
    if args.dry_run:
        print("[DRY RUN - nothing imported]")
    print()


# Script entry point: run the importer only when this file is executed
# directly, so importing the module does not trigger main().
if __name__ == "__main__":
    main()