#!/usr/bin/env python3 """ CloudKit Import Script ====================== Imports canonical JSON data into CloudKit. Run after canonicalization pipeline. Expected input files (from canonicalization pipeline): - stadiums_canonical.json - teams_canonical.json - games_canonical.json OR canonical/games/*.json (new structure) - stadium_aliases.json - league_structure.json - team_aliases.json File Structure (Option B - by sport/season): data/ games/ # Raw scraped games mlb_2025.json nba_2025.json ... canonical/ # Canonicalized data games/ mlb_2025.json nba_2025.json ... stadiums.json games_canonical.json # Combined (backward compatibility) stadiums_canonical.json teams_canonical.json Setup: 1. CloudKit Dashboard > Tokens & Keys > Server-to-Server Keys 2. Create key with Read/Write access to public database 3. Download .p8 file and note Key ID Usage: python cloudkit_import.py # Interactive menu python cloudkit_import.py --dry-run # Preview first python cloudkit_import.py --key-id XX --key-file key.p8 # Import all python cloudkit_import.py --stadiums-only # Stadiums first python cloudkit_import.py --games-only # All games python cloudkit_import.py --games-files mlb_2025.json # Specific game file python cloudkit_import.py --games-files mlb_2025.json,nba_2025.json # Multiple files python cloudkit_import.py --stadium-aliases-only # Stadium aliases only python cloudkit_import.py --delete-all # Delete then import python cloudkit_import.py --delete-only # Delete only (no import) """ import argparse, json, time, os, sys, hashlib, base64, requests from datetime import datetime, timezone from pathlib import Path try: from cryptography.hazmat.primitives import hashes, serialization from cryptography.hazmat.primitives.asymmetric import ec from cryptography.hazmat.backends import default_backend HAS_CRYPTO = True except ImportError: HAS_CRYPTO = False CONTAINER = "iCloud.com.sportstime.app" HOST = "https://api.apple-cloudkit.com" BATCH_SIZE = 200 # Hardcoded credentials DEFAULT_KEY_ID 
= "152be0715e0276e31aaea5cbfe79dc872f298861a55c70fae14e5fe3e026cff9" DEFAULT_KEY_FILE = "eckey.pem" def show_game_files_menu(data_dir: Path) -> list[str]: """Show available game files and let user select which to import.""" canonical_games_dir = data_dir / 'canonical' / 'games' if not canonical_games_dir.exists(): print("\n No canonical/games/ directory found.") return [] game_files = sorted(canonical_games_dir.glob('*.json')) if not game_files: print("\n No game files found in canonical/games/") return [] print("\n" + "="*50) print("Select Game Files to Import") print("="*50) print("\n Available files:") for i, f in enumerate(game_files, 1): # Count games in file with open(f) as fp: games = json.load(fp) print(f" {i}. {f.name} ({len(games):,} games)") print(f"\n a. All files") print(f" 0. Cancel") print() while True: try: choice = input("Enter file numbers (comma-separated), 'a' for all, or 0 to cancel: ").strip().lower() if choice == '0': return [] if choice == 'a': return [f.name for f in game_files] # Parse comma-separated numbers indices = [int(x.strip()) for x in choice.split(',')] selected = [] for idx in indices: if 1 <= idx <= len(game_files): selected.append(game_files[idx-1].name) else: print(f"Invalid selection: {idx}") continue if selected: return selected print("No valid selections. Try again.") except (ValueError, EOFError, KeyboardInterrupt): print("\nCancelled.") return [] def show_menu(): """Show interactive menu and return selected action.""" print("\n" + "="*50) print("CloudKit Import - Select Action") print("="*50) print("\n 1. Import all (stadiums, teams, games, league structure, team aliases, stadium aliases)") print(" 2. Stadiums only") print(" 3. Games only (all files)") print(" 4. Games - select specific files") print(" 5. League structure only") print(" 6. Team aliases only") print(" 7. Stadium aliases only") print(" 8. Canonical only (league structure + team aliases + stadium aliases)") print(" 9. Delete all then import") print(" 10. 
Delete only (no import)") print(" 11. Dry run (preview only)") print(" 12. Smart sync (diff-based, only upload changes)") print(" 13. Smart sync + delete orphans") print(" 0. Exit") print() while True: try: choice = input("Enter choice [1-13, 0 to exit]: ").strip() if choice == '0': return None if choice in ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13']: return int(choice) print("Invalid choice. Please enter 1-13 or 0.") except (EOFError, KeyboardInterrupt): print("\nExiting.") return None def deterministic_uuid(string: str) -> str: """ Generate a deterministic UUID from a string using SHA256. Matches the StubDataProvider.deterministicUUID() implementation in Swift. """ # SHA256 hash of the string hash_bytes = hashlib.sha256(string.encode('utf-8')).digest() # Use first 16 bytes uuid_bytes = bytearray(hash_bytes[:16]) # Set UUID version (4) and variant bits to match Swift implementation uuid_bytes[6] = (uuid_bytes[6] & 0x0F) | 0x40 uuid_bytes[8] = (uuid_bytes[8] & 0x3F) | 0x80 # Format as UUID string return f"{uuid_bytes[0:4].hex()}-{uuid_bytes[4:6].hex()}-{uuid_bytes[6:8].hex()}-{uuid_bytes[8:10].hex()}-{uuid_bytes[10:16].hex()}".upper() class CloudKit: def __init__(self, key_id, private_key, container, env): self.key_id = key_id self.private_key = private_key self.path_base = f"/database/1/{container}/{env}/public" def _sign(self, date, body, path): key = serialization.load_pem_private_key(self.private_key, None, default_backend()) body_hash = base64.b64encode(hashlib.sha256(body.encode()).digest()).decode() sig = key.sign(f"{date}:{body_hash}:{path}".encode(), ec.ECDSA(hashes.SHA256())) return base64.b64encode(sig).decode() def modify(self, operations): path = f"{self.path_base}/records/modify" body = json.dumps({'operations': operations}) date = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ') headers = { 'Content-Type': 'application/json', 'X-Apple-CloudKit-Request-KeyID': self.key_id, 'X-Apple-CloudKit-Request-ISO8601Date': 
date, 'X-Apple-CloudKit-Request-SignatureV1': self._sign(date, body, path), } r = requests.post(f"{HOST}{path}", headers=headers, data=body, timeout=60) if r.status_code == 200: return r.json() else: try: err = r.json() reason = err.get('reason', 'Unknown') code = err.get('serverErrorCode', r.status_code) return {'error': f"{code}: {reason}"} except: return {'error': f"{r.status_code}: {r.text[:200]}"} def query(self, record_type, limit=200, verbose=False): """Query records of a given type.""" path = f"{self.path_base}/records/query" body = json.dumps({ 'query': {'recordType': record_type}, 'resultsLimit': limit }) date = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ') headers = { 'Content-Type': 'application/json', 'X-Apple-CloudKit-Request-KeyID': self.key_id, 'X-Apple-CloudKit-Request-ISO8601Date': date, 'X-Apple-CloudKit-Request-SignatureV1': self._sign(date, body, path), } if verbose: print(f" Querying {record_type}...") try: r = requests.post(f"{HOST}{path}", headers=headers, data=body, timeout=30) if verbose: print(f" Response: {r.status_code}") if r.status_code == 200: result = r.json() if verbose: print(f" Found {len(result.get('records', []))} records") return result return {'error': f"{r.status_code}: {r.text[:200]}"} except requests.exceptions.Timeout: return {'error': 'Request timed out after 30s'} except Exception as e: return {'error': f"Request failed: {e}"} def query_all(self, record_type, verbose=False): """Query all records of a given type with pagination.""" all_records = {} continuation_marker = None while True: path = f"{self.path_base}/records/query" query_body = { 'query': {'recordType': record_type}, 'resultsLimit': 200 } if continuation_marker: query_body['continuationMarker'] = continuation_marker body = json.dumps(query_body) date = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ') headers = { 'Content-Type': 'application/json', 'X-Apple-CloudKit-Request-KeyID': self.key_id, 'X-Apple-CloudKit-Request-ISO8601Date': date, 
'X-Apple-CloudKit-Request-SignatureV1': self._sign(date, body, path), } if verbose: page_num = len(all_records) // 200 + 1 print(f" Querying {record_type} (page {page_num})...") try: r = requests.post(f"{HOST}{path}", headers=headers, data=body, timeout=60) if r.status_code != 200: print(f" Query error: {r.status_code}: {r.text[:200]}") break result = r.json() records = result.get('records', []) for rec in records: record_name = rec.get('recordName', '') all_records[record_name] = rec if verbose: print(f" Found {len(records)} records (total: {len(all_records)})") # Check for continuation continuation_marker = result.get('continuationMarker') if not continuation_marker: break time.sleep(0.3) # Rate limiting except requests.exceptions.Timeout: print(f" Query timeout after 60s") break except Exception as e: print(f" Query failed: {e}") break return all_records def delete_all(self, record_type, verbose=False): """Delete all records of a given type.""" total_deleted = 0 while True: result = self.query(record_type, verbose=verbose) if 'error' in result: print(f" Query error: {result['error']}") break records = result.get('records', []) if not records: break # Build delete operations (recordChangeTag required for delete) ops = [{ 'operationType': 'delete', 'record': { 'recordName': r['recordName'], 'recordType': record_type, 'recordChangeTag': r.get('recordChangeTag', '') } } for r in records] if verbose: print(f" Sending delete for {len(ops)} records...") delete_result = self.modify(ops) if verbose: print(f" Delete response: {json.dumps(delete_result)[:500]}") if 'error' in delete_result: print(f" Delete error: {delete_result['error']}") break # Check for individual record errors result_records = delete_result.get('records', []) successful = [r for r in result_records if 'serverErrorCode' not in r] failed = [r for r in result_records if 'serverErrorCode' in r] if failed and verbose: print(f" Failed: {failed[0]}") total_deleted += len(successful) print(f" Deleted 
{len(successful)} {record_type} records" + (f" ({len(failed)} failed)" if failed else "")) time.sleep(0.5) return total_deleted def import_data(ck, records, name, dry_run, verbose): total = 0 errors = 0 for i in range(0, len(records), BATCH_SIZE): batch = records[i:i+BATCH_SIZE] ops = [{'operationType': 'forceReplace', 'record': r} for r in batch] if verbose: print(f" Batch {i//BATCH_SIZE + 1}: {len(batch)} records, {len(ops)} ops") if not ops: print(f" Warning: Empty batch at index {i}, skipping") continue if dry_run: print(f" [DRY RUN] Would create {len(batch)} {name}") total += len(batch) else: result = ck.modify(ops) if 'error' in result: errors += 1 if errors <= 3: # Only show first 3 errors print(f" Error: {result['error']}") if verbose and batch: print(f" Sample record: {json.dumps(batch[0], indent=2)[:500]}") if errors == 3: print(" (suppressing further errors...)") else: result_records = result.get('records', []) # Count only successful records (no serverErrorCode) successful = [r for r in result_records if 'serverErrorCode' not in r] failed = [r for r in result_records if 'serverErrorCode' in r] n = len(successful) total += n print(f" Created {n} {name}") if failed: print(f" Failed {len(failed)} records: {failed[0].get('serverErrorCode')}: {failed[0].get('reason')}") if verbose: print(f" Response: {json.dumps(result, indent=2)[:1000]}") time.sleep(0.5) if errors > 0: print(f" Total errors: {errors}") return total def fields_equal(local_value, cloud_value, field_type=None): """ Compare a local field value with a cloud field value. Handles special cases for locations and references. 
""" # Handle location fields (compare with tolerance) if isinstance(local_value, dict) and 'latitude' in local_value: if not isinstance(cloud_value, dict) or 'latitude' not in cloud_value: return False lat_diff = abs(float(local_value['latitude']) - float(cloud_value['latitude'])) lng_diff = abs(float(local_value['longitude']) - float(cloud_value['longitude'])) return lat_diff < 0.0001 and lng_diff < 0.0001 # Handle reference fields (compare by recordName only) if isinstance(local_value, dict) and 'recordName' in local_value: if not isinstance(cloud_value, dict) or 'recordName' not in cloud_value: return False return local_value['recordName'] == cloud_value['recordName'] # Handle lists if isinstance(local_value, list): if not isinstance(cloud_value, list): return False return sorted(local_value) == sorted(cloud_value) # Direct comparison for strings, ints, etc. return local_value == cloud_value def compute_diff(local_records, cloud_records, verbose=False): """ Compare local records against cloud records. Returns dict with: 'new', 'updated', 'unchanged', 'deleted' lists. Each item includes the record and (for updates) the cloud recordChangeTag. 
""" diff = { 'new': [], 'updated': [], 'unchanged': [], 'deleted': [] } local_names = set(local_records.keys()) cloud_names = set(cloud_records.keys()) # New records: in local but not in cloud for name in local_names - cloud_names: diff['new'].append({'record': local_records[name]}) # Deleted records: in cloud but not in local for name in cloud_names - local_names: diff['deleted'].append({ 'record': cloud_records[name], 'recordChangeTag': cloud_records[name].get('recordChangeTag', '') }) # Check existing records for changes for name in local_names & cloud_names: local_rec = local_records[name] cloud_rec = cloud_records[name] local_fields = local_rec.get('fields', {}) cloud_fields = cloud_rec.get('fields', {}) # Compare all fields (except metadata like recordChangeTag) has_diff = False diff_fields = [] for field_name, field_data in local_fields.items(): local_value = field_data.get('value') cloud_field = cloud_fields.get(field_name, {}) cloud_value = cloud_field.get('value') if not fields_equal(local_value, cloud_value): has_diff = True diff_fields.append(field_name) if has_diff: diff['updated'].append({ 'record': local_rec, 'recordChangeTag': cloud_rec.get('recordChangeTag', ''), 'changed_fields': diff_fields }) if verbose: print(f" Changed: {name} - fields: {', '.join(diff_fields)}") else: diff['unchanged'].append({'record': local_rec}) return diff def show_diff_report(ck, data_dir, verbose=False): """Query CloudKit and show diff report for all record types.""" from pathlib import Path data_dir = Path(data_dir) print("\n" + "="*50) print("CloudKit Diff Report") print("="*50 + "\n") # Load local data stadiums = json.load(open(data_dir / 'stadiums_canonical.json')) if (data_dir / 'stadiums_canonical.json').exists() else [] teams = json.load(open(data_dir / 'teams_canonical.json')) if (data_dir / 'teams_canonical.json').exists() else [] # Load games from canonical/games/*.json canonical_games_dir = data_dir / 'canonical' / 'games' games = [] if 
canonical_games_dir.exists(): for games_file in sorted(canonical_games_dir.glob('*.json')): with open(games_file) as f: games.extend(json.load(f)) league_structure = json.load(open(data_dir / 'league_structure.json')) if (data_dir / 'league_structure.json').exists() else [] team_aliases = json.load(open(data_dir / 'team_aliases.json')) if (data_dir / 'team_aliases.json').exists() else [] stadium_aliases = json.load(open(data_dir / 'stadium_aliases.json')) if (data_dir / 'stadium_aliases.json').exists() else [] print(f"Local data: {len(stadiums)} stadiums, {len(teams)} teams, {len(games)} games") print(f" {len(league_structure)} league structures, {len(team_aliases)} team aliases, {len(stadium_aliases)} stadium aliases\n") # Build local record maps (same format as CloudKit records) def build_stadium_records(stadiums): records = {} for s in stadiums: stadium_id = s.get('canonical_id', s.get('id', '')) record_name = deterministic_uuid(stadium_id) team_abbrevs = s.get('primary_team_abbrevs', s.get('team_abbrevs', [])) fields = { 'stadiumId': {'value': record_name}, 'canonicalId': {'value': stadium_id}, 'name': {'value': s['name']}, 'city': {'value': s['city']}, 'state': {'value': s.get('state', '')}, 'sport': {'value': s['sport']}, 'source': {'value': s.get('source', 'canonical')}, 'teamAbbrevs': {'value': team_abbrevs}, } if s.get('latitude'): fields['location'] = {'value': {'latitude': s['latitude'], 'longitude': s['longitude']}} if s.get('capacity'): fields['capacity'] = {'value': s['capacity']} records[record_name] = {'recordType': 'Stadium', 'recordName': record_name, 'fields': fields} return records def build_team_records(teams): records = {} for t in teams: team_id = t.get('canonical_id', '') record_name = deterministic_uuid(team_id) fields = { 'teamId': {'value': record_name}, 'canonicalId': {'value': team_id}, 'abbreviation': {'value': t['abbreviation']}, 'name': {'value': t['name']}, 'city': {'value': t['city']}, 'sport': {'value': t['sport']}, 
'stadiumCanonicalId': {'value': t.get('stadium_canonical_id', '')}, } if t.get('conference_id'): fields['conferenceId'] = {'value': t['conference_id']} if t.get('division_id'): fields['divisionId'] = {'value': t['division_id']} records[record_name] = {'recordType': 'Team', 'recordName': record_name, 'fields': fields} return records def build_game_records(games, stadiums): records = {} stadium_id_map = {s.get('canonical_id', s.get('id', '')): deterministic_uuid(s.get('canonical_id', s.get('id', ''))) for s in stadiums} seen_ids = set() for g in games: game_id = g.get('canonical_id', g.get('id', '')) if game_id in seen_ids: continue seen_ids.add(game_id) game_uuid = deterministic_uuid(game_id) sport = g['sport'] fields = { 'gameId': {'value': game_uuid}, 'canonicalId': {'value': game_id}, 'sport': {'value': sport}, 'season': {'value': g.get('season', '')}, 'source': {'value': g.get('source', 'canonical')}, } # Parse date/time if g.get('date'): try: time_str = g.get('time', '7:00p') hour, minute = 19, 0 if time_str: clean_time = time_str.lower().replace(' ', '') is_pm = 'p' in clean_time time_parts = clean_time.replace('p', '').replace('a', '').split(':') if time_parts: hour = int(time_parts[0]) if is_pm and hour != 12: hour += 12 elif not is_pm and hour == 12: hour = 0 if len(time_parts) > 1: minute = int(time_parts[1]) dt = datetime.strptime(f"{g['date']} {hour:02d}:{minute:02d}", '%Y-%m-%d %H:%M') fields['dateTime'] = {'value': int(dt.timestamp() * 1000), 'type': 'TIMESTAMP'} except: pass # Team references home_team_canonical_id = g.get('home_team_canonical_id', '') away_team_canonical_id = g.get('away_team_canonical_id', '') home_team_uuid = deterministic_uuid(home_team_canonical_id) away_team_uuid = deterministic_uuid(away_team_canonical_id) fields['homeTeamRef'] = {'value': {'recordName': home_team_uuid, 'action': 'NONE'}} fields['awayTeamRef'] = {'value': {'recordName': away_team_uuid, 'action': 'NONE'}} # Stadium reference if g.get('stadium_canonical_id'): 
stadium_canonical_id = g['stadium_canonical_id'] stadium_uuid = stadium_id_map.get(stadium_canonical_id) if stadium_uuid: fields['stadiumRef'] = {'value': {'recordName': stadium_uuid, 'action': 'NONE'}} fields['stadiumCanonicalId'] = {'value': stadium_canonical_id} records[game_uuid] = {'recordType': 'Game', 'recordName': game_uuid, 'fields': fields} return records def build_league_structure_records(league_structure): records = {} for ls in league_structure: record_name = ls['id'] fields = { 'structureId': {'value': ls['id']}, 'sport': {'value': ls['sport']}, 'type': {'value': ls['type']}, 'name': {'value': ls['name']}, 'displayOrder': {'value': ls['display_order']}, 'schemaVersion': {'value': 1}, } if ls.get('abbreviation'): fields['abbreviation'] = {'value': ls['abbreviation']} if ls.get('parent_id'): fields['parentId'] = {'value': ls['parent_id']} records[record_name] = {'recordType': 'LeagueStructure', 'recordName': record_name, 'fields': fields} return records def build_team_alias_records(team_aliases): records = {} for ta in team_aliases: record_name = ta['id'] fields = { 'aliasId': {'value': ta['id']}, 'teamCanonicalId': {'value': ta['team_canonical_id']}, 'aliasType': {'value': ta['alias_type']}, 'aliasValue': {'value': ta['alias_value']}, 'schemaVersion': {'value': 1}, } records[record_name] = {'recordType': 'TeamAlias', 'recordName': record_name, 'fields': fields} return records def build_stadium_alias_records(stadium_aliases): records = {} for sa in stadium_aliases: stadium_id = sa['stadium_canonical_id'] sport = stadium_id.split('_')[1] if '_' in stadium_id else 'unknown' record_name = f"{sport}_{sa['alias_name'].lower()}" fields = { 'aliasName': {'value': sa['alias_name'].lower()}, 'stadiumCanonicalId': {'value': sa['stadium_canonical_id']}, 'schemaVersion': {'value': 1}, } records[record_name] = {'recordType': 'StadiumAlias', 'recordName': record_name, 'fields': fields} return records # Compute and display diffs for each record type record_types = [ 
('Stadium', stadiums, build_stadium_records), ('Team', teams, build_team_records), ('Game', games, lambda g: build_game_records(g, stadiums)), ('LeagueStructure', league_structure, build_league_structure_records), ('TeamAlias', team_aliases, build_team_alias_records), ('StadiumAlias', stadium_aliases, build_stadium_alias_records), ] all_diffs = {} for record_type, data, builder in record_types: if not data: print(f"{record_type}: No local data") continue print(f"Checking {record_type}...") local_records = builder(data) # Query cloud records cloud_records = ck.query_all(record_type, verbose=verbose) # Compute diff diff = compute_diff(local_records, cloud_records, verbose=verbose) all_diffs[record_type] = diff # Report print(f" {record_type}: {len(diff['unchanged'])} unchanged, {len(diff['new'])} new, {len(diff['updated'])} updated, {len(diff['deleted'])} deleted") print("\n" + "="*50) print("Diff Summary") print("="*50) for record_type, diff in all_diffs.items(): total_changes = len(diff['new']) + len(diff['updated']) + len(diff['deleted']) status = "in sync" if total_changes == 0 else f"{total_changes} changes needed" print(f" {record_type}: {status}") return all_diffs def sync_diff(ck, diff, record_type, dry_run=False, verbose=False, delete_orphans=False): """ Sync only changed records based on diff. Returns counts: created, updated, deleted, skipped, errors. 
""" stats = {'created': 0, 'updated': 0, 'deleted': 0, 'skipped': 0, 'errors': 0} # Handle new records (forceReplace) new_records = [item['record'] for item in diff['new']] if new_records: if dry_run: print(f" [DRY RUN] Would create {len(new_records)} new {record_type}") stats['created'] = len(new_records) else: for i in range(0, len(new_records), BATCH_SIZE): batch = new_records[i:i+BATCH_SIZE] ops = [{'operationType': 'forceReplace', 'record': r} for r in batch] result = ck.modify(ops) if 'error' in result: print(f" Create error: {result['error']}") stats['errors'] += len(batch) else: result_records = result.get('records', []) successful = [r for r in result_records if 'serverErrorCode' not in r] failed = [r for r in result_records if 'serverErrorCode' in r] stats['created'] += len(successful) stats['errors'] += len(failed) if failed and verbose: print(f" Create failed: {failed[0].get('serverErrorCode')}: {failed[0].get('reason')}") time.sleep(0.3) # Handle updated records (update with recordChangeTag) updated_items = diff['updated'] if updated_items: if dry_run: print(f" [DRY RUN] Would update {len(updated_items)} {record_type}") if verbose: for item in updated_items[:5]: print(f" - {item['record'].get('recordName')}: {', '.join(item.get('changed_fields', []))}") if len(updated_items) > 5: print(f" ... 
and {len(updated_items) - 5} more") stats['updated'] = len(updated_items) else: for i in range(0, len(updated_items), BATCH_SIZE): batch = updated_items[i:i+BATCH_SIZE] ops = [] for item in batch: record = item['record'].copy() record['recordChangeTag'] = item['recordChangeTag'] ops.append({'operationType': 'update', 'record': record}) result = ck.modify(ops) if 'error' in result: print(f" Update error: {result['error']}") stats['errors'] += len(batch) else: result_records = result.get('records', []) successful = [r for r in result_records if 'serverErrorCode' not in r] failed = [r for r in result_records if 'serverErrorCode' in r] stats['updated'] += len(successful) # Handle conflicts (409) conflicts = [r for r in failed if r.get('serverErrorCode') == 'CONFLICT'] other_errors = [r for r in failed if r.get('serverErrorCode') != 'CONFLICT'] if conflicts: print(f" {len(conflicts)} conflicts detected (records modified since query)") # Re-try with forceReplace for conflicts (data loss is acceptable as we have source of truth) conflict_records = [item['record'] for item in batch if any( c.get('recordName') == item['record'].get('recordName') for c in conflicts )] if conflict_records: retry_ops = [{'operationType': 'forceReplace', 'record': r} for r in conflict_records] retry_result = ck.modify(retry_ops) if 'error' not in retry_result: retry_records = retry_result.get('records', []) retry_success = [r for r in retry_records if 'serverErrorCode' not in r] stats['updated'] += len(retry_success) stats['errors'] -= len(retry_success) if other_errors: stats['errors'] += len(other_errors) if verbose: print(f" Update failed: {other_errors[0].get('serverErrorCode')}: {other_errors[0].get('reason')}") time.sleep(0.3) # Handle deleted records (only if delete_orphans is True) deleted_items = diff['deleted'] if deleted_items: if delete_orphans: if dry_run: print(f" [DRY RUN] Would delete {len(deleted_items)} orphan {record_type}") stats['deleted'] = len(deleted_items) else: for i 
in range(0, len(deleted_items), BATCH_SIZE): batch = deleted_items[i:i+BATCH_SIZE] ops = [{ 'operationType': 'delete', 'record': { 'recordName': item['record'].get('recordName'), 'recordType': record_type, 'recordChangeTag': item['recordChangeTag'] } } for item in batch] result = ck.modify(ops) if 'error' in result: print(f" Delete error: {result['error']}") stats['errors'] += len(batch) else: result_records = result.get('records', []) successful = [r for r in result_records if 'serverErrorCode' not in r] failed = [r for r in result_records if 'serverErrorCode' in r] stats['deleted'] += len(successful) stats['errors'] += len(failed) time.sleep(0.3) else: print(f" Warning: {len(deleted_items)} orphan {record_type} in CloudKit (use --delete-orphans to remove)") # Count unchanged as skipped stats['skipped'] = len(diff['unchanged']) return stats def run_smart_sync(ck, data_dir, dry_run=False, verbose=False, delete_orphans=False): """Run differential sync for all record types.""" from pathlib import Path data_dir = Path(data_dir) print("\n" + "="*50) print(f"CloudKit Smart Sync {'(DRY RUN)' if dry_run else ''}") print("="*50 + "\n") # Load local data stadiums = json.load(open(data_dir / 'stadiums_canonical.json')) if (data_dir / 'stadiums_canonical.json').exists() else [] teams = json.load(open(data_dir / 'teams_canonical.json')) if (data_dir / 'teams_canonical.json').exists() else [] # Load games from canonical/games/*.json canonical_games_dir = data_dir / 'canonical' / 'games' games = [] if canonical_games_dir.exists(): for games_file in sorted(canonical_games_dir.glob('*.json')): with open(games_file) as f: games.extend(json.load(f)) league_structure = json.load(open(data_dir / 'league_structure.json')) if (data_dir / 'league_structure.json').exists() else [] team_aliases = json.load(open(data_dir / 'team_aliases.json')) if (data_dir / 'team_aliases.json').exists() else [] stadium_aliases = json.load(open(data_dir / 'stadium_aliases.json')) if (data_dir / 
'stadium_aliases.json').exists() else [] print(f"Local data: {len(stadiums)} stadiums, {len(teams)} teams, {len(games)} games") print(f" {len(league_structure)} league structures, {len(team_aliases)} team aliases, {len(stadium_aliases)} stadium aliases\n") # Build local record maps (reuse from show_diff_report) def build_stadium_records(stadiums): records = {} for s in stadiums: stadium_id = s.get('canonical_id', s.get('id', '')) record_name = deterministic_uuid(stadium_id) team_abbrevs = s.get('primary_team_abbrevs', s.get('team_abbrevs', [])) fields = { 'stadiumId': {'value': record_name}, 'canonicalId': {'value': stadium_id}, 'name': {'value': s['name']}, 'city': {'value': s['city']}, 'state': {'value': s.get('state', '')}, 'sport': {'value': s['sport']}, 'source': {'value': s.get('source', 'canonical')}, 'teamAbbrevs': {'value': team_abbrevs}, } if s.get('latitude'): fields['location'] = {'value': {'latitude': s['latitude'], 'longitude': s['longitude']}} if s.get('capacity'): fields['capacity'] = {'value': s['capacity']} records[record_name] = {'recordType': 'Stadium', 'recordName': record_name, 'fields': fields} return records def build_team_records(teams): records = {} for t in teams: team_id = t.get('canonical_id', '') record_name = deterministic_uuid(team_id) fields = { 'teamId': {'value': record_name}, 'canonicalId': {'value': team_id}, 'abbreviation': {'value': t['abbreviation']}, 'name': {'value': t['name']}, 'city': {'value': t['city']}, 'sport': {'value': t['sport']}, 'stadiumCanonicalId': {'value': t.get('stadium_canonical_id', '')}, } if t.get('conference_id'): fields['conferenceId'] = {'value': t['conference_id']} if t.get('division_id'): fields['divisionId'] = {'value': t['division_id']} records[record_name] = {'recordType': 'Team', 'recordName': record_name, 'fields': fields} return records def build_game_records(games, stadiums): records = {} stadium_id_map = {s.get('canonical_id', s.get('id', '')): deterministic_uuid(s.get('canonical_id', 
s.get('id', ''))) for s in stadiums}
        # Deduplicate games by ID; first occurrence wins.
        seen_ids = set()
        for g in games:
            game_id = g.get('canonical_id', g.get('id', ''))
            if game_id in seen_ids:
                continue
            seen_ids.add(game_id)
            game_uuid = deterministic_uuid(game_id)
            sport = g['sport']
            fields = {
                'gameId': {'value': game_uuid},
                'canonicalId': {'value': game_id},
                'sport': {'value': sport},
                'season': {'value': g.get('season', '')},
                'source': {'value': g.get('source', 'canonical')},
            }
            if g.get('date'):
                try:
                    # Parse clock strings like "7:30p" / "10:00a"; default 7:00pm.
                    time_str = g.get('time', '7:00p')
                    hour, minute = 19, 0
                    if time_str:
                        clean_time = time_str.lower().replace(' ', '')
                        is_pm = 'p' in clean_time
                        time_parts = clean_time.replace('p', '').replace('a', '').split(':')
                        # NOTE(review): str.split always returns at least one element,
                        # so this guard is always true; int('') failures fall to except.
                        if time_parts:
                            hour = int(time_parts[0])
                            if is_pm and hour != 12:
                                hour += 12
                            elif not is_pm and hour == 12:
                                hour = 0
                            if len(time_parts) > 1:
                                minute = int(time_parts[1])
                    dt = datetime.strptime(f"{g['date']} {hour:02d}:{minute:02d}", '%Y-%m-%d %H:%M')
                    # NOTE(review): dt is naive, so .timestamp() interprets it in the
                    # machine's local timezone — confirm that is the intended behavior.
                    fields['dateTime'] = {'value': int(dt.timestamp() * 1000), 'type': 'TIMESTAMP'}
                # NOTE(review): bare except silently drops any malformed date/time.
                except:
                    pass
            home_team_canonical_id = g.get('home_team_canonical_id', '')
            away_team_canonical_id = g.get('away_team_canonical_id', '')
            home_team_uuid = deterministic_uuid(home_team_canonical_id)
            away_team_uuid = deterministic_uuid(away_team_canonical_id)
            fields['homeTeamRef'] = {'value': {'recordName': home_team_uuid, 'action': 'NONE'}}
            fields['awayTeamRef'] = {'value': {'recordName': away_team_uuid, 'action': 'NONE'}}
            if g.get('stadium_canonical_id'):
                stadium_canonical_id = g['stadium_canonical_id']
                stadium_uuid = stadium_id_map.get(stadium_canonical_id)
                if stadium_uuid:
                    fields['stadiumRef'] = {'value': {'recordName': stadium_uuid, 'action': 'NONE'}}
                # String fallback is stored even when no Stadium record matched.
                fields['stadiumCanonicalId'] = {'value': stadium_canonical_id}
            records[game_uuid] = {'recordType': 'Game', 'recordName': game_uuid, 'fields': fields}
        return records

    def build_league_structure_records(league_structure: list) -> dict:
        """Build LeagueStructure CloudKit records keyed by recordName (the local id)."""
        records = {}
        for ls in league_structure:
            record_name = ls['id']
            fields = {
                'structureId': {'value': ls['id']},
                'sport': {'value': ls['sport']},
                'type': {'value': ls['type']},
                'name': {'value': ls['name']},
                'displayOrder': {'value': ls['display_order']},
                'schemaVersion': {'value': 1},
            }
            # Optional fields are only emitted when present and non-empty.
            if ls.get('abbreviation'):
                fields['abbreviation'] = {'value': ls['abbreviation']}
            if ls.get('parent_id'):
                fields['parentId'] = {'value': ls['parent_id']}
            records[record_name] = {'recordType': 'LeagueStructure', 'recordName': record_name, 'fields': fields}
        return records

    def build_team_alias_records(team_aliases: list) -> dict:
        """Build TeamAlias CloudKit records keyed by recordName (the local id)."""
        records = {}
        for ta in team_aliases:
            record_name = ta['id']
            fields = {
                'aliasId': {'value': ta['id']},
                'teamCanonicalId': {'value': ta['team_canonical_id']},
                'aliasType': {'value': ta['alias_type']},
                'aliasValue': {'value': ta['alias_value']},
                'schemaVersion': {'value': 1},
            }
            records[record_name] = {'recordType': 'TeamAlias', 'recordName': record_name, 'fields': fields}
        return records

    def build_stadium_alias_records(stadium_aliases: list) -> dict:
        """Build StadiumAlias records; recordName is "<sport>_<alias>" so shared
        venues (e.g. one arena hosting NBA and NHL) do not collide."""
        records = {}
        for sa in stadium_aliases:
            stadium_id = sa['stadium_canonical_id']
            # canonical id looks like "stadium_<sport>_<slug>"; take the sport part.
            sport = stadium_id.split('_')[1] if '_' in stadium_id else 'unknown'
            record_name = f"{sport}_{sa['alias_name'].lower()}"
            fields = {
                'aliasName': {'value': sa['alias_name'].lower()},
                'stadiumCanonicalId': {'value': sa['stadium_canonical_id']},
                'schemaVersion': {'value': 1},
            }
            records[record_name] = {'recordType': 'StadiumAlias', 'recordName': record_name, 'fields': fields}
        return records

    # Sync each record type
    # (record type name, local data list, builder producing {recordName: record}).
    record_types = [
        ('Stadium', stadiums, build_stadium_records),
        ('Team', teams, build_team_records),
        ('Game', games, lambda g: build_game_records(g, stadiums)),
        ('LeagueStructure', league_structure, build_league_structure_records),
        ('TeamAlias', team_aliases, build_team_alias_records),
        ('StadiumAlias', stadium_aliases, build_stadium_alias_records),
    ]
    total_stats = {'created': 0, 'updated': 0, 'deleted': 0, 'skipped': 0, 'errors': 0}
    for record_type, data, builder in record_types:
        if not data:
            print(f"{record_type}: No local data, skipping")
            continue
        print(f"\n--- {record_type} ---")
        local_records = builder(data)
        # Query cloud records
        print(f" Querying CloudKit...")
        cloud_records = ck.query_all(record_type, verbose=verbose)
        print(f" Found {len(cloud_records)} cloud records, {len(local_records)} local records")
        # Compute diff
        diff = compute_diff(local_records, cloud_records, verbose=verbose)
        print(f" Diff: {len(diff['new'])} new, {len(diff['updated'])} updated, {len(diff['unchanged'])} unchanged, {len(diff['deleted'])} orphans")
        # Sync
        stats = sync_diff(ck, diff, record_type, dry_run=dry_run, verbose=verbose, delete_orphans=delete_orphans)
        # Accumulate stats
        for key in total_stats:
            total_stats[key] += stats[key]
        print(f" Result: {stats['created']} created, {stats['updated']} updated, {stats['deleted']} deleted, {stats['skipped']} skipped")
        if stats['errors']:
            print(f" Errors: {stats['errors']}")
    # Summary
    print("\n" + "="*50)
    print("Smart Sync Summary")
    print("="*50)
    print(f" Created: {total_stats['created']}")
    print(f" Updated: {total_stats['updated']}")
    print(f" Deleted: {total_stats['deleted']}")
    print(f" Skipped (unchanged): {total_stats['skipped']}")
    if total_stats['errors']:
        print(f" Errors: {total_stats['errors']}")
    if dry_run:
        print("\n[DRY RUN - no changes made]")
    return total_stats


def main():
    """CLI entry point: parse flags (or show the interactive menu), load local
    JSON data, then dispatch to diff / smart-sync / delete / import modes."""
    p = argparse.ArgumentParser(description='Import JSON to CloudKit')
    p.add_argument('--key-id', default=DEFAULT_KEY_ID)
    p.add_argument('--key-file', default=DEFAULT_KEY_FILE)
    p.add_argument('--container', default=CONTAINER)
    p.add_argument('--env', choices=['development', 'production'], default='development')
    p.add_argument('--data-dir', default='./data')
    p.add_argument('--stadiums-only', action='store_true')
    p.add_argument('--games-only', action='store_true')
    p.add_argument('--games-files', type=str, help='Comma-separated list of game files to import (e.g., mlb_2025.json,nba_2025.json)')
    p.add_argument('--league-structure-only', action='store_true', help='Import only league structure')
    p.add_argument('--team-aliases-only', action='store_true', help='Import only team aliases')
    p.add_argument('--stadium-aliases-only', action='store_true', help='Import only stadium aliases')
    p.add_argument('--canonical-only', action='store_true', help='Import only canonical data (league structure + team aliases + stadium aliases)')
    p.add_argument('--delete-all', action='store_true', help='Delete all records before importing')
    p.add_argument('--delete-only', action='store_true', help='Only delete records, do not import')
    p.add_argument('--diff', action='store_true', help='Show diff between local and CloudKit without importing')
    p.add_argument('--smart-sync', action='store_true', help='Differential sync: only upload new/changed records')
    p.add_argument('--delete-orphans', action='store_true', help='With --smart-sync, also delete records not in local data')
    p.add_argument('--dry-run', action='store_true')
    p.add_argument('--verbose', '-v', action='store_true')
    p.add_argument('--interactive', '-i', action='store_true', help='Show interactive menu')
    args = p.parse_args()
    # Show interactive menu if no action flags provided or --interactive
    has_action_flag = any([
        args.stadiums_only, args.games_only, args.games_files,
        args.league_structure_only, args.team_aliases_only, args.stadium_aliases_only,
        args.canonical_only, args.delete_all, args.delete_only, args.dry_run,
        args.diff, args.smart_sync
    ])
    # Track selected game files (for option 4 or --games-files)
    selected_game_files = None
    if args.games_files:
        # Parse comma-separated list from command line
        selected_game_files = [f.strip() for f in args.games_files.split(',')]
        args.games_only = True  # Imply --games-only
    if args.interactive or not has_action_flag:
        choice = show_menu()
        if choice is None:
            return
        # Map menu choice to flags
        if choice == 1:  # Import all
            pass  # Default behavior
        elif choice == 2:  # Stadiums only
            args.stadiums_only = True
        elif choice == 3:  # Games only (all files)
            args.games_only = True
        elif choice == 4:  # Games - select specific files
            args.games_only = True
            selected_game_files = show_game_files_menu(Path(args.data_dir))
            if not selected_game_files:
                print("No files selected. Exiting.")
                return
        elif choice == 5:  # League structure only
            args.league_structure_only = True
        elif choice == 6:  # Team aliases only
            args.team_aliases_only = True
        elif choice == 7:  # Stadium aliases only
            args.stadium_aliases_only = True
        elif choice == 8:  # Canonical only
            args.canonical_only = True
        elif choice == 9:  # Delete all then import
            args.delete_all = True
        elif choice == 10:  # Delete only
            args.delete_only = True
        elif choice == 11:  # Dry run
            args.dry_run = True
        elif choice == 12:  # Smart sync
            args.smart_sync = True
        elif choice == 13:  # Smart sync + delete orphans
            args.smart_sync = True
            args.delete_orphans = True
    print(f"\n{'='*50}")
    print(f"CloudKit Import {'(DRY RUN)' if args.dry_run else ''}")
    print(f"{'='*50}")
    print(f"Container: {args.container}")
    print(f"Environment: {args.env}\n")
    data_dir = Path(args.data_dir)
    # Load canonical format files (from canonicalization pipeline)
    # Fall back to legacy format for backward compatibility
    # NOTE(review): json.load(open(...)) leaves file handles to be closed by GC;
    # a `with` block would be tighter.
    if (data_dir / 'stadiums_canonical.json').exists():
        stadiums = json.load(open(data_dir / 'stadiums_canonical.json'))
        use_canonical = True
    else:
        stadiums = json.load(open(data_dir / 'stadiums.json'))
        use_canonical = False
    if (data_dir / 'teams_canonical.json').exists():
        teams = json.load(open(data_dir / 'teams_canonical.json'))
    else:
        teams = []  # Legacy: extracted from stadiums
    # Load games: try new structure first (canonical/games/*.json), then fallback
    canonical_games_dir = data_dir / 'canonical' / 'games'
    games = []
    games_source = None
    if selected_game_files:
        # Load only the selected files
        for filename in selected_game_files:
            filepath = canonical_games_dir / filename
            if filepath.exists():
                with open(filepath) as f:
                    file_games = json.load(f)
                games.extend(file_games)
                print(f" Loading (unknown): {len(file_games):,} games")
        games_source = f"selected files: {', '.join(selected_game_files)}"
    elif canonical_games_dir.exists() and any(canonical_games_dir.glob('*.json')):
        # New structure: load all sport/season files
        for games_file in sorted(canonical_games_dir.glob('*.json')):
            with open(games_file) as f:
                file_games = json.load(f)
            games.extend(file_games)
        games_source = "canonical/games/*.json"
    elif (data_dir / 'games_canonical.json').exists():
        games = json.load(open(data_dir / 'games_canonical.json'))
        games_source = "games_canonical.json"
    elif (data_dir / 'games.json').exists():
        games = json.load(open(data_dir / 'games.json'))
        games_source = "games.json (legacy)"
    # Auxiliary canonical files are all optional; default to empty lists.
    league_structure = json.load(open(data_dir / 'league_structure.json')) if (data_dir / 'league_structure.json').exists() else []
    team_aliases = json.load(open(data_dir / 'team_aliases.json')) if (data_dir / 'team_aliases.json').exists() else []
    stadium_aliases = json.load(open(data_dir / 'stadium_aliases.json')) if (data_dir / 'stadium_aliases.json').exists() else []
    print(f"Using {'canonical' if use_canonical else 'legacy'} format")
    print(f"Loaded {len(stadiums)} stadiums, {len(teams)} teams, {len(games)} games")
    if games_source:
        print(f" Games loaded from: {games_source}")
    print(f"Loaded {len(league_structure)} league structures, {len(team_aliases)} team aliases, {len(stadium_aliases)} stadium aliases\n")
    # A CloudKit client is only constructed when we will actually talk to the API.
    ck = None
    if not args.dry_run:
        if not HAS_CRYPTO:
            sys.exit("Error: pip install cryptography")
        if not os.path.exists(args.key_file):
            sys.exit(f"Error: Key file not found: {args.key_file}")
        ck = CloudKit(args.key_id, open(args.key_file, 'rb').read(), args.container, args.env)
    # Handle diff mode (show diff without importing)
    if args.diff:
        if not ck:
            # Need CloudKit connection for diff
            if not HAS_CRYPTO:
                sys.exit("Error: pip install cryptography")
            if not os.path.exists(args.key_file):
                sys.exit(f"Error: Key file not found: {args.key_file}")
            ck = CloudKit(args.key_id, open(args.key_file, 'rb').read(), args.container, args.env)
        show_diff_report(ck, args.data_dir, verbose=args.verbose)
        return
    # Handle smart sync mode (differential upload)
    if args.smart_sync:
        if not ck:
            # Need CloudKit connection for smart sync
            if not HAS_CRYPTO:
                sys.exit("Error: pip install cryptography")
            if not os.path.exists(args.key_file):
                sys.exit(f"Error: Key file not found: {args.key_file}")
            ck = CloudKit(args.key_id, open(args.key_file, 'rb').read(), args.container, args.env)
        run_smart_sync(ck, args.data_dir, dry_run=args.dry_run, verbose=args.verbose, delete_orphans=args.delete_orphans)
        return
    # Handle deletion
    if args.delete_all or args.delete_only:
        if not ck:
            sys.exit("Error: --key-id and --key-file required for deletion")
        print("--- Deleting Existing Records ---")
        # Delete in order: dependent records first, then base records
        for record_type in ['Game', 'TeamAlias', 'StadiumAlias', 'Team', 'LeagueStructure', 'Stadium']:
            print(f" Deleting {record_type} records...")
            deleted = ck.delete_all(record_type, verbose=args.verbose)
            print(f" Deleted {deleted} {record_type} records")
        if args.delete_only:
            print(f"\n{'='*50}")
            print("DELETE COMPLETE")
            print()
            return
    stats = {'stadiums': 0, 'teams': 0, 'games': 0, 'league_structures': 0, 'team_aliases': 0, 'stadium_aliases': 0}
    # (sport, abbreviation) -> team record UUID; filled while importing teams.
    team_map = {}
    # Determine what to import based on flags
    import_stadiums = not args.games_only and not args.league_structure_only and not args.team_aliases_only and not args.stadium_aliases_only and not args.canonical_only
    import_teams = not args.games_only and not args.league_structure_only and not args.team_aliases_only and not args.stadium_aliases_only and not args.canonical_only
    import_games = not args.stadiums_only and not args.league_structure_only and not args.team_aliases_only and not args.stadium_aliases_only and not args.canonical_only
    import_league_structure = args.league_structure_only or args.canonical_only or (not args.stadiums_only and not args.games_only and not args.team_aliases_only and not args.stadium_aliases_only)
    import_team_aliases = args.team_aliases_only or args.canonical_only or (not args.stadiums_only and not args.games_only and not args.league_structure_only and not args.stadium_aliases_only)
    import_stadium_aliases = args.stadium_aliases_only or args.canonical_only or (not args.stadiums_only and not args.games_only and not args.league_structure_only and not args.team_aliases_only)
    # Build stadium ID lookup
    # Canonical format uses canonical_id, legacy uses id
    def get_stadium_id(s: dict) -> str:
        return s.get('canonical_id', s.get('id', ''))
    def get_team_id(t: dict) -> str:
        return t.get('canonical_id', '')
    stadium_id_map = {get_stadium_id(s): deterministic_uuid(get_stadium_id(s)) for s in stadiums}
    # Import stadiums
    if import_stadiums:
        print("--- Stadiums ---")
        recs = []
        for s in stadiums:
            stadium_id = get_stadium_id(s)
            record_name = deterministic_uuid(stadium_id)
            # Canonical format uses primary_team_abbrevs, legacy uses team_abbrevs
            team_abbrevs = s.get('primary_team_abbrevs', s.get('team_abbrevs', []))
            fields = {
                'stadiumId': {'value': record_name},
                'canonicalId': {'value': stadium_id},  # Store canonical_id as string
                'name': {'value': s['name']},
                'city': {'value': s['city']},
                'state': {'value': s.get('state', '')},
                'sport': {'value': s['sport']},
                'source': {'value': s.get('source', 'canonical')},
                'teamAbbrevs': {'value': team_abbrevs},
            }
            # NOTE(review): latitude==0 is falsy and would skip the location field;
            # presumably no stadium sits on the equator, but worth confirming.
            if s.get('latitude'):
                fields['location'] = {'value': {'latitude': s['latitude'], 'longitude': s['longitude']}}
            if s.get('capacity'):
                fields['capacity'] = {'value': s['capacity']}
            recs.append({'recordType': 'Stadium', 'recordName': record_name, 'fields': fields})
        stats['stadiums'] = import_data(ck, recs, 'stadiums', args.dry_run, args.verbose)
    # Import teams (canonical format has dedicated teams file)
    if import_teams:
        print("--- Teams ---")
        if teams:
            # Canonical format: use teams_canonical.json
            recs = []
            for t in teams:
                team_id = get_team_id(t)
                record_name = deterministic_uuid(team_id)
                team_map[(t['sport'], t['abbreviation'])] = record_name
                fields = {
                    'teamId': {'value': record_name},
                    'canonicalId': {'value': team_id},  # Store canonical_id as string
                    'abbreviation': {'value': t['abbreviation']},
                    'name': {'value': t['name']},
                    'city': {'value': t['city']},
                    'sport': {'value': t['sport']},
                    'stadiumCanonicalId': {'value': t.get('stadium_canonical_id', '')},
                }
                if t.get('conference_id'):
                    fields['conferenceId'] = {'value': t['conference_id']}
                if t.get('division_id'):
                    fields['divisionId'] = {'value': t['division_id']}
                recs.append({'recordType': 'Team', 'recordName': record_name, 'fields': fields})
            stats['teams'] = import_data(ck, recs, 'teams', args.dry_run, args.verbose)
        else:
            # Legacy format: extract teams from stadiums
            teams_dict = {}
            for s in stadiums:
                team_abbrevs = s.get('primary_team_abbrevs', s.get('team_abbrevs', []))
                for abbr in team_abbrevs:
                    team_key = f"{s['sport']}_{abbr}"
                    if team_key not in teams_dict:
                        teams_dict[team_key] = {'abbr': abbr, 'city': s['city'], 'sport': s['sport']}
                    team_uuid = deterministic_uuid(team_key)
                    team_map[(s['sport'], abbr)] = team_uuid
            recs = [{
                'recordType': 'Team',
                'recordName': deterministic_uuid(team_key),
                'fields': {
                    'teamId': {'value': deterministic_uuid(team_key)},
                    'canonicalId': {'value': team_key},
                    'abbreviation': {'value': info['abbr']},
                    'name': {'value': info['abbr']},
                    'city': {'value': info['city']},
                    'sport': {'value': info['sport']},
                }
            } for team_key, info in teams_dict.items()]
            stats['teams'] = import_data(ck, recs, 'teams', args.dry_run, args.verbose)
    # Import games
    if import_games and games:
        # Detect canonical game format (has canonical_id field)
        use_canonical_games = games and 'canonical_id' in games[0]
        # Rebuild team_map if only importing games (--games-only flag)
        if not team_map:
            if teams:
                # Canonical format: use teams_canonical.json
                for t in teams:
                    team_id = get_team_id(t)
                    team_map[(t['sport'], t['abbreviation'])] = deterministic_uuid(team_id)
            else:
                # Legacy format: extract from stadiums
                for s in stadiums:
                    team_abbrevs = s.get('primary_team_abbrevs', s.get('team_abbrevs', []))
                    for abbr in team_abbrevs:
                        team_key = f"{s['sport']}_{abbr}"
                        team_map[(s['sport'], abbr)] = deterministic_uuid(team_key)
        # Build team -> stadium map for stadiumRef (legacy format needs this)
        team_stadium_map = {}
        for s in stadiums:
            stadium_id = get_stadium_id(s)
            stadium_uuid = stadium_id_map[stadium_id]
            team_abbrevs = s.get('primary_team_abbrevs', s.get('team_abbrevs', []))
            for abbr in team_abbrevs:
                team_stadium_map[(s['sport'], abbr)] = stadium_uuid
        print("--- Games ---")
        print(f" Using {'canonical' if use_canonical_games else 'legacy'} game format")
        # Deduplicate games by ID (canonical_id or id)
        seen_ids = set()
        unique_games = []
        for g in games:
            game_id = g.get('canonical_id', g.get('id', ''))
            if game_id not in seen_ids:
                seen_ids.add(game_id)
                unique_games.append(g)
        if len(unique_games) < len(games):
            print(f" Removed {len(games) - len(unique_games)} duplicate games")
        recs = []
        for g in unique_games:
            # Get game ID (canonical or legacy)
            game_id = g.get('canonical_id', g.get('id', ''))
            game_uuid = deterministic_uuid(game_id)
            sport = g['sport']
            fields = {
                'gameId': {'value': game_uuid},
                'canonicalId': {'value': game_id},  # Store canonical_id as string
                'sport': {'value': sport},
                'season': {'value': g.get('season', '')},
                'source': {'value': g.get('source', 'canonical' if use_canonical_games else '')},
            }
            # Parse date/time
            if g.get('date'):
                try:
                    # Parse time like "7:30p" or "10:00a"
                    time_str = g.get('time', '7:00p')
                    hour, minute = 19, 0
                    if time_str:
                        clean_time = time_str.lower().replace(' ', '')
                        is_pm = 'p' in clean_time
                        time_parts = clean_time.replace('p', '').replace('a', '').split(':')
                        if time_parts:
                            hour = int(time_parts[0])
                            if is_pm and hour != 12:
                                hour += 12
                            elif not is_pm and hour == 12:
                                hour = 0
                            if len(time_parts) > 1:
                                minute = int(time_parts[1])
                    # NOTE(review): naive datetime — .timestamp() uses the local
                    # timezone of the machine running the import; confirm intended.
                    dt = datetime.strptime(f"{g['date']} {hour:02d}:{minute:02d}", '%Y-%m-%d %H:%M')
                    # CloudKit expects TIMESTAMP type with milliseconds since epoch
                    fields['dateTime'] = {'value': int(dt.timestamp() * 1000), 'type': 'TIMESTAMP'}
                except Exception as e:
                    if args.verbose:
                        print(f" Warning: Failed to parse date/time for {game_id}: {e}")
            # Team references
            if use_canonical_games:
                # Canonical format: extract team abbrev from canonical ID (team_nba_atl -> atl)
                home_team_canonical_id = g.get('home_team_canonical_id', '')
                away_team_canonical_id = g.get('away_team_canonical_id', '')
                home_team_uuid = deterministic_uuid(home_team_canonical_id)
                away_team_uuid = deterministic_uuid(away_team_canonical_id)
            else:
                # Legacy format: use abbreviations
                home_team_key = f"{sport}_{g.get('home_team_abbrev', '')}"
                away_team_key = f"{sport}_{g.get('away_team_abbrev', '')}"
                home_team_uuid = deterministic_uuid(home_team_key)
                away_team_uuid = deterministic_uuid(away_team_key)
            fields['homeTeamRef'] = {'value': {'recordName': home_team_uuid, 'action': 'NONE'}}
            fields['awayTeamRef'] = {'value': {'recordName': away_team_uuid, 'action': 'NONE'}}
            # Stadium reference
            if use_canonical_games and g.get('stadium_canonical_id'):
                # Canonical format: use stadium_canonical_id directly
                stadium_canonical_id = g['stadium_canonical_id']
                stadium_uuid = stadium_id_map.get(stadium_canonical_id)
                if stadium_uuid:
                    fields['stadiumRef'] = {'value': {'recordName': stadium_uuid, 'action': 'NONE'}}
                fields['stadiumCanonicalId'] = {'value': stadium_canonical_id}
            else:
                # Legacy format: look up by home team abbrev
                stadium_uuid = team_stadium_map.get((sport, g.get('home_team_abbrev', '')))
                if stadium_uuid:
                    fields['stadiumRef'] = {'value': {'recordName': stadium_uuid, 'action': 'NONE'}}
            recs.append({'recordType': 'Game', 'recordName': game_uuid, 'fields': fields})
        stats['games'] = import_data(ck, recs, 'games', args.dry_run, args.verbose)
    # Import league structure
    if import_league_structure and league_structure:
        print("--- League Structure ---")
        now_ms = int(datetime.now(timezone.utc).timestamp() * 1000)
        recs = [{
            'recordType': 'LeagueStructure',
            'recordName': ls['id'],  # Use the id as recordName
            'fields': {
                'structureId': {'value': ls['id']},
                'sport': {'value': ls['sport']},
                'type': {'value': ls['type']},
                'name': {'value': ls['name']},
                'displayOrder': {'value': ls['display_order']},
                'schemaVersion': {'value': 1},
                'lastModified': {'value': now_ms, 'type': 'TIMESTAMP'},
                **({'abbreviation': {'value': ls['abbreviation']}} if ls.get('abbreviation') else {}),
                **({'parentId': {'value': ls['parent_id']}} if ls.get('parent_id') else {}),
            }
        } for ls in league_structure]
        stats['league_structures'] = import_data(ck, recs, 'league structures', args.dry_run, args.verbose)
    # Import team aliases
    if import_team_aliases and team_aliases:
        print("--- Team Aliases ---")
        now_ms = int(datetime.now(timezone.utc).timestamp() * 1000)
        recs = []
        for ta in team_aliases:
            fields = {
                'aliasId': {'value': ta['id']},
                'teamCanonicalId': {'value': ta['team_canonical_id']},
                'aliasType': {'value': ta['alias_type']},
                'aliasValue': {'value': ta['alias_value']},
                'schemaVersion': {'value': 1},
                'lastModified': {'value': now_ms, 'type': 'TIMESTAMP'},
            }
            # Add optional date fields
            # NOTE(review): bare except silently drops malformed validity dates.
            if ta.get('valid_from'):
                try:
                    dt = datetime.strptime(ta['valid_from'], '%Y-%m-%d')
                    fields['validFrom'] = {'value': int(dt.timestamp() * 1000), 'type': 'TIMESTAMP'}
                except:
                    pass
            if ta.get('valid_until'):
                try:
                    dt = datetime.strptime(ta['valid_until'], '%Y-%m-%d')
                    fields['validUntil'] = {'value': int(dt.timestamp() * 1000), 'type': 'TIMESTAMP'}
                except:
                    pass
            recs.append({
                'recordType': 'TeamAlias',
                'recordName': ta['id'],  # Use the id as recordName
                'fields': fields
            })
        stats['team_aliases'] = import_data(ck, recs, 'team aliases', args.dry_run, args.verbose)
    # Import stadium aliases
    if import_stadium_aliases and stadium_aliases:
        print("--- Stadium Aliases ---")
        now_ms = int(datetime.now(timezone.utc).timestamp() * 1000)
        recs = []
        for sa in stadium_aliases:
            fields = {
                'aliasName': {'value': sa['alias_name'].lower()},  # Normalize to lowercase
                'stadiumCanonicalId': {'value': sa['stadium_canonical_id']},
                'schemaVersion': {'value': 1},
                'lastModified': {'value': now_ms, 'type': 'TIMESTAMP'},
            }
            # Add optional date fields
            if sa.get('valid_from'):
                try:
                    dt = datetime.strptime(sa['valid_from'], '%Y-%m-%d')
                    fields['validFrom'] = {'value': int(dt.timestamp() * 1000), 'type': 'TIMESTAMP'}
                except:
                    pass
            if sa.get('valid_until'):
                try:
                    dt = datetime.strptime(sa['valid_until'], '%Y-%m-%d')
                    fields['validUntil'] = {'value': int(dt.timestamp() * 1000), 'type': 'TIMESTAMP'}
                except:
                    pass
            # Extract sport from stadium_canonical_id (e.g., "stadium_nba_td_garden" -> "nba")
            # This makes record names unique for shared venues (TD Garden has NBA and NHL entries)
            stadium_id = sa['stadium_canonical_id']
            sport = stadium_id.split('_')[1] if '_' in stadium_id else 'unknown'
            record_name = f"{sport}_{sa['alias_name'].lower()}"
            recs.append({
                'recordType': 'StadiumAlias',
                'recordName': record_name,
                'fields': fields
            })
        stats['stadium_aliases'] = import_data(ck, recs, 'stadium aliases', args.dry_run, args.verbose)
    print(f"\n{'='*50}")
    print(f"COMPLETE: {stats['stadiums']} stadiums, {stats['teams']} teams, {stats['games']} games, {stats['league_structures']} league structures, {stats['team_aliases']} team aliases, {stats['stadium_aliases']} stadium aliases")
    if args.dry_run:
        print("[DRY RUN - nothing imported]")
    print()


if __name__ == '__main__':
    main()