def sync_diff(ck, diff, record_type, dry_run=False, verbose=False, delete_orphans=False):
    """
    Sync only changed records based on a precomputed diff.

    Args:
        ck: CloudKit client exposing modify(ops).
        diff: dict with 'new', 'updated', 'unchanged', 'deleted' item lists,
              as produced by compute_diff().
        record_type: CloudKit record type name (e.g. 'Stadium').
        dry_run: if True, only report what would change; no network writes.
        verbose: print per-record failure details.
        delete_orphans: if True, delete cloud records absent from local data.

    Returns:
        dict of counts: created, updated, deleted, skipped, errors.
    """
    stats = {'created': 0, 'updated': 0, 'deleted': 0, 'skipped': 0, 'errors': 0}

    # --- New records: forceReplace (no change tag required) ---
    new_records = [item['record'] for item in diff['new']]
    if new_records:
        if dry_run:
            print(f" [DRY RUN] Would create {len(new_records)} new {record_type}")
            stats['created'] = len(new_records)
        else:
            for i in range(0, len(new_records), BATCH_SIZE):
                batch = new_records[i:i+BATCH_SIZE]
                ops = [{'operationType': 'forceReplace', 'record': r} for r in batch]
                result = ck.modify(ops)
                if 'error' in result:
                    print(f" Create error: {result['error']}")
                    stats['errors'] += len(batch)
                else:
                    result_records = result.get('records', [])
                    successful = [r for r in result_records if 'serverErrorCode' not in r]
                    failed = [r for r in result_records if 'serverErrorCode' in r]
                    stats['created'] += len(successful)
                    stats['errors'] += len(failed)
                    if failed and verbose:
                        print(f" Create failed: {failed[0].get('serverErrorCode')}: {failed[0].get('reason')}")
                # Throttle between batches to stay under CloudKit rate limits.
                time.sleep(0.3)

    # --- Updated records: 'update' op with recordChangeTag for optimistic locking ---
    updated_items = diff['updated']
    if updated_items:
        if dry_run:
            print(f" [DRY RUN] Would update {len(updated_items)} {record_type}")
            if verbose:
                for item in updated_items[:5]:
                    print(f" - {item['record'].get('recordName')}: {', '.join(item.get('changed_fields', []))}")
                if len(updated_items) > 5:
                    print(f" ... and {len(updated_items) - 5} more")
            stats['updated'] = len(updated_items)
        else:
            for i in range(0, len(updated_items), BATCH_SIZE):
                batch = updated_items[i:i+BATCH_SIZE]
                ops = []
                for item in batch:
                    record = item['record'].copy()
                    record['recordChangeTag'] = item['recordChangeTag']
                    ops.append({'operationType': 'update', 'record': record})

                result = ck.modify(ops)
                if 'error' in result:
                    print(f" Update error: {result['error']}")
                    stats['errors'] += len(batch)
                else:
                    result_records = result.get('records', [])
                    successful = [r for r in result_records if 'serverErrorCode' not in r]
                    failed = [r for r in result_records if 'serverErrorCode' in r]
                    stats['updated'] += len(successful)

                    # Split failures: CONFLICT (stale change tag) vs everything else.
                    conflicts = [r for r in failed if r.get('serverErrorCode') == 'CONFLICT']
                    other_errors = [r for r in failed if r.get('serverErrorCode') != 'CONFLICT']

                    if conflicts:
                        print(f" {len(conflicts)} conflicts detected (records modified since query)")
                        # Retry with forceReplace; data loss is acceptable since local is source of truth.
                        conflict_names = {c.get('recordName') for c in conflicts}
                        conflict_records = [item['record'] for item in batch
                                            if item['record'].get('recordName') in conflict_names]
                        retried_ok = 0
                        if conflict_records:
                            retry_ops = [{'operationType': 'forceReplace', 'record': r} for r in conflict_records]
                            retry_result = ck.modify(retry_ops)
                            if 'error' not in retry_result:
                                retry_records = retry_result.get('records', [])
                                retried_ok = sum(1 for r in retry_records if 'serverErrorCode' not in r)
                                stats['updated'] += retried_ok
                        # BUGFIX: conflicts that were NOT successfully retried count as errors.
                        # The previous code did `stats['errors'] -= len(retry_success)` without
                        # ever having incremented errors for conflicts, which could drive the
                        # counter negative and silently hid failed retries.
                        stats['errors'] += len(conflicts) - retried_ok

                    if other_errors:
                        stats['errors'] += len(other_errors)
                        if verbose:
                            print(f" Update failed: {other_errors[0].get('serverErrorCode')}: {other_errors[0].get('reason')}")

                time.sleep(0.3)

    # --- Orphan records in CloudKit (only removed when delete_orphans is set) ---
    deleted_items = diff['deleted']
    if deleted_items:
        if delete_orphans:
            if dry_run:
                print(f" [DRY RUN] Would delete {len(deleted_items)} orphan {record_type}")
                stats['deleted'] = len(deleted_items)
            else:
                for i in range(0, len(deleted_items), BATCH_SIZE):
                    batch = deleted_items[i:i+BATCH_SIZE]
                    ops = [{
                        'operationType': 'delete',
                        'record': {
                            'recordName': item['record'].get('recordName'),
                            'recordType': record_type,
                            'recordChangeTag': item['recordChangeTag']
                        }
                    } for item in batch]

                    result = ck.modify(ops)
                    if 'error' in result:
                        print(f" Delete error: {result['error']}")
                        stats['errors'] += len(batch)
                    else:
                        result_records = result.get('records', [])
                        successful = [r for r in result_records if 'serverErrorCode' not in r]
                        failed = [r for r in result_records if 'serverErrorCode' in r]
                        stats['deleted'] += len(successful)
                        stats['errors'] += len(failed)

                    time.sleep(0.3)
        else:
            print(f" Warning: {len(deleted_items)} orphan {record_type} in CloudKit (use --delete-orphans to remove)")

    # Unchanged records are reported as skipped.
    stats['skipped'] = len(diff['unchanged'])

    return stats


def run_smart_sync(ck, data_dir, dry_run=False, verbose=False, delete_orphans=False):
    """
    Run differential (diff-based) sync for all record types.

    Loads local canonical JSON from *data_dir*, queries CloudKit for each
    record type, computes a diff via compute_diff(), and applies only the
    changes through sync_diff().

    Args:
        ck: CloudKit client exposing query_all() and modify().
        data_dir: path to the directory holding the canonical JSON files.
        dry_run: preview only; no writes.
        verbose: verbose per-record output.
        delete_orphans: also delete cloud records absent from local data.

    Returns:
        dict of accumulated counts: created, updated, deleted, skipped, errors.
    """
    from pathlib import Path
    data_dir = Path(data_dir)

    def _load_json(path, default):
        # Load a JSON file if present, else return *default*.
        # BUGFIX: original used `json.load(open(...))`, leaking file handles.
        if path.exists():
            with open(path) as f:
                return json.load(f)
        return default

    print("\n" + "="*50)
    print(f"CloudKit Smart Sync {'(DRY RUN)' if dry_run else ''}")
    print("="*50 + "\n")

    # Load local canonical data (missing files are treated as empty lists).
    stadiums = _load_json(data_dir / 'stadiums_canonical.json', [])
    teams = _load_json(data_dir / 'teams_canonical.json', [])

    # Games are split across canonical/games/*.json; concatenate them all.
    canonical_games_dir = data_dir / 'canonical' / 'games'
    games = []
    if canonical_games_dir.exists():
        for games_file in sorted(canonical_games_dir.glob('*.json')):
            with open(games_file) as f:
                games.extend(json.load(f))

    league_structure = _load_json(data_dir / 'league_structure.json', [])
    team_aliases = _load_json(data_dir / 'team_aliases.json', [])
    stadium_aliases = _load_json(data_dir / 'stadium_aliases.json', [])

    print(f"Local data: {len(stadiums)} stadiums, {len(teams)} teams, {len(games)} games")
    print(f" {len(league_structure)} league structures, {len(team_aliases)} team aliases, {len(stadium_aliases)} stadium aliases\n")

    def build_stadium_records(stadiums):
        # Map local stadium dicts -> CloudKit Stadium records keyed by recordName.
        records = {}
        for s in stadiums:
            stadium_id = s.get('canonical_id', s.get('id', ''))
            record_name = deterministic_uuid(stadium_id)
            team_abbrevs = s.get('primary_team_abbrevs', s.get('team_abbrevs', []))
            fields = {
                'stadiumId': {'value': record_name},
                'canonicalId': {'value': stadium_id},
                'name': {'value': s['name']},
                'city': {'value': s['city']},
                'state': {'value': s.get('state', '')},
                'sport': {'value': s['sport']},
                'source': {'value': s.get('source', 'canonical')},
                'teamAbbrevs': {'value': team_abbrevs},
            }
            if s.get('latitude'):
                fields['location'] = {'value': {'latitude': s['latitude'], 'longitude': s['longitude']}}
            if s.get('capacity'):
                fields['capacity'] = {'value': s['capacity']}
            records[record_name] = {'recordType': 'Stadium', 'recordName': record_name, 'fields': fields}
        return records

    def build_team_records(teams):
        # Map local team dicts -> CloudKit Team records keyed by recordName.
        records = {}
        for t in teams:
            team_id = t.get('canonical_id', '')
            record_name = deterministic_uuid(team_id)
            fields = {
                'teamId': {'value': record_name},
                'canonicalId': {'value': team_id},
                'abbreviation': {'value': t['abbreviation']},
                'name': {'value': t['name']},
                'city': {'value': t['city']},
                'sport': {'value': t['sport']},
                'stadiumCanonicalId': {'value': t.get('stadium_canonical_id', '')},
            }
            if t.get('conference_id'):
                fields['conferenceId'] = {'value': t['conference_id']}
            if t.get('division_id'):
                fields['divisionId'] = {'value': t['division_id']}
            records[record_name] = {'recordType': 'Team', 'recordName': record_name, 'fields': fields}
        return records

    def build_game_records(games, stadiums):
        # Map local game dicts -> CloudKit Game records, de-duplicated by canonical id,
        # with references to teams and (when resolvable) stadiums.
        records = {}
        stadium_id_map = {s.get('canonical_id', s.get('id', '')): deterministic_uuid(s.get('canonical_id', s.get('id', ''))) for s in stadiums}
        seen_ids = set()
        for g in games:
            game_id = g.get('canonical_id', g.get('id', ''))
            if game_id in seen_ids:
                continue
            seen_ids.add(game_id)
            game_uuid = deterministic_uuid(game_id)
            sport = g['sport']
            fields = {
                'gameId': {'value': game_uuid},
                'canonicalId': {'value': game_id},
                'sport': {'value': sport},
                'season': {'value': g.get('season', '')},
                'source': {'value': g.get('source', 'canonical')},
            }
            if g.get('date'):
                # Parse times like "7:05p" / "12:30a"; default to 7:00 PM local.
                try:
                    time_str = g.get('time', '7:00p')
                    hour, minute = 19, 0
                    if time_str:
                        clean_time = time_str.lower().replace(' ', '')
                        is_pm = 'p' in clean_time
                        time_parts = clean_time.replace('p', '').replace('a', '').split(':')
                        if time_parts:
                            hour = int(time_parts[0])
                            if is_pm and hour != 12:
                                hour += 12
                            elif not is_pm and hour == 12:
                                hour = 0
                            if len(time_parts) > 1:
                                minute = int(time_parts[1])
                    dt = datetime.strptime(f"{g['date']} {hour:02d}:{minute:02d}", '%Y-%m-%d %H:%M')
                    fields['dateTime'] = {'value': int(dt.timestamp() * 1000), 'type': 'TIMESTAMP'}
                except (ValueError, IndexError):
                    # BUGFIX: was a bare `except:` which also swallowed
                    # KeyboardInterrupt/SystemExit. Malformed dates/times are
                    # still skipped (record uploads without dateTime).
                    pass
            home_team_canonical_id = g.get('home_team_canonical_id', '')
            away_team_canonical_id = g.get('away_team_canonical_id', '')
            home_team_uuid = deterministic_uuid(home_team_canonical_id)
            away_team_uuid = deterministic_uuid(away_team_canonical_id)
            fields['homeTeamRef'] = {'value': {'recordName': home_team_uuid, 'action': 'NONE'}}
            fields['awayTeamRef'] = {'value': {'recordName': away_team_uuid, 'action': 'NONE'}}
            if g.get('stadium_canonical_id'):
                stadium_canonical_id = g['stadium_canonical_id']
                stadium_uuid = stadium_id_map.get(stadium_canonical_id)
                if stadium_uuid:
                    fields['stadiumRef'] = {'value': {'recordName': stadium_uuid, 'action': 'NONE'}}
                fields['stadiumCanonicalId'] = {'value': stadium_canonical_id}
            records[game_uuid] = {'recordType': 'Game', 'recordName': game_uuid, 'fields': fields}
        return records

    def build_league_structure_records(league_structure):
        # Map league structure entries -> CloudKit LeagueStructure records.
        records = {}
        for ls in league_structure:
            record_name = ls['id']
            fields = {
                'structureId': {'value': ls['id']},
                'sport': {'value': ls['sport']},
                'type': {'value': ls['type']},
                'name': {'value': ls['name']},
                'displayOrder': {'value': ls['display_order']},
                'schemaVersion': {'value': 1},
            }
            if ls.get('abbreviation'):
                fields['abbreviation'] = {'value': ls['abbreviation']}
            if ls.get('parent_id'):
                fields['parentId'] = {'value': ls['parent_id']}
            records[record_name] = {'recordType': 'LeagueStructure', 'recordName': record_name, 'fields': fields}
        return records

    def build_team_alias_records(team_aliases):
        # Map team alias entries -> CloudKit TeamAlias records.
        records = {}
        for ta in team_aliases:
            record_name = ta['id']
            fields = {
                'aliasId': {'value': ta['id']},
                'teamCanonicalId': {'value': ta['team_canonical_id']},
                'aliasType': {'value': ta['alias_type']},
                'aliasValue': {'value': ta['alias_value']},
                'schemaVersion': {'value': 1},
            }
            records[record_name] = {'recordType': 'TeamAlias', 'recordName': record_name, 'fields': fields}
        return records

    def build_stadium_alias_records(stadium_aliases):
        # Map stadium alias entries -> CloudKit StadiumAlias records.
        # recordName is "<sport>_<lowercased alias>"; the sport is presumably the
        # second "_"-separated token of the stadium canonical id — TODO confirm.
        records = {}
        for sa in stadium_aliases:
            stadium_id = sa['stadium_canonical_id']
            sport = stadium_id.split('_')[1] if '_' in stadium_id else 'unknown'
            record_name = f"{sport}_{sa['alias_name'].lower()}"
            fields = {
                'aliasName': {'value': sa['alias_name'].lower()},
                'stadiumCanonicalId': {'value': sa['stadium_canonical_id']},
                'schemaVersion': {'value': 1},
            }
            records[record_name] = {'recordType': 'StadiumAlias', 'recordName': record_name, 'fields': fields}
        return records

    # (record type name, local data, local-record builder)
    record_types = [
        ('Stadium', stadiums, build_stadium_records),
        ('Team', teams, build_team_records),
        ('Game', games, lambda g: build_game_records(g, stadiums)),
        ('LeagueStructure', league_structure, build_league_structure_records),
        ('TeamAlias', team_aliases, build_team_alias_records),
        ('StadiumAlias', stadium_aliases, build_stadium_alias_records),
    ]

    total_stats = {'created': 0, 'updated': 0, 'deleted': 0, 'skipped': 0, 'errors': 0}

    for record_type, data, builder in record_types:
        if not data:
            print(f"{record_type}: No local data, skipping")
            continue

        print(f"\n--- {record_type} ---")
        local_records = builder(data)

        # Query cloud records
        print(f" Querying CloudKit...")
        cloud_records = ck.query_all(record_type, verbose=verbose)
        print(f" Found {len(cloud_records)} cloud records, {len(local_records)} local records")

        # Compute diff
        diff = compute_diff(local_records, cloud_records, verbose=verbose)
        print(f" Diff: {len(diff['new'])} new, {len(diff['updated'])} updated, {len(diff['unchanged'])} unchanged, {len(diff['deleted'])} orphans")

        # Sync
        stats = sync_diff(ck, diff, record_type, dry_run=dry_run, verbose=verbose, delete_orphans=delete_orphans)

        # Accumulate stats
        for key in total_stats:
            total_stats[key] += stats[key]

        print(f" Result: {stats['created']} created, {stats['updated']} updated, {stats['deleted']} deleted, {stats['skipped']} skipped")
        if stats['errors']:
            print(f" Errors: {stats['errors']}")

    # Summary
    print("\n" + "="*50)
    print("Smart Sync Summary")
    print("="*50)
    print(f" Created: {total_stats['created']}")
    print(f" Updated: {total_stats['updated']}")
    print(f" Deleted: {total_stats['deleted']}")
    print(f" Skipped (unchanged): {total_stats['skipped']}")
    if total_stats['errors']:
        print(f" Errors: {total_stats['errors']}")
    if dry_run:
        print("\n[DRY RUN - no changes made]")

    return total_stats
default=DEFAULT_KEY_ID) @@ -710,6 +1068,8 @@ def main(): p.add_argument('--delete-all', action='store_true', help='Delete all records before importing') p.add_argument('--delete-only', action='store_true', help='Only delete records, do not import') p.add_argument('--diff', action='store_true', help='Show diff between local and CloudKit without importing') + p.add_argument('--smart-sync', action='store_true', help='Differential sync: only upload new/changed records') + p.add_argument('--delete-orphans', action='store_true', help='With --smart-sync, also delete records not in local data') p.add_argument('--dry-run', action='store_true') p.add_argument('--verbose', '-v', action='store_true') p.add_argument('--interactive', '-i', action='store_true', help='Show interactive menu') @@ -719,7 +1079,7 @@ def main(): has_action_flag = any([ args.stadiums_only, args.games_only, args.games_files, args.league_structure_only, args.team_aliases_only, args.stadium_aliases_only, args.canonical_only, - args.delete_all, args.delete_only, args.dry_run, args.diff + args.delete_all, args.delete_only, args.dry_run, args.diff, args.smart_sync ]) # Track selected game files (for option 4 or --games-files) @@ -761,6 +1121,11 @@ def main(): args.delete_only = True elif choice == 11: # Dry run args.dry_run = True + elif choice == 12: # Smart sync + args.smart_sync = True + elif choice == 13: # Smart sync + delete orphans + args.smart_sync = True + args.delete_orphans = True print(f"\n{'='*50}") print(f"CloudKit Import {'(DRY RUN)' if args.dry_run else ''}") @@ -843,6 +1208,18 @@ def main(): show_diff_report(ck, args.data_dir, verbose=args.verbose) return + # Handle smart sync mode (differential upload) + if args.smart_sync: + if not ck: + # Need CloudKit connection for smart sync + if not HAS_CRYPTO: + sys.exit("Error: pip install cryptography") + if not os.path.exists(args.key_file): + sys.exit(f"Error: Key file not found: {args.key_file}") + ck = CloudKit(args.key_id, open(args.key_file, 
'rb').read(), args.container, args.env) + run_smart_sync(ck, args.data_dir, dry_run=args.dry_run, verbose=args.verbose, delete_orphans=args.delete_orphans) + return + # Handle deletion if args.delete_all or args.delete_only: if not ck: