Add canonical ID pipeline and fix UUID consistency for CloudKit sync
- Add local canonicalization pipeline (stadiums, teams, games) that generates deterministic canonical IDs before CloudKit upload
- Fix CanonicalSyncService to use deterministic UUIDs derived from canonical IDs instead of random UUIDs from CloudKit records
- Add SyncStadium/SyncTeam/SyncGame types to CloudKitService that preserve canonical ID relationships during sync
- Add canonical ID field keys to CKModels for reading from CloudKit records
- Bundle canonical JSON files (stadiums_canonical, teams_canonical, games_canonical, stadium_aliases) for consistent bootstrap data
- Update BootstrapService to prefer canonical format files over legacy format

This ensures all entities use consistent deterministic UUIDs derived from their canonical IDs, preventing duplicate records when syncing CloudKit data with bootstrapped local data.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -2,7 +2,15 @@
|
||||
"""
|
||||
CloudKit Import Script
|
||||
======================
|
||||
Imports JSON data into CloudKit. Run separately from pipeline.
|
||||
Imports canonical JSON data into CloudKit. Run after canonicalization pipeline.
|
||||
|
||||
Expected input files (from canonicalization pipeline):
|
||||
- stadiums_canonical.json
|
||||
- teams_canonical.json
|
||||
- games_canonical.json
|
||||
- stadium_aliases.json
|
||||
- league_structure.json
|
||||
- team_aliases.json
|
||||
|
||||
Setup:
|
||||
1. CloudKit Dashboard > Tokens & Keys > Server-to-Server Keys
|
||||
@@ -309,12 +317,35 @@ def main():
|
||||
print(f"Environment: {args.env}\n")
|
||||
|
||||
data_dir = Path(args.data_dir)
|
||||
stadiums = json.load(open(data_dir / 'stadiums.json'))
|
||||
games = json.load(open(data_dir / 'games.json')) if (data_dir / 'games.json').exists() else []
|
||||
|
||||
# Load canonical format files (from canonicalization pipeline)
|
||||
# Fall back to legacy format for backward compatibility
|
||||
if (data_dir / 'stadiums_canonical.json').exists():
|
||||
stadiums = json.load(open(data_dir / 'stadiums_canonical.json'))
|
||||
use_canonical = True
|
||||
else:
|
||||
stadiums = json.load(open(data_dir / 'stadiums.json'))
|
||||
use_canonical = False
|
||||
|
||||
if (data_dir / 'teams_canonical.json').exists():
|
||||
teams = json.load(open(data_dir / 'teams_canonical.json'))
|
||||
else:
|
||||
teams = [] # Legacy: extracted from stadiums
|
||||
|
||||
if (data_dir / 'games_canonical.json').exists():
|
||||
games = json.load(open(data_dir / 'games_canonical.json'))
|
||||
elif (data_dir / 'games.json').exists():
|
||||
games = json.load(open(data_dir / 'games.json'))
|
||||
else:
|
||||
games = []
|
||||
|
||||
league_structure = json.load(open(data_dir / 'league_structure.json')) if (data_dir / 'league_structure.json').exists() else []
|
||||
team_aliases = json.load(open(data_dir / 'team_aliases.json')) if (data_dir / 'team_aliases.json').exists() else []
|
||||
stadium_aliases = json.load(open(data_dir / 'stadium_aliases.json')) if (data_dir / 'stadium_aliases.json').exists() else []
|
||||
print(f"Loaded {len(stadiums)} stadiums, {len(games)} games, {len(league_structure)} league structures, {len(team_aliases)} team aliases, {len(stadium_aliases)} stadium aliases\n")
|
||||
|
||||
print(f"Using {'canonical' if use_canonical else 'legacy'} format")
|
||||
print(f"Loaded {len(stadiums)} stadiums, {len(teams)} teams, {len(games)} games")
|
||||
print(f"Loaded {len(league_structure)} league structures, {len(team_aliases)} team aliases, {len(stadium_aliases)} stadium aliases\n")
|
||||
|
||||
ck = None
|
||||
if not args.dry_run:
|
||||
@@ -353,72 +384,135 @@ def main():
|
||||
import_team_aliases = args.team_aliases_only or args.canonical_only or (not args.stadiums_only and not args.games_only and not args.league_structure_only and not args.stadium_aliases_only)
|
||||
import_stadium_aliases = args.stadium_aliases_only or args.canonical_only or (not args.stadiums_only and not args.games_only and not args.league_structure_only and not args.team_aliases_only)
|
||||
|
||||
# Build stadium UUID lookup (stadium string ID -> UUID)
|
||||
stadium_uuid_map = {s['id']: deterministic_uuid(s['id']) for s in stadiums}
|
||||
# Build stadium ID lookup
|
||||
# Canonical format uses canonical_id, legacy uses id
|
||||
def get_stadium_id(s):
|
||||
return s.get('canonical_id', s.get('id', ''))
|
||||
|
||||
# Import stadiums & teams
|
||||
def get_team_id(t):
|
||||
return t.get('canonical_id', '')
|
||||
|
||||
stadium_id_map = {get_stadium_id(s): deterministic_uuid(get_stadium_id(s)) for s in stadiums}
|
||||
|
||||
# Import stadiums
|
||||
if import_stadiums:
|
||||
print("--- Stadiums ---")
|
||||
recs = [{
|
||||
'recordType': 'Stadium', 'recordName': stadium_uuid_map[s['id']],
|
||||
'fields': {
|
||||
'stadiumId': {'value': stadium_uuid_map[s['id']]}, 'name': {'value': s['name']},
|
||||
'city': {'value': s['city']}, 'state': {'value': s.get('state', '')},
|
||||
'sport': {'value': s['sport']}, 'source': {'value': s.get('source', '')},
|
||||
'teamAbbrevs': {'value': s.get('team_abbrevs', [])},
|
||||
**({'location': {'value': {'latitude': s['latitude'], 'longitude': s['longitude']}}}
|
||||
if s.get('latitude') else {}),
|
||||
**({'capacity': {'value': s['capacity']}} if s.get('capacity') else {}),
|
||||
recs = []
|
||||
for s in stadiums:
|
||||
stadium_id = get_stadium_id(s)
|
||||
record_name = deterministic_uuid(stadium_id)
|
||||
# Canonical format uses primary_team_abbrevs, legacy uses team_abbrevs
|
||||
team_abbrevs = s.get('primary_team_abbrevs', s.get('team_abbrevs', []))
|
||||
|
||||
fields = {
|
||||
'stadiumId': {'value': record_name},
|
||||
'canonicalId': {'value': stadium_id}, # Store canonical_id as string
|
||||
'name': {'value': s['name']},
|
||||
'city': {'value': s['city']},
|
||||
'state': {'value': s.get('state', '')},
|
||||
'sport': {'value': s['sport']},
|
||||
'source': {'value': s.get('source', 'canonical')},
|
||||
'teamAbbrevs': {'value': team_abbrevs},
|
||||
}
|
||||
} for s in stadiums]
|
||||
if s.get('latitude'):
|
||||
fields['location'] = {'value': {'latitude': s['latitude'], 'longitude': s['longitude']}}
|
||||
if s.get('capacity'):
|
||||
fields['capacity'] = {'value': s['capacity']}
|
||||
|
||||
recs.append({'recordType': 'Stadium', 'recordName': record_name, 'fields': fields})
|
||||
stats['stadiums'] = import_data(ck, recs, 'stadiums', args.dry_run, args.verbose)
|
||||
|
||||
# Import teams (canonical format has dedicated teams file)
|
||||
if import_teams:
|
||||
print("--- Teams ---")
|
||||
teams = {}
|
||||
for s in stadiums:
|
||||
for abbr in s.get('team_abbrevs', []):
|
||||
team_key = f"{s['sport']}_{abbr}" # Match Swift: "{sport.rawValue}_{abbrev}"
|
||||
if team_key not in teams:
|
||||
teams[team_key] = {'abbr': abbr, 'city': s['city'], 'sport': s['sport']}
|
||||
team_uuid = deterministic_uuid(team_key)
|
||||
team_map[(s['sport'], abbr)] = team_uuid
|
||||
if teams:
|
||||
# Canonical format: use teams_canonical.json
|
||||
recs = []
|
||||
for t in teams:
|
||||
team_id = get_team_id(t)
|
||||
record_name = deterministic_uuid(team_id)
|
||||
team_map[(t['sport'], t['abbreviation'])] = record_name
|
||||
|
||||
recs = [{
|
||||
'recordType': 'Team', 'recordName': deterministic_uuid(team_key),
|
||||
'fields': {
|
||||
'teamId': {'value': deterministic_uuid(team_key)},
|
||||
'abbreviation': {'value': info['abbr']},
|
||||
'name': {'value': info['abbr']},
|
||||
'city': {'value': info['city']},
|
||||
'sport': {'value': info['sport']},
|
||||
}
|
||||
} for team_key, info in teams.items()]
|
||||
stats['teams'] = import_data(ck, recs, 'teams', args.dry_run, args.verbose)
|
||||
fields = {
|
||||
'teamId': {'value': record_name},
|
||||
'canonicalId': {'value': team_id}, # Store canonical_id as string
|
||||
'abbreviation': {'value': t['abbreviation']},
|
||||
'name': {'value': t['name']},
|
||||
'city': {'value': t['city']},
|
||||
'sport': {'value': t['sport']},
|
||||
'stadiumCanonicalId': {'value': t.get('stadium_canonical_id', '')},
|
||||
}
|
||||
if t.get('conference_id'):
|
||||
fields['conferenceId'] = {'value': t['conference_id']}
|
||||
if t.get('division_id'):
|
||||
fields['divisionId'] = {'value': t['division_id']}
|
||||
|
||||
recs.append({'recordType': 'Team', 'recordName': record_name, 'fields': fields})
|
||||
stats['teams'] = import_data(ck, recs, 'teams', args.dry_run, args.verbose)
|
||||
else:
|
||||
# Legacy format: extract teams from stadiums
|
||||
teams_dict = {}
|
||||
for s in stadiums:
|
||||
team_abbrevs = s.get('primary_team_abbrevs', s.get('team_abbrevs', []))
|
||||
for abbr in team_abbrevs:
|
||||
team_key = f"{s['sport']}_{abbr}"
|
||||
if team_key not in teams_dict:
|
||||
teams_dict[team_key] = {'abbr': abbr, 'city': s['city'], 'sport': s['sport']}
|
||||
team_uuid = deterministic_uuid(team_key)
|
||||
team_map[(s['sport'], abbr)] = team_uuid
|
||||
|
||||
recs = [{
|
||||
'recordType': 'Team', 'recordName': deterministic_uuid(team_key),
|
||||
'fields': {
|
||||
'teamId': {'value': deterministic_uuid(team_key)},
|
||||
'canonicalId': {'value': team_key},
|
||||
'abbreviation': {'value': info['abbr']},
|
||||
'name': {'value': info['abbr']},
|
||||
'city': {'value': info['city']},
|
||||
'sport': {'value': info['sport']},
|
||||
}
|
||||
} for team_key, info in teams_dict.items()]
|
||||
stats['teams'] = import_data(ck, recs, 'teams', args.dry_run, args.verbose)
|
||||
|
||||
# Import games
|
||||
if import_games and games:
|
||||
# Detect canonical game format (has canonical_id field)
|
||||
use_canonical_games = games and 'canonical_id' in games[0]
|
||||
|
||||
# Rebuild team_map if only importing games (--games-only flag)
|
||||
if not team_map:
|
||||
for s in stadiums:
|
||||
for abbr in s.get('team_abbrevs', []):
|
||||
team_key = f"{s['sport']}_{abbr}"
|
||||
team_map[(s['sport'], abbr)] = deterministic_uuid(team_key)
|
||||
if teams:
|
||||
# Canonical format: use teams_canonical.json
|
||||
for t in teams:
|
||||
team_id = get_team_id(t)
|
||||
team_map[(t['sport'], t['abbreviation'])] = deterministic_uuid(team_id)
|
||||
else:
|
||||
# Legacy format: extract from stadiums
|
||||
for s in stadiums:
|
||||
team_abbrevs = s.get('primary_team_abbrevs', s.get('team_abbrevs', []))
|
||||
for abbr in team_abbrevs:
|
||||
team_key = f"{s['sport']}_{abbr}"
|
||||
team_map[(s['sport'], abbr)] = deterministic_uuid(team_key)
|
||||
|
||||
# Build team -> stadium map for stadiumRef
|
||||
# Build team -> stadium map for stadiumRef (legacy format needs this)
|
||||
team_stadium_map = {}
|
||||
for s in stadiums:
|
||||
stadium_uuid = stadium_uuid_map[s['id']]
|
||||
for abbr in s.get('team_abbrevs', []):
|
||||
stadium_id = get_stadium_id(s)
|
||||
stadium_uuid = stadium_id_map[stadium_id]
|
||||
team_abbrevs = s.get('primary_team_abbrevs', s.get('team_abbrevs', []))
|
||||
for abbr in team_abbrevs:
|
||||
team_stadium_map[(s['sport'], abbr)] = stadium_uuid
|
||||
|
||||
print("--- Games ---")
|
||||
print(f" Using {'canonical' if use_canonical_games else 'legacy'} game format")
|
||||
|
||||
# Deduplicate games by ID
|
||||
# Deduplicate games by ID (canonical_id or id)
|
||||
seen_ids = set()
|
||||
unique_games = []
|
||||
for g in games:
|
||||
if g['id'] not in seen_ids:
|
||||
seen_ids.add(g['id'])
|
||||
game_id = g.get('canonical_id', g.get('id', ''))
|
||||
if game_id not in seen_ids:
|
||||
seen_ids.add(game_id)
|
||||
unique_games.append(g)
|
||||
|
||||
if len(unique_games) < len(games):
|
||||
@@ -426,13 +520,20 @@ def main():
|
||||
|
||||
recs = []
|
||||
for g in unique_games:
|
||||
game_uuid = deterministic_uuid(g['id'])
|
||||
# Get game ID (canonical or legacy)
|
||||
game_id = g.get('canonical_id', g.get('id', ''))
|
||||
game_uuid = deterministic_uuid(game_id)
|
||||
sport = g['sport']
|
||||
|
||||
fields = {
|
||||
'gameId': {'value': game_uuid}, 'sport': {'value': sport},
|
||||
'season': {'value': g.get('season', '')}, 'source': {'value': g.get('source', '')},
|
||||
'gameId': {'value': game_uuid},
|
||||
'canonicalId': {'value': game_id}, # Store canonical_id as string
|
||||
'sport': {'value': sport},
|
||||
'season': {'value': g.get('season', '')},
|
||||
'source': {'value': g.get('source', 'canonical' if use_canonical_games else '')},
|
||||
}
|
||||
|
||||
# Parse date/time
|
||||
if g.get('date'):
|
||||
try:
|
||||
# Parse time like "7:30p" or "10:00a"
|
||||
@@ -455,20 +556,38 @@ def main():
|
||||
fields['dateTime'] = {'value': int(dt.timestamp() * 1000), 'type': 'TIMESTAMP'}
|
||||
except Exception as e:
|
||||
if args.verbose:
|
||||
print(f" Warning: Failed to parse date/time for {g['id']}: {e}")
|
||||
print(f" Warning: Failed to parse date/time for {game_id}: {e}")
|
||||
|
||||
# Team references
|
||||
if use_canonical_games:
|
||||
# Canonical format: derive team UUIDs directly from the canonical team IDs (e.g. team_nba_atl)
|
||||
home_team_canonical_id = g.get('home_team_canonical_id', '')
|
||||
away_team_canonical_id = g.get('away_team_canonical_id', '')
|
||||
home_team_uuid = deterministic_uuid(home_team_canonical_id)
|
||||
away_team_uuid = deterministic_uuid(away_team_canonical_id)
|
||||
else:
|
||||
# Legacy format: use abbreviations
|
||||
home_team_key = f"{sport}_{g.get('home_team_abbrev', '')}"
|
||||
away_team_key = f"{sport}_{g.get('away_team_abbrev', '')}"
|
||||
home_team_uuid = deterministic_uuid(home_team_key)
|
||||
away_team_uuid = deterministic_uuid(away_team_key)
|
||||
|
||||
# Team references - use (sport, abbrev) tuple for lookup
|
||||
home_team_key = f"{sport}_{g.get('home_team_abbrev', '')}"
|
||||
away_team_key = f"{sport}_{g.get('away_team_abbrev', '')}"
|
||||
home_team_uuid = deterministic_uuid(home_team_key)
|
||||
away_team_uuid = deterministic_uuid(away_team_key)
|
||||
fields['homeTeamRef'] = {'value': {'recordName': home_team_uuid, 'action': 'NONE'}}
|
||||
fields['awayTeamRef'] = {'value': {'recordName': away_team_uuid, 'action': 'NONE'}}
|
||||
|
||||
# Stadium reference - look up by home team abbrev
|
||||
stadium_uuid = team_stadium_map.get((sport, g.get('home_team_abbrev', '')))
|
||||
if stadium_uuid:
|
||||
fields['stadiumRef'] = {'value': {'recordName': stadium_uuid, 'action': 'NONE'}}
|
||||
# Stadium reference
|
||||
if use_canonical_games and g.get('stadium_canonical_id'):
|
||||
# Canonical format: use stadium_canonical_id directly
|
||||
stadium_canonical_id = g['stadium_canonical_id']
|
||||
stadium_uuid = stadium_id_map.get(stadium_canonical_id)
|
||||
if stadium_uuid:
|
||||
fields['stadiumRef'] = {'value': {'recordName': stadium_uuid, 'action': 'NONE'}}
|
||||
fields['stadiumCanonicalId'] = {'value': stadium_canonical_id}
|
||||
else:
|
||||
# Legacy format: look up by home team abbrev
|
||||
stadium_uuid = team_stadium_map.get((sport, g.get('home_team_abbrev', '')))
|
||||
if stadium_uuid:
|
||||
fields['stadiumRef'] = {'value': {'recordName': stadium_uuid, 'action': 'NONE'}}
|
||||
|
||||
recs.append({'recordType': 'Game', 'recordName': game_uuid, 'fields': fields})
|
||||
|
||||
@@ -554,9 +673,14 @@ def main():
|
||||
fields['validUntil'] = {'value': int(dt.timestamp() * 1000), 'type': 'TIMESTAMP'}
|
||||
except:
|
||||
pass
|
||||
# Extract sport from stadium_canonical_id (e.g., "stadium_nba_td_garden" -> "nba")
|
||||
# This makes record names unique for shared venues (TD Garden has NBA and NHL entries)
|
||||
stadium_id = sa['stadium_canonical_id']
|
||||
sport = stadium_id.split('_')[1] if '_' in stadium_id else 'unknown'
|
||||
record_name = f"{sport}_{sa['alias_name'].lower()}"
|
||||
recs.append({
|
||||
'recordType': 'StadiumAlias',
|
||||
'recordName': sa['alias_name'].lower(), # Use alias_name as recordName (unique key)
|
||||
'recordName': record_name,
|
||||
'fields': fields
|
||||
})
|
||||
stats['stadium_aliases'] = import_data(ck, recs, 'stadium aliases', args.dry_run, args.verbose)
|
||||
|
||||
Reference in New Issue
Block a user