feat(05-01): add change detection with diff reporting
- query_all() method with pagination
- compute_diff() returns new/updated/unchanged/deleted lists
- --diff flag shows the report without importing

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -232,6 +232,65 @@ class CloudKit:
|
||||
except Exception as e:
|
||||
return {'error': f"Request failed: {e}"}
|
||||
|
||||
def query_all(self, record_type, verbose=False):
    """Fetch every record of *record_type* from CloudKit.

    Follows the server's ``continuationMarker`` until the result set is
    exhausted, requesting up to 200 records per page.

    Args:
        record_type: CloudKit record type to query (e.g. 'Stadium').
        verbose: when True, print per-page progress.

    Returns:
        dict mapping recordName -> raw CloudKit record dict. May be a
        partial result if a page request errors out or times out.
    """
    collected = {}
    marker = None

    while True:
        path = f"{self.path_base}/records/query"
        payload = {
            'query': {'recordType': record_type},
            'resultsLimit': 200
        }
        if marker:
            payload['continuationMarker'] = marker

        body = json.dumps(payload)
        # CloudKit server-to-server auth: ISO8601 date + request signature.
        date = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
        headers = {
            'Content-Type': 'application/json',
            'X-Apple-CloudKit-Request-KeyID': self.key_id,
            'X-Apple-CloudKit-Request-ISO8601Date': date,
            'X-Apple-CloudKit-Request-SignatureV1': self._sign(date, body, path),
        }

        if verbose:
            # Approximate page number from the running total.
            page_num = len(collected) // 200 + 1
            print(f" Querying {record_type} (page {page_num})...")

        try:
            r = requests.post(f"{HOST}{path}", headers=headers, data=body, timeout=60)
            if r.status_code != 200:
                print(f" Query error: {r.status_code}: {r.text[:200]}")
                break

            result = r.json()
            page = result.get('records', [])
            for rec in page:
                collected[rec.get('recordName', '')] = rec

            if verbose:
                print(f" Found {len(page)} records (total: {len(collected)})")

            # A missing continuationMarker means this was the last page.
            marker = result.get('continuationMarker')
            if not marker:
                break

            time.sleep(0.3)  # Rate limiting

        except requests.exceptions.Timeout:
            print(f" Query timeout after 60s")
            break
        except Exception as e:
            print(f" Query failed: {e}")
            break

    return collected
|
||||
|
||||
def delete_all(self, record_type, verbose=False):
|
||||
"""Delete all records of a given type."""
|
||||
total_deleted = 0
|
||||
@@ -328,6 +387,312 @@ def import_data(ck, records, name, dry_run, verbose):
|
||||
return total
|
||||
|
||||
|
||||
def fields_equal(local_value, cloud_value, field_type=None):
    """
    Compare a local field value with a cloud field value.

    Handles special cases:
      * location dicts  -> latitude/longitude compared with ~1e-4 deg tolerance
      * reference dicts -> compared by 'recordName' only ('action' is ignored)
      * lists           -> order-insensitive when sortable, pairwise otherwise
      * everything else -> plain equality

    Args:
        local_value: value from the locally built record field.
        cloud_value: value from the CloudKit record field (may be None
            when the field is absent in the cloud record).
        field_type: unused; kept for backward-compatible signature.

    Returns:
        bool: True when the two values are considered equal.
    """
    # Handle location fields (compare with tolerance, ~11 m at the equator).
    if isinstance(local_value, dict) and 'latitude' in local_value:
        if not isinstance(cloud_value, dict) or 'latitude' not in cloud_value:
            return False
        lat_diff = abs(float(local_value['latitude']) - float(cloud_value['latitude']))
        lng_diff = abs(float(local_value['longitude']) - float(cloud_value['longitude']))
        return lat_diff < 0.0001 and lng_diff < 0.0001

    # Handle reference fields (compare by recordName only).
    if isinstance(local_value, dict) and 'recordName' in local_value:
        if not isinstance(cloud_value, dict) or 'recordName' not in cloud_value:
            return False
        return local_value['recordName'] == cloud_value['recordName']

    # Handle lists.
    if isinstance(local_value, list):
        if not isinstance(cloud_value, list):
            return False
        try:
            # Order-insensitive comparison for sortable element types.
            return sorted(local_value) == sorted(cloud_value)
        except TypeError:
            # Fix: unorderable elements (e.g. lists of reference dicts)
            # previously raised TypeError here. Fall back to a same-length,
            # order-sensitive, element-wise recursive comparison.
            if len(local_value) != len(cloud_value):
                return False
            return all(fields_equal(a, b) for a, b in zip(local_value, cloud_value))

    # Direct comparison for strings, ints, etc.
    return local_value == cloud_value
|
||||
|
||||
|
||||
def compute_diff(local_records, cloud_records, verbose=False):
    """
    Compare local records against cloud records.

    Args:
        local_records: dict of recordName -> locally built record.
        cloud_records: dict of recordName -> record fetched from CloudKit.
        verbose: when True, print each changed record and its fields.

    Returns:
        dict with 'new', 'updated', 'unchanged', 'deleted' lists. Items in
        'updated' and 'deleted' carry the cloud recordChangeTag required
        for conditional writes/deletes; 'updated' items also list the
        'changed_fields'.
    """
    result = {'new': [], 'updated': [], 'unchanged': [], 'deleted': []}

    local_keys = set(local_records)
    cloud_keys = set(cloud_records)

    # Present locally only -> must be created in the cloud.
    for name in local_keys - cloud_keys:
        result['new'].append({'record': local_records[name]})

    # Present in the cloud only -> candidate for deletion.
    for name in cloud_keys - local_keys:
        cloud_rec = cloud_records[name]
        result['deleted'].append({
            'record': cloud_rec,
            'recordChangeTag': cloud_rec.get('recordChangeTag', '')
        })

    # Present on both sides -> field-by-field comparison.
    # NOTE(review): fields that exist only on the cloud record are not
    # compared — confirm that server-managed fields are intentionally ignored.
    for name in local_keys & cloud_keys:
        local_rec = local_records[name]
        cloud_rec = cloud_records[name]
        cloud_fields = cloud_rec.get('fields', {})

        changed = [
            field_name
            for field_name, field_data in local_rec.get('fields', {}).items()
            if not fields_equal(field_data.get('value'),
                                cloud_fields.get(field_name, {}).get('value'))
        ]

        if changed:
            result['updated'].append({
                'record': local_rec,
                'recordChangeTag': cloud_rec.get('recordChangeTag', ''),
                'changed_fields': changed
            })
            if verbose:
                print(f" Changed: {name} - fields: {', '.join(changed)}")
        else:
            result['unchanged'].append({'record': local_rec})

    return result
|
||||
|
||||
|
||||
def show_diff_report(ck, data_dir, verbose=False):
    """Query CloudKit and show diff report for all record types.

    Loads the local canonical JSON files from *data_dir*, builds CloudKit-
    shaped record dicts for each record type, queries the corresponding
    cloud records via ck.query_all(), and prints a per-type diff followed
    by a summary.

    Args:
        ck: CloudKit client instance (provides query_all).
        data_dir: path (str or Path) to the local data directory.
        verbose: passed through to query_all/compute_diff for progress output.

    Returns:
        dict mapping record type name -> diff dict from compute_diff().
    """
    from pathlib import Path
    data_dir = Path(data_dir)

    print("\n" + "="*50)
    print("CloudKit Diff Report")
    print("="*50 + "\n")

    # Load local data; each file is optional and defaults to an empty list.
    stadiums = json.load(open(data_dir / 'stadiums_canonical.json')) if (data_dir / 'stadiums_canonical.json').exists() else []
    teams = json.load(open(data_dir / 'teams_canonical.json')) if (data_dir / 'teams_canonical.json').exists() else []

    # Load games from canonical/games/*.json (one file per season/league,
    # concatenated in sorted filename order).
    canonical_games_dir = data_dir / 'canonical' / 'games'
    games = []
    if canonical_games_dir.exists():
        for games_file in sorted(canonical_games_dir.glob('*.json')):
            with open(games_file) as f:
                games.extend(json.load(f))

    league_structure = json.load(open(data_dir / 'league_structure.json')) if (data_dir / 'league_structure.json').exists() else []
    team_aliases = json.load(open(data_dir / 'team_aliases.json')) if (data_dir / 'team_aliases.json').exists() else []
    stadium_aliases = json.load(open(data_dir / 'stadium_aliases.json')) if (data_dir / 'stadium_aliases.json').exists() else []

    print(f"Local data: {len(stadiums)} stadiums, {len(teams)} teams, {len(games)} games")
    print(f" {len(league_structure)} league structures, {len(team_aliases)} team aliases, {len(stadium_aliases)} stadium aliases\n")

    # Build local record maps (same format as CloudKit records).
    # Each builder returns dict of recordName -> record so compute_diff can
    # match local and cloud records by key.
    def build_stadium_records(stadiums):
        # recordName is a deterministic UUID derived from the canonical id.
        records = {}
        for s in stadiums:
            stadium_id = s.get('canonical_id', s.get('id', ''))
            record_name = deterministic_uuid(stadium_id)
            team_abbrevs = s.get('primary_team_abbrevs', s.get('team_abbrevs', []))
            fields = {
                'stadiumId': {'value': record_name},
                'canonicalId': {'value': stadium_id},
                'name': {'value': s['name']},
                'city': {'value': s['city']},
                'state': {'value': s.get('state', '')},
                'sport': {'value': s['sport']},
                'source': {'value': s.get('source', 'canonical')},
                'teamAbbrevs': {'value': team_abbrevs},
            }
            # Truthiness check: a latitude of exactly 0.0 would be skipped —
            # presumably acceptable for this data set; verify if coverage grows.
            if s.get('latitude'):
                fields['location'] = {'value': {'latitude': s['latitude'], 'longitude': s['longitude']}}
            if s.get('capacity'):
                fields['capacity'] = {'value': s['capacity']}
            records[record_name] = {'recordType': 'Stadium', 'recordName': record_name, 'fields': fields}
        return records

    def build_team_records(teams):
        records = {}
        for t in teams:
            team_id = t.get('canonical_id', '')
            record_name = deterministic_uuid(team_id)
            fields = {
                'teamId': {'value': record_name},
                'canonicalId': {'value': team_id},
                'abbreviation': {'value': t['abbreviation']},
                'name': {'value': t['name']},
                'city': {'value': t['city']},
                'sport': {'value': t['sport']},
                'stadiumCanonicalId': {'value': t.get('stadium_canonical_id', '')},
            }
            if t.get('conference_id'):
                fields['conferenceId'] = {'value': t['conference_id']}
            if t.get('division_id'):
                fields['divisionId'] = {'value': t['division_id']}
            records[record_name] = {'recordType': 'Team', 'recordName': record_name, 'fields': fields}
        return records

    def build_game_records(games, stadiums):
        records = {}
        # Map stadium canonical id -> its deterministic record UUID for refs.
        stadium_id_map = {s.get('canonical_id', s.get('id', '')): deterministic_uuid(s.get('canonical_id', s.get('id', ''))) for s in stadiums}
        seen_ids = set()  # de-duplicate games that appear in multiple files
        for g in games:
            game_id = g.get('canonical_id', g.get('id', ''))
            if game_id in seen_ids:
                continue
            seen_ids.add(game_id)
            game_uuid = deterministic_uuid(game_id)
            sport = g['sport']
            fields = {
                'gameId': {'value': game_uuid},
                'canonicalId': {'value': game_id},
                'sport': {'value': sport},
                'season': {'value': g.get('season', '')},
                'source': {'value': g.get('source', 'canonical')},
            }
            # Parse date/time; 'time' strings look like "7:00p" / "1:05a",
            # defaulting to 19:00 when missing or unparseable parts.
            if g.get('date'):
                try:
                    time_str = g.get('time', '7:00p')
                    hour, minute = 19, 0
                    if time_str:
                        clean_time = time_str.lower().replace(' ', '')
                        is_pm = 'p' in clean_time
                        time_parts = clean_time.replace('p', '').replace('a', '').split(':')
                        if time_parts:
                            hour = int(time_parts[0])
                            if is_pm and hour != 12:
                                hour += 12
                            elif not is_pm and hour == 12:
                                hour = 0  # 12am -> 00:xx
                            if len(time_parts) > 1:
                                minute = int(time_parts[1])
                    # NOTE(review): naive datetime — timestamp() uses the local
                    # timezone of the machine running the import; confirm that
                    # is intended for CloudKit TIMESTAMP values.
                    dt = datetime.strptime(f"{g['date']} {hour:02d}:{minute:02d}", '%Y-%m-%d %H:%M')
                    fields['dateTime'] = {'value': int(dt.timestamp() * 1000), 'type': 'TIMESTAMP'}
                except:
                    # NOTE(review): bare except silently drops unparseable
                    # dates/times — consider narrowing to ValueError.
                    pass
            # Team references (deterministic UUIDs, no delete cascade).
            home_team_canonical_id = g.get('home_team_canonical_id', '')
            away_team_canonical_id = g.get('away_team_canonical_id', '')
            home_team_uuid = deterministic_uuid(home_team_canonical_id)
            away_team_uuid = deterministic_uuid(away_team_canonical_id)
            fields['homeTeamRef'] = {'value': {'recordName': home_team_uuid, 'action': 'NONE'}}
            fields['awayTeamRef'] = {'value': {'recordName': away_team_uuid, 'action': 'NONE'}}
            # Stadium reference — only when the canonical id resolves to a
            # known stadium UUID; the canonical id string is kept regardless.
            if g.get('stadium_canonical_id'):
                stadium_canonical_id = g['stadium_canonical_id']
                stadium_uuid = stadium_id_map.get(stadium_canonical_id)
                if stadium_uuid:
                    fields['stadiumRef'] = {'value': {'recordName': stadium_uuid, 'action': 'NONE'}}
                fields['stadiumCanonicalId'] = {'value': stadium_canonical_id}
            records[game_uuid] = {'recordType': 'Game', 'recordName': game_uuid, 'fields': fields}
        return records

    def build_league_structure_records(league_structure):
        # LeagueStructure uses the raw local id as recordName (no UUID).
        records = {}
        for ls in league_structure:
            record_name = ls['id']
            fields = {
                'structureId': {'value': ls['id']},
                'sport': {'value': ls['sport']},
                'type': {'value': ls['type']},
                'name': {'value': ls['name']},
                'displayOrder': {'value': ls['display_order']},
                'schemaVersion': {'value': 1},
            }
            if ls.get('abbreviation'):
                fields['abbreviation'] = {'value': ls['abbreviation']}
            if ls.get('parent_id'):
                fields['parentId'] = {'value': ls['parent_id']}
            records[record_name] = {'recordType': 'LeagueStructure', 'recordName': record_name, 'fields': fields}
        return records

    def build_team_alias_records(team_aliases):
        records = {}
        for ta in team_aliases:
            record_name = ta['id']
            fields = {
                'aliasId': {'value': ta['id']},
                'teamCanonicalId': {'value': ta['team_canonical_id']},
                'aliasType': {'value': ta['alias_type']},
                'aliasValue': {'value': ta['alias_value']},
                'schemaVersion': {'value': 1},
            }
            records[record_name] = {'recordType': 'TeamAlias', 'recordName': record_name, 'fields': fields}
        return records

    def build_stadium_alias_records(stadium_aliases):
        records = {}
        for sa in stadium_aliases:
            stadium_id = sa['stadium_canonical_id']
            # Sport is presumably the second '_'-separated token of the
            # canonical id (e.g. "us_mlb_...") — TODO confirm id format.
            sport = stadium_id.split('_')[1] if '_' in stadium_id else 'unknown'
            record_name = f"{sport}_{sa['alias_name'].lower()}"
            fields = {
                'aliasName': {'value': sa['alias_name'].lower()},
                'stadiumCanonicalId': {'value': sa['stadium_canonical_id']},
                'schemaVersion': {'value': 1},
            }
            records[record_name] = {'recordType': 'StadiumAlias', 'recordName': record_name, 'fields': fields}
        return records

    # Compute and display diffs for each record type.
    # Each tuple: (CloudKit record type, local source list, builder callable).
    record_types = [
        ('Stadium', stadiums, build_stadium_records),
        ('Team', teams, build_team_records),
        ('Game', games, lambda g: build_game_records(g, stadiums)),
        ('LeagueStructure', league_structure, build_league_structure_records),
        ('TeamAlias', team_aliases, build_team_alias_records),
        ('StadiumAlias', stadium_aliases, build_stadium_alias_records),
    ]

    all_diffs = {}
    for record_type, data, builder in record_types:
        if not data:
            print(f"{record_type}: No local data")
            continue

        print(f"Checking {record_type}...")
        local_records = builder(data)

        # Query cloud records
        cloud_records = ck.query_all(record_type, verbose=verbose)

        # Compute diff
        diff = compute_diff(local_records, cloud_records, verbose=verbose)
        all_diffs[record_type] = diff

        # Report
        print(f" {record_type}: {len(diff['unchanged'])} unchanged, {len(diff['new'])} new, {len(diff['updated'])} updated, {len(diff['deleted'])} deleted")

    print("\n" + "="*50)
    print("Diff Summary")
    print("="*50)
    for record_type, diff in all_diffs.items():
        total_changes = len(diff['new']) + len(diff['updated']) + len(diff['deleted'])
        status = "in sync" if total_changes == 0 else f"{total_changes} changes needed"
        print(f" {record_type}: {status}")

    return all_diffs
|
||||
|
||||
|
||||
def main():
|
||||
p = argparse.ArgumentParser(description='Import JSON to CloudKit')
|
||||
p.add_argument('--key-id', default=DEFAULT_KEY_ID)
|
||||
@@ -344,6 +709,7 @@ def main():
|
||||
p.add_argument('--canonical-only', action='store_true', help='Import only canonical data (league structure + team aliases + stadium aliases)')
|
||||
p.add_argument('--delete-all', action='store_true', help='Delete all records before importing')
|
||||
p.add_argument('--delete-only', action='store_true', help='Only delete records, do not import')
|
||||
p.add_argument('--diff', action='store_true', help='Show diff between local and CloudKit without importing')
|
||||
p.add_argument('--dry-run', action='store_true')
|
||||
p.add_argument('--verbose', '-v', action='store_true')
|
||||
p.add_argument('--interactive', '-i', action='store_true', help='Show interactive menu')
|
||||
@@ -353,7 +719,7 @@ def main():
|
||||
has_action_flag = any([
|
||||
args.stadiums_only, args.games_only, args.games_files, args.league_structure_only,
|
||||
args.team_aliases_only, args.stadium_aliases_only, args.canonical_only,
|
||||
args.delete_all, args.delete_only, args.dry_run
|
||||
args.delete_all, args.delete_only, args.dry_run, args.diff
|
||||
])
|
||||
|
||||
# Track selected game files (for option 4 or --games-files)
|
||||
@@ -465,6 +831,18 @@ def main():
|
||||
sys.exit(f"Error: Key file not found: {args.key_file}")
|
||||
ck = CloudKit(args.key_id, open(args.key_file, 'rb').read(), args.container, args.env)
|
||||
|
||||
# Handle diff mode (show diff without importing)
|
||||
if args.diff:
|
||||
if not ck:
|
||||
# Need CloudKit connection for diff
|
||||
if not HAS_CRYPTO:
|
||||
sys.exit("Error: pip install cryptography")
|
||||
if not os.path.exists(args.key_file):
|
||||
sys.exit(f"Error: Key file not found: {args.key_file}")
|
||||
ck = CloudKit(args.key_id, open(args.key_file, 'rb').read(), args.container, args.env)
|
||||
show_diff_report(ck, args.data_dir, verbose=args.verbose)
|
||||
return
|
||||
|
||||
# Handle deletion
|
||||
if args.delete_all or args.delete_only:
|
||||
if not ck:
|
||||
|
||||
Reference in New Issue
Block a user