feat(05-02): add sync verification with --verify flag
- Add --verify flag for quick verification (counts + 5-record spot-check)
- Add --verify-deep flag for full field-by-field comparison
- Add verify_sync() function to compare CloudKit vs local data
- Add lookup() method to CloudKit class for record lookups
- Add menu options 14-15 for verify sync quick/deep
This commit is contained in:
@@ -137,17 +137,19 @@ def show_menu():
|
||||
print(" 11. Dry run (preview only)")
|
||||
print(" 12. Smart sync (diff-based, only upload changes)")
|
||||
print(" 13. Smart sync + delete orphans")
|
||||
print(" 14. Verify sync (quick)")
|
||||
print(" 15. Verify sync (deep)")
|
||||
print(" 0. Exit")
|
||||
print()
|
||||
|
||||
while True:
|
||||
try:
|
||||
choice = input("Enter choice [1-13, 0 to exit]: ").strip()
|
||||
choice = input("Enter choice [1-15, 0 to exit]: ").strip()
|
||||
if choice == '0':
|
||||
return None
|
||||
if choice in ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13']:
|
||||
if choice in ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15']:
|
||||
return int(choice)
|
||||
print("Invalid choice. Please enter 1-13 or 0.")
|
||||
print("Invalid choice. Please enter 1-15 or 0.")
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
print("\nExiting.")
|
||||
return None
|
||||
@@ -293,6 +295,36 @@ class CloudKit:
|
||||
|
||||
return all_records
|
||||
|
||||
def lookup(self, record_type, record_names, verbose=False):
    """Fetch specific records from CloudKit by their recordName.

    Args:
        record_type: Label used only in the verbose progress message; it is
            not part of the request payload.
        record_names: Collection of recordName strings to look up.
        verbose: When true, print a one-line progress message.

    Returns:
        ``[]`` when ``record_names`` is empty or None; the ``records`` list
        from the JSON response on HTTP 200; otherwise a dict of the form
        ``{'error': <message>}`` describing the failure.
    """
    if not record_names:
        return []

    endpoint = f"{self.path_base}/records/lookup"
    payload = json.dumps({
        'records': [{'recordName': record_name} for record_name in record_names],
    })
    timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')

    # The signature covers the timestamp, the exact body and the endpoint
    # path, so the same three values must be the ones actually sent.
    request_headers = {
        'Content-Type': 'application/json',
        'X-Apple-CloudKit-Request-KeyID': self.key_id,
        'X-Apple-CloudKit-Request-ISO8601Date': timestamp,
        'X-Apple-CloudKit-Request-SignatureV1': self._sign(timestamp, payload, endpoint),
    }

    if verbose:
        print(f" Looking up {len(record_names)} {record_type} records...")

    try:
        response = requests.post(
            f"{HOST}{endpoint}",
            headers=request_headers,
            data=payload,
            timeout=30,
        )
        if response.status_code != 200:
            return {'error': f"{response.status_code}: {response.text[:200]}"}
        return response.json().get('records', [])
    except requests.exceptions.Timeout:
        return {'error': 'Request timed out after 30s'}
    except Exception as exc:
        # Any other failure (connection error, malformed JSON, ...) is
        # reported to the caller as an error dict rather than raised.
        return {'error': f"Request failed: {exc}"}
|
||||
|
||||
def delete_all(self, record_type, verbose=False):
|
||||
"""Delete all records of a given type."""
|
||||
total_deleted = 0
|
||||
@@ -1051,6 +1083,238 @@ def run_smart_sync(ck, data_dir, dry_run=False, verbose=False, delete_orphans=Fa
|
||||
return total_stats
|
||||
|
||||
|
||||
def _load_json_or_empty(path):
    """Return the parsed JSON content of *path*, or ``[]`` if it does not exist."""
    if not path.exists():
        return []
    # Use a context manager so the file handle is closed deterministically.
    with open(path) as f:
        return json.load(f)


def _build_local_record_map(record_type, data):
    """Build a map of recordName -> expected field values for one record type."""
    records = {}
    if record_type == 'Stadium':
        for s in data:
            stadium_id = s.get('canonical_id', s.get('id', ''))
            records[deterministic_uuid(stadium_id)] = {
                'canonicalId': stadium_id,
                'name': s['name'],
                'city': s['city'],
                'sport': s['sport'],
            }
    elif record_type == 'Team':
        for t in data:
            team_id = t.get('canonical_id', '')
            records[deterministic_uuid(team_id)] = {
                'canonicalId': team_id,
                'abbreviation': t['abbreviation'],
                'name': t['name'],
                'city': t['city'],
                'sport': t['sport'],
            }
    elif record_type == 'Game':
        for g in data:
            game_id = g.get('canonical_id', g.get('id', ''))
            records[deterministic_uuid(game_id)] = {
                'canonicalId': game_id,
                'sport': g['sport'],
                'season': g.get('season', ''),
            }
    elif record_type == 'LeagueStructure':
        for ls in data:
            records[ls['id']] = {
                'structureId': ls['id'],
                'sport': ls['sport'],
                'type': ls['type'],
                'name': ls['name'],
            }
    elif record_type == 'TeamAlias':
        for ta in data:
            records[ta['id']] = {
                'aliasId': ta['id'],
                'teamCanonicalId': ta['team_canonical_id'],
                'aliasType': ta['alias_type'],
                'aliasValue': ta['alias_value'],
            }
    elif record_type == 'StadiumAlias':
        for sa in data:
            stadium_id = sa['stadium_canonical_id']
            # The second '_'-separated token of the stadium id is used as the sport.
            sport = stadium_id.split('_')[1] if '_' in stadium_id else 'unknown'
            records[f"{sport}_{sa['alias_name'].lower()}"] = {
                'aliasName': sa['alias_name'].lower(),
                'stadiumCanonicalId': sa['stadium_canonical_id'],
            }
    return records


def _diff_record_fields(local_fields, cloud_rec):
    """Return ``(field_name, expected, actual)`` for each field that differs."""
    cloud_fields = cloud_rec.get('fields', {})
    diffs = []
    for field_name, expected_value in local_fields.items():
        cloud_value = cloud_fields.get(field_name, {}).get('value')
        if cloud_value != expected_value:
            diffs.append((field_name, expected_value, cloud_value))
    return diffs


def verify_sync(ck, data_dir, verbose=False, deep=False):
    """
    Verify that CloudKit data matches local canonical data.

    Quick mode: compares counts and spot-checks up to 5 random records per type.
    Deep mode: full field-by-field comparison of all local records.

    Args:
        ck: Object exposing ``query_all(record_type, verbose=...)``. The result
            is used with ``len()`` and ``.get(record_name)``, so it is assumed
            to be a mapping of recordName -> record dict (TODO confirm against
            ``CloudKit.query_all``).
        data_dir: Directory holding the local JSON files; missing files are
            treated as empty data sets and that record type is skipped.
        verbose: Forwarded to ``ck.query_all``.
        deep: Compare every local record instead of a random sample.

    Returns:
        True when no count, missing-record or field mismatch was found,
        otherwise False. A report is printed to stdout either way.
    """
    import random
    from pathlib import Path
    data_dir = Path(data_dir)

    print("\n" + "="*50)
    print(f"CloudKit Sync Verification {'(DEEP)' if deep else '(Quick)'}")
    print("="*50)

    if deep:
        print("\n⚠️ Deep verification may take several minutes for large datasets\n")

    # Load local data
    stadiums = _load_json_or_empty(data_dir / 'stadiums_canonical.json')
    teams = _load_json_or_empty(data_dir / 'teams_canonical.json')

    # Load games from canonical/games/*.json
    canonical_games_dir = data_dir / 'canonical' / 'games'
    games = []
    if canonical_games_dir.exists():
        for games_file in sorted(canonical_games_dir.glob('*.json')):
            with open(games_file) as f:
                games.extend(json.load(f))

    league_structure = _load_json_or_empty(data_dir / 'league_structure.json')
    team_aliases = _load_json_or_empty(data_dir / 'team_aliases.json')
    stadium_aliases = _load_json_or_empty(data_dir / 'stadium_aliases.json')

    # Deduplicate games by canonical_id (first occurrence wins)
    seen_ids = set()
    unique_games = []
    for g in games:
        game_id = g.get('canonical_id', g.get('id', ''))
        if game_id not in seen_ids:
            seen_ids.add(game_id)
            unique_games.append(g)
    games = unique_games

    # Insertion order doubles as the order in which types are verified.
    data_map = {
        'Stadium': stadiums,
        'Team': teams,
        'Game': games,
        'LeagueStructure': league_structure,
        'TeamAlias': team_aliases,
        'StadiumAlias': stadium_aliases,
    }
    local_counts = {record_type: len(data) for record_type, data in data_map.items()}

    print(f"Local data: {local_counts['Stadium']} stadiums, {local_counts['Team']} teams, {local_counts['Game']} games")
    print(f" {local_counts['LeagueStructure']} league structures, {local_counts['TeamAlias']} team aliases, {local_counts['StadiumAlias']} stadium aliases\n")

    results = []
    total_mismatches = 0

    for record_type, local_data in data_map.items():
        local_count = local_counts[record_type]
        if local_count == 0:
            print(f"{record_type}: No local data, skipping")
            continue

        # Query CloudKit count
        print(f"Checking {record_type}...")
        cloud_records = ck.query_all(record_type, verbose=verbose)
        cloud_count = len(cloud_records)

        # Count comparison
        if cloud_count == local_count:
            status = "[OK]"
        elif cloud_count < local_count:
            status = f"[MISMATCH: {local_count - cloud_count} missing in CloudKit]"
            total_mismatches += 1
        else:
            status = f"[MISMATCH: {cloud_count - local_count} extra in CloudKit]"
            total_mismatches += 1

        print(f" {record_type}: CloudKit={cloud_count}, Local={local_count} {status}")

        # Spot-check or deep verification
        local_records = _build_local_record_map(record_type, local_data)

        if deep:
            # Full field-by-field comparison
            field_mismatches = []
            for record_name, local_fields in local_records.items():
                cloud_rec = cloud_records.get(record_name)
                if not cloud_rec:
                    field_mismatches.append(f" {record_name}: Missing in CloudKit")
                    continue
                for field_name, expected_value, cloud_value in _diff_record_fields(local_fields, cloud_rec):
                    field_mismatches.append(f" {record_name}.{field_name}: expected '{expected_value}', got '{cloud_value}'")

            if field_mismatches:
                print(f" Field mismatches ({len(field_mismatches)}):")
                for m in field_mismatches[:10]:  # Show first 10
                    print(m)
                if len(field_mismatches) > 10:
                    print(f" ... and {len(field_mismatches) - 10} more")
                total_mismatches += len(field_mismatches)
            else:
                print(" All fields verified [OK]")

        elif cloud_count == local_count and cloud_count > 0:
            # Spot-check up to 5 random records. The sample is bounded by the
            # local map, which can hold fewer keys than local_count when local
            # ids collide; bounding by cloud_count made random.sample raise
            # ValueError in that case.
            sample_size = min(5, len(local_records))
            sample_names = random.sample(list(local_records), sample_size)

            spot_check_ok = True
            for record_name in sample_names:
                cloud_rec = cloud_records.get(record_name)
                if not cloud_rec:
                    print(f" Spot-check failed: {record_name} missing in CloudKit")
                    spot_check_ok = False
                    continue
                for field_name, expected_value, cloud_value in _diff_record_fields(local_records[record_name], cloud_rec):
                    print(f" Spot-check mismatch: {record_name}.{field_name}: expected '{expected_value}', got '{cloud_value}'")
                    spot_check_ok = False

            if spot_check_ok:
                print(f" Spot-check ({sample_size} records): [OK]")
            else:
                total_mismatches += 1

        results.append({
            'type': record_type,
            'local': local_count,
            'cloud': cloud_count,
            'match': cloud_count == local_count,
        })

    # Summary
    print("\n" + "="*50)
    print("Verification Summary")
    print("="*50)
    for r in results:
        status = "[OK]" if r['match'] else "[MISMATCH]"
        print(f" {r['type']}: Local={r['local']}, CloudKit={r['cloud']} {status}")

    if total_mismatches == 0:
        print("\n✓ All data verified - CloudKit matches local data")
    else:
        print(f"\n⚠ Found {total_mismatches} mismatch(es)")

    return total_mismatches == 0
|
||||
|
||||
|
||||
def main():
|
||||
p = argparse.ArgumentParser(description='Import JSON to CloudKit')
|
||||
p.add_argument('--key-id', default=DEFAULT_KEY_ID)
|
||||
@@ -1070,6 +1334,8 @@ def main():
|
||||
p.add_argument('--diff', action='store_true', help='Show diff between local and CloudKit without importing')
|
||||
p.add_argument('--smart-sync', action='store_true', help='Differential sync: only upload new/changed records')
|
||||
p.add_argument('--delete-orphans', action='store_true', help='With --smart-sync, also delete records not in local data')
|
||||
p.add_argument('--verify', action='store_true', help='Verify CloudKit matches local data (quick: counts + spot-check)')
|
||||
p.add_argument('--verify-deep', action='store_true', help='Verify CloudKit matches local data (deep: full field comparison)')
|
||||
p.add_argument('--dry-run', action='store_true')
|
||||
p.add_argument('--verbose', '-v', action='store_true')
|
||||
p.add_argument('--interactive', '-i', action='store_true', help='Show interactive menu')
|
||||
@@ -1079,7 +1345,8 @@ def main():
|
||||
has_action_flag = any([
|
||||
args.stadiums_only, args.games_only, args.games_files, args.league_structure_only,
|
||||
args.team_aliases_only, args.stadium_aliases_only, args.canonical_only,
|
||||
args.delete_all, args.delete_only, args.dry_run, args.diff, args.smart_sync
|
||||
args.delete_all, args.delete_only, args.dry_run, args.diff, args.smart_sync,
|
||||
args.verify, args.verify_deep
|
||||
])
|
||||
|
||||
# Track selected game files (for option 4 or --games-files)
|
||||
@@ -1126,6 +1393,10 @@ def main():
|
||||
elif choice == 13: # Smart sync + delete orphans
|
||||
args.smart_sync = True
|
||||
args.delete_orphans = True
|
||||
elif choice == 14: # Verify sync (quick)
|
||||
args.verify = True
|
||||
elif choice == 15: # Verify sync (deep)
|
||||
args.verify_deep = True
|
||||
|
||||
print(f"\n{'='*50}")
|
||||
print(f"CloudKit Import {'(DRY RUN)' if args.dry_run else ''}")
|
||||
@@ -1208,6 +1479,18 @@ def main():
|
||||
show_diff_report(ck, args.data_dir, verbose=args.verbose)
|
||||
return
|
||||
|
||||
# Handle verify mode
|
||||
if args.verify or args.verify_deep:
|
||||
if not ck:
|
||||
# Need CloudKit connection for verification
|
||||
if not HAS_CRYPTO:
|
||||
sys.exit("Error: pip install cryptography")
|
||||
if not os.path.exists(args.key_file):
|
||||
sys.exit(f"Error: Key file not found: {args.key_file}")
|
||||
ck = CloudKit(args.key_id, open(args.key_file, 'rb').read(), args.container, args.env)
|
||||
verify_sync(ck, args.data_dir, verbose=args.verbose, deep=args.verify_deep)
|
||||
return
|
||||
|
||||
# Handle smart sync mode (differential upload)
|
||||
if args.smart_sync:
|
||||
if not ck:
|
||||
|
||||
Reference in New Issue
Block a user