feat(05-02): add sync verification with --verify flag
- Add --verify flag for quick verification (counts + 5-record spot-check)
- Add --verify-deep flag for full field-by-field comparison
- Add verify_sync() function to compare CloudKit vs local data
- Add lookup() method to CloudKit class for record lookups
- Add menu options 14-15 for verify sync quick/deep
This commit is contained in:
@@ -137,17 +137,19 @@ def show_menu():
|
|||||||
print(" 11. Dry run (preview only)")
|
print(" 11. Dry run (preview only)")
|
||||||
print(" 12. Smart sync (diff-based, only upload changes)")
|
print(" 12. Smart sync (diff-based, only upload changes)")
|
||||||
print(" 13. Smart sync + delete orphans")
|
print(" 13. Smart sync + delete orphans")
|
||||||
|
print(" 14. Verify sync (quick)")
|
||||||
|
print(" 15. Verify sync (deep)")
|
||||||
print(" 0. Exit")
|
print(" 0. Exit")
|
||||||
print()
|
print()
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
choice = input("Enter choice [1-13, 0 to exit]: ").strip()
|
choice = input("Enter choice [1-15, 0 to exit]: ").strip()
|
||||||
if choice == '0':
|
if choice == '0':
|
||||||
return None
|
return None
|
||||||
if choice in ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13']:
|
if choice in ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15']:
|
||||||
return int(choice)
|
return int(choice)
|
||||||
print("Invalid choice. Please enter 1-13 or 0.")
|
print("Invalid choice. Please enter 1-15 or 0.")
|
||||||
except (EOFError, KeyboardInterrupt):
|
except (EOFError, KeyboardInterrupt):
|
||||||
print("\nExiting.")
|
print("\nExiting.")
|
||||||
return None
|
return None
|
||||||
@@ -293,6 +295,36 @@ class CloudKit:
|
|||||||
|
|
||||||
return all_records
|
return all_records
|
||||||
|
|
||||||
|
def lookup(self, record_type, record_names, verbose=False, timeout=30):
    """Lookup specific records by recordName.

    Sends one signed POST to the container's ``records/lookup`` endpoint.

    Args:
        record_type: Record type label; only used in the verbose progress
            message (the request itself identifies records by name).
        record_names: Sized collection of recordName strings to fetch.
        verbose: When true, print a one-line progress message.
        timeout: Seconds to wait for the HTTP response (default 30, the
            value that used to be hard-coded).

    Returns:
        ``[]`` when ``record_names`` is empty (no request is made); the
        list under the response's ``'records'`` key on HTTP 200; otherwise
        a dict ``{'error': <message>}`` describing the HTTP status, the
        timeout, or the exception that occurred.
    """
    if not record_names:
        return []

    path = f"{self.path_base}/records/lookup"
    body = json.dumps({'records': [{'recordName': name} for name in record_names]})
    date = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
    headers = {
        'Content-Type': 'application/json',
        'X-Apple-CloudKit-Request-KeyID': self.key_id,
        'X-Apple-CloudKit-Request-ISO8601Date': date,
        # The signature covers the date, body and path sent in this request.
        'X-Apple-CloudKit-Request-SignatureV1': self._sign(date, body, path),
    }

    if verbose:
        print(f" Looking up {len(record_names)} {record_type} records...")

    try:
        r = requests.post(f"{HOST}{path}", headers=headers, data=body, timeout=timeout)
        if r.status_code == 200:
            return r.json().get('records', [])
        # Truncate the response text so a large error page cannot flood the output.
        return {'error': f"{r.status_code}: {r.text[:200]}"}
    except requests.exceptions.Timeout:
        return {'error': f'Request timed out after {timeout}s'}
    except Exception as e:
        # Deliberate best-effort: any other failure (connection error,
        # non-JSON body, ...) is reported as an error dict, never raised.
        return {'error': f"Request failed: {e}"}
|
||||||
|
|
||||||
def delete_all(self, record_type, verbose=False):
|
def delete_all(self, record_type, verbose=False):
|
||||||
"""Delete all records of a given type."""
|
"""Delete all records of a given type."""
|
||||||
total_deleted = 0
|
total_deleted = 0
|
||||||
@@ -1051,6 +1083,238 @@ def run_smart_sync(ck, data_dir, dry_run=False, verbose=False, delete_orphans=Fa
|
|||||||
return total_stats
|
return total_stats
|
||||||
|
|
||||||
|
|
||||||
|
def _load_json_or_empty(path):
    """Return the parsed JSON content of *path*, or [] if the file does not exist."""
    if not path.exists():
        return []
    # Context manager so the handle is closed even if parsing fails.
    with open(path) as f:
        return json.load(f)


def _build_local_record_map(record_type, data):
    """Build a map of recordName -> expected field values for one record type."""
    records = {}
    if record_type == 'Stadium':
        for s in data:
            stadium_id = s.get('canonical_id', s.get('id', ''))
            records[deterministic_uuid(stadium_id)] = {
                'canonicalId': stadium_id,
                'name': s['name'],
                'city': s['city'],
                'sport': s['sport'],
            }
    elif record_type == 'Team':
        for t in data:
            team_id = t.get('canonical_id', '')
            records[deterministic_uuid(team_id)] = {
                'canonicalId': team_id,
                'abbreviation': t['abbreviation'],
                'name': t['name'],
                'city': t['city'],
                'sport': t['sport'],
            }
    elif record_type == 'Game':
        for g in data:
            game_id = g.get('canonical_id', g.get('id', ''))
            records[deterministic_uuid(game_id)] = {
                'canonicalId': game_id,
                'sport': g['sport'],
                'season': g.get('season', ''),
            }
    elif record_type == 'LeagueStructure':
        for ls in data:
            records[ls['id']] = {
                'structureId': ls['id'],
                'sport': ls['sport'],
                'type': ls['type'],
                'name': ls['name'],
            }
    elif record_type == 'TeamAlias':
        for ta in data:
            records[ta['id']] = {
                'aliasId': ta['id'],
                'teamCanonicalId': ta['team_canonical_id'],
                'aliasType': ta['alias_type'],
                'aliasValue': ta['alias_value'],
            }
    elif record_type == 'StadiumAlias':
        for sa in data:
            stadium_id = sa['stadium_canonical_id']
            # The sport is taken from the second '_'-separated token of the stadium id.
            sport = stadium_id.split('_')[1] if '_' in stadium_id else 'unknown'
            records[f"{sport}_{sa['alias_name'].lower()}"] = {
                'aliasName': sa['alias_name'].lower(),
                'stadiumCanonicalId': sa['stadium_canonical_id'],
            }
    return records


def _diff_fields(local_fields, cloud_rec):
    """Return (field_name, expected, actual) for every field whose cloud value differs."""
    cloud_fields = cloud_rec.get('fields', {})
    diffs = []
    for field_name, expected_value in local_fields.items():
        cloud_value = cloud_fields.get(field_name, {}).get('value')
        if cloud_value != expected_value:
            diffs.append((field_name, expected_value, cloud_value))
    return diffs


def verify_sync(ck, data_dir, verbose=False, deep=False):
    """
    Verify that CloudKit data matches local canonical data.

    Quick mode: compares counts and spot-checks up to 5 random records per type
    (only when the counts already match).
    Deep mode: full field-by-field comparison of all local records.

    Args:
        ck: Object exposing ``query_all(record_type, verbose=...)``.
        data_dir: Directory holding the local JSON files.
        verbose: Forwarded to ``ck.query_all``.
        deep: Select deep mode instead of quick mode.

    Returns:
        True when no count or field mismatch was found, otherwise False.
        Record types with no local data are skipped and never count as
        mismatches.
    """
    import random
    from pathlib import Path

    data_dir = Path(data_dir)

    print("\n" + "="*50)
    print(f"CloudKit Sync Verification {'(DEEP)' if deep else '(Quick)'}")
    print("="*50)

    if deep:
        print("\n⚠️ Deep verification may take several minutes for large datasets\n")

    # Load local data
    stadiums = _load_json_or_empty(data_dir / 'stadiums_canonical.json')
    teams = _load_json_or_empty(data_dir / 'teams_canonical.json')

    # Load games from canonical/games/*.json
    canonical_games_dir = data_dir / 'canonical' / 'games'
    games = []
    if canonical_games_dir.exists():
        for games_file in sorted(canonical_games_dir.glob('*.json')):
            with open(games_file) as f:
                games.extend(json.load(f))

    league_structure = _load_json_or_empty(data_dir / 'league_structure.json')
    team_aliases = _load_json_or_empty(data_dir / 'team_aliases.json')
    stadium_aliases = _load_json_or_empty(data_dir / 'stadium_aliases.json')

    # Deduplicate games by canonical_id, keeping the first occurrence
    seen_ids = set()
    unique_games = []
    for g in games:
        game_id = g.get('canonical_id', g.get('id', ''))
        if game_id not in seen_ids:
            seen_ids.add(game_id)
            unique_games.append(g)
    games = unique_games

    # Insertion order here is also the order in which types are verified.
    data_map = {
        'Stadium': stadiums,
        'Team': teams,
        'Game': games,
        'LeagueStructure': league_structure,
        'TeamAlias': team_aliases,
        'StadiumAlias': stadium_aliases,
    }
    local_counts = {record_type: len(data) for record_type, data in data_map.items()}

    print(f"Local data: {local_counts['Stadium']} stadiums, {local_counts['Team']} teams, {local_counts['Game']} games")
    print(f" {local_counts['LeagueStructure']} league structures, {local_counts['TeamAlias']} team aliases, {local_counts['StadiumAlias']} stadium aliases\n")

    results = []
    total_mismatches = 0

    for record_type, local_data in data_map.items():
        local_count = local_counts[record_type]
        if local_count == 0:
            print(f"{record_type}: No local data, skipping")
            continue

        # Query CloudKit count
        print(f"Checking {record_type}...")
        cloud_records = ck.query_all(record_type, verbose=verbose)
        cloud_count = len(cloud_records)
        # NOTE(review): the lookups below need a recordName -> record mapping.
        # query_all is not visible from here; if it ever returns a plain list,
        # index it by recordName instead of failing on .get().
        if isinstance(cloud_records, list):
            cloud_records = {rec.get('recordName'): rec for rec in cloud_records}

        # Count comparison
        if cloud_count == local_count:
            status = "[OK]"
        elif cloud_count < local_count:
            status = f"[MISMATCH: {local_count - cloud_count} missing in CloudKit]"
            total_mismatches += 1
        else:
            status = f"[MISMATCH: {cloud_count - local_count} extra in CloudKit]"
            total_mismatches += 1

        print(f" {record_type}: CloudKit={cloud_count}, Local={local_count} {status}")

        # Spot-check or deep verification
        local_records = _build_local_record_map(record_type, local_data)

        if deep:
            # Full field-by-field comparison
            field_mismatches = []
            for record_name, local_fields in local_records.items():
                cloud_rec = cloud_records.get(record_name)
                if not cloud_rec:
                    field_mismatches.append(f" {record_name}: Missing in CloudKit")
                    continue
                for field_name, expected_value, cloud_value in _diff_fields(local_fields, cloud_rec):
                    field_mismatches.append(f" {record_name}.{field_name}: expected '{expected_value}', got '{cloud_value}'")

            if field_mismatches:
                print(f" Field mismatches ({len(field_mismatches)}):")
                for m in field_mismatches[:10]:  # Show first 10
                    print(m)
                if len(field_mismatches) > 10:
                    print(f" ... and {len(field_mismatches) - 10} more")
                total_mismatches += len(field_mismatches)
            else:
                print(f" All fields verified [OK]")

        elif cloud_count == local_count and cloud_count > 0:
            # Spot-check up to 5 random records. Duplicate local ids collapse
            # in the record map, so clamp to the number of distinct names;
            # random.sample raises ValueError when asked for more than exist.
            sample_size = min(5, cloud_count, len(local_records))
            sample_names = random.sample(list(local_records), sample_size)

            spot_check_ok = True
            for record_name in sample_names:
                cloud_rec = cloud_records.get(record_name)
                if not cloud_rec:
                    print(f" Spot-check failed: {record_name} missing in CloudKit")
                    spot_check_ok = False
                    continue
                for field_name, expected_value, cloud_value in _diff_fields(local_records[record_name], cloud_rec):
                    print(f" Spot-check mismatch: {record_name}.{field_name}: expected '{expected_value}', got '{cloud_value}'")
                    spot_check_ok = False

            if spot_check_ok:
                print(f" Spot-check ({sample_size} records): [OK]")
            else:
                # A failed spot-check counts once per type, not once per field.
                total_mismatches += 1

        results.append({
            'type': record_type,
            'local': local_count,
            'cloud': cloud_count,
            'match': cloud_count == local_count,
        })

    # Summary (the per-type status reflects the count comparison only)
    print("\n" + "="*50)
    print("Verification Summary")
    print("="*50)
    for r in results:
        status = "[OK]" if r['match'] else "[MISMATCH]"
        print(f" {r['type']}: Local={r['local']}, CloudKit={r['cloud']} {status}")

    if total_mismatches == 0:
        print("\n✓ All data verified - CloudKit matches local data")
    else:
        print(f"\n⚠ Found {total_mismatches} mismatch(es)")

    return total_mismatches == 0
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
p = argparse.ArgumentParser(description='Import JSON to CloudKit')
|
p = argparse.ArgumentParser(description='Import JSON to CloudKit')
|
||||||
p.add_argument('--key-id', default=DEFAULT_KEY_ID)
|
p.add_argument('--key-id', default=DEFAULT_KEY_ID)
|
||||||
@@ -1070,6 +1334,8 @@ def main():
|
|||||||
p.add_argument('--diff', action='store_true', help='Show diff between local and CloudKit without importing')
|
p.add_argument('--diff', action='store_true', help='Show diff between local and CloudKit without importing')
|
||||||
p.add_argument('--smart-sync', action='store_true', help='Differential sync: only upload new/changed records')
|
p.add_argument('--smart-sync', action='store_true', help='Differential sync: only upload new/changed records')
|
||||||
p.add_argument('--delete-orphans', action='store_true', help='With --smart-sync, also delete records not in local data')
|
p.add_argument('--delete-orphans', action='store_true', help='With --smart-sync, also delete records not in local data')
|
||||||
|
p.add_argument('--verify', action='store_true', help='Verify CloudKit matches local data (quick: counts + spot-check)')
|
||||||
|
p.add_argument('--verify-deep', action='store_true', help='Verify CloudKit matches local data (deep: full field comparison)')
|
||||||
p.add_argument('--dry-run', action='store_true')
|
p.add_argument('--dry-run', action='store_true')
|
||||||
p.add_argument('--verbose', '-v', action='store_true')
|
p.add_argument('--verbose', '-v', action='store_true')
|
||||||
p.add_argument('--interactive', '-i', action='store_true', help='Show interactive menu')
|
p.add_argument('--interactive', '-i', action='store_true', help='Show interactive menu')
|
||||||
@@ -1079,7 +1345,8 @@ def main():
|
|||||||
has_action_flag = any([
|
has_action_flag = any([
|
||||||
args.stadiums_only, args.games_only, args.games_files, args.league_structure_only,
|
args.stadiums_only, args.games_only, args.games_files, args.league_structure_only,
|
||||||
args.team_aliases_only, args.stadium_aliases_only, args.canonical_only,
|
args.team_aliases_only, args.stadium_aliases_only, args.canonical_only,
|
||||||
args.delete_all, args.delete_only, args.dry_run, args.diff, args.smart_sync
|
args.delete_all, args.delete_only, args.dry_run, args.diff, args.smart_sync,
|
||||||
|
args.verify, args.verify_deep
|
||||||
])
|
])
|
||||||
|
|
||||||
# Track selected game files (for option 4 or --games-files)
|
# Track selected game files (for option 4 or --games-files)
|
||||||
@@ -1126,6 +1393,10 @@ def main():
|
|||||||
elif choice == 13: # Smart sync + delete orphans
|
elif choice == 13: # Smart sync + delete orphans
|
||||||
args.smart_sync = True
|
args.smart_sync = True
|
||||||
args.delete_orphans = True
|
args.delete_orphans = True
|
||||||
|
elif choice == 14: # Verify sync (quick)
|
||||||
|
args.verify = True
|
||||||
|
elif choice == 15: # Verify sync (deep)
|
||||||
|
args.verify_deep = True
|
||||||
|
|
||||||
print(f"\n{'='*50}")
|
print(f"\n{'='*50}")
|
||||||
print(f"CloudKit Import {'(DRY RUN)' if args.dry_run else ''}")
|
print(f"CloudKit Import {'(DRY RUN)' if args.dry_run else ''}")
|
||||||
@@ -1208,6 +1479,18 @@ def main():
|
|||||||
show_diff_report(ck, args.data_dir, verbose=args.verbose)
|
show_diff_report(ck, args.data_dir, verbose=args.verbose)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Handle verify mode
|
||||||
|
if args.verify or args.verify_deep:
|
||||||
|
if not ck:
|
||||||
|
# Need CloudKit connection for verification
|
||||||
|
if not HAS_CRYPTO:
|
||||||
|
sys.exit("Error: pip install cryptography")
|
||||||
|
if not os.path.exists(args.key_file):
|
||||||
|
sys.exit(f"Error: Key file not found: {args.key_file}")
|
||||||
|
ck = CloudKit(args.key_id, open(args.key_file, 'rb').read(), args.container, args.env)
|
||||||
|
verify_sync(ck, args.data_dir, verbose=args.verbose, deep=args.verify_deep)
|
||||||
|
return
|
||||||
|
|
||||||
# Handle smart sync mode (differential upload)
|
# Handle smart sync mode (differential upload)
|
||||||
if args.smart_sync:
|
if args.smart_sync:
|
||||||
if not ck:
|
if not ck:
|
||||||
|
|||||||
Reference in New Issue
Block a user