#!/usr/bin/env python3
"""Validate alias files for orphan references and format issues.

This script checks stadium_aliases.json and team_aliases.json for:
1. Orphan references (aliases pointing to non-existent canonical IDs)
2. JSON syntax errors
3. Required field presence

Usage:
    python validate_aliases.py

Returns exit code 0 on success, 1 on failure.
"""

import json
import sys
from pathlib import Path

# Put the directory containing this script at the front of sys.path so the
# sportstime_parser imports below resolve.
sys.path.insert(0, str(Path(__file__).parent))

from sportstime_parser.normalizers.stadium_resolver import STADIUM_MAPPINGS
from sportstime_parser.normalizers.team_resolver import TEAM_MAPPINGS


def _check_alias_file(
    filename: str,
    kind: str,
    id_field: str,
    label_field: str,
    required_fields: tuple[str, ...],
    valid_ids: set[str],
    errors: list[str],
) -> None:
    """Load one alias file and append any problems found to ``errors``.

    Args:
        filename: Path of the JSON alias file, opened as given (a relative
            path is resolved against the current working directory).
        kind: Lower-case noun used in messages, e.g. ``"stadium"``.
        id_field: Key holding the canonical ID each alias points to.
        label_field: Key used to name the alias in orphan messages.
        required_fields: Keys every alias entry must contain, in the order
            their absence should be reported.
        valid_ids: Canonical IDs an alias is allowed to reference.
        errors: Accumulator that receives one message per problem.
    """
    # Keep the try body to the load itself: only opening and parsing can
    # raise the two exceptions handled here.
    try:
        with open(filename, encoding="utf-8") as handle:
            aliases = json.load(handle)
    except FileNotFoundError:
        errors.append(f"{filename} not found")
        return
    except json.JSONDecodeError as e:
        errors.append(f"{filename}: Invalid JSON - {e}")
        return

    print(f"✓ {filename}: Valid JSON ({len(aliases)} aliases)")

    title = kind.capitalize()
    for alias in aliases:
        # A null or numeric entry would otherwise raise TypeError on the
        # membership tests below; report it as a format error instead.
        if not isinstance(alias, dict):
            errors.append(f"{title} alias is not a JSON object: {alias!r}")
            continue

        for field in required_fields:
            if field not in alias:
                errors.append(f"{title} alias missing '{field}': {alias}")

        # Orphan check only applies when the ID is present; its absence was
        # already reported by the required-field pass above.
        if id_field in alias and alias[id_field] not in valid_ids:
            errors.append(
                f"Orphan {kind} alias: '{alias.get(label_field, '?')}' -> "
                f"'{alias[id_field]}'"
            )


def main() -> int:
    """Run validation checks on alias files.

    Reads ``stadium_aliases.json`` and ``team_aliases.json`` from the current
    working directory, prints a summary, and lists every problem found.

    Returns:
        0 when no errors were found, 1 otherwise.
    """
    errors: list[str] = []

    # Build valid stadium ID set
    valid_stadium_ids: set[str] = {
        stadium_id
        for sport_stadiums in STADIUM_MAPPINGS.values()
        for stadium_id in sport_stadiums
    }

    # Build valid team ID set
    valid_team_ids: set[str] = {
        team_data[0]  # team_id is first element
        for sport_teams in TEAM_MAPPINGS.values()
        for team_data in sport_teams.values()
    }

    print(f"Valid stadium IDs: {len(valid_stadium_ids)}")
    print(f"Valid team IDs: {len(valid_team_ids)}")
    print()

    # Check stadium aliases
    _check_alias_file(
        "stadium_aliases.json",
        kind="stadium",
        id_field="stadium_canonical_id",
        label_field="alias_name",
        required_fields=("alias_name", "stadium_canonical_id"),
        valid_ids=valid_stadium_ids,
        errors=errors,
    )

    # Check team aliases
    _check_alias_file(
        "team_aliases.json",
        kind="team",
        id_field="team_canonical_id",
        label_field="alias_value",
        required_fields=("team_canonical_id",),
        valid_ids=valid_team_ids,
        errors=errors,
    )

    # Report results
    print()
    if errors:
        print(f"❌ Validation failed with {len(errors)} error(s):")
        for error in errors:
            print(f" - {error}")
        return 1

    print("✅ All aliases valid")
    return 0


if __name__ == "__main__":
    sys.exit(main())