"""JSON Schema validation for canonical output matching iOS app expectations.

This module defines schemas that match the Swift structs in
BootstrapService.swift:

- JSONCanonicalStadium
- JSONCanonicalTeam
- JSONCanonicalGame

Validation is performed at runtime before outputting JSON to ensure
Python output matches what the iOS app expects.
"""

import math
import re
from collections.abc import Iterator, Mapping
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Callable, Optional, Union


class SchemaValidationError(Exception):
    """Raised when canonical output fails schema validation.

    Attributes are set from the constructor arguments:
        model_type: Name of the model that failed (e.g. 'stadium').
        errors: The individual error messages that were collected.
    """

    def __init__(self, model_type: str, errors: list[str]):
        self.model_type = model_type
        self.errors = errors
        super().__init__(
            f"{model_type} schema validation failed:\n"
            + "\n".join(f" - {e}" for e in errors)
        )


# ISO8601 UTC datetime pattern: YYYY-MM-DDTHH:MM:SSZ
# re.ASCII keeps \d from matching non-ASCII Unicode digits.
ISO8601_UTC_PATTERN = re.compile(
    r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$", re.ASCII
)

# Season format patterns
SEASON_SPLIT_PATTERN = re.compile(r"^\d{4}-\d{2}$", re.ASCII)  # e.g., "2025-26"
SEASON_SINGLE_PATTERN = re.compile(r"^\d{4}$", re.ASCII)  # e.g., "2025"


def _is_valid_season(value: str) -> bool:
    """Return True if *value* is exactly 'YYYY' or 'YYYY-YY'."""
    # fullmatch, not match: with match(), '$' also matches just before a
    # trailing newline, so "2025\n" would be accepted.
    return (
        SEASON_SPLIT_PATTERN.fullmatch(value) is not None
        or SEASON_SINGLE_PATTERN.fullmatch(value) is not None
    )


def _is_valid_utc_datetime(value: str) -> bool:
    """Return True if *value* is a real 'YYYY-MM-DDTHH:MM:SSZ' timestamp."""
    if ISO8601_UTC_PATTERN.fullmatch(value) is None:
        return False
    # The regex only checks the shape; parsing rejects impossible values
    # such as month 13 or hour 25.
    try:
        datetime.strptime(value, "%Y-%m-%dT%H:%M:%SZ")
    except ValueError:
        return False
    return True


def _is_finite_number(value: Union[int, float]) -> bool:
    """Return True unless *value* is NaN or +/-infinity."""
    # ints are always finite; checking them first avoids the OverflowError
    # math.isfinite raises for ints too large to convert to float.
    return isinstance(value, int) or math.isfinite(value)


@dataclass
class FieldSpec:
    """Specification for a field in the canonical schema."""

    # Key expected in the canonical dictionary.
    name: str
    # Whether the key must be present.
    required: bool
    # A type, or tuple of types, accepted by isinstance().
    field_type: Union[type, tuple]
    # Optional extra check; called with the value, truthy result means valid.
    # It is skipped when the value is None.
    validator: Optional[Callable[[Any], Any]] = None


# Schema definitions matching Swift structs in BootstrapService.swift
STADIUM_SCHEMA: list[FieldSpec] = [
    FieldSpec("canonical_id", required=True, field_type=str),
    FieldSpec("name", required=True, field_type=str),
    FieldSpec("city", required=True, field_type=str),
    FieldSpec("state", required=True, field_type=str),
    # NaN / infinity are serialized by json.dumps as non-standard tokens,
    # so they are rejected here.
    FieldSpec(
        "latitude",
        required=True,
        field_type=(int, float),
        validator=_is_finite_number,
    ),
    FieldSpec(
        "longitude",
        required=True,
        field_type=(int, float),
        validator=_is_finite_number,
    ),
    FieldSpec("capacity", required=True, field_type=int),
    FieldSpec("sport", required=True, field_type=str),
    FieldSpec("primary_team_abbrevs", required=True, field_type=list),
    FieldSpec("year_opened", required=False, field_type=(int, type(None))),
]

TEAM_SCHEMA: list[FieldSpec] = [
    FieldSpec("canonical_id", required=True, field_type=str),
    FieldSpec("name", required=True, field_type=str),
    FieldSpec("abbreviation", required=True, field_type=str),
    FieldSpec("sport", required=True, field_type=str),
    FieldSpec("city", required=True, field_type=str),
    FieldSpec("stadium_canonical_id", required=True, field_type=str),
    FieldSpec("conference_id", required=False, field_type=(str, type(None))),
    FieldSpec("division_id", required=False, field_type=(str, type(None))),
    FieldSpec("primary_color", required=False, field_type=(str, type(None))),
    FieldSpec("secondary_color", required=False, field_type=(str, type(None))),
]

GAME_SCHEMA: list[FieldSpec] = [
    FieldSpec("canonical_id", required=True, field_type=str),
    FieldSpec("sport", required=True, field_type=str),
    FieldSpec(
        "season",
        required=True,
        field_type=str,
        validator=_is_valid_season,
    ),
    FieldSpec(
        "game_datetime_utc",
        required=True,
        field_type=str,
        validator=_is_valid_utc_datetime,
    ),
    FieldSpec("home_team_canonical_id", required=True, field_type=str),
    FieldSpec("away_team_canonical_id", required=True, field_type=str),
    FieldSpec("stadium_canonical_id", required=True, field_type=str),
    FieldSpec("is_playoff", required=True, field_type=bool),
    FieldSpec("broadcast", required=False, field_type=(str, type(None))),
]


def _iter_types(field_type: Union[type, tuple]) -> Iterator[type]:
    """Yield every type in *field_type*, flattening nested tuples."""
    if isinstance(field_type, tuple):
        for member in field_type:
            yield from _iter_types(member)
    else:
        yield field_type


def _describe_type(field_type: Union[type, tuple]) -> str:
    """Return a readable name for *field_type*, e.g. 'int | float'."""
    return " | ".join(
        "None" if t is type(None) else t.__name__
        for t in _iter_types(field_type)
    )


def _matches_type(value: Any, field_type: Union[type, tuple]) -> bool:
    """Return True if *value* is an acceptable instance of *field_type*."""
    if not isinstance(value, field_type):
        return False
    if isinstance(value, bool):
        # bool is a subclass of int, so isinstance(True, int) is True.
        # Accept a bool only where the spec asks for bool (or object).
        return any(t is bool or t is object for t in _iter_types(field_type))
    return True


def validate_field(data: dict[str, Any], spec: FieldSpec) -> list[str]:
    """Validate a single field against its specification.

    Args:
        data: The dictionary to validate
        spec: The field specification

    Returns:
        List of error messages (empty if valid)
    """
    if spec.name not in data:
        if spec.required:
            return [f"Missing required field: {spec.name}"]
        return []

    value = data[spec.name]

    # Check type; a type failure short-circuits the custom validator.
    if not _matches_type(value, spec.field_type):
        expected = _describe_type(spec.field_type)
        actual = type(value).__name__
        return [
            f"Field '{spec.name}' has wrong type: expected {expected}, "
            f"got {actual} (value: {value!r})"
        ]

    # Check custom validator (never called with None).
    if spec.validator and value is not None:
        if not spec.validator(value):
            return [f"Field '{spec.name}' failed validation: {value!r}"]

    return []


def _validate_against_schema(data: Any, schema: list[FieldSpec]) -> list[str]:
    """Validate *data* against every field spec in *schema*."""
    # A non-mapping item is reported as a validation error instead of
    # surfacing as a TypeError from the membership test in validate_field.
    if not isinstance(data, Mapping):
        return [f"Expected a dict, got {type(data).__name__}"]

    errors: list[str] = []
    for spec in schema:
        errors.extend(validate_field(data, spec))
    return errors


def validate_canonical_stadium(data: dict[str, Any]) -> list[str]:
    """Validate a canonical stadium dictionary.

    Args:
        data: Stadium dictionary from to_canonical_dict()

    Returns:
        List of error messages (empty if valid)
    """
    errors = _validate_against_schema(data, STADIUM_SCHEMA)
    if not isinstance(data, Mapping):
        return errors

    # Additional validation: primary_team_abbrevs should contain strings
    abbrevs = data.get("primary_team_abbrevs")
    if isinstance(abbrevs, list):
        errors.extend(
            f"primary_team_abbrevs[{i}] must be string, got {type(abbrev).__name__}"
            for i, abbrev in enumerate(abbrevs)
            if not isinstance(abbrev, str)
        )

    return errors


def validate_canonical_team(data: dict[str, Any]) -> list[str]:
    """Validate a canonical team dictionary.

    Args:
        data: Team dictionary from to_canonical_dict()

    Returns:
        List of error messages (empty if valid)
    """
    return _validate_against_schema(data, TEAM_SCHEMA)


def validate_canonical_game(data: dict[str, Any]) -> list[str]:
    """Validate a canonical game dictionary.

    Args:
        data: Game dictionary from to_canonical_dict()

    Returns:
        List of error messages (empty if valid)
    """
    return _validate_against_schema(data, GAME_SCHEMA)


# Single dispatch table shared by validate_and_raise and validate_batch.
_VALIDATORS: dict[str, Callable[[dict[str, Any]], list[str]]] = {
    "stadium": validate_canonical_stadium,
    "team": validate_canonical_team,
    "game": validate_canonical_game,
}


def _get_validator(model_type: str) -> Callable[[dict[str, Any]], list[str]]:
    """Return the validator for *model_type* or raise ValueError."""
    try:
        return _VALIDATORS[model_type]
    except (KeyError, TypeError):
        # TypeError covers unhashable model_type values.
        raise ValueError(f"Unknown model type: {model_type}") from None


def validate_and_raise(data: dict[str, Any], model_type: str) -> None:
    """Validate a canonical dictionary and raise on error.

    Args:
        data: Dictionary from to_canonical_dict()
        model_type: One of 'stadium', 'team', 'game'

    Raises:
        SchemaValidationError: If validation fails
        ValueError: If model_type is unknown
    """
    errors = _get_validator(model_type)(data)
    if errors:
        raise SchemaValidationError(model_type, errors)


def validate_batch(
    items: list[dict[str, Any]],
    model_type: str,
    fail_fast: bool = True,
) -> list[tuple[int, list[str]]]:
    """Validate a batch of canonical dictionaries.

    Args:
        items: List of dictionaries from to_canonical_dict()
        model_type: One of 'stadium', 'team', 'game'
        fail_fast: If True, raise on first error; if False, collect all errors

    Returns:
        List of (index, errors) tuples for items with validation errors

    Raises:
        SchemaValidationError: If fail_fast=True and validation fails
        ValueError: If model_type is unknown
    """
    validator = _get_validator(model_type)

    all_errors: list[tuple[int, list[str]]] = []
    for i, item in enumerate(items):
        errors = validator(item)
        if not errors:
            continue
        if fail_fast:
            raise SchemaValidationError(
                model_type,
                [f"Item {i}: {e}" for e in errors],
            )
        all_errors.append((i, errors))

    return all_errors