247 lines
7.8 KiB
Python
247 lines
7.8 KiB
Python
"""JSON Schema validation for canonical output matching iOS app expectations.

This module defines schemas that match the Swift structs in BootstrapService.swift:
- JSONCanonicalStadium
- JSONCanonicalTeam
- JSONCanonicalGame

Validation is performed at runtime before outputting JSON to ensure
Python output matches what the iOS app expects.
"""
|
|
|
|
import re
|
|
from dataclasses import dataclass
|
|
from typing import Any, Callable, Optional, Union
|
|
|
|
|
|
class SchemaValidationError(Exception):
    """Signal that a canonical record did not satisfy its schema.

    The exception message is a header line naming the model type followed by
    one bullet line per validation error.

    Attributes:
        model_type: The model label passed by the caller.
        errors: The list of individual validation error messages.
    """

    def __init__(self, model_type: str, errors: list[str]):
        # Keep the structured details available to handlers, not just the text.
        self.model_type = model_type
        self.errors = errors

        header = f"{model_type} schema validation failed:\n"
        bullet_lines = [f" - {problem}" for problem in errors]
        super().__init__(header + "\n".join(bullet_lines))
|
|
|
|
|
|
# ISO8601 UTC datetime pattern: YYYY-MM-DDTHH:MM:SSZ
# `\Z` (rather than `$`) anchors at the true end of the string, so a value
# carrying a trailing newline is rejected; `$` would accept it. re.ASCII
# restricts `\d` to the digits 0-9 instead of every Unicode decimal digit.
ISO8601_UTC_PATTERN = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z\Z", re.ASCII)

# Season format patterns (same end-anchoring and ASCII-digit rules as above)
SEASON_SPLIT_PATTERN = re.compile(r"^\d{4}-\d{2}\Z", re.ASCII)  # e.g., "2025-26"
SEASON_SINGLE_PATTERN = re.compile(r"^\d{4}\Z", re.ASCII)  # e.g., "2025"
|
|
|
|
|
|
@dataclass
class FieldSpec:
    """Describe one key of a canonical dictionary and how to check it.

    A spec carries the key name, whether the key must be present, the type
    (or tuple of types) its value may have, and an optional extra check.
    """

    # Dictionary key this spec applies to.
    name: str
    # True when the key must be present in the dictionary.
    required: bool
    # A single type, or a tuple of acceptable types.
    field_type: Union[type, tuple]
    # Optional callable given the value; a falsy result means "invalid".
    validator: Union[Callable, None] = None
|
|
|
|
|
|
# Schema definitions matching Swift structs in BootstrapService.swift

# Field checks applied to a canonical stadium dictionary.
STADIUM_SCHEMA: list[FieldSpec] = [
    # Identity and location text
    FieldSpec(name="canonical_id", field_type=str, required=True),
    FieldSpec(name="name", field_type=str, required=True),
    FieldSpec(name="city", field_type=str, required=True),
    FieldSpec(name="state", field_type=str, required=True),
    # Coordinates may be whole numbers or floats
    FieldSpec(name="latitude", field_type=(int, float), required=True),
    FieldSpec(name="longitude", field_type=(int, float), required=True),
    FieldSpec(name="capacity", field_type=int, required=True),
    FieldSpec(name="sport", field_type=str, required=True),
    FieldSpec(name="primary_team_abbrevs", field_type=list, required=True),
    # Optional: may be omitted entirely or present as None
    FieldSpec(name="year_opened", field_type=(int, type(None)), required=False),
]
|
|
|
|
# Field checks applied to a canonical team dictionary.
TEAM_SCHEMA: list[FieldSpec] = [
    # Required string fields
    FieldSpec(name="canonical_id", field_type=str, required=True),
    FieldSpec(name="name", field_type=str, required=True),
    FieldSpec(name="abbreviation", field_type=str, required=True),
    FieldSpec(name="sport", field_type=str, required=True),
    FieldSpec(name="city", field_type=str, required=True),
    FieldSpec(name="stadium_canonical_id", field_type=str, required=True),
    # Optional fields: may be omitted entirely or present as None
    FieldSpec(name="conference_id", field_type=(str, type(None)), required=False),
    FieldSpec(name="division_id", field_type=(str, type(None)), required=False),
    FieldSpec(name="primary_color", field_type=(str, type(None)), required=False),
    FieldSpec(name="secondary_color", field_type=(str, type(None)), required=False),
]
|
|
|
|
# Field checks applied to a canonical game dictionary.
GAME_SCHEMA: list[FieldSpec] = [
    FieldSpec(name="canonical_id", field_type=str, required=True),
    FieldSpec(name="sport", field_type=str, required=True),
    # Season label: a split season (e.g. "2025-26") or a single year (e.g. "2025").
    FieldSpec(
        name="season",
        field_type=str,
        required=True,
        validator=lambda season: SEASON_SPLIT_PATTERN.match(season) or SEASON_SINGLE_PATTERN.match(season),
    ),
    # Start time as a UTC timestamp string in YYYY-MM-DDTHH:MM:SSZ form.
    FieldSpec(
        name="game_datetime_utc",
        field_type=str,
        required=True,
        validator=lambda stamp: ISO8601_UTC_PATTERN.match(stamp),
    ),
    FieldSpec(name="home_team_canonical_id", field_type=str, required=True),
    FieldSpec(name="away_team_canonical_id", field_type=str, required=True),
    FieldSpec(name="stadium_canonical_id", field_type=str, required=True),
    FieldSpec(name="is_playoff", field_type=bool, required=True),
    # Optional: may be omitted entirely or present as None
    FieldSpec(name="broadcast", field_type=(str, type(None)), required=False),
]
|
|
|
|
|
|
def validate_field(data: dict[str, Any], spec: FieldSpec) -> list[str]:
    """Validate a single field against its specification.

    Checks run in order and stop at the first failure: presence, then type,
    then the spec's custom validator (skipped when the value is None).

    Booleans are only accepted when the spec lists ``bool`` explicitly.
    ``bool`` is a subclass of ``int``, so a plain ``isinstance`` check would
    let ``True``/``False`` through for numeric fields, and they would then be
    serialized as JSON ``true``/``false`` instead of a number.

    Args:
        data: The dictionary to validate
        spec: The field specification

    Returns:
        List of error messages (empty if valid)
    """
    if spec.name not in data:
        # An absent optional field is fine; an absent required one is not.
        return [f"Missing required field: {spec.name}"] if spec.required else []

    value = data[spec.name]

    # Check type
    allowed = spec.field_type if isinstance(spec.field_type, tuple) else (spec.field_type,)
    if isinstance(value, bool):
        # Stop a bool from matching via its int base class; it must match
        # through bool itself (or another listed type) to be accepted.
        allowed = tuple(t for t in allowed if t is not int)

    if not isinstance(value, allowed):
        expected = spec.field_type.__name__ if isinstance(spec.field_type, type) else str(spec.field_type)
        actual = type(value).__name__
        return [f"Field '{spec.name}' has wrong type: expected {expected}, got {actual} (value: {value!r})"]

    # Check custom validator (None values are never passed to it)
    if spec.validator and value is not None and not spec.validator(value):
        return [f"Field '{spec.name}' failed validation: {value!r}"]

    return []
|
|
|
|
|
|
def validate_canonical_stadium(data: dict[str, Any]) -> list[str]:
    """Check a canonical stadium dictionary against STADIUM_SCHEMA.

    Beyond the per-field checks, every entry of ``primary_team_abbrevs``
    must be a string when that key holds a list.

    Args:
        data: Stadium dictionary from to_canonical_dict()

    Returns:
        List of error messages (empty if valid)
    """
    problems = [
        message
        for field_spec in STADIUM_SCHEMA
        for message in validate_field(data, field_spec)
    ]

    # Element-level check; a missing or non-list value is already covered
    # by the schema pass above, so it is skipped here.
    abbrevs = data.get("primary_team_abbrevs")
    if isinstance(abbrevs, list):
        problems.extend(
            f"primary_team_abbrevs[{position}] must be string, got {type(entry).__name__}"
            for position, entry in enumerate(abbrevs)
            if not isinstance(entry, str)
        )

    return problems
|
|
|
|
|
|
def validate_canonical_team(data: dict[str, Any]) -> list[str]:
    """Check a canonical team dictionary against TEAM_SCHEMA.

    Args:
        data: Team dictionary from to_canonical_dict()

    Returns:
        List of error messages (empty if valid)
    """
    # Flatten the per-field error lists, preserving schema order.
    return [
        message
        for field_spec in TEAM_SCHEMA
        for message in validate_field(data, field_spec)
    ]
|
|
|
|
|
|
def validate_canonical_game(data: dict[str, Any]) -> list[str]:
    """Check a canonical game dictionary against GAME_SCHEMA.

    Args:
        data: Game dictionary from to_canonical_dict()

    Returns:
        List of error messages (empty if valid)
    """
    # Flatten the per-field error lists, preserving schema order.
    return [
        message
        for field_spec in GAME_SCHEMA
        for message in validate_field(data, field_spec)
    ]
|
|
|
|
|
|
def validate_and_raise(data: dict[str, Any], model_type: str) -> None:
    """Validate one canonical dictionary, raising if it is invalid.

    Args:
        data: Dictionary from to_canonical_dict()
        model_type: One of 'stadium', 'team', 'game'

    Raises:
        SchemaValidationError: If validation fails
        ValueError: If model_type is unknown
    """
    dispatch = {
        "stadium": validate_canonical_stadium,
        "team": validate_canonical_team,
        "game": validate_canonical_game,
    }

    check = dispatch.get(model_type)
    if check is None:
        raise ValueError(f"Unknown model type: {model_type}")

    problems = check(data)
    if problems:
        raise SchemaValidationError(model_type, problems)
|
|
|
|
|
|
def validate_batch(
    items: list[dict[str, Any]],
    model_type: str,
    fail_fast: bool = True,
) -> list[tuple[int, list[str]]]:
    """Validate a sequence of canonical dictionaries of one model type.

    Args:
        items: List of dictionaries from to_canonical_dict()
        model_type: One of 'stadium', 'team', 'game'
        fail_fast: If True, raise on first error; if False, collect all errors

    Returns:
        List of (index, errors) tuples for items with validation errors

    Raises:
        SchemaValidationError: If fail_fast=True and validation fails
        ValueError: If model_type is unknown
    """
    dispatch = {
        "stadium": validate_canonical_stadium,
        "team": validate_canonical_team,
        "game": validate_canonical_game,
    }

    check = dispatch.get(model_type)
    if check is None:
        raise ValueError(f"Unknown model type: {model_type}")

    failures = []
    for index, record in enumerate(items):
        problems = check(record)
        if not problems:
            continue
        if fail_fast:
            # Prefix each message with the item's position in the batch.
            raise SchemaValidationError(
                model_type,
                [f"Item {index}: {problem}" for problem in problems],
            )
        failures.append((index, problems))

    return failures
|