This commit is contained in:
Trey t
2026-01-19 22:12:53 -06:00
parent 11c0ae70d2
commit a8b0491571
19 changed files with 1328 additions and 525 deletions

View File

@@ -0,0 +1,246 @@
"""JSON Schema validation for canonical output matching iOS app expectations.
This module defines schemas that match the Swift structs in BootstrapService.swift:
- JSONCanonicalStadium
- JSONCanonicalTeam
- JSONCanonicalGame
Validation is performed at runtime before outputting JSON to ensure
Python output matches what the iOS app expects.
"""
import re
from dataclasses import dataclass
from typing import Any, Callable, Optional, Union
class SchemaValidationError(Exception):
    """Signal that a canonical record did not satisfy its schema.

    Attributes:
        model_type: The model label passed to the constructor.
        errors: The individual validation messages passed to the constructor.
    """

    def __init__(self, model_type: str, errors: list[str]):
        self.model_type = model_type
        self.errors = errors
        # The exception text is a header naming the model followed by one
        # bullet line per reported problem.
        bullets = [f" - {problem}" for problem in errors]
        summary = f"{model_type} schema validation failed:\n" + "\n".join(bullets)
        super().__init__(summary)
# ISO8601 UTC datetime pattern: YYYY-MM-DDTHH:MM:SSZ
# Shape check only: digit groups are not range-checked (e.g. month 13 passes).
# \Z is used instead of $ because $ also matches just before a trailing
# newline, which would let "…Z\n" through. re.ASCII restricts \d to 0-9;
# without it \d matches any Unicode decimal digit.
ISO8601_UTC_PATTERN = re.compile(
    r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z\Z", re.ASCII
)

# Season format patterns (same \Z / re.ASCII reasoning as above)
SEASON_SPLIT_PATTERN = re.compile(r"^\d{4}-\d{2}\Z", re.ASCII)  # e.g., "2025-26"
SEASON_SINGLE_PATTERN = re.compile(r"^\d{4}\Z", re.ASCII)  # e.g., "2025"
@dataclass
class FieldSpec:
    """Describe one key of a canonical dictionary and how to check it."""

    # Key looked up in the dictionary being validated.
    name: str
    # True when the key's absence should be reported as an error.
    required: bool
    # A single type, or a tuple of types, as accepted by isinstance().
    field_type: Union[type, tuple[type, ...]]
    # Optional extra check: called with the value; a truthy result means valid.
    validator: Optional[Callable[[Any], Any]] = None
# Schema definitions matching Swift structs in BootstrapService.swift
# Each row below is (key, required, accepted type(s)); list order is the order
# in which fields are checked and therefore the order of reported errors.
STADIUM_SCHEMA: list[FieldSpec] = [
    FieldSpec(name=field_name, required=is_required, field_type=accepted)
    for field_name, is_required, accepted in (
        ("canonical_id", True, str),
        ("name", True, str),
        ("city", True, str),
        ("state", True, str),
        ("latitude", True, (int, float)),
        ("longitude", True, (int, float)),
        ("capacity", True, int),
        ("sport", True, str),
        ("primary_team_abbrevs", True, list),
        # Optional and nullable: may be omitted or explicitly None.
        ("year_opened", False, (int, type(None))),
    )
]
# Team fields are all strings: six mandatory keys first, then four optional
# keys that may also be None. Order is preserved because it determines the
# order of reported errors.
TEAM_SCHEMA: list[FieldSpec] = [
    *(
        FieldSpec(name=mandatory_key, required=True, field_type=str)
        for mandatory_key in (
            "canonical_id",
            "name",
            "abbreviation",
            "sport",
            "city",
            "stadium_canonical_id",
        )
    ),
    *(
        FieldSpec(name=optional_key, required=False, field_type=(str, type(None)))
        for optional_key in (
            "conference_id",
            "division_id",
            "primary_color",
            "secondary_color",
        )
    ),
]
# Game fields. Two of them carry a format validator in addition to the type
# check: "season" must match one of the two season patterns, and
# "game_datetime_utc" must match the ISO8601 UTC pattern.
GAME_SCHEMA: list[FieldSpec] = [
    FieldSpec(name="canonical_id", required=True, field_type=str),
    FieldSpec(name="sport", required=True, field_type=str),
    FieldSpec(
        name="season",
        required=True,
        field_type=str,
        validator=lambda season: (
            SEASON_SPLIT_PATTERN.match(season) or SEASON_SINGLE_PATTERN.match(season)
        ),
    ),
    FieldSpec(
        name="game_datetime_utc",
        required=True,
        field_type=str,
        validator=lambda stamp: ISO8601_UTC_PATTERN.match(stamp),
    ),
    FieldSpec(name="home_team_canonical_id", required=True, field_type=str),
    FieldSpec(name="away_team_canonical_id", required=True, field_type=str),
    FieldSpec(name="stadium_canonical_id", required=True, field_type=str),
    FieldSpec(name="is_playoff", required=True, field_type=bool),
    # Optional and nullable: may be omitted or explicitly None.
    FieldSpec(name="broadcast", required=False, field_type=(str, type(None))),
]
def validate_field(data: dict[str, Any], spec: FieldSpec) -> list[str]:
    """Validate a single field against its specification.

    Checks run in order and stop at the first failure: presence, then type,
    then the optional custom validator.

    Args:
        data: The dictionary to validate
        spec: The field specification

    Returns:
        List of error messages (empty if valid)
    """
    if spec.name not in data:
        # A missing key is only an error for required fields.
        return [f"Missing required field: {spec.name}"] if spec.required else []

    value = data[spec.name]
    allowed = spec.field_type if isinstance(spec.field_type, tuple) else (spec.field_type,)

    # bool is a subclass of int, so isinstance(True, int) is True. A boolean
    # serializes to JSON true/false rather than a number, so it is accepted
    # only when the spec names bool (or object) explicitly.
    bool_as_number = (
        isinstance(value, bool) and bool not in allowed and object not in allowed
    )
    if bool_as_number or not isinstance(value, spec.field_type):
        expected = (
            spec.field_type.__name__
            if isinstance(spec.field_type, type)
            else str(spec.field_type)
        )
        actual = type(value).__name__
        return [
            f"Field '{spec.name}' has wrong type: expected {expected}, got {actual} (value: {value!r})"
        ]

    # The custom validator is skipped for None so nullable fields pass as-is.
    if spec.validator and value is not None and not spec.validator(value):
        return [f"Field '{spec.name}' failed validation: {value!r}"]

    return []
def validate_canonical_stadium(data: dict[str, Any]) -> list[str]:
    """Check a canonical stadium dictionary against STADIUM_SCHEMA.

    Args:
        data: Stadium dictionary from to_canonical_dict()

    Returns:
        List of error messages (empty if valid)
    """
    # Per-field checks, reported in schema order.
    problems = [
        message
        for spec in STADIUM_SCHEMA
        for message in validate_field(data, spec)
    ]

    # The schema only checks that primary_team_abbrevs is a list; each
    # element must additionally be a string.
    abbrevs = data.get("primary_team_abbrevs")
    if isinstance(abbrevs, list):
        problems.extend(
            f"primary_team_abbrevs[{position}] must be string, got {type(entry).__name__}"
            for position, entry in enumerate(abbrevs)
            if not isinstance(entry, str)
        )
    return problems
def validate_canonical_team(data: dict[str, Any]) -> list[str]:
    """Check a canonical team dictionary against TEAM_SCHEMA.

    Args:
        data: Team dictionary from to_canonical_dict()

    Returns:
        List of error messages (empty if valid)
    """
    # Flatten the per-field results, keeping schema order.
    return [
        message
        for spec in TEAM_SCHEMA
        for message in validate_field(data, spec)
    ]
def validate_canonical_game(data: dict[str, Any]) -> list[str]:
    """Check a canonical game dictionary against GAME_SCHEMA.

    Args:
        data: Game dictionary from to_canonical_dict()

    Returns:
        List of error messages (empty if valid)
    """
    # Flatten the per-field results, keeping schema order.
    return [
        message
        for spec in GAME_SCHEMA
        for message in validate_field(data, spec)
    ]
def validate_and_raise(data: dict[str, Any], model_type: str) -> None:
    """Validate one canonical dictionary, raising if it is invalid.

    Args:
        data: Dictionary from to_canonical_dict()
        model_type: One of 'stadium', 'team', 'game'

    Raises:
        SchemaValidationError: If validation fails
        ValueError: If model_type is unknown
    """
    dispatch = {
        "stadium": validate_canonical_stadium,
        "team": validate_canonical_team,
        "game": validate_canonical_game,
    }
    check = dispatch.get(model_type)
    if check is None:
        raise ValueError(f"Unknown model type: {model_type}")

    problems = check(data)
    if problems:
        raise SchemaValidationError(model_type, problems)
def validate_batch(
    items: list[dict[str, Any]],
    model_type: str,
    fail_fast: bool = True,
) -> list[tuple[int, list[str]]]:
    """Validate a sequence of canonical dictionaries of one model type.

    Args:
        items: List of dictionaries from to_canonical_dict()
        model_type: One of 'stadium', 'team', 'game'
        fail_fast: If True, raise on the first invalid item; if False,
            keep going and collect the errors of every invalid item

    Returns:
        List of (index, errors) tuples for items with validation errors

    Raises:
        SchemaValidationError: If fail_fast=True and an item fails validation
        ValueError: If model_type is unknown
    """
    dispatch = {
        "stadium": validate_canonical_stadium,
        "team": validate_canonical_team,
        "game": validate_canonical_game,
    }
    check = dispatch.get(model_type)
    if check is None:
        raise ValueError(f"Unknown model type: {model_type}")

    failures = []
    for position, record in enumerate(items):
        problems = check(record)
        if not problems:
            continue
        if fail_fast:
            # Prefix each message with the item's index so the failing
            # record can be located in the batch.
            raise SchemaValidationError(
                model_type,
                [f"Item {position}: {problem}" for problem in problems],
            )
        failures.append((position, problems))
    return failures