wip
This commit is contained in:
@@ -8,10 +8,25 @@ from .report import (
|
||||
validate_games,
|
||||
)
|
||||
|
||||
from .schema import (
|
||||
SchemaValidationError,
|
||||
validate_canonical_stadium,
|
||||
validate_canonical_team,
|
||||
validate_canonical_game,
|
||||
validate_and_raise,
|
||||
validate_batch,
|
||||
)
|
||||
|
||||
# Names exported by this package's __init__.
__all__ = [
    # Report / duplicate-detection helpers (their import begins above this
    # excerpt; presumably all from .report -- verify).
    "ValidationReport",
    "ValidationSummary",
    "generate_report",
    "detect_duplicate_games",
    "validate_games",
    # Schema validation helpers imported from .schema.
    "SchemaValidationError",
    "validate_canonical_stadium",
    "validate_canonical_team",
    "validate_canonical_game",
    "validate_and_raise",
    "validate_batch",
]
|
||||
|
||||
246
Scripts/sportstime_parser/validators/schema.py
Normal file
246
Scripts/sportstime_parser/validators/schema.py
Normal file
@@ -0,0 +1,246 @@
|
||||
"""JSON Schema validation for canonical output matching iOS app expectations.
|
||||
|
||||
This module defines schemas that match the Swift structs in BootstrapService.swift:
|
||||
- JSONCanonicalStadium
|
||||
- JSONCanonicalTeam
|
||||
- JSONCanonicalGame
|
||||
|
||||
Validation is performed at runtime before outputting JSON to ensure
|
||||
Python output matches what the iOS app expects.
|
||||
"""
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Callable, Optional, Union
|
||||
|
||||
|
||||
class SchemaValidationError(Exception):
    """Signal that a canonical record did not satisfy its schema.

    The instance keeps the ``model_type`` label and the ``errors`` list it
    was built with.  The exception text is a header line followed by one
    bullet line per error message.
    """

    def __init__(self, model_type: str, errors: list[str]):
        self.model_type = model_type
        self.errors = errors
        header = f"{model_type} schema validation failed:\n"
        bullets = [f" - {problem}" for problem in errors]
        super().__init__(header + "\n".join(bullets))
|
||||
|
||||
|
||||
# ISO8601 UTC datetime pattern: YYYY-MM-DDTHH:MM:SSZ
# The patterns end with \Z instead of $ because $ also matches just before a
# trailing newline, which would let "...Z\n" through.  re.ASCII limits \d to
# 0-9; in a str pattern \d otherwise matches any Unicode decimal digit.
# Only the shape is checked here, not calendar validity (e.g. month 13).
ISO8601_UTC_PATTERN = re.compile(
    r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z\Z", re.ASCII
)

# Season format patterns (anchored and ASCII-only for the same reasons)
SEASON_SPLIT_PATTERN = re.compile(r"^\d{4}-\d{2}\Z", re.ASCII)  # e.g., "2025-26"
SEASON_SINGLE_PATTERN = re.compile(r"^\d{4}\Z", re.ASCII)  # e.g., "2025"
|
||||
|
||||
|
||||
@dataclass
class FieldSpec:
    """One entry of a canonical schema: a dictionary key and its checks."""

    # Key looked up in the dictionary being validated.
    name: str
    # When True, a missing key is reported as an error.
    required: bool
    # A type, or a tuple of types, that the value must be an instance of.
    field_type: Union[type, tuple]
    # Optional extra check called with the value; a falsy result is a failure.
    validator: Optional[Callable] = None
|
||||
|
||||
|
||||
# Schema definitions matching Swift structs in BootstrapService.swift
|
||||
|
||||
# Field table for a canonical stadium dictionary, in validation order.
STADIUM_SCHEMA: list[FieldSpec] = [
    FieldSpec(key, required=is_required, field_type=accepted)
    for key, is_required, accepted in (
        # (field name, required?, accepted type or types)
        ("canonical_id", True, str),
        ("name", True, str),
        ("city", True, str),
        ("state", True, str),
        ("latitude", True, (int, float)),
        ("longitude", True, (int, float)),
        ("capacity", True, int),
        ("sport", True, str),
        ("primary_team_abbrevs", True, list),
        ("year_opened", False, (int, type(None))),
    )
]
|
||||
|
||||
# Field table for a canonical team dictionary, in validation order.
TEAM_SCHEMA: list[FieldSpec] = [
    FieldSpec(key, required=is_required, field_type=accepted)
    for key, is_required, accepted in (
        # (field name, required?, accepted type or types)
        ("canonical_id", True, str),
        ("name", True, str),
        ("abbreviation", True, str),
        ("sport", True, str),
        ("city", True, str),
        ("stadium_canonical_id", True, str),
        ("conference_id", False, (str, type(None))),
        ("division_id", False, (str, type(None))),
        ("primary_color", False, (str, type(None))),
        ("secondary_color", False, (str, type(None))),
    )
]
|
||||
|
||||
# Field table for a canonical game dictionary, in validation order.
GAME_SCHEMA: list[FieldSpec] = [
    FieldSpec(key, required=is_required, field_type=accepted, validator=check)
    for key, is_required, accepted, check in (
        # (field name, required?, accepted type or types, extra check or None)
        ("canonical_id", True, str, None),
        ("sport", True, str, None),
        (
            "season",
            True,
            str,
            # Either the split form or the single-year form is accepted.
            lambda v: SEASON_SPLIT_PATTERN.match(v) or SEASON_SINGLE_PATTERN.match(v),
        ),
        (
            "game_datetime_utc",
            True,
            str,
            lambda v: ISO8601_UTC_PATTERN.match(v),
        ),
        ("home_team_canonical_id", True, str, None),
        ("away_team_canonical_id", True, str, None),
        ("stadium_canonical_id", True, str, None),
        ("is_playoff", True, bool, None),
        ("broadcast", False, (str, type(None)), None),
    )
]
|
||||
|
||||
|
||||
def validate_field(data: dict[str, Any], spec: FieldSpec) -> list[str]:
    """Validate a single field against its specification.

    Checks run in order and stop at the first failure: presence, then type,
    then the optional custom validator.

    Booleans are accepted only when the spec allows ``bool`` (or another
    non-``int`` base such as ``object``).  ``bool`` is a subclass of ``int``,
    so a plain ``isinstance`` test would let ``True``/``False`` through for
    integer and float fields, and they would serialise as JSON
    ``true``/``false`` rather than as numbers.

    Args:
        data: The dictionary to validate
        spec: The field specification

    Returns:
        List of error messages (empty if valid)
    """
    if spec.name not in data:
        if spec.required:
            return [f"Missing required field: {spec.name}"]
        return []

    value = data[spec.name]

    # Check type
    type_ok = isinstance(value, spec.field_type)
    if type_ok and isinstance(value, bool):
        # A bool must not satisfy the spec merely by being an int subclass.
        allowed = spec.field_type if isinstance(spec.field_type, tuple) else (spec.field_type,)
        type_ok = any(t is not int and isinstance(value, t) for t in allowed)

    if not type_ok:
        expected = spec.field_type.__name__ if isinstance(spec.field_type, type) else str(spec.field_type)
        actual = type(value).__name__
        return [f"Field '{spec.name}' has wrong type: expected {expected}, got {actual} (value: {value!r})"]

    # Check custom validator; None is skipped because there is nothing to
    # inspect (it only gets this far when the spec's types include NoneType).
    if spec.validator and value is not None:
        if not spec.validator(value):
            return [f"Field '{spec.name}' failed validation: {value!r}"]

    return []
|
||||
|
||||
|
||||
def validate_canonical_stadium(data: dict[str, Any]) -> list[str]:
    """Check a stadium dictionary against ``STADIUM_SCHEMA``.

    Args:
        data: Stadium dictionary from to_canonical_dict()

    Returns:
        Error messages in schema order, followed by any per-element
        problems found in ``primary_team_abbrevs``; empty when valid.
    """
    problems = [
        message
        for spec in STADIUM_SCHEMA
        for message in validate_field(data, spec)
    ]

    # The schema only requires primary_team_abbrevs to be a list; the element
    # types are checked here.  A non-list value is already reported above.
    abbrevs = data.get("primary_team_abbrevs")
    if isinstance(abbrevs, list):
        problems.extend(
            f"primary_team_abbrevs[{position}] must be string, got {type(entry).__name__}"
            for position, entry in enumerate(abbrevs)
            if not isinstance(entry, str)
        )

    return problems
|
||||
|
||||
|
||||
def validate_canonical_team(data: dict[str, Any]) -> list[str]:
    """Check a team dictionary against every entry of ``TEAM_SCHEMA``.

    Args:
        data: Team dictionary from to_canonical_dict()

    Returns:
        Error messages in schema order; empty when the dictionary is valid.
    """
    return [
        message
        for spec in TEAM_SCHEMA
        for message in validate_field(data, spec)
    ]
|
||||
|
||||
|
||||
def validate_canonical_game(data: dict[str, Any]) -> list[str]:
    """Check a game dictionary against every entry of ``GAME_SCHEMA``.

    Args:
        data: Game dictionary from to_canonical_dict()

    Returns:
        Error messages in schema order; empty when the dictionary is valid.
    """
    return [
        message
        for spec in GAME_SCHEMA
        for message in validate_field(data, spec)
    ]
|
||||
|
||||
|
||||
def validate_and_raise(data: dict[str, Any], model_type: str) -> None:
    """Validate one canonical dictionary, raising instead of returning errors.

    Args:
        data: Dictionary from to_canonical_dict()
        model_type: One of 'stadium', 'team', 'game'

    Raises:
        SchemaValidationError: If the selected validator reports any error
        ValueError: If model_type is not one of the known names
    """
    dispatch = {
        "stadium": validate_canonical_stadium,
        "team": validate_canonical_team,
        "game": validate_canonical_game,
    }

    check = dispatch.get(model_type)
    if check is None:
        raise ValueError(f"Unknown model type: {model_type}")

    problems = check(data)
    if problems:
        raise SchemaValidationError(model_type, problems)
|
||||
|
||||
|
||||
def validate_batch(
    items: list[dict[str, Any]],
    model_type: str,
    fail_fast: bool = True,
) -> list[tuple[int, list[str]]]:
    """Validate a sequence of canonical dictionaries of one model type.

    Args:
        items: List of dictionaries from to_canonical_dict()
        model_type: One of 'stadium', 'team', 'game'
        fail_fast: If True, raise at the first invalid item; if False, keep
            going and collect the errors of every invalid item

    Returns:
        List of (index, errors) tuples, one per invalid item, in input order.
        Valid items do not appear.

    Raises:
        SchemaValidationError: If fail_fast=True and an item is invalid; each
            message is prefixed with "Item <index>: "
        ValueError: If model_type is not one of the known names (checked
            before any item is looked at)
    """
    dispatch = {
        "stadium": validate_canonical_stadium,
        "team": validate_canonical_team,
        "game": validate_canonical_game,
    }

    check = dispatch.get(model_type)
    if check is None:
        raise ValueError(f"Unknown model type: {model_type}")

    failures = []
    for position, record in enumerate(items):
        problems = check(record)
        if not problems:
            continue
        if fail_fast:
            raise SchemaValidationError(
                model_type,
                [f"Item {position}: {problem}" for problem in problems],
            )
        failures.append((position, problems))

    return failures
|
||||
Reference in New Issue
Block a user