feat(scripts): rewrite parser as modular Python CLI

Replace monolithic scraping scripts with sportstime_parser package:

- Multi-source scrapers with automatic fallback for 7 sports
- Canonical ID generation for games, teams, and stadiums
- Fuzzy matching with configurable thresholds for name resolution
- CloudKit Web Services uploader with JWT auth, diff-based updates
- Resumable uploads with checkpoint state persistence
- Validation reports with manual review items and suggested matches
- Comprehensive test suite (249 tests)

CLI: sportstime-parser scrape|validate|upload|status|retry|clear

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-01-10 21:06:12 -06:00
parent 284a10d9e1
commit eeaf900e5a
109 changed files with 18415 additions and 266211 deletions

View File

@@ -0,0 +1,108 @@
"""Stadium data model for sportstime-parser."""
from dataclasses import dataclass
from typing import Optional
import json
@dataclass
class Stadium:
    """Represents a stadium with all CloudKit fields.

    The first eight attributes are required; the remaining ones are
    optional and default to ``None``.

    Attributes:
        id: Canonical stadium ID (e.g., 'stadium_nba_paycom_center')
        sport: Primary sport code (e.g., 'nba', 'mlb')
        name: Current stadium name (e.g., 'Paycom Center')
        city: City name (e.g., 'Oklahoma City')
        state: State/province code (e.g., 'OK', 'ON')
        country: Country code (e.g., 'USA', 'Canada')
        latitude: Latitude coordinate
        longitude: Longitude coordinate
        capacity: Seating capacity
        surface: Playing surface (e.g., 'grass', 'turf', 'hardwood')
        roof_type: Roof type (e.g., 'dome', 'retractable', 'open')
        opened_year: Year stadium opened
        image_url: URL to stadium image
        timezone: IANA timezone (e.g., 'America/Chicago')
    """

    # Accepted spellings for the USA, Canada, and Mexico, stored casefolded so
    # lookups can be case-insensitive. Deliberately left unannotated: the
    # dataclass machinery only turns *annotated* names into fields, so this
    # stays a plain class attribute shared by all instances.
    _ALLOWED_COUNTRIES = frozenset(
        {
            "usa",
            "us",
            "united states",
            "canada",
            "ca",
            "can",
            "mexico",
            "mx",
            "mex",
        }
    )

    id: str
    sport: str
    name: str
    city: str
    state: str
    country: str
    latitude: float
    longitude: float
    capacity: Optional[int] = None
    surface: Optional[str] = None
    roof_type: Optional[str] = None
    opened_year: Optional[int] = None
    image_url: Optional[str] = None
    timezone: Optional[str] = None

    def to_dict(self) -> dict:
        """Convert to dictionary for JSON serialization.

        Returns:
            A new dict with one entry per field, keyed by field name, in
            declaration order. Unset optional fields appear as ``None``.
        """
        return {
            "id": self.id,
            "sport": self.sport,
            "name": self.name,
            "city": self.city,
            "state": self.state,
            "country": self.country,
            "latitude": self.latitude,
            "longitude": self.longitude,
            "capacity": self.capacity,
            "surface": self.surface,
            "roof_type": self.roof_type,
            "opened_year": self.opened_year,
            "image_url": self.image_url,
            "timezone": self.timezone,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "Stadium":
        """Create a Stadium from a dictionary.

        Args:
            data: Mapping keyed by field name, as produced by ``to_dict()``.
                Values are stored as-is, without type conversion.

        Returns:
            A new instance; optional fields missing from ``data`` are
            set to ``None``.

        Raises:
            KeyError: If ``data`` is a dict lacking one of the required keys
                (``id`` through ``longitude``).
        """
        return cls(
            # Required fields: indexed directly so a missing key fails loudly.
            id=data["id"],
            sport=data["sport"],
            name=data["name"],
            city=data["city"],
            state=data["state"],
            country=data["country"],
            latitude=data["latitude"],
            longitude=data["longitude"],
            # Optional fields: absent keys fall back to None.
            capacity=data.get("capacity"),
            surface=data.get("surface"),
            roof_type=data.get("roof_type"),
            opened_year=data.get("opened_year"),
            image_url=data.get("image_url"),
            timezone=data.get("timezone"),
        )

    def to_json(self) -> str:
        """Serialize to a JSON string (object form, two-space indent)."""
        return json.dumps(self.to_dict(), indent=2)

    @classmethod
    def from_json(cls, json_str: str) -> "Stadium":
        """Deserialize from a JSON string holding a single stadium object.

        Raises:
            json.JSONDecodeError: If ``json_str`` is not valid JSON.
            KeyError: If the decoded object lacks a required key.
        """
        return cls.from_dict(json.loads(json_str))

    def is_in_allowed_region(self) -> bool:
        """Check if stadium is in USA, Canada, or Mexico.

        The comparison ignores letter case and surrounding whitespace, and
        accepts the country name plus its two- and three-letter codes
        (e.g., 'USA', 'us', ' Canada ', 'MEX').

        Returns:
            True when ``country`` is a recognized spelling of one of the
            three countries; False otherwise, including when ``country``
            is ``None`` or not a string.
        """
        country = self.country
        if not isinstance(country, str):
            # Data loaded via from_dict is not type-checked, so country may
            # be None; treat anything non-textual as outside the region.
            return False
        return country.strip().casefold() in self._ALLOWED_COUNTRIES
def save_stadiums(stadiums: list[Stadium], filepath: str) -> None:
    """Save a list of stadiums to a JSON file.

    The stadiums are written as a JSON array of their ``to_dict()``
    representations, indented by two spaces, in a UTF-8 encoded file.

    Args:
        stadiums: Stadiums to serialize, in output order.
        filepath: Destination path; an existing file is overwritten.

    Raises:
        OSError: If the file cannot be opened or written.
        TypeError: If a stadium holds a value ``json`` cannot serialize.
    """
    # Serialize before opening the file: mode "w" truncates immediately, so a
    # failure while building or encoding the payload would otherwise leave an
    # empty or half-written file behind.
    payload = json.dumps([s.to_dict() for s in stadiums], indent=2)
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(payload)
def load_stadiums(filepath: str) -> list[Stadium]:
    """Read a JSON file and rebuild the stadiums it contains.

    Args:
        filepath: Path to a UTF-8 JSON file holding an array of stadium
            objects, each in the form accepted by ``Stadium.from_dict``.

    Returns:
        One ``Stadium`` per array element, in file order.
    """
    with open(filepath, "r", encoding="utf-8") as source:
        records = json.load(source)
    # The file is closed by now; conversion works on the decoded records only.
    return list(map(Stadium.from_dict, records))