feat(scripts): rewrite parser as modular Python CLI

Replace monolithic scraping scripts with sportstime_parser package:

- Multi-source scrapers with automatic fallback for 7 sports
- Canonical ID generation for games, teams, and stadiums
- Fuzzy matching with configurable thresholds for name resolution
- CloudKit Web Services uploader with JWT auth, diff-based updates
- Resumable uploads with checkpoint state persistence
- Validation reports with manual review items and suggested matches
- Comprehensive test suite (249 tests)

CLI: sportstime-parser scrape|validate|upload|status|retry|clear

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-01-10 21:06:12 -06:00
parent 284a10d9e1
commit eeaf900e5a
109 changed files with 18415 additions and 266211 deletions

View File

@@ -0,0 +1,95 @@
"""Team data model for sportstime-parser."""
from dataclasses import dataclass
from typing import Optional
import json
@dataclass
class Team:
    """Represents a team with all CloudKit fields.

    The first six fields are required; the remaining fields are optional
    and default to ``None``.

    Attributes:
        id: Canonical team ID (e.g., 'team_nba_okc')
        sport: Sport code (e.g., 'nba', 'mlb')
        city: Team city (e.g., 'Oklahoma City')
        name: Team name (e.g., 'Thunder')
        full_name: Full team name (e.g., 'Oklahoma City Thunder')
        abbreviation: Official abbreviation (e.g., 'OKC')
        conference: Conference name (e.g., 'Western', 'American')
        division: Division name (e.g., 'Northwest', 'AL West')
        primary_color: Primary team color as hex (e.g., '#007AC1')
        secondary_color: Secondary team color as hex (e.g., '#EF3B24')
        logo_url: URL to team logo image
        stadium_id: Canonical ID of home stadium
    """

    id: str
    sport: str
    city: str
    name: str
    full_name: str
    abbreviation: str
    conference: Optional[str] = None
    division: Optional[str] = None
    primary_color: Optional[str] = None
    secondary_color: Optional[str] = None
    logo_url: Optional[str] = None
    stadium_id: Optional[str] = None

    def to_dict(self) -> dict:
        """Convert to dictionary for JSON serialization.

        Returns:
            A dict with one entry per field, in declaration order.
            Optional fields that are unset appear with the value ``None``.
        """
        return {
            "id": self.id,
            "sport": self.sport,
            "city": self.city,
            "name": self.name,
            "full_name": self.full_name,
            "abbreviation": self.abbreviation,
            "conference": self.conference,
            "division": self.division,
            "primary_color": self.primary_color,
            "secondary_color": self.secondary_color,
            "logo_url": self.logo_url,
            "stadium_id": self.stadium_id,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "Team":
        """Create a Team from a dictionary.

        Args:
            data: Mapping using the same keys that ``to_dict`` produces.

        Returns:
            A new instance populated from ``data``.

        Raises:
            KeyError: If any of the required keys ('id', 'sport', 'city',
                'name', 'full_name', 'abbreviation') is missing.
        """
        return cls(
            # Required fields: subscript access so a missing key fails loudly.
            id=data["id"],
            sport=data["sport"],
            city=data["city"],
            name=data["name"],
            full_name=data["full_name"],
            abbreviation=data["abbreviation"],
            # Optional fields: absent keys fall back to None.
            conference=data.get("conference"),
            division=data.get("division"),
            primary_color=data.get("primary_color"),
            secondary_color=data.get("secondary_color"),
            logo_url=data.get("logo_url"),
            stadium_id=data.get("stadium_id"),
        )

    def to_json(self) -> str:
        """Serialize to a JSON string indented by two spaces."""
        return json.dumps(self.to_dict(), indent=2)

    @classmethod
    def from_json(cls, json_str: str) -> "Team":
        """Deserialize from a JSON string.

        Args:
            json_str: JSON text encoding an object with the keys expected
                by ``from_dict``.

        Returns:
            A new instance built from the decoded object.
        """
        return cls.from_dict(json.loads(json_str))
def save_teams(teams: list[Team], filepath: str) -> None:
    """Save a list of teams to a JSON file.

    The file is opened in write mode (replacing any existing content) as
    UTF-8 and receives a JSON array of each team's ``to_dict()`` output,
    indented by two spaces.

    Args:
        teams: Teams to serialize, written in the given order.
        filepath: Path of the file to write.
    """
    with open(filepath, "w", encoding="utf-8") as f:
        json.dump([t.to_dict() for t in teams], f, indent=2)
def load_teams(filepath: str) -> list[Team]:
    """Load a list of teams from a JSON file.

    Args:
        filepath: Path of a UTF-8 JSON file containing an array of team
            objects.

    Returns:
        One ``Team`` per array element, in file order.

    Raises:
        KeyError: If an element lacks a key that ``Team.from_dict``
            requires.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        data = json.load(f)
    # Build the models after the file is closed; only parsing needs the handle.
    return [Team.from_dict(d) for d in data]