feat(scripts): add sportstime-parser data pipeline
Complete Python package for scraping, normalizing, and uploading sports schedule data to CloudKit.

Includes:
- Multi-source scrapers for NBA, MLB, NFL, NHL, MLS, WNBA, NWSL
- Canonical ID system for teams, stadiums, and games
- Fuzzy matching with manual alias support
- CloudKit uploader with batch operations and deduplication
- Comprehensive test suite with fixtures
- WNBA abbreviation aliases for improved team resolution
- Alias validation script to detect orphan references

All 5 phases of the data remediation plan completed:
- Phase 1: Alias fixes (team/stadium alias additions)
- Phase 2: NHL stadium coordinate fixes
- Phase 3: Re-scrape validation
- Phase 4: iOS bundle update
- Phase 5: Code quality improvements (WNBA aliases)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
52
sportstime_parser/models/__init__.py
Normal file
52
sportstime_parser/models/__init__.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""Data models for sportstime-parser."""
|
||||
|
||||
from .game import Game, save_games, load_games
|
||||
from .team import Team, save_teams, load_teams
|
||||
from .stadium import Stadium, save_stadiums, load_stadiums
|
||||
from .aliases import (
|
||||
AliasType,
|
||||
ReviewReason,
|
||||
TeamAlias,
|
||||
StadiumAlias,
|
||||
FuzzyMatch,
|
||||
ManualReviewItem,
|
||||
)
|
||||
from .sport import (
|
||||
Sport,
|
||||
LeagueStructure,
|
||||
LeagueStructureType,
|
||||
save_sports,
|
||||
load_sports,
|
||||
save_league_structures,
|
||||
load_league_structures,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
# Game
|
||||
"Game",
|
||||
"save_games",
|
||||
"load_games",
|
||||
# Team
|
||||
"Team",
|
||||
"save_teams",
|
||||
"load_teams",
|
||||
# Stadium
|
||||
"Stadium",
|
||||
"save_stadiums",
|
||||
"load_stadiums",
|
||||
# Aliases
|
||||
"AliasType",
|
||||
"ReviewReason",
|
||||
"TeamAlias",
|
||||
"StadiumAlias",
|
||||
"FuzzyMatch",
|
||||
"ManualReviewItem",
|
||||
# Sport and League Structure
|
||||
"Sport",
|
||||
"LeagueStructure",
|
||||
"LeagueStructureType",
|
||||
"save_sports",
|
||||
"load_sports",
|
||||
"save_league_structures",
|
||||
"load_league_structures",
|
||||
]
|
||||
262
sportstime_parser/models/aliases.py
Normal file
262
sportstime_parser/models/aliases.py
Normal file
@@ -0,0 +1,262 @@
|
||||
"""Alias and manual review data models for sportstime-parser."""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import date, datetime
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
import json
|
||||
|
||||
|
||||
class AliasType(Enum):
    """Kind of alias recorded for a team.

    The member value is the plain string used when an alias is serialized.
    """

    NAME = "name"
    ABBREVIATION = "abbreviation"
    CITY = "city"
|
||||
|
||||
|
||||
class ReviewReason(Enum):
    """Why an item was flagged for manual review.

    The member value is the plain string used when a review item is serialized.
    """

    UNRESOLVED_TEAM = "unresolved_team"
    UNRESOLVED_STADIUM = "unresolved_stadium"
    LOW_CONFIDENCE_MATCH = "low_confidence_match"
    MISSING_DATA = "missing_data"
    DUPLICATE_GAME = "duplicate_game"
    TIMEZONE_UNKNOWN = "timezone_unknown"
    GEOGRAPHIC_FILTER = "geographic_filter"
|
||||
|
||||
|
||||
@dataclass
class TeamAlias:
    """An alternate identifier for a team, optionally limited to a date range.

    Attributes:
        id: Unique alias ID
        team_canonical_id: Canonical team ID that the alias resolves to
        alias_type: Kind of alias (name, abbreviation, city)
        alias_value: The text matched against
        valid_from: First day the alias applies (None = no lower bound)
        valid_until: Last day the alias applies (None = no upper bound)
    """

    id: str
    team_canonical_id: str
    alias_type: AliasType
    alias_value: str
    valid_from: Optional[date] = None
    valid_until: Optional[date] = None

    def is_valid_on(self, check_date: date) -> bool:
        """Return True when check_date falls inside the validity window.

        Both bounds are inclusive; a missing bound is treated as open-ended.
        """
        before_start = bool(self.valid_from) and check_date < self.valid_from
        if before_start:
            return False
        after_end = bool(self.valid_until) and check_date > self.valid_until
        return not after_end

    def to_dict(self) -> dict:
        """Build a JSON-serializable dict (dates as ISO strings, enum as its value)."""
        start = self.valid_from.isoformat() if self.valid_from else None
        end = self.valid_until.isoformat() if self.valid_until else None
        return {
            "id": self.id,
            "team_canonical_id": self.team_canonical_id,
            "alias_type": self.alias_type.value,
            "alias_value": self.alias_value,
            "valid_from": start,
            "valid_until": end,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "TeamAlias":
        """Build a TeamAlias from a dict shaped like the output of to_dict()."""
        # Missing, None, or empty date entries all mean "unbounded".
        raw_start = data.get("valid_from")
        start = date.fromisoformat(raw_start) if raw_start else None

        raw_end = data.get("valid_until")
        end = date.fromisoformat(raw_end) if raw_end else None

        return cls(
            id=data["id"],
            team_canonical_id=data["team_canonical_id"],
            alias_type=AliasType(data["alias_type"]),
            alias_value=data["alias_value"],
            valid_from=start,
            valid_until=end,
        )
|
||||
|
||||
|
||||
@dataclass
class StadiumAlias:
    """An alternate stadium name, optionally limited to a date range.

    Attributes:
        alias_name: The alias name to match against (lowercase)
        stadium_canonical_id: Canonical stadium ID that the alias resolves to
        valid_from: First day the alias applies (None = no lower bound)
        valid_until: Last day the alias applies (None = no upper bound)
    """

    alias_name: str
    stadium_canonical_id: str
    valid_from: Optional[date] = None
    valid_until: Optional[date] = None

    def is_valid_on(self, check_date: date) -> bool:
        """Return True when check_date falls inside the validity window.

        Both bounds are inclusive; a missing bound is treated as open-ended.
        """
        lower, upper = self.valid_from, self.valid_until
        if lower and check_date < lower:
            return False
        return not (upper and check_date > upper)

    def to_dict(self) -> dict:
        """Build a JSON-serializable dict with dates rendered as ISO strings."""
        serialized = {
            "alias_name": self.alias_name,
            "stadium_canonical_id": self.stadium_canonical_id,
        }
        serialized["valid_from"] = (
            self.valid_from.isoformat() if self.valid_from else None
        )
        serialized["valid_until"] = (
            self.valid_until.isoformat() if self.valid_until else None
        )
        return serialized

    @classmethod
    def from_dict(cls, data: dict) -> "StadiumAlias":
        """Build a StadiumAlias from a dict shaped like the output of to_dict()."""
        # Missing, None, or empty date entries all mean "unbounded".
        raw_start = data.get("valid_from")
        start = date.fromisoformat(raw_start) if raw_start else None

        raw_end = data.get("valid_until")
        end = date.fromisoformat(raw_end) if raw_end else None

        return cls(
            alias_name=data["alias_name"],
            stadium_canonical_id=data["stadium_canonical_id"],
            valid_from=start,
            valid_until=end,
        )
|
||||
|
||||
|
||||
@dataclass
class FuzzyMatch:
    """A candidate canonical entity suggested by fuzzy matching, with its score."""

    canonical_id: str
    canonical_name: str
    confidence: int  # 0-100

    def to_dict(self) -> dict:
        """Return the three fields as a JSON-serializable dict."""
        return dict(
            canonical_id=self.canonical_id,
            canonical_name=self.canonical_name,
            confidence=self.confidence,
        )
|
||||
|
||||
|
||||
@dataclass
class ManualReviewItem:
    """A record describing something that needs a human decision.

    Attributes:
        id: Unique review item ID
        reason: Why this item needs review
        sport: Sport code
        raw_value: The original unresolved value
        context: Additional context about the issue
        source_url: URL of the source page
        suggested_matches: Potential matches with confidence scores
        game_date: Date of the game (if applicable)
        created_at: When this review item was created
    """

    id: str
    reason: ReviewReason
    sport: str
    raw_value: str
    context: dict = field(default_factory=dict)
    source_url: Optional[str] = None
    suggested_matches: list[FuzzyMatch] = field(default_factory=list)
    game_date: Optional[date] = None
    created_at: datetime = field(default_factory=datetime.now)

    def to_dict(self) -> dict:
        """Build a JSON-serializable dict (enum as value, dates as ISO strings)."""
        matches = [candidate.to_dict() for candidate in self.suggested_matches]
        game_day = self.game_date.isoformat() if self.game_date else None
        return {
            "id": self.id,
            "reason": self.reason.value,
            "sport": self.sport,
            "raw_value": self.raw_value,
            "context": self.context,
            "source_url": self.source_url,
            "suggested_matches": matches,
            "game_date": game_day,
            "created_at": self.created_at.isoformat(),
        }

    @classmethod
    def from_dict(cls, data: dict) -> "ManualReviewItem":
        """Build a ManualReviewItem from a dict shaped like the output of to_dict().

        A missing or empty ``created_at`` falls back to the current local time.
        """
        raw_game_date = data.get("game_date")
        game_day = date.fromisoformat(raw_game_date) if raw_game_date else None

        raw_created = data.get("created_at")
        created = datetime.fromisoformat(raw_created) if raw_created else datetime.now()

        candidates = [
            FuzzyMatch(
                canonical_id=entry["canonical_id"],
                canonical_name=entry["canonical_name"],
                confidence=entry["confidence"],
            )
            for entry in data.get("suggested_matches", [])
        ]

        return cls(
            id=data["id"],
            reason=ReviewReason(data["reason"]),
            sport=data["sport"],
            raw_value=data["raw_value"],
            context=data.get("context", {}),
            source_url=data.get("source_url"),
            suggested_matches=candidates,
            game_date=game_day,
            created_at=created,
        )

    def to_markdown(self) -> str:
        """Render this item as a markdown section for the validation report."""
        heading = self.reason.value.replace('_', ' ').title()
        out = [
            f"### {heading}: {self.raw_value}",
            "",
            f"**Sport**: {self.sport.upper()}",
        ]

        if self.game_date:
            out.append(f"**Game Date**: {self.game_date.isoformat()}")

        if self.context:
            out.extend(["", "**Context**:"])
            out.extend(f"- {key}: {value}" for key, value in self.context.items())

        if self.suggested_matches:
            out.extend(["", "**Suggested Matches**:"])
            for rank, candidate in enumerate(self.suggested_matches, 1):
                # Scores of 90 and above are flagged as the probable answer.
                if candidate.confidence >= 90:
                    marker = " <- likely correct"
                else:
                    marker = ""
                out.append(
                    f"{rank}. `{candidate.canonical_id}` ({candidate.confidence}%){marker}"
                )

        if self.source_url:
            out.extend(["", f"**Source**: [{self.source_url}]({self.source_url})"])

        # Trailing rule separates consecutive items in the report.
        out.extend(["", "---", ""])

        return "\n".join(out)
|
||||
183
sportstime_parser/models/game.py
Normal file
183
sportstime_parser/models/game.py
Normal file
@@ -0,0 +1,183 @@
|
||||
"""Game data model for sportstime-parser."""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
from zoneinfo import ZoneInfo
|
||||
import json
|
||||
|
||||
|
||||
@dataclass
class Game:
    """Represents a game with all CloudKit fields.

    Attributes:
        id: Canonical game ID (e.g., 'nba_2025_hou_okc_1021')
        sport: Sport code (e.g., 'nba', 'mlb')
        season: Season start year (e.g., 2025 for 2025-26)
        home_team_id: Canonical home team ID
        away_team_id: Canonical away team ID
        stadium_id: Canonical stadium ID
        game_date: Game date/time. Timezone-aware values are converted to UTC
            by to_canonical_dict(); naive values are interpreted there as
            local time at the stadium.
        game_number: Game number for doubleheaders (1 or 2), None for single games
        home_score: Final home team score (None if not played)
        away_score: Final away team score (None if not played)
        status: Game status ('scheduled', 'final', 'postponed', 'cancelled')
        source_url: URL of the source page for manual review
        raw_home_team: Original home team name from source (for debugging)
        raw_away_team: Original away team name from source (for debugging)
        raw_stadium: Original stadium name from source (for debugging)
    """

    id: str
    sport: str
    season: int
    home_team_id: str
    away_team_id: str
    stadium_id: str
    game_date: datetime
    game_number: Optional[int] = None
    home_score: Optional[int] = None
    away_score: Optional[int] = None
    status: str = "scheduled"
    source_url: Optional[str] = None
    raw_home_team: Optional[str] = None
    raw_away_team: Optional[str] = None
    raw_stadium: Optional[str] = None

    def to_dict(self) -> dict:
        """Convert to dictionary for JSON serialization (internal format)."""
        return {
            "id": self.id,
            "sport": self.sport,
            "season": self.season,
            "home_team_id": self.home_team_id,
            "away_team_id": self.away_team_id,
            "stadium_id": self.stadium_id,
            "game_date": self.game_date.isoformat(),
            "game_number": self.game_number,
            "home_score": self.home_score,
            "away_score": self.away_score,
            "status": self.status,
            "source_url": self.source_url,
            "raw_home_team": self.raw_home_team,
            "raw_away_team": self.raw_away_team,
            "raw_stadium": self.raw_stadium,
        }

    def to_canonical_dict(
        self,
        stadium_timezone: str,
        is_playoff: bool = False,
        broadcast: Optional[str] = None,
    ) -> dict:
        """Convert to canonical dictionary format matching iOS app schema.

        Args:
            stadium_timezone: IANA timezone of the stadium (e.g., 'America/Chicago').
                Only consulted when game_date is naive.
            is_playoff: Whether this is a playoff game
            broadcast: Broadcast network info (e.g., 'ESPN')

        Returns:
            Dictionary with field names matching JSONCanonicalGame in BootstrapService.swift
        """
        if self.game_date.tzinfo is None:
            # A naive datetime is taken to be wall-clock time at the stadium.
            local_dt = self.game_date.replace(tzinfo=ZoneInfo(stadium_timezone))
        else:
            local_dt = self.game_date

        utc_dt = local_dt.astimezone(ZoneInfo("UTC"))

        # Format season as string (e.g., 2025 -> "2025-26" for NBA/NHL, "2025" otherwise)
        if self.sport.lower() in ("nba", "nhl"):
            season_str = f"{self.season}-{str(self.season + 1)[-2:]}"
        else:
            season_str = str(self.season)

        return {
            "canonical_id": self.id,
            "sport": self.sport.upper(),  # iOS Sport enum expects uppercase (e.g., "NFL")
            "season": season_str,
            "game_datetime_utc": utc_dt.strftime("%Y-%m-%dT%H:%M:%SZ"),
            "home_team_canonical_id": self.home_team_id,
            "away_team_canonical_id": self.away_team_id,
            "stadium_canonical_id": self.stadium_id,
            "is_playoff": is_playoff,
            "broadcast_info": broadcast,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "Game":
        """Create a Game from a dictionary (internal format).

        ``game_date`` may be an ISO-8601 string or an existing datetime.
        """
        game_date = data["game_date"]
        if isinstance(game_date, str):
            game_date = datetime.fromisoformat(game_date)

        return cls(
            id=data["id"],
            sport=data["sport"],
            season=data["season"],
            home_team_id=data["home_team_id"],
            away_team_id=data["away_team_id"],
            stadium_id=data["stadium_id"],
            game_date=game_date,
            game_number=data.get("game_number"),
            home_score=data.get("home_score"),
            away_score=data.get("away_score"),
            status=data.get("status", "scheduled"),
            source_url=data.get("source_url"),
            raw_home_team=data.get("raw_home_team"),
            raw_away_team=data.get("raw_away_team"),
            raw_stadium=data.get("raw_stadium"),
        )

    @classmethod
    def from_canonical_dict(cls, data: dict) -> "Game":
        """Create a Game from a canonical dictionary (iOS app format).

        The canonical format has no score, status, or raw-name fields, so the
        result always has status 'scheduled' and the remaining optional fields
        at their defaults. ``sport`` is stored exactly as given.
        """
        # Handle 'Z' suffix (fromisoformat doesn't support it before Python 3.11)
        date_str = data["game_datetime_utc"].replace("Z", "+00:00")
        game_date = datetime.fromisoformat(date_str)

        # Parse season (e.g., "2025-26" -> 2025, "2025" -> 2025). Coerce to str
        # first so a season stored as a JSON number (2025) is accepted as well.
        season = int(str(data["season"]).split("-")[0])

        return cls(
            id=data["canonical_id"],
            sport=data["sport"],
            season=season,
            home_team_id=data["home_team_canonical_id"],
            away_team_id=data["away_team_canonical_id"],
            stadium_id=data["stadium_canonical_id"],
            game_date=game_date,
            status="scheduled",
        )

    def to_json(self) -> str:
        """Serialize to JSON string (internal format, 2-space indent)."""
        return json.dumps(self.to_dict(), indent=2)

    @classmethod
    def from_json(cls, json_str: str) -> "Game":
        """Deserialize from JSON string (internal format)."""
        return cls.from_dict(json.loads(json_str))
|
||||
|
||||
|
||||
def save_games(games: list[Game], filepath: str) -> None:
    """Write games to filepath as a JSON array of internal-format dicts."""
    with open(filepath, "w", encoding="utf-8") as out:
        payload = [game.to_dict() for game in games]
        json.dump(payload, out, indent=2)
|
||||
|
||||
|
||||
def load_games(filepath: str) -> list[Game]:
    """Read a JSON array of games from filepath, accepting either format.

    The format is decided from the first record only: if it has both
    "canonical_id" and "game_datetime_utc" the whole file is parsed as
    canonical (iOS) records, otherwise as internal records.
    """
    with open(filepath, "r", encoding="utf-8") as src:
        records = json.load(src)

    looks_canonical = (
        bool(records)
        and "canonical_id" in records[0]
        and "game_datetime_utc" in records[0]
    )
    build = Game.from_canonical_dict if looks_canonical else Game.from_dict
    return [build(record) for record in records]
|
||||
157
sportstime_parser/models/sport.py
Normal file
157
sportstime_parser/models/sport.py
Normal file
@@ -0,0 +1,157 @@
|
||||
"""Sport and LeagueStructure data models for sportstime-parser."""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
import json
|
||||
|
||||
|
||||
class LeagueStructureType(str, Enum):
    """Level of a league structure element.

    Members are also ``str`` instances, so they compare equal to their values.
    """

    CONFERENCE = "conference"
    DIVISION = "division"
    LEAGUE = "league"
|
||||
|
||||
|
||||
@dataclass
class Sport:
    """A sport record carrying all CloudKit fields.

    Attributes:
        id: Canonical sport ID (e.g., 'MLB', 'NBA')
        abbreviation: Sport abbreviation (e.g., 'MLB', 'NBA')
        display_name: Full display name (e.g., 'Major League Baseball')
        icon_name: SF Symbol name for the sport icon
        color_hex: Primary color as hex string (e.g., '#FF0000')
        season_start_month: Month number when season typically starts (1-12)
        season_end_month: Month number when season typically ends (1-12)
        is_active: Whether the sport is currently active/supported
    """

    id: str
    abbreviation: str
    display_name: str
    icon_name: str
    color_hex: str
    season_start_month: int
    season_end_month: int
    is_active: bool = True

    def to_dict(self) -> dict:
        """Return every field in a JSON-serializable dict."""
        return dict(
            id=self.id,
            abbreviation=self.abbreviation,
            display_name=self.display_name,
            icon_name=self.icon_name,
            color_hex=self.color_hex,
            season_start_month=self.season_start_month,
            season_end_month=self.season_end_month,
            is_active=self.is_active,
        )

    @classmethod
    def from_dict(cls, data: dict) -> "Sport":
        """Build a Sport from a dict; ``is_active`` defaults to True if absent."""
        required = (
            "id",
            "abbreviation",
            "display_name",
            "icon_name",
            "color_hex",
            "season_start_month",
            "season_end_month",
        )
        kwargs = {key: data[key] for key in required}
        kwargs["is_active"] = data.get("is_active", True)
        return cls(**kwargs)

    def to_json(self) -> str:
        """Serialize to a JSON string with 2-space indentation."""
        as_dict = self.to_dict()
        return json.dumps(as_dict, indent=2)

    @classmethod
    def from_json(cls, json_str: str) -> "Sport":
        """Deserialize from a JSON string produced by to_json()."""
        parsed = json.loads(json_str)
        return cls.from_dict(parsed)
|
||||
|
||||
|
||||
@dataclass
class LeagueStructure:
    """One element of a league's hierarchy (conference, division, etc.).

    Attributes:
        id: Unique ID (e.g., 'nba_eastern', 'mlb_al_east')
        sport: Sport code (e.g., 'NBA', 'MLB')
        structure_type: Type of structure (conference, division, league)
        name: Full name (e.g., 'Eastern Conference', 'AL East')
        abbreviation: Optional abbreviation (e.g., 'East', 'ALE')
        parent_id: Parent structure ID (e.g., division's parent is conference)
        display_order: Order for display (0-indexed)
    """

    id: str
    sport: str
    structure_type: LeagueStructureType
    name: str
    abbreviation: Optional[str] = None
    parent_id: Optional[str] = None
    display_order: int = 0

    def to_dict(self) -> dict:
        """Return every field in a JSON-serializable dict (enum as its value)."""
        return dict(
            id=self.id,
            sport=self.sport,
            structure_type=self.structure_type.value,
            name=self.name,
            abbreviation=self.abbreviation,
            parent_id=self.parent_id,
            display_order=self.display_order,
        )

    @classmethod
    def from_dict(cls, data: dict) -> "LeagueStructure":
        """Build a LeagueStructure from a dict shaped like the output of to_dict().

        ``abbreviation`` and ``parent_id`` default to None and
        ``display_order`` to 0 when absent.
        """
        kind = LeagueStructureType(data["structure_type"])
        return cls(
            id=data["id"],
            sport=data["sport"],
            structure_type=kind,
            name=data["name"],
            abbreviation=data.get("abbreviation"),
            parent_id=data.get("parent_id"),
            display_order=data.get("display_order", 0),
        )

    def to_json(self) -> str:
        """Serialize to a JSON string with 2-space indentation."""
        as_dict = self.to_dict()
        return json.dumps(as_dict, indent=2)

    @classmethod
    def from_json(cls, json_str: str) -> "LeagueStructure":
        """Deserialize from a JSON string produced by to_json()."""
        parsed = json.loads(json_str)
        return cls.from_dict(parsed)
|
||||
|
||||
|
||||
def save_sports(sports: list[Sport], filepath: str) -> None:
    """Write sports to filepath as a JSON array of dicts."""
    with open(filepath, "w", encoding="utf-8") as out:
        payload = [sport.to_dict() for sport in sports]
        json.dump(payload, out, indent=2)
|
||||
|
||||
|
||||
def load_sports(filepath: str) -> list[Sport]:
    """Read a JSON array from filepath and build a Sport from each entry."""
    with open(filepath, "r", encoding="utf-8") as src:
        records = json.load(src)
    return list(map(Sport.from_dict, records))
|
||||
|
||||
|
||||
def save_league_structures(structures: list[LeagueStructure], filepath: str) -> None:
    """Write league structures to filepath as a JSON array of dicts."""
    with open(filepath, "w", encoding="utf-8") as out:
        payload = [structure.to_dict() for structure in structures]
        json.dump(payload, out, indent=2)
|
||||
|
||||
|
||||
def load_league_structures(filepath: str) -> list[LeagueStructure]:
    """Read a JSON array from filepath and build a LeagueStructure from each entry."""
    with open(filepath, "r", encoding="utf-8") as src:
        records = json.load(src)
    return list(map(LeagueStructure.from_dict, records))
|
||||
154
sportstime_parser/models/stadium.py
Normal file
154
sportstime_parser/models/stadium.py
Normal file
@@ -0,0 +1,154 @@
|
||||
"""Stadium data model for sportstime-parser."""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
import json
|
||||
|
||||
|
||||
@dataclass
|
||||
class Stadium:
|
||||
"""Represents a stadium with all CloudKit fields.
|
||||
|
||||
Attributes:
|
||||
id: Canonical stadium ID (e.g., 'stadium_nba_paycom_center')
|
||||
sport: Primary sport code (e.g., 'nba', 'mlb')
|
||||
name: Current stadium name (e.g., 'Paycom Center')
|
||||
city: City name (e.g., 'Oklahoma City')
|
||||
state: State/province code (e.g., 'OK', 'ON')
|
||||
country: Country code (e.g., 'USA', 'Canada')
|
||||
latitude: Latitude coordinate
|
||||
longitude: Longitude coordinate
|
||||
capacity: Seating capacity
|
||||
surface: Playing surface (e.g., 'grass', 'turf', 'hardwood')
|
||||
roof_type: Roof type (e.g., 'dome', 'retractable', 'open')
|
||||
opened_year: Year stadium opened
|
||||
image_url: URL to stadium image
|
||||
timezone: IANA timezone (e.g., 'America/Chicago')
|
||||
"""
|
||||
|
||||
id: str
|
||||
sport: str
|
||||
name: str
|
||||
city: str
|
||||
state: str
|
||||
country: str
|
||||
latitude: float
|
||||
longitude: float
|
||||
capacity: Optional[int] = None
|
||||
surface: Optional[str] = None
|
||||
roof_type: Optional[str] = None
|
||||
opened_year: Optional[int] = None
|
||||
image_url: Optional[str] = None
|
||||
timezone: Optional[str] = None
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Convert to dictionary for JSON serialization."""
|
||||
return {
|
||||
"id": self.id,
|
||||
"sport": self.sport,
|
||||
"name": self.name,
|
||||
"city": self.city,
|
||||
"state": self.state,
|
||||
"country": self.country,
|
||||
"latitude": self.latitude,
|
||||
"longitude": self.longitude,
|
||||
"capacity": self.capacity,
|
||||
"surface": self.surface,
|
||||
"roof_type": self.roof_type,
|
||||
"opened_year": self.opened_year,
|
||||
"image_url": self.image_url,
|
||||
"timezone": self.timezone,
|
||||
}
|
||||
|
||||
def to_canonical_dict(self, primary_team_abbrevs: list[str] | None = None) -> dict:
|
||||
"""Convert to canonical dictionary format matching iOS app schema.
|
||||
|
||||
Args:
|
||||
primary_team_abbrevs: List of team abbreviations that play at this stadium.
|
||||
If None, defaults to empty list.
|
||||
|
||||
Returns:
|
||||
Dictionary with field names matching JSONCanonicalStadium in BootstrapService.swift
|
||||
"""
|
||||
return {
|
||||
"canonical_id": self.id,
|
||||
"name": self.name,
|
||||
"city": self.city,
|
||||
"state": self.state,
|
||||
"latitude": self.latitude,
|
||||
"longitude": self.longitude,
|
||||
"capacity": self.capacity if self.capacity is not None else 0,
|
||||
"sport": self.sport.upper(), # iOS Sport enum expects uppercase (e.g., "NFL")
|
||||
"primary_team_abbrevs": primary_team_abbrevs or [],
|
||||
"year_opened": self.opened_year,
|
||||
"timezone_identifier": self.timezone,
|
||||
"image_url": self.image_url,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> "Stadium":
|
||||
"""Create a Stadium from a dictionary (internal format)."""
|
||||
return cls(
|
||||
id=data["id"],
|
||||
sport=data["sport"],
|
||||
name=data["name"],
|
||||
city=data["city"],
|
||||
state=data["state"],
|
||||
country=data["country"],
|
||||
latitude=data["latitude"],
|
||||
longitude=data["longitude"],
|
||||
capacity=data.get("capacity"),
|
||||
surface=data.get("surface"),
|
||||
roof_type=data.get("roof_type"),
|
||||
opened_year=data.get("opened_year"),
|
||||
image_url=data.get("image_url"),
|
||||
timezone=data.get("timezone"),
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_canonical_dict(cls, data: dict) -> "Stadium":
|
||||
"""Create a Stadium from a canonical dictionary (iOS app format)."""
|
||||
return cls(
|
||||
id=data["canonical_id"],
|
||||
sport=data["sport"],
|
||||
name=data["name"],
|
||||
city=data["city"],
|
||||
state=data["state"],
|
||||
country="USA", # Canonical format doesn't include country
|
||||
latitude=data["latitude"],
|
||||
longitude=data["longitude"],
|
||||
capacity=data.get("capacity"),
|
||||
opened_year=data.get("year_opened"),
|
||||
image_url=data.get("image_url"),
|
||||
timezone=data.get("timezone_identifier"),
|
||||
)
|
||||
|
||||
def to_json(self) -> str:
|
||||
"""Serialize to JSON string."""
|
||||
return json.dumps(self.to_dict(), indent=2)
|
||||
|
||||
@classmethod
|
||||
def from_json(cls, json_str: str) -> "Stadium":
|
||||
"""Deserialize from JSON string."""
|
||||
return cls.from_dict(json.loads(json_str))
|
||||
|
||||
def is_in_allowed_region(self) -> bool:
|
||||
"""Check if stadium is in USA, Canada, or Mexico."""
|
||||
allowed = {"USA", "US", "United States", "Canada", "CA", "Mexico", "MX"}
|
||||
return self.country in allowed
|
||||
|
||||
|
||||
def save_stadiums(stadiums: list[Stadium], filepath: str) -> None:
    """Write stadiums to filepath as a JSON array of internal-format dicts."""
    with open(filepath, "w", encoding="utf-8") as out:
        payload = [stadium.to_dict() for stadium in stadiums]
        json.dump(payload, out, indent=2)
|
||||
|
||||
|
||||
def load_stadiums(filepath: str) -> list[Stadium]:
    """Read a JSON array of stadiums from filepath, accepting either format.

    The format is decided from the first record only: if it has a
    "canonical_id" key the whole file is parsed as canonical (iOS) records,
    otherwise as internal records.
    """
    with open(filepath, "r", encoding="utf-8") as src:
        records = json.load(src)

    looks_canonical = bool(records) and "canonical_id" in records[0]
    build = Stadium.from_canonical_dict if looks_canonical else Stadium.from_dict
    return [build(record) for record in records]
|
||||
177
sportstime_parser/models/team.py
Normal file
177
sportstime_parser/models/team.py
Normal file
@@ -0,0 +1,177 @@
|
||||
"""Team data model for sportstime-parser."""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
import json
|
||||
|
||||
|
||||
@dataclass
|
||||
class Team:
|
||||
"""Represents a team with all CloudKit fields.
|
||||
|
||||
Attributes:
|
||||
id: Canonical team ID (e.g., 'team_nba_okc')
|
||||
sport: Sport code (e.g., 'nba', 'mlb')
|
||||
city: Team city (e.g., 'Oklahoma City')
|
||||
name: Team name (e.g., 'Thunder')
|
||||
full_name: Full team name (e.g., 'Oklahoma City Thunder')
|
||||
abbreviation: Official abbreviation (e.g., 'OKC')
|
||||
conference: Conference name (e.g., 'Western', 'American')
|
||||
division: Division name (e.g., 'Northwest', 'AL West')
|
||||
primary_color: Primary team color as hex (e.g., '#007AC1')
|
||||
secondary_color: Secondary team color as hex (e.g., '#EF3B24')
|
||||
logo_url: URL to team logo image
|
||||
stadium_id: Canonical ID of home stadium
|
||||
"""
|
||||
|
||||
id: str
|
||||
sport: str
|
||||
city: str
|
||||
name: str
|
||||
full_name: str
|
||||
abbreviation: str
|
||||
conference: Optional[str] = None
|
||||
division: Optional[str] = None
|
||||
primary_color: Optional[str] = None
|
||||
secondary_color: Optional[str] = None
|
||||
logo_url: Optional[str] = None
|
||||
stadium_id: Optional[str] = None
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Convert to dictionary for JSON serialization."""
|
||||
return {
|
||||
"id": self.id,
|
||||
"sport": self.sport,
|
||||
"city": self.city,
|
||||
"name": self.name,
|
||||
"full_name": self.full_name,
|
||||
"abbreviation": self.abbreviation,
|
||||
"conference": self.conference,
|
||||
"division": self.division,
|
||||
"primary_color": self.primary_color,
|
||||
"secondary_color": self.secondary_color,
|
||||
"logo_url": self.logo_url,
|
||||
"stadium_id": self.stadium_id,
|
||||
}
|
||||
|
||||
def _make_qualified_id(self, name: Optional[str]) -> Optional[str]:
|
||||
"""Convert a conference/division name to a qualified ID.
|
||||
|
||||
Examples:
|
||||
"Eastern" → "nba_eastern"
|
||||
"AL West" → "mlb_al_west"
|
||||
"Southeast" → "nba_southeast"
|
||||
"""
|
||||
if not name:
|
||||
return None
|
||||
# Lowercase, replace spaces with underscores
|
||||
normalized = name.lower().replace(" ", "_")
|
||||
return f"{self.sport.lower()}_{normalized}"
|
||||
|
||||
def to_canonical_dict(self) -> dict:
    """Return the team in the canonical format consumed by the iOS app.

    Returns:
        Dictionary with field names matching JSONCanonicalTeam in BootstrapService.swift
    """
    canonical = {}

    # Identity fields.
    canonical["canonical_id"] = self.id
    canonical["name"] = self.name
    canonical["abbreviation"] = self.abbreviation
    # iOS Sport enum expects uppercase (e.g., "NFL")
    canonical["sport"] = self.sport.upper()
    canonical["city"] = self.city

    # A missing stadium is written as an empty string rather than null.
    canonical["stadium_canonical_id"] = self.stadium_id or ""

    # Conference/division names become sport-qualified IDs.
    canonical["conference_id"] = self._make_qualified_id(self.conference)
    canonical["division_id"] = self._make_qualified_id(self.division)

    # Colors pass through unchanged.
    canonical["primary_color"] = self.primary_color
    canonical["secondary_color"] = self.secondary_color
    return canonical
|
||||
|
||||
@classmethod
def from_dict(cls, data: dict) -> "Team":
    """Build a Team from a dictionary in the internal format.

    Args:
        data: Mapping with the mandatory keys ``id``, ``sport``, ``city``,
            ``name``, ``full_name`` and ``abbreviation``. The remaining
            keys are optional and default to None when absent.

    Returns:
        A new instance of ``cls``.

    Raises:
        KeyError: If a mandatory key is missing from ``data``.
    """
    mandatory = ("id", "sport", "city", "name", "full_name", "abbreviation")
    optional = (
        "conference",
        "division",
        "primary_color",
        "secondary_color",
        "logo_url",
        "stadium_id",
    )
    kwargs = {key: data[key] for key in mandatory}
    kwargs.update((key, data.get(key)) for key in optional)
    return cls(**kwargs)
|
||||
|
||||
@staticmethod
def _extract_name_from_qualified_id(qualified_id: Optional[str], sport: str) -> Optional[str]:
    """Turn a qualified conference/division ID back into a display name.

    The ``<sport>_`` prefix is dropped when present, the remainder is split
    on underscores, and each piece is capitalized. The league abbreviations
    AL, NL, AFC and NFC are fully uppercased instead.

    Examples:
        "nba_eastern" → "Eastern"
        "mlb_al_west" → "AL West"
        "nba_southeast" → "Southeast"

    Args:
        qualified_id: Qualified ID such as "mlb_al_west"; may be None or empty.
        sport: Sport code whose lowercase form is the expected prefix.

    Returns:
        The space-separated display name, or None when ``qualified_id`` is
        None or empty.
    """
    if not qualified_id:
        return None

    league_codes = ("AL", "NL", "AFC", "NFC")
    sport_prefix = f"{sport.lower()}_"

    # IDs that do not carry the expected sport prefix are used as-is.
    if qualified_id.startswith(sport_prefix):
        bare = qualified_id[len(sport_prefix):]
    else:
        bare = qualified_id

    words = [
        token.upper() if token.upper() in league_codes else token.capitalize()
        for token in bare.split("_")
    ]
    return " ".join(words)
|
||||
|
||||
@classmethod
def from_canonical_dict(cls, data: dict) -> "Team":
    """Create a Team from a canonical dictionary (iOS app format).

    This is the inverse of ``to_canonical_dict``. Fields the canonical
    format does not carry are reconstructed or left at their defaults:
    ``full_name`` is rebuilt as "<city> <name>" and ``logo_url`` is not set.

    Args:
        data: Mapping with the mandatory keys ``canonical_id``, ``sport``,
            ``city``, ``name`` and ``abbreviation``. ``conference_id``,
            ``division_id``, ``primary_color``, ``secondary_color`` and
            ``stadium_canonical_id`` are optional.

    Returns:
        A new instance of ``cls`` using the internal representation.

    Raises:
        KeyError: If a mandatory key is missing from ``data``.
    """
    # The canonical format stores the sport uppercased (e.g. "NBA"); the
    # internal model uses the lowercase code (e.g. "nba"), so normalize once
    # and use the same value for the instance and for prefix stripping.
    sport = data["sport"].lower()
    return cls(
        id=data["canonical_id"],
        sport=sport,
        city=data["city"],
        name=data["name"],
        full_name=f"{data['city']} {data['name']}",  # Reconstruct full_name
        abbreviation=data["abbreviation"],
        conference=cls._extract_name_from_qualified_id(data.get("conference_id"), sport),
        division=cls._extract_name_from_qualified_id(data.get("division_id"), sport),
        primary_color=data.get("primary_color"),
        secondary_color=data.get("secondary_color"),
        # The canonical format writes "" for a team without a stadium; map
        # that (and a missing key) back to None.
        stadium_id=data.get("stadium_canonical_id") or None,
    )
|
||||
|
||||
def to_json(self) -> str:
    """Return the internal-format dictionary rendered as indented JSON text."""
    payload = self.to_dict()
    return json.dumps(payload, indent=2)
|
||||
|
||||
@classmethod
def from_json(cls, json_str: str) -> "Team":
    """Parse JSON text in the internal format and build a Team from it."""
    parsed = json.loads(json_str)
    return cls.from_dict(parsed)
|
||||
|
||||
|
||||
def save_teams(teams: list[Team], filepath: str) -> None:
    """Write teams to a JSON file in the internal format.

    Args:
        teams: Teams to serialize; each one is converted with ``to_dict()``.
        filepath: Destination path, opened for writing as UTF-8.
    """
    with open(filepath, "w", encoding="utf-8") as handle:
        serialized = [team.to_dict() for team in teams]
        json.dump(serialized, handle, indent=2)
|
||||
|
||||
|
||||
def load_teams(filepath: str) -> list[Team]:
    """Read teams from a JSON file, accepting either serialization format.

    The format is chosen by inspecting the first record: a ``canonical_id``
    key means the canonical (iOS app) format, otherwise the internal format
    is assumed for every record.

    Args:
        filepath: Path of the JSON file, read as UTF-8.

    Returns:
        One Team per record in the file.
    """
    with open(filepath, "r", encoding="utf-8") as handle:
        records = json.load(handle)

    # Only the first record is examined; the whole file is assumed to share
    # its format.
    is_canonical = bool(records) and "canonical_id" in records[0]
    build = Team.from_canonical_dict if is_canonical else Team.from_dict
    return [build(record) for record in records]
|
||||
Reference in New Issue
Block a user