feat(scripts): add sportstime-parser data pipeline

Complete Python package for scraping, normalizing, and uploading
sports schedule data to CloudKit. Includes:

- Multi-source scrapers for NBA, MLB, NFL, NHL, MLS, WNBA, NWSL
- Canonical ID system for teams, stadiums, and games
- Fuzzy matching with manual alias support
- CloudKit uploader with batch operations and deduplication
- Comprehensive test suite with fixtures
- WNBA abbreviation aliases for improved team resolution
- Alias validation script to detect orphan references

All 5 phases of data remediation plan completed:
- Phase 1: Alias fixes (team/stadium alias additions)
- Phase 2: NHL stadium coordinate fixes
- Phase 3: Re-scrape validation
- Phase 4: iOS bundle update
- Phase 5: Code quality improvements (WNBA aliases)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-01-20 18:56:25 -06:00
parent ac78042a7e
commit 52d445bca4
76 changed files with 25065 additions and 0 deletions

View File

@@ -0,0 +1,52 @@
"""Data models for sportstime-parser."""
from .game import Game, save_games, load_games
from .team import Team, save_teams, load_teams
from .stadium import Stadium, save_stadiums, load_stadiums
from .aliases import (
AliasType,
ReviewReason,
TeamAlias,
StadiumAlias,
FuzzyMatch,
ManualReviewItem,
)
from .sport import (
Sport,
LeagueStructure,
LeagueStructureType,
save_sports,
load_sports,
save_league_structures,
load_league_structures,
)
# Public API of the models package: the names imported from the submodules
# above, grouped by the submodule each one comes from.
__all__ = [
# Game
"Game",
"save_games",
"load_games",
# Team
"Team",
"save_teams",
"load_teams",
# Stadium
"Stadium",
"save_stadiums",
"load_stadiums",
# Aliases
"AliasType",
"ReviewReason",
"TeamAlias",
"StadiumAlias",
"FuzzyMatch",
"ManualReviewItem",
# Sport and League Structure
"Sport",
"LeagueStructure",
"LeagueStructureType",
"save_sports",
"load_sports",
"save_league_structures",
"load_league_structures",
]

View File

@@ -0,0 +1,262 @@
"""Alias and manual review data models for sportstime-parser."""
from dataclasses import dataclass, field
from datetime import date, datetime
from enum import Enum
from typing import Optional
import json
class AliasType(Enum):
    """Kind of value a team alias is matched on."""

    NAME = "name"  # alias is a team name
    ABBREVIATION = "abbreviation"  # alias is an abbreviation
    CITY = "city"  # alias is a city
class ReviewReason(Enum):
    """Why an item was flagged for manual review.

    Every member's value is its own name in lowercase snake_case.
    """

    # Resolution failures
    UNRESOLVED_TEAM = "unresolved_team"
    UNRESOLVED_STADIUM = "unresolved_stadium"
    LOW_CONFIDENCE_MATCH = "low_confidence_match"

    # Data-quality problems
    MISSING_DATA = "missing_data"
    DUPLICATE_GAME = "duplicate_game"
    TIMEZONE_UNKNOWN = "timezone_unknown"
    GEOGRAPHIC_FILTER = "geographic_filter"
@dataclass
class TeamAlias:
    """Alternate identifier for a team, optionally limited to a date window.

    Attributes:
        id: Unique alias ID.
        team_canonical_id: Canonical team ID the alias resolves to.
        alias_type: Kind of alias (name, abbreviation, city).
        alias_value: Text that is matched against.
        valid_from: First date the alias applies (None = no lower bound).
        valid_until: Last date the alias applies (None = no upper bound).
    """

    id: str
    team_canonical_id: str
    alias_type: AliasType
    alias_value: str
    valid_from: Optional[date] = None
    valid_until: Optional[date] = None

    def is_valid_on(self, check_date: date) -> bool:
        """Return True when ``check_date`` falls inside the validity window.

        Both bounds are inclusive; an unset bound is treated as open.
        """
        before_start = self.valid_from and check_date < self.valid_from
        if before_start:
            return False
        after_end = self.valid_until and check_date > self.valid_until
        return not after_end

    def to_dict(self) -> dict:
        """Return a JSON-serializable dict; dates become ISO strings or None."""
        start, end = self.valid_from, self.valid_until
        return {
            "id": self.id,
            "team_canonical_id": self.team_canonical_id,
            "alias_type": self.alias_type.value,
            "alias_value": self.alias_value,
            "valid_from": start.isoformat() if start else None,
            "valid_until": end.isoformat() if end else None,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "TeamAlias":
        """Build a TeamAlias from the dict layout produced by ``to_dict``.

        Missing or empty ``valid_from`` / ``valid_until`` entries become None.
        """

        def _optional_date(key: str) -> Optional[date]:
            raw = data.get(key)
            return date.fromisoformat(raw) if raw else None

        # Dates are parsed before the required keys are read, in this order.
        start = _optional_date("valid_from")
        end = _optional_date("valid_until")
        return cls(
            id=data["id"],
            team_canonical_id=data["team_canonical_id"],
            alias_type=AliasType(data["alias_type"]),
            alias_value=data["alias_value"],
            valid_from=start,
            valid_until=end,
        )
@dataclass
class StadiumAlias:
    """Alternate name for a stadium, optionally limited to a date window.

    Attributes:
        alias_name: Alias text to match against (lowercase).
        stadium_canonical_id: Canonical stadium ID the alias resolves to.
        valid_from: First date the alias applies (None = no lower bound).
        valid_until: Last date the alias applies (None = no upper bound).
    """

    alias_name: str
    stadium_canonical_id: str
    valid_from: Optional[date] = None
    valid_until: Optional[date] = None

    def is_valid_on(self, check_date: date) -> bool:
        """Return True when ``check_date`` falls inside the validity window.

        Both bounds are inclusive; an unset bound is treated as open.
        """
        before_start = self.valid_from and check_date < self.valid_from
        if before_start:
            return False
        after_end = self.valid_until and check_date > self.valid_until
        return not after_end

    def to_dict(self) -> dict:
        """Return a JSON-serializable dict; dates become ISO strings or None."""
        start, end = self.valid_from, self.valid_until
        return {
            "alias_name": self.alias_name,
            "stadium_canonical_id": self.stadium_canonical_id,
            "valid_from": start.isoformat() if start else None,
            "valid_until": end.isoformat() if end else None,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "StadiumAlias":
        """Build a StadiumAlias from the dict layout produced by ``to_dict``.

        Missing or empty ``valid_from`` / ``valid_until`` entries become None.
        """

        def _optional_date(key: str) -> Optional[date]:
            raw = data.get(key)
            return date.fromisoformat(raw) if raw else None

        # Dates are parsed before the required keys are read, in this order.
        start = _optional_date("valid_from")
        end = _optional_date("valid_until")
        return cls(
            alias_name=data["alias_name"],
            stadium_canonical_id=data["stadium_canonical_id"],
            valid_from=start,
            valid_until=end,
        )
@dataclass
class FuzzyMatch:
    """Fuzzy match suggestion with a confidence score.

    Attributes:
        canonical_id: Canonical ID of the suggested match.
        canonical_name: Canonical name of the suggested match.
        confidence: Match confidence, 0-100.
    """

    canonical_id: str
    canonical_name: str
    confidence: int  # 0-100

    def to_dict(self) -> dict:
        """Convert to dictionary for JSON serialization."""
        return {
            "canonical_id": self.canonical_id,
            "canonical_name": self.canonical_name,
            "confidence": self.confidence,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "FuzzyMatch":
        """Create a FuzzyMatch from the dict layout produced by ``to_dict``.

        Raises:
            KeyError: If any of the three keys is missing.
        """
        return cls(
            canonical_id=data["canonical_id"],
            canonical_name=data["canonical_name"],
            confidence=data["confidence"],
        )
@dataclass
class ManualReviewItem:
    """Record of a value that has to be checked by a person.

    Attributes:
        id: Unique review item ID.
        reason: Why the item needs review.
        sport: Sport code.
        raw_value: The original unresolved value.
        context: Additional key/value context about the issue.
        source_url: URL of the source page.
        suggested_matches: Candidate matches with confidence scores.
        game_date: Date of the game, if applicable.
        created_at: When the review item was created (defaults to
            ``datetime.now()``).
    """

    id: str
    reason: ReviewReason
    sport: str
    raw_value: str
    context: dict = field(default_factory=dict)
    source_url: Optional[str] = None
    suggested_matches: list[FuzzyMatch] = field(default_factory=list)
    game_date: Optional[date] = None
    created_at: datetime = field(default_factory=datetime.now)

    def to_dict(self) -> dict:
        """Return a JSON-serializable dict of this item."""
        game_day = self.game_date
        return {
            "id": self.id,
            "reason": self.reason.value,
            "sport": self.sport,
            "raw_value": self.raw_value,
            "context": self.context,
            "source_url": self.source_url,
            "suggested_matches": [
                candidate.to_dict() for candidate in self.suggested_matches
            ],
            "game_date": game_day.isoformat() if game_day else None,
            "created_at": self.created_at.isoformat(),
        }

    @classmethod
    def from_dict(cls, data: dict) -> "ManualReviewItem":
        """Build a ManualReviewItem from the dict layout produced by ``to_dict``.

        A missing or empty ``created_at`` falls back to the current time;
        a missing or empty ``game_date`` becomes None.
        """
        raw_game_date = data.get("game_date")
        game_day = date.fromisoformat(raw_game_date) if raw_game_date else None

        raw_created = data.get("created_at")
        stamp = (
            datetime.fromisoformat(raw_created) if raw_created else datetime.now()
        )

        candidates = [
            FuzzyMatch(
                canonical_id=entry["canonical_id"],
                canonical_name=entry["canonical_name"],
                confidence=entry["confidence"],
            )
            for entry in data.get("suggested_matches", [])
        ]

        return cls(
            id=data["id"],
            reason=ReviewReason(data["reason"]),
            sport=data["sport"],
            raw_value=data["raw_value"],
            context=data.get("context", {}),
            source_url=data.get("source_url"),
            suggested_matches=candidates,
            game_date=game_day,
            created_at=stamp,
        )

    def to_markdown(self) -> str:
        """Render this item as a markdown section for the validation report.

        Optional parts (game date, context, suggested matches, source link)
        are emitted only when present. Matches with confidence >= 90 are
        tagged as likely correct. The section ends with a horizontal rule.
        """
        heading = self.reason.value.replace("_", " ").title()
        out = [
            f"### {heading}: {self.raw_value}",
            "",
            f"**Sport**: {self.sport.upper()}",
        ]

        if self.game_date:
            out.append(f"**Game Date**: {self.game_date.isoformat()}")

        if self.context:
            out += ["", "**Context**:"]
            out += [f"- {key}: {value}" for key, value in self.context.items()]

        if self.suggested_matches:
            out += ["", "**Suggested Matches**:"]
            for position, candidate in enumerate(self.suggested_matches, start=1):
                suffix = " <- likely correct" if candidate.confidence >= 90 else ""
                out.append(
                    f"{position}. `{candidate.canonical_id}` ({candidate.confidence}%){suffix}"
                )

        if self.source_url:
            out += ["", f"**Source**: [{self.source_url}]({self.source_url})"]

        out += ["", "---", ""]
        return "\n".join(out)

View File

@@ -0,0 +1,183 @@
"""Game data model for sportstime-parser."""
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional
from zoneinfo import ZoneInfo
import json
@dataclass
class Game:
    """Represents a game with all CloudKit fields.

    Attributes:
        id: Canonical game ID (e.g., 'nba_2025_hou_okc_1021')
        sport: Sport code (e.g., 'nba', 'mlb')
        season: Season start year (e.g., 2025 for 2025-26)
        home_team_id: Canonical home team ID
        away_team_id: Canonical away team ID
        stadium_id: Canonical stadium ID
        game_date: Game date/time. Timezone-aware values are converted to UTC
            on canonical export; naive values are interpreted in the stadium's
            timezone (see ``to_canonical_dict``).
        game_number: Game number for doubleheaders (1 or 2), None for single games
        home_score: Final home team score (None if not played)
        away_score: Final away team score (None if not played)
        status: Game status ('scheduled', 'final', 'postponed', 'cancelled')
        source_url: URL of the source page for manual review
        raw_home_team: Original home team name from source (for debugging)
        raw_away_team: Original away team name from source (for debugging)
        raw_stadium: Original stadium name from source (for debugging)
    """

    id: str
    sport: str
    season: int
    home_team_id: str
    away_team_id: str
    stadium_id: str
    game_date: datetime
    game_number: Optional[int] = None
    home_score: Optional[int] = None
    away_score: Optional[int] = None
    status: str = "scheduled"
    source_url: Optional[str] = None
    raw_home_team: Optional[str] = None
    raw_away_team: Optional[str] = None
    raw_stadium: Optional[str] = None

    def to_dict(self) -> dict:
        """Convert to dictionary for JSON serialization (internal format)."""
        return {
            "id": self.id,
            "sport": self.sport,
            "season": self.season,
            "home_team_id": self.home_team_id,
            "away_team_id": self.away_team_id,
            "stadium_id": self.stadium_id,
            "game_date": self.game_date.isoformat(),
            "game_number": self.game_number,
            "home_score": self.home_score,
            "away_score": self.away_score,
            "status": self.status,
            "source_url": self.source_url,
            "raw_home_team": self.raw_home_team,
            "raw_away_team": self.raw_away_team,
            "raw_stadium": self.raw_stadium,
        }

    def to_canonical_dict(
        self,
        stadium_timezone: str,
        is_playoff: bool = False,
        broadcast: Optional[str] = None,
    ) -> dict:
        """Convert to canonical dictionary format matching iOS app schema.

        Args:
            stadium_timezone: IANA timezone of the stadium (e.g., 'America/Chicago').
                Only consulted when ``game_date`` is naive.
            is_playoff: Whether this is a playoff game
            broadcast: Broadcast network info (e.g., 'ESPN')

        Returns:
            Dictionary with field names matching JSONCanonicalGame in BootstrapService.swift
        """
        if self.game_date.tzinfo is None:
            # A naive datetime is taken to be stadium-local wall-clock time.
            local_dt = self.game_date.replace(tzinfo=ZoneInfo(stadium_timezone))
        else:
            local_dt = self.game_date
        utc_dt = local_dt.astimezone(ZoneInfo("UTC"))

        # NBA/NHL seasons are written as a span ("2025-26"); every other
        # sport uses the bare start year ("2025").
        if self.sport.lower() in ("nba", "nhl"):
            season_str = f"{self.season}-{str(self.season + 1)[-2:]}"
        else:
            season_str = str(self.season)

        return {
            "canonical_id": self.id,
            "sport": self.sport.upper(),  # iOS Sport enum expects uppercase (e.g., "NFL")
            "season": season_str,
            "game_datetime_utc": utc_dt.strftime("%Y-%m-%dT%H:%M:%SZ"),
            "home_team_canonical_id": self.home_team_id,
            "away_team_canonical_id": self.away_team_id,
            "stadium_canonical_id": self.stadium_id,
            "is_playoff": is_playoff,
            "broadcast_info": broadcast,
        }

    @staticmethod
    def _parse_iso_datetime(value: str) -> datetime:
        """Parse an ISO-8601 timestamp, accepting a trailing 'Z' for UTC.

        ``datetime.fromisoformat`` only understands the 'Z' suffix from
        Python 3.11 on, so it is rewritten to '+00:00' first.
        """
        if value.endswith("Z"):
            value = f"{value[:-1]}+00:00"
        return datetime.fromisoformat(value)

    @classmethod
    def from_dict(cls, data: dict) -> "Game":
        """Create a Game from a dictionary (internal format).

        ``game_date`` may be a ``datetime`` or an ISO-8601 string (a trailing
        'Z' is accepted). Optional keys default to None, and ``status``
        defaults to 'scheduled'.
        """
        game_date = data["game_date"]
        if isinstance(game_date, str):
            game_date = cls._parse_iso_datetime(game_date)

        return cls(
            id=data["id"],
            sport=data["sport"],
            season=data["season"],
            home_team_id=data["home_team_id"],
            away_team_id=data["away_team_id"],
            stadium_id=data["stadium_id"],
            game_date=game_date,
            game_number=data.get("game_number"),
            home_score=data.get("home_score"),
            away_score=data.get("away_score"),
            status=data.get("status", "scheduled"),
            source_url=data.get("source_url"),
            raw_home_team=data.get("raw_home_team"),
            raw_away_team=data.get("raw_away_team"),
            raw_stadium=data.get("raw_stadium"),
        )

    @classmethod
    def from_canonical_dict(cls, data: dict) -> "Game":
        """Create a Game from a canonical dictionary (iOS app format).

        The canonical format carries no status, scores or raw source names,
        so ``status`` is always 'scheduled' and the rest keep their defaults.
        """
        game_date = cls._parse_iso_datetime(data["game_datetime_utc"])

        # "2025-26" -> 2025, "2025" -> 2025; str() also tolerates a season
        # that was stored as a JSON number.
        season = int(str(data["season"]).split("-")[0])

        # NOTE(review): the sport code is kept exactly as stored (uppercase in
        # canonical files), while the internal format documents lowercase
        # codes - confirm whether callers rely on either casing.
        return cls(
            id=data["canonical_id"],
            sport=data["sport"],
            season=season,
            home_team_id=data["home_team_canonical_id"],
            away_team_id=data["away_team_canonical_id"],
            stadium_id=data["stadium_canonical_id"],
            game_date=game_date,
            status="scheduled",
        )

    def to_json(self) -> str:
        """Serialize to a JSON string (internal format, 2-space indent)."""
        return json.dumps(self.to_dict(), indent=2)

    @classmethod
    def from_json(cls, json_str: str) -> "Game":
        """Deserialize from a JSON string (internal format)."""
        return cls.from_dict(json.loads(json_str))
def save_games(games: list[Game], filepath: str) -> None:
    """Write ``games`` to ``filepath`` as a JSON array (internal format).

    Each game is serialized with ``to_dict``; the file is UTF-8 with a
    2-space indent and is overwritten if it already exists.
    """
    records = [game.to_dict() for game in games]
    with open(filepath, "w", encoding="utf-8") as handle:
        json.dump(records, handle, indent=2)
def load_games(filepath: str) -> list[Game]:
    """Read a JSON array of games from ``filepath``, auto-detecting the format.

    The first record decides the format for the whole file: if it has both
    "canonical_id" and "game_datetime_utc" the file is treated as canonical
    (iOS) format, otherwise as internal format. An empty array yields [].
    """
    with open(filepath, "r", encoding="utf-8") as handle:
        records = json.load(handle)

    is_canonical = (
        bool(records)
        and "canonical_id" in records[0]
        and "game_datetime_utc" in records[0]
    )
    build = Game.from_canonical_dict if is_canonical else Game.from_dict
    return [build(record) for record in records]

View File

@@ -0,0 +1,157 @@
"""Sport and LeagueStructure data models for sportstime-parser."""
from dataclasses import dataclass
from enum import Enum
from typing import Optional
import json
class LeagueStructureType(str, Enum):
    """Level of a league structure element.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    CONFERENCE = "conference"
    DIVISION = "division"
    LEAGUE = "league"
@dataclass
class Sport:
    """A sport, carrying all of its CloudKit fields.

    Attributes:
        id: Canonical sport ID (e.g., 'MLB', 'NBA').
        abbreviation: Sport abbreviation (e.g., 'MLB', 'NBA').
        display_name: Full display name (e.g., 'Major League Baseball').
        icon_name: SF Symbol name for the sport icon.
        color_hex: Primary color as hex string (e.g., '#FF0000').
        season_start_month: Month number when the season typically starts (1-12).
        season_end_month: Month number when the season typically ends (1-12).
        is_active: Whether the sport is currently active/supported.
    """

    id: str
    abbreviation: str
    display_name: str
    icon_name: str
    color_hex: str
    season_start_month: int
    season_end_month: int
    is_active: bool = True

    def to_dict(self) -> dict:
        """Return a JSON-serializable dict with one entry per field."""
        field_names = (
            "id",
            "abbreviation",
            "display_name",
            "icon_name",
            "color_hex",
            "season_start_month",
            "season_end_month",
            "is_active",
        )
        return {name: getattr(self, name) for name in field_names}

    @classmethod
    def from_dict(cls, data: dict) -> "Sport":
        """Build a Sport from a dict; only ``is_active`` is optional (default True)."""
        required = (
            "id",
            "abbreviation",
            "display_name",
            "icon_name",
            "color_hex",
            "season_start_month",
            "season_end_month",
        )
        kwargs = {name: data[name] for name in required}
        kwargs["is_active"] = data.get("is_active", True)
        return cls(**kwargs)

    def to_json(self) -> str:
        """Return the ``to_dict`` form as a JSON string with 2-space indent."""
        payload = self.to_dict()
        return json.dumps(payload, indent=2)

    @classmethod
    def from_json(cls, json_str: str) -> "Sport":
        """Parse a JSON string and build a Sport from the resulting dict."""
        payload = json.loads(json_str)
        return cls.from_dict(payload)
@dataclass
class LeagueStructure:
    """One element of a league's hierarchy (conference, division, league).

    Attributes:
        id: Unique ID (e.g., 'nba_eastern', 'mlb_al_east').
        sport: Sport code (e.g., 'NBA', 'MLB').
        structure_type: Kind of element (conference, division, league).
        name: Full name (e.g., 'Eastern Conference', 'AL East').
        abbreviation: Optional abbreviation (e.g., 'East', 'ALE').
        parent_id: ID of the parent element (e.g., a division's conference).
        display_order: Position for display (0-indexed).
    """

    id: str
    sport: str
    structure_type: LeagueStructureType
    name: str
    abbreviation: Optional[str] = None
    parent_id: Optional[str] = None
    display_order: int = 0

    def to_dict(self) -> dict:
        """Return a JSON-serializable dict; ``structure_type`` is stored by value."""
        kind = self.structure_type.value
        return {
            "id": self.id,
            "sport": self.sport,
            "structure_type": kind,
            "name": self.name,
            "abbreviation": self.abbreviation,
            "parent_id": self.parent_id,
            "display_order": self.display_order,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "LeagueStructure":
        """Build a LeagueStructure from the dict layout produced by ``to_dict``.

        ``abbreviation`` and ``parent_id`` default to None and
        ``display_order`` to 0 when absent.
        """
        kwargs = {
            "id": data["id"],
            "sport": data["sport"],
            "structure_type": LeagueStructureType(data["structure_type"]),
            "name": data["name"],
            "abbreviation": data.get("abbreviation"),
            "parent_id": data.get("parent_id"),
            "display_order": data.get("display_order", 0),
        }
        return cls(**kwargs)

    def to_json(self) -> str:
        """Return the ``to_dict`` form as a JSON string with 2-space indent."""
        payload = self.to_dict()
        return json.dumps(payload, indent=2)

    @classmethod
    def from_json(cls, json_str: str) -> "LeagueStructure":
        """Parse a JSON string and build a LeagueStructure from the result."""
        payload = json.loads(json_str)
        return cls.from_dict(payload)
def save_sports(sports: list[Sport], filepath: str) -> None:
    """Write ``sports`` to ``filepath`` as a JSON array.

    Each sport is serialized with ``to_dict``; the file is UTF-8 with a
    2-space indent and is overwritten if it already exists.
    """
    records = [sport.to_dict() for sport in sports]
    with open(filepath, "w", encoding="utf-8") as handle:
        json.dump(records, handle, indent=2)
def load_sports(filepath: str) -> list[Sport]:
    """Read a JSON array from ``filepath`` and build one Sport per element."""
    with open(filepath, "r", encoding="utf-8") as handle:
        records = json.load(handle)
    return list(map(Sport.from_dict, records))
def save_league_structures(structures: list[LeagueStructure], filepath: str) -> None:
    """Write ``structures`` to ``filepath`` as a JSON array.

    Each element is serialized with ``to_dict``; the file is UTF-8 with a
    2-space indent and is overwritten if it already exists.
    """
    records = [structure.to_dict() for structure in structures]
    with open(filepath, "w", encoding="utf-8") as handle:
        json.dump(records, handle, indent=2)
def load_league_structures(filepath: str) -> list[LeagueStructure]:
    """Read a JSON array from ``filepath`` and build one LeagueStructure per element."""
    with open(filepath, "r", encoding="utf-8") as handle:
        records = json.load(handle)
    return list(map(LeagueStructure.from_dict, records))

View File

@@ -0,0 +1,154 @@
"""Stadium data model for sportstime-parser."""
from dataclasses import dataclass
from typing import Optional
import json
@dataclass
class Stadium:
"""Represents a stadium with all CloudKit fields.
Attributes:
id: Canonical stadium ID (e.g., 'stadium_nba_paycom_center')
sport: Primary sport code (e.g., 'nba', 'mlb')
name: Current stadium name (e.g., 'Paycom Center')
city: City name (e.g., 'Oklahoma City')
state: State/province code (e.g., 'OK', 'ON')
country: Country code (e.g., 'USA', 'Canada')
latitude: Latitude coordinate
longitude: Longitude coordinate
capacity: Seating capacity
surface: Playing surface (e.g., 'grass', 'turf', 'hardwood')
roof_type: Roof type (e.g., 'dome', 'retractable', 'open')
opened_year: Year stadium opened
image_url: URL to stadium image
timezone: IANA timezone (e.g., 'America/Chicago')
"""
id: str
sport: str
name: str
city: str
state: str
country: str
latitude: float
longitude: float
capacity: Optional[int] = None
surface: Optional[str] = None
roof_type: Optional[str] = None
opened_year: Optional[int] = None
image_url: Optional[str] = None
timezone: Optional[str] = None
def to_dict(self) -> dict:
"""Convert to dictionary for JSON serialization."""
return {
"id": self.id,
"sport": self.sport,
"name": self.name,
"city": self.city,
"state": self.state,
"country": self.country,
"latitude": self.latitude,
"longitude": self.longitude,
"capacity": self.capacity,
"surface": self.surface,
"roof_type": self.roof_type,
"opened_year": self.opened_year,
"image_url": self.image_url,
"timezone": self.timezone,
}
def to_canonical_dict(self, primary_team_abbrevs: list[str] | None = None) -> dict:
"""Convert to canonical dictionary format matching iOS app schema.
Args:
primary_team_abbrevs: List of team abbreviations that play at this stadium.
If None, defaults to empty list.
Returns:
Dictionary with field names matching JSONCanonicalStadium in BootstrapService.swift
"""
return {
"canonical_id": self.id,
"name": self.name,
"city": self.city,
"state": self.state,
"latitude": self.latitude,
"longitude": self.longitude,
"capacity": self.capacity if self.capacity is not None else 0,
"sport": self.sport.upper(), # iOS Sport enum expects uppercase (e.g., "NFL")
"primary_team_abbrevs": primary_team_abbrevs or [],
"year_opened": self.opened_year,
"timezone_identifier": self.timezone,
"image_url": self.image_url,
}
@classmethod
def from_dict(cls, data: dict) -> "Stadium":
"""Create a Stadium from a dictionary (internal format)."""
return cls(
id=data["id"],
sport=data["sport"],
name=data["name"],
city=data["city"],
state=data["state"],
country=data["country"],
latitude=data["latitude"],
longitude=data["longitude"],
capacity=data.get("capacity"),
surface=data.get("surface"),
roof_type=data.get("roof_type"),
opened_year=data.get("opened_year"),
image_url=data.get("image_url"),
timezone=data.get("timezone"),
)
@classmethod
def from_canonical_dict(cls, data: dict) -> "Stadium":
"""Create a Stadium from a canonical dictionary (iOS app format)."""
return cls(
id=data["canonical_id"],
sport=data["sport"],
name=data["name"],
city=data["city"],
state=data["state"],
country="USA", # Canonical format doesn't include country
latitude=data["latitude"],
longitude=data["longitude"],
capacity=data.get("capacity"),
opened_year=data.get("year_opened"),
image_url=data.get("image_url"),
timezone=data.get("timezone_identifier"),
)
def to_json(self) -> str:
"""Serialize to JSON string."""
return json.dumps(self.to_dict(), indent=2)
@classmethod
def from_json(cls, json_str: str) -> "Stadium":
"""Deserialize from JSON string."""
return cls.from_dict(json.loads(json_str))
def is_in_allowed_region(self) -> bool:
"""Check if stadium is in USA, Canada, or Mexico."""
allowed = {"USA", "US", "United States", "Canada", "CA", "Mexico", "MX"}
return self.country in allowed
def save_stadiums(stadiums: list[Stadium], filepath: str) -> None:
    """Write ``stadiums`` to ``filepath`` as a JSON array (internal format).

    Each stadium is serialized with ``to_dict``; the file is UTF-8 with a
    2-space indent and is overwritten if it already exists.
    """
    records = [stadium.to_dict() for stadium in stadiums]
    with open(filepath, "w", encoding="utf-8") as handle:
        json.dump(records, handle, indent=2)
def load_stadiums(filepath: str) -> list[Stadium]:
    """Read a JSON array of stadiums from ``filepath``, auto-detecting the format.

    The first record decides the format for the whole file: a "canonical_id"
    key means canonical (iOS) format, otherwise internal format. An empty
    array yields [].
    """
    with open(filepath, "r", encoding="utf-8") as handle:
        records = json.load(handle)

    is_canonical = bool(records) and "canonical_id" in records[0]
    build = Stadium.from_canonical_dict if is_canonical else Stadium.from_dict
    return [build(record) for record in records]

View File

@@ -0,0 +1,177 @@
"""Team data model for sportstime-parser."""
from dataclasses import dataclass
from typing import Optional
import json
@dataclass
class Team:
    """Represents a team with all CloudKit fields.

    Attributes:
        id: Canonical team ID (e.g., 'team_nba_okc')
        sport: Sport code (e.g., 'nba', 'mlb')
        city: Team city (e.g., 'Oklahoma City')
        name: Team name (e.g., 'Thunder')
        full_name: Full team name (e.g., 'Oklahoma City Thunder')
        abbreviation: Official abbreviation (e.g., 'OKC')
        conference: Conference name (e.g., 'Western', 'American')
        division: Division name (e.g., 'Northwest', 'AL West')
        primary_color: Primary team color as hex (e.g., '#007AC1')
        secondary_color: Secondary team color as hex (e.g., '#EF3B24')
        logo_url: URL to team logo image
        stadium_id: Canonical ID of home stadium
    """

    id: str
    sport: str
    city: str
    name: str
    full_name: str
    abbreviation: str
    conference: Optional[str] = None
    division: Optional[str] = None
    primary_color: Optional[str] = None
    secondary_color: Optional[str] = None
    logo_url: Optional[str] = None
    stadium_id: Optional[str] = None

    def to_dict(self) -> dict:
        """Convert to dictionary for JSON serialization (internal format)."""
        return {
            "id": self.id,
            "sport": self.sport,
            "city": self.city,
            "name": self.name,
            "full_name": self.full_name,
            "abbreviation": self.abbreviation,
            "conference": self.conference,
            "division": self.division,
            "primary_color": self.primary_color,
            "secondary_color": self.secondary_color,
            "logo_url": self.logo_url,
            "stadium_id": self.stadium_id,
        }

    def _make_qualified_id(self, name: Optional[str]) -> Optional[str]:
        """Convert a conference/division name to a sport-qualified ID.

        Returns None when ``name`` is None or empty.

        Examples (for the matching sport):
            "Eastern" -> "nba_eastern"
            "AL West" -> "mlb_al_west"
            "Southeast" -> "nba_southeast"
        """
        if not name:
            return None
        # Lowercase, replace spaces with underscores
        normalized = name.lower().replace(" ", "_")
        return f"{self.sport.lower()}_{normalized}"

    def to_canonical_dict(self) -> dict:
        """Convert to canonical dictionary format matching iOS app schema.

        Returns:
            Dictionary with field names matching JSONCanonicalTeam in BootstrapService.swift
        """
        return {
            "canonical_id": self.id,
            "name": self.name,
            "abbreviation": self.abbreviation,
            "sport": self.sport.upper(),  # iOS Sport enum expects uppercase (e.g., "NFL")
            "city": self.city,
            # A missing stadium is exported as "" rather than null.
            "stadium_canonical_id": self.stadium_id or "",
            "conference_id": self._make_qualified_id(self.conference),
            "division_id": self._make_qualified_id(self.division),
            "primary_color": self.primary_color,
            "secondary_color": self.secondary_color,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "Team":
        """Create a Team from a dictionary (internal format).

        The first six fields are required; the rest default to None.
        """
        return cls(
            id=data["id"],
            sport=data["sport"],
            city=data["city"],
            name=data["name"],
            full_name=data["full_name"],
            abbreviation=data["abbreviation"],
            conference=data.get("conference"),
            division=data.get("division"),
            primary_color=data.get("primary_color"),
            secondary_color=data.get("secondary_color"),
            logo_url=data.get("logo_url"),
            stadium_id=data.get("stadium_id"),
        )

    @staticmethod
    def _extract_name_from_qualified_id(qualified_id: Optional[str], sport: str) -> Optional[str]:
        """Extract the display name from a sport-qualified ID.

        Returns None when ``qualified_id`` is None or empty. An ID that does
        not start with the sport prefix is converted as-is.

        Examples:
            "nba_eastern" -> "Eastern"
            "mlb_al_west" -> "AL West"
            "nba_southeast" -> "Southeast"
        """
        if not qualified_id:
            return None

        # Remove sport prefix (e.g., "nba_" or "mlb_") when present.
        name = qualified_id.removeprefix(f"{sport.lower()}_")

        # Underscores become spaces and each word is capitalized, except
        # league abbreviations (AL, NL, AFC, NFC), which stay upper-case.
        league_abbrevs = ("AL", "NL", "AFC", "NFC")
        words = [
            part.upper() if part.upper() in league_abbrevs else part.capitalize()
            for part in name.split("_")
        ]
        return " ".join(words)

    @classmethod
    def from_canonical_dict(cls, data: dict) -> "Team":
        """Create a Team from a canonical dictionary (iOS app format).

        ``full_name`` is rebuilt as "<city> <name>". The canonical format has
        no logo URL, so ``logo_url`` stays None. An empty
        ``stadium_canonical_id`` (what ``to_canonical_dict`` writes for a
        team without a stadium) is read back as None.
        """
        sport_prefix = data["sport"].lower()
        # NOTE(review): the stored sport keeps the casing found in the file
        # (uppercase in canonical files); only the prefix used for ID parsing
        # is lower-cased - confirm whether callers rely on that casing.
        return cls(
            id=data["canonical_id"],
            sport=data["sport"],
            city=data["city"],
            name=data["name"],
            full_name=f"{data['city']} {data['name']}",  # Reconstruct full_name
            abbreviation=data["abbreviation"],
            conference=cls._extract_name_from_qualified_id(data.get("conference_id"), sport_prefix),
            division=cls._extract_name_from_qualified_id(data.get("division_id"), sport_prefix),
            primary_color=data.get("primary_color"),
            secondary_color=data.get("secondary_color"),
            stadium_id=data.get("stadium_canonical_id") or None,
        )

    def to_json(self) -> str:
        """Serialize to a JSON string (internal format, 2-space indent)."""
        return json.dumps(self.to_dict(), indent=2)

    @classmethod
    def from_json(cls, json_str: str) -> "Team":
        """Deserialize from a JSON string (internal format)."""
        return cls.from_dict(json.loads(json_str))
def save_teams(teams: list[Team], filepath: str) -> None:
    """Write ``teams`` to ``filepath`` as a JSON array (internal format).

    Each team is serialized with ``to_dict``; the file is UTF-8 with a
    2-space indent and is overwritten if it already exists.
    """
    records = [team.to_dict() for team in teams]
    with open(filepath, "w", encoding="utf-8") as handle:
        json.dump(records, handle, indent=2)
def load_teams(filepath: str) -> list[Team]:
    """Read a JSON array of teams from ``filepath``, auto-detecting the format.

    The first record decides the format for the whole file: a "canonical_id"
    key means canonical (iOS) format, otherwise internal format. An empty
    array yields [].
    """
    with open(filepath, "r", encoding="utf-8") as handle:
        records = json.load(handle)

    is_canonical = bool(records) and "canonical_id" in records[0]
    build = Team.from_canonical_dict if is_canonical else Team.from_dict
    return [build(record) for record in records]