feat(scripts): add sportstime-parser data pipeline

Complete Python package for scraping, normalizing, and uploading
sports schedule data to CloudKit. Includes:

- Multi-source scrapers for NBA, MLB, NFL, NHL, MLS, WNBA, NWSL
- Canonical ID system for teams, stadiums, and games
- Fuzzy matching with manual alias support
- CloudKit uploader with batch operations and deduplication
- Comprehensive test suite with fixtures
- WNBA abbreviation aliases for improved team resolution
- Alias validation script to detect orphan references

All 5 phases of the data remediation plan are complete:
- Phase 1: Alias fixes (team/stadium alias additions)
- Phase 2: NHL stadium coordinate fixes
- Phase 3: Re-scrape validation
- Phase 4: iOS bundle update
- Phase 5: Code quality improvements (WNBA aliases)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-01-20 18:56:25 -06:00
parent ac78042a7e
commit 52d445bca4
76 changed files with 25065 additions and 0 deletions

View File

@@ -0,0 +1,91 @@
"""Normalizers for team, stadium, and game data."""
from .canonical_id import (
generate_game_id,
generate_team_id,
generate_team_id_from_abbrev,
generate_stadium_id,
parse_game_id,
normalize_string,
)
from .timezone import (
TimezoneResult,
parse_datetime,
convert_to_utc,
detect_timezone_from_string,
detect_timezone_from_location,
get_stadium_timezone,
create_timezone_warning,
)
from .fuzzy import (
MatchCandidate,
fuzzy_match_team,
fuzzy_match_stadium,
exact_match,
best_match,
calculate_similarity,
normalize_for_matching,
)
from .alias_loader import (
TeamAliasLoader,
StadiumAliasLoader,
get_team_alias_loader,
get_stadium_alias_loader,
resolve_team_alias,
resolve_stadium_alias,
)
from .team_resolver import (
TeamResolver,
TeamResolveResult,
get_team_resolver,
resolve_team,
)
from .stadium_resolver import (
StadiumResolver,
StadiumResolveResult,
get_stadium_resolver,
resolve_stadium,
)
# Public API of the normalizers package: every name pulled in by the
# submodule imports above is re-exported here, grouped by source submodule.
__all__ = [
# Canonical ID
"generate_game_id",
"generate_team_id",
"generate_team_id_from_abbrev",
"generate_stadium_id",
"parse_game_id",
"normalize_string",
# Timezone
"TimezoneResult",
"parse_datetime",
"convert_to_utc",
"detect_timezone_from_string",
"detect_timezone_from_location",
"get_stadium_timezone",
"create_timezone_warning",
# Fuzzy matching
"MatchCandidate",
"fuzzy_match_team",
"fuzzy_match_stadium",
"exact_match",
"best_match",
"calculate_similarity",
"normalize_for_matching",
# Alias loaders
"TeamAliasLoader",
"StadiumAliasLoader",
"get_team_alias_loader",
"get_stadium_alias_loader",
"resolve_team_alias",
"resolve_stadium_alias",
# Team resolver
"TeamResolver",
"TeamResolveResult",
"get_team_resolver",
"resolve_team",
# Stadium resolver
"StadiumResolver",
"StadiumResolveResult",
"get_stadium_resolver",
"resolve_stadium",
]

View File

@@ -0,0 +1,312 @@
"""Alias file loaders for team and stadium name resolution."""
import json
from datetime import date
from pathlib import Path
from typing import Optional
from ..config import TEAM_ALIASES_FILE, STADIUM_ALIASES_FILE
from ..models.aliases import TeamAlias, StadiumAlias, AliasType
class TeamAliasLoader:
    """Loader for team aliases with date-aware resolution.

    Loads team aliases from JSON and provides lookup methods
    with support for historical name changes. Aliases are indexed by
    normalized alias value (lowercased, surrounding whitespace removed)
    and by canonical team ID. The file is read lazily on first lookup.
    """

    def __init__(self, filepath: Optional[Path] = None):
        """Initialize the loader without reading the alias file.

        Args:
            filepath: Path to team_aliases.json, defaults to config value
        """
        self.filepath = filepath or TEAM_ALIASES_FILE
        self._aliases: list[TeamAlias] = []
        self._by_value: dict[str, list[TeamAlias]] = {}
        self._by_team: dict[str, list[TeamAlias]] = {}
        self._loaded = False

    @staticmethod
    def _normalize_key(value: str) -> str:
        """Return the index key for an alias value.

        The same normalization is applied when building the index and when
        looking a value up, so the two can never disagree.
        """
        return value.lower().strip()

    def load(self) -> None:
        """Load aliases from the JSON file and rebuild both indexes.

        A missing file is not an error: the loader is marked as loaded and
        whatever the indexes currently hold is left untouched.
        """
        if not self.filepath.exists():
            self._loaded = True
            return

        with open(self.filepath, "r", encoding="utf-8") as f:
            data = json.load(f)

        # Build into locals and swap in at the end so a failure part-way
        # through parsing does not leave half-built indexes on the instance.
        aliases: list[TeamAlias] = []
        by_value: dict[str, list[TeamAlias]] = {}
        by_team: dict[str, list[TeamAlias]] = {}
        for item in data:
            alias = TeamAlias.from_dict(item)
            aliases.append(alias)
            # Key must match the normalization used by resolve().
            by_value.setdefault(self._normalize_key(alias.alias_value), []).append(alias)
            by_team.setdefault(alias.team_canonical_id, []).append(alias)

        self._aliases = aliases
        self._by_value = by_value
        self._by_team = by_team
        self._loaded = True

    def _ensure_loaded(self) -> None:
        """Ensure aliases are loaded."""
        if not self._loaded:
            self.load()

    def resolve(
        self,
        value: str,
        check_date: Optional[date] = None,
        alias_types: Optional[list[AliasType]] = None,
    ) -> Optional[str]:
        """Resolve an alias value to a canonical team ID.

        The first alias (in file order) that passes the type filter and is
        valid on the given date wins.

        Args:
            value: Alias value to look up (case-insensitive, surrounding
                whitespace ignored)
            check_date: Date to check validity (None = current date)
            alias_types: Types of aliases to check (None or empty = all types)

        Returns:
            Canonical team ID if found, None otherwise
        """
        self._ensure_loaded()
        if check_date is None:
            check_date = date.today()

        for alias in self._by_value.get(self._normalize_key(value), []):
            # Check type filter
            if alias_types and alias.alias_type not in alias_types:
                continue
            # Check date validity
            if alias.is_valid_on(check_date):
                return alias.team_canonical_id
        return None

    def get_aliases_for_team(
        self,
        team_id: str,
        check_date: Optional[date] = None,
    ) -> list[TeamAlias]:
        """Get all aliases for a team.

        Args:
            team_id: Canonical team ID
            check_date: Date to filter by (None = all aliases)

        Returns:
            List of TeamAlias objects
        """
        self._ensure_loaded()
        aliases = self._by_team.get(team_id, [])
        if check_date is not None:
            aliases = [a for a in aliases if a.is_valid_on(check_date)]
        return aliases

    def get_all_values(
        self,
        alias_type: Optional[AliasType] = None,
    ) -> list[str]:
        """Get all alias values.

        Args:
            alias_type: Filter by alias type (None = all types)

        Returns:
            List of alias values, exactly as stored in the file
        """
        self._ensure_loaded()
        return [
            alias.alias_value
            for alias in self._aliases
            if alias_type is None or alias.alias_type == alias_type
        ]
class StadiumAliasLoader:
    """Loader for stadium aliases with date-aware resolution.

    Loads stadium aliases from JSON and provides lookup methods
    with support for historical name changes (e.g., naming rights).
    Aliases are indexed by normalized name (lowercased, surrounding
    whitespace removed) and by canonical stadium ID. The file is read
    lazily on first lookup.
    """

    def __init__(self, filepath: Optional[Path] = None):
        """Initialize the loader without reading the alias file.

        Args:
            filepath: Path to stadium_aliases.json, defaults to config value
        """
        self.filepath = filepath or STADIUM_ALIASES_FILE
        self._aliases: list[StadiumAlias] = []
        self._by_name: dict[str, list[StadiumAlias]] = {}
        self._by_stadium: dict[str, list[StadiumAlias]] = {}
        self._loaded = False

    @staticmethod
    def _normalize_key(name: str) -> str:
        """Return the index key for a stadium name.

        The same normalization is applied when building the index and when
        looking a name up, so the two can never disagree.
        """
        return name.lower().strip()

    def load(self) -> None:
        """Load aliases from the JSON file and rebuild both indexes.

        A missing file is not an error: the loader is marked as loaded and
        whatever the indexes currently hold is left untouched.
        """
        if not self.filepath.exists():
            self._loaded = True
            return

        with open(self.filepath, "r", encoding="utf-8") as f:
            data = json.load(f)

        # Build into locals and swap in at the end so a failure part-way
        # through parsing does not leave half-built indexes on the instance.
        aliases: list[StadiumAlias] = []
        by_name: dict[str, list[StadiumAlias]] = {}
        by_stadium: dict[str, list[StadiumAlias]] = {}
        for item in data:
            alias = StadiumAlias.from_dict(item)
            aliases.append(alias)
            # Key must match the normalization used by resolve().
            by_name.setdefault(self._normalize_key(alias.alias_name), []).append(alias)
            by_stadium.setdefault(alias.stadium_canonical_id, []).append(alias)

        self._aliases = aliases
        self._by_name = by_name
        self._by_stadium = by_stadium
        self._loaded = True

    def _ensure_loaded(self) -> None:
        """Ensure aliases are loaded."""
        if not self._loaded:
            self.load()

    def resolve(
        self,
        name: str,
        check_date: Optional[date] = None,
    ) -> Optional[str]:
        """Resolve a stadium name to a canonical stadium ID.

        The first alias (in file order) that is valid on the given date wins.

        Args:
            name: Stadium name to look up (case-insensitive, surrounding
                whitespace ignored)
            check_date: Date to check validity (None = current date)

        Returns:
            Canonical stadium ID if found, None otherwise
        """
        self._ensure_loaded()
        if check_date is None:
            check_date = date.today()

        for alias in self._by_name.get(self._normalize_key(name), []):
            if alias.is_valid_on(check_date):
                return alias.stadium_canonical_id
        return None

    def get_aliases_for_stadium(
        self,
        stadium_id: str,
        check_date: Optional[date] = None,
    ) -> list[StadiumAlias]:
        """Get all aliases for a stadium.

        Args:
            stadium_id: Canonical stadium ID
            check_date: Date to filter by (None = all aliases)

        Returns:
            List of StadiumAlias objects
        """
        self._ensure_loaded()
        aliases = self._by_stadium.get(stadium_id, [])
        if check_date is not None:
            aliases = [a for a in aliases if a.is_valid_on(check_date)]
        return aliases

    def get_all_names(self) -> list[str]:
        """Get all stadium alias names.

        Returns:
            List of stadium names, exactly as stored in the file
        """
        self._ensure_loaded()
        return [alias.alias_name for alias in self._aliases]
# Global loader instances (lazy initialized).
# Both slots start as None; get_team_alias_loader() and
# get_stadium_alias_loader() create the instance on first call and then
# keep returning that same object.
_team_alias_loader: Optional[TeamAliasLoader] = None
_stadium_alias_loader: Optional[StadiumAliasLoader] = None
def get_team_alias_loader() -> TeamAliasLoader:
    """Return the shared team alias loader, creating it on first use."""
    global _team_alias_loader
    shared = _team_alias_loader
    if shared is None:
        # First call: build the loader with its default file path and cache it.
        shared = TeamAliasLoader()
        _team_alias_loader = shared
    return shared
def get_stadium_alias_loader() -> StadiumAliasLoader:
    """Return the shared stadium alias loader, creating it on first use."""
    global _stadium_alias_loader
    shared = _stadium_alias_loader
    if shared is None:
        # First call: build the loader with its default file path and cache it.
        shared = StadiumAliasLoader()
        _stadium_alias_loader = shared
    return shared
def resolve_team_alias(
    value: str,
    check_date: Optional[date] = None,
) -> Optional[str]:
    """Look up a team alias through the shared team alias loader.

    Args:
        value: Alias value (name, abbreviation, or city)
        check_date: Date on which the alias must be valid; passed through
            unchanged to the loader

    Returns:
        Canonical team ID if found, None otherwise
    """
    loader = get_team_alias_loader()
    return loader.resolve(value, check_date)
def resolve_stadium_alias(
    name: str,
    check_date: Optional[date] = None,
) -> Optional[str]:
    """Look up a stadium alias through the shared stadium alias loader.

    Args:
        name: Stadium name
        check_date: Date on which the alias must be valid; passed through
            unchanged to the loader

    Returns:
        Canonical stadium ID if found, None otherwise
    """
    loader = get_stadium_alias_loader()
    return loader.resolve(name, check_date)

View File

@@ -0,0 +1,284 @@
"""Canonical ID generation for games, teams, and stadiums."""
import re
import unicodedata
from datetime import date, datetime
from typing import Optional
def normalize_string(s: str) -> str:
    """Turn arbitrary text into a token that is safe inside a canonical ID.

    Steps, in order:
    1. Lowercase the input
    2. Decompose accented characters (NFKD) and drop anything non-ASCII,
       so 'é' becomes 'e'
    3. Replace each run of whitespace and/or hyphens with one underscore
    4. Delete every character that is not a-z, 0-9 or underscore
    5. Collapse repeated underscores and trim them from both ends

    Args:
        s: String to normalize

    Returns:
        Normalized string suitable for IDs
    """
    decomposed = unicodedata.normalize("NFKD", s.lower())
    ascii_only = decomposed.encode("ascii", "ignore").decode("ascii")
    underscored = re.sub(r"[\s\-]+", "_", ascii_only)
    allowed_only = re.sub(r"[^a-z0-9_]", "", underscored)
    # Deleting characters in the previous step can leave adjacent
    # underscores behind (e.g. "a - & - b"), hence the final collapse.
    return re.sub(r"_+", "_", allowed_only).strip("_")
def generate_game_id(
sport: str,
season: int,
away_abbrev: str,
home_abbrev: str,
game_date: date | datetime,
game_number: Optional[int] = None,
) -> str:
"""Generate a canonical game ID.
Format: game_{sport}_{season}_{YYYYMMDD}_{away}_{home}[_{game_number}]
Args:
sport: Sport code (e.g., 'nba', 'mlb')
season: Season start year (e.g., 2025 for 2025-26)
away_abbrev: Away team abbreviation (e.g., 'HOU')
home_abbrev: Home team abbreviation (e.g., 'OKC')
game_date: Date of the game
game_number: Game number for doubleheaders (1 or 2), None for single games
Returns:
Canonical game ID (e.g., 'game_nba_2025_20251021_hou_okc')
Examples:
>>> generate_game_id('nba', 2025, 'HOU', 'OKC', date(2025, 10, 21))
'game_nba_2025_20251021_hou_okc'
>>> generate_game_id('mlb', 2026, 'NYY', 'BOS', date(2026, 4, 1), game_number=1)
'game_mlb_2026_20260401_nyy_bos_1'
"""
# Normalize sport and abbreviations
sport_norm = sport.lower()
away_norm = away_abbrev.lower()
home_norm = home_abbrev.lower()
# Format date as YYYYMMDD
if isinstance(game_date, datetime):
game_date = game_date.date()
date_str = game_date.strftime("%Y%m%d")
# Build ID with game_ prefix
parts = ["game", sport_norm, str(season), date_str, away_norm, home_norm]
# Add game number for doubleheaders
if game_number is not None:
parts.append(str(game_number))
return "_".join(parts)
def generate_team_id(sport: str, city: str, name: str) -> str:
    """Generate a fallback canonical team ID from city and team name.

    Format: team_{sport}_{city}_{name}

    For most teams, the abbreviation-based ID from
    generate_team_id_from_abbrev is used. This function generates a
    fallback ID based on city and name for teams without a known
    abbreviation. City and name are passed through normalize_string, so
    spaces become underscores and punctuation is dropped; the city is NOT
    abbreviated.

    Args:
        sport: Sport code (e.g., 'nba', 'mlb'); lowercased in the ID
        city: Team city (e.g., 'Los Angeles')
        name: Team name (e.g., 'Lakers')

    Returns:
        Canonical team ID (e.g., 'team_nba_los_angeles_lakers')

    Examples:
        >>> generate_team_id('nba', 'Los Angeles', 'Lakers')
        'team_nba_los_angeles_lakers'
        >>> generate_team_id('mlb', 'New York', 'Yankees')
        'team_mlb_new_york_yankees'
    """
    sport_norm = sport.lower()
    city_norm = normalize_string(city)
    name_norm = normalize_string(name)
    return f"team_{sport_norm}_{city_norm}_{name_norm}"
def generate_team_id_from_abbrev(sport: str, abbreviation: str) -> str:
    """Build the canonical team ID for a team with a known abbreviation.

    Format: team_{sport}_{abbreviation}

    Both inputs are lowercased; no other normalization is applied.

    Args:
        sport: Sport code (e.g., 'nba', 'mlb')
        abbreviation: Team abbreviation (e.g., 'LAL', 'NYY')

    Returns:
        Canonical team ID (e.g., 'team_nba_lal')

    Examples:
        >>> generate_team_id_from_abbrev('nba', 'LAL')
        'team_nba_lal'
        >>> generate_team_id_from_abbrev('mlb', 'NYY')
        'team_mlb_nyy'
    """
    return "team_" + sport.lower() + "_" + abbreviation.lower()
def generate_stadium_id(sport: str, name: str) -> str:
    """Build the canonical ID for a stadium.

    Format: stadium_{sport}_{normalized_name}

    The stadium name is passed through normalize_string; the sport code is
    only lowercased.

    Args:
        sport: Sport code (e.g., 'nba', 'mlb')
        name: Stadium name (e.g., 'Yankee Stadium')

    Returns:
        Canonical stadium ID (e.g., 'stadium_mlb_yankee_stadium')

    Examples:
        >>> generate_stadium_id('nba', 'Crypto.com Arena')
        'stadium_nba_cryptocom_arena'
        >>> generate_stadium_id('mlb', 'Yankee Stadium')
        'stadium_mlb_yankee_stadium'
    """
    segments = ("stadium", sport.lower(), normalize_string(name))
    return "_".join(segments)
def parse_game_id(game_id: str) -> dict:
    """Parse a canonical game ID into its components.

    Expected layout (the inverse of generate_game_id):
    game_{sport}_{season}_{YYYYMMDD}_{away}_{home}[_{game_number}]

    Args:
        game_id: Canonical game ID (e.g., 'game_nba_2025_20251021_hou_okc')

    Returns:
        Dictionary with keys: sport, season, away_abbrev, home_abbrev,
        year, month, day, game_number (None when the ID has no game number)

    Raises:
        ValueError: If game_id format is invalid. This includes a wrong
            number of segments, a missing 'game' prefix, a non-numeric
            season or game number, and a date segment that is not eight
            digits or does not name a real calendar date.

    Examples:
        >>> parse_game_id('game_nba_2025_20251021_hou_okc')
        {'sport': 'nba', 'season': 2025, 'away_abbrev': 'hou', 'home_abbrev': 'okc', 'year': 2025, 'month': 10, 'day': 21, 'game_number': None}
        >>> parse_game_id('game_mlb_2026_20260401_nyy_bos_1')
        {'sport': 'mlb', 'season': 2026, 'away_abbrev': 'nyy', 'home_abbrev': 'bos', 'year': 2026, 'month': 4, 'day': 1, 'game_number': 1}
    """
    parts = game_id.split("_")
    if len(parts) < 6 or len(parts) > 7:
        raise ValueError(f"Invalid game ID format: {game_id}")
    if parts[0] != "game":
        raise ValueError(f"Game ID must start with 'game_': {game_id}")

    sport, season_str, date_str, away_abbrev, home_abbrev = parts[1:6]

    try:
        season = int(season_str)
    except ValueError as err:
        raise ValueError(f"Invalid season in game ID: {game_id}") from err

    # int() alone would accept things like "-0" for the month, so require
    # plain ASCII digits before slicing the date apart.
    if len(date_str) != 8 or not (date_str.isascii() and date_str.isdigit()):
        raise ValueError(f"Invalid date format in game ID: {game_id}")
    year = int(date_str[:4])
    month = int(date_str[4:6])
    day = int(date_str[6:])
    try:
        # Constructing a date rejects impossible values (month 13, Feb 30, ...).
        date(year, month, day)
    except ValueError as err:
        raise ValueError(f"Invalid date in game ID: {game_id}") from err

    game_number = None
    if len(parts) == 7:
        try:
            game_number = int(parts[6])
        except ValueError as err:
            raise ValueError(f"Invalid game number in game ID: {game_id}") from err

    return {
        "sport": sport,
        "season": season,
        "away_abbrev": away_abbrev,
        "home_abbrev": home_abbrev,
        "year": year,
        "month": month,
        "day": day,
        "game_number": game_number,
    }
def parse_team_id(team_id: str) -> dict:
    """Split a canonical team ID into sport and identifier.

    Only the first two underscores are treated as separators, so an
    identifier that itself contains underscores (a city_name fallback ID)
    is returned intact.

    Args:
        team_id: Canonical team ID (e.g., 'team_nba_lal')

    Returns:
        Dictionary with keys: sport, identifier (abbreviation or city_name)

    Raises:
        ValueError: If team_id does not start with 'team_' or has fewer
            than three underscore-separated segments
    """
    pieces = team_id.split("_", 2)
    well_formed = team_id.startswith("team_") and len(pieces) >= 3
    if not well_formed:
        raise ValueError(f"Invalid team ID format: {team_id}")

    _prefix, sport, identifier = pieces
    return {"sport": sport, "identifier": identifier}
def parse_stadium_id(stadium_id: str) -> dict:
    """Split a canonical stadium ID into sport and stadium name.

    Only the first two underscores are treated as separators, so the
    normalized stadium name keeps its own underscores.

    Args:
        stadium_id: Canonical stadium ID (e.g., 'stadium_nba_paycom_center')

    Returns:
        Dictionary with keys: sport, name

    Raises:
        ValueError: If stadium_id does not start with 'stadium_' or has
            fewer than three underscore-separated segments
    """
    pieces = stadium_id.split("_", 2)
    well_formed = stadium_id.startswith("stadium_") and len(pieces) >= 3
    if not well_formed:
        raise ValueError(f"Invalid stadium ID format: {stadium_id}")

    _prefix, sport, name = pieces
    return {"sport": sport, "name": name}

View File

@@ -0,0 +1,272 @@
"""Fuzzy string matching utilities for team and stadium name resolution."""
from dataclasses import dataclass
from typing import Optional
from rapidfuzz import fuzz, process
from rapidfuzz.utils import default_process
from ..config import FUZZY_MATCH_THRESHOLD
from ..models.aliases import FuzzyMatch
@dataclass
class MatchCandidate:
    """One entity that a query string can be matched against.

    Attributes:
        canonical_id: Canonical ID reported when this candidate matches
        name: Primary display name of the candidate
        aliases: Alternative names that are compared in addition to name
    """

    canonical_id: str
    name: str
    aliases: list[str]
def normalize_for_matching(s: str) -> str:
    """Normalize a string for fuzzy matching.

    - Convert to lowercase
    - Collapse every run of whitespace to a single space and trim the ends
    - Remove common prefixes ("the ", "team ", "stadium ")
    - Remove common suffixes (" stadium", " arena", " center", " field", " park")

    Each prefix and suffix is tested once, in the order listed, against the
    string as left by the previous removal, so more than one can be
    stripped from the same input.

    Args:
        s: String to normalize

    Returns:
        Normalized string
    """
    # split() with no argument splits on any whitespace run and discards
    # empty pieces, so this both trims and collapses in one step. Doing it
    # first lets "the  lakers" (two spaces) still match the "the " prefix.
    result = " ".join(s.lower().split())

    # Remove common prefixes
    for prefix in ("the ", "team ", "stadium "):
        if result.startswith(prefix):
            result = result[len(prefix):]

    # Remove common suffixes
    for suffix in (" stadium", " arena", " center", " field", " park"):
        if result.endswith(suffix):
            result = result[: -len(suffix)]

    return result.strip()
def fuzzy_match_team(
    query: str,
    candidates: list[MatchCandidate],
    threshold: int = FUZZY_MATCH_THRESHOLD,
    top_n: int = 3,
) -> list[FuzzyMatch]:
    """Rank candidate teams by fuzzy similarity to a team name.

    The query, each candidate name and each alias are first passed through
    normalize_for_matching. Every name/alias is then scored with a blend of
    three rapidfuzz scorers:
    1. Token set ratio (handles word order differences), weight 0.5
    2. Partial ratio (handles substring matches), weight 0.3
    3. Standard ratio (overall similarity), weight 0.2

    The blend is truncated to an int, and each canonical ID keeps the best
    score among its name and aliases.

    Args:
        query: Team name to match
        candidates: List of candidate teams to match against
        threshold: Minimum score to consider a match (0-100)
        top_n: Maximum number of matches to return

    Returns:
        List of FuzzyMatch objects sorted by confidence (descending)
    """
    needle = normalize_for_matching(query)

    def blended_score(label: str) -> int:
        """Score one candidate name or alias against the query."""
        target = normalize_for_matching(label)
        token_score = fuzz.token_set_ratio(needle, target)
        partial_score = fuzz.partial_ratio(needle, target)
        ratio_score = fuzz.ratio(needle, target)
        # Weighted average favoring token_set_ratio for team names
        return int(0.5 * token_score + 0.3 * partial_score + 0.2 * ratio_score)

    # canonical_id -> (best_score, name); insertion order is first-seen order,
    # which is what decides ties after the stable sort below.
    best_by_id: dict[str, tuple[int, str]] = {}
    for candidate in candidates:
        labels = [candidate.name]
        labels.extend(candidate.aliases)
        top = max(blended_score(label) for label in labels)

        previous = best_by_id.get(candidate.canonical_id)
        if previous is None or top > previous[0]:
            best_by_id[candidate.canonical_id] = (top, candidate.name)

    above_threshold = (
        FuzzyMatch(canonical_id=cid, canonical_name=name, confidence=score)
        for cid, (score, name) in best_by_id.items()
        if score >= threshold
    )
    ranked = sorted(above_threshold, key=lambda m: m.confidence, reverse=True)
    return ranked[:top_n]
def fuzzy_match_stadium(
    query: str,
    candidates: list[MatchCandidate],
    threshold: int = FUZZY_MATCH_THRESHOLD,
    top_n: int = 3,
) -> list[FuzzyMatch]:
    """Rank candidate stadiums by fuzzy similarity to a stadium name.

    The query, each candidate name and each alias are first passed through
    normalize_for_matching. Every name/alias is then scored with a blend of
    three rapidfuzz scorers:
    1. Token sort ratio (handles "X Stadium" vs "Stadium X"), weight 0.4
    2. Partial ratio (handles naming rights changes), weight 0.4
    3. Standard ratio, weight 0.2

    The blend is truncated to an int, and each canonical ID keeps the best
    score among its name and aliases.

    Args:
        query: Stadium name to match
        candidates: List of candidate stadiums to match against
        threshold: Minimum score to consider a match (0-100)
        top_n: Maximum number of matches to return

    Returns:
        List of FuzzyMatch objects sorted by confidence (descending)
    """
    needle = normalize_for_matching(query)

    def blended_score(label: str) -> int:
        """Score one candidate name or alias against the query."""
        target = normalize_for_matching(label)
        token_sort_score = fuzz.token_sort_ratio(needle, target)
        partial_score = fuzz.partial_ratio(needle, target)
        ratio_score = fuzz.ratio(needle, target)
        # Weighted average
        return int(0.4 * token_sort_score + 0.4 * partial_score + 0.2 * ratio_score)

    # canonical_id -> (best_score, name); insertion order is first-seen order,
    # which is what decides ties after the stable sort below.
    best_by_id: dict[str, tuple[int, str]] = {}
    for candidate in candidates:
        labels = [candidate.name]
        labels.extend(candidate.aliases)
        top = max(blended_score(label) for label in labels)

        previous = best_by_id.get(candidate.canonical_id)
        if previous is None or top > previous[0]:
            best_by_id[candidate.canonical_id] = (top, candidate.name)

    above_threshold = (
        FuzzyMatch(canonical_id=cid, canonical_name=name, confidence=score)
        for cid, (score, name) in best_by_id.items()
        if score >= threshold
    )
    ranked = sorted(above_threshold, key=lambda m: m.confidence, reverse=True)
    return ranked[:top_n]
def exact_match(
    query: str,
    candidates: list[MatchCandidate],
    case_sensitive: bool = False,
) -> Optional[str]:
    """Return the ID of the first candidate whose name or alias equals the query.

    Surrounding whitespace is ignored on both sides of the comparison.
    Candidates are checked in order, and within a candidate the primary
    name is checked before its aliases.

    Args:
        query: String to match
        candidates: List of candidates to match against
        case_sensitive: Whether to use case-sensitive matching

    Returns:
        Canonical ID if exact match found, None otherwise
    """

    def prepare(text: str) -> str:
        """Apply the case rule, then trim surrounding whitespace."""
        folded = text if case_sensitive else text.lower()
        return folded.strip()

    wanted = prepare(query)
    for candidate in candidates:
        if prepare(candidate.name) == wanted:
            return candidate.canonical_id
        if any(prepare(alias) == wanted for alias in candidate.aliases):
            return candidate.canonical_id
    return None
def best_match(
    query: str,
    candidates: list[MatchCandidate],
    threshold: int = FUZZY_MATCH_THRESHOLD,
) -> Optional[FuzzyMatch]:
    """Return the single best match for a query string.

    An exact (case-insensitive) name or alias hit is reported with
    confidence 100. Otherwise the top result of fuzzy_match_team is
    returned; the team scorer is used for every kind of candidate.

    Args:
        query: String to match
        candidates: List of candidates
        threshold: Minimum fuzzy match score

    Returns:
        Best FuzzyMatch or None if no match above threshold
    """
    exact_id = exact_match(query, candidates)
    if exact_id:
        # Recover the display name from the first candidate carrying this ID.
        owner = next((c for c in candidates if c.canonical_id == exact_id), None)
        if owner is not None:
            return FuzzyMatch(
                canonical_id=exact_id,
                canonical_name=owner.name,
                confidence=100,
            )

    fuzzy_hits = fuzzy_match_team(query, candidates, threshold=threshold, top_n=1)
    if not fuzzy_hits:
        return None
    return fuzzy_hits[0]
def calculate_similarity(s1: str, s2: str) -> int:
    """Calculate similarity between two strings.

    Both strings are passed through normalize_for_matching and compared
    with rapidfuzz's token set ratio.

    Args:
        s1: First string
        s2: Second string

    Returns:
        Similarity score 0-100, truncated to an integer
    """
    s1_norm = normalize_for_matching(s1)
    s2_norm = normalize_for_matching(s2)
    # rapidfuzz scorers return a float; truncate so the result matches the
    # declared int return type and the int scores produced by
    # fuzzy_match_team / fuzzy_match_stadium.
    return int(fuzz.token_set_ratio(s1_norm, s2_norm))

View File

@@ -0,0 +1,521 @@
"""Stadium name resolver with exact, alias, and fuzzy matching."""
from dataclasses import dataclass
from datetime import date
from typing import Optional
from uuid import uuid4
from ..config import FUZZY_MATCH_THRESHOLD, ALLOWED_COUNTRIES
from ..models.aliases import FuzzyMatch, ManualReviewItem, ReviewReason
from .alias_loader import get_stadium_alias_loader, StadiumAliasLoader
from .fuzzy import MatchCandidate, fuzzy_match_stadium
@dataclass
class StadiumResolveResult:
    """Outcome of one attempt to resolve a stadium name.

    Attributes:
        canonical_id: Canonical stadium ID that was resolved, or None when
            the name could not be resolved
        confidence: Match confidence (100 for exact, lower for fuzzy)
        match_type: How the match was made; one of 'exact', 'alias',
            'fuzzy' or 'unresolved'
        filtered_reason: Why the stadium was filtered out (e.g.,
            'geographic'), or None
        review_item: ManualReviewItem attached when resolution failed or
            confidence was low, otherwise None
    """

    canonical_id: Optional[str]
    confidence: int
    match_type: str
    filtered_reason: Optional[str] = None
    review_item: Optional[ManualReviewItem] = None
@dataclass
class StadiumInfo:
    """Static facts about one stadium, used when matching stadium names.

    Attributes:
        canonical_id: Canonical stadium ID
        name: Stadium display name
        city: City the stadium is in
        state: State code as given in the mapping entries (e.g., 'GA')
        country: Country code as given in the mapping entries (e.g., 'USA')
        sport: Sport code (e.g., 'nba')
        latitude: Latitude of the stadium
        longitude: Longitude of the stadium
        timezone: IANA timezone identifier; falls back to
            "America/New_York" when an entry does not supply one
    """

    canonical_id: str
    name: str
    city: str
    state: str
    country: str
    sport: str
    latitude: float
    longitude: float
    timezone: str = "America/New_York"  # IANA timezone identifier
# Hardcoded stadium mappings
# Format: {sport: {canonical_id: StadiumInfo}}
STADIUM_MAPPINGS: dict[str, dict[str, StadiumInfo]] = {
"nba": {
"stadium_nba_state_farm_arena": StadiumInfo("stadium_nba_state_farm_arena", "State Farm Arena", "Atlanta", "GA", "USA", "nba", 33.7573, -84.3963),
"stadium_nba_td_garden": StadiumInfo("stadium_nba_td_garden", "TD Garden", "Boston", "MA", "USA", "nba", 42.3662, -71.0621),
"stadium_nba_barclays_center": StadiumInfo("stadium_nba_barclays_center", "Barclays Center", "Brooklyn", "NY", "USA", "nba", 40.6826, -73.9754),
"stadium_nba_spectrum_center": StadiumInfo("stadium_nba_spectrum_center", "Spectrum Center", "Charlotte", "NC", "USA", "nba", 35.2251, -80.8392),
"stadium_nba_united_center": StadiumInfo("stadium_nba_united_center", "United Center", "Chicago", "IL", "USA", "nba", 41.8807, -87.6742),
"stadium_nba_rocket_mortgage_fieldhouse": StadiumInfo("stadium_nba_rocket_mortgage_fieldhouse", "Rocket Mortgage FieldHouse", "Cleveland", "OH", "USA", "nba", 41.4965, -81.6882),
"stadium_nba_american_airlines_center": StadiumInfo("stadium_nba_american_airlines_center", "American Airlines Center", "Dallas", "TX", "USA", "nba", 32.7905, -96.8103),
"stadium_nba_ball_arena": StadiumInfo("stadium_nba_ball_arena", "Ball Arena", "Denver", "CO", "USA", "nba", 39.7487, -105.0077, "America/Denver"),
"stadium_nba_little_caesars_arena": StadiumInfo("stadium_nba_little_caesars_arena", "Little Caesars Arena", "Detroit", "MI", "USA", "nba", 42.3411, -83.0553),
"stadium_nba_chase_center": StadiumInfo("stadium_nba_chase_center", "Chase Center", "San Francisco", "CA", "USA", "nba", 37.7680, -122.3877, "America/Los_Angeles"),
"stadium_nba_toyota_center": StadiumInfo("stadium_nba_toyota_center", "Toyota Center", "Houston", "TX", "USA", "nba", 29.7508, -95.3621, "America/Chicago"),
"stadium_nba_gainbridge_fieldhouse": StadiumInfo("stadium_nba_gainbridge_fieldhouse", "Gainbridge Fieldhouse", "Indianapolis", "IN", "USA", "nba", 39.7640, -86.1555),
"stadium_nba_intuit_dome": StadiumInfo("stadium_nba_intuit_dome", "Intuit Dome", "Inglewood", "CA", "USA", "nba", 33.9425, -118.3417),
"stadium_nba_cryptocom_arena": StadiumInfo("stadium_nba_cryptocom_arena", "Crypto.com Arena", "Los Angeles", "CA", "USA", "nba", 34.0430, -118.2673),
"stadium_nba_fedexforum": StadiumInfo("stadium_nba_fedexforum", "FedExForum", "Memphis", "TN", "USA", "nba", 35.1383, -90.0505),
"stadium_nba_kaseya_center": StadiumInfo("stadium_nba_kaseya_center", "Kaseya Center", "Miami", "FL", "USA", "nba", 25.7814, -80.1870),
"stadium_nba_fiserv_forum": StadiumInfo("stadium_nba_fiserv_forum", "Fiserv Forum", "Milwaukee", "WI", "USA", "nba", 43.0451, -87.9172),
"stadium_nba_target_center": StadiumInfo("stadium_nba_target_center", "Target Center", "Minneapolis", "MN", "USA", "nba", 44.9795, -93.2761),
"stadium_nba_smoothie_king_center": StadiumInfo("stadium_nba_smoothie_king_center", "Smoothie King Center", "New Orleans", "LA", "USA", "nba", 29.9490, -90.0821),
"stadium_nba_madison_square_garden": StadiumInfo("stadium_nba_madison_square_garden", "Madison Square Garden", "New York", "NY", "USA", "nba", 40.7505, -73.9934),
"stadium_nba_paycom_center": StadiumInfo("stadium_nba_paycom_center", "Paycom Center", "Oklahoma City", "OK", "USA", "nba", 35.4634, -97.5151),
"stadium_nba_kia_center": StadiumInfo("stadium_nba_kia_center", "Kia Center", "Orlando", "FL", "USA", "nba", 28.5392, -81.3839),
"stadium_nba_wells_fargo_center": StadiumInfo("stadium_nba_wells_fargo_center", "Wells Fargo Center", "Philadelphia", "PA", "USA", "nba", 39.9012, -75.1720),
"stadium_nba_footprint_center": StadiumInfo("stadium_nba_footprint_center", "Footprint Center", "Phoenix", "AZ", "USA", "nba", 33.4457, -112.0712),
"stadium_nba_moda_center": StadiumInfo("stadium_nba_moda_center", "Moda Center", "Portland", "OR", "USA", "nba", 45.5316, -122.6668),
"stadium_nba_golden_1_center": StadiumInfo("stadium_nba_golden_1_center", "Golden 1 Center", "Sacramento", "CA", "USA", "nba", 38.5802, -121.4997),
"stadium_nba_frost_bank_center": StadiumInfo("stadium_nba_frost_bank_center", "Frost Bank Center", "San Antonio", "TX", "USA", "nba", 29.4270, -98.4375),
"stadium_nba_scotiabank_arena": StadiumInfo("stadium_nba_scotiabank_arena", "Scotiabank Arena", "Toronto", "ON", "Canada", "nba", 43.6435, -79.3791, "America/Toronto"),
"stadium_nba_delta_center": StadiumInfo("stadium_nba_delta_center", "Delta Center", "Salt Lake City", "UT", "USA", "nba", 40.7683, -111.9011, "America/Denver"),
"stadium_nba_capital_one_arena": StadiumInfo("stadium_nba_capital_one_arena", "Capital One Arena", "Washington", "DC", "USA", "nba", 38.8981, -77.0209),
# International venues
"stadium_nba_mexico_city_arena": StadiumInfo("stadium_nba_mexico_city_arena", "Mexico City Arena", "Mexico City", "CDMX", "Mexico", "nba", 19.4042, -99.0970, "America/Mexico_City"),
},
"mlb": {
"stadium_mlb_chase_field": StadiumInfo("stadium_mlb_chase_field", "Chase Field", "Phoenix", "AZ", "USA", "mlb", 33.4455, -112.0667),
"stadium_mlb_truist_park": StadiumInfo("stadium_mlb_truist_park", "Truist Park", "Atlanta", "GA", "USA", "mlb", 33.8908, -84.4678),
"stadium_mlb_oriole_park_at_camden_yards": StadiumInfo("stadium_mlb_oriole_park_at_camden_yards", "Oriole Park at Camden Yards", "Baltimore", "MD", "USA", "mlb", 39.2839, -76.6217),
"stadium_mlb_fenway_park": StadiumInfo("stadium_mlb_fenway_park", "Fenway Park", "Boston", "MA", "USA", "mlb", 42.3467, -71.0972),
"stadium_mlb_wrigley_field": StadiumInfo("stadium_mlb_wrigley_field", "Wrigley Field", "Chicago", "IL", "USA", "mlb", 41.9484, -87.6553),
"stadium_mlb_guaranteed_rate_field": StadiumInfo("stadium_mlb_guaranteed_rate_field", "Guaranteed Rate Field", "Chicago", "IL", "USA", "mlb", 41.8299, -87.6338),
"stadium_mlb_great_american_ball_park": StadiumInfo("stadium_mlb_great_american_ball_park", "Great American Ball Park", "Cincinnati", "OH", "USA", "mlb", 39.0974, -84.5082),
"stadium_mlb_progressive_field": StadiumInfo("stadium_mlb_progressive_field", "Progressive Field", "Cleveland", "OH", "USA", "mlb", 41.4962, -81.6852),
"stadium_mlb_coors_field": StadiumInfo("stadium_mlb_coors_field", "Coors Field", "Denver", "CO", "USA", "mlb", 39.7559, -104.9942),
"stadium_mlb_comerica_park": StadiumInfo("stadium_mlb_comerica_park", "Comerica Park", "Detroit", "MI", "USA", "mlb", 42.3390, -83.0485),
"stadium_mlb_minute_maid_park": StadiumInfo("stadium_mlb_minute_maid_park", "Minute Maid Park", "Houston", "TX", "USA", "mlb", 29.7573, -95.3555),
"stadium_mlb_kauffman_stadium": StadiumInfo("stadium_mlb_kauffman_stadium", "Kauffman Stadium", "Kansas City", "MO", "USA", "mlb", 39.0517, -94.4803),
"stadium_mlb_angel_stadium": StadiumInfo("stadium_mlb_angel_stadium", "Angel Stadium", "Anaheim", "CA", "USA", "mlb", 33.8003, -117.8827),
"stadium_mlb_dodger_stadium": StadiumInfo("stadium_mlb_dodger_stadium", "Dodger Stadium", "Los Angeles", "CA", "USA", "mlb", 34.0739, -118.2400),
"stadium_mlb_loandepot_park": StadiumInfo("stadium_mlb_loandepot_park", "loanDepot park", "Miami", "FL", "USA", "mlb", 25.7781, -80.2195),
"stadium_mlb_american_family_field": StadiumInfo("stadium_mlb_american_family_field", "American Family Field", "Milwaukee", "WI", "USA", "mlb", 43.0280, -87.9712),
"stadium_mlb_target_field": StadiumInfo("stadium_mlb_target_field", "Target Field", "Minneapolis", "MN", "USA", "mlb", 44.9818, -93.2775),
"stadium_mlb_citi_field": StadiumInfo("stadium_mlb_citi_field", "Citi Field", "New York", "NY", "USA", "mlb", 40.7571, -73.8458),
"stadium_mlb_yankee_stadium": StadiumInfo("stadium_mlb_yankee_stadium", "Yankee Stadium", "Bronx", "NY", "USA", "mlb", 40.8296, -73.9262),
"stadium_mlb_sutter_health_park": StadiumInfo("stadium_mlb_sutter_health_park", "Sutter Health Park", "Sacramento", "CA", "USA", "mlb", 38.5803, -121.5005),
"stadium_mlb_citizens_bank_park": StadiumInfo("stadium_mlb_citizens_bank_park", "Citizens Bank Park", "Philadelphia", "PA", "USA", "mlb", 39.9061, -75.1665),
"stadium_mlb_pnc_park": StadiumInfo("stadium_mlb_pnc_park", "PNC Park", "Pittsburgh", "PA", "USA", "mlb", 40.4469, -80.0057),
"stadium_mlb_petco_park": StadiumInfo("stadium_mlb_petco_park", "Petco Park", "San Diego", "CA", "USA", "mlb", 32.7076, -117.1570),
"stadium_mlb_oracle_park": StadiumInfo("stadium_mlb_oracle_park", "Oracle Park", "San Francisco", "CA", "USA", "mlb", 37.7786, -122.3893),
"stadium_mlb_tmobile_park": StadiumInfo("stadium_mlb_tmobile_park", "T-Mobile Park", "Seattle", "WA", "USA", "mlb", 47.5914, -122.3325),
"stadium_mlb_busch_stadium": StadiumInfo("stadium_mlb_busch_stadium", "Busch Stadium", "St. Louis", "MO", "USA", "mlb", 38.6226, -90.1928),
"stadium_mlb_tropicana_field": StadiumInfo("stadium_mlb_tropicana_field", "Tropicana Field", "St. Petersburg", "FL", "USA", "mlb", 27.7682, -82.6534),
"stadium_mlb_globe_life_field": StadiumInfo("stadium_mlb_globe_life_field", "Globe Life Field", "Arlington", "TX", "USA", "mlb", 32.7473, -97.0845),
"stadium_mlb_rogers_centre": StadiumInfo("stadium_mlb_rogers_centre", "Rogers Centre", "Toronto", "ON", "Canada", "mlb", 43.6414, -79.3894),
"stadium_mlb_nationals_park": StadiumInfo("stadium_mlb_nationals_park", "Nationals Park", "Washington", "DC", "USA", "mlb", 38.8730, -77.0074),
# Spring Training - Cactus League (Arizona)
"stadium_mlb_spring_salt_river_fields": StadiumInfo("stadium_mlb_spring_salt_river_fields", "Salt River Fields at Talking Stick", "Scottsdale", "AZ", "USA", "mlb", 33.5412, -111.8847, "America/Phoenix"),
"stadium_mlb_spring_sloan_park": StadiumInfo("stadium_mlb_spring_sloan_park", "Sloan Park", "Mesa", "AZ", "USA", "mlb", 33.4312, -111.8821, "America/Phoenix"),
"stadium_mlb_spring_hohokam_stadium": StadiumInfo("stadium_mlb_spring_hohokam_stadium", "Hohokam Stadium", "Mesa", "AZ", "USA", "mlb", 33.4385, -111.8295, "America/Phoenix"),
"stadium_mlb_spring_camelback_ranch": StadiumInfo("stadium_mlb_spring_camelback_ranch", "Camelback Ranch", "Glendale", "AZ", "USA", "mlb", 33.509, -112.272, "America/Phoenix"),
"stadium_mlb_spring_goodyear_ballpark": StadiumInfo("stadium_mlb_spring_goodyear_ballpark", "Goodyear Ballpark", "Goodyear", "AZ", "USA", "mlb", 33.4286, -112.3908, "America/Phoenix"),
"stadium_mlb_spring_tempe_diablo_stadium": StadiumInfo("stadium_mlb_spring_tempe_diablo_stadium", "Tempe Diablo Stadium", "Tempe", "AZ", "USA", "mlb", 33.4003, -111.9685, "America/Phoenix"),
"stadium_mlb_spring_scottsdale_stadium": StadiumInfo("stadium_mlb_spring_scottsdale_stadium", "Scottsdale Stadium", "Scottsdale", "AZ", "USA", "mlb", 33.4881, -111.9210, "America/Phoenix"),
"stadium_mlb_spring_american_family_fields": StadiumInfo("stadium_mlb_spring_american_family_fields", "American Family Fields of Phoenix", "Phoenix", "AZ", "USA", "mlb", 33.4916, -112.1733, "America/Phoenix"),
"stadium_mlb_spring_peoria_sports_complex": StadiumInfo("stadium_mlb_spring_peoria_sports_complex", "Peoria Sports Complex", "Peoria", "AZ", "USA", "mlb", 33.6224, -112.2274, "America/Phoenix"),
"stadium_mlb_spring_surprise_stadium": StadiumInfo("stadium_mlb_spring_surprise_stadium", "Surprise Stadium", "Surprise", "AZ", "USA", "mlb", 33.6306, -112.3332, "America/Phoenix"),
# Spring Training - Grapefruit League (Florida)
"stadium_mlb_spring_jetblue_park": StadiumInfo("stadium_mlb_spring_jetblue_park", "JetBlue Park", "Fort Myers", "FL", "USA", "mlb", 26.5511, -81.7620),
"stadium_mlb_spring_roger_dean_stadium": StadiumInfo("stadium_mlb_spring_roger_dean_stadium", "Roger Dean Chevrolet Stadium", "Jupiter", "FL", "USA", "mlb", 26.8910, -80.1166),
"stadium_mlb_spring_ed_smith_stadium": StadiumInfo("stadium_mlb_spring_ed_smith_stadium", "Ed Smith Stadium", "Sarasota", "FL", "USA", "mlb", 27.3482, -82.5176),
"stadium_mlb_spring_steinbrenner_field": StadiumInfo("stadium_mlb_spring_steinbrenner_field", "George M. Steinbrenner Field", "Tampa", "FL", "USA", "mlb", 27.9748, -82.5040),
"stadium_mlb_spring_td_ballpark": StadiumInfo("stadium_mlb_spring_td_ballpark", "TD Ballpark", "Dunedin", "FL", "USA", "mlb", 28.0039, -82.7867),
"stadium_mlb_spring_cooltoday_park": StadiumInfo("stadium_mlb_spring_cooltoday_park", "CoolToday Park", "North Port", "FL", "USA", "mlb", 27.0219, -82.2358),
"stadium_mlb_spring_hammond_stadium": StadiumInfo("stadium_mlb_spring_hammond_stadium", "Hammond Stadium", "Fort Myers", "FL", "USA", "mlb", 26.5363, -81.8385),
"stadium_mlb_spring_clover_park": StadiumInfo("stadium_mlb_spring_clover_park", "Clover Park", "Port St. Lucie", "FL", "USA", "mlb", 27.2900, -80.4100),
"stadium_mlb_spring_baycare_ballpark": StadiumInfo("stadium_mlb_spring_baycare_ballpark", "BayCare Ballpark", "Clearwater", "FL", "USA", "mlb", 27.9697, -82.7257),
"stadium_mlb_spring_lecom_park": StadiumInfo("stadium_mlb_spring_lecom_park", "LECOM Park", "Bradenton", "FL", "USA", "mlb", 27.4939, -82.5753),
"stadium_mlb_spring_charlotte_sports_park": StadiumInfo("stadium_mlb_spring_charlotte_sports_park", "Charlotte Sports Park", "Port Charlotte", "FL", "USA", "mlb", 26.9992, -82.1817),
"stadium_mlb_spring_cacti_park": StadiumInfo("stadium_mlb_spring_cacti_park", "CACTI Park of the Palm Beaches", "West Palm Beach", "FL", "USA", "mlb", 26.7697, -80.1014),
"stadium_mlb_spring_joker_marchant": StadiumInfo("stadium_mlb_spring_joker_marchant", "Publix Field at Joker Marchant Stadium", "Lakeland", "FL", "USA", "mlb", 28.0655, -81.9545),
# Special venues
"stadium_mlb_las_vegas_ballpark": StadiumInfo("stadium_mlb_las_vegas_ballpark", "Las Vegas Ballpark", "Las Vegas", "NV", "USA", "mlb", 36.0925, -115.1775, "America/Los_Angeles"),
"stadium_mlb_mexico_alfredo_harp_helu": StadiumInfo("stadium_mlb_mexico_alfredo_harp_helu", "Estadio Alfredo Harp Helu", "Mexico City", "CDMX", "Mexico", "mlb", 19.3825, -99.0928, "America/Mexico_City"),
"stadium_mlb_field_of_dreams": StadiumInfo("stadium_mlb_field_of_dreams", "Field of Dreams", "Dyersville", "IA", "USA", "mlb", 42.4671, -91.1095, "America/Chicago"),
"stadium_mlb_journey_bank_ballpark": StadiumInfo("stadium_mlb_journey_bank_ballpark", "Journey Bank Ballpark", "Williamsport", "PA", "USA", "mlb", 41.2415, -77.0011),
},
"nfl": {
"stadium_nfl_state_farm_stadium": StadiumInfo("stadium_nfl_state_farm_stadium", "State Farm Stadium", "Glendale", "AZ", "USA", "nfl", 33.5276, -112.2626),
"stadium_nfl_mercedes_benz_stadium": StadiumInfo("stadium_nfl_mercedes_benz_stadium", "Mercedes-Benz Stadium", "Atlanta", "GA", "USA", "nfl", 33.7553, -84.4006),
"stadium_nfl_mandt_bank_stadium": StadiumInfo("stadium_nfl_mandt_bank_stadium", "M&T Bank Stadium", "Baltimore", "MD", "USA", "nfl", 39.2780, -76.6227),
"stadium_nfl_highmark_stadium": StadiumInfo("stadium_nfl_highmark_stadium", "Highmark Stadium", "Orchard Park", "NY", "USA", "nfl", 42.7738, -78.7870),
"stadium_nfl_bank_of_america_stadium": StadiumInfo("stadium_nfl_bank_of_america_stadium", "Bank of America Stadium", "Charlotte", "NC", "USA", "nfl", 35.2258, -80.8528),
"stadium_nfl_soldier_field": StadiumInfo("stadium_nfl_soldier_field", "Soldier Field", "Chicago", "IL", "USA", "nfl", 41.8623, -87.6167),
"stadium_nfl_paycor_stadium": StadiumInfo("stadium_nfl_paycor_stadium", "Paycor Stadium", "Cincinnati", "OH", "USA", "nfl", 39.0955, -84.5161),
"stadium_nfl_huntington_bank_field": StadiumInfo("stadium_nfl_huntington_bank_field", "Huntington Bank Field", "Cleveland", "OH", "USA", "nfl", 41.5061, -81.6995),
"stadium_nfl_att_stadium": StadiumInfo("stadium_nfl_att_stadium", "AT&T Stadium", "Arlington", "TX", "USA", "nfl", 32.7473, -97.0945),
"stadium_nfl_empower_field": StadiumInfo("stadium_nfl_empower_field", "Empower Field at Mile High", "Denver", "CO", "USA", "nfl", 39.7439, -105.0201),
"stadium_nfl_ford_field": StadiumInfo("stadium_nfl_ford_field", "Ford Field", "Detroit", "MI", "USA", "nfl", 42.3400, -83.0456),
"stadium_nfl_lambeau_field": StadiumInfo("stadium_nfl_lambeau_field", "Lambeau Field", "Green Bay", "WI", "USA", "nfl", 44.5013, -88.0622),
"stadium_nfl_nrg_stadium": StadiumInfo("stadium_nfl_nrg_stadium", "NRG Stadium", "Houston", "TX", "USA", "nfl", 29.6847, -95.4107),
"stadium_nfl_lucas_oil_stadium": StadiumInfo("stadium_nfl_lucas_oil_stadium", "Lucas Oil Stadium", "Indianapolis", "IN", "USA", "nfl", 39.7601, -86.1639),
"stadium_nfl_everbank_stadium": StadiumInfo("stadium_nfl_everbank_stadium", "EverBank Stadium", "Jacksonville", "FL", "USA", "nfl", 30.3239, -81.6373),
"stadium_nfl_arrowhead_stadium": StadiumInfo("stadium_nfl_arrowhead_stadium", "Arrowhead Stadium", "Kansas City", "MO", "USA", "nfl", 39.0489, -94.4839),
"stadium_nfl_allegiant_stadium": StadiumInfo("stadium_nfl_allegiant_stadium", "Allegiant Stadium", "Las Vegas", "NV", "USA", "nfl", 36.0909, -115.1833),
"stadium_nfl_sofi_stadium": StadiumInfo("stadium_nfl_sofi_stadium", "SoFi Stadium", "Inglewood", "CA", "USA", "nfl", 33.9534, -118.3386),
"stadium_nfl_hard_rock_stadium": StadiumInfo("stadium_nfl_hard_rock_stadium", "Hard Rock Stadium", "Miami Gardens", "FL", "USA", "nfl", 25.9580, -80.2389),
"stadium_nfl_us_bank_stadium": StadiumInfo("stadium_nfl_us_bank_stadium", "U.S. Bank Stadium", "Minneapolis", "MN", "USA", "nfl", 44.9737, -93.2575),
"stadium_nfl_gillette_stadium": StadiumInfo("stadium_nfl_gillette_stadium", "Gillette Stadium", "Foxborough", "MA", "USA", "nfl", 42.0909, -71.2643),
"stadium_nfl_caesars_superdome": StadiumInfo("stadium_nfl_caesars_superdome", "Caesars Superdome", "New Orleans", "LA", "USA", "nfl", 29.9511, -90.0812),
"stadium_nfl_metlife_stadium": StadiumInfo("stadium_nfl_metlife_stadium", "MetLife Stadium", "East Rutherford", "NJ", "USA", "nfl", 40.8128, -74.0742),
"stadium_nfl_lincoln_financial_field": StadiumInfo("stadium_nfl_lincoln_financial_field", "Lincoln Financial Field", "Philadelphia", "PA", "USA", "nfl", 39.9008, -75.1675),
"stadium_nfl_acrisure_stadium": StadiumInfo("stadium_nfl_acrisure_stadium", "Acrisure Stadium", "Pittsburgh", "PA", "USA", "nfl", 40.4468, -80.0158),
"stadium_nfl_levis_stadium": StadiumInfo("stadium_nfl_levis_stadium", "Levi's Stadium", "Santa Clara", "CA", "USA", "nfl", 37.4033, -121.9695),
"stadium_nfl_lumen_field": StadiumInfo("stadium_nfl_lumen_field", "Lumen Field", "Seattle", "WA", "USA", "nfl", 47.5952, -122.3316),
"stadium_nfl_raymond_james_stadium": StadiumInfo("stadium_nfl_raymond_james_stadium", "Raymond James Stadium", "Tampa", "FL", "USA", "nfl", 27.9759, -82.5033),
"stadium_nfl_nissan_stadium": StadiumInfo("stadium_nfl_nissan_stadium", "Nissan Stadium", "Nashville", "TN", "USA", "nfl", 36.1665, -86.7713),
"stadium_nfl_northwest_stadium": StadiumInfo("stadium_nfl_northwest_stadium", "Northwest Stadium", "Landover", "MD", "USA", "nfl", 38.9076, -76.8645),
},
"nhl": {
"stadium_nhl_honda_center": StadiumInfo("stadium_nhl_honda_center", "Honda Center", "Anaheim", "CA", "USA", "nhl", 33.8078, -117.8765),
"stadium_nhl_delta_center": StadiumInfo("stadium_nhl_delta_center", "Delta Center", "Salt Lake City", "UT", "USA", "nhl", 40.7683, -111.9011),
"stadium_nhl_td_garden": StadiumInfo("stadium_nhl_td_garden", "TD Garden", "Boston", "MA", "USA", "nhl", 42.3662, -71.0621),
"stadium_nhl_keybank_center": StadiumInfo("stadium_nhl_keybank_center", "KeyBank Center", "Buffalo", "NY", "USA", "nhl", 42.8750, -78.8764),
"stadium_nhl_scotiabank_saddledome": StadiumInfo("stadium_nhl_scotiabank_saddledome", "Scotiabank Saddledome", "Calgary", "AB", "Canada", "nhl", 51.0374, -114.0519),
"stadium_nhl_pnc_arena": StadiumInfo("stadium_nhl_pnc_arena", "PNC Arena", "Raleigh", "NC", "USA", "nhl", 35.8033, -78.7220),
"stadium_nhl_united_center": StadiumInfo("stadium_nhl_united_center", "United Center", "Chicago", "IL", "USA", "nhl", 41.8807, -87.6742),
"stadium_nhl_ball_arena": StadiumInfo("stadium_nhl_ball_arena", "Ball Arena", "Denver", "CO", "USA", "nhl", 39.7487, -105.0077),
"stadium_nhl_nationwide_arena": StadiumInfo("stadium_nhl_nationwide_arena", "Nationwide Arena", "Columbus", "OH", "USA", "nhl", 39.9692, -83.0061),
"stadium_nhl_american_airlines_center": StadiumInfo("stadium_nhl_american_airlines_center", "American Airlines Center", "Dallas", "TX", "USA", "nhl", 32.7905, -96.8103),
"stadium_nhl_little_caesars_arena": StadiumInfo("stadium_nhl_little_caesars_arena", "Little Caesars Arena", "Detroit", "MI", "USA", "nhl", 42.3411, -83.0553),
"stadium_nhl_rogers_place": StadiumInfo("stadium_nhl_rogers_place", "Rogers Place", "Edmonton", "AB", "Canada", "nhl", 53.5469, -113.4979),
"stadium_nhl_amerant_bank_arena": StadiumInfo("stadium_nhl_amerant_bank_arena", "Amerant Bank Arena", "Sunrise", "FL", "USA", "nhl", 26.1584, -80.3256),
"stadium_nhl_cryptocom_arena": StadiumInfo("stadium_nhl_cryptocom_arena", "Crypto.com Arena", "Los Angeles", "CA", "USA", "nhl", 34.0430, -118.2673),
"stadium_nhl_xcel_energy_center": StadiumInfo("stadium_nhl_xcel_energy_center", "Xcel Energy Center", "St. Paul", "MN", "USA", "nhl", 44.9448, -93.1010),
"stadium_nhl_bell_centre": StadiumInfo("stadium_nhl_bell_centre", "Bell Centre", "Montreal", "QC", "Canada", "nhl", 45.4961, -73.5693),
"stadium_nhl_bridgestone_arena": StadiumInfo("stadium_nhl_bridgestone_arena", "Bridgestone Arena", "Nashville", "TN", "USA", "nhl", 36.1592, -86.7785),
"stadium_nhl_prudential_center": StadiumInfo("stadium_nhl_prudential_center", "Prudential Center", "Newark", "NJ", "USA", "nhl", 40.7334, -74.1712),
"stadium_nhl_ubs_arena": StadiumInfo("stadium_nhl_ubs_arena", "UBS Arena", "Elmont", "NY", "USA", "nhl", 40.7170, -73.7255),
"stadium_nhl_madison_square_garden": StadiumInfo("stadium_nhl_madison_square_garden", "Madison Square Garden", "New York", "NY", "USA", "nhl", 40.7505, -73.9934),
"stadium_nhl_canadian_tire_centre": StadiumInfo("stadium_nhl_canadian_tire_centre", "Canadian Tire Centre", "Ottawa", "ON", "Canada", "nhl", 45.2969, -75.9272),
"stadium_nhl_wells_fargo_center": StadiumInfo("stadium_nhl_wells_fargo_center", "Wells Fargo Center", "Philadelphia", "PA", "USA", "nhl", 39.9012, -75.1720),
"stadium_nhl_ppg_paints_arena": StadiumInfo("stadium_nhl_ppg_paints_arena", "PPG Paints Arena", "Pittsburgh", "PA", "USA", "nhl", 40.4395, -79.9890),
"stadium_nhl_sap_center": StadiumInfo("stadium_nhl_sap_center", "SAP Center", "San Jose", "CA", "USA", "nhl", 37.3327, -121.9011),
"stadium_nhl_climate_pledge_arena": StadiumInfo("stadium_nhl_climate_pledge_arena", "Climate Pledge Arena", "Seattle", "WA", "USA", "nhl", 47.6221, -122.3540),
"stadium_nhl_enterprise_center": StadiumInfo("stadium_nhl_enterprise_center", "Enterprise Center", "St. Louis", "MO", "USA", "nhl", 38.6268, -90.2025),
"stadium_nhl_amalie_arena": StadiumInfo("stadium_nhl_amalie_arena", "Amalie Arena", "Tampa", "FL", "USA", "nhl", 27.9428, -82.4519),
"stadium_nhl_scotiabank_arena": StadiumInfo("stadium_nhl_scotiabank_arena", "Scotiabank Arena", "Toronto", "ON", "Canada", "nhl", 43.6435, -79.3791),
"stadium_nhl_rogers_arena": StadiumInfo("stadium_nhl_rogers_arena", "Rogers Arena", "Vancouver", "BC", "Canada", "nhl", 49.2778, -123.1088),
"stadium_nhl_tmobile_arena": StadiumInfo("stadium_nhl_tmobile_arena", "T-Mobile Arena", "Las Vegas", "NV", "USA", "nhl", 36.1028, -115.1783),
"stadium_nhl_capital_one_arena": StadiumInfo("stadium_nhl_capital_one_arena", "Capital One Arena", "Washington", "DC", "USA", "nhl", 38.8981, -77.0209),
"stadium_nhl_canada_life_centre": StadiumInfo("stadium_nhl_canada_life_centre", "Canada Life Centre", "Winnipeg", "MB", "Canada", "nhl", 49.8928, -97.1433),
},
"mls": {
"stadium_mls_mercedes_benz_stadium": StadiumInfo("stadium_mls_mercedes_benz_stadium", "Mercedes-Benz Stadium", "Atlanta", "GA", "USA", "mls", 33.7553, -84.4006),
"stadium_mls_q2_stadium": StadiumInfo("stadium_mls_q2_stadium", "Q2 Stadium", "Austin", "TX", "USA", "mls", 30.3875, -97.7186),
"stadium_mls_bank_of_america_stadium": StadiumInfo("stadium_mls_bank_of_america_stadium", "Bank of America Stadium", "Charlotte", "NC", "USA", "mls", 35.2258, -80.8528),
"stadium_mls_soldier_field": StadiumInfo("stadium_mls_soldier_field", "Soldier Field", "Chicago", "IL", "USA", "mls", 41.8623, -87.6167),
"stadium_mls_tql_stadium": StadiumInfo("stadium_mls_tql_stadium", "TQL Stadium", "Cincinnati", "OH", "USA", "mls", 39.1112, -84.5225),
"stadium_mls_dicks_sporting_goods_park": StadiumInfo("stadium_mls_dicks_sporting_goods_park", "Dick's Sporting Goods Park", "Commerce City", "CO", "USA", "mls", 39.8056, -104.8922),
"stadium_mls_lowercom_field": StadiumInfo("stadium_mls_lowercom_field", "Lower.com Field", "Columbus", "OH", "USA", "mls", 39.9689, -83.0173),
"stadium_mls_toyota_stadium": StadiumInfo("stadium_mls_toyota_stadium", "Toyota Stadium", "Frisco", "TX", "USA", "mls", 33.1545, -96.8353),
"stadium_mls_audi_field": StadiumInfo("stadium_mls_audi_field", "Audi Field", "Washington", "DC", "USA", "mls", 38.8687, -77.0128),
"stadium_mls_shell_energy_stadium": StadiumInfo("stadium_mls_shell_energy_stadium", "Shell Energy Stadium", "Houston", "TX", "USA", "mls", 29.7522, -95.3527),
"stadium_mls_dignity_health_sports_park": StadiumInfo("stadium_mls_dignity_health_sports_park", "Dignity Health Sports Park", "Carson", "CA", "USA", "mls", 33.8644, -118.2611),
"stadium_mls_bmo_stadium": StadiumInfo("stadium_mls_bmo_stadium", "BMO Stadium", "Los Angeles", "CA", "USA", "mls", 34.0128, -118.2841),
"stadium_mls_chase_stadium": StadiumInfo("stadium_mls_chase_stadium", "Chase Stadium", "Fort Lauderdale", "FL", "USA", "mls", 26.1930, -80.1611),
"stadium_mls_allianz_field": StadiumInfo("stadium_mls_allianz_field", "Allianz Field", "St. Paul", "MN", "USA", "mls", 44.9528, -93.1650),
"stadium_mls_stade_saputo": StadiumInfo("stadium_mls_stade_saputo", "Stade Saputo", "Montreal", "QC", "Canada", "mls", 45.5622, -73.5528),
"stadium_mls_geodis_park": StadiumInfo("stadium_mls_geodis_park", "GEODIS Park", "Nashville", "TN", "USA", "mls", 36.1304, -86.7651),
"stadium_mls_gillette_stadium": StadiumInfo("stadium_mls_gillette_stadium", "Gillette Stadium", "Foxborough", "MA", "USA", "mls", 42.0909, -71.2643),
"stadium_mls_yankee_stadium": StadiumInfo("stadium_mls_yankee_stadium", "Yankee Stadium", "Bronx", "NY", "USA", "mls", 40.8296, -73.9262),
"stadium_mls_red_bull_arena": StadiumInfo("stadium_mls_red_bull_arena", "Red Bull Arena", "Harrison", "NJ", "USA", "mls", 40.7369, -74.1503),
"stadium_mls_interco_stadium": StadiumInfo("stadium_mls_interco_stadium", "Inter&Co Stadium", "Orlando", "FL", "USA", "mls", 28.5411, -81.3895),
"stadium_mls_subaru_park": StadiumInfo("stadium_mls_subaru_park", "Subaru Park", "Chester", "PA", "USA", "mls", 39.8328, -75.3789),
"stadium_mls_providence_park": StadiumInfo("stadium_mls_providence_park", "Providence Park", "Portland", "OR", "USA", "mls", 45.5216, -122.6917),
"stadium_mls_america_first_field": StadiumInfo("stadium_mls_america_first_field", "America First Field", "Sandy", "UT", "USA", "mls", 40.5830, -111.8933),
"stadium_mls_paypal_park": StadiumInfo("stadium_mls_paypal_park", "PayPal Park", "San Jose", "CA", "USA", "mls", 37.3511, -121.9250),
"stadium_mls_snapdragon_stadium": StadiumInfo("stadium_mls_snapdragon_stadium", "Snapdragon Stadium", "San Diego", "CA", "USA", "mls", 32.7837, -117.1225),
"stadium_mls_lumen_field": StadiumInfo("stadium_mls_lumen_field", "Lumen Field", "Seattle", "WA", "USA", "mls", 47.5952, -122.3316),
"stadium_mls_childrens_mercy_park": StadiumInfo("stadium_mls_childrens_mercy_park", "Children's Mercy Park", "Kansas City", "KS", "USA", "mls", 39.1217, -94.8231),
"stadium_mls_citypark": StadiumInfo("stadium_mls_citypark", "CITYPARK", "St. Louis", "MO", "USA", "mls", 38.6316, -90.2106),
"stadium_mls_bmo_field": StadiumInfo("stadium_mls_bmo_field", "BMO Field", "Toronto", "ON", "Canada", "mls", 43.6332, -79.4186),
"stadium_mls_bc_place": StadiumInfo("stadium_mls_bc_place", "BC Place", "Vancouver", "BC", "Canada", "mls", 49.2768, -123.1118),
},
"wnba": {
"stadium_wnba_gateway_center_arena": StadiumInfo("stadium_wnba_gateway_center_arena", "Gateway Center Arena", "College Park", "GA", "USA", "wnba", 33.6510, -84.4474),
"stadium_wnba_wintrust_arena": StadiumInfo("stadium_wnba_wintrust_arena", "Wintrust Arena", "Chicago", "IL", "USA", "wnba", 41.8658, -87.6169),
"stadium_wnba_mohegan_sun_arena": StadiumInfo("stadium_wnba_mohegan_sun_arena", "Mohegan Sun Arena", "Uncasville", "CT", "USA", "wnba", 41.4931, -72.0912),
"stadium_wnba_college_park_center": StadiumInfo("stadium_wnba_college_park_center", "College Park Center", "Arlington", "TX", "USA", "wnba", 32.7304, -97.1077),
"stadium_wnba_chase_center": StadiumInfo("stadium_wnba_chase_center", "Chase Center", "San Francisco", "CA", "USA", "wnba", 37.7680, -122.3877),
"stadium_wnba_gainbridge_fieldhouse": StadiumInfo("stadium_wnba_gainbridge_fieldhouse", "Gainbridge Fieldhouse", "Indianapolis", "IN", "USA", "wnba", 39.7640, -86.1555),
"stadium_wnba_michelob_ultra_arena": StadiumInfo("stadium_wnba_michelob_ultra_arena", "Michelob Ultra Arena", "Las Vegas", "NV", "USA", "wnba", 36.0902, -115.1756),
"stadium_wnba_cryptocom_arena": StadiumInfo("stadium_wnba_cryptocom_arena", "Crypto.com Arena", "Los Angeles", "CA", "USA", "wnba", 34.0430, -118.2673),
"stadium_wnba_target_center": StadiumInfo("stadium_wnba_target_center", "Target Center", "Minneapolis", "MN", "USA", "wnba", 44.9795, -93.2761),
"stadium_wnba_barclays_center": StadiumInfo("stadium_wnba_barclays_center", "Barclays Center", "Brooklyn", "NY", "USA", "wnba", 40.6826, -73.9754),
"stadium_wnba_footprint_center": StadiumInfo("stadium_wnba_footprint_center", "Footprint Center", "Phoenix", "AZ", "USA", "wnba", 33.4457, -112.0712),
"stadium_wnba_climate_pledge_arena": StadiumInfo("stadium_wnba_climate_pledge_arena", "Climate Pledge Arena", "Seattle", "WA", "USA", "wnba", 47.6221, -122.3540),
"stadium_wnba_entertainment_sports_arena": StadiumInfo("stadium_wnba_entertainment_sports_arena", "Entertainment & Sports Arena", "Washington", "DC", "USA", "wnba", 38.8690, -76.9745),
"stadium_wnba_state_farm_arena": StadiumInfo("stadium_wnba_state_farm_arena", "State Farm Arena", "Atlanta", "GA", "USA", "wnba", 33.7573, -84.3963),
"stadium_wnba_rocket_mortgage_fieldhouse": StadiumInfo("stadium_wnba_rocket_mortgage_fieldhouse", "Rocket Mortgage FieldHouse", "Cleveland", "OH", "USA", "wnba", 41.4965, -81.6882),
"stadium_wnba_cfg_bank_arena": StadiumInfo("stadium_wnba_cfg_bank_arena", "CFG Bank Arena", "Baltimore", "MD", "USA", "wnba", 39.2825, -76.6220),
"stadium_wnba_purcell_pavilion": StadiumInfo("stadium_wnba_purcell_pavilion", "Purcell Pavilion", "Notre Dame", "IN", "USA", "wnba", 41.6987, -86.2340),
},
"nwsl": {
"stadium_nwsl_bmo_stadium": StadiumInfo("stadium_nwsl_bmo_stadium", "BMO Stadium", "Los Angeles", "CA", "USA", "nwsl", 34.0128, -118.2841),
"stadium_nwsl_seatgeek_stadium": StadiumInfo("stadium_nwsl_seatgeek_stadium", "SeatGeek Stadium", "Bridgeview", "IL", "USA", "nwsl", 41.7500, -87.8028),
"stadium_nwsl_shell_energy_stadium": StadiumInfo("stadium_nwsl_shell_energy_stadium", "Shell Energy Stadium", "Houston", "TX", "USA", "nwsl", 29.7522, -95.3527),
"stadium_nwsl_cpkc_stadium": StadiumInfo("stadium_nwsl_cpkc_stadium", "CPKC Stadium", "Kansas City", "MO", "USA", "nwsl", 39.1050, -94.5580),
"stadium_nwsl_red_bull_arena": StadiumInfo("stadium_nwsl_red_bull_arena", "Red Bull Arena", "Harrison", "NJ", "USA", "nwsl", 40.7369, -74.1503),
"stadium_nwsl_wakemed_soccer_park": StadiumInfo("stadium_nwsl_wakemed_soccer_park", "WakeMed Soccer Park", "Cary", "NC", "USA", "nwsl", 35.7879, -78.7806),
"stadium_nwsl_interco_stadium": StadiumInfo("stadium_nwsl_interco_stadium", "Inter&Co Stadium", "Orlando", "FL", "USA", "nwsl", 28.5411, -81.3895),
"stadium_nwsl_providence_park": StadiumInfo("stadium_nwsl_providence_park", "Providence Park", "Portland", "OR", "USA", "nwsl", 45.5216, -122.6917),
"stadium_nwsl_lynn_family_stadium": StadiumInfo("stadium_nwsl_lynn_family_stadium", "Lynn Family Stadium", "Louisville", "KY", "USA", "nwsl", 38.2219, -85.7381),
"stadium_nwsl_snapdragon_stadium": StadiumInfo("stadium_nwsl_snapdragon_stadium", "Snapdragon Stadium", "San Diego", "CA", "USA", "nwsl", 32.7837, -117.1225),
"stadium_nwsl_lumen_field": StadiumInfo("stadium_nwsl_lumen_field", "Lumen Field", "Seattle", "WA", "USA", "nwsl", 47.5952, -122.3316),
"stadium_nwsl_america_first_field": StadiumInfo("stadium_nwsl_america_first_field", "America First Field", "Sandy", "UT", "USA", "nwsl", 40.5830, -111.8933),
"stadium_nwsl_audi_field": StadiumInfo("stadium_nwsl_audi_field", "Audi Field", "Washington", "DC", "USA", "nwsl", 38.8687, -77.0128),
"stadium_nwsl_paypal_park": StadiumInfo("stadium_nwsl_paypal_park", "PayPal Park", "San Jose", "CA", "USA", "nwsl", 37.3511, -121.9250),
# Boston Legacy FC venues
"stadium_nwsl_gillette_stadium": StadiumInfo("stadium_nwsl_gillette_stadium", "Gillette Stadium", "Foxborough", "MA", "USA", "nwsl", 42.0909, -71.2643),
"stadium_nwsl_centreville_bank_stadium": StadiumInfo("stadium_nwsl_centreville_bank_stadium", "Centreville Bank Stadium", "Pawtucket", "RI", "USA", "nwsl", 41.8770, -71.3910),
# Denver Summit FC venues
"stadium_nwsl_empower_field": StadiumInfo("stadium_nwsl_empower_field", "Empower Field at Mile High", "Denver", "CO", "USA", "nwsl", 39.7439, -105.0201, "America/Denver"),
"stadium_nwsl_dicks_sporting_goods_park": StadiumInfo("stadium_nwsl_dicks_sporting_goods_park", "Dick's Sporting Goods Park", "Commerce City", "CO", "USA", "nwsl", 39.8056, -104.8922, "America/Denver"),
"stadium_nwsl_centennial_stadium": StadiumInfo("stadium_nwsl_centennial_stadium", "Centennial Stadium", "Centennial", "CO", "USA", "nwsl", 39.6000, -104.8800, "America/Denver"),
# Shared NFL/MLB venues
"stadium_nwsl_soldier_field": StadiumInfo("stadium_nwsl_soldier_field", "Soldier Field", "Chicago", "IL", "USA", "nwsl", 41.8623, -87.6167),
"stadium_nwsl_oracle_park": StadiumInfo("stadium_nwsl_oracle_park", "Oracle Park", "San Francisco", "CA", "USA", "nwsl", 37.7786, -122.3893, "America/Los_Angeles"),
},
}
class StadiumResolver:
    """Translate raw stadium names into canonical stadium IDs for one sport.

    Strategies are attempted in this order; the first one that hits wins:
        1. Case-insensitive exact comparison with the known stadium names
        2. Alias lookup (date-aware), accepted only for this sport's IDs
        3. Fuzzy comparison against every known name and alias
        4. Geographic filter on the caller-supplied country
        5. Unresolved (a ManualReviewItem is attached to the result)
    """

    def __init__(
        self,
        sport: str,
        alias_loader: Optional[StadiumAliasLoader] = None,
        fuzzy_threshold: int = FUZZY_MATCH_THRESHOLD,
    ):
        """Set up the lookup tables for a single sport.

        Args:
            sport: Sport code (e.g., 'nba', 'mlb'); stored lower-cased
            alias_loader: Stadium alias loader (default: global loader)
            fuzzy_threshold: Minimum score a fuzzy match must reach
        """
        self.sport = sport.lower()
        self.fuzzy_threshold = fuzzy_threshold
        self.alias_loader = alias_loader or get_stadium_alias_loader()
        # A sport without an entry in STADIUM_MAPPINGS gets an empty table.
        self._stadiums = STADIUM_MAPPINGS.get(self.sport, {})
        # Candidates are computed once and reused by every fuzzy lookup.
        self._candidates = self._build_candidates()

    def _build_candidates(self) -> list[MatchCandidate]:
        """Create one fuzzy-match candidate per stadium of this sport.

        Each candidate carries the stadium's loader-provided alias names
        followed by its city.
        """
        return [
            MatchCandidate(
                canonical_id=sid,
                name=details.name,
                aliases=[
                    *(
                        entry.alias_name
                        for entry in self.alias_loader.get_aliases_for_stadium(sid)
                    ),
                    details.city,
                ],
            )
            for sid, details in self._stadiums.items()
        ]

    def _review_item(
        self,
        reason: ReviewReason,
        raw_value: str,
        context: dict,
        source_url: Optional[str],
        game_date: Optional[date],
        **extra,
    ) -> ManualReviewItem:
        """Build a ManualReviewItem with a fresh short ``stadium_`` ID.

        Args:
            reason: Why the value needs manual review
            raw_value: The stadium name exactly as it was passed in
            context: Extra details stored on the item
            source_url: Source URL of the scraped value, if known
            game_date: Date that was used for the lookup, if any
            **extra: Optional additional ManualReviewItem keyword arguments
                (used for ``suggested_matches``)
        """
        return ManualReviewItem(
            id=f"stadium_{uuid4().hex[:8]}",
            reason=reason,
            sport=self.sport,
            raw_value=raw_value,
            context=context,
            source_url=source_url,
            game_date=game_date,
            **extra,
        )

    def resolve(
        self,
        name: str,
        check_date: Optional[date] = None,
        country: Optional[str] = None,
        source_url: Optional[str] = None,
    ) -> StadiumResolveResult:
        """Find the canonical ID that belongs to a stadium name.

        Args:
            name: Stadium name to resolve
            check_date: Date for alias validity (None = today)
            country: Country for geographic filtering (None = no filter)
            source_url: Source URL recorded on manual review items

        Returns:
            StadiumResolveResult describing how (or whether) the name matched
        """
        wanted = name.lower().strip()

        # 1. Exact, case-insensitive comparison with the canonical names.
        exact_id = next(
            (
                sid
                for sid, details in self._stadiums.items()
                if details.name.lower() == wanted
            ),
            None,
        )
        if exact_id is not None:
            return StadiumResolveResult(
                canonical_id=exact_id,
                confidence=100,
                match_type="exact",
            )

        # 2. Alias lookup. The alias file covers all sports, so a hit only
        #    counts when the returned ID carries this sport's prefix.
        aliased_id = self.alias_loader.resolve(name, check_date)
        if aliased_id and aliased_id.startswith(f"stadium_{self.sport}_"):
            return StadiumResolveResult(
                canonical_id=aliased_id,
                confidence=95,
                match_type="alias",
            )

        # 3. Fuzzy comparison; the first returned match is taken as best.
        ranked = fuzzy_match_stadium(
            name,
            self._candidates,
            threshold=self.fuzzy_threshold,
        )
        if ranked:
            top = ranked[0]
            flagged = None
            # Matches scoring under 90 are accepted but queued for review.
            if top.confidence < 90:
                flagged = self._review_item(
                    ReviewReason.LOW_CONFIDENCE_MATCH,
                    name,
                    {"match_type": "fuzzy"},
                    source_url,
                    check_date,
                    suggested_matches=ranked,
                )
            return StadiumResolveResult(
                canonical_id=top.canonical_id,
                confidence=top.confidence,
                match_type="fuzzy",
                review_item=flagged,
            )

        # 4. Nothing matched: a venue in a non-allowed country is reported
        #    as filtered rather than as unresolved.
        if country and country not in ALLOWED_COUNTRIES:
            return StadiumResolveResult(
                canonical_id=None,
                confidence=0,
                match_type="filtered",
                filtered_reason="geographic",
                review_item=self._review_item(
                    ReviewReason.GEOGRAPHIC_FILTER,
                    name,
                    {"country": country, "reason": "Stadium outside USA/Canada/Mexico"},
                    source_url,
                    check_date,
                ),
            )

        # 5. Unresolved: attach looser fuzzy suggestions for the reviewer.
        suggestions = fuzzy_match_stadium(
            name,
            self._candidates,
            threshold=50,  # Lower threshold for suggestions
            top_n=5,
        )
        return StadiumResolveResult(
            canonical_id=None,
            confidence=0,
            match_type="unresolved",
            review_item=self._review_item(
                ReviewReason.UNRESOLVED_STADIUM,
                name,
                {},
                source_url,
                check_date,
                suggested_matches=suggestions,
            ),
        )

    def get_stadium_info(self, stadium_id: str) -> Optional[StadiumInfo]:
        """Look up the StadiumInfo stored for a canonical ID.

        Args:
            stadium_id: Canonical stadium ID

        Returns:
            The matching StadiumInfo, or None when the ID is unknown
        """
        return self._stadiums.get(stadium_id)

    def get_all_stadiums(self) -> list[StadiumInfo]:
        """Return every stadium known for this sport.

        Returns:
            A new list of StadiumInfo objects
        """
        return [*self._stadiums.values()]

    def is_in_allowed_region(self, stadium_id: str) -> bool:
        """Tell whether a stadium lies in one of the allowed countries.

        Args:
            stadium_id: Canonical stadium ID

        Returns:
            True if the stadium is known and its country is in
            ALLOWED_COUNTRIES; False otherwise (including unknown IDs)
        """
        details = self._stadiums.get(stadium_id)
        return bool(details) and details.country in ALLOWED_COUNTRIES
# One StadiumResolver per sport, keyed by lower-cased sport code.
_resolvers: dict[str, StadiumResolver] = {}


def get_stadium_resolver(sport: str) -> StadiumResolver:
    """Return the shared stadium resolver for a sport, building it on first use.

    Args:
        sport: Sport code; matched case-insensitively

    Returns:
        The cached StadiumResolver for that sport
    """
    key = sport.lower()
    cached = _resolvers.get(key)
    if cached is None:
        # First request for this sport: build once and remember it.
        cached = _resolvers[key] = StadiumResolver(key)
    return cached
def resolve_stadium(
    sport: str,
    name: str,
    check_date: Optional[date] = None,
    country: Optional[str] = None,
    source_url: Optional[str] = None,
) -> StadiumResolveResult:
    """Convenience function to resolve a stadium name.

    Uses the cached per-sport resolver and forwards every lookup option,
    so the geographic filter and source URL are available here as well
    as on ``StadiumResolver.resolve``.

    Args:
        sport: Sport code
        name: Stadium name to resolve
        check_date: Date for alias validity
        country: Country for geographic filtering (None = no filter)
        source_url: Source URL recorded on manual review items

    Returns:
        StadiumResolveResult
    """
    return get_stadium_resolver(sport).resolve(
        name,
        check_date=check_date,
        country=country,
        source_url=source_url,
    )

View File

@@ -0,0 +1,514 @@
"""Team name resolver with exact, alias, and fuzzy matching."""
from dataclasses import dataclass
from datetime import date
from typing import Optional
from uuid import uuid4
from ..config import FUZZY_MATCH_THRESHOLD
from ..models.aliases import (
AliasType,
FuzzyMatch,
ManualReviewItem,
ReviewReason,
)
from .alias_loader import get_team_alias_loader, TeamAliasLoader
from .fuzzy import MatchCandidate, fuzzy_match_team, exact_match
@dataclass
class TeamResolveResult:
    """Outcome of one team lookup.

    Attributes:
        canonical_id: Canonical team ID that was found, or None when the
            value could not be resolved
        confidence: Score for the match (100 for exact matches, lower for
            alias and fuzzy matches, 0 when unresolved)
        match_type: Strategy that produced the result: 'exact', 'alias',
            'fuzzy' or 'unresolved'
        review_item: ManualReviewItem attached when the lookup failed or
            the match confidence was low; None otherwise
    """

    canonical_id: Optional[str]
    confidence: int
    match_type: str
    review_item: Optional[ManualReviewItem] = None
# Hardcoded team mappings for each sport
# Format: {sport: {abbreviation: (canonical_id, full_name, city, stadium_id)}}
#
# Notes for maintainers:
# - Keys are upper-case abbreviations; several keys may point at the same
#   canonical_id (e.g. "BKN" and "BRK" both map to team_nba_brk).
# - The canonical_id suffix is not always the lower-cased key
#   (e.g. "CHA" -> team_nba_cho, "PHO" -> team_nba_phx).
# - The city field is NOT unique within a sport (e.g. "LAC" and "LAL" are both
#   "Los Angeles"), so a city on its own cannot always identify a team.
TEAM_MAPPINGS: dict[str, dict[str, tuple[str, str, str, str]]] = {
"nba": {
"ATL": ("team_nba_atl", "Atlanta Hawks", "Atlanta", "stadium_nba_state_farm_arena"),
"BOS": ("team_nba_bos", "Boston Celtics", "Boston", "stadium_nba_td_garden"),
"BKN": ("team_nba_brk", "Brooklyn Nets", "Brooklyn", "stadium_nba_barclays_center"),
"BRK": ("team_nba_brk", "Brooklyn Nets", "Brooklyn", "stadium_nba_barclays_center"),
"CHA": ("team_nba_cho", "Charlotte Hornets", "Charlotte", "stadium_nba_spectrum_center"),
"CHO": ("team_nba_cho", "Charlotte Hornets", "Charlotte", "stadium_nba_spectrum_center"),
"CHI": ("team_nba_chi", "Chicago Bulls", "Chicago", "stadium_nba_united_center"),
"CLE": ("team_nba_cle", "Cleveland Cavaliers", "Cleveland", "stadium_nba_rocket_mortgage_fieldhouse"),
"DAL": ("team_nba_dal", "Dallas Mavericks", "Dallas", "stadium_nba_american_airlines_center"),
"DEN": ("team_nba_den", "Denver Nuggets", "Denver", "stadium_nba_ball_arena"),
"DET": ("team_nba_det", "Detroit Pistons", "Detroit", "stadium_nba_little_caesars_arena"),
"GSW": ("team_nba_gsw", "Golden State Warriors", "Golden State", "stadium_nba_chase_center"),
"GS": ("team_nba_gsw", "Golden State Warriors", "Golden State", "stadium_nba_chase_center"),
"HOU": ("team_nba_hou", "Houston Rockets", "Houston", "stadium_nba_toyota_center"),
"IND": ("team_nba_ind", "Indiana Pacers", "Indiana", "stadium_nba_gainbridge_fieldhouse"),
"LAC": ("team_nba_lac", "Los Angeles Clippers", "Los Angeles", "stadium_nba_intuit_dome"),
"LAL": ("team_nba_lal", "Los Angeles Lakers", "Los Angeles", "stadium_nba_cryptocom_arena"),
"MEM": ("team_nba_mem", "Memphis Grizzlies", "Memphis", "stadium_nba_fedexforum"),
"MIA": ("team_nba_mia", "Miami Heat", "Miami", "stadium_nba_kaseya_center"),
"MIL": ("team_nba_mil", "Milwaukee Bucks", "Milwaukee", "stadium_nba_fiserv_forum"),
"MIN": ("team_nba_min", "Minnesota Timberwolves", "Minnesota", "stadium_nba_target_center"),
"NOP": ("team_nba_nop", "New Orleans Pelicans", "New Orleans", "stadium_nba_smoothie_king_center"),
"NO": ("team_nba_nop", "New Orleans Pelicans", "New Orleans", "stadium_nba_smoothie_king_center"),
"NYK": ("team_nba_nyk", "New York Knicks", "New York", "stadium_nba_madison_square_garden"),
"NY": ("team_nba_nyk", "New York Knicks", "New York", "stadium_nba_madison_square_garden"),
"OKC": ("team_nba_okc", "Oklahoma City Thunder", "Oklahoma City", "stadium_nba_paycom_center"),
"ORL": ("team_nba_orl", "Orlando Magic", "Orlando", "stadium_nba_kia_center"),
"PHI": ("team_nba_phi", "Philadelphia 76ers", "Philadelphia", "stadium_nba_wells_fargo_center"),
"PHX": ("team_nba_phx", "Phoenix Suns", "Phoenix", "stadium_nba_footprint_center"),
"PHO": ("team_nba_phx", "Phoenix Suns", "Phoenix", "stadium_nba_footprint_center"),
"POR": ("team_nba_por", "Portland Trail Blazers", "Portland", "stadium_nba_moda_center"),
"SAC": ("team_nba_sac", "Sacramento Kings", "Sacramento", "stadium_nba_golden_1_center"),
"SAS": ("team_nba_sas", "San Antonio Spurs", "San Antonio", "stadium_nba_frost_bank_center"),
"SA": ("team_nba_sas", "San Antonio Spurs", "San Antonio", "stadium_nba_frost_bank_center"),
"TOR": ("team_nba_tor", "Toronto Raptors", "Toronto", "stadium_nba_scotiabank_arena"),
"UTA": ("team_nba_uta", "Utah Jazz", "Utah", "stadium_nba_delta_center"),
"WAS": ("team_nba_was", "Washington Wizards", "Washington", "stadium_nba_capital_one_arena"),
"WSH": ("team_nba_was", "Washington Wizards", "Washington", "stadium_nba_capital_one_arena"),
},
"mlb": {
"ARI": ("team_mlb_ari", "Arizona Diamondbacks", "Arizona", "stadium_mlb_chase_field"),
"ATL": ("team_mlb_atl", "Atlanta Braves", "Atlanta", "stadium_mlb_truist_park"),
"BAL": ("team_mlb_bal", "Baltimore Orioles", "Baltimore", "stadium_mlb_oriole_park_at_camden_yards"),
"BOS": ("team_mlb_bos", "Boston Red Sox", "Boston", "stadium_mlb_fenway_park"),
"CHC": ("team_mlb_chc", "Chicago Cubs", "Chicago", "stadium_mlb_wrigley_field"),
"CHW": ("team_mlb_chw", "Chicago White Sox", "Chicago", "stadium_mlb_guaranteed_rate_field"),
"CWS": ("team_mlb_chw", "Chicago White Sox", "Chicago", "stadium_mlb_guaranteed_rate_field"),
"CIN": ("team_mlb_cin", "Cincinnati Reds", "Cincinnati", "stadium_mlb_great_american_ball_park"),
"CLE": ("team_mlb_cle", "Cleveland Guardians", "Cleveland", "stadium_mlb_progressive_field"),
"COL": ("team_mlb_col", "Colorado Rockies", "Colorado", "stadium_mlb_coors_field"),
"DET": ("team_mlb_det", "Detroit Tigers", "Detroit", "stadium_mlb_comerica_park"),
"HOU": ("team_mlb_hou", "Houston Astros", "Houston", "stadium_mlb_minute_maid_park"),
"KC": ("team_mlb_kc", "Kansas City Royals", "Kansas City", "stadium_mlb_kauffman_stadium"),
"KCR": ("team_mlb_kc", "Kansas City Royals", "Kansas City", "stadium_mlb_kauffman_stadium"),
"LAA": ("team_mlb_laa", "Los Angeles Angels", "Los Angeles", "stadium_mlb_angel_stadium"),
# "ANA" shares team_mlb_laa with "LAA" but carries a different city value.
"ANA": ("team_mlb_laa", "Los Angeles Angels", "Anaheim", "stadium_mlb_angel_stadium"),
"LAD": ("team_mlb_lad", "Los Angeles Dodgers", "Los Angeles", "stadium_mlb_dodger_stadium"),
"MIA": ("team_mlb_mia", "Miami Marlins", "Miami", "stadium_mlb_loandepot_park"),
# "FLA" shares team_mlb_mia with "MIA" but carries a different city value.
"FLA": ("team_mlb_mia", "Miami Marlins", "Florida", "stadium_mlb_loandepot_park"),
"MIL": ("team_mlb_mil", "Milwaukee Brewers", "Milwaukee", "stadium_mlb_american_family_field"),
"MIN": ("team_mlb_min", "Minnesota Twins", "Minnesota", "stadium_mlb_target_field"),
"NYM": ("team_mlb_nym", "New York Mets", "New York", "stadium_mlb_citi_field"),
"NYY": ("team_mlb_nyy", "New York Yankees", "New York", "stadium_mlb_yankee_stadium"),
"OAK": ("team_mlb_oak", "Oakland Athletics", "Oakland", "stadium_mlb_sutter_health_park"),
"PHI": ("team_mlb_phi", "Philadelphia Phillies", "Philadelphia", "stadium_mlb_citizens_bank_park"),
"PIT": ("team_mlb_pit", "Pittsburgh Pirates", "Pittsburgh", "stadium_mlb_pnc_park"),
"SD": ("team_mlb_sd", "San Diego Padres", "San Diego", "stadium_mlb_petco_park"),
"SDP": ("team_mlb_sd", "San Diego Padres", "San Diego", "stadium_mlb_petco_park"),
"SF": ("team_mlb_sf", "San Francisco Giants", "San Francisco", "stadium_mlb_oracle_park"),
"SFG": ("team_mlb_sf", "San Francisco Giants", "San Francisco", "stadium_mlb_oracle_park"),
"SEA": ("team_mlb_sea", "Seattle Mariners", "Seattle", "stadium_mlb_tmobile_park"),
"STL": ("team_mlb_stl", "St. Louis Cardinals", "St. Louis", "stadium_mlb_busch_stadium"),
"TB": ("team_mlb_tbr", "Tampa Bay Rays", "Tampa Bay", "stadium_mlb_tropicana_field"),
"TBR": ("team_mlb_tbr", "Tampa Bay Rays", "Tampa Bay", "stadium_mlb_tropicana_field"),
"TEX": ("team_mlb_tex", "Texas Rangers", "Texas", "stadium_mlb_globe_life_field"),
"TOR": ("team_mlb_tor", "Toronto Blue Jays", "Toronto", "stadium_mlb_rogers_centre"),
"WSN": ("team_mlb_wsn", "Washington Nationals", "Washington", "stadium_mlb_nationals_park"),
"WAS": ("team_mlb_wsn", "Washington Nationals", "Washington", "stadium_mlb_nationals_park"),
},
"nfl": {
"ARI": ("team_nfl_ari", "Arizona Cardinals", "Arizona", "stadium_nfl_state_farm_stadium"),
"ATL": ("team_nfl_atl", "Atlanta Falcons", "Atlanta", "stadium_nfl_mercedes_benz_stadium"),
"BAL": ("team_nfl_bal", "Baltimore Ravens", "Baltimore", "stadium_nfl_mandt_bank_stadium"),
"BUF": ("team_nfl_buf", "Buffalo Bills", "Buffalo", "stadium_nfl_highmark_stadium"),
"CAR": ("team_nfl_car", "Carolina Panthers", "Carolina", "stadium_nfl_bank_of_america_stadium"),
"CHI": ("team_nfl_chi", "Chicago Bears", "Chicago", "stadium_nfl_soldier_field"),
"CIN": ("team_nfl_cin", "Cincinnati Bengals", "Cincinnati", "stadium_nfl_paycor_stadium"),
"CLE": ("team_nfl_cle", "Cleveland Browns", "Cleveland", "stadium_nfl_huntington_bank_field"),
"DAL": ("team_nfl_dal", "Dallas Cowboys", "Dallas", "stadium_nfl_att_stadium"),
"DEN": ("team_nfl_den", "Denver Broncos", "Denver", "stadium_nfl_empower_field"),
"DET": ("team_nfl_det", "Detroit Lions", "Detroit", "stadium_nfl_ford_field"),
"GB": ("team_nfl_gb", "Green Bay Packers", "Green Bay", "stadium_nfl_lambeau_field"),
"GNB": ("team_nfl_gb", "Green Bay Packers", "Green Bay", "stadium_nfl_lambeau_field"),
"HOU": ("team_nfl_hou", "Houston Texans", "Houston", "stadium_nfl_nrg_stadium"),
"IND": ("team_nfl_ind", "Indianapolis Colts", "Indianapolis", "stadium_nfl_lucas_oil_stadium"),
"JAX": ("team_nfl_jax", "Jacksonville Jaguars", "Jacksonville", "stadium_nfl_everbank_stadium"),
"JAC": ("team_nfl_jax", "Jacksonville Jaguars", "Jacksonville", "stadium_nfl_everbank_stadium"),
"KC": ("team_nfl_kc", "Kansas City Chiefs", "Kansas City", "stadium_nfl_arrowhead_stadium"),
"KAN": ("team_nfl_kc", "Kansas City Chiefs", "Kansas City", "stadium_nfl_arrowhead_stadium"),
"LV": ("team_nfl_lv", "Las Vegas Raiders", "Las Vegas", "stadium_nfl_allegiant_stadium"),
"LAC": ("team_nfl_lac", "Los Angeles Chargers", "Los Angeles", "stadium_nfl_sofi_stadium"),
"LAR": ("team_nfl_lar", "Los Angeles Rams", "Los Angeles", "stadium_nfl_sofi_stadium"),
"MIA": ("team_nfl_mia", "Miami Dolphins", "Miami", "stadium_nfl_hard_rock_stadium"),
"MIN": ("team_nfl_min", "Minnesota Vikings", "Minnesota", "stadium_nfl_us_bank_stadium"),
"NE": ("team_nfl_ne", "New England Patriots", "New England", "stadium_nfl_gillette_stadium"),
"NWE": ("team_nfl_ne", "New England Patriots", "New England", "stadium_nfl_gillette_stadium"),
"NO": ("team_nfl_no", "New Orleans Saints", "New Orleans", "stadium_nfl_caesars_superdome"),
"NOR": ("team_nfl_no", "New Orleans Saints", "New Orleans", "stadium_nfl_caesars_superdome"),
"NYG": ("team_nfl_nyg", "New York Giants", "New York", "stadium_nfl_metlife_stadium"),
"NYJ": ("team_nfl_nyj", "New York Jets", "New York", "stadium_nfl_metlife_stadium"),
"PHI": ("team_nfl_phi", "Philadelphia Eagles", "Philadelphia", "stadium_nfl_lincoln_financial_field"),
"PIT": ("team_nfl_pit", "Pittsburgh Steelers", "Pittsburgh", "stadium_nfl_acrisure_stadium"),
"SF": ("team_nfl_sf", "San Francisco 49ers", "San Francisco", "stadium_nfl_levis_stadium"),
"SFO": ("team_nfl_sf", "San Francisco 49ers", "San Francisco", "stadium_nfl_levis_stadium"),
"SEA": ("team_nfl_sea", "Seattle Seahawks", "Seattle", "stadium_nfl_lumen_field"),
"TB": ("team_nfl_tb", "Tampa Bay Buccaneers", "Tampa Bay", "stadium_nfl_raymond_james_stadium"),
"TAM": ("team_nfl_tb", "Tampa Bay Buccaneers", "Tampa Bay", "stadium_nfl_raymond_james_stadium"),
"TEN": ("team_nfl_ten", "Tennessee Titans", "Tennessee", "stadium_nfl_nissan_stadium"),
"WAS": ("team_nfl_was", "Washington Commanders", "Washington", "stadium_nfl_northwest_stadium"),
"WSH": ("team_nfl_was", "Washington Commanders", "Washington", "stadium_nfl_northwest_stadium"),
},
"nhl": {
"ANA": ("team_nhl_ana", "Anaheim Ducks", "Anaheim", "stadium_nhl_honda_center"),
# "ARI" and "UTA" both resolve to the same team and keep the team_nhl_ari ID.
"ARI": ("team_nhl_ari", "Utah Hockey Club", "Utah", "stadium_nhl_delta_center"), # Moved 2024
"UTA": ("team_nhl_ari", "Utah Hockey Club", "Utah", "stadium_nhl_delta_center"),
"BOS": ("team_nhl_bos", "Boston Bruins", "Boston", "stadium_nhl_td_garden"),
"BUF": ("team_nhl_buf", "Buffalo Sabres", "Buffalo", "stadium_nhl_keybank_center"),
"CGY": ("team_nhl_cgy", "Calgary Flames", "Calgary", "stadium_nhl_scotiabank_saddledome"),
"CAR": ("team_nhl_car", "Carolina Hurricanes", "Carolina", "stadium_nhl_pnc_arena"),
"CHI": ("team_nhl_chi", "Chicago Blackhawks", "Chicago", "stadium_nhl_united_center"),
"COL": ("team_nhl_col", "Colorado Avalanche", "Colorado", "stadium_nhl_ball_arena"),
"CBJ": ("team_nhl_cbj", "Columbus Blue Jackets", "Columbus", "stadium_nhl_nationwide_arena"),
"DAL": ("team_nhl_dal", "Dallas Stars", "Dallas", "stadium_nhl_american_airlines_center"),
"DET": ("team_nhl_det", "Detroit Red Wings", "Detroit", "stadium_nhl_little_caesars_arena"),
"EDM": ("team_nhl_edm", "Edmonton Oilers", "Edmonton", "stadium_nhl_rogers_place"),
"FLA": ("team_nhl_fla", "Florida Panthers", "Florida", "stadium_nhl_amerant_bank_arena"),
"LA": ("team_nhl_la", "Los Angeles Kings", "Los Angeles", "stadium_nhl_cryptocom_arena"),
"LAK": ("team_nhl_la", "Los Angeles Kings", "Los Angeles", "stadium_nhl_cryptocom_arena"),
"MIN": ("team_nhl_min", "Minnesota Wild", "Minnesota", "stadium_nhl_xcel_energy_center"),
"MTL": ("team_nhl_mtl", "Montreal Canadiens", "Montreal", "stadium_nhl_bell_centre"),
"MON": ("team_nhl_mtl", "Montreal Canadiens", "Montreal", "stadium_nhl_bell_centre"),
"NSH": ("team_nhl_nsh", "Nashville Predators", "Nashville", "stadium_nhl_bridgestone_arena"),
"NAS": ("team_nhl_nsh", "Nashville Predators", "Nashville", "stadium_nhl_bridgestone_arena"),
"NJ": ("team_nhl_njd", "New Jersey Devils", "New Jersey", "stadium_nhl_prudential_center"),
"NJD": ("team_nhl_njd", "New Jersey Devils", "New Jersey", "stadium_nhl_prudential_center"),
"NYI": ("team_nhl_nyi", "New York Islanders", "New York", "stadium_nhl_ubs_arena"),
"NYR": ("team_nhl_nyr", "New York Rangers", "New York", "stadium_nhl_madison_square_garden"),
"OTT": ("team_nhl_ott", "Ottawa Senators", "Ottawa", "stadium_nhl_canadian_tire_centre"),
"PHI": ("team_nhl_phi", "Philadelphia Flyers", "Philadelphia", "stadium_nhl_wells_fargo_center"),
"PIT": ("team_nhl_pit", "Pittsburgh Penguins", "Pittsburgh", "stadium_nhl_ppg_paints_arena"),
"SJ": ("team_nhl_sj", "San Jose Sharks", "San Jose", "stadium_nhl_sap_center"),
"SJS": ("team_nhl_sj", "San Jose Sharks", "San Jose", "stadium_nhl_sap_center"),
"SEA": ("team_nhl_sea", "Seattle Kraken", "Seattle", "stadium_nhl_climate_pledge_arena"),
"STL": ("team_nhl_stl", "St. Louis Blues", "St. Louis", "stadium_nhl_enterprise_center"),
"TB": ("team_nhl_tb", "Tampa Bay Lightning", "Tampa Bay", "stadium_nhl_amalie_arena"),
"TBL": ("team_nhl_tb", "Tampa Bay Lightning", "Tampa Bay", "stadium_nhl_amalie_arena"),
"TOR": ("team_nhl_tor", "Toronto Maple Leafs", "Toronto", "stadium_nhl_scotiabank_arena"),
"VAN": ("team_nhl_van", "Vancouver Canucks", "Vancouver", "stadium_nhl_rogers_arena"),
"VGK": ("team_nhl_vgk", "Vegas Golden Knights", "Vegas", "stadium_nhl_tmobile_arena"),
"VEG": ("team_nhl_vgk", "Vegas Golden Knights", "Vegas", "stadium_nhl_tmobile_arena"),
"WAS": ("team_nhl_was", "Washington Capitals", "Washington", "stadium_nhl_capital_one_arena"),
"WSH": ("team_nhl_was", "Washington Capitals", "Washington", "stadium_nhl_capital_one_arena"),
"WPG": ("team_nhl_wpg", "Winnipeg Jets", "Winnipeg", "stadium_nhl_canada_life_centre"),
},
"mls": {
"ATL": ("team_mls_atl", "Atlanta United", "Atlanta", "stadium_mls_mercedes_benz_stadium"),
"AUS": ("team_mls_aus", "Austin FC", "Austin", "stadium_mls_q2_stadium"),
"CLT": ("team_mls_clt", "Charlotte FC", "Charlotte", "stadium_mls_bank_of_america_stadium"),
"CHI": ("team_mls_chi", "Chicago Fire", "Chicago", "stadium_mls_soldier_field"),
"CIN": ("team_mls_cin", "FC Cincinnati", "Cincinnati", "stadium_mls_tql_stadium"),
"COL": ("team_mls_col", "Colorado Rapids", "Colorado", "stadium_mls_dicks_sporting_goods_park"),
"CLB": ("team_mls_clb", "Columbus Crew", "Columbus", "stadium_mls_lowercom_field"),
"DAL": ("team_mls_dal", "FC Dallas", "Dallas", "stadium_mls_toyota_stadium"),
"DC": ("team_mls_dc", "D.C. United", "Washington", "stadium_mls_audi_field"),
"HOU": ("team_mls_hou", "Houston Dynamo", "Houston", "stadium_mls_shell_energy_stadium"),
"LAG": ("team_mls_lag", "LA Galaxy", "Los Angeles", "stadium_mls_dignity_health_sports_park"),
"LAFC": ("team_mls_lafc", "Los Angeles FC", "Los Angeles", "stadium_mls_bmo_stadium"),
"MIA": ("team_mls_mia", "Inter Miami", "Miami", "stadium_mls_chase_stadium"),
"MIN": ("team_mls_min", "Minnesota United", "Minnesota", "stadium_mls_allianz_field"),
"MTL": ("team_mls_mtl", "CF Montreal", "Montreal", "stadium_mls_stade_saputo"),
"NSH": ("team_mls_nsh", "Nashville SC", "Nashville", "stadium_mls_geodis_park"),
"NE": ("team_mls_ne", "New England Revolution", "New England", "stadium_mls_gillette_stadium"),
"NYC": ("team_mls_nyc", "New York City FC", "New York", "stadium_mls_yankee_stadium"),
"RB": ("team_mls_ny", "New York Red Bulls", "New York", "stadium_mls_red_bull_arena"),
"RBNY": ("team_mls_ny", "New York Red Bulls", "New York", "stadium_mls_red_bull_arena"),
"ORL": ("team_mls_orl", "Orlando City", "Orlando", "stadium_mls_interco_stadium"),
"PHI": ("team_mls_phi", "Philadelphia Union", "Philadelphia", "stadium_mls_subaru_park"),
"POR": ("team_mls_por", "Portland Timbers", "Portland", "stadium_mls_providence_park"),
"SLC": ("team_mls_slc", "Real Salt Lake", "Salt Lake", "stadium_mls_america_first_field"),
"RSL": ("team_mls_slc", "Real Salt Lake", "Salt Lake", "stadium_mls_america_first_field"),
"SJ": ("team_mls_sj", "San Jose Earthquakes", "San Jose", "stadium_mls_paypal_park"),
"SD": ("team_mls_sd", "San Diego FC", "San Diego", "stadium_mls_snapdragon_stadium"),
"SEA": ("team_mls_sea", "Seattle Sounders", "Seattle", "stadium_mls_lumen_field"),
"SKC": ("team_mls_skc", "Sporting Kansas City", "Kansas City", "stadium_mls_childrens_mercy_park"),
"STL": ("team_mls_stl", "St. Louis City SC", "St. Louis", "stadium_mls_citypark"),
"TOR": ("team_mls_tor", "Toronto FC", "Toronto", "stadium_mls_bmo_field"),
"VAN": ("team_mls_van", "Vancouver Whitecaps", "Vancouver", "stadium_mls_bc_place"),
},
# WNBA keys include nickname-style keys (e.g. "DREAM", "SKY") in addition to
# abbreviations; entries marked "# alias" repeat the tuple of the primary key.
"wnba": {
"ATL": ("team_wnba_atl", "Atlanta Dream", "Atlanta", "stadium_wnba_gateway_center_arena"),
"DREAM": ("team_wnba_atl", "Atlanta Dream", "Atlanta", "stadium_wnba_gateway_center_arena"), # alias
"CHI": ("team_wnba_chi", "Chicago Sky", "Chicago", "stadium_wnba_wintrust_arena"),
"SKY": ("team_wnba_chi", "Chicago Sky", "Chicago", "stadium_wnba_wintrust_arena"), # alias
"CON": ("team_wnba_con", "Connecticut Sun", "Connecticut", "stadium_wnba_mohegan_sun_arena"),
"CONN": ("team_wnba_con", "Connecticut Sun", "Connecticut", "stadium_wnba_mohegan_sun_arena"), # alias
"SUN": ("team_wnba_con", "Connecticut Sun", "Connecticut", "stadium_wnba_mohegan_sun_arena"), # alias
"DAL": ("team_wnba_dal", "Dallas Wings", "Dallas", "stadium_wnba_college_park_center"),
"WINGS": ("team_wnba_dal", "Dallas Wings", "Dallas", "stadium_wnba_college_park_center"), # alias
"GSV": ("team_wnba_gsv", "Golden State Valkyries", "Golden State", "stadium_wnba_chase_center"),
"GS": ("team_wnba_gsv", "Golden State Valkyries", "Golden State", "stadium_wnba_chase_center"), # alias
"VAL": ("team_wnba_gsv", "Golden State Valkyries", "Golden State", "stadium_wnba_chase_center"), # alias
"IND": ("team_wnba_ind", "Indiana Fever", "Indiana", "stadium_wnba_gainbridge_fieldhouse"),
"FEVER": ("team_wnba_ind", "Indiana Fever", "Indiana", "stadium_wnba_gainbridge_fieldhouse"), # alias
"LV": ("team_wnba_lv", "Las Vegas Aces", "Las Vegas", "stadium_wnba_michelob_ultra_arena"),
"LVA": ("team_wnba_lv", "Las Vegas Aces", "Las Vegas", "stadium_wnba_michelob_ultra_arena"), # alias
"ACES": ("team_wnba_lv", "Las Vegas Aces", "Las Vegas", "stadium_wnba_michelob_ultra_arena"), # alias
"LA": ("team_wnba_la", "Los Angeles Sparks", "Los Angeles", "stadium_wnba_cryptocom_arena"),
"LAS": ("team_wnba_la", "Los Angeles Sparks", "Los Angeles", "stadium_wnba_cryptocom_arena"), # alias
"SPARKS": ("team_wnba_la", "Los Angeles Sparks", "Los Angeles", "stadium_wnba_cryptocom_arena"), # alias
"MIN": ("team_wnba_min", "Minnesota Lynx", "Minnesota", "stadium_wnba_target_center"),
"LYNX": ("team_wnba_min", "Minnesota Lynx", "Minnesota", "stadium_wnba_target_center"), # alias
"NY": ("team_wnba_ny", "New York Liberty", "New York", "stadium_wnba_barclays_center"),
"NYL": ("team_wnba_ny", "New York Liberty", "New York", "stadium_wnba_barclays_center"), # alias
"LIB": ("team_wnba_ny", "New York Liberty", "New York", "stadium_wnba_barclays_center"), # alias
"PHX": ("team_wnba_phx", "Phoenix Mercury", "Phoenix", "stadium_wnba_footprint_center"),
"PHO": ("team_wnba_phx", "Phoenix Mercury", "Phoenix", "stadium_wnba_footprint_center"), # alias
"MERCURY": ("team_wnba_phx", "Phoenix Mercury", "Phoenix", "stadium_wnba_footprint_center"), # alias
"SEA": ("team_wnba_sea", "Seattle Storm", "Seattle", "stadium_wnba_climate_pledge_arena"),
"STORM": ("team_wnba_sea", "Seattle Storm", "Seattle", "stadium_wnba_climate_pledge_arena"), # alias
"WAS": ("team_wnba_was", "Washington Mystics", "Washington", "stadium_wnba_entertainment_sports_arena"),
"WSH": ("team_wnba_was", "Washington Mystics", "Washington", "stadium_wnba_entertainment_sports_arena"), # alias
"MYSTICS": ("team_wnba_was", "Washington Mystics", "Washington", "stadium_wnba_entertainment_sports_arena"), # alias
},
"nwsl": {
# Canonical IDs aligned with teams_canonical.json
"ANG": ("team_nwsl_ang", "Angel City FC", "Los Angeles", "stadium_nwsl_bmo_stadium"),
"ANF": ("team_nwsl_ang", "Angel City FC", "Los Angeles", "stadium_nwsl_bmo_stadium"), # alias
"CHI": ("team_nwsl_chi", "Chicago Red Stars", "Chicago", "stadium_nwsl_seatgeek_stadium"),
"HOU": ("team_nwsl_hou", "Houston Dash", "Houston", "stadium_nwsl_shell_energy_stadium"),
"KCC": ("team_nwsl_kcc", "Kansas City Current", "Kansas City", "stadium_nwsl_cpkc_stadium"),
"KC": ("team_nwsl_kcc", "Kansas City Current", "Kansas City", "stadium_nwsl_cpkc_stadium"), # alias
"NJY": ("team_nwsl_njy", "NJ/NY Gotham FC", "New Jersey", "stadium_nwsl_red_bull_arena"),
"NJ": ("team_nwsl_njy", "NJ/NY Gotham FC", "New Jersey", "stadium_nwsl_red_bull_arena"), # alias
"NCC": ("team_nwsl_ncc", "North Carolina Courage", "North Carolina", "stadium_nwsl_wakemed_soccer_park"),
"NC": ("team_nwsl_ncc", "North Carolina Courage", "North Carolina", "stadium_nwsl_wakemed_soccer_park"), # alias
"ORL": ("team_nwsl_orl", "Orlando Pride", "Orlando", "stadium_nwsl_interco_stadium"),
"POR": ("team_nwsl_por", "Portland Thorns", "Portland", "stadium_nwsl_providence_park"),
# NOTE(review): "RGN" is mapped to Racing Louisville here while Seattle Reign
# uses "SEA" — confirm this abbreviation against teams_canonical.json.
"RGN": ("team_nwsl_rgn", "Racing Louisville", "Louisville", "stadium_nwsl_lynn_family_stadium"),
"SDW": ("team_nwsl_sdw", "San Diego Wave", "San Diego", "stadium_nwsl_snapdragon_stadium"),
"SD": ("team_nwsl_sdw", "San Diego Wave", "San Diego", "stadium_nwsl_snapdragon_stadium"), # alias
"SEA": ("team_nwsl_sea", "Seattle Reign", "Seattle", "stadium_nwsl_lumen_field"),
"UTA": ("team_nwsl_uta", "Utah Royals", "Utah", "stadium_nwsl_america_first_field"),
"SLC": ("team_nwsl_uta", "Utah Royals", "Utah", "stadium_nwsl_america_first_field"), # alias
"WSH": ("team_nwsl_wsh", "Washington Spirit", "Washington", "stadium_nwsl_audi_field"),
"WAS": ("team_nwsl_wsh", "Washington Spirit", "Washington", "stadium_nwsl_audi_field"), # alias
"BAY": ("team_nwsl_bay", "Bay FC", "San Francisco", "stadium_nwsl_paypal_park"),
"BFC": ("team_nwsl_bay", "Bay FC", "San Francisco", "stadium_nwsl_paypal_park"), # alias
# Expansion teams (2026) - need to be added to teams_canonical.json
"BOS": ("team_nwsl_bos", "Boston Legacy FC", "Boston", "stadium_nwsl_gillette_stadium"),
"DEN": ("team_nwsl_den", "Denver Summit FC", "Denver", "stadium_nwsl_dicks_sporting_goods_park"),
},
}
class TeamResolver:
"""Resolves team names to canonical IDs.
Resolution order:
1. Exact match against abbreviation mappings
2. Exact match against full team names
3. Alias lookup (with date awareness)
4. Fuzzy match against all known names
5. Unresolved (returns ManualReviewItem)
"""
def __init__(
self,
sport: str,
alias_loader: Optional[TeamAliasLoader] = None,
fuzzy_threshold: int = FUZZY_MATCH_THRESHOLD,
):
"""Initialize the resolver.
Args:
sport: Sport code (e.g., 'nba', 'mlb')
alias_loader: Team alias loader (default: global loader)
fuzzy_threshold: Minimum fuzzy match score
"""
self.sport = sport.lower()
self.alias_loader = alias_loader or get_team_alias_loader()
self.fuzzy_threshold = fuzzy_threshold
self._mappings = TEAM_MAPPINGS.get(self.sport, {})
# Build match candidates for fuzzy matching
self._candidates = self._build_candidates()
def _build_candidates(self) -> list[MatchCandidate]:
"""Build match candidates from team mappings."""
# Group by canonical ID to avoid duplicates
by_id: dict[str, tuple[str, list[str]]] = {}
for abbrev, (canonical_id, full_name, city, stadium_id) in self._mappings.items():
if canonical_id not in by_id:
by_id[canonical_id] = (full_name, [])
# Add abbreviation as alias
by_id[canonical_id][1].append(abbrev)
by_id[canonical_id][1].append(city)
return [
MatchCandidate(
canonical_id=cid,
name=name,
aliases=list(set(aliases)), # Dedupe
)
for cid, (name, aliases) in by_id.items()
]
def resolve(
self,
value: str,
check_date: Optional[date] = None,
source_url: Optional[str] = None,
) -> TeamResolveResult:
"""Resolve a team name to a canonical ID.
Args:
value: Team name, abbreviation, or city to resolve
check_date: Date for alias validity (None = today)
source_url: Source URL for manual review items
Returns:
TeamResolveResult with resolution details
"""
value_upper = value.upper().strip()
value_lower = value.lower().strip()
# 1. Exact match against abbreviation
if value_upper in self._mappings:
canonical_id, full_name, _, _ = self._mappings[value_upper]
return TeamResolveResult(
canonical_id=canonical_id,
confidence=100,
match_type="exact",
)
# 2. Exact match against full names
for abbrev, (canonical_id, full_name, city, stadium_id) in self._mappings.items():
if value_lower == full_name.lower() or value_lower == city.lower():
return TeamResolveResult(
canonical_id=canonical_id,
confidence=100,
match_type="exact",
)
# 3. Alias lookup
alias_result = self.alias_loader.resolve(value, check_date)
if alias_result:
return TeamResolveResult(
canonical_id=alias_result,
confidence=95,
match_type="alias",
)
# 4. Fuzzy match
matches = fuzzy_match_team(
value,
self._candidates,
threshold=self.fuzzy_threshold,
)
if matches:
best = matches[0]
review_item = None
# Create review item for low confidence matches
if best.confidence < 90:
review_item = ManualReviewItem(
id=f"team_{uuid4().hex[:8]}",
reason=ReviewReason.LOW_CONFIDENCE_MATCH,
sport=self.sport,
raw_value=value,
context={"match_type": "fuzzy"},
source_url=source_url,
suggested_matches=matches,
game_date=check_date,
)
return TeamResolveResult(
canonical_id=best.canonical_id,
confidence=best.confidence,
match_type="fuzzy",
review_item=review_item,
)
# 5. Unresolved
review_item = ManualReviewItem(
id=f"team_{uuid4().hex[:8]}",
reason=ReviewReason.UNRESOLVED_TEAM,
sport=self.sport,
raw_value=value,
context={},
source_url=source_url,
suggested_matches=fuzzy_match_team(
value,
self._candidates,
threshold=50, # Lower threshold for suggestions
top_n=5,
),
game_date=check_date,
)
return TeamResolveResult(
canonical_id=None,
confidence=0,
match_type="unresolved",
review_item=review_item,
)
def get_team_info(self, abbreviation: str) -> Optional[tuple[str, str, str, str]]:
    """Get team info by abbreviation.

    The lookup key is normalized the same way ``resolve`` normalizes its
    input for the abbreviation check (upper-cased and stripped of
    surrounding whitespace), so ``" bos "`` and ``"BOS"`` find the same
    entry.

    Args:
        abbreviation: Team abbreviation (case-insensitive; surrounding
            whitespace is ignored)

    Returns:
        Tuple of (canonical_id, full_name, city, stadium_id) or None if the
        abbreviation is not in this resolver's mappings
    """
    # Mirror resolve(): scraped values often carry stray whitespace.
    lookup_key = abbreviation.upper().strip()
    return self._mappings.get(lookup_key)
def get_all_teams(self) -> list[tuple[str, str, str, str]]:
    """Get all teams for this sport.

    Several abbreviations in the mappings may point at the same team; each
    team is reported once, using the first entry encountered for its
    canonical ID and keeping the mappings' iteration order.

    Returns:
        List of (canonical_id, full_name, city, stadium_id) tuples, one per
        distinct canonical_id
    """
    # dict preserves insertion order, so setdefault keeps the first entry
    # seen for each canonical ID and ignores later duplicates.
    teams_by_id = {}
    for team_id, name, home_city, venue_id in self._mappings.values():
        teams_by_id.setdefault(team_id, (team_id, name, home_city, venue_id))
    return list(teams_by_id.values())
# Module-level cache of TeamResolver instances, keyed by lower-cased sport
# code. Filled lazily by get_team_resolver() so each sport's resolver is
# constructed at most once per process.
_resolvers: dict[str, TeamResolver] = {}
def get_team_resolver(sport: str) -> TeamResolver:
    """Return the shared team resolver for a sport, building it on first use.

    Args:
        sport: Sport code (case-insensitive; lower-cased for both the cache
            key and the TeamResolver constructor)

    Returns:
        The cached TeamResolver for that sport
    """
    cache_key = sport.lower()
    resolver = _resolvers.get(cache_key)
    if resolver is None:
        resolver = TeamResolver(cache_key)
        _resolvers[cache_key] = resolver
    return resolver
def resolve_team(
    sport: str,
    value: str,
    check_date: Optional[date] = None,
    source_url: Optional[str] = None,
) -> TeamResolveResult:
    """Convenience function to resolve a team name.

    Looks up (or creates) the cached resolver for ``sport`` and delegates
    to its ``resolve`` method.

    Args:
        sport: Sport code
        value: Team name, abbreviation, or city to resolve
        check_date: Date for alias validity
        source_url: Optional source URL; when given it is forwarded to the
            resolver so that any manual review item it creates records
            where the value came from

    Returns:
        TeamResolveResult
    """
    resolver = get_team_resolver(sport)
    if source_url is None:
        # Keep the original two-argument call so the resolver's own default
        # for source_url applies exactly as before.
        return resolver.resolve(value, check_date)
    return resolver.resolve(value, check_date, source_url=source_url)

View File

@@ -0,0 +1,344 @@
"""Timezone conversion utilities for normalizing game times to UTC."""
import re
from dataclasses import dataclass
from datetime import datetime, date, time
from typing import Optional
from zoneinfo import ZoneInfo
from dateutil import parser as dateutil_parser
from dateutil.tz import gettz, tzutc
from ..models.aliases import ReviewReason, ManualReviewItem
# Common timezone abbreviations to IANA timezones.
# Declared as (zone, abbreviations) groups; the resulting dict keeps the
# groups' order, which matters to callers that scan it and take the first hit.
# NOTE(review): "AT" maps to Alaska here; some sources use "AT" for Atlantic
# Time -- confirm this is intended.
TIMEZONE_ABBREV_MAP: dict[str, str] = {
    abbrev: zone
    for zone, abbrevs in (
        # US timezones
        ("America/New_York", ("ET", "EST", "EDT")),
        ("America/Chicago", ("CT", "CST", "CDT")),
        ("America/Denver", ("MT", "MST", "MDT")),
        ("America/Los_Angeles", ("PT", "PST", "PDT")),
        ("America/Anchorage", ("AT", "AKST", "AKDT")),
        ("Pacific/Honolulu", ("HT", "HST")),
        # Canada
        ("America/Halifax", ("AST", "ADT")),
        ("America/St_Johns", ("NST", "NDT")),
        # Mexico
        ("America/Mexico_City", ("CDST",)),
        # UTC
        ("UTC", ("UTC", "GMT", "Z")),
    )
    for abbrev in abbrevs
}
# State/region to timezone mapping for inferring timezone from location.
# Keys are two-letter US state / Canadian province codes; each code maps to a
# single IANA zone even where the state spans more than one.
STATE_TIMEZONE_MAP: dict[str, str] = {
    # Eastern (FL: most of Florida)
    **dict.fromkeys(
        ("CT", "DE", "FL", "GA", "MA", "MD", "ME"),
        "America/New_York",
    ),
    "MI": "America/Detroit",
    **dict.fromkeys(
        ("NC", "NH", "NJ", "NY", "OH", "PA", "RI", "SC", "VA", "VT", "WV", "DC"),
        "America/New_York",
    ),
    # Central
    # (IN and KY are listed in this group but point at the Indianapolis and
    # Louisville zones, which observe Eastern time.)
    **dict.fromkeys(("AL", "AR", "IA", "IL"), "America/Chicago"),
    "IN": "America/Indiana/Indianapolis",
    "KS": "America/Chicago",
    "KY": "America/Kentucky/Louisville",
    **dict.fromkeys(
        ("LA", "MN", "MO", "MS", "ND", "NE", "OK", "SD", "TN", "TX", "WI"),
        "America/Chicago",
    ),
    # Mountain
    "AZ": "America/Phoenix",  # No DST
    "CO": "America/Denver",
    "ID": "America/Boise",
    **dict.fromkeys(("MT", "NM", "UT", "WY"), "America/Denver"),
    # Pacific
    **dict.fromkeys(("CA", "NV", "OR", "WA"), "America/Los_Angeles"),
    # Alaska/Hawaii
    "AK": "America/Anchorage",
    "HI": "Pacific/Honolulu",
    # Canada provinces
    "ON": "America/Toronto",
    "QC": "America/Montreal",
    "BC": "America/Vancouver",
    "AB": "America/Edmonton",
    "MB": "America/Winnipeg",
    "SK": "America/Regina",
    "NS": "America/Halifax",
    "NB": "America/Moncton",
    "NL": "America/St_Johns",
    "PE": "America/Halifax",
}
@dataclass
class TimezoneResult:
    """Result of parsing a date/time and converting it to UTC.

    Produced by ``parse_datetime``; carries the converted instant together
    with metadata describing how trustworthy the timezone choice was.

    Attributes:
        datetime_utc: The datetime converted to UTC. When parsing fails,
            ``parse_datetime`` fills this with the current UTC time as a
            placeholder, so check ``confidence``/``warning`` before using it.
        source_timezone: The timezone that was detected/used (an IANA name
            such as 'America/New_York', or 'unknown' when parsing failed)
        confidence: Confidence in the timezone detection
            ('high', 'medium', 'low')
        warning: Warning message if the timezone was uncertain or the input
            could not be parsed; None otherwise
    """
    datetime_utc: datetime
    source_timezone: str
    confidence: str
    warning: Optional[str] = None
def detect_timezone_from_string(time_str: str) -> Optional[str]:
    """Detect timezone from a time string containing a timezone abbreviation.

    Every abbreviation in TIMEZONE_ABBREV_MAP is tried, in the map's order,
    as a case-insensitive whole-word match anywhere in the string. The first
    abbreviation that matches decides the result, regardless of where in the
    string it appears.

    NOTE(review): because matching ignores case, ordinary words that happen
    to equal an abbreviation (e.g. "at" vs "AT") also match -- confirm that
    callers only pass short time strings.

    Args:
        time_str: Time string that may contain timezone info (e.g., '7:00 PM ET')

    Returns:
        IANA timezone string if detected, None otherwise
    """
    return next(
        (
            zone
            for abbrev, zone in TIMEZONE_ABBREV_MAP.items()
            if re.search(rf"\b{abbrev}\b", time_str, re.IGNORECASE)
        ),
        None,
    )
def detect_timezone_from_location(
    state: Optional[str] = None,
    city: Optional[str] = None,
) -> Optional[str]:
    """Detect timezone from location information.

    Only the state/province code is consulted; it is upper-cased and looked
    up in STATE_TIMEZONE_MAP.

    Args:
        state: State/province code (e.g., 'NY', 'ON')
        city: City name (accepted for special cases but currently unused)

    Returns:
        IANA timezone string if detected, None otherwise
    """
    if not state:
        return None
    return STATE_TIMEZONE_MAP.get(state.upper())
def parse_datetime(
    date_str: str,
    time_str: Optional[str] = None,
    timezone_hint: Optional[str] = None,
    location_state: Optional[str] = None,
) -> TimezoneResult:
    """Parse a date/time string and convert to UTC.

    The timezone is chosen from the first of these that applies:
    1. A timezone/offset the parser itself recognized in the string
       (e.g. an ISO-8601 offset or 'UTC') -- confidence 'high'
    2. A known abbreviation in ``time_str`` (e.g. 'ET') -- confidence 'high'
    3. The provided timezone hint -- confidence 'medium'
    4. Location-based inference from ``location_state`` -- confidence 'medium'
    5. Default to Eastern Time with a warning -- confidence 'low'

    Args:
        date_str: Date string (e.g., '2025-10-21', 'October 21, 2025')
        time_str: Optional time string (e.g., '7:00 PM ET', '19:00')
        timezone_hint: Optional IANA timezone to use if not detected
        location_state: Optional state code for timezone inference

    Returns:
        TimezoneResult with UTC datetime and metadata. If the input cannot
        be parsed at all, the result holds the current UTC time as a
        placeholder, source_timezone 'unknown', confidence 'low', and a
        warning describing the failure.
    """
    utc = ZoneInfo("UTC")
    full_str = f"{date_str} {time_str}" if time_str else date_str

    try:
        parsed = dateutil_parser.parse(full_str, fuzzy=True)
    except (ValueError, OverflowError) as exc:
        # If parsing fails, return a placeholder with low confidence
        return TimezoneResult(
            datetime_utc=datetime.now(tz=utc),
            source_timezone="unknown",
            confidence="low",
            warning=f"Failed to parse datetime: {exc}",
        )

    if parsed.tzinfo is not None:
        # The parser already attached a timezone. Its tzinfo object is not an
        # IANA key (str() yields things like "tzutc()"), so never feed it to
        # ZoneInfo; that used to fail and mislabel an explicit timezone as a
        # low-confidence Eastern default. Report a readable label instead.
        tz_label = parsed.tzname()
        if tz_label:
            source = TIMEZONE_ABBREV_MAP.get(tz_label.upper(), tz_label)
        else:
            # Anonymous fixed offset, e.g. "UTC-0500"
            source = f"UTC{parsed.strftime('%z')}"
        return TimezoneResult(
            datetime_utc=parsed.astimezone(utc),
            source_timezone=source,
            confidence="high",
        )

    # Naive datetime: work out which zone the wall-clock time belongs to.
    detected_tz = detect_timezone_from_string(time_str) if time_str else None
    confidence = "high"
    warning = None

    if not detected_tz and timezone_hint:
        detected_tz = timezone_hint
        confidence = "medium"

    if not detected_tz and location_state:
        detected_tz = detect_timezone_from_location(state=location_state)
        confidence = "medium"

    if not detected_tz:
        detected_tz = "America/New_York"
        confidence = "low"
        warning = "Timezone not detected, defaulting to Eastern Time"

    try:
        tz = ZoneInfo(detected_tz)
    except (KeyError, ValueError):
        # KeyError (ZoneInfoNotFoundError): no such zone.
        # ValueError: malformed key (e.g. an absolute path).
        # Either way, try treating the value as an abbreviation before
        # giving up and defaulting to Eastern.
        mapped = TIMEZONE_ABBREV_MAP.get(detected_tz.upper())
        if mapped:
            detected_tz = mapped
        else:
            confidence = "low"
            warning = f"Unknown timezone '{detected_tz}', defaulting to Eastern Time"
            detected_tz = "America/New_York"
        tz = ZoneInfo(detected_tz)

    return TimezoneResult(
        datetime_utc=parsed.replace(tzinfo=tz).astimezone(utc),
        source_timezone=detected_tz,
        confidence=confidence,
        warning=warning,
    )
def convert_to_utc(
    dt: datetime,
    source_timezone: str,
) -> datetime:
    """Convert a datetime from a known timezone to UTC.

    A naive datetime is interpreted as wall-clock time in
    ``source_timezone``. An aware datetime keeps its own timezone and
    ``source_timezone`` is only validated, not applied.

    Args:
        dt: Datetime to convert (timezone-naive or timezone-aware)
        source_timezone: IANA timezone of the datetime

    Returns:
        Datetime in UTC
    """
    # Built unconditionally so an invalid zone name is rejected even when
    # dt is already aware.
    source_zone = ZoneInfo(source_timezone)
    localized = dt if dt.tzinfo is not None else dt.replace(tzinfo=source_zone)
    return localized.astimezone(ZoneInfo("UTC"))
def create_timezone_warning(
    raw_value: str,
    sport: str,
    game_date: Optional[date] = None,
    source_url: Optional[str] = None,
) -> ManualReviewItem:
    """Create a manual review item for an undetermined timezone.

    The item ID is derived from the sport and the first 20 characters of
    the raw value (spaces replaced by underscores), so the same sport/value
    pair always yields the same ID.

    Args:
        raw_value: The original time string that couldn't be resolved
        sport: Sport code
        game_date: Date of the game
        source_url: URL of the source page

    Returns:
        ManualReviewItem for timezone review
    """
    value_slug = raw_value[:20].replace(" ", "_")
    review_id = f"tz_{sport}_{value_slug}"
    review_context = {"issue": "Could not determine timezone for game time"}
    return ManualReviewItem(
        id=review_id,
        reason=ReviewReason.TIMEZONE_UNKNOWN,
        sport=sport,
        raw_value=raw_value,
        context=review_context,
        source_url=source_url,
        game_date=game_date,
    )
def get_stadium_timezone(
    stadium_state: str,
    stadium_timezone: Optional[str] = None,
) -> str:
    """Get the timezone for a stadium based on its location.

    Preference order: the explicit override, then the zone inferred from
    the state/province code, then Eastern Time as a last resort.

    Args:
        stadium_state: State/province code
        stadium_timezone: Explicit timezone override from stadium data

    Returns:
        IANA timezone string
    """
    # `or` short-circuits, so the location lookup only runs when no
    # override was supplied.
    return (
        stadium_timezone
        or detect_timezone_from_location(state=stadium_state)
        or "America/New_York"
    )