feat(scripts): rewrite parser as modular Python CLI
Replace monolithic scraping scripts with sportstime_parser package: - Multi-source scrapers with automatic fallback for 7 sports - Canonical ID generation for games, teams, and stadiums - Fuzzy matching with configurable thresholds for name resolution - CloudKit Web Services uploader with JWT auth, diff-based updates - Resumable uploads with checkpoint state persistence - Validation reports with manual review items and suggested matches - Comprehensive test suite (249 tests) CLI: sportstime-parser scrape|validate|upload|status|retry|clear Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
91
Scripts/sportstime_parser/normalizers/__init__.py
Normal file
91
Scripts/sportstime_parser/normalizers/__init__.py
Normal file
@@ -0,0 +1,91 @@
|
||||
"""Normalizers for team, stadium, and game data."""
|
||||
|
||||
from .canonical_id import (
|
||||
generate_game_id,
|
||||
generate_team_id,
|
||||
generate_team_id_from_abbrev,
|
||||
generate_stadium_id,
|
||||
parse_game_id,
|
||||
normalize_string,
|
||||
)
|
||||
from .timezone import (
|
||||
TimezoneResult,
|
||||
parse_datetime,
|
||||
convert_to_utc,
|
||||
detect_timezone_from_string,
|
||||
detect_timezone_from_location,
|
||||
get_stadium_timezone,
|
||||
create_timezone_warning,
|
||||
)
|
||||
from .fuzzy import (
|
||||
MatchCandidate,
|
||||
fuzzy_match_team,
|
||||
fuzzy_match_stadium,
|
||||
exact_match,
|
||||
best_match,
|
||||
calculate_similarity,
|
||||
normalize_for_matching,
|
||||
)
|
||||
from .alias_loader import (
|
||||
TeamAliasLoader,
|
||||
StadiumAliasLoader,
|
||||
get_team_alias_loader,
|
||||
get_stadium_alias_loader,
|
||||
resolve_team_alias,
|
||||
resolve_stadium_alias,
|
||||
)
|
||||
from .team_resolver import (
|
||||
TeamResolver,
|
||||
TeamResolveResult,
|
||||
get_team_resolver,
|
||||
resolve_team,
|
||||
)
|
||||
from .stadium_resolver import (
|
||||
StadiumResolver,
|
||||
StadiumResolveResult,
|
||||
get_stadium_resolver,
|
||||
resolve_stadium,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
# Canonical ID
|
||||
"generate_game_id",
|
||||
"generate_team_id",
|
||||
"generate_team_id_from_abbrev",
|
||||
"generate_stadium_id",
|
||||
"parse_game_id",
|
||||
"normalize_string",
|
||||
# Timezone
|
||||
"TimezoneResult",
|
||||
"parse_datetime",
|
||||
"convert_to_utc",
|
||||
"detect_timezone_from_string",
|
||||
"detect_timezone_from_location",
|
||||
"get_stadium_timezone",
|
||||
"create_timezone_warning",
|
||||
# Fuzzy matching
|
||||
"MatchCandidate",
|
||||
"fuzzy_match_team",
|
||||
"fuzzy_match_stadium",
|
||||
"exact_match",
|
||||
"best_match",
|
||||
"calculate_similarity",
|
||||
"normalize_for_matching",
|
||||
# Alias loaders
|
||||
"TeamAliasLoader",
|
||||
"StadiumAliasLoader",
|
||||
"get_team_alias_loader",
|
||||
"get_stadium_alias_loader",
|
||||
"resolve_team_alias",
|
||||
"resolve_stadium_alias",
|
||||
# Team resolver
|
||||
"TeamResolver",
|
||||
"TeamResolveResult",
|
||||
"get_team_resolver",
|
||||
"resolve_team",
|
||||
# Stadium resolver
|
||||
"StadiumResolver",
|
||||
"StadiumResolveResult",
|
||||
"get_stadium_resolver",
|
||||
"resolve_stadium",
|
||||
]
|
||||
312
Scripts/sportstime_parser/normalizers/alias_loader.py
Normal file
312
Scripts/sportstime_parser/normalizers/alias_loader.py
Normal file
@@ -0,0 +1,312 @@
|
||||
"""Alias file loaders for team and stadium name resolution."""
|
||||
|
||||
import json
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from ..config import TEAM_ALIASES_FILE, STADIUM_ALIASES_FILE
|
||||
from ..models.aliases import TeamAlias, StadiumAlias, AliasType
|
||||
|
||||
|
||||
class TeamAliasLoader:
    """Loader for team aliases with date-aware resolution.

    Reads team aliases from a JSON file, builds each entry with
    ``TeamAlias.from_dict`` and indexes the results by normalized alias value
    and by canonical team ID. Loading is lazy: the file is read the first
    time a lookup method is called.
    """

    def __init__(self, filepath: Optional[Path] = None):
        """Initialize the loader.

        Args:
            filepath: Path to team_aliases.json, defaults to config value
        """
        self.filepath = filepath or TEAM_ALIASES_FILE
        self._aliases: list[TeamAlias] = []
        self._by_value: dict[str, list[TeamAlias]] = {}
        self._by_team: dict[str, list[TeamAlias]] = {}
        self._loaded = False

    @staticmethod
    def _key(value: str) -> str:
        """Return the lookup key for an alias value (lowercased, stripped)."""
        return value.lower().strip()

    def load(self) -> None:
        """Load aliases from the JSON file.

        The indexes are rebuilt from scratch on every call. A missing file
        results in empty indexes (not stale data from a previous load). The
        new indexes are swapped in only after the whole file was processed,
        so a failure while reading leaves the previous state untouched.
        """
        aliases: list[TeamAlias] = []
        by_value: dict[str, list[TeamAlias]] = {}
        by_team: dict[str, list[TeamAlias]] = {}

        if self.filepath.exists():
            with open(self.filepath, "r", encoding="utf-8") as f:
                data = json.load(f)

            for item in data:
                alias = TeamAlias.from_dict(item)
                aliases.append(alias)
                # Use the same normalization as resolve() so that values
                # stored with surrounding whitespace are still reachable.
                by_value.setdefault(self._key(alias.alias_value), []).append(alias)
                by_team.setdefault(alias.team_canonical_id, []).append(alias)

        self._aliases = aliases
        self._by_value = by_value
        self._by_team = by_team
        self._loaded = True

    def _ensure_loaded(self) -> None:
        """Ensure aliases are loaded."""
        if not self._loaded:
            self.load()

    def resolve(
        self,
        value: str,
        check_date: Optional[date] = None,
        alias_types: Optional[list[AliasType]] = None,
    ) -> Optional[str]:
        """Resolve an alias value to a canonical team ID.

        Args:
            value: Alias value to look up (case-insensitive, surrounding
                whitespace ignored)
            check_date: Date to check validity (None = current date)
            alias_types: Types of aliases to check (None or empty = all types)

        Returns:
            Canonical team ID of the first matching alias that is valid on
            ``check_date``, None otherwise
        """
        self._ensure_loaded()

        if check_date is None:
            check_date = date.today()

        for alias in self._by_value.get(self._key(value), []):
            # Skip aliases excluded by the optional type filter.
            if alias_types and alias.alias_type not in alias_types:
                continue
            if alias.is_valid_on(check_date):
                return alias.team_canonical_id

        return None

    def get_aliases_for_team(
        self,
        team_id: str,
        check_date: Optional[date] = None,
    ) -> list[TeamAlias]:
        """Get all aliases for a team.

        Args:
            team_id: Canonical team ID
            check_date: Date to filter by (None = all aliases)

        Returns:
            List of TeamAlias objects (empty if the team is unknown)
        """
        self._ensure_loaded()

        aliases = self._by_team.get(team_id, [])
        if check_date:
            aliases = [a for a in aliases if a.is_valid_on(check_date)]
        return aliases

    def get_all_values(
        self,
        alias_type: Optional[AliasType] = None,
    ) -> list[str]:
        """Get all alias values.

        Args:
            alias_type: Filter by alias type (None = all types)

        Returns:
            List of alias values in file order
        """
        self._ensure_loaded()

        return [
            alias.alias_value
            for alias in self._aliases
            if alias_type is None or alias.alias_type == alias_type
        ]
|
||||
|
||||
|
||||
class StadiumAliasLoader:
    """Loader for stadium aliases with date-aware resolution.

    Reads stadium aliases from a JSON file, builds each entry with
    ``StadiumAlias.from_dict`` and indexes the results by normalized alias
    name and by canonical stadium ID (useful for historical names such as
    naming-rights changes). Loading is lazy: the file is read the first time
    a lookup method is called.
    """

    def __init__(self, filepath: Optional[Path] = None):
        """Initialize the loader.

        Args:
            filepath: Path to stadium_aliases.json, defaults to config value
        """
        self.filepath = filepath or STADIUM_ALIASES_FILE
        self._aliases: list[StadiumAlias] = []
        self._by_name: dict[str, list[StadiumAlias]] = {}
        self._by_stadium: dict[str, list[StadiumAlias]] = {}
        self._loaded = False

    @staticmethod
    def _key(name: str) -> str:
        """Return the lookup key for a stadium name (lowercased, stripped)."""
        return name.lower().strip()

    def load(self) -> None:
        """Load aliases from the JSON file.

        The indexes are rebuilt from scratch on every call. A missing file
        results in empty indexes (not stale data from a previous load). The
        new indexes are swapped in only after the whole file was processed,
        so a failure while reading leaves the previous state untouched.
        """
        aliases: list[StadiumAlias] = []
        by_name: dict[str, list[StadiumAlias]] = {}
        by_stadium: dict[str, list[StadiumAlias]] = {}

        if self.filepath.exists():
            with open(self.filepath, "r", encoding="utf-8") as f:
                data = json.load(f)

            for item in data:
                alias = StadiumAlias.from_dict(item)
                aliases.append(alias)
                # Use the same normalization as resolve() so that names
                # stored with surrounding whitespace are still reachable.
                by_name.setdefault(self._key(alias.alias_name), []).append(alias)
                by_stadium.setdefault(alias.stadium_canonical_id, []).append(alias)

        self._aliases = aliases
        self._by_name = by_name
        self._by_stadium = by_stadium
        self._loaded = True

    def _ensure_loaded(self) -> None:
        """Ensure aliases are loaded."""
        if not self._loaded:
            self.load()

    def resolve(
        self,
        name: str,
        check_date: Optional[date] = None,
    ) -> Optional[str]:
        """Resolve a stadium name to a canonical stadium ID.

        Args:
            name: Stadium name to look up (case-insensitive, surrounding
                whitespace ignored)
            check_date: Date to check validity (None = current date)

        Returns:
            Canonical stadium ID of the first matching alias that is valid on
            ``check_date``, None otherwise
        """
        self._ensure_loaded()

        if check_date is None:
            check_date = date.today()

        for alias in self._by_name.get(self._key(name), []):
            if alias.is_valid_on(check_date):
                return alias.stadium_canonical_id

        return None

    def get_aliases_for_stadium(
        self,
        stadium_id: str,
        check_date: Optional[date] = None,
    ) -> list[StadiumAlias]:
        """Get all aliases for a stadium.

        Args:
            stadium_id: Canonical stadium ID
            check_date: Date to filter by (None = all aliases)

        Returns:
            List of StadiumAlias objects (empty if the stadium is unknown)
        """
        self._ensure_loaded()

        aliases = self._by_stadium.get(stadium_id, [])
        if check_date:
            aliases = [a for a in aliases if a.is_valid_on(check_date)]
        return aliases

    def get_all_names(self) -> list[str]:
        """Get all stadium alias names.

        Returns:
            List of stadium names in file order
        """
        self._ensure_loaded()

        return [alias.alias_name for alias in self._aliases]
|
||||
|
||||
|
||||
# Global loader instances (lazy initialized)
|
||||
_team_alias_loader: Optional[TeamAliasLoader] = None
|
||||
_stadium_alias_loader: Optional[StadiumAliasLoader] = None
|
||||
|
||||
|
||||
def get_team_alias_loader() -> TeamAliasLoader:
    """Return the shared TeamAliasLoader, creating it on first use."""
    global _team_alias_loader
    loader = _team_alias_loader
    if loader is None:
        # First call: build the loader with its default file path and cache it.
        loader = _team_alias_loader = TeamAliasLoader()
    return loader
|
||||
|
||||
|
||||
def get_stadium_alias_loader() -> StadiumAliasLoader:
    """Return the shared StadiumAliasLoader, creating it on first use."""
    global _stadium_alias_loader
    loader = _stadium_alias_loader
    if loader is None:
        # First call: build the loader with its default file path and cache it.
        loader = _stadium_alias_loader = StadiumAliasLoader()
    return loader
|
||||
|
||||
|
||||
def resolve_team_alias(
    value: str,
    check_date: Optional[date] = None,
) -> Optional[str]:
    """Resolve a team alias through the shared team alias loader.

    Args:
        value: Alias value (name, abbreviation, or city)
        check_date: Date the alias must be valid on; forwarded unchanged to
            the loader's ``resolve`` method

    Returns:
        Canonical team ID if found, otherwise None
    """
    loader = get_team_alias_loader()
    return loader.resolve(value, check_date)
|
||||
|
||||
|
||||
def resolve_stadium_alias(
    name: str,
    check_date: Optional[date] = None,
) -> Optional[str]:
    """Resolve a stadium alias through the shared stadium alias loader.

    Args:
        name: Stadium name
        check_date: Date the alias must be valid on; forwarded unchanged to
            the loader's ``resolve`` method

    Returns:
        Canonical stadium ID if found, otherwise None
    """
    loader = get_stadium_alias_loader()
    return loader.resolve(name, check_date)
|
||||
279
Scripts/sportstime_parser/normalizers/canonical_id.py
Normal file
279
Scripts/sportstime_parser/normalizers/canonical_id.py
Normal file
@@ -0,0 +1,279 @@
|
||||
"""Canonical ID generation for games, teams, and stadiums."""
|
||||
|
||||
import re
|
||||
import unicodedata
|
||||
from datetime import date, datetime
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def normalize_string(s: str) -> str:
    """Turn arbitrary text into a token usable inside canonical IDs.

    Steps, in order:
    - lowercase the input
    - decompose unicode (NFKD) and drop everything that is not ASCII
      (e.g. an accented "e" becomes a plain "e")
    - turn each run of whitespace/hyphens into a single underscore
    - drop every character that is not a-z, 0-9 or underscore
    - collapse repeated underscores and trim them from both ends

    Args:
        s: String to normalize

    Returns:
        Normalized string suitable for IDs
    """
    ascii_text = (
        unicodedata.normalize("NFKD", s.lower())
        .encode("ascii", "ignore")
        .decode("ascii")
    )
    underscored = re.sub(r"[\s\-]+", "_", ascii_text)
    alnum_only = re.sub(r"[^a-z0-9_]", "", underscored)
    # Removing characters can leave adjacent underscores behind, so collapse
    # them after the filtering step.
    return re.sub(r"_+", "_", alnum_only).strip("_")
|
||||
|
||||
|
||||
def generate_game_id(
|
||||
sport: str,
|
||||
season: int,
|
||||
away_abbrev: str,
|
||||
home_abbrev: str,
|
||||
game_date: date | datetime,
|
||||
game_number: Optional[int] = None,
|
||||
) -> str:
|
||||
"""Generate a canonical game ID.
|
||||
|
||||
Format: {sport}_{season}_{away}_{home}_{MMDD}[_{game_number}]
|
||||
|
||||
Args:
|
||||
sport: Sport code (e.g., 'nba', 'mlb')
|
||||
season: Season start year (e.g., 2025 for 2025-26)
|
||||
away_abbrev: Away team abbreviation (e.g., 'HOU')
|
||||
home_abbrev: Home team abbreviation (e.g., 'OKC')
|
||||
game_date: Date of the game
|
||||
game_number: Game number for doubleheaders (1 or 2), None for single games
|
||||
|
||||
Returns:
|
||||
Canonical game ID (e.g., 'nba_2025_hou_okc_1021')
|
||||
|
||||
Examples:
|
||||
>>> generate_game_id('nba', 2025, 'HOU', 'OKC', date(2025, 10, 21))
|
||||
'nba_2025_hou_okc_1021'
|
||||
|
||||
>>> generate_game_id('mlb', 2026, 'NYY', 'BOS', date(2026, 4, 1), game_number=1)
|
||||
'mlb_2026_nyy_bos_0401_1'
|
||||
"""
|
||||
# Normalize sport and abbreviations
|
||||
sport_norm = sport.lower()
|
||||
away_norm = away_abbrev.lower()
|
||||
home_norm = home_abbrev.lower()
|
||||
|
||||
# Format date as MMDD
|
||||
if isinstance(game_date, datetime):
|
||||
game_date = game_date.date()
|
||||
date_str = game_date.strftime("%m%d")
|
||||
|
||||
# Build ID
|
||||
parts = [sport_norm, str(season), away_norm, home_norm, date_str]
|
||||
|
||||
# Add game number for doubleheaders
|
||||
if game_number is not None:
|
||||
parts.append(str(game_number))
|
||||
|
||||
return "_".join(parts)
|
||||
|
||||
|
||||
def generate_team_id(sport: str, city: str, name: str) -> str:
    """Generate a fallback canonical team ID from city and name.

    Format: team_{sport}_{normalized_city}_{normalized_name}

    Most teams are identified by their standard abbreviation (see
    ``generate_team_id_from_abbrev``). This function builds an ID for teams
    without a known abbreviation; city and name are passed through
    ``normalize_string`` and are NOT shortened to an abbreviation.

    Args:
        sport: Sport code (e.g., 'nba', 'mlb'); lowercased
        city: Team city (e.g., 'Los Angeles')
        name: Team name (e.g., 'Lakers')

    Returns:
        Canonical team ID (e.g., 'team_nba_los_angeles_lakers')

    Examples:
        >>> generate_team_id('nba', 'Los Angeles', 'Lakers')
        'team_nba_los_angeles_lakers'

        >>> generate_team_id('mlb', 'New York', 'Yankees')
        'team_mlb_new_york_yankees'
    """
    sport_norm = sport.lower()
    city_norm = normalize_string(city)
    name_norm = normalize_string(name)

    return f"team_{sport_norm}_{city_norm}_{name_norm}"
|
||||
|
||||
|
||||
def generate_team_id_from_abbrev(sport: str, abbreviation: str) -> str:
    """Build the canonical team ID for a team with a known abbreviation.

    Format: team_{sport}_{abbreviation}

    Both parts are lowercased; no other normalization is applied.

    Args:
        sport: Sport code (e.g., 'nba', 'mlb')
        abbreviation: Team abbreviation (e.g., 'LAL', 'NYY')

    Returns:
        Canonical team ID (e.g., 'team_nba_lal')

    Examples:
        >>> generate_team_id_from_abbrev('nba', 'LAL')
        'team_nba_lal'

        >>> generate_team_id_from_abbrev('mlb', 'NYY')
        'team_mlb_nyy'
    """
    return "_".join(("team", sport.lower(), abbreviation.lower()))
|
||||
|
||||
|
||||
def generate_stadium_id(sport: str, name: str) -> str:
    """Build the canonical ID of a stadium.

    Format: stadium_{sport}_{normalized_name}

    The sport code is lowercased and the stadium name is passed through
    ``normalize_string``.

    Args:
        sport: Sport code (e.g., 'nba', 'mlb')
        name: Stadium name (e.g., 'Yankee Stadium')

    Returns:
        Canonical stadium ID (e.g., 'stadium_mlb_yankee_stadium')

    Examples:
        >>> generate_stadium_id('nba', 'Crypto.com Arena')
        'stadium_nba_cryptocom_arena'

        >>> generate_stadium_id('mlb', 'Yankee Stadium')
        'stadium_mlb_yankee_stadium'
    """
    return "_".join(("stadium", sport.lower(), normalize_string(name)))
|
||||
|
||||
|
||||
def parse_game_id(game_id: str) -> dict:
    """Parse a canonical game ID into its components.

    Expected layout: {sport}_{season}_{away}_{home}_{MMDD}[_{game_number}]

    Args:
        game_id: Canonical game ID (e.g., 'nba_2025_hou_okc_1021')

    Returns:
        Dictionary with keys: sport, season, away_abbrev, home_abbrev,
        month, day, game_number (None when the ID has no sixth segment)

    Raises:
        ValueError: If the ID does not have 5 or 6 underscore-separated
            segments, if season or game number is not an integer, or if the
            date segment is not four digits forming a real month/day

    Examples:
        >>> parse_game_id('nba_2025_hou_okc_1021')
        {'sport': 'nba', 'season': 2025, 'away_abbrev': 'hou',
         'home_abbrev': 'okc', 'month': 10, 'day': 21, 'game_number': None}

        >>> parse_game_id('mlb_2026_nyy_bos_0401_1')
        {'sport': 'mlb', 'season': 2026, 'away_abbrev': 'nyy',
         'home_abbrev': 'bos', 'month': 4, 'day': 1, 'game_number': 1}
    """
    parts = game_id.split("_")

    if not 5 <= len(parts) <= 6:
        raise ValueError(f"Invalid game ID format: {game_id}")

    sport, season_str, away_abbrev, home_abbrev, date_str = parts[:5]

    try:
        season = int(season_str)
        game_number = int(parts[5]) if len(parts) == 6 else None
    except ValueError:
        # Re-raise with the ID in the message instead of int()'s generic text.
        raise ValueError(f"Invalid game ID format: {game_id}") from None

    # int() alone would accept things like "+1+2" or " 1 2", so insist on
    # exactly four ASCII digits before converting.
    if len(date_str) != 4 or not (date_str.isascii() and date_str.isdigit()):
        raise ValueError(f"Invalid date format in game ID: {game_id}")

    month = int(date_str[:2])
    day = int(date_str[2:])

    try:
        # The ID carries no calendar year, so validate against a leap year
        # to keep Feb 29 acceptable while rejecting e.g. 1399 or 0230.
        date(2000, month, day)
    except ValueError:
        raise ValueError(f"Invalid date format in game ID: {game_id}") from None

    return {
        "sport": sport,
        "season": season,
        "away_abbrev": away_abbrev,
        "home_abbrev": home_abbrev,
        "month": month,
        "day": day,
        "game_number": game_number,
    }
|
||||
|
||||
|
||||
def parse_team_id(team_id: str) -> dict:
    """Split a canonical team ID into sport and identifier.

    The ID must start with 'team_'. The text up to the next underscore is
    the sport; everything after it (underscores included) is the identifier.

    Args:
        team_id: Canonical team ID (e.g., 'team_nba_lal')

    Returns:
        Dictionary with keys: sport, identifier (abbreviation or city_name)

    Raises:
        ValueError: If team_id format is invalid
    """
    if not team_id.startswith("team_"):
        raise ValueError(f"Invalid team ID format: {team_id}")

    remainder = team_id.removeprefix("team_")
    sport, separator, identifier = remainder.partition("_")

    # No separator means there is no identifier segment after the sport.
    if not separator:
        raise ValueError(f"Invalid team ID format: {team_id}")

    return {"sport": sport, "identifier": identifier}
|
||||
|
||||
|
||||
def parse_stadium_id(stadium_id: str) -> dict:
    """Split a canonical stadium ID into sport and name.

    The ID must start with 'stadium_'. The text up to the next underscore is
    the sport; everything after it (underscores included) is the name.

    Args:
        stadium_id: Canonical stadium ID (e.g., 'stadium_nba_paycom_center')

    Returns:
        Dictionary with keys: sport, name

    Raises:
        ValueError: If stadium_id format is invalid
    """
    if not stadium_id.startswith("stadium_"):
        raise ValueError(f"Invalid stadium ID format: {stadium_id}")

    remainder = stadium_id.removeprefix("stadium_")
    sport, separator, name = remainder.partition("_")

    # No separator means there is no name segment after the sport.
    if not separator:
        raise ValueError(f"Invalid stadium ID format: {stadium_id}")

    return {"sport": sport, "name": name}
|
||||
272
Scripts/sportstime_parser/normalizers/fuzzy.py
Normal file
272
Scripts/sportstime_parser/normalizers/fuzzy.py
Normal file
@@ -0,0 +1,272 @@
|
||||
"""Fuzzy string matching utilities for team and stadium name resolution."""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
from rapidfuzz import fuzz, process
|
||||
from rapidfuzz.utils import default_process
|
||||
|
||||
from ..config import FUZZY_MATCH_THRESHOLD
|
||||
from ..models.aliases import FuzzyMatch
|
||||
|
||||
|
||||
@dataclass
class MatchCandidate:
    """One entity (team or stadium) that a query string can be matched to.

    Attributes:
        canonical_id: Canonical ID returned when this candidate matches
        name: Primary display name, compared against the query
        aliases: Alternative names that are compared the same way as ``name``
    """

    canonical_id: str
    name: str
    aliases: list[str]
|
||||
|
||||
|
||||
def normalize_for_matching(s: str) -> str:
    """Normalize a string for fuzzy matching.

    - Convert to lowercase
    - Collapse every run of whitespace to a single space and trim the ends
    - Remove the leading words "the", "team", "stadium" (checked in that
      order, so "The Team X" becomes "x")
    - Remove a trailing "stadium", "arena", "center", "field" or "park"
      (checked in that order)

    Args:
        s: String to normalize

    Returns:
        Normalized string
    """
    # Collapse whitespace first so that inputs such as "The  Lakers" (two
    # spaces) still match the "the " prefix below.
    result = " ".join(s.lower().split())

    # Remove common prefixes
    for prefix in ("the ", "team ", "stadium "):
        if result.startswith(prefix):
            result = result[len(prefix):]

    # Remove common suffixes
    for suffix in (" stadium", " arena", " center", " field", " park"):
        if result.endswith(suffix):
            result = result[: -len(suffix)]

    return result.strip()
|
||||
|
||||
|
||||
def fuzzy_match_team(
    query: str,
    candidates: list[MatchCandidate],
    threshold: int = FUZZY_MATCH_THRESHOLD,
    top_n: int = 3,
) -> list[FuzzyMatch]:
    """Rank candidate teams by fuzzy similarity to a team name.

    The query, every candidate name and every alias are passed through
    ``normalize_for_matching``. Each (query, label) pair gets a blended score:

        int(0.5 * token_set_ratio + 0.3 * partial_ratio + 0.2 * ratio)

    1. Token set ratio (handles word order differences)
    2. Partial ratio (handles substring matches)
    3. Standard ratio (overall similarity)

    A candidate's score is the best score over its name and aliases.

    Args:
        query: Team name to match
        candidates: List of candidate teams to match against
        threshold: Minimum score to consider a match (0-100)
        top_n: Maximum number of matches to return

    Returns:
        List of FuzzyMatch objects sorted by confidence (descending)
    """
    needle = normalize_for_matching(query)

    # canonical_id -> (best blended score, display name of the candidate)
    best_by_id: dict[str, tuple[int, str]] = {}

    for candidate in candidates:
        for label in (candidate.name, *candidate.aliases):
            haystack = normalize_for_matching(label)

            # Weighted average favoring token_set_ratio for team names
            blended = int(
                0.5 * fuzz.token_set_ratio(needle, haystack)
                + 0.3 * fuzz.partial_ratio(needle, haystack)
                + 0.2 * fuzz.ratio(needle, haystack)
            )

            previous = best_by_id.get(candidate.canonical_id)
            if previous is None or blended > previous[0]:
                best_by_id[candidate.canonical_id] = (blended, candidate.name)

    accepted = [
        FuzzyMatch(canonical_id=cid, canonical_name=display, confidence=points)
        for cid, (points, display) in best_by_id.items()
        if points >= threshold
    ]

    ranked = sorted(accepted, key=lambda m: m.confidence, reverse=True)
    return ranked[:top_n]
|
||||
|
||||
|
||||
def fuzzy_match_stadium(
    query: str,
    candidates: list[MatchCandidate],
    threshold: int = FUZZY_MATCH_THRESHOLD,
    top_n: int = 3,
) -> list[FuzzyMatch]:
    """Rank candidate stadiums by fuzzy similarity to a stadium name.

    The query, every candidate name and every alias are passed through
    ``normalize_for_matching``. Each (query, label) pair gets a blended score:

        int(0.4 * token_sort_ratio + 0.4 * partial_ratio + 0.2 * ratio)

    1. Token sort ratio (handles "X Stadium" vs "Stadium X")
    2. Partial ratio (handles naming rights changes)
    3. Standard ratio

    A candidate's score is the best score over its name and aliases.

    Args:
        query: Stadium name to match
        candidates: List of candidate stadiums to match against
        threshold: Minimum score to consider a match (0-100)
        top_n: Maximum number of matches to return

    Returns:
        List of FuzzyMatch objects sorted by confidence (descending)
    """
    needle = normalize_for_matching(query)

    # canonical_id -> (best blended score, display name of the candidate)
    best_by_id: dict[str, tuple[int, str]] = {}

    for candidate in candidates:
        for label in (candidate.name, *candidate.aliases):
            haystack = normalize_for_matching(label)

            # Weighted average
            blended = int(
                0.4 * fuzz.token_sort_ratio(needle, haystack)
                + 0.4 * fuzz.partial_ratio(needle, haystack)
                + 0.2 * fuzz.ratio(needle, haystack)
            )

            previous = best_by_id.get(candidate.canonical_id)
            if previous is None or blended > previous[0]:
                best_by_id[candidate.canonical_id] = (blended, candidate.name)

    accepted = [
        FuzzyMatch(canonical_id=cid, canonical_name=display, confidence=points)
        for cid, (points, display) in best_by_id.items()
        if points >= threshold
    ]

    ranked = sorted(accepted, key=lambda m: m.confidence, reverse=True)
    return ranked[:top_n]
|
||||
|
||||
|
||||
def exact_match(
    query: str,
    candidates: list[MatchCandidate],
    case_sensitive: bool = False,
) -> Optional[str]:
    """Return the ID of the first candidate whose name or alias equals query.

    Surrounding whitespace is ignored on both sides of the comparison; case is
    ignored unless ``case_sensitive`` is true. Candidates are checked in list
    order, each candidate's name before its aliases.

    Args:
        query: String to match
        candidates: List of candidates to match against
        case_sensitive: Whether to use case-sensitive matching

    Returns:
        Canonical ID if exact match found, None otherwise
    """

    def comparable(text: str) -> str:
        # Same normalization for the query and for every candidate label.
        return text.strip() if case_sensitive else text.lower().strip()

    wanted = comparable(query)

    for candidate in candidates:
        labels = (candidate.name, *candidate.aliases)
        if any(wanted == comparable(label) for label in labels):
            return candidate.canonical_id

    return None
|
||||
|
||||
|
||||
def best_match(
    query: str,
    candidates: list[MatchCandidate],
    threshold: int = FUZZY_MATCH_THRESHOLD,
) -> Optional[FuzzyMatch]:
    """Return the single best match for a query string.

    An exact (case-insensitive) match wins with confidence 100. Otherwise the
    top result of ``fuzzy_match_team`` is returned; team scoring is used for
    all candidate kinds here.

    Args:
        query: String to match
        candidates: List of candidates
        threshold: Minimum fuzzy match score

    Returns:
        Best FuzzyMatch or None if no match above threshold
    """
    exact_id = exact_match(query, candidates)
    if exact_id:
        # Look up the display name of the candidate that produced the ID.
        owner = next((c for c in candidates if c.canonical_id == exact_id), None)
        if owner is not None:
            return FuzzyMatch(
                canonical_id=exact_id,
                canonical_name=owner.name,
                confidence=100,
            )

    ranked = fuzzy_match_team(query, candidates, threshold=threshold, top_n=1)
    if not ranked:
        return None
    return ranked[0]
|
||||
|
||||
|
||||
def calculate_similarity(s1: str, s2: str) -> int:
    """Score how similar two strings are.

    Both strings are passed through ``normalize_for_matching`` and compared
    with ``fuzz.token_set_ratio``; that score is returned as is.

    Args:
        s1: First string
        s2: Second string

    Returns:
        Similarity score 0-100
    """
    left, right = normalize_for_matching(s1), normalize_for_matching(s2)
    return fuzz.token_set_ratio(left, right)
|
||||
474
Scripts/sportstime_parser/normalizers/stadium_resolver.py
Normal file
474
Scripts/sportstime_parser/normalizers/stadium_resolver.py
Normal file
@@ -0,0 +1,474 @@
|
||||
"""Stadium name resolver with exact, alias, and fuzzy matching."""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import date
|
||||
from typing import Optional
|
||||
from uuid import uuid4
|
||||
|
||||
from ..config import FUZZY_MATCH_THRESHOLD, ALLOWED_COUNTRIES
|
||||
from ..models.aliases import FuzzyMatch, ManualReviewItem, ReviewReason
|
||||
from .alias_loader import get_stadium_alias_loader, StadiumAliasLoader
|
||||
from .fuzzy import MatchCandidate, fuzzy_match_stadium
|
||||
|
||||
|
||||
@dataclass
class StadiumResolveResult:
    """Outcome of trying to resolve a stadium name.

    Attributes:
        canonical_id: Resolved canonical stadium ID (None if unresolved)
        confidence: Confidence in the match (100 for exact, lower for fuzzy)
        match_type: How the match was made ('exact', 'alias', 'fuzzy', 'unresolved')
        filtered_reason: Reason if stadium was filtered out (e.g., 'geographic');
            defaults to None
        review_item: ManualReviewItem if resolution failed or low confidence;
            defaults to None
    """

    canonical_id: Optional[str]
    confidence: int
    match_type: str
    filtered_reason: Optional[str] = None
    review_item: Optional[ManualReviewItem] = None
|
||||
|
||||
|
||||
@dataclass
class StadiumInfo:
    """Static description of one stadium, used as matching reference data.

    Attributes:
        canonical_id: Canonical stadium ID (e.g., 'stadium_nba_td_garden')
        name: Stadium display name (e.g., 'TD Garden')
        city: City the stadium is in (e.g., 'Boston')
        state: State code (e.g., 'MA')
        country: Country code (e.g., 'USA')
        sport: Sport code (e.g., 'nba')
        latitude: Latitude as a float (e.g., 42.3662)
        longitude: Longitude as a float (e.g., -71.0621)
    """

    canonical_id: str
    name: str
    city: str
    state: str
    country: str
    sport: str
    latitude: float
    longitude: float
|
||||
|
||||
|
||||
# Hardcoded stadium reference data, keyed first by lowercase sport code and
# then by canonical stadium ID.
# Format: {sport: {canonical_id: StadiumInfo}}
|
||||
STADIUM_MAPPINGS: dict[str, dict[str, StadiumInfo]] = {
|
||||
"nba": {
|
||||
"stadium_nba_state_farm_arena": StadiumInfo("stadium_nba_state_farm_arena", "State Farm Arena", "Atlanta", "GA", "USA", "nba", 33.7573, -84.3963),
|
||||
"stadium_nba_td_garden": StadiumInfo("stadium_nba_td_garden", "TD Garden", "Boston", "MA", "USA", "nba", 42.3662, -71.0621),
|
||||
"stadium_nba_barclays_center": StadiumInfo("stadium_nba_barclays_center", "Barclays Center", "Brooklyn", "NY", "USA", "nba", 40.6826, -73.9754),
|
||||
"stadium_nba_spectrum_center": StadiumInfo("stadium_nba_spectrum_center", "Spectrum Center", "Charlotte", "NC", "USA", "nba", 35.2251, -80.8392),
|
||||
"stadium_nba_united_center": StadiumInfo("stadium_nba_united_center", "United Center", "Chicago", "IL", "USA", "nba", 41.8807, -87.6742),
|
||||
"stadium_nba_rocket_mortgage_fieldhouse": StadiumInfo("stadium_nba_rocket_mortgage_fieldhouse", "Rocket Mortgage FieldHouse", "Cleveland", "OH", "USA", "nba", 41.4965, -81.6882),
|
||||
"stadium_nba_american_airlines_center": StadiumInfo("stadium_nba_american_airlines_center", "American Airlines Center", "Dallas", "TX", "USA", "nba", 32.7905, -96.8103),
|
||||
"stadium_nba_ball_arena": StadiumInfo("stadium_nba_ball_arena", "Ball Arena", "Denver", "CO", "USA", "nba", 39.7487, -105.0077),
|
||||
"stadium_nba_little_caesars_arena": StadiumInfo("stadium_nba_little_caesars_arena", "Little Caesars Arena", "Detroit", "MI", "USA", "nba", 42.3411, -83.0553),
|
||||
"stadium_nba_chase_center": StadiumInfo("stadium_nba_chase_center", "Chase Center", "San Francisco", "CA", "USA", "nba", 37.7680, -122.3877),
|
||||
"stadium_nba_toyota_center": StadiumInfo("stadium_nba_toyota_center", "Toyota Center", "Houston", "TX", "USA", "nba", 29.7508, -95.3621),
|
||||
"stadium_nba_gainbridge_fieldhouse": StadiumInfo("stadium_nba_gainbridge_fieldhouse", "Gainbridge Fieldhouse", "Indianapolis", "IN", "USA", "nba", 39.7640, -86.1555),
|
||||
"stadium_nba_intuit_dome": StadiumInfo("stadium_nba_intuit_dome", "Intuit Dome", "Inglewood", "CA", "USA", "nba", 33.9425, -118.3417),
|
||||
"stadium_nba_cryptocom_arena": StadiumInfo("stadium_nba_cryptocom_arena", "Crypto.com Arena", "Los Angeles", "CA", "USA", "nba", 34.0430, -118.2673),
|
||||
"stadium_nba_fedexforum": StadiumInfo("stadium_nba_fedexforum", "FedExForum", "Memphis", "TN", "USA", "nba", 35.1383, -90.0505),
|
||||
"stadium_nba_kaseya_center": StadiumInfo("stadium_nba_kaseya_center", "Kaseya Center", "Miami", "FL", "USA", "nba", 25.7814, -80.1870),
|
||||
"stadium_nba_fiserv_forum": StadiumInfo("stadium_nba_fiserv_forum", "Fiserv Forum", "Milwaukee", "WI", "USA", "nba", 43.0451, -87.9172),
|
||||
"stadium_nba_target_center": StadiumInfo("stadium_nba_target_center", "Target Center", "Minneapolis", "MN", "USA", "nba", 44.9795, -93.2761),
|
||||
"stadium_nba_smoothie_king_center": StadiumInfo("stadium_nba_smoothie_king_center", "Smoothie King Center", "New Orleans", "LA", "USA", "nba", 29.9490, -90.0821),
|
||||
"stadium_nba_madison_square_garden": StadiumInfo("stadium_nba_madison_square_garden", "Madison Square Garden", "New York", "NY", "USA", "nba", 40.7505, -73.9934),
|
||||
"stadium_nba_paycom_center": StadiumInfo("stadium_nba_paycom_center", "Paycom Center", "Oklahoma City", "OK", "USA", "nba", 35.4634, -97.5151),
|
||||
"stadium_nba_kia_center": StadiumInfo("stadium_nba_kia_center", "Kia Center", "Orlando", "FL", "USA", "nba", 28.5392, -81.3839),
|
||||
"stadium_nba_wells_fargo_center": StadiumInfo("stadium_nba_wells_fargo_center", "Wells Fargo Center", "Philadelphia", "PA", "USA", "nba", 39.9012, -75.1720),
|
||||
"stadium_nba_footprint_center": StadiumInfo("stadium_nba_footprint_center", "Footprint Center", "Phoenix", "AZ", "USA", "nba", 33.4457, -112.0712),
|
||||
"stadium_nba_moda_center": StadiumInfo("stadium_nba_moda_center", "Moda Center", "Portland", "OR", "USA", "nba", 45.5316, -122.6668),
|
||||
"stadium_nba_golden_1_center": StadiumInfo("stadium_nba_golden_1_center", "Golden 1 Center", "Sacramento", "CA", "USA", "nba", 38.5802, -121.4997),
|
||||
"stadium_nba_frost_bank_center": StadiumInfo("stadium_nba_frost_bank_center", "Frost Bank Center", "San Antonio", "TX", "USA", "nba", 29.4270, -98.4375),
|
||||
"stadium_nba_scotiabank_arena": StadiumInfo("stadium_nba_scotiabank_arena", "Scotiabank Arena", "Toronto", "ON", "Canada", "nba", 43.6435, -79.3791),
|
||||
"stadium_nba_delta_center": StadiumInfo("stadium_nba_delta_center", "Delta Center", "Salt Lake City", "UT", "USA", "nba", 40.7683, -111.9011),
|
||||
"stadium_nba_capital_one_arena": StadiumInfo("stadium_nba_capital_one_arena", "Capital One Arena", "Washington", "DC", "USA", "nba", 38.8981, -77.0209),
|
||||
},
|
||||
"mlb": {
|
||||
"stadium_mlb_chase_field": StadiumInfo("stadium_mlb_chase_field", "Chase Field", "Phoenix", "AZ", "USA", "mlb", 33.4455, -112.0667),
|
||||
"stadium_mlb_truist_park": StadiumInfo("stadium_mlb_truist_park", "Truist Park", "Atlanta", "GA", "USA", "mlb", 33.8908, -84.4678),
|
||||
"stadium_mlb_oriole_park_at_camden_yards": StadiumInfo("stadium_mlb_oriole_park_at_camden_yards", "Oriole Park at Camden Yards", "Baltimore", "MD", "USA", "mlb", 39.2839, -76.6217),
|
||||
"stadium_mlb_fenway_park": StadiumInfo("stadium_mlb_fenway_park", "Fenway Park", "Boston", "MA", "USA", "mlb", 42.3467, -71.0972),
|
||||
"stadium_mlb_wrigley_field": StadiumInfo("stadium_mlb_wrigley_field", "Wrigley Field", "Chicago", "IL", "USA", "mlb", 41.9484, -87.6553),
|
||||
"stadium_mlb_guaranteed_rate_field": StadiumInfo("stadium_mlb_guaranteed_rate_field", "Guaranteed Rate Field", "Chicago", "IL", "USA", "mlb", 41.8299, -87.6338),
|
||||
"stadium_mlb_great_american_ball_park": StadiumInfo("stadium_mlb_great_american_ball_park", "Great American Ball Park", "Cincinnati", "OH", "USA", "mlb", 39.0974, -84.5082),
|
||||
"stadium_mlb_progressive_field": StadiumInfo("stadium_mlb_progressive_field", "Progressive Field", "Cleveland", "OH", "USA", "mlb", 41.4962, -81.6852),
|
||||
"stadium_mlb_coors_field": StadiumInfo("stadium_mlb_coors_field", "Coors Field", "Denver", "CO", "USA", "mlb", 39.7559, -104.9942),
|
||||
"stadium_mlb_comerica_park": StadiumInfo("stadium_mlb_comerica_park", "Comerica Park", "Detroit", "MI", "USA", "mlb", 42.3390, -83.0485),
|
||||
"stadium_mlb_minute_maid_park": StadiumInfo("stadium_mlb_minute_maid_park", "Minute Maid Park", "Houston", "TX", "USA", "mlb", 29.7573, -95.3555),
|
||||
"stadium_mlb_kauffman_stadium": StadiumInfo("stadium_mlb_kauffman_stadium", "Kauffman Stadium", "Kansas City", "MO", "USA", "mlb", 39.0517, -94.4803),
|
||||
"stadium_mlb_angel_stadium": StadiumInfo("stadium_mlb_angel_stadium", "Angel Stadium", "Anaheim", "CA", "USA", "mlb", 33.8003, -117.8827),
|
||||
"stadium_mlb_dodger_stadium": StadiumInfo("stadium_mlb_dodger_stadium", "Dodger Stadium", "Los Angeles", "CA", "USA", "mlb", 34.0739, -118.2400),
|
||||
"stadium_mlb_loandepot_park": StadiumInfo("stadium_mlb_loandepot_park", "loanDepot park", "Miami", "FL", "USA", "mlb", 25.7781, -80.2195),
|
||||
"stadium_mlb_american_family_field": StadiumInfo("stadium_mlb_american_family_field", "American Family Field", "Milwaukee", "WI", "USA", "mlb", 43.0280, -87.9712),
|
||||
"stadium_mlb_target_field": StadiumInfo("stadium_mlb_target_field", "Target Field", "Minneapolis", "MN", "USA", "mlb", 44.9818, -93.2775),
|
||||
"stadium_mlb_citi_field": StadiumInfo("stadium_mlb_citi_field", "Citi Field", "New York", "NY", "USA", "mlb", 40.7571, -73.8458),
|
||||
"stadium_mlb_yankee_stadium": StadiumInfo("stadium_mlb_yankee_stadium", "Yankee Stadium", "Bronx", "NY", "USA", "mlb", 40.8296, -73.9262),
|
||||
"stadium_mlb_sutter_health_park": StadiumInfo("stadium_mlb_sutter_health_park", "Sutter Health Park", "Sacramento", "CA", "USA", "mlb", 38.5803, -121.5005),
|
||||
"stadium_mlb_citizens_bank_park": StadiumInfo("stadium_mlb_citizens_bank_park", "Citizens Bank Park", "Philadelphia", "PA", "USA", "mlb", 39.9061, -75.1665),
|
||||
"stadium_mlb_pnc_park": StadiumInfo("stadium_mlb_pnc_park", "PNC Park", "Pittsburgh", "PA", "USA", "mlb", 40.4469, -80.0057),
|
||||
"stadium_mlb_petco_park": StadiumInfo("stadium_mlb_petco_park", "Petco Park", "San Diego", "CA", "USA", "mlb", 32.7076, -117.1570),
|
||||
"stadium_mlb_oracle_park": StadiumInfo("stadium_mlb_oracle_park", "Oracle Park", "San Francisco", "CA", "USA", "mlb", 37.7786, -122.3893),
|
||||
"stadium_mlb_tmobile_park": StadiumInfo("stadium_mlb_tmobile_park", "T-Mobile Park", "Seattle", "WA", "USA", "mlb", 47.5914, -122.3325),
|
||||
"stadium_mlb_busch_stadium": StadiumInfo("stadium_mlb_busch_stadium", "Busch Stadium", "St. Louis", "MO", "USA", "mlb", 38.6226, -90.1928),
|
||||
"stadium_mlb_tropicana_field": StadiumInfo("stadium_mlb_tropicana_field", "Tropicana Field", "St. Petersburg", "FL", "USA", "mlb", 27.7682, -82.6534),
|
||||
"stadium_mlb_globe_life_field": StadiumInfo("stadium_mlb_globe_life_field", "Globe Life Field", "Arlington", "TX", "USA", "mlb", 32.7473, -97.0845),
|
||||
"stadium_mlb_rogers_centre": StadiumInfo("stadium_mlb_rogers_centre", "Rogers Centre", "Toronto", "ON", "Canada", "mlb", 43.6414, -79.3894),
|
||||
"stadium_mlb_nationals_park": StadiumInfo("stadium_mlb_nationals_park", "Nationals Park", "Washington", "DC", "USA", "mlb", 38.8730, -77.0074),
|
||||
},
|
||||
"nfl": {
|
||||
"stadium_nfl_state_farm_stadium": StadiumInfo("stadium_nfl_state_farm_stadium", "State Farm Stadium", "Glendale", "AZ", "USA", "nfl", 33.5276, -112.2626),
|
||||
"stadium_nfl_mercedes_benz_stadium": StadiumInfo("stadium_nfl_mercedes_benz_stadium", "Mercedes-Benz Stadium", "Atlanta", "GA", "USA", "nfl", 33.7553, -84.4006),
|
||||
"stadium_nfl_mandt_bank_stadium": StadiumInfo("stadium_nfl_mandt_bank_stadium", "M&T Bank Stadium", "Baltimore", "MD", "USA", "nfl", 39.2780, -76.6227),
|
||||
"stadium_nfl_highmark_stadium": StadiumInfo("stadium_nfl_highmark_stadium", "Highmark Stadium", "Orchard Park", "NY", "USA", "nfl", 42.7738, -78.7870),
|
||||
"stadium_nfl_bank_of_america_stadium": StadiumInfo("stadium_nfl_bank_of_america_stadium", "Bank of America Stadium", "Charlotte", "NC", "USA", "nfl", 35.2258, -80.8528),
|
||||
"stadium_nfl_soldier_field": StadiumInfo("stadium_nfl_soldier_field", "Soldier Field", "Chicago", "IL", "USA", "nfl", 41.8623, -87.6167),
|
||||
"stadium_nfl_paycor_stadium": StadiumInfo("stadium_nfl_paycor_stadium", "Paycor Stadium", "Cincinnati", "OH", "USA", "nfl", 39.0955, -84.5161),
|
||||
"stadium_nfl_huntington_bank_field": StadiumInfo("stadium_nfl_huntington_bank_field", "Huntington Bank Field", "Cleveland", "OH", "USA", "nfl", 41.5061, -81.6995),
|
||||
"stadium_nfl_att_stadium": StadiumInfo("stadium_nfl_att_stadium", "AT&T Stadium", "Arlington", "TX", "USA", "nfl", 32.7473, -97.0945),
|
||||
"stadium_nfl_empower_field": StadiumInfo("stadium_nfl_empower_field", "Empower Field at Mile High", "Denver", "CO", "USA", "nfl", 39.7439, -105.0201),
|
||||
"stadium_nfl_ford_field": StadiumInfo("stadium_nfl_ford_field", "Ford Field", "Detroit", "MI", "USA", "nfl", 42.3400, -83.0456),
|
||||
"stadium_nfl_lambeau_field": StadiumInfo("stadium_nfl_lambeau_field", "Lambeau Field", "Green Bay", "WI", "USA", "nfl", 44.5013, -88.0622),
|
||||
"stadium_nfl_nrg_stadium": StadiumInfo("stadium_nfl_nrg_stadium", "NRG Stadium", "Houston", "TX", "USA", "nfl", 29.6847, -95.4107),
|
||||
"stadium_nfl_lucas_oil_stadium": StadiumInfo("stadium_nfl_lucas_oil_stadium", "Lucas Oil Stadium", "Indianapolis", "IN", "USA", "nfl", 39.7601, -86.1639),
|
||||
"stadium_nfl_everbank_stadium": StadiumInfo("stadium_nfl_everbank_stadium", "EverBank Stadium", "Jacksonville", "FL", "USA", "nfl", 30.3239, -81.6373),
|
||||
"stadium_nfl_arrowhead_stadium": StadiumInfo("stadium_nfl_arrowhead_stadium", "Arrowhead Stadium", "Kansas City", "MO", "USA", "nfl", 39.0489, -94.4839),
|
||||
"stadium_nfl_allegiant_stadium": StadiumInfo("stadium_nfl_allegiant_stadium", "Allegiant Stadium", "Las Vegas", "NV", "USA", "nfl", 36.0909, -115.1833),
|
||||
"stadium_nfl_sofi_stadium": StadiumInfo("stadium_nfl_sofi_stadium", "SoFi Stadium", "Inglewood", "CA", "USA", "nfl", 33.9534, -118.3386),
|
||||
"stadium_nfl_hard_rock_stadium": StadiumInfo("stadium_nfl_hard_rock_stadium", "Hard Rock Stadium", "Miami Gardens", "FL", "USA", "nfl", 25.9580, -80.2389),
|
||||
"stadium_nfl_us_bank_stadium": StadiumInfo("stadium_nfl_us_bank_stadium", "U.S. Bank Stadium", "Minneapolis", "MN", "USA", "nfl", 44.9737, -93.2575),
|
||||
"stadium_nfl_gillette_stadium": StadiumInfo("stadium_nfl_gillette_stadium", "Gillette Stadium", "Foxborough", "MA", "USA", "nfl", 42.0909, -71.2643),
|
||||
"stadium_nfl_caesars_superdome": StadiumInfo("stadium_nfl_caesars_superdome", "Caesars Superdome", "New Orleans", "LA", "USA", "nfl", 29.9511, -90.0812),
|
||||
"stadium_nfl_metlife_stadium": StadiumInfo("stadium_nfl_metlife_stadium", "MetLife Stadium", "East Rutherford", "NJ", "USA", "nfl", 40.8128, -74.0742),
|
||||
"stadium_nfl_lincoln_financial_field": StadiumInfo("stadium_nfl_lincoln_financial_field", "Lincoln Financial Field", "Philadelphia", "PA", "USA", "nfl", 39.9008, -75.1675),
|
||||
"stadium_nfl_acrisure_stadium": StadiumInfo("stadium_nfl_acrisure_stadium", "Acrisure Stadium", "Pittsburgh", "PA", "USA", "nfl", 40.4468, -80.0158),
|
||||
"stadium_nfl_levis_stadium": StadiumInfo("stadium_nfl_levis_stadium", "Levi's Stadium", "Santa Clara", "CA", "USA", "nfl", 37.4033, -121.9695),
|
||||
"stadium_nfl_lumen_field": StadiumInfo("stadium_nfl_lumen_field", "Lumen Field", "Seattle", "WA", "USA", "nfl", 47.5952, -122.3316),
|
||||
"stadium_nfl_raymond_james_stadium": StadiumInfo("stadium_nfl_raymond_james_stadium", "Raymond James Stadium", "Tampa", "FL", "USA", "nfl", 27.9759, -82.5033),
|
||||
"stadium_nfl_nissan_stadium": StadiumInfo("stadium_nfl_nissan_stadium", "Nissan Stadium", "Nashville", "TN", "USA", "nfl", 36.1665, -86.7713),
|
||||
"stadium_nfl_northwest_stadium": StadiumInfo("stadium_nfl_northwest_stadium", "Northwest Stadium", "Landover", "MD", "USA", "nfl", 38.9076, -76.8645),
|
||||
},
|
||||
"nhl": {
|
||||
"stadium_nhl_honda_center": StadiumInfo("stadium_nhl_honda_center", "Honda Center", "Anaheim", "CA", "USA", "nhl", 33.8078, -117.8765),
|
||||
"stadium_nhl_delta_center": StadiumInfo("stadium_nhl_delta_center", "Delta Center", "Salt Lake City", "UT", "USA", "nhl", 40.7683, -111.9011),
|
||||
"stadium_nhl_td_garden": StadiumInfo("stadium_nhl_td_garden", "TD Garden", "Boston", "MA", "USA", "nhl", 42.3662, -71.0621),
|
||||
"stadium_nhl_keybank_center": StadiumInfo("stadium_nhl_keybank_center", "KeyBank Center", "Buffalo", "NY", "USA", "nhl", 42.8750, -78.8764),
|
||||
"stadium_nhl_scotiabank_saddledome": StadiumInfo("stadium_nhl_scotiabank_saddledome", "Scotiabank Saddledome", "Calgary", "AB", "Canada", "nhl", 51.0374, -114.0519),
|
||||
"stadium_nhl_pnc_arena": StadiumInfo("stadium_nhl_pnc_arena", "PNC Arena", "Raleigh", "NC", "USA", "nhl", 35.8033, -78.7220),
|
||||
"stadium_nhl_united_center": StadiumInfo("stadium_nhl_united_center", "United Center", "Chicago", "IL", "USA", "nhl", 41.8807, -87.6742),
|
||||
"stadium_nhl_ball_arena": StadiumInfo("stadium_nhl_ball_arena", "Ball Arena", "Denver", "CO", "USA", "nhl", 39.7487, -105.0077),
|
||||
"stadium_nhl_nationwide_arena": StadiumInfo("stadium_nhl_nationwide_arena", "Nationwide Arena", "Columbus", "OH", "USA", "nhl", 39.9692, -83.0061),
|
||||
"stadium_nhl_american_airlines_center": StadiumInfo("stadium_nhl_american_airlines_center", "American Airlines Center", "Dallas", "TX", "USA", "nhl", 32.7905, -96.8103),
|
||||
"stadium_nhl_little_caesars_arena": StadiumInfo("stadium_nhl_little_caesars_arena", "Little Caesars Arena", "Detroit", "MI", "USA", "nhl", 42.3411, -83.0553),
|
||||
"stadium_nhl_rogers_place": StadiumInfo("stadium_nhl_rogers_place", "Rogers Place", "Edmonton", "AB", "Canada", "nhl", 53.5469, -113.4979),
|
||||
"stadium_nhl_amerant_bank_arena": StadiumInfo("stadium_nhl_amerant_bank_arena", "Amerant Bank Arena", "Sunrise", "FL", "USA", "nhl", 26.1584, -80.3256),
|
||||
"stadium_nhl_cryptocom_arena": StadiumInfo("stadium_nhl_cryptocom_arena", "Crypto.com Arena", "Los Angeles", "CA", "USA", "nhl", 34.0430, -118.2673),
|
||||
"stadium_nhl_xcel_energy_center": StadiumInfo("stadium_nhl_xcel_energy_center", "Xcel Energy Center", "St. Paul", "MN", "USA", "nhl", 44.9448, -93.1010),
|
||||
"stadium_nhl_bell_centre": StadiumInfo("stadium_nhl_bell_centre", "Bell Centre", "Montreal", "QC", "Canada", "nhl", 45.4961, -73.5693),
|
||||
"stadium_nhl_bridgestone_arena": StadiumInfo("stadium_nhl_bridgestone_arena", "Bridgestone Arena", "Nashville", "TN", "USA", "nhl", 36.1592, -86.7785),
|
||||
"stadium_nhl_prudential_center": StadiumInfo("stadium_nhl_prudential_center", "Prudential Center", "Newark", "NJ", "USA", "nhl", 40.7334, -74.1712),
|
||||
"stadium_nhl_ubs_arena": StadiumInfo("stadium_nhl_ubs_arena", "UBS Arena", "Elmont", "NY", "USA", "nhl", 40.7170, -73.7255),
|
||||
"stadium_nhl_madison_square_garden": StadiumInfo("stadium_nhl_madison_square_garden", "Madison Square Garden", "New York", "NY", "USA", "nhl", 40.7505, -73.9934),
|
||||
"stadium_nhl_canadian_tire_centre": StadiumInfo("stadium_nhl_canadian_tire_centre", "Canadian Tire Centre", "Ottawa", "ON", "Canada", "nhl", 45.2969, -75.9272),
|
||||
"stadium_nhl_wells_fargo_center": StadiumInfo("stadium_nhl_wells_fargo_center", "Wells Fargo Center", "Philadelphia", "PA", "USA", "nhl", 39.9012, -75.1720),
|
||||
"stadium_nhl_ppg_paints_arena": StadiumInfo("stadium_nhl_ppg_paints_arena", "PPG Paints Arena", "Pittsburgh", "PA", "USA", "nhl", 40.4395, -79.9890),
|
||||
"stadium_nhl_sap_center": StadiumInfo("stadium_nhl_sap_center", "SAP Center", "San Jose", "CA", "USA", "nhl", 37.3327, -121.9011),
|
||||
"stadium_nhl_climate_pledge_arena": StadiumInfo("stadium_nhl_climate_pledge_arena", "Climate Pledge Arena", "Seattle", "WA", "USA", "nhl", 47.6221, -122.3540),
|
||||
"stadium_nhl_enterprise_center": StadiumInfo("stadium_nhl_enterprise_center", "Enterprise Center", "St. Louis", "MO", "USA", "nhl", 38.6268, -90.2025),
|
||||
"stadium_nhl_amalie_arena": StadiumInfo("stadium_nhl_amalie_arena", "Amalie Arena", "Tampa", "FL", "USA", "nhl", 27.9428, -82.4519),
|
||||
"stadium_nhl_scotiabank_arena": StadiumInfo("stadium_nhl_scotiabank_arena", "Scotiabank Arena", "Toronto", "ON", "Canada", "nhl", 43.6435, -79.3791),
|
||||
"stadium_nhl_rogers_arena": StadiumInfo("stadium_nhl_rogers_arena", "Rogers Arena", "Vancouver", "BC", "Canada", "nhl", 49.2778, -123.1088),
|
||||
"stadium_nhl_tmobile_arena": StadiumInfo("stadium_nhl_tmobile_arena", "T-Mobile Arena", "Las Vegas", "NV", "USA", "nhl", 36.1028, -115.1783),
|
||||
"stadium_nhl_capital_one_arena": StadiumInfo("stadium_nhl_capital_one_arena", "Capital One Arena", "Washington", "DC", "USA", "nhl", 38.8981, -77.0209),
|
||||
"stadium_nhl_canada_life_centre": StadiumInfo("stadium_nhl_canada_life_centre", "Canada Life Centre", "Winnipeg", "MB", "Canada", "nhl", 49.8928, -97.1433),
|
||||
},
|
||||
"mls": {
|
||||
"stadium_mls_mercedes_benz_stadium": StadiumInfo("stadium_mls_mercedes_benz_stadium", "Mercedes-Benz Stadium", "Atlanta", "GA", "USA", "mls", 33.7553, -84.4006),
|
||||
"stadium_mls_q2_stadium": StadiumInfo("stadium_mls_q2_stadium", "Q2 Stadium", "Austin", "TX", "USA", "mls", 30.3875, -97.7186),
|
||||
"stadium_mls_bank_of_america_stadium": StadiumInfo("stadium_mls_bank_of_america_stadium", "Bank of America Stadium", "Charlotte", "NC", "USA", "mls", 35.2258, -80.8528),
|
||||
"stadium_mls_soldier_field": StadiumInfo("stadium_mls_soldier_field", "Soldier Field", "Chicago", "IL", "USA", "mls", 41.8623, -87.6167),
|
||||
"stadium_mls_tql_stadium": StadiumInfo("stadium_mls_tql_stadium", "TQL Stadium", "Cincinnati", "OH", "USA", "mls", 39.1112, -84.5225),
|
||||
"stadium_mls_dicks_sporting_goods_park": StadiumInfo("stadium_mls_dicks_sporting_goods_park", "Dick's Sporting Goods Park", "Commerce City", "CO", "USA", "mls", 39.8056, -104.8922),
|
||||
"stadium_mls_lower_com_field": StadiumInfo("stadium_mls_lower_com_field", "Lower.com Field", "Columbus", "OH", "USA", "mls", 39.9689, -83.0173),
|
||||
"stadium_mls_toyota_stadium": StadiumInfo("stadium_mls_toyota_stadium", "Toyota Stadium", "Frisco", "TX", "USA", "mls", 33.1545, -96.8353),
|
||||
"stadium_mls_audi_field": StadiumInfo("stadium_mls_audi_field", "Audi Field", "Washington", "DC", "USA", "mls", 38.8687, -77.0128),
|
||||
"stadium_mls_shell_energy_stadium": StadiumInfo("stadium_mls_shell_energy_stadium", "Shell Energy Stadium", "Houston", "TX", "USA", "mls", 29.7522, -95.3527),
|
||||
"stadium_mls_dignity_health_sports_park": StadiumInfo("stadium_mls_dignity_health_sports_park", "Dignity Health Sports Park", "Carson", "CA", "USA", "mls", 33.8644, -118.2611),
|
||||
"stadium_mls_bmo_stadium": StadiumInfo("stadium_mls_bmo_stadium", "BMO Stadium", "Los Angeles", "CA", "USA", "mls", 34.0128, -118.2841),
|
||||
"stadium_mls_chase_stadium": StadiumInfo("stadium_mls_chase_stadium", "Chase Stadium", "Fort Lauderdale", "FL", "USA", "mls", 26.1930, -80.1611),
|
||||
"stadium_mls_allianz_field": StadiumInfo("stadium_mls_allianz_field", "Allianz Field", "St. Paul", "MN", "USA", "mls", 44.9528, -93.1650),
|
||||
"stadium_mls_stade_saputo": StadiumInfo("stadium_mls_stade_saputo", "Stade Saputo", "Montreal", "QC", "Canada", "mls", 45.5622, -73.5528),
|
||||
"stadium_mls_geodis_park": StadiumInfo("stadium_mls_geodis_park", "GEODIS Park", "Nashville", "TN", "USA", "mls", 36.1304, -86.7651),
|
||||
"stadium_mls_gillette_stadium": StadiumInfo("stadium_mls_gillette_stadium", "Gillette Stadium", "Foxborough", "MA", "USA", "mls", 42.0909, -71.2643),
|
||||
"stadium_mls_yankee_stadium": StadiumInfo("stadium_mls_yankee_stadium", "Yankee Stadium", "Bronx", "NY", "USA", "mls", 40.8296, -73.9262),
|
||||
"stadium_mls_red_bull_arena": StadiumInfo("stadium_mls_red_bull_arena", "Red Bull Arena", "Harrison", "NJ", "USA", "mls", 40.7369, -74.1503),
|
||||
"stadium_mls_inter_co_stadium": StadiumInfo("stadium_mls_inter_co_stadium", "Inter&Co Stadium", "Orlando", "FL", "USA", "mls", 28.5411, -81.3895),
|
||||
"stadium_mls_subaru_park": StadiumInfo("stadium_mls_subaru_park", "Subaru Park", "Chester", "PA", "USA", "mls", 39.8328, -75.3789),
|
||||
"stadium_mls_providence_park": StadiumInfo("stadium_mls_providence_park", "Providence Park", "Portland", "OR", "USA", "mls", 45.5216, -122.6917),
|
||||
"stadium_mls_america_first_field": StadiumInfo("stadium_mls_america_first_field", "America First Field", "Sandy", "UT", "USA", "mls", 40.5830, -111.8933),
|
||||
"stadium_mls_paypal_park": StadiumInfo("stadium_mls_paypal_park", "PayPal Park", "San Jose", "CA", "USA", "mls", 37.3511, -121.9250),
|
||||
"stadium_mls_snapdragon_stadium": StadiumInfo("stadium_mls_snapdragon_stadium", "Snapdragon Stadium", "San Diego", "CA", "USA", "mls", 32.7837, -117.1225),
|
||||
"stadium_mls_lumen_field": StadiumInfo("stadium_mls_lumen_field", "Lumen Field", "Seattle", "WA", "USA", "mls", 47.5952, -122.3316),
|
||||
"stadium_mls_childrens_mercy_park": StadiumInfo("stadium_mls_childrens_mercy_park", "Children's Mercy Park", "Kansas City", "KS", "USA", "mls", 39.1217, -94.8231),
|
||||
"stadium_mls_citypark": StadiumInfo("stadium_mls_citypark", "CITYPARK", "St. Louis", "MO", "USA", "mls", 38.6316, -90.2106),
|
||||
"stadium_mls_bmo_field": StadiumInfo("stadium_mls_bmo_field", "BMO Field", "Toronto", "ON", "Canada", "mls", 43.6332, -79.4186),
|
||||
"stadium_mls_bc_place": StadiumInfo("stadium_mls_bc_place", "BC Place", "Vancouver", "BC", "Canada", "mls", 49.2768, -123.1118),
|
||||
},
|
||||
"wnba": {
|
||||
"stadium_wnba_gateway_center_arena": StadiumInfo("stadium_wnba_gateway_center_arena", "Gateway Center Arena", "College Park", "GA", "USA", "wnba", 33.6510, -84.4474),
|
||||
"stadium_wnba_wintrust_arena": StadiumInfo("stadium_wnba_wintrust_arena", "Wintrust Arena", "Chicago", "IL", "USA", "wnba", 41.8658, -87.6169),
|
||||
"stadium_wnba_mohegan_sun_arena": StadiumInfo("stadium_wnba_mohegan_sun_arena", "Mohegan Sun Arena", "Uncasville", "CT", "USA", "wnba", 41.4931, -72.0912),
|
||||
"stadium_wnba_college_park_center": StadiumInfo("stadium_wnba_college_park_center", "College Park Center", "Arlington", "TX", "USA", "wnba", 32.7304, -97.1077),
|
||||
"stadium_wnba_chase_center": StadiumInfo("stadium_wnba_chase_center", "Chase Center", "San Francisco", "CA", "USA", "wnba", 37.7680, -122.3877),
|
||||
"stadium_wnba_gainbridge_fieldhouse": StadiumInfo("stadium_wnba_gainbridge_fieldhouse", "Gainbridge Fieldhouse", "Indianapolis", "IN", "USA", "wnba", 39.7640, -86.1555),
|
||||
"stadium_wnba_michelob_ultra_arena": StadiumInfo("stadium_wnba_michelob_ultra_arena", "Michelob Ultra Arena", "Las Vegas", "NV", "USA", "wnba", 36.0902, -115.1756),
|
||||
"stadium_wnba_cryptocom_arena": StadiumInfo("stadium_wnba_cryptocom_arena", "Crypto.com Arena", "Los Angeles", "CA", "USA", "wnba", 34.0430, -118.2673),
|
||||
"stadium_wnba_target_center": StadiumInfo("stadium_wnba_target_center", "Target Center", "Minneapolis", "MN", "USA", "wnba", 44.9795, -93.2761),
|
||||
"stadium_wnba_barclays_center": StadiumInfo("stadium_wnba_barclays_center", "Barclays Center", "Brooklyn", "NY", "USA", "wnba", 40.6826, -73.9754),
|
||||
"stadium_wnba_footprint_center": StadiumInfo("stadium_wnba_footprint_center", "Footprint Center", "Phoenix", "AZ", "USA", "wnba", 33.4457, -112.0712),
|
||||
"stadium_wnba_climate_pledge_arena": StadiumInfo("stadium_wnba_climate_pledge_arena", "Climate Pledge Arena", "Seattle", "WA", "USA", "wnba", 47.6221, -122.3540),
|
||||
"stadium_wnba_entertainment_sports_arena": StadiumInfo("stadium_wnba_entertainment_sports_arena", "Entertainment & Sports Arena", "Washington", "DC", "USA", "wnba", 38.8690, -76.9745),
|
||||
},
|
||||
"nwsl": {
|
||||
"stadium_nwsl_bmo_stadium": StadiumInfo("stadium_nwsl_bmo_stadium", "BMO Stadium", "Los Angeles", "CA", "USA", "nwsl", 34.0128, -118.2841),
|
||||
"stadium_nwsl_seatgeek_stadium": StadiumInfo("stadium_nwsl_seatgeek_stadium", "SeatGeek Stadium", "Bridgeview", "IL", "USA", "nwsl", 41.7500, -87.8028),
|
||||
"stadium_nwsl_shell_energy_stadium": StadiumInfo("stadium_nwsl_shell_energy_stadium", "Shell Energy Stadium", "Houston", "TX", "USA", "nwsl", 29.7522, -95.3527),
|
||||
"stadium_nwsl_cpkc_stadium": StadiumInfo("stadium_nwsl_cpkc_stadium", "CPKC Stadium", "Kansas City", "MO", "USA", "nwsl", 39.1050, -94.5580),
|
||||
"stadium_nwsl_red_bull_arena": StadiumInfo("stadium_nwsl_red_bull_arena", "Red Bull Arena", "Harrison", "NJ", "USA", "nwsl", 40.7369, -74.1503),
|
||||
"stadium_nwsl_wakemed_soccer_park": StadiumInfo("stadium_nwsl_wakemed_soccer_park", "WakeMed Soccer Park", "Cary", "NC", "USA", "nwsl", 35.7879, -78.7806),
|
||||
"stadium_nwsl_inter_co_stadium": StadiumInfo("stadium_nwsl_inter_co_stadium", "Inter&Co Stadium", "Orlando", "FL", "USA", "nwsl", 28.5411, -81.3895),
|
||||
"stadium_nwsl_providence_park": StadiumInfo("stadium_nwsl_providence_park", "Providence Park", "Portland", "OR", "USA", "nwsl", 45.5216, -122.6917),
|
||||
"stadium_nwsl_lynn_family_stadium": StadiumInfo("stadium_nwsl_lynn_family_stadium", "Lynn Family Stadium", "Louisville", "KY", "USA", "nwsl", 38.2219, -85.7381),
|
||||
"stadium_nwsl_snapdragon_stadium": StadiumInfo("stadium_nwsl_snapdragon_stadium", "Snapdragon Stadium", "San Diego", "CA", "USA", "nwsl", 32.7837, -117.1225),
|
||||
"stadium_nwsl_lumen_field": StadiumInfo("stadium_nwsl_lumen_field", "Lumen Field", "Seattle", "WA", "USA", "nwsl", 47.5952, -122.3316),
|
||||
"stadium_nwsl_america_first_field": StadiumInfo("stadium_nwsl_america_first_field", "America First Field", "Sandy", "UT", "USA", "nwsl", 40.5830, -111.8933),
|
||||
"stadium_nwsl_audi_field": StadiumInfo("stadium_nwsl_audi_field", "Audi Field", "Washington", "DC", "USA", "nwsl", 38.8687, -77.0128),
|
||||
"stadium_nwsl_paypal_park": StadiumInfo("stadium_nwsl_paypal_park", "PayPal Park", "San Jose", "CA", "USA", "nwsl", 37.3511, -121.9250),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
class StadiumResolver:
|
||||
"""Resolves stadium names to canonical IDs.
|
||||
|
||||
Resolution order:
|
||||
1. Exact match against stadium names
|
||||
2. Alias lookup (with date awareness)
|
||||
3. Fuzzy match against all known names
|
||||
4. Geographic filter check
|
||||
5. Unresolved (returns ManualReviewItem)
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
sport: str,
|
||||
alias_loader: Optional[StadiumAliasLoader] = None,
|
||||
fuzzy_threshold: int = FUZZY_MATCH_THRESHOLD,
|
||||
):
|
||||
"""Initialize the resolver.
|
||||
|
||||
Args:
|
||||
sport: Sport code (e.g., 'nba', 'mlb')
|
||||
alias_loader: Stadium alias loader (default: global loader)
|
||||
fuzzy_threshold: Minimum fuzzy match score
|
||||
"""
|
||||
self.sport = sport.lower()
|
||||
self.alias_loader = alias_loader or get_stadium_alias_loader()
|
||||
self.fuzzy_threshold = fuzzy_threshold
|
||||
self._stadiums = STADIUM_MAPPINGS.get(self.sport, {})
|
||||
|
||||
# Build match candidates
|
||||
self._candidates = self._build_candidates()
|
||||
|
||||
def _build_candidates(self) -> list[MatchCandidate]:
|
||||
"""Build match candidates from stadium mappings."""
|
||||
candidates = []
|
||||
|
||||
for stadium_id, info in self._stadiums.items():
|
||||
# Get aliases for this stadium
|
||||
aliases = [a.alias_name for a in self.alias_loader.get_aliases_for_stadium(stadium_id)]
|
||||
|
||||
# Add city as alias
|
||||
aliases.append(info.city)
|
||||
|
||||
candidates.append(MatchCandidate(
|
||||
canonical_id=stadium_id,
|
||||
name=info.name,
|
||||
aliases=aliases,
|
||||
))
|
||||
|
||||
return candidates
|
||||
|
||||
def resolve(
|
||||
self,
|
||||
name: str,
|
||||
check_date: Optional[date] = None,
|
||||
country: Optional[str] = None,
|
||||
source_url: Optional[str] = None,
|
||||
) -> StadiumResolveResult:
|
||||
"""Resolve a stadium name to a canonical ID.
|
||||
|
||||
Args:
|
||||
name: Stadium name to resolve
|
||||
check_date: Date for alias validity (None = today)
|
||||
country: Country for geographic filtering (None = no filter)
|
||||
source_url: Source URL for manual review items
|
||||
|
||||
Returns:
|
||||
StadiumResolveResult with resolution details
|
||||
"""
|
||||
name_lower = name.lower().strip()
|
||||
|
||||
# 1. Exact match against stadium names
|
||||
for stadium_id, info in self._stadiums.items():
|
||||
if name_lower == info.name.lower():
|
||||
return StadiumResolveResult(
|
||||
canonical_id=stadium_id,
|
||||
confidence=100,
|
||||
match_type="exact",
|
||||
)
|
||||
|
||||
# 2. Alias lookup
|
||||
alias_result = self.alias_loader.resolve(name, check_date)
|
||||
if alias_result:
|
||||
# Verify it's for the right sport (alias file has all sports)
|
||||
if alias_result.startswith(f"stadium_{self.sport}_"):
|
||||
return StadiumResolveResult(
|
||||
canonical_id=alias_result,
|
||||
confidence=95,
|
||||
match_type="alias",
|
||||
)
|
||||
|
||||
# 3. Fuzzy match
|
||||
matches = fuzzy_match_stadium(
|
||||
name,
|
||||
self._candidates,
|
||||
threshold=self.fuzzy_threshold,
|
||||
)
|
||||
|
||||
if matches:
|
||||
best = matches[0]
|
||||
review_item = None
|
||||
|
||||
# Create review item for low confidence matches
|
||||
if best.confidence < 90:
|
||||
review_item = ManualReviewItem(
|
||||
id=f"stadium_{uuid4().hex[:8]}",
|
||||
reason=ReviewReason.LOW_CONFIDENCE_MATCH,
|
||||
sport=self.sport,
|
||||
raw_value=name,
|
||||
context={"match_type": "fuzzy"},
|
||||
source_url=source_url,
|
||||
suggested_matches=matches,
|
||||
game_date=check_date,
|
||||
)
|
||||
|
||||
return StadiumResolveResult(
|
||||
canonical_id=best.canonical_id,
|
||||
confidence=best.confidence,
|
||||
match_type="fuzzy",
|
||||
review_item=review_item,
|
||||
)
|
||||
|
||||
# 4. Geographic filter check
|
||||
if country and country not in ALLOWED_COUNTRIES:
|
||||
review_item = ManualReviewItem(
|
||||
id=f"stadium_{uuid4().hex[:8]}",
|
||||
reason=ReviewReason.GEOGRAPHIC_FILTER,
|
||||
sport=self.sport,
|
||||
raw_value=name,
|
||||
context={"country": country, "reason": "Stadium outside USA/Canada/Mexico"},
|
||||
source_url=source_url,
|
||||
game_date=check_date,
|
||||
)
|
||||
|
||||
return StadiumResolveResult(
|
||||
canonical_id=None,
|
||||
confidence=0,
|
||||
match_type="filtered",
|
||||
filtered_reason="geographic",
|
||||
review_item=review_item,
|
||||
)
|
||||
|
||||
# 5. Unresolved
|
||||
review_item = ManualReviewItem(
|
||||
id=f"stadium_{uuid4().hex[:8]}",
|
||||
reason=ReviewReason.UNRESOLVED_STADIUM,
|
||||
sport=self.sport,
|
||||
raw_value=name,
|
||||
context={},
|
||||
source_url=source_url,
|
||||
suggested_matches=fuzzy_match_stadium(
|
||||
name,
|
||||
self._candidates,
|
||||
threshold=50, # Lower threshold for suggestions
|
||||
top_n=5,
|
||||
),
|
||||
game_date=check_date,
|
||||
)
|
||||
|
||||
return StadiumResolveResult(
|
||||
canonical_id=None,
|
||||
confidence=0,
|
||||
match_type="unresolved",
|
||||
review_item=review_item,
|
||||
)
|
||||
|
||||
def get_stadium_info(self, stadium_id: str) -> Optional[StadiumInfo]:
    """Look up the stored record for a canonical stadium ID.

    Args:
        stadium_id: Canonical stadium ID

    Returns:
        The matching StadiumInfo, or None when the ID is not registered
    """
    stadium_table = self._stadiums
    record = stadium_table.get(stadium_id)
    return record
|
||||
|
||||
def get_all_stadiums(self) -> list[StadiumInfo]:
    """Return every stadium record held by this resolver.

    Returns:
        A new list of StadiumInfo objects, in the order they are stored
    """
    return [*self._stadiums.values()]
|
||||
|
||||
def is_in_allowed_region(self, stadium_id: str) -> bool:
    """Tell whether a known stadium lies in an allowed country.

    Args:
        stadium_id: Canonical stadium ID

    Returns:
        True when the stadium is registered and its country is listed in
        ALLOWED_COUNTRIES; False for unknown IDs or other countries
    """
    record = self._stadiums.get(stadium_id)
    # Unknown IDs short-circuit to False before the country is inspected
    return bool(record) and record.country in ALLOWED_COUNTRIES
|
||||
|
||||
|
||||
# Module-level cache of stadium resolvers, keyed by lower-cased sport code
_resolvers: dict[str, StadiumResolver] = {}


def get_stadium_resolver(sport: str) -> StadiumResolver:
    """Return the cached stadium resolver for a sport, building it on first use."""
    cache_key = sport.lower()
    try:
        return _resolvers[cache_key]
    except KeyError:
        resolver = StadiumResolver(cache_key)
        _resolvers[cache_key] = resolver
        return resolver
|
||||
|
||||
|
||||
def resolve_stadium(
    sport: str,
    name: str,
    check_date: Optional[date] = None,
) -> StadiumResolveResult:
    """Resolve a stadium name using the shared resolver for a sport.

    Args:
        sport: Sport code
        name: Stadium name to resolve
        check_date: Date for alias validity

    Returns:
        StadiumResolveResult
    """
    resolver = get_stadium_resolver(sport)
    return resolver.resolve(name, check_date)
|
||||
482
Scripts/sportstime_parser/normalizers/team_resolver.py
Normal file
482
Scripts/sportstime_parser/normalizers/team_resolver.py
Normal file
@@ -0,0 +1,482 @@
|
||||
"""Team name resolver with exact, alias, and fuzzy matching."""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import date
|
||||
from typing import Optional
|
||||
from uuid import uuid4
|
||||
|
||||
from ..config import FUZZY_MATCH_THRESHOLD
|
||||
from ..models.aliases import (
|
||||
AliasType,
|
||||
FuzzyMatch,
|
||||
ManualReviewItem,
|
||||
ReviewReason,
|
||||
)
|
||||
from .alias_loader import get_team_alias_loader, TeamAliasLoader
|
||||
from .fuzzy import MatchCandidate, fuzzy_match_team, exact_match
|
||||
|
||||
|
||||
@dataclass
class TeamResolveResult:
    """Outcome of resolving a raw team string.

    Attributes:
        canonical_id: Canonical team ID, or None when nothing matched
        confidence: Match confidence (100 for exact matches, lower for fuzzy ones)
        match_type: How the match was made ('exact', 'alias', 'fuzzy', 'unresolved')
        review_item: ManualReviewItem attached when resolution failed or
            confidence was low
    """

    canonical_id: Optional[str]
    confidence: int
    match_type: str
    review_item: Optional[ManualReviewItem] = None
|
||||
|
||||
|
||||
# Hardcoded team mappings for each sport.
# Format: {sport: {abbreviation: (canonical_id, full_name, city)}}
# Several teams are listed under more than one abbreviation; those entries
# share the same canonical_id. Entry order is significant to code that
# iterates these dicts and keeps the first match.
TEAM_MAPPINGS: dict[str, dict[str, tuple[str, str, str]]] = {
    # National Basketball Association
    "nba": {
        "ATL": ("team_nba_atl", "Atlanta Hawks", "Atlanta"),
        "BOS": ("team_nba_bos", "Boston Celtics", "Boston"),
        "BKN": ("team_nba_brk", "Brooklyn Nets", "Brooklyn"),
        "BRK": ("team_nba_brk", "Brooklyn Nets", "Brooklyn"),
        "CHA": ("team_nba_cho", "Charlotte Hornets", "Charlotte"),
        "CHO": ("team_nba_cho", "Charlotte Hornets", "Charlotte"),
        "CHI": ("team_nba_chi", "Chicago Bulls", "Chicago"),
        "CLE": ("team_nba_cle", "Cleveland Cavaliers", "Cleveland"),
        "DAL": ("team_nba_dal", "Dallas Mavericks", "Dallas"),
        "DEN": ("team_nba_den", "Denver Nuggets", "Denver"),
        "DET": ("team_nba_det", "Detroit Pistons", "Detroit"),
        "GSW": ("team_nba_gsw", "Golden State Warriors", "Golden State"),
        "GS": ("team_nba_gsw", "Golden State Warriors", "Golden State"),
        "HOU": ("team_nba_hou", "Houston Rockets", "Houston"),
        "IND": ("team_nba_ind", "Indiana Pacers", "Indiana"),
        "LAC": ("team_nba_lac", "Los Angeles Clippers", "Los Angeles"),
        "LAL": ("team_nba_lal", "Los Angeles Lakers", "Los Angeles"),
        "MEM": ("team_nba_mem", "Memphis Grizzlies", "Memphis"),
        "MIA": ("team_nba_mia", "Miami Heat", "Miami"),
        "MIL": ("team_nba_mil", "Milwaukee Bucks", "Milwaukee"),
        "MIN": ("team_nba_min", "Minnesota Timberwolves", "Minnesota"),
        "NOP": ("team_nba_nop", "New Orleans Pelicans", "New Orleans"),
        "NO": ("team_nba_nop", "New Orleans Pelicans", "New Orleans"),
        "NYK": ("team_nba_nyk", "New York Knicks", "New York"),
        "NY": ("team_nba_nyk", "New York Knicks", "New York"),
        "OKC": ("team_nba_okc", "Oklahoma City Thunder", "Oklahoma City"),
        "ORL": ("team_nba_orl", "Orlando Magic", "Orlando"),
        "PHI": ("team_nba_phi", "Philadelphia 76ers", "Philadelphia"),
        "PHX": ("team_nba_phx", "Phoenix Suns", "Phoenix"),
        "PHO": ("team_nba_phx", "Phoenix Suns", "Phoenix"),
        "POR": ("team_nba_por", "Portland Trail Blazers", "Portland"),
        "SAC": ("team_nba_sac", "Sacramento Kings", "Sacramento"),
        "SAS": ("team_nba_sas", "San Antonio Spurs", "San Antonio"),
        "SA": ("team_nba_sas", "San Antonio Spurs", "San Antonio"),
        "TOR": ("team_nba_tor", "Toronto Raptors", "Toronto"),
        "UTA": ("team_nba_uta", "Utah Jazz", "Utah"),
        "WAS": ("team_nba_was", "Washington Wizards", "Washington"),
        "WSH": ("team_nba_was", "Washington Wizards", "Washington"),
    },
    # Major League Baseball
    "mlb": {
        "ARI": ("team_mlb_ari", "Arizona Diamondbacks", "Arizona"),
        "ATL": ("team_mlb_atl", "Atlanta Braves", "Atlanta"),
        "BAL": ("team_mlb_bal", "Baltimore Orioles", "Baltimore"),
        "BOS": ("team_mlb_bos", "Boston Red Sox", "Boston"),
        "CHC": ("team_mlb_chc", "Chicago Cubs", "Chicago"),
        "CHW": ("team_mlb_chw", "Chicago White Sox", "Chicago"),
        "CWS": ("team_mlb_chw", "Chicago White Sox", "Chicago"),
        "CIN": ("team_mlb_cin", "Cincinnati Reds", "Cincinnati"),
        "CLE": ("team_mlb_cle", "Cleveland Guardians", "Cleveland"),
        "COL": ("team_mlb_col", "Colorado Rockies", "Colorado"),
        "DET": ("team_mlb_det", "Detroit Tigers", "Detroit"),
        "HOU": ("team_mlb_hou", "Houston Astros", "Houston"),
        "KC": ("team_mlb_kc", "Kansas City Royals", "Kansas City"),
        "KCR": ("team_mlb_kc", "Kansas City Royals", "Kansas City"),
        "LAA": ("team_mlb_laa", "Los Angeles Angels", "Los Angeles"),
        "ANA": ("team_mlb_laa", "Los Angeles Angels", "Anaheim"),
        "LAD": ("team_mlb_lad", "Los Angeles Dodgers", "Los Angeles"),
        "MIA": ("team_mlb_mia", "Miami Marlins", "Miami"),
        "FLA": ("team_mlb_mia", "Miami Marlins", "Florida"),
        "MIL": ("team_mlb_mil", "Milwaukee Brewers", "Milwaukee"),
        "MIN": ("team_mlb_min", "Minnesota Twins", "Minnesota"),
        "NYM": ("team_mlb_nym", "New York Mets", "New York"),
        "NYY": ("team_mlb_nyy", "New York Yankees", "New York"),
        "OAK": ("team_mlb_oak", "Oakland Athletics", "Oakland"),
        "PHI": ("team_mlb_phi", "Philadelphia Phillies", "Philadelphia"),
        "PIT": ("team_mlb_pit", "Pittsburgh Pirates", "Pittsburgh"),
        "SD": ("team_mlb_sd", "San Diego Padres", "San Diego"),
        "SDP": ("team_mlb_sd", "San Diego Padres", "San Diego"),
        "SF": ("team_mlb_sf", "San Francisco Giants", "San Francisco"),
        "SFG": ("team_mlb_sf", "San Francisco Giants", "San Francisco"),
        "SEA": ("team_mlb_sea", "Seattle Mariners", "Seattle"),
        "STL": ("team_mlb_stl", "St. Louis Cardinals", "St. Louis"),
        "TB": ("team_mlb_tbr", "Tampa Bay Rays", "Tampa Bay"),
        "TBR": ("team_mlb_tbr", "Tampa Bay Rays", "Tampa Bay"),
        "TEX": ("team_mlb_tex", "Texas Rangers", "Texas"),
        "TOR": ("team_mlb_tor", "Toronto Blue Jays", "Toronto"),
        "WSN": ("team_mlb_wsn", "Washington Nationals", "Washington"),
        "WAS": ("team_mlb_wsn", "Washington Nationals", "Washington"),
    },
    # National Football League
    "nfl": {
        "ARI": ("team_nfl_ari", "Arizona Cardinals", "Arizona"),
        "ATL": ("team_nfl_atl", "Atlanta Falcons", "Atlanta"),
        "BAL": ("team_nfl_bal", "Baltimore Ravens", "Baltimore"),
        "BUF": ("team_nfl_buf", "Buffalo Bills", "Buffalo"),
        "CAR": ("team_nfl_car", "Carolina Panthers", "Carolina"),
        "CHI": ("team_nfl_chi", "Chicago Bears", "Chicago"),
        "CIN": ("team_nfl_cin", "Cincinnati Bengals", "Cincinnati"),
        "CLE": ("team_nfl_cle", "Cleveland Browns", "Cleveland"),
        "DAL": ("team_nfl_dal", "Dallas Cowboys", "Dallas"),
        "DEN": ("team_nfl_den", "Denver Broncos", "Denver"),
        "DET": ("team_nfl_det", "Detroit Lions", "Detroit"),
        "GB": ("team_nfl_gb", "Green Bay Packers", "Green Bay"),
        "GNB": ("team_nfl_gb", "Green Bay Packers", "Green Bay"),
        "HOU": ("team_nfl_hou", "Houston Texans", "Houston"),
        "IND": ("team_nfl_ind", "Indianapolis Colts", "Indianapolis"),
        "JAX": ("team_nfl_jax", "Jacksonville Jaguars", "Jacksonville"),
        "JAC": ("team_nfl_jax", "Jacksonville Jaguars", "Jacksonville"),
        "KC": ("team_nfl_kc", "Kansas City Chiefs", "Kansas City"),
        "KAN": ("team_nfl_kc", "Kansas City Chiefs", "Kansas City"),
        "LV": ("team_nfl_lv", "Las Vegas Raiders", "Las Vegas"),
        "LAC": ("team_nfl_lac", "Los Angeles Chargers", "Los Angeles"),
        "LAR": ("team_nfl_lar", "Los Angeles Rams", "Los Angeles"),
        "MIA": ("team_nfl_mia", "Miami Dolphins", "Miami"),
        "MIN": ("team_nfl_min", "Minnesota Vikings", "Minnesota"),
        "NE": ("team_nfl_ne", "New England Patriots", "New England"),
        "NWE": ("team_nfl_ne", "New England Patriots", "New England"),
        "NO": ("team_nfl_no", "New Orleans Saints", "New Orleans"),
        "NOR": ("team_nfl_no", "New Orleans Saints", "New Orleans"),
        "NYG": ("team_nfl_nyg", "New York Giants", "New York"),
        "NYJ": ("team_nfl_nyj", "New York Jets", "New York"),
        "PHI": ("team_nfl_phi", "Philadelphia Eagles", "Philadelphia"),
        "PIT": ("team_nfl_pit", "Pittsburgh Steelers", "Pittsburgh"),
        "SF": ("team_nfl_sf", "San Francisco 49ers", "San Francisco"),
        "SFO": ("team_nfl_sf", "San Francisco 49ers", "San Francisco"),
        "SEA": ("team_nfl_sea", "Seattle Seahawks", "Seattle"),
        "TB": ("team_nfl_tb", "Tampa Bay Buccaneers", "Tampa Bay"),
        "TAM": ("team_nfl_tb", "Tampa Bay Buccaneers", "Tampa Bay"),
        "TEN": ("team_nfl_ten", "Tennessee Titans", "Tennessee"),
        "WAS": ("team_nfl_was", "Washington Commanders", "Washington"),
        "WSH": ("team_nfl_was", "Washington Commanders", "Washington"),
    },
    # National Hockey League
    "nhl": {
        "ANA": ("team_nhl_ana", "Anaheim Ducks", "Anaheim"),
        "ARI": ("team_nhl_ari", "Utah Hockey Club", "Utah"),  # Moved 2024
        "UTA": ("team_nhl_ari", "Utah Hockey Club", "Utah"),
        "BOS": ("team_nhl_bos", "Boston Bruins", "Boston"),
        "BUF": ("team_nhl_buf", "Buffalo Sabres", "Buffalo"),
        "CGY": ("team_nhl_cgy", "Calgary Flames", "Calgary"),
        "CAR": ("team_nhl_car", "Carolina Hurricanes", "Carolina"),
        "CHI": ("team_nhl_chi", "Chicago Blackhawks", "Chicago"),
        "COL": ("team_nhl_col", "Colorado Avalanche", "Colorado"),
        "CBJ": ("team_nhl_cbj", "Columbus Blue Jackets", "Columbus"),
        "DAL": ("team_nhl_dal", "Dallas Stars", "Dallas"),
        "DET": ("team_nhl_det", "Detroit Red Wings", "Detroit"),
        "EDM": ("team_nhl_edm", "Edmonton Oilers", "Edmonton"),
        "FLA": ("team_nhl_fla", "Florida Panthers", "Florida"),
        "LA": ("team_nhl_la", "Los Angeles Kings", "Los Angeles"),
        "LAK": ("team_nhl_la", "Los Angeles Kings", "Los Angeles"),
        "MIN": ("team_nhl_min", "Minnesota Wild", "Minnesota"),
        "MTL": ("team_nhl_mtl", "Montreal Canadiens", "Montreal"),
        "MON": ("team_nhl_mtl", "Montreal Canadiens", "Montreal"),
        "NSH": ("team_nhl_nsh", "Nashville Predators", "Nashville"),
        "NAS": ("team_nhl_nsh", "Nashville Predators", "Nashville"),
        "NJ": ("team_nhl_njd", "New Jersey Devils", "New Jersey"),
        "NJD": ("team_nhl_njd", "New Jersey Devils", "New Jersey"),
        "NYI": ("team_nhl_nyi", "New York Islanders", "New York"),
        "NYR": ("team_nhl_nyr", "New York Rangers", "New York"),
        "OTT": ("team_nhl_ott", "Ottawa Senators", "Ottawa"),
        "PHI": ("team_nhl_phi", "Philadelphia Flyers", "Philadelphia"),
        "PIT": ("team_nhl_pit", "Pittsburgh Penguins", "Pittsburgh"),
        "SJ": ("team_nhl_sj", "San Jose Sharks", "San Jose"),
        "SJS": ("team_nhl_sj", "San Jose Sharks", "San Jose"),
        "SEA": ("team_nhl_sea", "Seattle Kraken", "Seattle"),
        "STL": ("team_nhl_stl", "St. Louis Blues", "St. Louis"),
        "TB": ("team_nhl_tb", "Tampa Bay Lightning", "Tampa Bay"),
        "TBL": ("team_nhl_tb", "Tampa Bay Lightning", "Tampa Bay"),
        "TOR": ("team_nhl_tor", "Toronto Maple Leafs", "Toronto"),
        "VAN": ("team_nhl_van", "Vancouver Canucks", "Vancouver"),
        "VGK": ("team_nhl_vgk", "Vegas Golden Knights", "Vegas"),
        "VEG": ("team_nhl_vgk", "Vegas Golden Knights", "Vegas"),
        "WAS": ("team_nhl_was", "Washington Capitals", "Washington"),
        "WSH": ("team_nhl_was", "Washington Capitals", "Washington"),
        "WPG": ("team_nhl_wpg", "Winnipeg Jets", "Winnipeg"),
    },
    # Major League Soccer
    "mls": {
        "ATL": ("team_mls_atl", "Atlanta United", "Atlanta"),
        "AUS": ("team_mls_aus", "Austin FC", "Austin"),
        "CLT": ("team_mls_clt", "Charlotte FC", "Charlotte"),
        "CHI": ("team_mls_chi", "Chicago Fire", "Chicago"),
        "CIN": ("team_mls_cin", "FC Cincinnati", "Cincinnati"),
        "COL": ("team_mls_col", "Colorado Rapids", "Colorado"),
        "CLB": ("team_mls_clb", "Columbus Crew", "Columbus"),
        "DAL": ("team_mls_dal", "FC Dallas", "Dallas"),
        "DC": ("team_mls_dc", "D.C. United", "Washington"),
        "HOU": ("team_mls_hou", "Houston Dynamo", "Houston"),
        "LAG": ("team_mls_lag", "LA Galaxy", "Los Angeles"),
        "LAFC": ("team_mls_lafc", "Los Angeles FC", "Los Angeles"),
        "MIA": ("team_mls_mia", "Inter Miami", "Miami"),
        "MIN": ("team_mls_min", "Minnesota United", "Minnesota"),
        "MTL": ("team_mls_mtl", "CF Montreal", "Montreal"),
        "NSH": ("team_mls_nsh", "Nashville SC", "Nashville"),
        "NE": ("team_mls_ne", "New England Revolution", "New England"),
        "NYC": ("team_mls_nyc", "New York City FC", "New York"),
        "RB": ("team_mls_ny", "New York Red Bulls", "New York"),
        "RBNY": ("team_mls_ny", "New York Red Bulls", "New York"),
        "ORL": ("team_mls_orl", "Orlando City", "Orlando"),
        "PHI": ("team_mls_phi", "Philadelphia Union", "Philadelphia"),
        "POR": ("team_mls_por", "Portland Timbers", "Portland"),
        "SLC": ("team_mls_slc", "Real Salt Lake", "Salt Lake"),
        "RSL": ("team_mls_slc", "Real Salt Lake", "Salt Lake"),
        "SJ": ("team_mls_sj", "San Jose Earthquakes", "San Jose"),
        "SD": ("team_mls_sd", "San Diego FC", "San Diego"),
        "SEA": ("team_mls_sea", "Seattle Sounders", "Seattle"),
        "SKC": ("team_mls_skc", "Sporting Kansas City", "Kansas City"),
        "STL": ("team_mls_stl", "St. Louis City SC", "St. Louis"),
        "TOR": ("team_mls_tor", "Toronto FC", "Toronto"),
        "VAN": ("team_mls_van", "Vancouver Whitecaps", "Vancouver"),
    },
    # Women's National Basketball Association
    "wnba": {
        "ATL": ("team_wnba_atl", "Atlanta Dream", "Atlanta"),
        "CHI": ("team_wnba_chi", "Chicago Sky", "Chicago"),
        "CON": ("team_wnba_con", "Connecticut Sun", "Connecticut"),
        "DAL": ("team_wnba_dal", "Dallas Wings", "Dallas"),
        "GSV": ("team_wnba_gsv", "Golden State Valkyries", "Golden State"),
        "IND": ("team_wnba_ind", "Indiana Fever", "Indiana"),
        "LV": ("team_wnba_lv", "Las Vegas Aces", "Las Vegas"),
        "LA": ("team_wnba_la", "Los Angeles Sparks", "Los Angeles"),
        "MIN": ("team_wnba_min", "Minnesota Lynx", "Minnesota"),
        "NY": ("team_wnba_ny", "New York Liberty", "New York"),
        "PHX": ("team_wnba_phx", "Phoenix Mercury", "Phoenix"),
        "SEA": ("team_wnba_sea", "Seattle Storm", "Seattle"),
        "WAS": ("team_wnba_was", "Washington Mystics", "Washington"),
    },
    # National Women's Soccer League
    "nwsl": {
        "ANF": ("team_nwsl_anf", "Angel City FC", "Los Angeles"),
        "CHI": ("team_nwsl_chi", "Chicago Red Stars", "Chicago"),
        "HOU": ("team_nwsl_hou", "Houston Dash", "Houston"),
        "KC": ("team_nwsl_kc", "Kansas City Current", "Kansas City"),
        "NJ": ("team_nwsl_nj", "NJ/NY Gotham FC", "New Jersey"),
        "NC": ("team_nwsl_nc", "North Carolina Courage", "North Carolina"),
        "ORL": ("team_nwsl_orl", "Orlando Pride", "Orlando"),
        "POR": ("team_nwsl_por", "Portland Thorns", "Portland"),
        "RGN": ("team_nwsl_rgn", "Racing Louisville", "Louisville"),
        "SD": ("team_nwsl_sd", "San Diego Wave", "San Diego"),
        "SEA": ("team_nwsl_sea", "Seattle Reign", "Seattle"),
        "SLC": ("team_nwsl_slc", "Utah Royals", "Utah"),
        "WAS": ("team_nwsl_was", "Washington Spirit", "Washington"),
        "BFC": ("team_nwsl_bfc", "Bay FC", "San Francisco"),
    },
}
|
||||
|
||||
|
||||
class TeamResolver:
    """Resolves team names to canonical IDs.

    Resolution order:
    1. Exact match against abbreviation mappings
    2. Exact match against full team names or city names
    3. Alias lookup (with date awareness)
    4. Fuzzy match against all known names
    5. Unresolved (returns ManualReviewItem)
    """

    def __init__(
        self,
        sport: str,
        alias_loader: Optional[TeamAliasLoader] = None,
        fuzzy_threshold: int = FUZZY_MATCH_THRESHOLD,
    ):
        """Initialize the resolver.

        Args:
            sport: Sport code (e.g., 'nba', 'mlb'); stored lower-cased
            alias_loader: Team alias loader (default: global loader)
            fuzzy_threshold: Minimum fuzzy match score
        """
        self.sport = sport.lower()
        self.alias_loader = alias_loader or get_team_alias_loader()
        self.fuzzy_threshold = fuzzy_threshold
        # Unknown sports get an empty mapping, so every lookup falls through
        # to alias/fuzzy resolution.
        self._mappings = TEAM_MAPPINGS.get(self.sport, {})

        # Build match candidates for fuzzy matching
        self._candidates = self._build_candidates()

    def _build_candidates(self) -> list[MatchCandidate]:
        """Build one fuzzy-match candidate per canonical team.

        Every abbreviation and city that maps to a canonical ID becomes an
        alias of that candidate; the candidate name is the full name of the
        first mapping entry seen for the ID.

        Returns:
            List of MatchCandidate objects in first-seen canonical ID order
        """
        # canonical_id -> (full_name, aliases). Aliases live in a dict used as
        # an ordered set so duplicates collapse while the order stays
        # deterministic (a plain set would reorder strings between runs).
        by_id: dict[str, tuple[str, dict[str, None]]] = {}

        for abbrev, (canonical_id, full_name, city) in self._mappings.items():
            _, aliases = by_id.setdefault(canonical_id, (full_name, {}))
            aliases[abbrev] = None
            aliases[city] = None

        return [
            MatchCandidate(
                canonical_id=cid,
                name=name,
                aliases=list(aliases),
            )
            for cid, (name, aliases) in by_id.items()
        ]

    def resolve(
        self,
        value: str,
        check_date: Optional[date] = None,
        source_url: Optional[str] = None,
    ) -> TeamResolveResult:
        """Resolve a team name to a canonical ID.

        Args:
            value: Team name, abbreviation, or city to resolve
            check_date: Date for alias validity (None = today)
            source_url: Source URL for manual review items

        Returns:
            TeamResolveResult with resolution details. Fuzzy matches below
            confidence 90 and unresolved values carry a ManualReviewItem.
        """
        value_upper = value.upper().strip()
        value_lower = value.lower().strip()

        # 1. Exact match against abbreviation
        by_abbrev = self._mappings.get(value_upper)
        if by_abbrev is not None:
            return TeamResolveResult(
                canonical_id=by_abbrev[0],
                confidence=100,
                match_type="exact",
            )

        # 2. Exact match against full names or cities
        # NOTE(review): several teams share a city (e.g. "New York",
        # "Los Angeles"); a bare city resolves to whichever team is listed
        # first, at confidence 100 - confirm that this is intended.
        for canonical_id, full_name, city in self._mappings.values():
            if value_lower in (full_name.lower(), city.lower()):
                return TeamResolveResult(
                    canonical_id=canonical_id,
                    confidence=100,
                    match_type="exact",
                )

        # 3. Alias lookup
        alias_result = self.alias_loader.resolve(value, check_date)
        if alias_result:
            return TeamResolveResult(
                canonical_id=alias_result,
                confidence=95,
                match_type="alias",
            )

        # 4. Fuzzy match
        matches = fuzzy_match_team(
            value,
            self._candidates,
            threshold=self.fuzzy_threshold,
        )

        if matches:
            best = matches[0]
            review_item = None

            # Create review item for low confidence matches
            if best.confidence < 90:
                review_item = ManualReviewItem(
                    id=f"team_{uuid4().hex[:8]}",
                    reason=ReviewReason.LOW_CONFIDENCE_MATCH,
                    sport=self.sport,
                    raw_value=value,
                    context={"match_type": "fuzzy"},
                    source_url=source_url,
                    suggested_matches=matches,
                    game_date=check_date,
                )

            return TeamResolveResult(
                canonical_id=best.canonical_id,
                confidence=best.confidence,
                match_type="fuzzy",
                review_item=review_item,
            )

        # 5. Unresolved
        review_item = ManualReviewItem(
            id=f"team_{uuid4().hex[:8]}",
            reason=ReviewReason.UNRESOLVED_TEAM,
            sport=self.sport,
            raw_value=value,
            context={},
            source_url=source_url,
            suggested_matches=fuzzy_match_team(
                value,
                self._candidates,
                threshold=50,  # Lower threshold for suggestions
                top_n=5,
            ),
            game_date=check_date,
        )

        return TeamResolveResult(
            canonical_id=None,
            confidence=0,
            match_type="unresolved",
            review_item=review_item,
        )

    def get_team_info(self, abbreviation: str) -> Optional[tuple[str, str, str]]:
        """Get team info by abbreviation.

        Args:
            abbreviation: Team abbreviation (case-insensitive; surrounding
                whitespace is ignored, matching resolve())

        Returns:
            Tuple of (canonical_id, full_name, city) or None
        """
        return self._mappings.get(abbreviation.strip().upper())

    def get_all_teams(self) -> list[tuple[str, str, str]]:
        """Get all teams for this sport.

        Returns:
            List of (canonical_id, full_name, city) tuples, one per canonical
            ID, taken from the first mapping entry for that ID
        """
        unique: dict[str, tuple[str, str, str]] = {}
        for team in self._mappings.values():
            # First entry wins, so alternate abbreviations do not repeat a team
            unique.setdefault(team[0], team)
        return list(unique.values())
|
||||
|
||||
|
||||
# Module-level cache of team resolvers, keyed by lower-cased sport code
_resolvers: dict[str, TeamResolver] = {}


def get_team_resolver(sport: str) -> TeamResolver:
    """Return the cached team resolver for a sport, building it on first use."""
    cache_key = sport.lower()
    try:
        return _resolvers[cache_key]
    except KeyError:
        resolver = TeamResolver(cache_key)
        _resolvers[cache_key] = resolver
        return resolver
|
||||
|
||||
|
||||
def resolve_team(
    sport: str,
    value: str,
    check_date: Optional[date] = None,
    source_url: Optional[str] = None,
) -> TeamResolveResult:
    """Convenience function to resolve a team name.

    Uses the cached resolver for the sport, so repeated calls share one
    TeamResolver instance.

    Args:
        sport: Sport code
        value: Team name to resolve
        check_date: Date for alias validity
        source_url: Source URL recorded on any manual review item that the
            resolution produces (optional; omitted by existing callers)

    Returns:
        TeamResolveResult
    """
    resolver = get_team_resolver(sport)
    # Forward source_url so review items created through this shortcut carry
    # the same provenance as those created via TeamResolver.resolve directly.
    return resolver.resolve(value, check_date, source_url=source_url)
|
||||
344
Scripts/sportstime_parser/normalizers/timezone.py
Normal file
344
Scripts/sportstime_parser/normalizers/timezone.py
Normal file
@@ -0,0 +1,344 @@
|
||||
"""Timezone conversion utilities for normalizing game times to UTC."""
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, date, time
|
||||
from typing import Optional
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
from dateutil import parser as dateutil_parser
|
||||
from dateutil.tz import gettz, tzutc
|
||||
|
||||
from ..models.aliases import ReviewReason, ManualReviewItem
|
||||
|
||||
|
||||
# Common timezone abbreviations mapped to IANA timezone names.
# Standard and daylight abbreviations share one IANA zone, so DST handling is
# left to the zone data rather than to the abbreviation.
TIMEZONE_ABBREV_MAP: dict[str, str] = {
    # US: Eastern
    "ET": "America/New_York",
    "EST": "America/New_York",
    "EDT": "America/New_York",
    # US: Central
    "CT": "America/Chicago",
    "CST": "America/Chicago",
    "CDT": "America/Chicago",
    # US: Mountain
    "MT": "America/Denver",
    "MST": "America/Denver",
    "MDT": "America/Denver",
    # US: Pacific
    "PT": "America/Los_Angeles",
    "PST": "America/Los_Angeles",
    "PDT": "America/Los_Angeles",
    # US: Alaska
    # NOTE(review): "AT" is often used for Atlantic Time; here it maps to
    # Alaska - confirm against the scraped sources.
    "AT": "America/Anchorage",
    "AKST": "America/Anchorage",
    "AKDT": "America/Anchorage",
    # US: Hawaii
    "HT": "Pacific/Honolulu",
    "HST": "Pacific/Honolulu",
    # Canada
    "AST": "America/Halifax",
    "ADT": "America/Halifax",
    "NST": "America/St_Johns",
    "NDT": "America/St_Johns",
    # Mexico
    "CDST": "America/Mexico_City",
    # UTC
    "UTC": "UTC",
    "GMT": "UTC",
    "Z": "UTC",
}
|
||||
|
||||
# State/province code to IANA timezone, used to infer a timezone from a
# venue's location. Each state maps to a single zone even where the state
# spans more than one.
STATE_TIMEZONE_MAP: dict[str, str] = {
    # Eastern
    "CT": "America/New_York",
    "DE": "America/New_York",
    "FL": "America/New_York",  # Most of Florida
    "GA": "America/New_York",
    "MA": "America/New_York",
    "MD": "America/New_York",
    "ME": "America/New_York",
    "MI": "America/Detroit",
    "NC": "America/New_York",
    "NH": "America/New_York",
    "NJ": "America/New_York",
    "NY": "America/New_York",
    "OH": "America/New_York",
    "PA": "America/New_York",
    "RI": "America/New_York",
    "SC": "America/New_York",
    "VA": "America/New_York",
    "VT": "America/New_York",
    "WV": "America/New_York",
    "DC": "America/New_York",
    # Central (IN and KY are grouped here but point at Eastern-time zones)
    "AL": "America/Chicago",
    "AR": "America/Chicago",
    "IA": "America/Chicago",
    "IL": "America/Chicago",
    "IN": "America/Indiana/Indianapolis",
    "KS": "America/Chicago",
    "KY": "America/Kentucky/Louisville",
    "LA": "America/Chicago",
    "MN": "America/Chicago",
    "MO": "America/Chicago",
    "MS": "America/Chicago",
    "ND": "America/Chicago",
    "NE": "America/Chicago",
    "OK": "America/Chicago",
    "SD": "America/Chicago",
    "TN": "America/Chicago",
    "TX": "America/Chicago",
    "WI": "America/Chicago",
    # Mountain
    "AZ": "America/Phoenix",  # No DST
    "CO": "America/Denver",
    "ID": "America/Boise",
    "MT": "America/Denver",
    "NM": "America/Denver",
    "UT": "America/Denver",
    "WY": "America/Denver",
    # Pacific
    "CA": "America/Los_Angeles",
    "NV": "America/Los_Angeles",
    "OR": "America/Los_Angeles",
    "WA": "America/Los_Angeles",
    # Alaska/Hawaii
    "AK": "America/Anchorage",
    "HI": "Pacific/Honolulu",
    # Canada provinces
    "ON": "America/Toronto",
    "QC": "America/Montreal",
    "BC": "America/Vancouver",
    "AB": "America/Edmonton",
    "MB": "America/Winnipeg",
    "SK": "America/Regina",
    "NS": "America/Halifax",
    "NB": "America/Moncton",
    "NL": "America/St_Johns",
    "PE": "America/Halifax",
}
|
||||
|
||||
|
||||
@dataclass
class TimezoneResult:
    """Outcome of normalizing a game time to UTC.

    Attributes:
        datetime_utc: The datetime expressed in UTC
        source_timezone: The timezone that was detected or assumed
        confidence: How reliable the timezone choice is ('high', 'medium', 'low')
        warning: Explanation when the timezone was uncertain, otherwise None
    """

    datetime_utc: datetime
    source_timezone: str
    confidence: str
    warning: Optional[str] = None
|
||||
|
||||
|
||||
def detect_timezone_from_string(time_str: str) -> Optional[str]:
    """Detect timezone from a time string containing a timezone abbreviation.

    Abbreviations from TIMEZONE_ABBREV_MAP are tried in the map's order and
    may appear anywhere in the string as a whole word; the first abbreviation
    that matches wins. Matching ignores case, except for "AT", which must be
    upper-case so that the English word "at" ("Tipoff at 7:00 PM") is not
    mistaken for a timezone.

    Args:
        time_str: Time string that may contain timezone info (e.g., '7:00 PM ET')

    Returns:
        IANA timezone string if detected, None otherwise (including for an
        empty string)
    """
    if not time_str:
        return None

    for abbrev, tz in TIMEZONE_ABBREV_MAP.items():
        # Escape the abbreviation so it is always matched literally.
        pattern = rf"\b{re.escape(abbrev)}\b"
        # "AT" collides with a common preposition; require exact case for it.
        flags = 0 if abbrev == "AT" else re.IGNORECASE
        if re.search(pattern, time_str, flags):
            return tz

    return None
|
||||
|
||||
|
||||
def detect_timezone_from_location(
    state: Optional[str] = None,
    city: Optional[str] = None,
) -> Optional[str]:
    """Infer an IANA timezone from a state/province code.

    Args:
        state: State/province code (e.g., 'NY', 'ON'); matched case-insensitively
        city: City name (accepted for special cases, currently not consulted)

    Returns:
        IANA timezone string if the state is known, None otherwise
    """
    if not state:
        return None
    return STATE_TIMEZONE_MAP.get(state.upper())
|
||||
|
||||
|
||||
def _utc_offset_label(dt: datetime) -> str:
    """Render an aware datetime's UTC offset as 'UTC' or 'UTC+HH:MM' / 'UTC-HH:MM'."""
    total_minutes = int(dt.utcoffset().total_seconds() // 60)
    if total_minutes == 0:
        return "UTC"
    sign = "-" if total_minutes < 0 else "+"
    hours, minutes = divmod(abs(total_minutes), 60)
    return f"UTC{sign}{hours:02d}:{minutes:02d}"


def parse_datetime(
    date_str: str,
    time_str: Optional[str] = None,
    timezone_hint: Optional[str] = None,
    location_state: Optional[str] = None,
) -> TimezoneResult:
    """Parse a date/time string and convert to UTC.

    Attempts to determine the timezone from, in order:
    1. An explicit UTC offset carried by the parsed value (e.g. a trailing
       'Z' or '-04:00'); used as-is with 'high' confidence
    2. A timezone abbreviation in time_str ('high' confidence)
    3. The provided timezone hint ('medium' confidence)
    4. Location-based inference ('medium' confidence)
    5. Default to Eastern Time with a warning ('low' confidence)

    Args:
        date_str: Date string (e.g., '2025-10-21', 'October 21, 2025')
        time_str: Optional time string (e.g., '7:00 PM ET', '19:00')
        timezone_hint: Optional IANA timezone to use if not detected
        location_state: Optional state code for timezone inference

    Returns:
        TimezoneResult with UTC datetime and metadata. When the input cannot
        be parsed, the result holds the current UTC time as a placeholder,
        source_timezone 'unknown', 'low' confidence and a warning; callers
        should check the warning before trusting datetime_utc. For inputs with
        an explicit offset, source_timezone is 'UTC' or a 'UTC+HH:MM' /
        'UTC-HH:MM' label rather than an IANA name.
    """
    # Combine date and time when both are supplied
    full_str = f"{date_str} {time_str}" if time_str else date_str

    try:
        parsed = dateutil_parser.parse(full_str, fuzzy=True)
    except (ValueError, OverflowError) as e:
        # If parsing fails, return a placeholder with low confidence
        return TimezoneResult(
            datetime_utc=datetime.now(tz=ZoneInfo("UTC")),
            source_timezone="unknown",
            confidence="low",
            warning=f"Failed to parse datetime: {e}",
        )

    utc = ZoneInfo("UTC")

    # The parsed value already carries an offset: convert directly. The tzinfo
    # objects produced by the parser are not IANA keys, so they must not be
    # looked up through ZoneInfo (that lookup would fail and mislabel the
    # result as an Eastern Time default).
    if parsed.utcoffset() is not None:
        return TimezoneResult(
            datetime_utc=parsed.astimezone(utc),
            source_timezone=_utc_offset_label(parsed),
            confidence="high",
            warning=None,
        )

    # Determine timezone for a naive datetime
    detected_tz = detect_timezone_from_string(time_str) if time_str else None
    confidence = "high"
    warning = None

    # Try timezone hint
    if not detected_tz and timezone_hint:
        detected_tz = timezone_hint
        confidence = "medium"

    # Try location inference
    if not detected_tz and location_state:
        detected_tz = detect_timezone_from_location(state=location_state)
        confidence = "medium"

    # Default to Eastern Time
    if not detected_tz:
        detected_tz = "America/New_York"
        confidence = "low"
        warning = "Timezone not detected, defaulting to Eastern Time"

    try:
        tz = ZoneInfo(detected_tz)
    except (KeyError, ValueError):
        # KeyError: no such zone; ValueError: malformed key (e.g. an absolute
        # path). Try to treat the value as an abbreviation before giving up.
        resolved = TIMEZONE_ABBREV_MAP.get(detected_tz.strip().upper())
        if resolved:
            tz = ZoneInfo(resolved)
            detected_tz = resolved
        else:
            tz = ZoneInfo("America/New_York")
            confidence = "low"
            warning = f"Unknown timezone '{detected_tz}', defaulting to Eastern Time"
            detected_tz = "America/New_York"

    # Attach the zone to the naive wall-clock time, then convert to UTC
    utc_dt = parsed.replace(tzinfo=tz).astimezone(utc)

    return TimezoneResult(
        datetime_utc=utc_dt,
        source_timezone=detected_tz,
        confidence=confidence,
        warning=warning,
    )
|
||||
|
||||
|
||||
def convert_to_utc(
    dt: datetime,
    source_timezone: str,
) -> datetime:
    """Return ``dt`` expressed in UTC.

    A naive ``dt`` is interpreted as wall-clock time in ``source_timezone``.
    An aware ``dt`` keeps the tzinfo it already carries; ``source_timezone``
    is not applied to it.

    Args:
        dt: Datetime to convert (timezone-naive or timezone-aware)
        source_timezone: IANA timezone name used to localize a naive ``dt``

    Returns:
        Timezone-aware datetime in UTC
    """
    # The zone is built up front, so an unknown name fails here even when
    # ``dt`` is already aware and the zone ends up unused.
    source_zone = ZoneInfo(source_timezone)

    localized = dt if dt.tzinfo is not None else dt.replace(tzinfo=source_zone)
    return localized.astimezone(ZoneInfo("UTC"))
|
||||
|
||||
|
||||
def create_timezone_warning(
    raw_value: str,
    sport: str,
    game_date: Optional[date] = None,
    source_url: Optional[str] = None,
) -> ManualReviewItem:
    """Build the manual-review entry for a game time with no resolvable timezone.

    Args:
        raw_value: The original time string that couldn't be resolved
        sport: Sport code
        game_date: Date of the game
        source_url: URL of the source page

    Returns:
        ManualReviewItem with reason ``ReviewReason.TIMEZONE_UNKNOWN``
    """
    # Only the first 20 characters of the raw value feed the id, with spaces
    # replaced by underscores.
    id_suffix = raw_value[:20].replace(" ", "_")
    review_id = f"tz_{sport}_{id_suffix}"

    return ManualReviewItem(
        id=review_id,
        reason=ReviewReason.TIMEZONE_UNKNOWN,
        sport=sport,
        raw_value=raw_value,
        context={"issue": "Could not determine timezone for game time"},
        source_url=source_url,
        game_date=game_date,
    )
|
||||
|
||||
|
||||
def get_stadium_timezone(
    stadium_state: str,
    stadium_timezone: Optional[str] = None,
) -> str:
    """Resolve the timezone name to use for a stadium.

    Sources are tried in order and the first non-empty one wins:
    the explicit ``stadium_timezone``, then the result of
    ``detect_timezone_from_location`` for ``stadium_state``, then
    Eastern Time.

    Args:
        stadium_state: State/province code
        stadium_timezone: Explicit timezone override from stadium data

    Returns:
        IANA timezone string
    """
    # ``or`` short-circuits, so the location lookup only runs when no
    # explicit override was supplied.
    return (
        stadium_timezone
        or detect_timezone_from_location(state=stadium_state)
        or "America/New_York"
    )
|
||||
Reference in New Issue
Block a user