Files
Sportstime/Scripts/sportstime_parser/normalizers/team_resolver.py
Trey t a8b0491571 wip
2026-01-19 22:12:53 -06:00

485 lines
22 KiB
Python

"""Team name resolver with exact, alias, and fuzzy matching."""
from dataclasses import dataclass
from datetime import date
from typing import Optional
from uuid import uuid4
from ..config import FUZZY_MATCH_THRESHOLD
from ..models.aliases import (
AliasType,
FuzzyMatch,
ManualReviewItem,
ReviewReason,
)
from .alias_loader import get_team_alias_loader, TeamAliasLoader
from .fuzzy import MatchCandidate, fuzzy_match_team, exact_match
@dataclass
class TeamResolveResult:
    """Outcome of resolving a single raw team string.

    Attributes:
        canonical_id: Canonical team ID that was matched, or None when the
            value could not be resolved.
        confidence: Match confidence; exact matches report 100, alias
            matches 95, fuzzy matches their own score, and unresolved
            values 0.
        match_type: Which strategy produced the result: 'exact', 'alias',
            'fuzzy', or 'unresolved'.
        review_item: ManualReviewItem attached when the value is unresolved
            or the fuzzy match was low confidence; None otherwise.
    """

    # Required fields come first; only review_item carries a default.
    canonical_id: Optional[str]
    confidence: int
    match_type: str
    review_item: Optional[ManualReviewItem] = None
# Hardcoded team mappings for each sport.
# Format: {sport: {abbreviation: (canonical_id, full_name, city)}}
#
# Sport keys are lowercase and abbreviation keys are uppercase; TeamResolver
# lowercases the sport code and uppercases the input before looking them up.
# Several abbreviations may point at the same canonical_id (e.g. "BKN" and
# "BRK"), so consumers that list teams must dedupe by canonical_id.
# The city field is not unique within a sport (e.g. "Los Angeles" or
# "New York" is shared by more than one team in several leagues).
TEAM_MAPPINGS: dict[str, dict[str, tuple[str, str, str]]] = {
"nba": {
"ATL": ("team_nba_atl", "Atlanta Hawks", "Atlanta"),
"BOS": ("team_nba_bos", "Boston Celtics", "Boston"),
"BKN": ("team_nba_brk", "Brooklyn Nets", "Brooklyn"),
"BRK": ("team_nba_brk", "Brooklyn Nets", "Brooklyn"),
"CHA": ("team_nba_cho", "Charlotte Hornets", "Charlotte"),
"CHO": ("team_nba_cho", "Charlotte Hornets", "Charlotte"),
"CHI": ("team_nba_chi", "Chicago Bulls", "Chicago"),
"CLE": ("team_nba_cle", "Cleveland Cavaliers", "Cleveland"),
"DAL": ("team_nba_dal", "Dallas Mavericks", "Dallas"),
"DEN": ("team_nba_den", "Denver Nuggets", "Denver"),
"DET": ("team_nba_det", "Detroit Pistons", "Detroit"),
"GSW": ("team_nba_gsw", "Golden State Warriors", "Golden State"),
"GS": ("team_nba_gsw", "Golden State Warriors", "Golden State"),
"HOU": ("team_nba_hou", "Houston Rockets", "Houston"),
"IND": ("team_nba_ind", "Indiana Pacers", "Indiana"),
"LAC": ("team_nba_lac", "Los Angeles Clippers", "Los Angeles"),
"LAL": ("team_nba_lal", "Los Angeles Lakers", "Los Angeles"),
"MEM": ("team_nba_mem", "Memphis Grizzlies", "Memphis"),
"MIA": ("team_nba_mia", "Miami Heat", "Miami"),
"MIL": ("team_nba_mil", "Milwaukee Bucks", "Milwaukee"),
"MIN": ("team_nba_min", "Minnesota Timberwolves", "Minnesota"),
"NOP": ("team_nba_nop", "New Orleans Pelicans", "New Orleans"),
"NO": ("team_nba_nop", "New Orleans Pelicans", "New Orleans"),
"NYK": ("team_nba_nyk", "New York Knicks", "New York"),
"NY": ("team_nba_nyk", "New York Knicks", "New York"),
"OKC": ("team_nba_okc", "Oklahoma City Thunder", "Oklahoma City"),
"ORL": ("team_nba_orl", "Orlando Magic", "Orlando"),
"PHI": ("team_nba_phi", "Philadelphia 76ers", "Philadelphia"),
"PHX": ("team_nba_phx", "Phoenix Suns", "Phoenix"),
"PHO": ("team_nba_phx", "Phoenix Suns", "Phoenix"),
"POR": ("team_nba_por", "Portland Trail Blazers", "Portland"),
"SAC": ("team_nba_sac", "Sacramento Kings", "Sacramento"),
"SAS": ("team_nba_sas", "San Antonio Spurs", "San Antonio"),
"SA": ("team_nba_sas", "San Antonio Spurs", "San Antonio"),
"TOR": ("team_nba_tor", "Toronto Raptors", "Toronto"),
"UTA": ("team_nba_uta", "Utah Jazz", "Utah"),
"WAS": ("team_nba_was", "Washington Wizards", "Washington"),
"WSH": ("team_nba_was", "Washington Wizards", "Washington"),
},
"mlb": {
"ARI": ("team_mlb_ari", "Arizona Diamondbacks", "Arizona"),
"ATL": ("team_mlb_atl", "Atlanta Braves", "Atlanta"),
"BAL": ("team_mlb_bal", "Baltimore Orioles", "Baltimore"),
"BOS": ("team_mlb_bos", "Boston Red Sox", "Boston"),
"CHC": ("team_mlb_chc", "Chicago Cubs", "Chicago"),
"CHW": ("team_mlb_chw", "Chicago White Sox", "Chicago"),
"CWS": ("team_mlb_chw", "Chicago White Sox", "Chicago"),
"CIN": ("team_mlb_cin", "Cincinnati Reds", "Cincinnati"),
"CLE": ("team_mlb_cle", "Cleveland Guardians", "Cleveland"),
"COL": ("team_mlb_col", "Colorado Rockies", "Colorado"),
"DET": ("team_mlb_det", "Detroit Tigers", "Detroit"),
"HOU": ("team_mlb_hou", "Houston Astros", "Houston"),
"KC": ("team_mlb_kc", "Kansas City Royals", "Kansas City"),
"KCR": ("team_mlb_kc", "Kansas City Royals", "Kansas City"),
"LAA": ("team_mlb_laa", "Los Angeles Angels", "Los Angeles"),
"ANA": ("team_mlb_laa", "Los Angeles Angels", "Anaheim"),
"LAD": ("team_mlb_lad", "Los Angeles Dodgers", "Los Angeles"),
"MIA": ("team_mlb_mia", "Miami Marlins", "Miami"),
"FLA": ("team_mlb_mia", "Miami Marlins", "Florida"),
"MIL": ("team_mlb_mil", "Milwaukee Brewers", "Milwaukee"),
"MIN": ("team_mlb_min", "Minnesota Twins", "Minnesota"),
"NYM": ("team_mlb_nym", "New York Mets", "New York"),
"NYY": ("team_mlb_nyy", "New York Yankees", "New York"),
"OAK": ("team_mlb_oak", "Oakland Athletics", "Oakland"),
"PHI": ("team_mlb_phi", "Philadelphia Phillies", "Philadelphia"),
"PIT": ("team_mlb_pit", "Pittsburgh Pirates", "Pittsburgh"),
"SD": ("team_mlb_sd", "San Diego Padres", "San Diego"),
"SDP": ("team_mlb_sd", "San Diego Padres", "San Diego"),
"SF": ("team_mlb_sf", "San Francisco Giants", "San Francisco"),
"SFG": ("team_mlb_sf", "San Francisco Giants", "San Francisco"),
"SEA": ("team_mlb_sea", "Seattle Mariners", "Seattle"),
"STL": ("team_mlb_stl", "St. Louis Cardinals", "St. Louis"),
"TB": ("team_mlb_tbr", "Tampa Bay Rays", "Tampa Bay"),
"TBR": ("team_mlb_tbr", "Tampa Bay Rays", "Tampa Bay"),
"TEX": ("team_mlb_tex", "Texas Rangers", "Texas"),
"TOR": ("team_mlb_tor", "Toronto Blue Jays", "Toronto"),
"WSN": ("team_mlb_wsn", "Washington Nationals", "Washington"),
"WAS": ("team_mlb_wsn", "Washington Nationals", "Washington"),
},
"nfl": {
"ARI": ("team_nfl_ari", "Arizona Cardinals", "Arizona"),
"ATL": ("team_nfl_atl", "Atlanta Falcons", "Atlanta"),
"BAL": ("team_nfl_bal", "Baltimore Ravens", "Baltimore"),
"BUF": ("team_nfl_buf", "Buffalo Bills", "Buffalo"),
"CAR": ("team_nfl_car", "Carolina Panthers", "Carolina"),
"CHI": ("team_nfl_chi", "Chicago Bears", "Chicago"),
"CIN": ("team_nfl_cin", "Cincinnati Bengals", "Cincinnati"),
"CLE": ("team_nfl_cle", "Cleveland Browns", "Cleveland"),
"DAL": ("team_nfl_dal", "Dallas Cowboys", "Dallas"),
"DEN": ("team_nfl_den", "Denver Broncos", "Denver"),
"DET": ("team_nfl_det", "Detroit Lions", "Detroit"),
"GB": ("team_nfl_gb", "Green Bay Packers", "Green Bay"),
"GNB": ("team_nfl_gb", "Green Bay Packers", "Green Bay"),
"HOU": ("team_nfl_hou", "Houston Texans", "Houston"),
"IND": ("team_nfl_ind", "Indianapolis Colts", "Indianapolis"),
"JAX": ("team_nfl_jax", "Jacksonville Jaguars", "Jacksonville"),
"JAC": ("team_nfl_jax", "Jacksonville Jaguars", "Jacksonville"),
"KC": ("team_nfl_kc", "Kansas City Chiefs", "Kansas City"),
"KAN": ("team_nfl_kc", "Kansas City Chiefs", "Kansas City"),
"LV": ("team_nfl_lv", "Las Vegas Raiders", "Las Vegas"),
"LAC": ("team_nfl_lac", "Los Angeles Chargers", "Los Angeles"),
"LAR": ("team_nfl_lar", "Los Angeles Rams", "Los Angeles"),
"MIA": ("team_nfl_mia", "Miami Dolphins", "Miami"),
"MIN": ("team_nfl_min", "Minnesota Vikings", "Minnesota"),
"NE": ("team_nfl_ne", "New England Patriots", "New England"),
"NWE": ("team_nfl_ne", "New England Patriots", "New England"),
"NO": ("team_nfl_no", "New Orleans Saints", "New Orleans"),
"NOR": ("team_nfl_no", "New Orleans Saints", "New Orleans"),
"NYG": ("team_nfl_nyg", "New York Giants", "New York"),
"NYJ": ("team_nfl_nyj", "New York Jets", "New York"),
"PHI": ("team_nfl_phi", "Philadelphia Eagles", "Philadelphia"),
"PIT": ("team_nfl_pit", "Pittsburgh Steelers", "Pittsburgh"),
"SF": ("team_nfl_sf", "San Francisco 49ers", "San Francisco"),
"SFO": ("team_nfl_sf", "San Francisco 49ers", "San Francisco"),
"SEA": ("team_nfl_sea", "Seattle Seahawks", "Seattle"),
"TB": ("team_nfl_tb", "Tampa Bay Buccaneers", "Tampa Bay"),
"TAM": ("team_nfl_tb", "Tampa Bay Buccaneers", "Tampa Bay"),
"TEN": ("team_nfl_ten", "Tennessee Titans", "Tennessee"),
"WAS": ("team_nfl_was", "Washington Commanders", "Washington"),
"WSH": ("team_nfl_was", "Washington Commanders", "Washington"),
},
"nhl": {
"ANA": ("team_nhl_ana", "Anaheim Ducks", "Anaheim"),
"ARI": ("team_nhl_ari", "Utah Hockey Club", "Utah"), # Moved 2024
"UTA": ("team_nhl_ari", "Utah Hockey Club", "Utah"),
"BOS": ("team_nhl_bos", "Boston Bruins", "Boston"),
"BUF": ("team_nhl_buf", "Buffalo Sabres", "Buffalo"),
"CGY": ("team_nhl_cgy", "Calgary Flames", "Calgary"),
"CAR": ("team_nhl_car", "Carolina Hurricanes", "Carolina"),
"CHI": ("team_nhl_chi", "Chicago Blackhawks", "Chicago"),
"COL": ("team_nhl_col", "Colorado Avalanche", "Colorado"),
"CBJ": ("team_nhl_cbj", "Columbus Blue Jackets", "Columbus"),
"DAL": ("team_nhl_dal", "Dallas Stars", "Dallas"),
"DET": ("team_nhl_det", "Detroit Red Wings", "Detroit"),
"EDM": ("team_nhl_edm", "Edmonton Oilers", "Edmonton"),
"FLA": ("team_nhl_fla", "Florida Panthers", "Florida"),
"LA": ("team_nhl_la", "Los Angeles Kings", "Los Angeles"),
"LAK": ("team_nhl_la", "Los Angeles Kings", "Los Angeles"),
"MIN": ("team_nhl_min", "Minnesota Wild", "Minnesota"),
"MTL": ("team_nhl_mtl", "Montreal Canadiens", "Montreal"),
"MON": ("team_nhl_mtl", "Montreal Canadiens", "Montreal"),
"NSH": ("team_nhl_nsh", "Nashville Predators", "Nashville"),
"NAS": ("team_nhl_nsh", "Nashville Predators", "Nashville"),
"NJ": ("team_nhl_njd", "New Jersey Devils", "New Jersey"),
"NJD": ("team_nhl_njd", "New Jersey Devils", "New Jersey"),
"NYI": ("team_nhl_nyi", "New York Islanders", "New York"),
"NYR": ("team_nhl_nyr", "New York Rangers", "New York"),
"OTT": ("team_nhl_ott", "Ottawa Senators", "Ottawa"),
"PHI": ("team_nhl_phi", "Philadelphia Flyers", "Philadelphia"),
"PIT": ("team_nhl_pit", "Pittsburgh Penguins", "Pittsburgh"),
"SJ": ("team_nhl_sj", "San Jose Sharks", "San Jose"),
"SJS": ("team_nhl_sj", "San Jose Sharks", "San Jose"),
"SEA": ("team_nhl_sea", "Seattle Kraken", "Seattle"),
"STL": ("team_nhl_stl", "St. Louis Blues", "St. Louis"),
"TB": ("team_nhl_tb", "Tampa Bay Lightning", "Tampa Bay"),
"TBL": ("team_nhl_tb", "Tampa Bay Lightning", "Tampa Bay"),
"TOR": ("team_nhl_tor", "Toronto Maple Leafs", "Toronto"),
"VAN": ("team_nhl_van", "Vancouver Canucks", "Vancouver"),
"VGK": ("team_nhl_vgk", "Vegas Golden Knights", "Vegas"),
"VEG": ("team_nhl_vgk", "Vegas Golden Knights", "Vegas"),
"WAS": ("team_nhl_was", "Washington Capitals", "Washington"),
"WSH": ("team_nhl_was", "Washington Capitals", "Washington"),
"WPG": ("team_nhl_wpg", "Winnipeg Jets", "Winnipeg"),
},
"mls": {
"ATL": ("team_mls_atl", "Atlanta United", "Atlanta"),
"AUS": ("team_mls_aus", "Austin FC", "Austin"),
"CLT": ("team_mls_clt", "Charlotte FC", "Charlotte"),
"CHI": ("team_mls_chi", "Chicago Fire", "Chicago"),
"CIN": ("team_mls_cin", "FC Cincinnati", "Cincinnati"),
"COL": ("team_mls_col", "Colorado Rapids", "Colorado"),
"CLB": ("team_mls_clb", "Columbus Crew", "Columbus"),
"DAL": ("team_mls_dal", "FC Dallas", "Dallas"),
"DC": ("team_mls_dc", "D.C. United", "Washington"),
"HOU": ("team_mls_hou", "Houston Dynamo", "Houston"),
"LAG": ("team_mls_lag", "LA Galaxy", "Los Angeles"),
"LAFC": ("team_mls_lafc", "Los Angeles FC", "Los Angeles"),
"MIA": ("team_mls_mia", "Inter Miami", "Miami"),
"MIN": ("team_mls_min", "Minnesota United", "Minnesota"),
"MTL": ("team_mls_mtl", "CF Montreal", "Montreal"),
"NSH": ("team_mls_nsh", "Nashville SC", "Nashville"),
"NE": ("team_mls_ne", "New England Revolution", "New England"),
"NYC": ("team_mls_nyc", "New York City FC", "New York"),
"RB": ("team_mls_ny", "New York Red Bulls", "New York"),
"RBNY": ("team_mls_ny", "New York Red Bulls", "New York"),
"ORL": ("team_mls_orl", "Orlando City", "Orlando"),
"PHI": ("team_mls_phi", "Philadelphia Union", "Philadelphia"),
"POR": ("team_mls_por", "Portland Timbers", "Portland"),
"SLC": ("team_mls_slc", "Real Salt Lake", "Salt Lake"),
"RSL": ("team_mls_slc", "Real Salt Lake", "Salt Lake"),
"SJ": ("team_mls_sj", "San Jose Earthquakes", "San Jose"),
"SD": ("team_mls_sd", "San Diego FC", "San Diego"),
"SEA": ("team_mls_sea", "Seattle Sounders", "Seattle"),
"SKC": ("team_mls_skc", "Sporting Kansas City", "Kansas City"),
"STL": ("team_mls_stl", "St. Louis City SC", "St. Louis"),
"TOR": ("team_mls_tor", "Toronto FC", "Toronto"),
"VAN": ("team_mls_van", "Vancouver Whitecaps", "Vancouver"),
},
"wnba": {
"ATL": ("team_wnba_atl", "Atlanta Dream", "Atlanta"),
"CHI": ("team_wnba_chi", "Chicago Sky", "Chicago"),
"CON": ("team_wnba_con", "Connecticut Sun", "Connecticut"),
"DAL": ("team_wnba_dal", "Dallas Wings", "Dallas"),
"GSV": ("team_wnba_gsv", "Golden State Valkyries", "Golden State"),
"IND": ("team_wnba_ind", "Indiana Fever", "Indiana"),
"LV": ("team_wnba_lv", "Las Vegas Aces", "Las Vegas"),
"LA": ("team_wnba_la", "Los Angeles Sparks", "Los Angeles"),
"MIN": ("team_wnba_min", "Minnesota Lynx", "Minnesota"),
"NY": ("team_wnba_ny", "New York Liberty", "New York"),
"PHX": ("team_wnba_phx", "Phoenix Mercury", "Phoenix"),
"SEA": ("team_wnba_sea", "Seattle Storm", "Seattle"),
"WAS": ("team_wnba_was", "Washington Mystics", "Washington"),
},
"nwsl": {
"ANF": ("team_nwsl_anf", "Angel City FC", "Los Angeles"),
"CHI": ("team_nwsl_chi", "Chicago Red Stars", "Chicago"),
"HOU": ("team_nwsl_hou", "Houston Dash", "Houston"),
"KC": ("team_nwsl_kc", "Kansas City Current", "Kansas City"),
"NJ": ("team_nwsl_nj", "NJ/NY Gotham FC", "New Jersey"),
"NC": ("team_nwsl_nc", "North Carolina Courage", "North Carolina"),
"ORL": ("team_nwsl_orl", "Orlando Pride", "Orlando"),
"POR": ("team_nwsl_por", "Portland Thorns", "Portland"),
"RGN": ("team_nwsl_rgn", "Racing Louisville", "Louisville"),
"SD": ("team_nwsl_sd", "San Diego Wave", "San Diego"),
"SEA": ("team_nwsl_sea", "Seattle Reign", "Seattle"),
"SLC": ("team_nwsl_slc", "Utah Royals", "Utah"),
"WAS": ("team_nwsl_was", "Washington Spirit", "Washington"),
"BFC": ("team_nwsl_bfc", "Bay FC", "San Francisco"),
"BOS": ("team_nwsl_bos", "Boston Legacy FC", "Boston"),
"DEN": ("team_nwsl_den", "Denver Summit FC", "Denver"),
},
}
class TeamResolver:
    """Resolves team names to canonical IDs.

    Resolution order:
    1. Exact match against abbreviation mappings
    2. Exact match against full team names, then against city names that
       belong to exactly one team of the sport
    3. Alias lookup (with date awareness)
    4. Fuzzy match against all known names
    5. Unresolved (returns ManualReviewItem)

    A bare city shared by several teams of the sport (e.g. "New York" in
    MLB) is never resolved by the exact or fuzzy steps, because picking one
    of the teams would be a guess. Unless an alias covers it, such a value
    is returned as unresolved with a review item naming the candidates.
    """

    # Fuzzy matches scoring below this are still returned but are flagged
    # for manual review.
    REVIEW_CONFIDENCE_THRESHOLD = 90
    # Looser settings used only to collect suggestions for unresolved values.
    SUGGESTION_THRESHOLD = 50
    SUGGESTION_LIMIT = 5

    def __init__(
        self,
        sport: str,
        alias_loader: Optional[TeamAliasLoader] = None,
        fuzzy_threshold: int = FUZZY_MATCH_THRESHOLD,
    ):
        """Initialize the resolver.

        Args:
            sport: Sport code (e.g., 'nba', 'mlb'); matched case-insensitively.
                An unknown sport yields an empty mapping table.
            alias_loader: Team alias loader (default: global loader)
            fuzzy_threshold: Minimum fuzzy match score
        """
        self.sport = sport.lower()
        # Compare against None so an explicitly supplied loader is never
        # swapped for the global one merely because it evaluates as falsy.
        self.alias_loader = (
            alias_loader if alias_loader is not None else get_team_alias_loader()
        )
        self.fuzzy_threshold = fuzzy_threshold
        self._mappings = TEAM_MAPPINGS.get(self.sport, {})
        # Lowercase lookup tables for the exact name / city step.
        self._name_index, self._city_index = self._build_exact_indexes()
        # Build match candidates for fuzzy matching
        self._candidates = self._build_candidates()

    def _build_exact_indexes(self) -> tuple[dict[str, str], dict[str, list[str]]]:
        """Build lowercase lookup tables for exact name and city matching.

        Returns:
            A pair ``(names, cities)`` where ``names`` maps a lowercase full
            team name to its canonical ID and ``cities`` maps a lowercase
            city to the distinct canonical IDs using it, in mapping order.
        """
        names: dict[str, str] = {}
        cities: dict[str, list[str]] = {}
        for canonical_id, full_name, city in self._mappings.values():
            names.setdefault(full_name.lower(), canonical_id)
            ids = cities.setdefault(city.lower(), [])
            if canonical_id not in ids:
                ids.append(canonical_id)
        return names, cities

    def _build_candidates(self) -> list[MatchCandidate]:
        """Build one fuzzy-match candidate per canonical team.

        Every abbreviation and city of a team becomes an alias of its
        candidate. Aliases are deduplicated in first-seen order so the
        candidate list is identical from run to run.
        """
        names: dict[str, str] = {}
        aliases: dict[str, dict[str, None]] = {}
        for abbrev, (canonical_id, full_name, city) in self._mappings.items():
            names.setdefault(canonical_id, full_name)
            # A dict is used as an insertion-ordered set.
            team_aliases = aliases.setdefault(canonical_id, {})
            team_aliases[abbrev] = None
            team_aliases[city] = None
        return [
            MatchCandidate(
                canonical_id=canonical_id,
                name=name,
                aliases=list(aliases[canonical_id]),
            )
            for canonical_id, name in names.items()
        ]

    def _make_review_item(
        self,
        reason,
        value: str,
        context: dict,
        check_date: Optional[date],
        source_url: Optional[str],
        suggested_matches,
    ) -> ManualReviewItem:
        """Create a ManualReviewItem for ``value`` with a fresh short ID."""
        return ManualReviewItem(
            id=f"team_{uuid4().hex[:8]}",
            reason=reason,
            sport=self.sport,
            raw_value=value,
            context=context,
            source_url=source_url,
            suggested_matches=suggested_matches,
            game_date=check_date,
        )

    def _unresolved(
        self,
        value: str,
        check_date: Optional[date],
        source_url: Optional[str],
        context: Optional[dict] = None,
        suggestions=None,
    ) -> TeamResolveResult:
        """Build the unresolved result together with its review item.

        When ``suggestions`` is None, suggestions are gathered with a
        low-threshold fuzzy search over the sport's candidates.
        """
        if suggestions is None:
            suggestions = fuzzy_match_team(
                value,
                self._candidates,
                threshold=self.SUGGESTION_THRESHOLD,  # Lower threshold for suggestions
                top_n=self.SUGGESTION_LIMIT,
            )
        review_item = self._make_review_item(
            ReviewReason.UNRESOLVED_TEAM,
            value,
            {} if context is None else context,
            check_date,
            source_url,
            suggestions,
        )
        return TeamResolveResult(
            canonical_id=None,
            confidence=0,
            match_type="unresolved",
            review_item=review_item,
        )

    def resolve(
        self,
        value: str,
        check_date: Optional[date] = None,
        source_url: Optional[str] = None,
    ) -> TeamResolveResult:
        """Resolve a team name to a canonical ID.

        Args:
            value: Team name, abbreviation, or city to resolve
            check_date: Date passed to the alias loader for alias validity
            source_url: Source URL for manual review items

        Returns:
            TeamResolveResult with resolution details. Blank input and
            cities shared by several teams come back as 'unresolved'.
        """
        cleaned = value.strip() if value else ""
        if not cleaned:
            # Nothing to match on: skip the alias and fuzzy machinery.
            return self._unresolved(
                value or "", check_date, source_url, suggestions=[]
            )

        # 1. Exact match against abbreviation
        entry = self._mappings.get(cleaned.upper())
        if entry is not None:
            return TeamResolveResult(
                canonical_id=entry[0],
                confidence=100,
                match_type="exact",
            )

        # 2. Exact match against full names, then unambiguous cities
        lowered = cleaned.lower()
        canonical_id = self._name_index.get(lowered)
        city_ids = self._city_index.get(lowered, [])
        if canonical_id is None and len(city_ids) == 1:
            canonical_id = city_ids[0]
        if canonical_id is not None:
            return TeamResolveResult(
                canonical_id=canonical_id,
                confidence=100,
                match_type="exact",
            )

        # 3. Alias lookup
        alias_result = self.alias_loader.resolve(value, check_date)
        if alias_result:
            return TeamResolveResult(
                canonical_id=alias_result,
                confidence=95,
                match_type="alias",
            )

        # A city shared by several teams cannot be attributed to one of
        # them; fuzzy matching would only pick a team arbitrarily, so hand
        # the value to manual review instead.
        if len(city_ids) > 1:
            return self._unresolved(
                value,
                check_date,
                source_url,
                context={
                    "reason": "ambiguous_city",
                    "candidates": ",".join(city_ids),
                },
            )

        # 4. Fuzzy match
        matches = fuzzy_match_team(
            value,
            self._candidates,
            threshold=self.fuzzy_threshold,
        )
        if matches:
            best = matches[0]
            review_item = None
            # Create review item for low confidence matches
            if best.confidence < self.REVIEW_CONFIDENCE_THRESHOLD:
                review_item = self._make_review_item(
                    ReviewReason.LOW_CONFIDENCE_MATCH,
                    value,
                    {"match_type": "fuzzy"},
                    check_date,
                    source_url,
                    matches,
                )
            return TeamResolveResult(
                canonical_id=best.canonical_id,
                confidence=best.confidence,
                match_type="fuzzy",
                review_item=review_item,
            )

        # 5. Unresolved
        return self._unresolved(value, check_date, source_url)

    def get_team_info(self, abbreviation: str) -> Optional[tuple[str, str, str]]:
        """Get team info by abbreviation.

        Args:
            abbreviation: Team abbreviation; case and surrounding
                whitespace are ignored, as in resolve().

        Returns:
            Tuple of (canonical_id, full_name, city) or None
        """
        return self._mappings.get(abbreviation.strip().upper())

    def get_all_teams(self) -> list[tuple[str, str, str]]:
        """Get all teams for this sport.

        Returns:
            List of (canonical_id, full_name, city) tuples, one per
            canonical ID, taken from the first mapping entry for that ID.
        """
        unique: dict[str, tuple[str, str, str]] = {}
        for info in self._mappings.values():
            # setdefault keeps the first entry seen for each canonical ID.
            unique.setdefault(info[0], info)
        return list(unique.values())
# Per-sport resolver cache, keyed by lowercase sport code.
_resolvers: dict[str, TeamResolver] = {}


def get_team_resolver(sport: str) -> TeamResolver:
    """Return the shared resolver for ``sport``, building it on first use.

    The sport code is lowercased before it is used as the cache key, so
    'NBA' and 'nba' share one resolver instance.
    """
    cache_key = sport.lower()
    try:
        return _resolvers[cache_key]
    except KeyError:
        # First request for this sport: construct once and remember it.
        resolver = TeamResolver(cache_key)
        _resolvers[cache_key] = resolver
        return resolver
def resolve_team(
    sport: str,
    value: str,
    check_date: Optional[date] = None,
    source_url: Optional[str] = None,
) -> TeamResolveResult:
    """Convenience function to resolve a team name.

    Looks up (or creates) the cached resolver for ``sport`` and delegates
    to its ``resolve`` method.

    Args:
        sport: Sport code
        value: Team name to resolve
        check_date: Date for alias validity
        source_url: Source URL recorded on any manual review item; optional
            and last so existing positional callers keep working.

    Returns:
        TeamResolveResult
    """
    # Forward source_url so review items created through this shortcut keep
    # their provenance, matching a direct TeamResolver.resolve() call.
    return get_team_resolver(sport).resolve(value, check_date, source_url)