485 lines
22 KiB
Python
485 lines
22 KiB
Python
"""Team name resolver with exact, alias, and fuzzy matching."""
|
|
|
|
from dataclasses import dataclass
|
|
from datetime import date
|
|
from typing import Optional
|
|
from uuid import uuid4
|
|
|
|
from ..config import FUZZY_MATCH_THRESHOLD
|
|
from ..models.aliases import (
|
|
AliasType,
|
|
FuzzyMatch,
|
|
ManualReviewItem,
|
|
ReviewReason,
|
|
)
|
|
from .alias_loader import get_team_alias_loader, TeamAliasLoader
|
|
from .fuzzy import MatchCandidate, fuzzy_match_team, exact_match
|
|
|
|
|
|
@dataclass
class TeamResolveResult:
    """Outcome of a single team-name resolution attempt.

    Attributes:
        canonical_id: Canonical team ID that was resolved, or None when
            nothing could be resolved.
        confidence: Match confidence; 100 for exact matches, lower for
            fuzzy matches.
        match_type: How the match was made: 'exact', 'alias', 'fuzzy',
            or 'unresolved'.
        review_item: ManualReviewItem attached when resolution failed or
            the match confidence was low; None otherwise.
    """

    # Field order is part of the positional constructor signature.
    canonical_id: Optional[str]
    confidence: int
    match_type: str
    review_item: Optional[ManualReviewItem] = None
|
|
|
|
|
|
# Hardcoded team mappings for each sport
# Format: {sport: {abbreviation: (canonical_id, full_name, city)}}
#
# - Sport keys are lower-case; abbreviation keys are upper-case (TeamResolver
#   lower-cases the sport and upper-cases the lookup value before indexing).
# - Several abbreviations may point at the same canonical_id (e.g. "BKN" and
#   "BRK"); the first entry for a canonical_id is the one whose full_name/city
#   is reported by TeamResolver.get_all_teams().
# - Cities are NOT unique within a sport (e.g. "Los Angeles" is shared by LAC
#   and LAL in the NBA), so entry order decides which team a bare city
#   resolves to in TeamResolver.resolve().
TEAM_MAPPINGS: dict[str, dict[str, tuple[str, str, str]]] = {
    "nba": {
        "ATL": ("team_nba_atl", "Atlanta Hawks", "Atlanta"),
        "BOS": ("team_nba_bos", "Boston Celtics", "Boston"),
        "BKN": ("team_nba_brk", "Brooklyn Nets", "Brooklyn"),
        "BRK": ("team_nba_brk", "Brooklyn Nets", "Brooklyn"),
        "CHA": ("team_nba_cho", "Charlotte Hornets", "Charlotte"),
        "CHO": ("team_nba_cho", "Charlotte Hornets", "Charlotte"),
        "CHI": ("team_nba_chi", "Chicago Bulls", "Chicago"),
        "CLE": ("team_nba_cle", "Cleveland Cavaliers", "Cleveland"),
        "DAL": ("team_nba_dal", "Dallas Mavericks", "Dallas"),
        "DEN": ("team_nba_den", "Denver Nuggets", "Denver"),
        "DET": ("team_nba_det", "Detroit Pistons", "Detroit"),
        "GSW": ("team_nba_gsw", "Golden State Warriors", "Golden State"),
        "GS": ("team_nba_gsw", "Golden State Warriors", "Golden State"),
        "HOU": ("team_nba_hou", "Houston Rockets", "Houston"),
        "IND": ("team_nba_ind", "Indiana Pacers", "Indiana"),
        "LAC": ("team_nba_lac", "Los Angeles Clippers", "Los Angeles"),
        "LAL": ("team_nba_lal", "Los Angeles Lakers", "Los Angeles"),
        "MEM": ("team_nba_mem", "Memphis Grizzlies", "Memphis"),
        "MIA": ("team_nba_mia", "Miami Heat", "Miami"),
        "MIL": ("team_nba_mil", "Milwaukee Bucks", "Milwaukee"),
        "MIN": ("team_nba_min", "Minnesota Timberwolves", "Minnesota"),
        "NOP": ("team_nba_nop", "New Orleans Pelicans", "New Orleans"),
        "NO": ("team_nba_nop", "New Orleans Pelicans", "New Orleans"),
        "NYK": ("team_nba_nyk", "New York Knicks", "New York"),
        "NY": ("team_nba_nyk", "New York Knicks", "New York"),
        "OKC": ("team_nba_okc", "Oklahoma City Thunder", "Oklahoma City"),
        "ORL": ("team_nba_orl", "Orlando Magic", "Orlando"),
        "PHI": ("team_nba_phi", "Philadelphia 76ers", "Philadelphia"),
        "PHX": ("team_nba_phx", "Phoenix Suns", "Phoenix"),
        "PHO": ("team_nba_phx", "Phoenix Suns", "Phoenix"),
        "POR": ("team_nba_por", "Portland Trail Blazers", "Portland"),
        "SAC": ("team_nba_sac", "Sacramento Kings", "Sacramento"),
        "SAS": ("team_nba_sas", "San Antonio Spurs", "San Antonio"),
        "SA": ("team_nba_sas", "San Antonio Spurs", "San Antonio"),
        "TOR": ("team_nba_tor", "Toronto Raptors", "Toronto"),
        "UTA": ("team_nba_uta", "Utah Jazz", "Utah"),
        "WAS": ("team_nba_was", "Washington Wizards", "Washington"),
        "WSH": ("team_nba_was", "Washington Wizards", "Washington"),
    },
    "mlb": {
        "ARI": ("team_mlb_ari", "Arizona Diamondbacks", "Arizona"),
        "ATL": ("team_mlb_atl", "Atlanta Braves", "Atlanta"),
        "BAL": ("team_mlb_bal", "Baltimore Orioles", "Baltimore"),
        "BOS": ("team_mlb_bos", "Boston Red Sox", "Boston"),
        "CHC": ("team_mlb_chc", "Chicago Cubs", "Chicago"),
        "CHW": ("team_mlb_chw", "Chicago White Sox", "Chicago"),
        "CWS": ("team_mlb_chw", "Chicago White Sox", "Chicago"),
        "CIN": ("team_mlb_cin", "Cincinnati Reds", "Cincinnati"),
        "CLE": ("team_mlb_cle", "Cleveland Guardians", "Cleveland"),
        "COL": ("team_mlb_col", "Colorado Rockies", "Colorado"),
        "DET": ("team_mlb_det", "Detroit Tigers", "Detroit"),
        "HOU": ("team_mlb_hou", "Houston Astros", "Houston"),
        "KC": ("team_mlb_kc", "Kansas City Royals", "Kansas City"),
        "KCR": ("team_mlb_kc", "Kansas City Royals", "Kansas City"),
        "LAA": ("team_mlb_laa", "Los Angeles Angels", "Los Angeles"),
        # Same canonical team as "LAA", but listed with a different city.
        "ANA": ("team_mlb_laa", "Los Angeles Angels", "Anaheim"),
        "LAD": ("team_mlb_lad", "Los Angeles Dodgers", "Los Angeles"),
        "MIA": ("team_mlb_mia", "Miami Marlins", "Miami"),
        # Same canonical team as "MIA", but listed with a different city.
        "FLA": ("team_mlb_mia", "Miami Marlins", "Florida"),
        "MIL": ("team_mlb_mil", "Milwaukee Brewers", "Milwaukee"),
        "MIN": ("team_mlb_min", "Minnesota Twins", "Minnesota"),
        "NYM": ("team_mlb_nym", "New York Mets", "New York"),
        "NYY": ("team_mlb_nyy", "New York Yankees", "New York"),
        "OAK": ("team_mlb_oak", "Oakland Athletics", "Oakland"),
        "PHI": ("team_mlb_phi", "Philadelphia Phillies", "Philadelphia"),
        "PIT": ("team_mlb_pit", "Pittsburgh Pirates", "Pittsburgh"),
        "SD": ("team_mlb_sd", "San Diego Padres", "San Diego"),
        "SDP": ("team_mlb_sd", "San Diego Padres", "San Diego"),
        "SF": ("team_mlb_sf", "San Francisco Giants", "San Francisco"),
        "SFG": ("team_mlb_sf", "San Francisco Giants", "San Francisco"),
        "SEA": ("team_mlb_sea", "Seattle Mariners", "Seattle"),
        "STL": ("team_mlb_stl", "St. Louis Cardinals", "St. Louis"),
        "TB": ("team_mlb_tbr", "Tampa Bay Rays", "Tampa Bay"),
        "TBR": ("team_mlb_tbr", "Tampa Bay Rays", "Tampa Bay"),
        "TEX": ("team_mlb_tex", "Texas Rangers", "Texas"),
        "TOR": ("team_mlb_tor", "Toronto Blue Jays", "Toronto"),
        "WSN": ("team_mlb_wsn", "Washington Nationals", "Washington"),
        "WAS": ("team_mlb_wsn", "Washington Nationals", "Washington"),
    },
    "nfl": {
        "ARI": ("team_nfl_ari", "Arizona Cardinals", "Arizona"),
        "ATL": ("team_nfl_atl", "Atlanta Falcons", "Atlanta"),
        "BAL": ("team_nfl_bal", "Baltimore Ravens", "Baltimore"),
        "BUF": ("team_nfl_buf", "Buffalo Bills", "Buffalo"),
        "CAR": ("team_nfl_car", "Carolina Panthers", "Carolina"),
        "CHI": ("team_nfl_chi", "Chicago Bears", "Chicago"),
        "CIN": ("team_nfl_cin", "Cincinnati Bengals", "Cincinnati"),
        "CLE": ("team_nfl_cle", "Cleveland Browns", "Cleveland"),
        "DAL": ("team_nfl_dal", "Dallas Cowboys", "Dallas"),
        "DEN": ("team_nfl_den", "Denver Broncos", "Denver"),
        "DET": ("team_nfl_det", "Detroit Lions", "Detroit"),
        "GB": ("team_nfl_gb", "Green Bay Packers", "Green Bay"),
        "GNB": ("team_nfl_gb", "Green Bay Packers", "Green Bay"),
        "HOU": ("team_nfl_hou", "Houston Texans", "Houston"),
        "IND": ("team_nfl_ind", "Indianapolis Colts", "Indianapolis"),
        "JAX": ("team_nfl_jax", "Jacksonville Jaguars", "Jacksonville"),
        "JAC": ("team_nfl_jax", "Jacksonville Jaguars", "Jacksonville"),
        "KC": ("team_nfl_kc", "Kansas City Chiefs", "Kansas City"),
        "KAN": ("team_nfl_kc", "Kansas City Chiefs", "Kansas City"),
        "LV": ("team_nfl_lv", "Las Vegas Raiders", "Las Vegas"),
        "LAC": ("team_nfl_lac", "Los Angeles Chargers", "Los Angeles"),
        "LAR": ("team_nfl_lar", "Los Angeles Rams", "Los Angeles"),
        "MIA": ("team_nfl_mia", "Miami Dolphins", "Miami"),
        "MIN": ("team_nfl_min", "Minnesota Vikings", "Minnesota"),
        "NE": ("team_nfl_ne", "New England Patriots", "New England"),
        "NWE": ("team_nfl_ne", "New England Patriots", "New England"),
        "NO": ("team_nfl_no", "New Orleans Saints", "New Orleans"),
        "NOR": ("team_nfl_no", "New Orleans Saints", "New Orleans"),
        "NYG": ("team_nfl_nyg", "New York Giants", "New York"),
        "NYJ": ("team_nfl_nyj", "New York Jets", "New York"),
        "PHI": ("team_nfl_phi", "Philadelphia Eagles", "Philadelphia"),
        "PIT": ("team_nfl_pit", "Pittsburgh Steelers", "Pittsburgh"),
        "SF": ("team_nfl_sf", "San Francisco 49ers", "San Francisco"),
        "SFO": ("team_nfl_sf", "San Francisco 49ers", "San Francisco"),
        "SEA": ("team_nfl_sea", "Seattle Seahawks", "Seattle"),
        "TB": ("team_nfl_tb", "Tampa Bay Buccaneers", "Tampa Bay"),
        "TAM": ("team_nfl_tb", "Tampa Bay Buccaneers", "Tampa Bay"),
        "TEN": ("team_nfl_ten", "Tennessee Titans", "Tennessee"),
        "WAS": ("team_nfl_was", "Washington Commanders", "Washington"),
        "WSH": ("team_nfl_was", "Washington Commanders", "Washington"),
    },
    "nhl": {
        "ANA": ("team_nhl_ana", "Anaheim Ducks", "Anaheim"),
        # The canonical ID keeps the "ari" suffix; both "ARI" and "UTA" map to it.
        "ARI": ("team_nhl_ari", "Utah Hockey Club", "Utah"),  # Moved 2024
        "UTA": ("team_nhl_ari", "Utah Hockey Club", "Utah"),
        "BOS": ("team_nhl_bos", "Boston Bruins", "Boston"),
        "BUF": ("team_nhl_buf", "Buffalo Sabres", "Buffalo"),
        "CGY": ("team_nhl_cgy", "Calgary Flames", "Calgary"),
        "CAR": ("team_nhl_car", "Carolina Hurricanes", "Carolina"),
        "CHI": ("team_nhl_chi", "Chicago Blackhawks", "Chicago"),
        "COL": ("team_nhl_col", "Colorado Avalanche", "Colorado"),
        "CBJ": ("team_nhl_cbj", "Columbus Blue Jackets", "Columbus"),
        "DAL": ("team_nhl_dal", "Dallas Stars", "Dallas"),
        "DET": ("team_nhl_det", "Detroit Red Wings", "Detroit"),
        "EDM": ("team_nhl_edm", "Edmonton Oilers", "Edmonton"),
        "FLA": ("team_nhl_fla", "Florida Panthers", "Florida"),
        "LA": ("team_nhl_la", "Los Angeles Kings", "Los Angeles"),
        "LAK": ("team_nhl_la", "Los Angeles Kings", "Los Angeles"),
        "MIN": ("team_nhl_min", "Minnesota Wild", "Minnesota"),
        "MTL": ("team_nhl_mtl", "Montreal Canadiens", "Montreal"),
        "MON": ("team_nhl_mtl", "Montreal Canadiens", "Montreal"),
        "NSH": ("team_nhl_nsh", "Nashville Predators", "Nashville"),
        "NAS": ("team_nhl_nsh", "Nashville Predators", "Nashville"),
        "NJ": ("team_nhl_njd", "New Jersey Devils", "New Jersey"),
        "NJD": ("team_nhl_njd", "New Jersey Devils", "New Jersey"),
        "NYI": ("team_nhl_nyi", "New York Islanders", "New York"),
        "NYR": ("team_nhl_nyr", "New York Rangers", "New York"),
        "OTT": ("team_nhl_ott", "Ottawa Senators", "Ottawa"),
        "PHI": ("team_nhl_phi", "Philadelphia Flyers", "Philadelphia"),
        "PIT": ("team_nhl_pit", "Pittsburgh Penguins", "Pittsburgh"),
        "SJ": ("team_nhl_sj", "San Jose Sharks", "San Jose"),
        "SJS": ("team_nhl_sj", "San Jose Sharks", "San Jose"),
        "SEA": ("team_nhl_sea", "Seattle Kraken", "Seattle"),
        "STL": ("team_nhl_stl", "St. Louis Blues", "St. Louis"),
        "TB": ("team_nhl_tb", "Tampa Bay Lightning", "Tampa Bay"),
        "TBL": ("team_nhl_tb", "Tampa Bay Lightning", "Tampa Bay"),
        "TOR": ("team_nhl_tor", "Toronto Maple Leafs", "Toronto"),
        "VAN": ("team_nhl_van", "Vancouver Canucks", "Vancouver"),
        "VGK": ("team_nhl_vgk", "Vegas Golden Knights", "Vegas"),
        "VEG": ("team_nhl_vgk", "Vegas Golden Knights", "Vegas"),
        "WAS": ("team_nhl_was", "Washington Capitals", "Washington"),
        "WSH": ("team_nhl_was", "Washington Capitals", "Washington"),
        "WPG": ("team_nhl_wpg", "Winnipeg Jets", "Winnipeg"),
    },
    "mls": {
        "ATL": ("team_mls_atl", "Atlanta United", "Atlanta"),
        "AUS": ("team_mls_aus", "Austin FC", "Austin"),
        "CLT": ("team_mls_clt", "Charlotte FC", "Charlotte"),
        "CHI": ("team_mls_chi", "Chicago Fire", "Chicago"),
        "CIN": ("team_mls_cin", "FC Cincinnati", "Cincinnati"),
        "COL": ("team_mls_col", "Colorado Rapids", "Colorado"),
        "CLB": ("team_mls_clb", "Columbus Crew", "Columbus"),
        "DAL": ("team_mls_dal", "FC Dallas", "Dallas"),
        "DC": ("team_mls_dc", "D.C. United", "Washington"),
        "HOU": ("team_mls_hou", "Houston Dynamo", "Houston"),
        "LAG": ("team_mls_lag", "LA Galaxy", "Los Angeles"),
        "LAFC": ("team_mls_lafc", "Los Angeles FC", "Los Angeles"),
        "MIA": ("team_mls_mia", "Inter Miami", "Miami"),
        "MIN": ("team_mls_min", "Minnesota United", "Minnesota"),
        "MTL": ("team_mls_mtl", "CF Montreal", "Montreal"),
        "NSH": ("team_mls_nsh", "Nashville SC", "Nashville"),
        "NE": ("team_mls_ne", "New England Revolution", "New England"),
        "NYC": ("team_mls_nyc", "New York City FC", "New York"),
        "RB": ("team_mls_ny", "New York Red Bulls", "New York"),
        "RBNY": ("team_mls_ny", "New York Red Bulls", "New York"),
        "ORL": ("team_mls_orl", "Orlando City", "Orlando"),
        "PHI": ("team_mls_phi", "Philadelphia Union", "Philadelphia"),
        "POR": ("team_mls_por", "Portland Timbers", "Portland"),
        "SLC": ("team_mls_slc", "Real Salt Lake", "Salt Lake"),
        "RSL": ("team_mls_slc", "Real Salt Lake", "Salt Lake"),
        "SJ": ("team_mls_sj", "San Jose Earthquakes", "San Jose"),
        "SD": ("team_mls_sd", "San Diego FC", "San Diego"),
        "SEA": ("team_mls_sea", "Seattle Sounders", "Seattle"),
        "SKC": ("team_mls_skc", "Sporting Kansas City", "Kansas City"),
        "STL": ("team_mls_stl", "St. Louis City SC", "St. Louis"),
        "TOR": ("team_mls_tor", "Toronto FC", "Toronto"),
        "VAN": ("team_mls_van", "Vancouver Whitecaps", "Vancouver"),
    },
    "wnba": {
        "ATL": ("team_wnba_atl", "Atlanta Dream", "Atlanta"),
        "CHI": ("team_wnba_chi", "Chicago Sky", "Chicago"),
        "CON": ("team_wnba_con", "Connecticut Sun", "Connecticut"),
        "DAL": ("team_wnba_dal", "Dallas Wings", "Dallas"),
        "GSV": ("team_wnba_gsv", "Golden State Valkyries", "Golden State"),
        "IND": ("team_wnba_ind", "Indiana Fever", "Indiana"),
        "LV": ("team_wnba_lv", "Las Vegas Aces", "Las Vegas"),
        "LA": ("team_wnba_la", "Los Angeles Sparks", "Los Angeles"),
        "MIN": ("team_wnba_min", "Minnesota Lynx", "Minnesota"),
        "NY": ("team_wnba_ny", "New York Liberty", "New York"),
        "PHX": ("team_wnba_phx", "Phoenix Mercury", "Phoenix"),
        "SEA": ("team_wnba_sea", "Seattle Storm", "Seattle"),
        "WAS": ("team_wnba_was", "Washington Mystics", "Washington"),
    },
    "nwsl": {
        "ANF": ("team_nwsl_anf", "Angel City FC", "Los Angeles"),
        "CHI": ("team_nwsl_chi", "Chicago Red Stars", "Chicago"),
        "HOU": ("team_nwsl_hou", "Houston Dash", "Houston"),
        "KC": ("team_nwsl_kc", "Kansas City Current", "Kansas City"),
        "NJ": ("team_nwsl_nj", "NJ/NY Gotham FC", "New Jersey"),
        "NC": ("team_nwsl_nc", "North Carolina Courage", "North Carolina"),
        "ORL": ("team_nwsl_orl", "Orlando Pride", "Orlando"),
        "POR": ("team_nwsl_por", "Portland Thorns", "Portland"),
        # NOTE(review): "RGN" reads like a Reign abbreviation, yet it maps to
        # Racing Louisville while Seattle Reign uses "SEA" — confirm intended.
        "RGN": ("team_nwsl_rgn", "Racing Louisville", "Louisville"),
        "SD": ("team_nwsl_sd", "San Diego Wave", "San Diego"),
        "SEA": ("team_nwsl_sea", "Seattle Reign", "Seattle"),
        "SLC": ("team_nwsl_slc", "Utah Royals", "Utah"),
        "WAS": ("team_nwsl_was", "Washington Spirit", "Washington"),
        "BFC": ("team_nwsl_bfc", "Bay FC", "San Francisco"),
        "BOS": ("team_nwsl_bos", "Boston Legacy FC", "Boston"),
        "DEN": ("team_nwsl_den", "Denver Summit FC", "Denver"),
    },
}
|
|
|
|
|
|
class TeamResolver:
    """Resolves team names to canonical IDs for a single sport.

    Resolution order:
    1. Exact match against abbreviation mappings
    2. Exact match against full team names, or against a city that belongs
       to exactly one team in this sport
    3. Alias lookup (with date awareness)
    4. Ambiguous-city check: a city shared by several teams (e.g.
       "Los Angeles") is returned as unresolved rather than guessed
    5. Fuzzy match against all known names
    6. Unresolved (returns ManualReviewItem)
    """

    # Fuzzy matches scoring below this are still returned, but carry a
    # ManualReviewItem so a human can confirm them.
    REVIEW_CONFIDENCE_THRESHOLD = 90

    # Looser settings used only to populate suggestions on unresolved items.
    SUGGESTION_THRESHOLD = 50
    SUGGESTION_LIMIT = 5

    def __init__(
        self,
        sport: str,
        alias_loader: Optional[TeamAliasLoader] = None,
        fuzzy_threshold: int = FUZZY_MATCH_THRESHOLD,
    ):
        """Initialize the resolver.

        Args:
            sport: Sport code (e.g., 'nba', 'mlb'); matched case-insensitively.
                An unknown sport yields a resolver with no mappings.
            alias_loader: Team alias loader (default: global loader)
            fuzzy_threshold: Minimum fuzzy match score
        """
        self.sport = sport.lower()
        # Identity check rather than truthiness, so an explicitly supplied
        # loader is never swapped for the global one just because it is falsy.
        self.alias_loader = (
            alias_loader if alias_loader is not None else get_team_alias_loader()
        )
        self.fuzzy_threshold = fuzzy_threshold
        self._mappings = TEAM_MAPPINGS.get(self.sport, {})

        # Lower-cased lookup tables for the exact name/city step.
        self._full_names, self._cities = self._build_exact_indexes()

        # Build match candidates for fuzzy matching
        self._candidates = self._build_candidates()

    def _build_exact_indexes(self) -> tuple[dict[str, str], dict[str, list[str]]]:
        """Build lower-cased full-name and city lookup tables.

        Returns:
            A pair ``(full_names, cities)`` where ``full_names`` maps a
            lower-cased full team name to its canonical ID, and ``cities``
            maps a lower-cased city to the distinct canonical IDs located
            there, in mapping order.
        """
        full_names: dict[str, str] = {}
        cities: dict[str, list[str]] = {}

        for canonical_id, full_name, city in self._mappings.values():
            full_names.setdefault(full_name.lower(), canonical_id)
            ids_in_city = cities.setdefault(city.lower(), [])
            # Alternate abbreviations repeat the same team; count it once.
            if canonical_id not in ids_in_city:
                ids_in_city.append(canonical_id)

        return full_names, cities

    def _build_candidates(self) -> list[MatchCandidate]:
        """Build one fuzzy-match candidate per canonical team.

        Each candidate carries the team's full name plus every abbreviation
        and city listed for it as aliases.
        """
        # Group by canonical ID to avoid duplicates
        by_id: dict[str, tuple[str, list[str]]] = {}

        for abbrev, (canonical_id, full_name, city) in self._mappings.items():
            _, aliases = by_id.setdefault(canonical_id, (full_name, []))
            aliases.append(abbrev)
            aliases.append(city)

        return [
            MatchCandidate(
                canonical_id=cid,
                name=name,
                # dict.fromkeys dedupes while keeping insertion order; a set
                # would make alias order vary with string hash randomization.
                aliases=list(dict.fromkeys(aliases)),
            )
            for cid, (name, aliases) in by_id.items()
        ]

    def _new_review_id(self) -> str:
        """Return a fresh identifier for a manual review item."""
        return f"team_{uuid4().hex[:8]}"

    def _unresolved(
        self,
        raw_value: str,
        lookup: str,
        check_date: Optional[date],
        source_url: Optional[str],
        context: dict,
    ) -> TeamResolveResult:
        """Build the 'unresolved' result together with its review item.

        Args:
            raw_value: The value as received, recorded on the review item.
            lookup: Cleaned value used to compute suggestions; when empty,
                no suggestions are computed.
            check_date: Game date recorded on the review item.
            source_url: Source URL recorded on the review item.
            context: Extra context recorded on the review item.
        """
        suggestions = []
        if lookup:
            suggestions = fuzzy_match_team(
                lookup,
                self._candidates,
                threshold=self.SUGGESTION_THRESHOLD,  # Lower threshold for suggestions
                top_n=self.SUGGESTION_LIMIT,
            )

        review_item = ManualReviewItem(
            id=self._new_review_id(),
            reason=ReviewReason.UNRESOLVED_TEAM,
            sport=self.sport,
            raw_value=raw_value,
            context=context,
            source_url=source_url,
            suggested_matches=suggestions,
            game_date=check_date,
        )

        return TeamResolveResult(
            canonical_id=None,
            confidence=0,
            match_type="unresolved",
            review_item=review_item,
        )

    def resolve(
        self,
        value: str,
        check_date: Optional[date] = None,
        source_url: Optional[str] = None,
    ) -> TeamResolveResult:
        """Resolve a team name to a canonical ID.

        Args:
            value: Team name, abbreviation, or city to resolve. Surrounding
                whitespace is ignored; None or blank input is reported as
                unresolved.
            check_date: Date for alias validity, passed through to the
                alias loader (None = today)
            source_url: Source URL for manual review items

        Returns:
            TeamResolveResult with resolution details. Low-confidence fuzzy
            matches and unresolved values carry a ManualReviewItem.
        """
        raw_value = value or ""
        text = raw_value.strip()

        # Nothing to match on: do not feed an empty string to the alias
        # loader or the fuzzy matcher.
        if not text:
            return self._unresolved(raw_value, "", check_date, source_url, {})

        # 1. Exact match against abbreviation
        entry = self._mappings.get(text.upper())
        if entry is not None:
            return TeamResolveResult(
                canonical_id=entry[0],
                confidence=100,
                match_type="exact",
            )

        # 2. Exact match against full names, or a city with a single team.
        #    A city shared by several teams is not an exact match for any of
        #    them; it is handled after the alias lookup below.
        key = text.lower()
        city_ids = self._cities.get(key, [])
        canonical_id = self._full_names.get(key)
        if canonical_id is None and len(city_ids) == 1:
            canonical_id = city_ids[0]
        if canonical_id is not None:
            return TeamResolveResult(
                canonical_id=canonical_id,
                confidence=100,
                match_type="exact",
            )

        # 3. Alias lookup
        alias_result = self.alias_loader.resolve(text, check_date)
        if alias_result:
            return TeamResolveResult(
                canonical_id=alias_result,
                confidence=95,
                match_type="alias",
            )

        # 4. Ambiguous city: flag it for review instead of silently picking
        #    whichever team happens to be listed first.
        if len(city_ids) > 1:
            return self._unresolved(
                raw_value,
                text,
                check_date,
                source_url,
                {"ambiguous_city": ", ".join(city_ids)},
            )

        # 5. Fuzzy match
        matches = fuzzy_match_team(
            text,
            self._candidates,
            threshold=self.fuzzy_threshold,
        )

        if matches:
            # Presumably sorted best-first by fuzzy_match_team — TODO confirm.
            best = matches[0]
            review_item = None

            # Create review item for low confidence matches
            if best.confidence < self.REVIEW_CONFIDENCE_THRESHOLD:
                review_item = ManualReviewItem(
                    id=self._new_review_id(),
                    reason=ReviewReason.LOW_CONFIDENCE_MATCH,
                    sport=self.sport,
                    raw_value=raw_value,
                    context={"match_type": "fuzzy"},
                    source_url=source_url,
                    suggested_matches=matches,
                    game_date=check_date,
                )

            return TeamResolveResult(
                canonical_id=best.canonical_id,
                confidence=best.confidence,
                match_type="fuzzy",
                review_item=review_item,
            )

        # 6. Unresolved
        return self._unresolved(raw_value, text, check_date, source_url, {})

    def get_team_info(self, abbreviation: str) -> Optional[tuple[str, str, str]]:
        """Get team info by abbreviation.

        Args:
            abbreviation: Team abbreviation; case and surrounding whitespace
                are ignored, matching how resolve() treats its input.

        Returns:
            Tuple of (canonical_id, full_name, city) or None
        """
        return self._mappings.get(abbreviation.strip().upper())

    def get_all_teams(self) -> list[tuple[str, str, str]]:
        """Get all teams for this sport.

        Returns:
            List of (canonical_id, full_name, city) tuples, one per canonical
            ID, taken from the first mapping entry listed for that ID.
        """
        unique: dict[str, tuple[str, str, str]] = {}
        for team in self._mappings.values():
            # setdefault keeps the first entry seen for each canonical ID.
            unique.setdefault(team[0], team)
        return list(unique.values())
|
|
|
|
|
|
# Per-sport resolver cache, keyed by lower-cased sport code.
_resolvers: dict[str, TeamResolver] = {}


def get_team_resolver(sport: str) -> TeamResolver:
    """Return the cached resolver for ``sport``, creating it on first use.

    The sport code is lower-cased before the cache lookup, so 'NBA' and
    'nba' share one resolver instance.
    """
    key = sport.lower()
    resolver = _resolvers.get(key)
    if resolver is None:
        resolver = TeamResolver(key)
        _resolvers[key] = resolver
    return resolver
|
|
|
|
|
|
def resolve_team(
    sport: str,
    value: str,
    check_date: Optional[date] = None,
    source_url: Optional[str] = None,
) -> TeamResolveResult:
    """Convenience function to resolve a team name.

    Looks up the resolver for ``sport`` via get_team_resolver() and
    delegates to its resolve() method.

    Args:
        sport: Sport code
        value: Team name to resolve
        check_date: Date for alias validity
        source_url: Source URL to record on any manual review item that the
            resolution produces (optional; defaults to None)

    Returns:
        TeamResolveResult
    """
    return get_team_resolver(sport).resolve(value, check_date, source_url)
|