feat(scripts): rewrite parser as modular Python CLI
Replace monolithic scraping scripts with sportstime_parser package: - Multi-source scrapers with automatic fallback for 7 sports - Canonical ID generation for games, teams, and stadiums - Fuzzy matching with configurable thresholds for name resolution - CloudKit Web Services uploader with JWT auth, diff-based updates - Resumable uploads with checkpoint state persistence - Validation reports with manual review items and suggested matches - Comprehensive test suite (249 tests) CLI: sportstime-parser scrape|validate|upload|status|retry|clear Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
482
Scripts/sportstime_parser/normalizers/team_resolver.py
Normal file
482
Scripts/sportstime_parser/normalizers/team_resolver.py
Normal file
@@ -0,0 +1,482 @@
|
||||
"""Team name resolver with exact, alias, and fuzzy matching."""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import date
|
||||
from typing import Optional
|
||||
from uuid import uuid4
|
||||
|
||||
from ..config import FUZZY_MATCH_THRESHOLD
|
||||
from ..models.aliases import (
|
||||
AliasType,
|
||||
FuzzyMatch,
|
||||
ManualReviewItem,
|
||||
ReviewReason,
|
||||
)
|
||||
from .alias_loader import get_team_alias_loader, TeamAliasLoader
|
||||
from .fuzzy import MatchCandidate, fuzzy_match_team, exact_match
|
||||
|
||||
|
||||
@dataclass
class TeamResolveResult:
    """Result of team resolution.

    Produced by ``TeamResolver.resolve`` for every raw team string, whether
    or not a team could be identified.

    Attributes:
        canonical_id: Resolved canonical team ID (None if unresolved)
        confidence: Confidence in the match: 100 for exact matches, 95 for
            alias matches, the fuzzy matcher's score for fuzzy matches and
            0 when unresolved
        match_type: How the match was made ('exact', 'alias', 'fuzzy',
            'unresolved')
        review_item: ManualReviewItem if resolution failed or the match was
            low confidence, otherwise None
    """

    # None means no team could be determined.
    canonical_id: Optional[str]
    # Integer score; see the class docstring for the values in use.
    confidence: int
    # One of 'exact', 'alias', 'fuzzy', 'unresolved'.
    match_type: str
    # Present only when a human should look at this resolution.
    review_item: Optional[ManualReviewItem] = None
|
||||
|
||||
|
||||
# Hardcoded team mappings for each sport.
# Format: {sport: {abbreviation: (canonical_id, full_name, city)}}
#
# Notes for maintainers:
# - Sport keys are lowercase and abbreviation keys are uppercase; lookups
#   normalise their input accordingly.
# - Several abbreviations may point at the same canonical ID (e.g. "BKN" and
#   "BRK" both map to team_nba_brk) so that different sources' spellings
#   resolve to one team.
# - The city field is NOT unique within a sport (e.g. "Los Angeles" is used
#   by both LAC and LAL in the NBA, "New York" by both NYM and NYY in MLB),
#   so a bare city does not always identify a single team.
TEAM_MAPPINGS: dict[str, dict[str, tuple[str, str, str]]] = {
    # National Basketball Association
    "nba": {
        "ATL": ("team_nba_atl", "Atlanta Hawks", "Atlanta"),
        "BOS": ("team_nba_bos", "Boston Celtics", "Boston"),
        "BKN": ("team_nba_brk", "Brooklyn Nets", "Brooklyn"),
        "BRK": ("team_nba_brk", "Brooklyn Nets", "Brooklyn"),
        "CHA": ("team_nba_cho", "Charlotte Hornets", "Charlotte"),
        "CHO": ("team_nba_cho", "Charlotte Hornets", "Charlotte"),
        "CHI": ("team_nba_chi", "Chicago Bulls", "Chicago"),
        "CLE": ("team_nba_cle", "Cleveland Cavaliers", "Cleveland"),
        "DAL": ("team_nba_dal", "Dallas Mavericks", "Dallas"),
        "DEN": ("team_nba_den", "Denver Nuggets", "Denver"),
        "DET": ("team_nba_det", "Detroit Pistons", "Detroit"),
        "GSW": ("team_nba_gsw", "Golden State Warriors", "Golden State"),
        "GS": ("team_nba_gsw", "Golden State Warriors", "Golden State"),
        "HOU": ("team_nba_hou", "Houston Rockets", "Houston"),
        "IND": ("team_nba_ind", "Indiana Pacers", "Indiana"),
        "LAC": ("team_nba_lac", "Los Angeles Clippers", "Los Angeles"),
        "LAL": ("team_nba_lal", "Los Angeles Lakers", "Los Angeles"),
        "MEM": ("team_nba_mem", "Memphis Grizzlies", "Memphis"),
        "MIA": ("team_nba_mia", "Miami Heat", "Miami"),
        "MIL": ("team_nba_mil", "Milwaukee Bucks", "Milwaukee"),
        "MIN": ("team_nba_min", "Minnesota Timberwolves", "Minnesota"),
        "NOP": ("team_nba_nop", "New Orleans Pelicans", "New Orleans"),
        "NO": ("team_nba_nop", "New Orleans Pelicans", "New Orleans"),
        "NYK": ("team_nba_nyk", "New York Knicks", "New York"),
        "NY": ("team_nba_nyk", "New York Knicks", "New York"),
        "OKC": ("team_nba_okc", "Oklahoma City Thunder", "Oklahoma City"),
        "ORL": ("team_nba_orl", "Orlando Magic", "Orlando"),
        "PHI": ("team_nba_phi", "Philadelphia 76ers", "Philadelphia"),
        "PHX": ("team_nba_phx", "Phoenix Suns", "Phoenix"),
        "PHO": ("team_nba_phx", "Phoenix Suns", "Phoenix"),
        "POR": ("team_nba_por", "Portland Trail Blazers", "Portland"),
        "SAC": ("team_nba_sac", "Sacramento Kings", "Sacramento"),
        "SAS": ("team_nba_sas", "San Antonio Spurs", "San Antonio"),
        "SA": ("team_nba_sas", "San Antonio Spurs", "San Antonio"),
        "TOR": ("team_nba_tor", "Toronto Raptors", "Toronto"),
        "UTA": ("team_nba_uta", "Utah Jazz", "Utah"),
        "WAS": ("team_nba_was", "Washington Wizards", "Washington"),
        "WSH": ("team_nba_was", "Washington Wizards", "Washington"),
    },
    # Major League Baseball
    "mlb": {
        "ARI": ("team_mlb_ari", "Arizona Diamondbacks", "Arizona"),
        "ATL": ("team_mlb_atl", "Atlanta Braves", "Atlanta"),
        "BAL": ("team_mlb_bal", "Baltimore Orioles", "Baltimore"),
        "BOS": ("team_mlb_bos", "Boston Red Sox", "Boston"),
        "CHC": ("team_mlb_chc", "Chicago Cubs", "Chicago"),
        "CHW": ("team_mlb_chw", "Chicago White Sox", "Chicago"),
        "CWS": ("team_mlb_chw", "Chicago White Sox", "Chicago"),
        "CIN": ("team_mlb_cin", "Cincinnati Reds", "Cincinnati"),
        "CLE": ("team_mlb_cle", "Cleveland Guardians", "Cleveland"),
        "COL": ("team_mlb_col", "Colorado Rockies", "Colorado"),
        "DET": ("team_mlb_det", "Detroit Tigers", "Detroit"),
        "HOU": ("team_mlb_hou", "Houston Astros", "Houston"),
        "KC": ("team_mlb_kc", "Kansas City Royals", "Kansas City"),
        "KCR": ("team_mlb_kc", "Kansas City Royals", "Kansas City"),
        "LAA": ("team_mlb_laa", "Los Angeles Angels", "Los Angeles"),
        "ANA": ("team_mlb_laa", "Los Angeles Angels", "Anaheim"),
        "LAD": ("team_mlb_lad", "Los Angeles Dodgers", "Los Angeles"),
        "MIA": ("team_mlb_mia", "Miami Marlins", "Miami"),
        "FLA": ("team_mlb_mia", "Miami Marlins", "Florida"),
        "MIL": ("team_mlb_mil", "Milwaukee Brewers", "Milwaukee"),
        "MIN": ("team_mlb_min", "Minnesota Twins", "Minnesota"),
        "NYM": ("team_mlb_nym", "New York Mets", "New York"),
        "NYY": ("team_mlb_nyy", "New York Yankees", "New York"),
        "OAK": ("team_mlb_oak", "Oakland Athletics", "Oakland"),
        "PHI": ("team_mlb_phi", "Philadelphia Phillies", "Philadelphia"),
        "PIT": ("team_mlb_pit", "Pittsburgh Pirates", "Pittsburgh"),
        "SD": ("team_mlb_sd", "San Diego Padres", "San Diego"),
        "SDP": ("team_mlb_sd", "San Diego Padres", "San Diego"),
        "SF": ("team_mlb_sf", "San Francisco Giants", "San Francisco"),
        "SFG": ("team_mlb_sf", "San Francisco Giants", "San Francisco"),
        "SEA": ("team_mlb_sea", "Seattle Mariners", "Seattle"),
        "STL": ("team_mlb_stl", "St. Louis Cardinals", "St. Louis"),
        "TB": ("team_mlb_tbr", "Tampa Bay Rays", "Tampa Bay"),
        "TBR": ("team_mlb_tbr", "Tampa Bay Rays", "Tampa Bay"),
        "TEX": ("team_mlb_tex", "Texas Rangers", "Texas"),
        "TOR": ("team_mlb_tor", "Toronto Blue Jays", "Toronto"),
        "WSN": ("team_mlb_wsn", "Washington Nationals", "Washington"),
        "WAS": ("team_mlb_wsn", "Washington Nationals", "Washington"),
    },
    # National Football League
    "nfl": {
        "ARI": ("team_nfl_ari", "Arizona Cardinals", "Arizona"),
        "ATL": ("team_nfl_atl", "Atlanta Falcons", "Atlanta"),
        "BAL": ("team_nfl_bal", "Baltimore Ravens", "Baltimore"),
        "BUF": ("team_nfl_buf", "Buffalo Bills", "Buffalo"),
        "CAR": ("team_nfl_car", "Carolina Panthers", "Carolina"),
        "CHI": ("team_nfl_chi", "Chicago Bears", "Chicago"),
        "CIN": ("team_nfl_cin", "Cincinnati Bengals", "Cincinnati"),
        "CLE": ("team_nfl_cle", "Cleveland Browns", "Cleveland"),
        "DAL": ("team_nfl_dal", "Dallas Cowboys", "Dallas"),
        "DEN": ("team_nfl_den", "Denver Broncos", "Denver"),
        "DET": ("team_nfl_det", "Detroit Lions", "Detroit"),
        "GB": ("team_nfl_gb", "Green Bay Packers", "Green Bay"),
        "GNB": ("team_nfl_gb", "Green Bay Packers", "Green Bay"),
        "HOU": ("team_nfl_hou", "Houston Texans", "Houston"),
        "IND": ("team_nfl_ind", "Indianapolis Colts", "Indianapolis"),
        "JAX": ("team_nfl_jax", "Jacksonville Jaguars", "Jacksonville"),
        "JAC": ("team_nfl_jax", "Jacksonville Jaguars", "Jacksonville"),
        "KC": ("team_nfl_kc", "Kansas City Chiefs", "Kansas City"),
        "KAN": ("team_nfl_kc", "Kansas City Chiefs", "Kansas City"),
        "LV": ("team_nfl_lv", "Las Vegas Raiders", "Las Vegas"),
        "LAC": ("team_nfl_lac", "Los Angeles Chargers", "Los Angeles"),
        "LAR": ("team_nfl_lar", "Los Angeles Rams", "Los Angeles"),
        "MIA": ("team_nfl_mia", "Miami Dolphins", "Miami"),
        "MIN": ("team_nfl_min", "Minnesota Vikings", "Minnesota"),
        "NE": ("team_nfl_ne", "New England Patriots", "New England"),
        "NWE": ("team_nfl_ne", "New England Patriots", "New England"),
        "NO": ("team_nfl_no", "New Orleans Saints", "New Orleans"),
        "NOR": ("team_nfl_no", "New Orleans Saints", "New Orleans"),
        "NYG": ("team_nfl_nyg", "New York Giants", "New York"),
        "NYJ": ("team_nfl_nyj", "New York Jets", "New York"),
        "PHI": ("team_nfl_phi", "Philadelphia Eagles", "Philadelphia"),
        "PIT": ("team_nfl_pit", "Pittsburgh Steelers", "Pittsburgh"),
        "SF": ("team_nfl_sf", "San Francisco 49ers", "San Francisco"),
        "SFO": ("team_nfl_sf", "San Francisco 49ers", "San Francisco"),
        "SEA": ("team_nfl_sea", "Seattle Seahawks", "Seattle"),
        "TB": ("team_nfl_tb", "Tampa Bay Buccaneers", "Tampa Bay"),
        "TAM": ("team_nfl_tb", "Tampa Bay Buccaneers", "Tampa Bay"),
        "TEN": ("team_nfl_ten", "Tennessee Titans", "Tennessee"),
        "WAS": ("team_nfl_was", "Washington Commanders", "Washington"),
        "WSH": ("team_nfl_was", "Washington Commanders", "Washington"),
    },
    # National Hockey League
    "nhl": {
        "ANA": ("team_nhl_ana", "Anaheim Ducks", "Anaheim"),
        # Moved 2024; the "ARI" and "UTA" abbreviations share one canonical ID.
        "ARI": ("team_nhl_ari", "Utah Hockey Club", "Utah"),
        "UTA": ("team_nhl_ari", "Utah Hockey Club", "Utah"),
        "BOS": ("team_nhl_bos", "Boston Bruins", "Boston"),
        "BUF": ("team_nhl_buf", "Buffalo Sabres", "Buffalo"),
        "CGY": ("team_nhl_cgy", "Calgary Flames", "Calgary"),
        "CAR": ("team_nhl_car", "Carolina Hurricanes", "Carolina"),
        "CHI": ("team_nhl_chi", "Chicago Blackhawks", "Chicago"),
        "COL": ("team_nhl_col", "Colorado Avalanche", "Colorado"),
        "CBJ": ("team_nhl_cbj", "Columbus Blue Jackets", "Columbus"),
        "DAL": ("team_nhl_dal", "Dallas Stars", "Dallas"),
        "DET": ("team_nhl_det", "Detroit Red Wings", "Detroit"),
        "EDM": ("team_nhl_edm", "Edmonton Oilers", "Edmonton"),
        "FLA": ("team_nhl_fla", "Florida Panthers", "Florida"),
        "LA": ("team_nhl_la", "Los Angeles Kings", "Los Angeles"),
        "LAK": ("team_nhl_la", "Los Angeles Kings", "Los Angeles"),
        "MIN": ("team_nhl_min", "Minnesota Wild", "Minnesota"),
        "MTL": ("team_nhl_mtl", "Montreal Canadiens", "Montreal"),
        "MON": ("team_nhl_mtl", "Montreal Canadiens", "Montreal"),
        "NSH": ("team_nhl_nsh", "Nashville Predators", "Nashville"),
        "NAS": ("team_nhl_nsh", "Nashville Predators", "Nashville"),
        "NJ": ("team_nhl_njd", "New Jersey Devils", "New Jersey"),
        "NJD": ("team_nhl_njd", "New Jersey Devils", "New Jersey"),
        "NYI": ("team_nhl_nyi", "New York Islanders", "New York"),
        "NYR": ("team_nhl_nyr", "New York Rangers", "New York"),
        "OTT": ("team_nhl_ott", "Ottawa Senators", "Ottawa"),
        "PHI": ("team_nhl_phi", "Philadelphia Flyers", "Philadelphia"),
        "PIT": ("team_nhl_pit", "Pittsburgh Penguins", "Pittsburgh"),
        "SJ": ("team_nhl_sj", "San Jose Sharks", "San Jose"),
        "SJS": ("team_nhl_sj", "San Jose Sharks", "San Jose"),
        "SEA": ("team_nhl_sea", "Seattle Kraken", "Seattle"),
        "STL": ("team_nhl_stl", "St. Louis Blues", "St. Louis"),
        "TB": ("team_nhl_tb", "Tampa Bay Lightning", "Tampa Bay"),
        "TBL": ("team_nhl_tb", "Tampa Bay Lightning", "Tampa Bay"),
        "TOR": ("team_nhl_tor", "Toronto Maple Leafs", "Toronto"),
        "VAN": ("team_nhl_van", "Vancouver Canucks", "Vancouver"),
        "VGK": ("team_nhl_vgk", "Vegas Golden Knights", "Vegas"),
        "VEG": ("team_nhl_vgk", "Vegas Golden Knights", "Vegas"),
        "WAS": ("team_nhl_was", "Washington Capitals", "Washington"),
        "WSH": ("team_nhl_was", "Washington Capitals", "Washington"),
        "WPG": ("team_nhl_wpg", "Winnipeg Jets", "Winnipeg"),
    },
    # Major League Soccer
    "mls": {
        "ATL": ("team_mls_atl", "Atlanta United", "Atlanta"),
        "AUS": ("team_mls_aus", "Austin FC", "Austin"),
        "CLT": ("team_mls_clt", "Charlotte FC", "Charlotte"),
        "CHI": ("team_mls_chi", "Chicago Fire", "Chicago"),
        "CIN": ("team_mls_cin", "FC Cincinnati", "Cincinnati"),
        "COL": ("team_mls_col", "Colorado Rapids", "Colorado"),
        "CLB": ("team_mls_clb", "Columbus Crew", "Columbus"),
        "DAL": ("team_mls_dal", "FC Dallas", "Dallas"),
        "DC": ("team_mls_dc", "D.C. United", "Washington"),
        "HOU": ("team_mls_hou", "Houston Dynamo", "Houston"),
        "LAG": ("team_mls_lag", "LA Galaxy", "Los Angeles"),
        "LAFC": ("team_mls_lafc", "Los Angeles FC", "Los Angeles"),
        "MIA": ("team_mls_mia", "Inter Miami", "Miami"),
        "MIN": ("team_mls_min", "Minnesota United", "Minnesota"),
        "MTL": ("team_mls_mtl", "CF Montreal", "Montreal"),
        "NSH": ("team_mls_nsh", "Nashville SC", "Nashville"),
        "NE": ("team_mls_ne", "New England Revolution", "New England"),
        "NYC": ("team_mls_nyc", "New York City FC", "New York"),
        "RB": ("team_mls_ny", "New York Red Bulls", "New York"),
        "RBNY": ("team_mls_ny", "New York Red Bulls", "New York"),
        "ORL": ("team_mls_orl", "Orlando City", "Orlando"),
        "PHI": ("team_mls_phi", "Philadelphia Union", "Philadelphia"),
        "POR": ("team_mls_por", "Portland Timbers", "Portland"),
        "SLC": ("team_mls_slc", "Real Salt Lake", "Salt Lake"),
        "RSL": ("team_mls_slc", "Real Salt Lake", "Salt Lake"),
        "SJ": ("team_mls_sj", "San Jose Earthquakes", "San Jose"),
        "SD": ("team_mls_sd", "San Diego FC", "San Diego"),
        "SEA": ("team_mls_sea", "Seattle Sounders", "Seattle"),
        "SKC": ("team_mls_skc", "Sporting Kansas City", "Kansas City"),
        "STL": ("team_mls_stl", "St. Louis City SC", "St. Louis"),
        "TOR": ("team_mls_tor", "Toronto FC", "Toronto"),
        "VAN": ("team_mls_van", "Vancouver Whitecaps", "Vancouver"),
    },
    # Women's National Basketball Association
    "wnba": {
        "ATL": ("team_wnba_atl", "Atlanta Dream", "Atlanta"),
        "CHI": ("team_wnba_chi", "Chicago Sky", "Chicago"),
        "CON": ("team_wnba_con", "Connecticut Sun", "Connecticut"),
        "DAL": ("team_wnba_dal", "Dallas Wings", "Dallas"),
        "GSV": ("team_wnba_gsv", "Golden State Valkyries", "Golden State"),
        "IND": ("team_wnba_ind", "Indiana Fever", "Indiana"),
        "LV": ("team_wnba_lv", "Las Vegas Aces", "Las Vegas"),
        "LA": ("team_wnba_la", "Los Angeles Sparks", "Los Angeles"),
        "MIN": ("team_wnba_min", "Minnesota Lynx", "Minnesota"),
        "NY": ("team_wnba_ny", "New York Liberty", "New York"),
        "PHX": ("team_wnba_phx", "Phoenix Mercury", "Phoenix"),
        "SEA": ("team_wnba_sea", "Seattle Storm", "Seattle"),
        "WAS": ("team_wnba_was", "Washington Mystics", "Washington"),
    },
    # National Women's Soccer League
    "nwsl": {
        "ANF": ("team_nwsl_anf", "Angel City FC", "Los Angeles"),
        "CHI": ("team_nwsl_chi", "Chicago Red Stars", "Chicago"),
        "HOU": ("team_nwsl_hou", "Houston Dash", "Houston"),
        "KC": ("team_nwsl_kc", "Kansas City Current", "Kansas City"),
        "NJ": ("team_nwsl_nj", "NJ/NY Gotham FC", "New Jersey"),
        "NC": ("team_nwsl_nc", "North Carolina Courage", "North Carolina"),
        "ORL": ("team_nwsl_orl", "Orlando Pride", "Orlando"),
        "POR": ("team_nwsl_por", "Portland Thorns", "Portland"),
        "RGN": ("team_nwsl_rgn", "Racing Louisville", "Louisville"),
        "SD": ("team_nwsl_sd", "San Diego Wave", "San Diego"),
        "SEA": ("team_nwsl_sea", "Seattle Reign", "Seattle"),
        "SLC": ("team_nwsl_slc", "Utah Royals", "Utah"),
        "WAS": ("team_nwsl_was", "Washington Spirit", "Washington"),
        "BFC": ("team_nwsl_bfc", "Bay FC", "San Francisco"),
    },
}
|
||||
|
||||
|
||||
class TeamResolver:
    """Resolves team names to canonical IDs for a single sport.

    Resolution order:
    1. Exact match against abbreviation mappings
    2. Exact match against full team names, or against a city that belongs
       to exactly one team of the sport
    3. Alias lookup (with date awareness)
    4. Fuzzy match against all known names
    5. Unresolved (returns ManualReviewItem)

    A bare city shared by several teams of the sport (for example
    "New York" in MLB) cannot identify a team. Unless the alias loader knows
    the value, it is reported as unresolved with suggestions rather than
    being attributed to whichever team happens to be listed first.
    """

    # Fuzzy matches scoring below this confidence are still returned, but
    # carry a ManualReviewItem so that they can be confirmed by hand.
    REVIEW_CONFIDENCE_CUTOFF = 90

    # Looser fuzzy settings used only to populate the suggestions attached
    # to review items for values that could not be resolved.
    SUGGESTION_THRESHOLD = 50
    SUGGESTION_LIMIT = 5

    def __init__(
        self,
        sport: str,
        alias_loader: Optional[TeamAliasLoader] = None,
        fuzzy_threshold: int = FUZZY_MATCH_THRESHOLD,
    ):
        """Initialize the resolver.

        Args:
            sport: Sport code (e.g., 'nba', 'mlb'); matched case-insensitively.
                A sport without an entry in TEAM_MAPPINGS yields a resolver
                with no known teams.
            alias_loader: Team alias loader (default: global loader)
            fuzzy_threshold: Minimum fuzzy match score
        """
        self.sport = sport.lower()
        # Compare against None explicitly so that a caller-supplied loader is
        # honoured even if the object happens to evaluate as falsy.
        self.alias_loader = (
            alias_loader if alias_loader is not None else get_team_alias_loader()
        )
        self.fuzzy_threshold = fuzzy_threshold
        self._mappings = TEAM_MAPPINGS.get(self.sport, {})

        # Lookup tables are derived once; the mappings do not change afterwards.
        self._exact_names, self._ambiguous_cities = self._build_exact_index()
        self._candidates = self._build_candidates()

    def _build_exact_index(self) -> tuple[dict[str, str], frozenset[str]]:
        """Build the lowercase name/city lookup used for exact matching.

        Returns:
            A pair ``(index, ambiguous_cities)``. ``index`` maps each
            lowercased full name, and each lowercased city owned by exactly
            one canonical ID, to that canonical ID. ``ambiguous_cities``
            holds the lowercased cities shared by two or more canonical IDs.
        """
        index: dict[str, str] = {}
        owners_by_city: dict[str, set[str]] = {}

        for canonical_id, full_name, city in self._mappings.values():
            # First occurrence wins, matching the order of the mapping table.
            index.setdefault(full_name.lower(), canonical_id)
            owners_by_city.setdefault(city.lower(), set()).add(canonical_id)

        ambiguous: set[str] = set()
        for city, owners in owners_by_city.items():
            if len(owners) > 1:
                ambiguous.add(city)
            else:
                # Full names take precedence if a city ever equals a name.
                index.setdefault(city, next(iter(owners)))

        return index, frozenset(ambiguous)

    def _build_candidates(self) -> list[MatchCandidate]:
        """Build one fuzzy-match candidate per canonical team.

        Every abbreviation and city listed for a team becomes an alias of
        its candidate. Aliases are de-duplicated while keeping their first
        seen order, so the candidate list is identical from run to run.
        """
        by_id: dict[str, tuple[str, list[str]]] = {}

        for abbrev, (canonical_id, full_name, city) in self._mappings.items():
            _, aliases = by_id.setdefault(canonical_id, (full_name, []))
            aliases.append(abbrev)
            aliases.append(city)

        return [
            MatchCandidate(
                canonical_id=cid,
                name=name,
                aliases=list(dict.fromkeys(aliases)),  # ordered dedupe
            )
            for cid, (name, aliases) in by_id.items()
        ]

    @staticmethod
    def _exact_result(canonical_id: str) -> TeamResolveResult:
        """Wrap a canonical ID as a full-confidence exact match."""
        return TeamResolveResult(
            canonical_id=canonical_id,
            confidence=100,
            match_type="exact",
        )

    def _make_review_item(
        self,
        reason: ReviewReason,
        value: str,
        context: dict,
        suggestions: list,
        check_date: Optional[date],
        source_url: Optional[str],
    ) -> ManualReviewItem:
        """Create a review item for *value* with a fresh short random ID."""
        return ManualReviewItem(
            id=f"team_{uuid4().hex[:8]}",
            reason=reason,
            sport=self.sport,
            raw_value=value,
            context=context,
            source_url=source_url,
            suggested_matches=suggestions,
            game_date=check_date,
        )

    def _unresolved_result(
        self,
        value: str,
        check_date: Optional[date],
        source_url: Optional[str],
        context: dict,
    ) -> TeamResolveResult:
        """Build the unresolved result, including loose fuzzy suggestions."""
        suggestions = fuzzy_match_team(
            value,
            self._candidates,
            threshold=self.SUGGESTION_THRESHOLD,  # Lower threshold for suggestions
            top_n=self.SUGGESTION_LIMIT,
        )
        review_item = self._make_review_item(
            reason=ReviewReason.UNRESOLVED_TEAM,
            value=value,
            context=context,
            suggestions=suggestions,
            check_date=check_date,
            source_url=source_url,
        )
        return TeamResolveResult(
            canonical_id=None,
            confidence=0,
            match_type="unresolved",
            review_item=review_item,
        )

    def resolve(
        self,
        value: str,
        check_date: Optional[date] = None,
        source_url: Optional[str] = None,
    ) -> TeamResolveResult:
        """Resolve a team name to a canonical ID.

        Args:
            value: Team name, abbreviation, or city to resolve
            check_date: Date for alias validity (None = today)
            source_url: Source URL for manual review items

        Returns:
            TeamResolveResult with resolution details. Exact matches have
            confidence 100 and alias matches 95. Fuzzy matches carry the
            matcher's score plus a review item when that score is below
            REVIEW_CONFIDENCE_CUTOFF. Unresolved values have confidence 0
            and always carry a review item.
        """
        value_upper = value.upper().strip()
        value_lower = value.lower().strip()

        # 1. Exact match against abbreviation
        entry = self._mappings.get(value_upper)
        if entry is not None:
            return self._exact_result(entry[0])

        # 2. Exact match against full names and unambiguous cities
        canonical_id = self._exact_names.get(value_lower)
        if canonical_id is not None:
            return self._exact_result(canonical_id)

        # 3. Alias lookup
        alias_result = self.alias_loader.resolve(value, check_date)
        if alias_result:
            return TeamResolveResult(
                canonical_id=alias_result,
                confidence=95,
                match_type="alias",
            )

        # A city shared by several teams would match each of them equally
        # well in the fuzzy step, so picking one would be a guess. Hand it
        # to manual review instead.
        if value_lower in self._ambiguous_cities:
            return self._unresolved_result(
                value,
                check_date,
                source_url,
                context={"ambiguity": "city shared by multiple teams"},
            )

        # 4. Fuzzy match
        matches = fuzzy_match_team(
            value,
            self._candidates,
            threshold=self.fuzzy_threshold,
        )

        if matches:
            best = matches[0]
            review_item = None

            # Create review item for low confidence matches
            if best.confidence < self.REVIEW_CONFIDENCE_CUTOFF:
                review_item = self._make_review_item(
                    reason=ReviewReason.LOW_CONFIDENCE_MATCH,
                    value=value,
                    context={"match_type": "fuzzy"},
                    suggestions=matches,
                    check_date=check_date,
                    source_url=source_url,
                )

            return TeamResolveResult(
                canonical_id=best.canonical_id,
                confidence=best.confidence,
                match_type="fuzzy",
                review_item=review_item,
            )

        # 5. Unresolved
        return self._unresolved_result(value, check_date, source_url, context={})

    def get_team_info(self, abbreviation: str) -> Optional[tuple[str, str, str]]:
        """Get team info by abbreviation.

        The lookup ignores case and surrounding whitespace, in line with how
        resolve() treats abbreviations.

        Args:
            abbreviation: Team abbreviation

        Returns:
            Tuple of (canonical_id, full_name, city) or None
        """
        return self._mappings.get(abbreviation.upper().strip())

    def get_all_teams(self) -> list[tuple[str, str, str]]:
        """Get all teams for this sport.

        Teams reachable through several abbreviations are listed once, using
        the entry of the first abbreviation in the mapping table.

        Returns:
            List of (canonical_id, full_name, city) tuples
        """
        teams: dict[str, tuple[str, str, str]] = {}

        for canonical_id, full_name, city in self._mappings.values():
            teams.setdefault(canonical_id, (canonical_id, full_name, city))

        return list(teams.values())
|
||||
|
||||
|
||||
# One resolver per sport, keyed by the lowercase sport code and created lazily.
_resolvers: dict[str, TeamResolver] = {}


def get_team_resolver(sport: str) -> TeamResolver:
    """Return the cached resolver for *sport*, creating it on first request.

    The sport code is lowercased before use, so 'NBA' and 'nba' share one
    resolver instance.
    """
    cache_key = sport.lower()
    resolver = _resolvers.get(cache_key)
    if resolver is None:
        resolver = TeamResolver(cache_key)
        _resolvers[cache_key] = resolver
    return resolver
|
||||
|
||||
|
||||
def resolve_team(
    sport: str,
    value: str,
    check_date: Optional[date] = None,
    source_url: Optional[str] = None,
) -> TeamResolveResult:
    """Convenience function to resolve a team name.

    Looks up the cached resolver for the sport and delegates to its
    resolve() method.

    Args:
        sport: Sport code
        value: Team name to resolve
        check_date: Date for alias validity
        source_url: Source URL recorded on any manual review item created
            for this value. Optional; omitting it keeps the previous
            behaviour of creating review items without a source URL.

    Returns:
        TeamResolveResult
    """
    resolver = get_team_resolver(sport)
    return resolver.resolve(value, check_date=check_date, source_url=source_url)
|
||||
Reference in New Issue
Block a user