Files
Sportstime/Scripts/sportstime_parser/normalizers/team_resolver.py
Trey t 8ea3e6112a feat(scripts): complete data pipeline remediation
Scripts changes:
- Add WNBA abbreviation aliases to team_resolver.py
- Fix NHL stadium coordinates in stadium_resolver.py
- Add validate_aliases.py script for orphan detection
- Update scrapers with improved error handling
- Add DATA_AUDIT.md and REMEDIATION_PLAN.md documentation
- Update alias JSON files with new mappings

iOS bundle updates:
- Update games_canonical.json with latest scraped data
- Update teams_canonical.json and stadiums_canonical.json
- Sync alias files with Scripts versions

All 5 remediation phases complete.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-20 18:58:47 -06:00

515 lines
32 KiB
Python

"""Team name resolver with exact, alias, and fuzzy matching."""
from dataclasses import dataclass
from datetime import date
from typing import Optional
from uuid import uuid4
from ..config import FUZZY_MATCH_THRESHOLD
from ..models.aliases import (
AliasType,
FuzzyMatch,
ManualReviewItem,
ReviewReason,
)
from .alias_loader import get_team_alias_loader, TeamAliasLoader
from .fuzzy import MatchCandidate, fuzzy_match_team, exact_match
@dataclass
class TeamResolveResult:
    """Outcome of a single team-name resolution attempt.

    Attributes:
        canonical_id: Canonical team ID that was matched, or None when
            nothing could be resolved.
        confidence: Match confidence score; exact matches report 100,
            fuzzy matches report a lower score, unresolved reports 0.
        match_type: Which strategy produced the result: 'exact',
            'alias', 'fuzzy', or 'unresolved'.
        review_item: ManualReviewItem attached when the value could not
            be resolved or the match confidence was low; otherwise None.
    """

    # Required fields come first; only review_item carries a default.
    canonical_id: Optional[str]
    confidence: int
    match_type: str
    review_item: Optional[ManualReviewItem] = None
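# Hardcoded team mappings for each sport, keyed by lowercase sport code.
# Format: {sport: {ABBREVIATION: (canonical_id, full_name, city, stadium_id)}}
# Several abbreviations may map to the same team (e.g. "BKN" and "BRK").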
TEAM_MAPPINGS: dict[str, dict[str, tuple[str, str, str, str]]] = {
"nba": {
"ATL": ("team_nba_atl", "Atlanta Hawks", "Atlanta", "stadium_nba_state_farm_arena"),
"BOS": ("team_nba_bos", "Boston Celtics", "Boston", "stadium_nba_td_garden"),
"BKN": ("team_nba_brk", "Brooklyn Nets", "Brooklyn", "stadium_nba_barclays_center"),
"BRK": ("team_nba_brk", "Brooklyn Nets", "Brooklyn", "stadium_nba_barclays_center"),
"CHA": ("team_nba_cho", "Charlotte Hornets", "Charlotte", "stadium_nba_spectrum_center"),
"CHO": ("team_nba_cho", "Charlotte Hornets", "Charlotte", "stadium_nba_spectrum_center"),
"CHI": ("team_nba_chi", "Chicago Bulls", "Chicago", "stadium_nba_united_center"),
"CLE": ("team_nba_cle", "Cleveland Cavaliers", "Cleveland", "stadium_nba_rocket_mortgage_fieldhouse"),
"DAL": ("team_nba_dal", "Dallas Mavericks", "Dallas", "stadium_nba_american_airlines_center"),
"DEN": ("team_nba_den", "Denver Nuggets", "Denver", "stadium_nba_ball_arena"),
"DET": ("team_nba_det", "Detroit Pistons", "Detroit", "stadium_nba_little_caesars_arena"),
"GSW": ("team_nba_gsw", "Golden State Warriors", "Golden State", "stadium_nba_chase_center"),
"GS": ("team_nba_gsw", "Golden State Warriors", "Golden State", "stadium_nba_chase_center"),
"HOU": ("team_nba_hou", "Houston Rockets", "Houston", "stadium_nba_toyota_center"),
"IND": ("team_nba_ind", "Indiana Pacers", "Indiana", "stadium_nba_gainbridge_fieldhouse"),
"LAC": ("team_nba_lac", "Los Angeles Clippers", "Los Angeles", "stadium_nba_intuit_dome"),
"LAL": ("team_nba_lal", "Los Angeles Lakers", "Los Angeles", "stadium_nba_cryptocom_arena"),
"MEM": ("team_nba_mem", "Memphis Grizzlies", "Memphis", "stadium_nba_fedexforum"),
"MIA": ("team_nba_mia", "Miami Heat", "Miami", "stadium_nba_kaseya_center"),
"MIL": ("team_nba_mil", "Milwaukee Bucks", "Milwaukee", "stadium_nba_fiserv_forum"),
"MIN": ("team_nba_min", "Minnesota Timberwolves", "Minnesota", "stadium_nba_target_center"),
"NOP": ("team_nba_nop", "New Orleans Pelicans", "New Orleans", "stadium_nba_smoothie_king_center"),
"NO": ("team_nba_nop", "New Orleans Pelicans", "New Orleans", "stadium_nba_smoothie_king_center"),
"NYK": ("team_nba_nyk", "New York Knicks", "New York", "stadium_nba_madison_square_garden"),
"NY": ("team_nba_nyk", "New York Knicks", "New York", "stadium_nba_madison_square_garden"),
"OKC": ("team_nba_okc", "Oklahoma City Thunder", "Oklahoma City", "stadium_nba_paycom_center"),
"ORL": ("team_nba_orl", "Orlando Magic", "Orlando", "stadium_nba_kia_center"),
"PHI": ("team_nba_phi", "Philadelphia 76ers", "Philadelphia", "stadium_nba_wells_fargo_center"),
"PHX": ("team_nba_phx", "Phoenix Suns", "Phoenix", "stadium_nba_footprint_center"),
"PHO": ("team_nba_phx", "Phoenix Suns", "Phoenix", "stadium_nba_footprint_center"),
"POR": ("team_nba_por", "Portland Trail Blazers", "Portland", "stadium_nba_moda_center"),
"SAC": ("team_nba_sac", "Sacramento Kings", "Sacramento", "stadium_nba_golden_1_center"),
"SAS": ("team_nba_sas", "San Antonio Spurs", "San Antonio", "stadium_nba_frost_bank_center"),
"SA": ("team_nba_sas", "San Antonio Spurs", "San Antonio", "stadium_nba_frost_bank_center"),
"TOR": ("team_nba_tor", "Toronto Raptors", "Toronto", "stadium_nba_scotiabank_arena"),
"UTA": ("team_nba_uta", "Utah Jazz", "Utah", "stadium_nba_delta_center"),
"WAS": ("team_nba_was", "Washington Wizards", "Washington", "stadium_nba_capital_one_arena"),
"WSH": ("team_nba_was", "Washington Wizards", "Washington", "stadium_nba_capital_one_arena"),
},
"mlb": {
"ARI": ("team_mlb_ari", "Arizona Diamondbacks", "Arizona", "stadium_mlb_chase_field"),
"ATL": ("team_mlb_atl", "Atlanta Braves", "Atlanta", "stadium_mlb_truist_park"),
"BAL": ("team_mlb_bal", "Baltimore Orioles", "Baltimore", "stadium_mlb_oriole_park_at_camden_yards"),
"BOS": ("team_mlb_bos", "Boston Red Sox", "Boston", "stadium_mlb_fenway_park"),
"CHC": ("team_mlb_chc", "Chicago Cubs", "Chicago", "stadium_mlb_wrigley_field"),
"CHW": ("team_mlb_chw", "Chicago White Sox", "Chicago", "stadium_mlb_guaranteed_rate_field"),
"CWS": ("team_mlb_chw", "Chicago White Sox", "Chicago", "stadium_mlb_guaranteed_rate_field"),
"CIN": ("team_mlb_cin", "Cincinnati Reds", "Cincinnati", "stadium_mlb_great_american_ball_park"),
"CLE": ("team_mlb_cle", "Cleveland Guardians", "Cleveland", "stadium_mlb_progressive_field"),
"COL": ("team_mlb_col", "Colorado Rockies", "Colorado", "stadium_mlb_coors_field"),
"DET": ("team_mlb_det", "Detroit Tigers", "Detroit", "stadium_mlb_comerica_park"),
"HOU": ("team_mlb_hou", "Houston Astros", "Houston", "stadium_mlb_minute_maid_park"),
"KC": ("team_mlb_kc", "Kansas City Royals", "Kansas City", "stadium_mlb_kauffman_stadium"),
"KCR": ("team_mlb_kc", "Kansas City Royals", "Kansas City", "stadium_mlb_kauffman_stadium"),
"LAA": ("team_mlb_laa", "Los Angeles Angels", "Los Angeles", "stadium_mlb_angel_stadium"),
"ANA": ("team_mlb_laa", "Los Angeles Angels", "Anaheim", "stadium_mlb_angel_stadium"),
"LAD": ("team_mlb_lad", "Los Angeles Dodgers", "Los Angeles", "stadium_mlb_dodger_stadium"),
"MIA": ("team_mlb_mia", "Miami Marlins", "Miami", "stadium_mlb_loandepot_park"),
"FLA": ("team_mlb_mia", "Miami Marlins", "Florida", "stadium_mlb_loandepot_park"),
"MIL": ("team_mlb_mil", "Milwaukee Brewers", "Milwaukee", "stadium_mlb_american_family_field"),
"MIN": ("team_mlb_min", "Minnesota Twins", "Minnesota", "stadium_mlb_target_field"),
"NYM": ("team_mlb_nym", "New York Mets", "New York", "stadium_mlb_citi_field"),
"NYY": ("team_mlb_nyy", "New York Yankees", "New York", "stadium_mlb_yankee_stadium"),
"OAK": ("team_mlb_oak", "Oakland Athletics", "Oakland", "stadium_mlb_sutter_health_park"),
"PHI": ("team_mlb_phi", "Philadelphia Phillies", "Philadelphia", "stadium_mlb_citizens_bank_park"),
"PIT": ("team_mlb_pit", "Pittsburgh Pirates", "Pittsburgh", "stadium_mlb_pnc_park"),
"SD": ("team_mlb_sd", "San Diego Padres", "San Diego", "stadium_mlb_petco_park"),
"SDP": ("team_mlb_sd", "San Diego Padres", "San Diego", "stadium_mlb_petco_park"),
"SF": ("team_mlb_sf", "San Francisco Giants", "San Francisco", "stadium_mlb_oracle_park"),
"SFG": ("team_mlb_sf", "San Francisco Giants", "San Francisco", "stadium_mlb_oracle_park"),
"SEA": ("team_mlb_sea", "Seattle Mariners", "Seattle", "stadium_mlb_tmobile_park"),
"STL": ("team_mlb_stl", "St. Louis Cardinals", "St. Louis", "stadium_mlb_busch_stadium"),
"TB": ("team_mlb_tbr", "Tampa Bay Rays", "Tampa Bay", "stadium_mlb_tropicana_field"),
"TBR": ("team_mlb_tbr", "Tampa Bay Rays", "Tampa Bay", "stadium_mlb_tropicana_field"),
"TEX": ("team_mlb_tex", "Texas Rangers", "Texas", "stadium_mlb_globe_life_field"),
"TOR": ("team_mlb_tor", "Toronto Blue Jays", "Toronto", "stadium_mlb_rogers_centre"),
"WSN": ("team_mlb_wsn", "Washington Nationals", "Washington", "stadium_mlb_nationals_park"),
"WAS": ("team_mlb_wsn", "Washington Nationals", "Washington", "stadium_mlb_nationals_park"),
},
"nfl": {
"ARI": ("team_nfl_ari", "Arizona Cardinals", "Arizona", "stadium_nfl_state_farm_stadium"),
"ATL": ("team_nfl_atl", "Atlanta Falcons", "Atlanta", "stadium_nfl_mercedes_benz_stadium"),
"BAL": ("team_nfl_bal", "Baltimore Ravens", "Baltimore", "stadium_nfl_mandt_bank_stadium"),
"BUF": ("team_nfl_buf", "Buffalo Bills", "Buffalo", "stadium_nfl_highmark_stadium"),
"CAR": ("team_nfl_car", "Carolina Panthers", "Carolina", "stadium_nfl_bank_of_america_stadium"),
"CHI": ("team_nfl_chi", "Chicago Bears", "Chicago", "stadium_nfl_soldier_field"),
"CIN": ("team_nfl_cin", "Cincinnati Bengals", "Cincinnati", "stadium_nfl_paycor_stadium"),
"CLE": ("team_nfl_cle", "Cleveland Browns", "Cleveland", "stadium_nfl_huntington_bank_field"),
"DAL": ("team_nfl_dal", "Dallas Cowboys", "Dallas", "stadium_nfl_att_stadium"),
"DEN": ("team_nfl_den", "Denver Broncos", "Denver", "stadium_nfl_empower_field"),
"DET": ("team_nfl_det", "Detroit Lions", "Detroit", "stadium_nfl_ford_field"),
"GB": ("team_nfl_gb", "Green Bay Packers", "Green Bay", "stadium_nfl_lambeau_field"),
"GNB": ("team_nfl_gb", "Green Bay Packers", "Green Bay", "stadium_nfl_lambeau_field"),
"HOU": ("team_nfl_hou", "Houston Texans", "Houston", "stadium_nfl_nrg_stadium"),
"IND": ("team_nfl_ind", "Indianapolis Colts", "Indianapolis", "stadium_nfl_lucas_oil_stadium"),
"JAX": ("team_nfl_jax", "Jacksonville Jaguars", "Jacksonville", "stadium_nfl_everbank_stadium"),
"JAC": ("team_nfl_jax", "Jacksonville Jaguars", "Jacksonville", "stadium_nfl_everbank_stadium"),
"KC": ("team_nfl_kc", "Kansas City Chiefs", "Kansas City", "stadium_nfl_arrowhead_stadium"),
"KAN": ("team_nfl_kc", "Kansas City Chiefs", "Kansas City", "stadium_nfl_arrowhead_stadium"),
"LV": ("team_nfl_lv", "Las Vegas Raiders", "Las Vegas", "stadium_nfl_allegiant_stadium"),
"LAC": ("team_nfl_lac", "Los Angeles Chargers", "Los Angeles", "stadium_nfl_sofi_stadium"),
"LAR": ("team_nfl_lar", "Los Angeles Rams", "Los Angeles", "stadium_nfl_sofi_stadium"),
"MIA": ("team_nfl_mia", "Miami Dolphins", "Miami", "stadium_nfl_hard_rock_stadium"),
"MIN": ("team_nfl_min", "Minnesota Vikings", "Minnesota", "stadium_nfl_us_bank_stadium"),
"NE": ("team_nfl_ne", "New England Patriots", "New England", "stadium_nfl_gillette_stadium"),
"NWE": ("team_nfl_ne", "New England Patriots", "New England", "stadium_nfl_gillette_stadium"),
"NO": ("team_nfl_no", "New Orleans Saints", "New Orleans", "stadium_nfl_caesars_superdome"),
"NOR": ("team_nfl_no", "New Orleans Saints", "New Orleans", "stadium_nfl_caesars_superdome"),
"NYG": ("team_nfl_nyg", "New York Giants", "New York", "stadium_nfl_metlife_stadium"),
"NYJ": ("team_nfl_nyj", "New York Jets", "New York", "stadium_nfl_metlife_stadium"),
"PHI": ("team_nfl_phi", "Philadelphia Eagles", "Philadelphia", "stadium_nfl_lincoln_financial_field"),
"PIT": ("team_nfl_pit", "Pittsburgh Steelers", "Pittsburgh", "stadium_nfl_acrisure_stadium"),
"SF": ("team_nfl_sf", "San Francisco 49ers", "San Francisco", "stadium_nfl_levis_stadium"),
"SFO": ("team_nfl_sf", "San Francisco 49ers", "San Francisco", "stadium_nfl_levis_stadium"),
"SEA": ("team_nfl_sea", "Seattle Seahawks", "Seattle", "stadium_nfl_lumen_field"),
"TB": ("team_nfl_tb", "Tampa Bay Buccaneers", "Tampa Bay", "stadium_nfl_raymond_james_stadium"),
"TAM": ("team_nfl_tb", "Tampa Bay Buccaneers", "Tampa Bay", "stadium_nfl_raymond_james_stadium"),
"TEN": ("team_nfl_ten", "Tennessee Titans", "Tennessee", "stadium_nfl_nissan_stadium"),
"WAS": ("team_nfl_was", "Washington Commanders", "Washington", "stadium_nfl_northwest_stadium"),
"WSH": ("team_nfl_was", "Washington Commanders", "Washington", "stadium_nfl_northwest_stadium"),
},
"nhl": {
"ANA": ("team_nhl_ana", "Anaheim Ducks", "Anaheim", "stadium_nhl_honda_center"),
"ARI": ("team_nhl_ari", "Utah Hockey Club", "Utah", "stadium_nhl_delta_center"), # Moved 2024
"UTA": ("team_nhl_ari", "Utah Hockey Club", "Utah", "stadium_nhl_delta_center"),
"BOS": ("team_nhl_bos", "Boston Bruins", "Boston", "stadium_nhl_td_garden"),
"BUF": ("team_nhl_buf", "Buffalo Sabres", "Buffalo", "stadium_nhl_keybank_center"),
"CGY": ("team_nhl_cgy", "Calgary Flames", "Calgary", "stadium_nhl_scotiabank_saddledome"),
"CAR": ("team_nhl_car", "Carolina Hurricanes", "Carolina", "stadium_nhl_pnc_arena"),
"CHI": ("team_nhl_chi", "Chicago Blackhawks", "Chicago", "stadium_nhl_united_center"),
"COL": ("team_nhl_col", "Colorado Avalanche", "Colorado", "stadium_nhl_ball_arena"),
"CBJ": ("team_nhl_cbj", "Columbus Blue Jackets", "Columbus", "stadium_nhl_nationwide_arena"),
"DAL": ("team_nhl_dal", "Dallas Stars", "Dallas", "stadium_nhl_american_airlines_center"),
"DET": ("team_nhl_det", "Detroit Red Wings", "Detroit", "stadium_nhl_little_caesars_arena"),
"EDM": ("team_nhl_edm", "Edmonton Oilers", "Edmonton", "stadium_nhl_rogers_place"),
"FLA": ("team_nhl_fla", "Florida Panthers", "Florida", "stadium_nhl_amerant_bank_arena"),
"LA": ("team_nhl_la", "Los Angeles Kings", "Los Angeles", "stadium_nhl_cryptocom_arena"),
"LAK": ("team_nhl_la", "Los Angeles Kings", "Los Angeles", "stadium_nhl_cryptocom_arena"),
"MIN": ("team_nhl_min", "Minnesota Wild", "Minnesota", "stadium_nhl_xcel_energy_center"),
"MTL": ("team_nhl_mtl", "Montreal Canadiens", "Montreal", "stadium_nhl_bell_centre"),
"MON": ("team_nhl_mtl", "Montreal Canadiens", "Montreal", "stadium_nhl_bell_centre"),
"NSH": ("team_nhl_nsh", "Nashville Predators", "Nashville", "stadium_nhl_bridgestone_arena"),
"NAS": ("team_nhl_nsh", "Nashville Predators", "Nashville", "stadium_nhl_bridgestone_arena"),
"NJ": ("team_nhl_njd", "New Jersey Devils", "New Jersey", "stadium_nhl_prudential_center"),
"NJD": ("team_nhl_njd", "New Jersey Devils", "New Jersey", "stadium_nhl_prudential_center"),
"NYI": ("team_nhl_nyi", "New York Islanders", "New York", "stadium_nhl_ubs_arena"),
"NYR": ("team_nhl_nyr", "New York Rangers", "New York", "stadium_nhl_madison_square_garden"),
"OTT": ("team_nhl_ott", "Ottawa Senators", "Ottawa", "stadium_nhl_canadian_tire_centre"),
"PHI": ("team_nhl_phi", "Philadelphia Flyers", "Philadelphia", "stadium_nhl_wells_fargo_center"),
"PIT": ("team_nhl_pit", "Pittsburgh Penguins", "Pittsburgh", "stadium_nhl_ppg_paints_arena"),
"SJ": ("team_nhl_sj", "San Jose Sharks", "San Jose", "stadium_nhl_sap_center"),
"SJS": ("team_nhl_sj", "San Jose Sharks", "San Jose", "stadium_nhl_sap_center"),
"SEA": ("team_nhl_sea", "Seattle Kraken", "Seattle", "stadium_nhl_climate_pledge_arena"),
"STL": ("team_nhl_stl", "St. Louis Blues", "St. Louis", "stadium_nhl_enterprise_center"),
"TB": ("team_nhl_tb", "Tampa Bay Lightning", "Tampa Bay", "stadium_nhl_amalie_arena"),
"TBL": ("team_nhl_tb", "Tampa Bay Lightning", "Tampa Bay", "stadium_nhl_amalie_arena"),
"TOR": ("team_nhl_tor", "Toronto Maple Leafs", "Toronto", "stadium_nhl_scotiabank_arena"),
"VAN": ("team_nhl_van", "Vancouver Canucks", "Vancouver", "stadium_nhl_rogers_arena"),
"VGK": ("team_nhl_vgk", "Vegas Golden Knights", "Vegas", "stadium_nhl_tmobile_arena"),
"VEG": ("team_nhl_vgk", "Vegas Golden Knights", "Vegas", "stadium_nhl_tmobile_arena"),
"WAS": ("team_nhl_was", "Washington Capitals", "Washington", "stadium_nhl_capital_one_arena"),
"WSH": ("team_nhl_was", "Washington Capitals", "Washington", "stadium_nhl_capital_one_arena"),
"WPG": ("team_nhl_wpg", "Winnipeg Jets", "Winnipeg", "stadium_nhl_canada_life_centre"),
},
"mls": {
"ATL": ("team_mls_atl", "Atlanta United", "Atlanta", "stadium_mls_mercedes_benz_stadium"),
"AUS": ("team_mls_aus", "Austin FC", "Austin", "stadium_mls_q2_stadium"),
"CLT": ("team_mls_clt", "Charlotte FC", "Charlotte", "stadium_mls_bank_of_america_stadium"),
"CHI": ("team_mls_chi", "Chicago Fire", "Chicago", "stadium_mls_soldier_field"),
"CIN": ("team_mls_cin", "FC Cincinnati", "Cincinnati", "stadium_mls_tql_stadium"),
"COL": ("team_mls_col", "Colorado Rapids", "Colorado", "stadium_mls_dicks_sporting_goods_park"),
"CLB": ("team_mls_clb", "Columbus Crew", "Columbus", "stadium_mls_lowercom_field"),
"DAL": ("team_mls_dal", "FC Dallas", "Dallas", "stadium_mls_toyota_stadium"),
"DC": ("team_mls_dc", "D.C. United", "Washington", "stadium_mls_audi_field"),
"HOU": ("team_mls_hou", "Houston Dynamo", "Houston", "stadium_mls_shell_energy_stadium"),
"LAG": ("team_mls_lag", "LA Galaxy", "Los Angeles", "stadium_mls_dignity_health_sports_park"),
"LAFC": ("team_mls_lafc", "Los Angeles FC", "Los Angeles", "stadium_mls_bmo_stadium"),
"MIA": ("team_mls_mia", "Inter Miami", "Miami", "stadium_mls_chase_stadium"),
"MIN": ("team_mls_min", "Minnesota United", "Minnesota", "stadium_mls_allianz_field"),
"MTL": ("team_mls_mtl", "CF Montreal", "Montreal", "stadium_mls_stade_saputo"),
"NSH": ("team_mls_nsh", "Nashville SC", "Nashville", "stadium_mls_geodis_park"),
"NE": ("team_mls_ne", "New England Revolution", "New England", "stadium_mls_gillette_stadium"),
"NYC": ("team_mls_nyc", "New York City FC", "New York", "stadium_mls_yankee_stadium"),
"RB": ("team_mls_ny", "New York Red Bulls", "New York", "stadium_mls_red_bull_arena"),
"RBNY": ("team_mls_ny", "New York Red Bulls", "New York", "stadium_mls_red_bull_arena"),
"ORL": ("team_mls_orl", "Orlando City", "Orlando", "stadium_mls_interco_stadium"),
"PHI": ("team_mls_phi", "Philadelphia Union", "Philadelphia", "stadium_mls_subaru_park"),
"POR": ("team_mls_por", "Portland Timbers", "Portland", "stadium_mls_providence_park"),
"SLC": ("team_mls_slc", "Real Salt Lake", "Salt Lake", "stadium_mls_america_first_field"),
"RSL": ("team_mls_slc", "Real Salt Lake", "Salt Lake", "stadium_mls_america_first_field"),
"SJ": ("team_mls_sj", "San Jose Earthquakes", "San Jose", "stadium_mls_paypal_park"),
"SD": ("team_mls_sd", "San Diego FC", "San Diego", "stadium_mls_snapdragon_stadium"),
"SEA": ("team_mls_sea", "Seattle Sounders", "Seattle", "stadium_mls_lumen_field"),
"SKC": ("team_mls_skc", "Sporting Kansas City", "Kansas City", "stadium_mls_childrens_mercy_park"),
"STL": ("team_mls_stl", "St. Louis City SC", "St. Louis", "stadium_mls_citypark"),
"TOR": ("team_mls_tor", "Toronto FC", "Toronto", "stadium_mls_bmo_field"),
"VAN": ("team_mls_van", "Vancouver Whitecaps", "Vancouver", "stadium_mls_bc_place"),
},
"wnba": {
"ATL": ("team_wnba_atl", "Atlanta Dream", "Atlanta", "stadium_wnba_gateway_center_arena"),
"DREAM": ("team_wnba_atl", "Atlanta Dream", "Atlanta", "stadium_wnba_gateway_center_arena"), # alias
"CHI": ("team_wnba_chi", "Chicago Sky", "Chicago", "stadium_wnba_wintrust_arena"),
"SKY": ("team_wnba_chi", "Chicago Sky", "Chicago", "stadium_wnba_wintrust_arena"), # alias
"CON": ("team_wnba_con", "Connecticut Sun", "Connecticut", "stadium_wnba_mohegan_sun_arena"),
"CONN": ("team_wnba_con", "Connecticut Sun", "Connecticut", "stadium_wnba_mohegan_sun_arena"), # alias
"SUN": ("team_wnba_con", "Connecticut Sun", "Connecticut", "stadium_wnba_mohegan_sun_arena"), # alias
"DAL": ("team_wnba_dal", "Dallas Wings", "Dallas", "stadium_wnba_college_park_center"),
"WINGS": ("team_wnba_dal", "Dallas Wings", "Dallas", "stadium_wnba_college_park_center"), # alias
"GSV": ("team_wnba_gsv", "Golden State Valkyries", "Golden State", "stadium_wnba_chase_center"),
"GS": ("team_wnba_gsv", "Golden State Valkyries", "Golden State", "stadium_wnba_chase_center"), # alias
"VAL": ("team_wnba_gsv", "Golden State Valkyries", "Golden State", "stadium_wnba_chase_center"), # alias
"IND": ("team_wnba_ind", "Indiana Fever", "Indiana", "stadium_wnba_gainbridge_fieldhouse"),
"FEVER": ("team_wnba_ind", "Indiana Fever", "Indiana", "stadium_wnba_gainbridge_fieldhouse"), # alias
"LV": ("team_wnba_lv", "Las Vegas Aces", "Las Vegas", "stadium_wnba_michelob_ultra_arena"),
"LVA": ("team_wnba_lv", "Las Vegas Aces", "Las Vegas", "stadium_wnba_michelob_ultra_arena"), # alias
"ACES": ("team_wnba_lv", "Las Vegas Aces", "Las Vegas", "stadium_wnba_michelob_ultra_arena"), # alias
"LA": ("team_wnba_la", "Los Angeles Sparks", "Los Angeles", "stadium_wnba_cryptocom_arena"),
"LAS": ("team_wnba_la", "Los Angeles Sparks", "Los Angeles", "stadium_wnba_cryptocom_arena"), # alias
"SPARKS": ("team_wnba_la", "Los Angeles Sparks", "Los Angeles", "stadium_wnba_cryptocom_arena"), # alias
"MIN": ("team_wnba_min", "Minnesota Lynx", "Minnesota", "stadium_wnba_target_center"),
"LYNX": ("team_wnba_min", "Minnesota Lynx", "Minnesota", "stadium_wnba_target_center"), # alias
"NY": ("team_wnba_ny", "New York Liberty", "New York", "stadium_wnba_barclays_center"),
"NYL": ("team_wnba_ny", "New York Liberty", "New York", "stadium_wnba_barclays_center"), # alias
"LIB": ("team_wnba_ny", "New York Liberty", "New York", "stadium_wnba_barclays_center"), # alias
"PHX": ("team_wnba_phx", "Phoenix Mercury", "Phoenix", "stadium_wnba_footprint_center"),
"PHO": ("team_wnba_phx", "Phoenix Mercury", "Phoenix", "stadium_wnba_footprint_center"), # alias
"MERCURY": ("team_wnba_phx", "Phoenix Mercury", "Phoenix", "stadium_wnba_footprint_center"), # alias
"SEA": ("team_wnba_sea", "Seattle Storm", "Seattle", "stadium_wnba_climate_pledge_arena"),
"STORM": ("team_wnba_sea", "Seattle Storm", "Seattle", "stadium_wnba_climate_pledge_arena"), # alias
"WAS": ("team_wnba_was", "Washington Mystics", "Washington", "stadium_wnba_entertainment_sports_arena"),
"WSH": ("team_wnba_was", "Washington Mystics", "Washington", "stadium_wnba_entertainment_sports_arena"), # alias
"MYSTICS": ("team_wnba_was", "Washington Mystics", "Washington", "stadium_wnba_entertainment_sports_arena"), # alias
},
"nwsl": {
# Canonical IDs aligned with teams_canonical.json
"ANG": ("team_nwsl_ang", "Angel City FC", "Los Angeles", "stadium_nwsl_bmo_stadium"),
"ANF": ("team_nwsl_ang", "Angel City FC", "Los Angeles", "stadium_nwsl_bmo_stadium"), # alias
"CHI": ("team_nwsl_chi", "Chicago Red Stars", "Chicago", "stadium_nwsl_seatgeek_stadium"),
"HOU": ("team_nwsl_hou", "Houston Dash", "Houston", "stadium_nwsl_shell_energy_stadium"),
"KCC": ("team_nwsl_kcc", "Kansas City Current", "Kansas City", "stadium_nwsl_cpkc_stadium"),
"KC": ("team_nwsl_kcc", "Kansas City Current", "Kansas City", "stadium_nwsl_cpkc_stadium"), # alias
"NJY": ("team_nwsl_njy", "NJ/NY Gotham FC", "New Jersey", "stadium_nwsl_red_bull_arena"),
"NJ": ("team_nwsl_njy", "NJ/NY Gotham FC", "New Jersey", "stadium_nwsl_red_bull_arena"), # alias
"NCC": ("team_nwsl_ncc", "North Carolina Courage", "North Carolina", "stadium_nwsl_wakemed_soccer_park"),
"NC": ("team_nwsl_ncc", "North Carolina Courage", "North Carolina", "stadium_nwsl_wakemed_soccer_park"), # alias
"ORL": ("team_nwsl_orl", "Orlando Pride", "Orlando", "stadium_nwsl_interco_stadium"),
"POR": ("team_nwsl_por", "Portland Thorns", "Portland", "stadium_nwsl_providence_park"),
"RGN": ("team_nwsl_rgn", "Racing Louisville", "Louisville", "stadium_nwsl_lynn_family_stadium"),
"SDW": ("team_nwsl_sdw", "San Diego Wave", "San Diego", "stadium_nwsl_snapdragon_stadium"),
"SD": ("team_nwsl_sdw", "San Diego Wave", "San Diego", "stadium_nwsl_snapdragon_stadium"), # alias
"SEA": ("team_nwsl_sea", "Seattle Reign", "Seattle", "stadium_nwsl_lumen_field"),
"UTA": ("team_nwsl_uta", "Utah Royals", "Utah", "stadium_nwsl_america_first_field"),
"SLC": ("team_nwsl_uta", "Utah Royals", "Utah", "stadium_nwsl_america_first_field"), # alias
"WSH": ("team_nwsl_wsh", "Washington Spirit", "Washington", "stadium_nwsl_audi_field"),
"WAS": ("team_nwsl_wsh", "Washington Spirit", "Washington", "stadium_nwsl_audi_field"), # alias
"BAY": ("team_nwsl_bay", "Bay FC", "San Francisco", "stadium_nwsl_paypal_park"),
"BFC": ("team_nwsl_bay", "Bay FC", "San Francisco", "stadium_nwsl_paypal_park"), # alias
# Expansion teams (2026) - need to be added to teams_canonical.json
"BOS": ("team_nwsl_bos", "Boston Legacy FC", "Boston", "stadium_nwsl_gillette_stadium"),
"DEN": ("team_nwsl_den", "Denver Summit FC", "Denver", "stadium_nwsl_dicks_sporting_goods_park"),
},
}
class TeamResolver:
    """Resolves team names to canonical IDs for a single sport.

    Resolution order:
    1. Exact match against abbreviation mappings
    2. Exact match against full team names or cities
    3. Alias lookup (with date awareness)
    4. Fuzzy match against all known names
    5. Unresolved (returns ManualReviewItem)
    """

    def __init__(
        self,
        sport: str,
        alias_loader: Optional[TeamAliasLoader] = None,
        fuzzy_threshold: int = FUZZY_MATCH_THRESHOLD,
    ):
        """Initialize the resolver.

        Args:
            sport: Sport code (e.g., 'nba', 'mlb'); compared case-insensitively.
            alias_loader: Team alias loader (default: global loader)
            fuzzy_threshold: Minimum fuzzy match score
        """
        self.sport = sport.lower()
        # Compare against None so an explicitly supplied loader is never
        # swapped for the global one just because it evaluates as falsy.
        self.alias_loader = (
            alias_loader if alias_loader is not None else get_team_alias_loader()
        )
        self.fuzzy_threshold = fuzzy_threshold
        # Unknown sports get an empty mapping rather than raising.
        self._mappings = TEAM_MAPPINGS.get(self.sport, {})
        # Build match candidates for fuzzy matching
        self._candidates = self._build_candidates()

    def _build_candidates(self) -> list[MatchCandidate]:
        """Build one fuzzy-match candidate per canonical team.

        Every abbreviation and city that maps to a team becomes an alias of
        that team's candidate. The candidate name is the full name of the
        first mapping entry seen for the canonical ID.

        Returns:
            List of MatchCandidate objects, one per canonical ID.
        """
        # Group by canonical ID to avoid duplicates
        by_id: dict[str, tuple[str, list[str]]] = {}
        for abbrev, (canonical_id, full_name, city, _) in self._mappings.items():
            _, aliases = by_id.setdefault(canonical_id, (full_name, []))
            aliases.extend((abbrev, city))

        return [
            MatchCandidate(
                canonical_id=cid,
                name=name,
                # Order-preserving dedupe keeps the alias order stable
                # between runs (a set would reorder under hash randomization).
                aliases=list(dict.fromkeys(aliases)),
            )
            for cid, (name, aliases) in by_id.items()
        ]

    def resolve(
        self,
        value: str,
        check_date: Optional[date] = None,
        source_url: Optional[str] = None,
    ) -> TeamResolveResult:
        """Resolve a team name to a canonical ID.

        Surrounding whitespace is ignored for every matching step; the
        untouched input is still recorded as ``raw_value`` on review items.

        Args:
            value: Team name, abbreviation, or city to resolve
            check_date: Date for alias validity (None = today)
            source_url: Source URL for manual review items

        Returns:
            TeamResolveResult with resolution details
        """
        cleaned = value.strip()
        value_upper = cleaned.upper()
        value_lower = cleaned.lower()

        # 1. Exact match against abbreviation
        info = self._mappings.get(value_upper)
        if info is not None:
            return TeamResolveResult(
                canonical_id=info[0],
                confidence=100,
                match_type="exact",
            )

        # 2. Exact match against full names or cities
        # NOTE(review): a city shared by several teams (e.g. "New York" in
        # MLB) resolves to whichever entry appears first in the mapping.
        for canonical_id, full_name, city, _ in self._mappings.values():
            if value_lower in (full_name.lower(), city.lower()):
                return TeamResolveResult(
                    canonical_id=canonical_id,
                    confidence=100,
                    match_type="exact",
                )

        # 3. Alias lookup
        alias_result = self.alias_loader.resolve(cleaned, check_date)
        if alias_result:
            return TeamResolveResult(
                canonical_id=alias_result,
                confidence=95,
                match_type="alias",
            )

        # 4. Fuzzy match
        matches = fuzzy_match_team(
            cleaned,
            self._candidates,
            threshold=self.fuzzy_threshold,
        )
        if matches:
            best = matches[0]
            review_item = None
            # Create review item for low confidence matches; the match is
            # still returned so callers can proceed while it awaits review.
            if best.confidence < 90:
                review_item = ManualReviewItem(
                    id=f"team_{uuid4().hex[:8]}",
                    reason=ReviewReason.LOW_CONFIDENCE_MATCH,
                    sport=self.sport,
                    raw_value=value,
                    context={"match_type": "fuzzy"},
                    source_url=source_url,
                    suggested_matches=matches,
                    game_date=check_date,
                )
            return TeamResolveResult(
                canonical_id=best.canonical_id,
                confidence=best.confidence,
                match_type="fuzzy",
                review_item=review_item,
            )

        # 5. Unresolved
        review_item = ManualReviewItem(
            id=f"team_{uuid4().hex[:8]}",
            reason=ReviewReason.UNRESOLVED_TEAM,
            sport=self.sport,
            raw_value=value,
            context={},
            source_url=source_url,
            suggested_matches=fuzzy_match_team(
                cleaned,
                self._candidates,
                threshold=50,  # Lower threshold for suggestions
                top_n=5,
            ),
            game_date=check_date,
        )
        return TeamResolveResult(
            canonical_id=None,
            confidence=0,
            match_type="unresolved",
            review_item=review_item,
        )

    def get_team_info(self, abbreviation: str) -> Optional[tuple[str, str, str, str]]:
        """Get team info by abbreviation.

        The lookup ignores case and surrounding whitespace, matching how
        ``resolve`` normalizes abbreviations.

        Args:
            abbreviation: Team abbreviation

        Returns:
            Tuple of (canonical_id, full_name, city, stadium_id) or None
        """
        return self._mappings.get(abbreviation.strip().upper())

    def get_all_teams(self) -> list[tuple[str, str, str, str]]:
        """Get all teams for this sport.

        Teams reachable through several abbreviations are listed once,
        using the first mapping entry seen for each canonical ID.

        Returns:
            List of (canonical_id, full_name, city, stadium_id) tuples
        """
        unique: dict[str, tuple[str, str, str, str]] = {}
        for info in self._mappings.values():
            # info[0] is the canonical ID; first entry wins.
            unique.setdefault(info[0], info)
        return list(unique.values())
# Module-level cache of resolvers, keyed by lowercase sport code.
_resolvers: dict[str, TeamResolver] = {}


def get_team_resolver(sport: str) -> TeamResolver:
    """Return the shared resolver for a sport, building it on first request."""
    key = sport.lower()
    try:
        return _resolvers[key]
    except KeyError:
        # First request for this sport: construct and remember the resolver.
        _resolvers[key] = TeamResolver(key)
        return _resolvers[key]
def resolve_team(
    sport: str,
    value: str,
    check_date: Optional[date] = None,
    source_url: Optional[str] = None,
) -> TeamResolveResult:
    """Convenience function to resolve a team name.

    Looks up the cached resolver for ``sport`` and delegates to its
    ``resolve`` method.

    Args:
        sport: Sport code
        value: Team name to resolve
        check_date: Date for alias validity
        source_url: Source URL recorded on any manual review item that the
            resolution produces (optional; defaults to None)

    Returns:
        TeamResolveResult
    """
    # Forward source_url so review items created through this wrapper carry
    # the same provenance as those created via TeamResolver.resolve directly.
    return get_team_resolver(sport).resolve(
        value,
        check_date=check_date,
        source_url=source_url,
    )