Scripts changes: - Add WNBA abbreviation aliases to team_resolver.py - Fix NHL stadium coordinates in stadium_resolver.py - Add validate_aliases.py script for orphan detection - Update scrapers with improved error handling - Add DATA_AUDIT.md and REMEDIATION_PLAN.md documentation - Update alias JSON files with new mappings iOS bundle updates: - Update games_canonical.json with latest scraped data - Update teams_canonical.json and stadiums_canonical.json - Sync alias files with Scripts versions All 5 remediation phases complete. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
515 lines
32 KiB
Python
515 lines
32 KiB
Python
"""Team name resolver with exact, alias, and fuzzy matching."""
|
|
|
|
from dataclasses import dataclass
|
|
from datetime import date
|
|
from typing import Optional
|
|
from uuid import uuid4
|
|
|
|
from ..config import FUZZY_MATCH_THRESHOLD
|
|
from ..models.aliases import (
|
|
AliasType,
|
|
FuzzyMatch,
|
|
ManualReviewItem,
|
|
ReviewReason,
|
|
)
|
|
from .alias_loader import get_team_alias_loader, TeamAliasLoader
|
|
from .fuzzy import MatchCandidate, fuzzy_match_team, exact_match
|
|
|
|
|
|
@dataclass
class TeamResolveResult:
    """Outcome of resolving a single raw team string.

    Attributes:
        canonical_id: Canonical team ID that was matched, or None when the
            value could not be resolved.
        confidence: Confidence in the match (100 for exact, lower for fuzzy).
        match_type: How the match was made; one of 'exact', 'alias',
            'fuzzy' or 'unresolved'.
        review_item: ManualReviewItem attached when resolution failed or the
            match had low confidence; None otherwise.
    """

    canonical_id: Optional[str]
    confidence: int
    match_type: str
    review_item: Optional[ManualReviewItem] = None
|
|
|
|
|
|
# Hardcoded team mappings, one private table per sport.
# Each table maps an abbreviation (or short alias) to a tuple of
# (canonical_id, full_name, city, stadium_id). Several abbreviations may
# point at the same canonical team.
_TeamEntry = tuple[str, str, str, str]

_NBA_TEAMS: dict[str, _TeamEntry] = {
    "ATL": ("team_nba_atl", "Atlanta Hawks", "Atlanta", "stadium_nba_state_farm_arena"),
    "BOS": ("team_nba_bos", "Boston Celtics", "Boston", "stadium_nba_td_garden"),
    "BKN": ("team_nba_brk", "Brooklyn Nets", "Brooklyn", "stadium_nba_barclays_center"),
    "BRK": ("team_nba_brk", "Brooklyn Nets", "Brooklyn", "stadium_nba_barclays_center"),
    "CHA": ("team_nba_cho", "Charlotte Hornets", "Charlotte", "stadium_nba_spectrum_center"),
    "CHO": ("team_nba_cho", "Charlotte Hornets", "Charlotte", "stadium_nba_spectrum_center"),
    "CHI": ("team_nba_chi", "Chicago Bulls", "Chicago", "stadium_nba_united_center"),
    "CLE": ("team_nba_cle", "Cleveland Cavaliers", "Cleveland", "stadium_nba_rocket_mortgage_fieldhouse"),
    "DAL": ("team_nba_dal", "Dallas Mavericks", "Dallas", "stadium_nba_american_airlines_center"),
    "DEN": ("team_nba_den", "Denver Nuggets", "Denver", "stadium_nba_ball_arena"),
    "DET": ("team_nba_det", "Detroit Pistons", "Detroit", "stadium_nba_little_caesars_arena"),
    "GSW": ("team_nba_gsw", "Golden State Warriors", "Golden State", "stadium_nba_chase_center"),
    "GS": ("team_nba_gsw", "Golden State Warriors", "Golden State", "stadium_nba_chase_center"),
    "HOU": ("team_nba_hou", "Houston Rockets", "Houston", "stadium_nba_toyota_center"),
    "IND": ("team_nba_ind", "Indiana Pacers", "Indiana", "stadium_nba_gainbridge_fieldhouse"),
    "LAC": ("team_nba_lac", "Los Angeles Clippers", "Los Angeles", "stadium_nba_intuit_dome"),
    "LAL": ("team_nba_lal", "Los Angeles Lakers", "Los Angeles", "stadium_nba_cryptocom_arena"),
    "MEM": ("team_nba_mem", "Memphis Grizzlies", "Memphis", "stadium_nba_fedexforum"),
    "MIA": ("team_nba_mia", "Miami Heat", "Miami", "stadium_nba_kaseya_center"),
    "MIL": ("team_nba_mil", "Milwaukee Bucks", "Milwaukee", "stadium_nba_fiserv_forum"),
    "MIN": ("team_nba_min", "Minnesota Timberwolves", "Minnesota", "stadium_nba_target_center"),
    "NOP": ("team_nba_nop", "New Orleans Pelicans", "New Orleans", "stadium_nba_smoothie_king_center"),
    "NO": ("team_nba_nop", "New Orleans Pelicans", "New Orleans", "stadium_nba_smoothie_king_center"),
    "NYK": ("team_nba_nyk", "New York Knicks", "New York", "stadium_nba_madison_square_garden"),
    "NY": ("team_nba_nyk", "New York Knicks", "New York", "stadium_nba_madison_square_garden"),
    "OKC": ("team_nba_okc", "Oklahoma City Thunder", "Oklahoma City", "stadium_nba_paycom_center"),
    "ORL": ("team_nba_orl", "Orlando Magic", "Orlando", "stadium_nba_kia_center"),
    "PHI": ("team_nba_phi", "Philadelphia 76ers", "Philadelphia", "stadium_nba_wells_fargo_center"),
    "PHX": ("team_nba_phx", "Phoenix Suns", "Phoenix", "stadium_nba_footprint_center"),
    "PHO": ("team_nba_phx", "Phoenix Suns", "Phoenix", "stadium_nba_footprint_center"),
    "POR": ("team_nba_por", "Portland Trail Blazers", "Portland", "stadium_nba_moda_center"),
    "SAC": ("team_nba_sac", "Sacramento Kings", "Sacramento", "stadium_nba_golden_1_center"),
    "SAS": ("team_nba_sas", "San Antonio Spurs", "San Antonio", "stadium_nba_frost_bank_center"),
    "SA": ("team_nba_sas", "San Antonio Spurs", "San Antonio", "stadium_nba_frost_bank_center"),
    "TOR": ("team_nba_tor", "Toronto Raptors", "Toronto", "stadium_nba_scotiabank_arena"),
    "UTA": ("team_nba_uta", "Utah Jazz", "Utah", "stadium_nba_delta_center"),
    "WAS": ("team_nba_was", "Washington Wizards", "Washington", "stadium_nba_capital_one_arena"),
    "WSH": ("team_nba_was", "Washington Wizards", "Washington", "stadium_nba_capital_one_arena"),
}

_MLB_TEAMS: dict[str, _TeamEntry] = {
    "ARI": ("team_mlb_ari", "Arizona Diamondbacks", "Arizona", "stadium_mlb_chase_field"),
    "ATL": ("team_mlb_atl", "Atlanta Braves", "Atlanta", "stadium_mlb_truist_park"),
    "BAL": ("team_mlb_bal", "Baltimore Orioles", "Baltimore", "stadium_mlb_oriole_park_at_camden_yards"),
    "BOS": ("team_mlb_bos", "Boston Red Sox", "Boston", "stadium_mlb_fenway_park"),
    "CHC": ("team_mlb_chc", "Chicago Cubs", "Chicago", "stadium_mlb_wrigley_field"),
    "CHW": ("team_mlb_chw", "Chicago White Sox", "Chicago", "stadium_mlb_guaranteed_rate_field"),
    "CWS": ("team_mlb_chw", "Chicago White Sox", "Chicago", "stadium_mlb_guaranteed_rate_field"),
    "CIN": ("team_mlb_cin", "Cincinnati Reds", "Cincinnati", "stadium_mlb_great_american_ball_park"),
    "CLE": ("team_mlb_cle", "Cleveland Guardians", "Cleveland", "stadium_mlb_progressive_field"),
    "COL": ("team_mlb_col", "Colorado Rockies", "Colorado", "stadium_mlb_coors_field"),
    "DET": ("team_mlb_det", "Detroit Tigers", "Detroit", "stadium_mlb_comerica_park"),
    "HOU": ("team_mlb_hou", "Houston Astros", "Houston", "stadium_mlb_minute_maid_park"),
    "KC": ("team_mlb_kc", "Kansas City Royals", "Kansas City", "stadium_mlb_kauffman_stadium"),
    "KCR": ("team_mlb_kc", "Kansas City Royals", "Kansas City", "stadium_mlb_kauffman_stadium"),
    "LAA": ("team_mlb_laa", "Los Angeles Angels", "Los Angeles", "stadium_mlb_angel_stadium"),
    "ANA": ("team_mlb_laa", "Los Angeles Angels", "Anaheim", "stadium_mlb_angel_stadium"),
    "LAD": ("team_mlb_lad", "Los Angeles Dodgers", "Los Angeles", "stadium_mlb_dodger_stadium"),
    "MIA": ("team_mlb_mia", "Miami Marlins", "Miami", "stadium_mlb_loandepot_park"),
    "FLA": ("team_mlb_mia", "Miami Marlins", "Florida", "stadium_mlb_loandepot_park"),
    "MIL": ("team_mlb_mil", "Milwaukee Brewers", "Milwaukee", "stadium_mlb_american_family_field"),
    "MIN": ("team_mlb_min", "Minnesota Twins", "Minnesota", "stadium_mlb_target_field"),
    "NYM": ("team_mlb_nym", "New York Mets", "New York", "stadium_mlb_citi_field"),
    "NYY": ("team_mlb_nyy", "New York Yankees", "New York", "stadium_mlb_yankee_stadium"),
    "OAK": ("team_mlb_oak", "Oakland Athletics", "Oakland", "stadium_mlb_sutter_health_park"),
    "PHI": ("team_mlb_phi", "Philadelphia Phillies", "Philadelphia", "stadium_mlb_citizens_bank_park"),
    "PIT": ("team_mlb_pit", "Pittsburgh Pirates", "Pittsburgh", "stadium_mlb_pnc_park"),
    "SD": ("team_mlb_sd", "San Diego Padres", "San Diego", "stadium_mlb_petco_park"),
    "SDP": ("team_mlb_sd", "San Diego Padres", "San Diego", "stadium_mlb_petco_park"),
    "SF": ("team_mlb_sf", "San Francisco Giants", "San Francisco", "stadium_mlb_oracle_park"),
    "SFG": ("team_mlb_sf", "San Francisco Giants", "San Francisco", "stadium_mlb_oracle_park"),
    "SEA": ("team_mlb_sea", "Seattle Mariners", "Seattle", "stadium_mlb_tmobile_park"),
    "STL": ("team_mlb_stl", "St. Louis Cardinals", "St. Louis", "stadium_mlb_busch_stadium"),
    "TB": ("team_mlb_tbr", "Tampa Bay Rays", "Tampa Bay", "stadium_mlb_tropicana_field"),
    "TBR": ("team_mlb_tbr", "Tampa Bay Rays", "Tampa Bay", "stadium_mlb_tropicana_field"),
    "TEX": ("team_mlb_tex", "Texas Rangers", "Texas", "stadium_mlb_globe_life_field"),
    "TOR": ("team_mlb_tor", "Toronto Blue Jays", "Toronto", "stadium_mlb_rogers_centre"),
    "WSN": ("team_mlb_wsn", "Washington Nationals", "Washington", "stadium_mlb_nationals_park"),
    "WAS": ("team_mlb_wsn", "Washington Nationals", "Washington", "stadium_mlb_nationals_park"),
}

_NFL_TEAMS: dict[str, _TeamEntry] = {
    "ARI": ("team_nfl_ari", "Arizona Cardinals", "Arizona", "stadium_nfl_state_farm_stadium"),
    "ATL": ("team_nfl_atl", "Atlanta Falcons", "Atlanta", "stadium_nfl_mercedes_benz_stadium"),
    "BAL": ("team_nfl_bal", "Baltimore Ravens", "Baltimore", "stadium_nfl_mandt_bank_stadium"),
    "BUF": ("team_nfl_buf", "Buffalo Bills", "Buffalo", "stadium_nfl_highmark_stadium"),
    "CAR": ("team_nfl_car", "Carolina Panthers", "Carolina", "stadium_nfl_bank_of_america_stadium"),
    "CHI": ("team_nfl_chi", "Chicago Bears", "Chicago", "stadium_nfl_soldier_field"),
    "CIN": ("team_nfl_cin", "Cincinnati Bengals", "Cincinnati", "stadium_nfl_paycor_stadium"),
    "CLE": ("team_nfl_cle", "Cleveland Browns", "Cleveland", "stadium_nfl_huntington_bank_field"),
    "DAL": ("team_nfl_dal", "Dallas Cowboys", "Dallas", "stadium_nfl_att_stadium"),
    "DEN": ("team_nfl_den", "Denver Broncos", "Denver", "stadium_nfl_empower_field"),
    "DET": ("team_nfl_det", "Detroit Lions", "Detroit", "stadium_nfl_ford_field"),
    "GB": ("team_nfl_gb", "Green Bay Packers", "Green Bay", "stadium_nfl_lambeau_field"),
    "GNB": ("team_nfl_gb", "Green Bay Packers", "Green Bay", "stadium_nfl_lambeau_field"),
    "HOU": ("team_nfl_hou", "Houston Texans", "Houston", "stadium_nfl_nrg_stadium"),
    "IND": ("team_nfl_ind", "Indianapolis Colts", "Indianapolis", "stadium_nfl_lucas_oil_stadium"),
    "JAX": ("team_nfl_jax", "Jacksonville Jaguars", "Jacksonville", "stadium_nfl_everbank_stadium"),
    "JAC": ("team_nfl_jax", "Jacksonville Jaguars", "Jacksonville", "stadium_nfl_everbank_stadium"),
    "KC": ("team_nfl_kc", "Kansas City Chiefs", "Kansas City", "stadium_nfl_arrowhead_stadium"),
    "KAN": ("team_nfl_kc", "Kansas City Chiefs", "Kansas City", "stadium_nfl_arrowhead_stadium"),
    "LV": ("team_nfl_lv", "Las Vegas Raiders", "Las Vegas", "stadium_nfl_allegiant_stadium"),
    "LAC": ("team_nfl_lac", "Los Angeles Chargers", "Los Angeles", "stadium_nfl_sofi_stadium"),
    "LAR": ("team_nfl_lar", "Los Angeles Rams", "Los Angeles", "stadium_nfl_sofi_stadium"),
    "MIA": ("team_nfl_mia", "Miami Dolphins", "Miami", "stadium_nfl_hard_rock_stadium"),
    "MIN": ("team_nfl_min", "Minnesota Vikings", "Minnesota", "stadium_nfl_us_bank_stadium"),
    "NE": ("team_nfl_ne", "New England Patriots", "New England", "stadium_nfl_gillette_stadium"),
    "NWE": ("team_nfl_ne", "New England Patriots", "New England", "stadium_nfl_gillette_stadium"),
    "NO": ("team_nfl_no", "New Orleans Saints", "New Orleans", "stadium_nfl_caesars_superdome"),
    "NOR": ("team_nfl_no", "New Orleans Saints", "New Orleans", "stadium_nfl_caesars_superdome"),
    "NYG": ("team_nfl_nyg", "New York Giants", "New York", "stadium_nfl_metlife_stadium"),
    "NYJ": ("team_nfl_nyj", "New York Jets", "New York", "stadium_nfl_metlife_stadium"),
    "PHI": ("team_nfl_phi", "Philadelphia Eagles", "Philadelphia", "stadium_nfl_lincoln_financial_field"),
    "PIT": ("team_nfl_pit", "Pittsburgh Steelers", "Pittsburgh", "stadium_nfl_acrisure_stadium"),
    "SF": ("team_nfl_sf", "San Francisco 49ers", "San Francisco", "stadium_nfl_levis_stadium"),
    "SFO": ("team_nfl_sf", "San Francisco 49ers", "San Francisco", "stadium_nfl_levis_stadium"),
    "SEA": ("team_nfl_sea", "Seattle Seahawks", "Seattle", "stadium_nfl_lumen_field"),
    "TB": ("team_nfl_tb", "Tampa Bay Buccaneers", "Tampa Bay", "stadium_nfl_raymond_james_stadium"),
    "TAM": ("team_nfl_tb", "Tampa Bay Buccaneers", "Tampa Bay", "stadium_nfl_raymond_james_stadium"),
    "TEN": ("team_nfl_ten", "Tennessee Titans", "Tennessee", "stadium_nfl_nissan_stadium"),
    "WAS": ("team_nfl_was", "Washington Commanders", "Washington", "stadium_nfl_northwest_stadium"),
    "WSH": ("team_nfl_was", "Washington Commanders", "Washington", "stadium_nfl_northwest_stadium"),
}

_NHL_TEAMS: dict[str, _TeamEntry] = {
    "ANA": ("team_nhl_ana", "Anaheim Ducks", "Anaheim", "stadium_nhl_honda_center"),
    "ARI": ("team_nhl_ari", "Utah Hockey Club", "Utah", "stadium_nhl_delta_center"),  # Moved 2024
    "UTA": ("team_nhl_ari", "Utah Hockey Club", "Utah", "stadium_nhl_delta_center"),
    "BOS": ("team_nhl_bos", "Boston Bruins", "Boston", "stadium_nhl_td_garden"),
    "BUF": ("team_nhl_buf", "Buffalo Sabres", "Buffalo", "stadium_nhl_keybank_center"),
    "CGY": ("team_nhl_cgy", "Calgary Flames", "Calgary", "stadium_nhl_scotiabank_saddledome"),
    "CAR": ("team_nhl_car", "Carolina Hurricanes", "Carolina", "stadium_nhl_pnc_arena"),
    "CHI": ("team_nhl_chi", "Chicago Blackhawks", "Chicago", "stadium_nhl_united_center"),
    "COL": ("team_nhl_col", "Colorado Avalanche", "Colorado", "stadium_nhl_ball_arena"),
    "CBJ": ("team_nhl_cbj", "Columbus Blue Jackets", "Columbus", "stadium_nhl_nationwide_arena"),
    "DAL": ("team_nhl_dal", "Dallas Stars", "Dallas", "stadium_nhl_american_airlines_center"),
    "DET": ("team_nhl_det", "Detroit Red Wings", "Detroit", "stadium_nhl_little_caesars_arena"),
    "EDM": ("team_nhl_edm", "Edmonton Oilers", "Edmonton", "stadium_nhl_rogers_place"),
    "FLA": ("team_nhl_fla", "Florida Panthers", "Florida", "stadium_nhl_amerant_bank_arena"),
    "LA": ("team_nhl_la", "Los Angeles Kings", "Los Angeles", "stadium_nhl_cryptocom_arena"),
    "LAK": ("team_nhl_la", "Los Angeles Kings", "Los Angeles", "stadium_nhl_cryptocom_arena"),
    "MIN": ("team_nhl_min", "Minnesota Wild", "Minnesota", "stadium_nhl_xcel_energy_center"),
    "MTL": ("team_nhl_mtl", "Montreal Canadiens", "Montreal", "stadium_nhl_bell_centre"),
    "MON": ("team_nhl_mtl", "Montreal Canadiens", "Montreal", "stadium_nhl_bell_centre"),
    "NSH": ("team_nhl_nsh", "Nashville Predators", "Nashville", "stadium_nhl_bridgestone_arena"),
    "NAS": ("team_nhl_nsh", "Nashville Predators", "Nashville", "stadium_nhl_bridgestone_arena"),
    "NJ": ("team_nhl_njd", "New Jersey Devils", "New Jersey", "stadium_nhl_prudential_center"),
    "NJD": ("team_nhl_njd", "New Jersey Devils", "New Jersey", "stadium_nhl_prudential_center"),
    "NYI": ("team_nhl_nyi", "New York Islanders", "New York", "stadium_nhl_ubs_arena"),
    "NYR": ("team_nhl_nyr", "New York Rangers", "New York", "stadium_nhl_madison_square_garden"),
    "OTT": ("team_nhl_ott", "Ottawa Senators", "Ottawa", "stadium_nhl_canadian_tire_centre"),
    "PHI": ("team_nhl_phi", "Philadelphia Flyers", "Philadelphia", "stadium_nhl_wells_fargo_center"),
    "PIT": ("team_nhl_pit", "Pittsburgh Penguins", "Pittsburgh", "stadium_nhl_ppg_paints_arena"),
    "SJ": ("team_nhl_sj", "San Jose Sharks", "San Jose", "stadium_nhl_sap_center"),
    "SJS": ("team_nhl_sj", "San Jose Sharks", "San Jose", "stadium_nhl_sap_center"),
    "SEA": ("team_nhl_sea", "Seattle Kraken", "Seattle", "stadium_nhl_climate_pledge_arena"),
    "STL": ("team_nhl_stl", "St. Louis Blues", "St. Louis", "stadium_nhl_enterprise_center"),
    "TB": ("team_nhl_tb", "Tampa Bay Lightning", "Tampa Bay", "stadium_nhl_amalie_arena"),
    "TBL": ("team_nhl_tb", "Tampa Bay Lightning", "Tampa Bay", "stadium_nhl_amalie_arena"),
    "TOR": ("team_nhl_tor", "Toronto Maple Leafs", "Toronto", "stadium_nhl_scotiabank_arena"),
    "VAN": ("team_nhl_van", "Vancouver Canucks", "Vancouver", "stadium_nhl_rogers_arena"),
    "VGK": ("team_nhl_vgk", "Vegas Golden Knights", "Vegas", "stadium_nhl_tmobile_arena"),
    "VEG": ("team_nhl_vgk", "Vegas Golden Knights", "Vegas", "stadium_nhl_tmobile_arena"),
    "WAS": ("team_nhl_was", "Washington Capitals", "Washington", "stadium_nhl_capital_one_arena"),
    "WSH": ("team_nhl_was", "Washington Capitals", "Washington", "stadium_nhl_capital_one_arena"),
    "WPG": ("team_nhl_wpg", "Winnipeg Jets", "Winnipeg", "stadium_nhl_canada_life_centre"),
}

_MLS_TEAMS: dict[str, _TeamEntry] = {
    "ATL": ("team_mls_atl", "Atlanta United", "Atlanta", "stadium_mls_mercedes_benz_stadium"),
    "AUS": ("team_mls_aus", "Austin FC", "Austin", "stadium_mls_q2_stadium"),
    "CLT": ("team_mls_clt", "Charlotte FC", "Charlotte", "stadium_mls_bank_of_america_stadium"),
    "CHI": ("team_mls_chi", "Chicago Fire", "Chicago", "stadium_mls_soldier_field"),
    "CIN": ("team_mls_cin", "FC Cincinnati", "Cincinnati", "stadium_mls_tql_stadium"),
    "COL": ("team_mls_col", "Colorado Rapids", "Colorado", "stadium_mls_dicks_sporting_goods_park"),
    "CLB": ("team_mls_clb", "Columbus Crew", "Columbus", "stadium_mls_lowercom_field"),
    "DAL": ("team_mls_dal", "FC Dallas", "Dallas", "stadium_mls_toyota_stadium"),
    "DC": ("team_mls_dc", "D.C. United", "Washington", "stadium_mls_audi_field"),
    "HOU": ("team_mls_hou", "Houston Dynamo", "Houston", "stadium_mls_shell_energy_stadium"),
    "LAG": ("team_mls_lag", "LA Galaxy", "Los Angeles", "stadium_mls_dignity_health_sports_park"),
    "LAFC": ("team_mls_lafc", "Los Angeles FC", "Los Angeles", "stadium_mls_bmo_stadium"),
    "MIA": ("team_mls_mia", "Inter Miami", "Miami", "stadium_mls_chase_stadium"),
    "MIN": ("team_mls_min", "Minnesota United", "Minnesota", "stadium_mls_allianz_field"),
    "MTL": ("team_mls_mtl", "CF Montreal", "Montreal", "stadium_mls_stade_saputo"),
    "NSH": ("team_mls_nsh", "Nashville SC", "Nashville", "stadium_mls_geodis_park"),
    "NE": ("team_mls_ne", "New England Revolution", "New England", "stadium_mls_gillette_stadium"),
    "NYC": ("team_mls_nyc", "New York City FC", "New York", "stadium_mls_yankee_stadium"),
    "RB": ("team_mls_ny", "New York Red Bulls", "New York", "stadium_mls_red_bull_arena"),
    "RBNY": ("team_mls_ny", "New York Red Bulls", "New York", "stadium_mls_red_bull_arena"),
    "ORL": ("team_mls_orl", "Orlando City", "Orlando", "stadium_mls_interco_stadium"),
    "PHI": ("team_mls_phi", "Philadelphia Union", "Philadelphia", "stadium_mls_subaru_park"),
    "POR": ("team_mls_por", "Portland Timbers", "Portland", "stadium_mls_providence_park"),
    "SLC": ("team_mls_slc", "Real Salt Lake", "Salt Lake", "stadium_mls_america_first_field"),
    "RSL": ("team_mls_slc", "Real Salt Lake", "Salt Lake", "stadium_mls_america_first_field"),
    "SJ": ("team_mls_sj", "San Jose Earthquakes", "San Jose", "stadium_mls_paypal_park"),
    "SD": ("team_mls_sd", "San Diego FC", "San Diego", "stadium_mls_snapdragon_stadium"),
    "SEA": ("team_mls_sea", "Seattle Sounders", "Seattle", "stadium_mls_lumen_field"),
    "SKC": ("team_mls_skc", "Sporting Kansas City", "Kansas City", "stadium_mls_childrens_mercy_park"),
    "STL": ("team_mls_stl", "St. Louis City SC", "St. Louis", "stadium_mls_citypark"),
    "TOR": ("team_mls_tor", "Toronto FC", "Toronto", "stadium_mls_bmo_field"),
    "VAN": ("team_mls_van", "Vancouver Whitecaps", "Vancouver", "stadium_mls_bc_place"),
}

_WNBA_TEAMS: dict[str, _TeamEntry] = {
    "ATL": ("team_wnba_atl", "Atlanta Dream", "Atlanta", "stadium_wnba_gateway_center_arena"),
    "DREAM": ("team_wnba_atl", "Atlanta Dream", "Atlanta", "stadium_wnba_gateway_center_arena"),  # alias
    "CHI": ("team_wnba_chi", "Chicago Sky", "Chicago", "stadium_wnba_wintrust_arena"),
    "SKY": ("team_wnba_chi", "Chicago Sky", "Chicago", "stadium_wnba_wintrust_arena"),  # alias
    "CON": ("team_wnba_con", "Connecticut Sun", "Connecticut", "stadium_wnba_mohegan_sun_arena"),
    "CONN": ("team_wnba_con", "Connecticut Sun", "Connecticut", "stadium_wnba_mohegan_sun_arena"),  # alias
    "SUN": ("team_wnba_con", "Connecticut Sun", "Connecticut", "stadium_wnba_mohegan_sun_arena"),  # alias
    "DAL": ("team_wnba_dal", "Dallas Wings", "Dallas", "stadium_wnba_college_park_center"),
    "WINGS": ("team_wnba_dal", "Dallas Wings", "Dallas", "stadium_wnba_college_park_center"),  # alias
    "GSV": ("team_wnba_gsv", "Golden State Valkyries", "Golden State", "stadium_wnba_chase_center"),
    "GS": ("team_wnba_gsv", "Golden State Valkyries", "Golden State", "stadium_wnba_chase_center"),  # alias
    "VAL": ("team_wnba_gsv", "Golden State Valkyries", "Golden State", "stadium_wnba_chase_center"),  # alias
    "IND": ("team_wnba_ind", "Indiana Fever", "Indiana", "stadium_wnba_gainbridge_fieldhouse"),
    "FEVER": ("team_wnba_ind", "Indiana Fever", "Indiana", "stadium_wnba_gainbridge_fieldhouse"),  # alias
    "LV": ("team_wnba_lv", "Las Vegas Aces", "Las Vegas", "stadium_wnba_michelob_ultra_arena"),
    "LVA": ("team_wnba_lv", "Las Vegas Aces", "Las Vegas", "stadium_wnba_michelob_ultra_arena"),  # alias
    "ACES": ("team_wnba_lv", "Las Vegas Aces", "Las Vegas", "stadium_wnba_michelob_ultra_arena"),  # alias
    "LA": ("team_wnba_la", "Los Angeles Sparks", "Los Angeles", "stadium_wnba_cryptocom_arena"),
    "LAS": ("team_wnba_la", "Los Angeles Sparks", "Los Angeles", "stadium_wnba_cryptocom_arena"),  # alias
    "SPARKS": ("team_wnba_la", "Los Angeles Sparks", "Los Angeles", "stadium_wnba_cryptocom_arena"),  # alias
    "MIN": ("team_wnba_min", "Minnesota Lynx", "Minnesota", "stadium_wnba_target_center"),
    "LYNX": ("team_wnba_min", "Minnesota Lynx", "Minnesota", "stadium_wnba_target_center"),  # alias
    "NY": ("team_wnba_ny", "New York Liberty", "New York", "stadium_wnba_barclays_center"),
    "NYL": ("team_wnba_ny", "New York Liberty", "New York", "stadium_wnba_barclays_center"),  # alias
    "LIB": ("team_wnba_ny", "New York Liberty", "New York", "stadium_wnba_barclays_center"),  # alias
    "PHX": ("team_wnba_phx", "Phoenix Mercury", "Phoenix", "stadium_wnba_footprint_center"),
    "PHO": ("team_wnba_phx", "Phoenix Mercury", "Phoenix", "stadium_wnba_footprint_center"),  # alias
    "MERCURY": ("team_wnba_phx", "Phoenix Mercury", "Phoenix", "stadium_wnba_footprint_center"),  # alias
    "SEA": ("team_wnba_sea", "Seattle Storm", "Seattle", "stadium_wnba_climate_pledge_arena"),
    "STORM": ("team_wnba_sea", "Seattle Storm", "Seattle", "stadium_wnba_climate_pledge_arena"),  # alias
    "WAS": ("team_wnba_was", "Washington Mystics", "Washington", "stadium_wnba_entertainment_sports_arena"),
    "WSH": ("team_wnba_was", "Washington Mystics", "Washington", "stadium_wnba_entertainment_sports_arena"),  # alias
    "MYSTICS": ("team_wnba_was", "Washington Mystics", "Washington", "stadium_wnba_entertainment_sports_arena"),  # alias
}

_NWSL_TEAMS: dict[str, _TeamEntry] = {
    # Canonical IDs aligned with teams_canonical.json
    "ANG": ("team_nwsl_ang", "Angel City FC", "Los Angeles", "stadium_nwsl_bmo_stadium"),
    "ANF": ("team_nwsl_ang", "Angel City FC", "Los Angeles", "stadium_nwsl_bmo_stadium"),  # alias
    "CHI": ("team_nwsl_chi", "Chicago Red Stars", "Chicago", "stadium_nwsl_seatgeek_stadium"),
    "HOU": ("team_nwsl_hou", "Houston Dash", "Houston", "stadium_nwsl_shell_energy_stadium"),
    "KCC": ("team_nwsl_kcc", "Kansas City Current", "Kansas City", "stadium_nwsl_cpkc_stadium"),
    "KC": ("team_nwsl_kcc", "Kansas City Current", "Kansas City", "stadium_nwsl_cpkc_stadium"),  # alias
    "NJY": ("team_nwsl_njy", "NJ/NY Gotham FC", "New Jersey", "stadium_nwsl_red_bull_arena"),
    "NJ": ("team_nwsl_njy", "NJ/NY Gotham FC", "New Jersey", "stadium_nwsl_red_bull_arena"),  # alias
    "NCC": ("team_nwsl_ncc", "North Carolina Courage", "North Carolina", "stadium_nwsl_wakemed_soccer_park"),
    "NC": ("team_nwsl_ncc", "North Carolina Courage", "North Carolina", "stadium_nwsl_wakemed_soccer_park"),  # alias
    "ORL": ("team_nwsl_orl", "Orlando Pride", "Orlando", "stadium_nwsl_interco_stadium"),
    "POR": ("team_nwsl_por", "Portland Thorns", "Portland", "stadium_nwsl_providence_park"),
    "RGN": ("team_nwsl_rgn", "Racing Louisville", "Louisville", "stadium_nwsl_lynn_family_stadium"),
    "SDW": ("team_nwsl_sdw", "San Diego Wave", "San Diego", "stadium_nwsl_snapdragon_stadium"),
    "SD": ("team_nwsl_sdw", "San Diego Wave", "San Diego", "stadium_nwsl_snapdragon_stadium"),  # alias
    "SEA": ("team_nwsl_sea", "Seattle Reign", "Seattle", "stadium_nwsl_lumen_field"),
    "UTA": ("team_nwsl_uta", "Utah Royals", "Utah", "stadium_nwsl_america_first_field"),
    "SLC": ("team_nwsl_uta", "Utah Royals", "Utah", "stadium_nwsl_america_first_field"),  # alias
    "WSH": ("team_nwsl_wsh", "Washington Spirit", "Washington", "stadium_nwsl_audi_field"),
    "WAS": ("team_nwsl_wsh", "Washington Spirit", "Washington", "stadium_nwsl_audi_field"),  # alias
    "BAY": ("team_nwsl_bay", "Bay FC", "San Francisco", "stadium_nwsl_paypal_park"),
    "BFC": ("team_nwsl_bay", "Bay FC", "San Francisco", "stadium_nwsl_paypal_park"),  # alias
    # Expansion teams (2026) - need to be added to teams_canonical.json
    "BOS": ("team_nwsl_bos", "Boston Legacy FC", "Boston", "stadium_nwsl_gillette_stadium"),
    "DEN": ("team_nwsl_den", "Denver Summit FC", "Denver", "stadium_nwsl_dicks_sporting_goods_park"),
}

# Public lookup table.
# Format: {sport: {abbreviation: (canonical_id, full_name, city, stadium_id)}}
TEAM_MAPPINGS: dict[str, dict[str, tuple[str, str, str, str]]] = {
    "nba": _NBA_TEAMS,
    "mlb": _MLB_TEAMS,
    "nfl": _NFL_TEAMS,
    "nhl": _NHL_TEAMS,
    "mls": _MLS_TEAMS,
    "wnba": _WNBA_TEAMS,
    "nwsl": _NWSL_TEAMS,
}
|
|
|
|
|
|
class TeamResolver:
    """Resolves team names to canonical IDs for a single sport.

    Resolution order:
    1. Exact match against abbreviation mappings
    2. Exact match against full team names or city names
    3. Alias lookup (with date awareness)
    4. Fuzzy match against all known names
    5. Unresolved (returns ManualReviewItem)
    """

    # Fuzzy matches scoring below this are still returned, but carry a
    # ManualReviewItem so a human can confirm them.
    _REVIEW_CONFIDENCE = 90
    # Looser settings used only to collect suggestions for unresolved values.
    _SUGGESTION_THRESHOLD = 50
    _SUGGESTION_LIMIT = 5

    def __init__(
        self,
        sport: str,
        alias_loader: Optional[TeamAliasLoader] = None,
        fuzzy_threshold: int = FUZZY_MATCH_THRESHOLD,
    ):
        """Initialize the resolver.

        Args:
            sport: Sport code (e.g., 'nba', 'mlb'); lower-cased before lookup.
            alias_loader: Team alias loader (default: global loader).
            fuzzy_threshold: Minimum fuzzy match score.
        """
        self.sport = sport.lower()
        # Compare against None explicitly so an explicitly supplied loader is
        # never swapped for the global one just because it evaluates falsy.
        self.alias_loader = (
            alias_loader if alias_loader is not None else get_team_alias_loader()
        )
        self.fuzzy_threshold = fuzzy_threshold
        # Unknown sports get an empty mapping rather than raising.
        self._mappings = TEAM_MAPPINGS.get(self.sport, {})

        # Build match candidates for fuzzy matching
        self._candidates = self._build_candidates()

    def _build_candidates(self) -> list[MatchCandidate]:
        """Build one fuzzy-match candidate per canonical team.

        Every abbreviation and city string that maps to a team becomes an
        alias of that team's candidate. The candidate name is the full name
        of the first mapping entry seen for the canonical ID.
        """
        # Group by canonical ID to avoid duplicate candidates.
        by_id: dict[str, tuple[str, list[str]]] = {}

        for abbrev, (canonical_id, full_name, city, _stadium) in self._mappings.items():
            if canonical_id not in by_id:
                by_id[canonical_id] = (full_name, [])
            aliases = by_id[canonical_id][1]
            aliases.append(abbrev)
            aliases.append(city)

        return [
            MatchCandidate(
                canonical_id=cid,
                name=name,
                # dict.fromkeys dedupes while keeping insertion order, so the
                # alias order is stable between runs (a set's order is not).
                aliases=list(dict.fromkeys(aliases)),
            )
            for cid, (name, aliases) in by_id.items()
        ]

    def _make_review_item(
        self,
        reason: ReviewReason,
        value: str,
        context: dict,
        source_url: Optional[str],
        suggestions: list[FuzzyMatch],
        check_date: Optional[date],
    ) -> ManualReviewItem:
        """Create a ManualReviewItem for this sport with a fresh short ID."""
        return ManualReviewItem(
            id=f"team_{uuid4().hex[:8]}",
            reason=reason,
            sport=self.sport,
            raw_value=value,
            context=context,
            source_url=source_url,
            suggested_matches=suggestions,
            game_date=check_date,
        )

    def resolve(
        self,
        value: str,
        check_date: Optional[date] = None,
        source_url: Optional[str] = None,
    ) -> TeamResolveResult:
        """Resolve a team name to a canonical ID.

        Args:
            value: Team name, abbreviation, or city to resolve.
            check_date: Date passed to the alias loader for validity checks.
            source_url: Source URL recorded on manual review items.

        Returns:
            TeamResolveResult with resolution details. Exact matches have
            confidence 100, alias matches 95, fuzzy matches the matcher's
            score; unresolved values have confidence 0 and a review item.
        """
        # Normalise once so every step sees the same whitespace-trimmed text.
        # The untouched `value` is still what gets recorded on review items.
        cleaned = value.strip()

        # Blank input cannot match anything; skip the lookups entirely.
        if not cleaned:
            return TeamResolveResult(
                canonical_id=None,
                confidence=0,
                match_type="unresolved",
                review_item=self._make_review_item(
                    ReviewReason.UNRESOLVED_TEAM, value, {}, source_url, [], check_date
                ),
            )

        value_upper = cleaned.upper()
        value_lower = cleaned.lower()

        # 1. Exact match against abbreviation
        entry = self._mappings.get(value_upper)
        if entry is not None:
            return TeamResolveResult(
                canonical_id=entry[0],
                confidence=100,
                match_type="exact",
            )

        # 2. Exact match against full names or city names
        # NOTE(review): some teams share a city string (e.g. both NBA
        # "Los Angeles" entries), so a bare city resolves to whichever entry
        # comes first in the mapping, at confidence 100. Confirm whether such
        # inputs should be routed to manual review instead.
        for canonical_id, full_name, city, _stadium in self._mappings.values():
            if value_lower in (full_name.lower(), city.lower()):
                return TeamResolveResult(
                    canonical_id=canonical_id,
                    confidence=100,
                    match_type="exact",
                )

        # 3. Alias lookup
        # NOTE(review): the loader is called without the sport code; presumably
        # it is sport-scoped or IDs are globally unique - verify in alias_loader.
        alias_result = self.alias_loader.resolve(cleaned, check_date)
        if alias_result:
            return TeamResolveResult(
                canonical_id=alias_result,
                confidence=95,
                match_type="alias",
            )

        # 4. Fuzzy match
        matches = fuzzy_match_team(
            cleaned,
            self._candidates,
            threshold=self.fuzzy_threshold,
        )

        if matches:
            best = matches[0]
            review_item = None

            # Accept the match but flag low-confidence ones for a human.
            if best.confidence < self._REVIEW_CONFIDENCE:
                review_item = self._make_review_item(
                    ReviewReason.LOW_CONFIDENCE_MATCH,
                    value,
                    {"match_type": "fuzzy"},
                    source_url,
                    matches,
                    check_date,
                )

            return TeamResolveResult(
                canonical_id=best.canonical_id,
                confidence=best.confidence,
                match_type="fuzzy",
                review_item=review_item,
            )

        # 5. Unresolved: re-run the matcher with a lower threshold purely to
        # give the reviewer some suggestions.
        suggestions = fuzzy_match_team(
            cleaned,
            self._candidates,
            threshold=self._SUGGESTION_THRESHOLD,
            top_n=self._SUGGESTION_LIMIT,
        )
        return TeamResolveResult(
            canonical_id=None,
            confidence=0,
            match_type="unresolved",
            review_item=self._make_review_item(
                ReviewReason.UNRESOLVED_TEAM,
                value,
                {},
                source_url,
                suggestions,
                check_date,
            ),
        )

    def get_team_info(self, abbreviation: str) -> Optional[tuple[str, str, str, str]]:
        """Get team info by abbreviation.

        Args:
            abbreviation: Team abbreviation; case and surrounding whitespace
                are ignored, matching how resolve() treats its input.

        Returns:
            Tuple of (canonical_id, full_name, city, stadium_id) or None.
        """
        return self._mappings.get(abbreviation.strip().upper())

    def get_all_teams(self) -> list[tuple[str, str, str, str]]:
        """Get all teams for this sport.

        Returns:
            List of (canonical_id, full_name, city, stadium_id) tuples, one
            per canonical ID, using the first mapping entry seen for each ID.
        """
        first_by_id: dict[str, tuple[str, str, str, str]] = {}
        for info in self._mappings.values():
            # setdefault keeps the first entry, so alias rows are dropped.
            first_by_id.setdefault(info[0], info)
        return list(first_by_id.values())
|
|
|
|
|
|
# One resolver per lower-cased sport code, created on first request.
_resolvers: dict[str, TeamResolver] = {}


def get_team_resolver(sport: str) -> TeamResolver:
    """Return the cached TeamResolver for ``sport``, creating it on first use.

    The sport code is lower-cased before the cache lookup.
    """
    key = sport.lower()
    resolver = _resolvers.get(key)
    if resolver is None:
        resolver = TeamResolver(key)
        _resolvers[key] = resolver
    return resolver
|
|
|
|
|
|
def resolve_team(
    sport: str,
    value: str,
    check_date: Optional[date] = None,
    source_url: Optional[str] = None,
) -> TeamResolveResult:
    """Convenience function to resolve a team name.

    Looks up the shared resolver for ``sport`` and delegates to its
    ``resolve`` method.

    Args:
        sport: Sport code
        value: Team name to resolve
        check_date: Date for alias validity
        source_url: Source URL recorded on any manual review item
            (optional; omitted by existing callers)

    Returns:
        TeamResolveResult
    """
    return get_team_resolver(sport).resolve(value, check_date, source_url)
|