feat(03-02): add MLS to canonicalization pipeline

- Import MLS_TEAMS from mls module
- Add MLS_DIVISIONS dict (Eastern/Western conferences)
- Add MLS to sport_mappings for team canonicalization
- Add MLS team abbreviation aliases (LA, NYC, RBNY, etc.)
- Add MLS stadium historical aliases (BMO, PayPal Park, Shell Energy, etc.)

Total teams: 154 (30 MLS teams added)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-01-10 09:40:39 -06:00
parent 3e9bd24214
commit b6a913df1d
3 changed files with 83 additions and 0 deletions

View File

@@ -97,6 +97,18 @@ TEAM_ABBREV_ALIASES = {
('NFL', 'TAM'): 'team_nfl_tb', # Tampa Bay alternate
('NFL', 'SFO'): 'team_nfl_sf', # San Francisco alternate
('NFL', 'WAS'): 'team_nfl_was', # Washington (direct match but include for completeness)
# MLS
('MLS', 'LA'): 'team_mls_lag', # LA Galaxy
('MLS', 'NYC'): 'team_mls_nycfc', # NYC FC
('MLS', 'RBNY'): 'team_mls_nyrb', # NY Red Bulls
('MLS', 'NYR'): 'team_mls_nyrb', # NY Red Bulls alt
('MLS', 'SJE'): 'team_mls_sj', # San Jose Earthquakes
('MLS', 'KC'): 'team_mls_skc', # Sporting KC
('MLS', 'DCU'): 'team_mls_dc', # DC United
('MLS', 'FCD'): 'team_mls_dal', # FC Dallas
('MLS', 'MON'): 'team_mls_mtl', # Montreal
('MLS', 'LAF'): 'team_mls_lafc', # LAFC alt
}

View File

@@ -200,6 +200,39 @@ HISTORICAL_STADIUM_ALIASES = {
{'alias_name': 'qwest field', 'valid_from': '2004-06-01', 'valid_until': '2011-05-31'},
{'alias_name': 'seahawks stadium', 'valid_from': '2002-07-01', 'valid_until': '2004-05-31'},
],
# MLS
'stadium_mls_bmo_stadium': [
{'alias_name': 'banc of california stadium', 'valid_from': '2018-04-01', 'valid_until': '2023-06-01'},
],
'stadium_mls_paypal_park': [
{'alias_name': 'earthquakes stadium', 'valid_from': '2015-03-01', 'valid_until': '2020-12-31'},
{'alias_name': 'avaya stadium', 'valid_from': '2015-03-01', 'valid_until': '2020-12-31'},
],
'stadium_mls_shell_energy_stadium': [
{'alias_name': 'pnc stadium', 'valid_from': '2021-03-01', 'valid_until': '2023-03-01'},
{'alias_name': 'bbva stadium', 'valid_from': '2019-01-01', 'valid_until': '2021-02-28'},
{'alias_name': 'bbva compass stadium', 'valid_from': '2012-05-01', 'valid_until': '2018-12-31'},
],
'stadium_mls_dignity_health_sports_park': [
{'alias_name': 'stubhub center', 'valid_from': '2013-06-01', 'valid_until': '2019-01-31'},
{'alias_name': 'home depot center', 'valid_from': '2003-06-01', 'valid_until': '2013-05-31'},
],
'stadium_mls_interandco_stadium': [
{'alias_name': 'exploria stadium', 'valid_from': '2017-03-01', 'valid_until': '2023-07-01'},
{'alias_name': 'orlando city stadium', 'valid_from': '2017-03-01', 'valid_until': '2019-01-01'},
],
'stadium_mls_chase_stadium': [
{'alias_name': 'drv pnk stadium', 'valid_from': '2020-07-01', 'valid_until': '2024-01-01'},
{'alias_name': 'inter miami cf stadium', 'valid_from': '2020-07-01', 'valid_until': '2020-09-01'},
],
'stadium_mls_america_first_field': [
{'alias_name': 'rio tinto stadium', 'valid_from': '2008-10-01', 'valid_until': '2021-08-01'},
],
'stadium_mls_lowercom_field': [
{'alias_name': 'lower.com field', 'valid_from': '2021-07-01'}, # Current name with period
{'alias_name': 'new crew stadium', 'valid_from': '2021-07-01', 'valid_until': '2021-07-01'},
],
}

View File

@@ -19,6 +19,7 @@ from typing import Optional
# Import team mappings from scraper
from scrape_schedules import NBA_TEAMS, MLB_TEAMS, NHL_TEAMS, NFL_TEAMS
from mls import MLS_TEAMS
# =============================================================================
@@ -216,6 +217,41 @@ NFL_DIVISIONS = {
'SEA': ('nfl_nfc', 'nfl_nfc_west'),
}
# MLS team abbreviation -> (conference_id, division_id).
#
# Values use the same 2-tuple shape as the other *_DIVISIONS tables
# (e.g. NFL_DIVISIONS maps 'SEA' -> ('nfl_nfc', 'nfl_nfc_west')).
# MLS has conferences but no divisions, so the division slot is always None.
#
# Alignment reflects the 30-team league (15 Eastern / 15 Western).
# Nashville SC (NSH) belongs to the Eastern Conference: it returned there in
# 2023 and must not be listed under 'mls_western'.
MLS_DIVISIONS = {
    # Eastern Conference (MLS uses conferences, not divisions)
    'ATL': ('mls_eastern', None),
    'CHI': ('mls_eastern', None),
    'CIN': ('mls_eastern', None),
    'CLB': ('mls_eastern', None),
    'CLT': ('mls_eastern', None),
    'DC': ('mls_eastern', None),
    'MIA': ('mls_eastern', None),
    'MTL': ('mls_eastern', None),
    'NE': ('mls_eastern', None),
    'NSH': ('mls_eastern', None),
    'NYCFC': ('mls_eastern', None),
    'NYRB': ('mls_eastern', None),
    'ORL': ('mls_eastern', None),
    'PHI': ('mls_eastern', None),
    'TOR': ('mls_eastern', None),
    # Western Conference
    'AUS': ('mls_western', None),
    'COL': ('mls_western', None),
    'DAL': ('mls_western', None),
    'HOU': ('mls_western', None),
    'LAFC': ('mls_western', None),
    'LAG': ('mls_western', None),
    'MIN': ('mls_western', None),
    'POR': ('mls_western', None),
    'RSL': ('mls_western', None),
    'SD': ('mls_western', None),
    'SEA': ('mls_western', None),
    'SJ': ('mls_western', None),
    'SKC': ('mls_western', None),
    'STL': ('mls_western', None),
    'VAN': ('mls_western', None),
}
# =============================================================================
# FUZZY MATCHING
@@ -374,6 +410,7 @@ def canonicalize_teams(
'MLB': MLB_DIVISIONS,
'NHL': NHL_DIVISIONS,
'NFL': NFL_DIVISIONS,
'MLS': MLS_DIVISIONS,
}.get(sport, {})
for abbrev, info in team_mappings.items():
@@ -447,6 +484,7 @@ def canonicalize_all_teams(
('MLB', MLB_TEAMS),
('NHL', NHL_TEAMS),
('NFL', NFL_TEAMS),
('MLS', MLS_TEAMS),
]
for sport, team_map in sport_mappings: