feat(03-02): add MLS to canonicalization pipeline
- Import MLS_TEAMS from mls module
- Add MLS_DIVISIONS dict (Eastern/Western conferences)
- Add MLS to sport_mappings for team canonicalization
- Add MLS team abbreviation aliases (LA, NYC, RBNY, etc.)
- Add MLS stadium historical aliases (BMO, PayPal Park, Shell Energy, etc.)

Total teams: 154 (30 MLS teams added)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -97,6 +97,18 @@ TEAM_ABBREV_ALIASES = {
|
||||
('NFL', 'TAM'): 'team_nfl_tb', # Tampa Bay alternate
|
||||
('NFL', 'SFO'): 'team_nfl_sf', # San Francisco alternate
|
||||
('NFL', 'WAS'): 'team_nfl_was', # Washington (direct match but include for completeness)
|
||||
|
||||
# MLS
|
||||
('MLS', 'LA'): 'team_mls_lag', # LA Galaxy
|
||||
('MLS', 'NYC'): 'team_mls_nycfc', # NYC FC
|
||||
('MLS', 'RBNY'): 'team_mls_nyrb', # NY Red Bulls
|
||||
('MLS', 'NYR'): 'team_mls_nyrb', # NY Red Bulls alt
|
||||
('MLS', 'SJE'): 'team_mls_sj', # San Jose Earthquakes
|
||||
('MLS', 'KC'): 'team_mls_skc', # Sporting KC
|
||||
('MLS', 'DCU'): 'team_mls_dc', # DC United
|
||||
('MLS', 'FCD'): 'team_mls_dal', # FC Dallas
|
||||
('MLS', 'MON'): 'team_mls_mtl', # Montreal
|
||||
('MLS', 'LAF'): 'team_mls_lafc', # LAFC alt
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -200,6 +200,39 @@ HISTORICAL_STADIUM_ALIASES = {
|
||||
{'alias_name': 'qwest field', 'valid_from': '2004-06-01', 'valid_until': '2011-05-31'},
|
||||
{'alias_name': 'seahawks stadium', 'valid_from': '2002-07-01', 'valid_until': '2004-05-31'},
|
||||
],
|
||||
|
||||
# MLS
|
||||
'stadium_mls_bmo_stadium': [
|
||||
{'alias_name': 'banc of california stadium', 'valid_from': '2018-04-01', 'valid_until': '2023-06-01'},
|
||||
],
|
||||
'stadium_mls_paypal_park': [
|
||||
{'alias_name': 'earthquakes stadium', 'valid_from': '2015-03-01', 'valid_until': '2020-12-31'},
|
||||
{'alias_name': 'avaya stadium', 'valid_from': '2015-03-01', 'valid_until': '2020-12-31'},
|
||||
],
|
||||
'stadium_mls_shell_energy_stadium': [
|
||||
{'alias_name': 'pnc stadium', 'valid_from': '2021-03-01', 'valid_until': '2023-03-01'},
|
||||
{'alias_name': 'bbva stadium', 'valid_from': '2019-01-01', 'valid_until': '2021-02-28'},
|
||||
{'alias_name': 'bbva compass stadium', 'valid_from': '2012-05-01', 'valid_until': '2018-12-31'},
|
||||
],
|
||||
'stadium_mls_dignity_health_sports_park': [
|
||||
{'alias_name': 'stubhub center', 'valid_from': '2013-06-01', 'valid_until': '2019-01-31'},
|
||||
{'alias_name': 'home depot center', 'valid_from': '2003-06-01', 'valid_until': '2013-05-31'},
|
||||
],
|
||||
'stadium_mls_interandco_stadium': [
|
||||
{'alias_name': 'exploria stadium', 'valid_from': '2017-03-01', 'valid_until': '2023-07-01'},
|
||||
{'alias_name': 'orlando city stadium', 'valid_from': '2017-03-01', 'valid_until': '2019-01-01'},
|
||||
],
|
||||
'stadium_mls_chase_stadium': [
|
||||
{'alias_name': 'drv pnk stadium', 'valid_from': '2020-07-01', 'valid_until': '2024-01-01'},
|
||||
{'alias_name': 'inter miami cf stadium', 'valid_from': '2020-07-01', 'valid_until': '2020-09-01'},
|
||||
],
|
||||
'stadium_mls_america_first_field': [
|
||||
{'alias_name': 'rio tinto stadium', 'valid_from': '2008-10-01', 'valid_until': '2021-08-01'},
|
||||
],
|
||||
'stadium_mls_lowercom_field': [
|
||||
{'alias_name': 'lower.com field', 'valid_from': '2021-07-01'}, # Current name with period
|
||||
{'alias_name': 'new crew stadium', 'valid_from': '2021-07-01', 'valid_until': '2021-07-01'},
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ from typing import Optional
|
||||
|
||||
# Import team mappings from scraper
|
||||
from scrape_schedules import NBA_TEAMS, MLB_TEAMS, NHL_TEAMS, NFL_TEAMS
|
||||
from mls import MLS_TEAMS
|
||||
|
||||
|
||||
# =============================================================================
|
||||
@@ -216,6 +217,41 @@ NFL_DIVISIONS = {
|
||||
'SEA': ('nfl_nfc', 'nfl_nfc_west'),
|
||||
}
|
||||
|
||||
# Maps an MLS team abbreviation to a (conference_id, division_id) tuple, the
# same shape used by the other leagues' *_DIVISIONS tables (e.g. NFL 'SEA' ->
# ('nfl_nfc', 'nfl_nfc_west')). MLS has conferences but no divisions, so the
# second element is always None.
#
# The table reflects the 30-team league (St. Louis and San Diego included),
# which splits 15 East / 15 West.
MLS_DIVISIONS = {
    # Eastern Conference (MLS uses conferences, not divisions)
    'ATL': ('mls_eastern', None),
    'CHI': ('mls_eastern', None),
    'CIN': ('mls_eastern', None),
    'CLB': ('mls_eastern', None),
    'CLT': ('mls_eastern', None),
    'DC': ('mls_eastern', None),
    'MIA': ('mls_eastern', None),
    'MTL': ('mls_eastern', None),
    'NE': ('mls_eastern', None),
    # Nashville SC has played in the Eastern Conference since the 2023
    # realignment that accompanied St. Louis joining the West.
    'NSH': ('mls_eastern', None),
    'NYCFC': ('mls_eastern', None),
    'NYRB': ('mls_eastern', None),
    'ORL': ('mls_eastern', None),
    'PHI': ('mls_eastern', None),
    'TOR': ('mls_eastern', None),
    # Western Conference
    'AUS': ('mls_western', None),
    'COL': ('mls_western', None),
    'DAL': ('mls_western', None),
    'HOU': ('mls_western', None),
    'LAFC': ('mls_western', None),
    'LAG': ('mls_western', None),
    'MIN': ('mls_western', None),
    'POR': ('mls_western', None),
    'RSL': ('mls_western', None),
    'SD': ('mls_western', None),
    'SEA': ('mls_western', None),
    'SJ': ('mls_western', None),
    'SKC': ('mls_western', None),
    'STL': ('mls_western', None),
    'VAN': ('mls_western', None),
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# FUZZY MATCHING
|
||||
@@ -374,6 +410,7 @@ def canonicalize_teams(
|
||||
'MLB': MLB_DIVISIONS,
|
||||
'NHL': NHL_DIVISIONS,
|
||||
'NFL': NFL_DIVISIONS,
|
||||
'MLS': MLS_DIVISIONS,
|
||||
}.get(sport, {})
|
||||
|
||||
for abbrev, info in team_mappings.items():
|
||||
@@ -447,6 +484,7 @@ def canonicalize_all_teams(
|
||||
('MLB', MLB_TEAMS),
|
||||
('NHL', NHL_TEAMS),
|
||||
('NFL', NFL_TEAMS),
|
||||
('MLS', MLS_TEAMS),
|
||||
]
|
||||
|
||||
for sport, team_map in sport_mappings:
|
||||
|
||||
Reference in New Issue
Block a user