From 81f620defe4f8b691e176df495f707086677c8fd Mon Sep 17 00:00:00 2001 From: Trey t Date: Sat, 10 Jan 2026 09:43:18 -0600 Subject: [PATCH] feat(03-02): add NWSL to canonicalization pipeline - Import NWSL_TEAMS from nwsl module - Add NWSL_DIVISIONS dict (single league structure, no divisions) - Add NWSL to sport_mappings for team canonicalization - Add NWSL team abbreviation aliases (ANG, GOTHAM, NCC, BAY, etc.) - Add NWSL stadium aliases (CPKC Stadium, SeatGeek Stadium, WakeMed, etc.) Total teams: 180 (13 NWSL teams added) Final breakdown: NBA(30) + MLB(30) + NHL(32) + NFL(32) + MLS(30) + WNBA(13) + NWSL(13) Co-Authored-By: Claude Opus 4.5 --- Scripts/canonicalize_games.py | 10 ++++++++++ Scripts/canonicalize_stadiums.py | 12 ++++++++++++ Scripts/canonicalize_teams.py | 20 ++++++++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/Scripts/canonicalize_games.py b/Scripts/canonicalize_games.py index 489a2c5..c0e4382 100644 --- a/Scripts/canonicalize_games.py +++ b/Scripts/canonicalize_games.py @@ -117,6 +117,16 @@ TEAM_ABBREV_ALIASES = { ('WNBA', 'PHX'): 'team_wnba_pho', # Phoenix Mercury ('WNBA', 'CONN'): 'team_wnba_con', # Connecticut Sun ('WNBA', 'WSH'): 'team_wnba_was', # Washington Mystics + + # NWSL + ('NWSL', 'ANG'): 'team_nwsl_la', # Angel City FC (uses LA abbrev) + ('NWSL', 'ACFC'): 'team_nwsl_la', # Angel City FC alt + ('NWSL', 'NCC'): 'team_nwsl_nc', # North Carolina Courage + ('NWSL', 'GOTHAM'): 'team_nwsl_nj', # NJ/NY Gotham FC + ('NWSL', 'NY'): 'team_nwsl_nj', # NJ/NY Gotham FC alt + ('NWSL', 'BAY'): 'team_nwsl_sj', # Bay FC (San Jose) + ('NWSL', 'RLC'): 'team_nwsl_uta', # Racing Louisville -> Utah Royals; NOTE(review): described as a rebrand, but Racing Louisville FC looks like a separate active club - confirm this mapping + ('NWSL', 'LOU'): 'team_nwsl_uta', # Louisville -> Utah alt } diff --git a/Scripts/canonicalize_stadiums.py b/Scripts/canonicalize_stadiums.py index 2ae0217..48c030b 100644 --- a/Scripts/canonicalize_stadiums.py +++ b/Scripts/canonicalize_stadiums.py @@ -247,6 +247,18 @@ HISTORICAL_STADIUM_ALIASES = { 
'stadium_wnba_college_park_center': [ # College Park Center opened 2012, university venue ], + + # NWSL (most share MLS stadiums with existing aliases; these are NWSL-specific) + 'stadium_nwsl_cpkc_stadium': [ + # CPKC Stadium opened 2024, first soccer-specific stadium for NWSL team + ], + 'stadium_nwsl_seatgeek_stadium': [ + {'alias_name': 'toyota park', 'valid_from': '2006-06-01', 'valid_until': '2018-04-30'}, + {'alias_name': 'bridgeview stadium', 'valid_from': '2006-06-01', 'valid_until': '2006-06-01'}, + ], + 'stadium_nwsl_wakemed_soccer_park': [ + {'alias_name': 'sas soccer park', 'valid_from': '2002-04-01', 'valid_until': '2007-03-31'}, + ], } diff --git a/Scripts/canonicalize_teams.py b/Scripts/canonicalize_teams.py index c1071d0..6c0c61a 100644 --- a/Scripts/canonicalize_teams.py +++ b/Scripts/canonicalize_teams.py @@ -21,6 +21,7 @@ from typing import Optional from scrape_schedules import NBA_TEAMS, MLB_TEAMS, NHL_TEAMS, NFL_TEAMS from mls import MLS_TEAMS from wnba import WNBA_TEAMS +from nwsl import NWSL_TEAMS # ============================================================================= @@ -270,6 +271,23 @@ WNBA_DIVISIONS = { 'WAS': ('wnba', None), } +NWSL_DIVISIONS = { + # NWSL has no divisions (single league structure) + 'LA': ('nwsl', None), # Angel City FC + 'SJ': ('nwsl', None), # Bay FC + 'CHI': ('nwsl', None), # Chicago Red Stars + 'HOU': ('nwsl', None), # Houston Dash + 'KC': ('nwsl', None), # Kansas City Current + 'NJ': ('nwsl', None), # NJ/NY Gotham FC + 'NC': ('nwsl', None), # North Carolina Courage + 'ORL': ('nwsl', None), # Orlando Pride + 'POR': ('nwsl', None), # Portland Thorns FC + 'SEA': ('nwsl', None), # Seattle Reign FC + 'SD': ('nwsl', None), # San Diego Wave FC + 'UTA': ('nwsl', None), # Utah Royals FC + 'WAS': ('nwsl', None), # Washington Spirit +} + # ============================================================================= # FUZZY MATCHING @@ -430,6 +448,7 @@ def canonicalize_teams( 'NFL': NFL_DIVISIONS, 'MLS': 
MLS_DIVISIONS, 'WNBA': WNBA_DIVISIONS, + 'NWSL': NWSL_DIVISIONS, }.get(sport, {}) for abbrev, info in team_mappings.items(): @@ -505,6 +524,7 @@ def canonicalize_all_teams( ('NFL', NFL_TEAMS), ('MLS', MLS_TEAMS), ('WNBA', WNBA_TEAMS), + ('NWSL', NWSL_TEAMS), ] for sport, team_map in sport_mappings: