feat(02.1-03): integrate NWSL module with scrape_schedules.py

Update scrape_schedules.py to import NWSL stadium functionality from nwsl.py:
- Add import for NWSL_TEAMS, get_nwsl_team_abbrev, scrape_nwsl_stadiums, NWSL_STADIUM_SOURCES
- Remove inline NWSL_TEAMS dict (now imported from nwsl.py)
- Remove stub scrape_nwsl_stadiums function (now using module implementation)
- Update docstrings and comments to reflect module structure

Stadium scraping now uses modules for every secondary sport except CBB:
- MLS: 30 stadiums from mls.py
- WNBA: 13 arenas from wnba.py
- NWSL: 13 stadiums from nwsl.py

Only CBB remains inline (350+ D1 teams require a separately scoped phase).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-01-10 01:06:14 -06:00
parent 75e2498382
commit 5307fdf6a4

View File

@@ -8,8 +8,11 @@ This script coordinates scraping across sport-specific modules:
- nba.py: NBA scrapers
- nhl.py: NHL scrapers
- nfl.py: NFL scrapers
- mls.py: MLS stadiums
- wnba.py: WNBA stadiums
- nwsl.py: NWSL stadiums
Non-core sports (WNBA, MLS, NWSL, CBB) remain inline pending extraction.
CBB (College Basketball) remains inline pending extraction (350+ D1 teams).
Usage:
python scrape_schedules.py --sport nba --season 2026
@@ -79,35 +82,18 @@ from wnba import (
scrape_wnba_stadiums,
WNBA_STADIUM_SOURCES,
)
# =============================================================================
# NON-CORE SPORT TEAM MAPPINGS
# TODO: Extract to separate modules (nwsl.py, cbb.py)
# NOTE: MLS_TEAMS is now imported from mls.py
# NOTE: WNBA_TEAMS is now imported from wnba.py
# =============================================================================
# NWSL clubs keyed by abbreviation; each entry records the club's display
# name, home city, and home stadium.
NWSL_TEAMS = {
    'LA': {'name': 'Angel City FC', 'city': 'Los Angeles', 'stadium': 'BMO Stadium'},
    'SJ': {'name': 'Bay FC', 'city': 'San Jose', 'stadium': 'PayPal Park'},
    'CHI': {'name': 'Chicago Red Stars', 'city': 'Bridgeview', 'stadium': 'SeatGeek Stadium'},
    'HOU': {'name': 'Houston Dash', 'city': 'Houston', 'stadium': 'Shell Energy Stadium'},
    'KC': {'name': 'Kansas City Current', 'city': 'Kansas City', 'stadium': 'CPKC Stadium'},
    'NJ': {'name': 'NJ/NY Gotham FC', 'city': 'Harrison', 'stadium': 'Red Bull Arena'},
    'NC': {'name': 'North Carolina Courage', 'city': 'Cary', 'stadium': 'WakeMed Soccer Park'},
    'ORL': {'name': 'Orlando Pride', 'city': 'Orlando', 'stadium': 'Inter&Co Stadium'},
    'POR': {'name': 'Portland Thorns FC', 'city': 'Portland', 'stadium': 'Providence Park'},
    'SEA': {'name': 'Seattle Reign FC', 'city': 'Seattle', 'stadium': 'Lumen Field'},
    'SD': {'name': 'San Diego Wave FC', 'city': 'San Diego', 'stadium': 'Snapdragon Stadium'},
    'UTA': {'name': 'Utah Royals FC', 'city': 'Sandy', 'stadium': 'America First Field'},
    'WAS': {'name': 'Washington Spirit', 'city': 'Washington', 'stadium': 'Audi Field'},
}
from nwsl import (
NWSL_TEAMS,
get_nwsl_team_abbrev,
scrape_nwsl_stadiums,
NWSL_STADIUM_SOURCES,
)
# =============================================================================
# NON-CORE SPORT SCRAPERS
# TODO: Extract to separate modules (wnba.py, mls.py, nwsl.py, cbb.py)
# NOTE: MLS, WNBA, NWSL stadiums are now imported from their respective modules
# TODO: Extract CBB to a separate module (350+ D1 teams require a separately scoped phase)
# =============================================================================
def _scrape_espn_schedule(sport: str, league: str, season: int, date_range: tuple[str, str]) -> list[Game]:
@@ -373,21 +359,12 @@ def scrape_cbb_cbssports(season: int) -> list[Game]:
# =============================================================================
# NON-CORE STADIUM SCRAPERS
# TODO: Extract to separate modules (nwsl.py, cbb.py)
# NOTE: scrape_mls_stadiums() is now imported from mls.py
# NOTE: scrape_wnba_stadiums() is now imported from wnba.py
# NOTE: scrape_nwsl_stadiums() is now imported from nwsl.py
# TODO: Extract CBB to a separate module (350+ D1 teams require a separately scoped phase)
# =============================================================================
def scrape_nwsl_stadiums() -> list[Stadium]:
    """Return NWSL stadium records (placeholder: no data is populated yet).

    Prints a section banner followed by the number of stadiums found.

    Returns:
        The list of stadiums, which is always empty in this stub.
    """
    print("\nNWSL STADIUMS")
    print("-" * 40)

    # Placeholder: NWSL stadium entries have not been added here.
    nwsl_venues = []

    print(f" Found {len(nwsl_venues)} NWSL stadiums")
    return nwsl_venues
def scrape_cbb_stadiums() -> list[Stadium]:
"""Fetch College Basketball arena data."""
print("\nCBB STADIUMS")