feat(02.1-03): integrate NWSL module with scrape_schedules.py
Update scrape_schedules.py to import NWSL stadium functionality from nwsl.py:

- Add import for NWSL_TEAMS, get_nwsl_team_abbrev, scrape_nwsl_stadiums, and NWSL_STADIUM_SOURCES
- Remove inline NWSL_TEAMS dict (now imported from nwsl.py)
- Remove stub scrape_nwsl_stadiums function (now using module implementation)
- Update docstrings and comments to reflect module structure

Stadium scraping now uses modules for all secondary sports except CBB:

- MLS: 30 stadiums from mls.py
- WNBA: 13 arenas from wnba.py
- NWSL: 13 stadiums from nwsl.py

Only CBB remains inline (350+ D1 teams require a separately scoped phase).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -8,8 +8,11 @@ This script coordinates scraping across sport-specific modules:
|
||||
- nba.py: NBA scrapers
|
||||
- nhl.py: NHL scrapers
|
||||
- nfl.py: NFL scrapers
|
||||
- mls.py: MLS stadiums
|
||||
- wnba.py: WNBA stadiums
|
||||
- nwsl.py: NWSL stadiums
|
||||
|
||||
Non-core sports (WNBA, MLS, NWSL, CBB) remain inline pending extraction.
|
||||
CBB (College Basketball) remains inline pending extraction (350+ D1 teams).
|
||||
|
||||
Usage:
|
||||
python scrape_schedules.py --sport nba --season 2026
|
||||
@@ -79,35 +82,18 @@ from wnba import (
|
||||
scrape_wnba_stadiums,
|
||||
WNBA_STADIUM_SOURCES,
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# NON-CORE SPORT TEAM MAPPINGS
|
||||
# TODO: Extract to separate modules (nwsl.py, cbb.py)
|
||||
# NOTE: MLS_TEAMS is now imported from mls.py
|
||||
# NOTE: WNBA_TEAMS is now imported from wnba.py
|
||||
# =============================================================================
|
||||
|
||||
NWSL_TEAMS = {
|
||||
'LA': {'name': 'Angel City FC', 'city': 'Los Angeles', 'stadium': 'BMO Stadium'},
|
||||
'SJ': {'name': 'Bay FC', 'city': 'San Jose', 'stadium': 'PayPal Park'},
|
||||
'CHI': {'name': 'Chicago Red Stars', 'city': 'Bridgeview', 'stadium': 'SeatGeek Stadium'},
|
||||
'HOU': {'name': 'Houston Dash', 'city': 'Houston', 'stadium': 'Shell Energy Stadium'},
|
||||
'KC': {'name': 'Kansas City Current', 'city': 'Kansas City', 'stadium': 'CPKC Stadium'},
|
||||
'NJ': {'name': 'NJ/NY Gotham FC', 'city': 'Harrison', 'stadium': 'Red Bull Arena'},
|
||||
'NC': {'name': 'North Carolina Courage', 'city': 'Cary', 'stadium': 'WakeMed Soccer Park'},
|
||||
'ORL': {'name': 'Orlando Pride', 'city': 'Orlando', 'stadium': 'Inter&Co Stadium'},
|
||||
'POR': {'name': 'Portland Thorns FC', 'city': 'Portland', 'stadium': 'Providence Park'},
|
||||
'SEA': {'name': 'Seattle Reign FC', 'city': 'Seattle', 'stadium': 'Lumen Field'},
|
||||
'SD': {'name': 'San Diego Wave FC', 'city': 'San Diego', 'stadium': 'Snapdragon Stadium'},
|
||||
'UTA': {'name': 'Utah Royals FC', 'city': 'Sandy', 'stadium': 'America First Field'},
|
||||
'WAS': {'name': 'Washington Spirit', 'city': 'Washington', 'stadium': 'Audi Field'},
|
||||
}
|
||||
from nwsl import (
|
||||
NWSL_TEAMS,
|
||||
get_nwsl_team_abbrev,
|
||||
scrape_nwsl_stadiums,
|
||||
NWSL_STADIUM_SOURCES,
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# NON-CORE SPORT SCRAPERS
|
||||
# TODO: Extract to separate modules (wnba.py, mls.py, nwsl.py, cbb.py)
|
||||
# NOTE: MLS, WNBA, NWSL stadiums are now imported from their respective modules
|
||||
# TODO: Extract CBB to separate module (350+ D1 teams requires separate scoped phase)
|
||||
# =============================================================================
|
||||
|
||||
def _scrape_espn_schedule(sport: str, league: str, season: int, date_range: tuple[str, str]) -> list[Game]:
|
||||
@@ -373,21 +359,12 @@ def scrape_cbb_cbssports(season: int) -> list[Game]:
|
||||
|
||||
# =============================================================================
|
||||
# NON-CORE STADIUM SCRAPERS
|
||||
# TODO: Extract to separate modules (nwsl.py, cbb.py)
|
||||
# NOTE: scrape_mls_stadiums() is now imported from mls.py
|
||||
# NOTE: scrape_wnba_stadiums() is now imported from wnba.py
|
||||
# NOTE: scrape_nwsl_stadiums() is now imported from nwsl.py
|
||||
# TODO: Extract CBB to separate module (350+ D1 teams requires separate scoped phase)
|
||||
# =============================================================================
|
||||
|
||||
def scrape_nwsl_stadiums() -> list[Stadium]:
|
||||
"""Fetch NWSL stadium data (hardcoded)."""
|
||||
print("\nNWSL STADIUMS")
|
||||
print("-" * 40)
|
||||
stadiums = []
|
||||
# Would include NWSL stadium data here
|
||||
print(f" Found {len(stadiums)} NWSL stadiums")
|
||||
return stadiums
|
||||
|
||||
|
||||
def scrape_cbb_stadiums() -> list[Stadium]:
|
||||
"""Fetch College Basketball arena data."""
|
||||
print("\nCBB STADIUMS")
|
||||
|
||||
Reference in New Issue
Block a user