#!/usr/bin/env python3
"""
NWSL schedule and stadium scrapers for SportsTime.

This module provides:
- NWSL team mappings (13 teams)
- NWSL stadium scrapers (hardcoded with coordinates)
- Multi-source fallback configurations

Note: Many NWSL teams share stadiums with MLS teams. Coordinates are
cross-referenced from mls.py where applicable.
"""

from typing import Optional

import requests

# Support both direct execution and import from parent directory
try:
    from core import (
        Game,
        Stadium,
        ScraperSource,
        StadiumScraperSource,
        fetch_page,
        scrape_with_fallback,
        scrape_stadiums_with_fallback,
    )
except ImportError:
    from Scripts.core import (
        Game,
        Stadium,
        ScraperSource,
        StadiumScraperSource,
        fetch_page,
        scrape_with_fallback,
        scrape_stadiums_with_fallback,
    )


__all__ = [
    # Team data
    'NWSL_TEAMS',
    # Stadium scrapers
    'scrape_nwsl_stadiums_hardcoded',
    'scrape_nwsl_stadiums',
    # Source configurations
    'NWSL_STADIUM_SOURCES',
    # Convenience functions
    'get_nwsl_team_abbrev',
]


# =============================================================================
# TEAM MAPPINGS
# =============================================================================

# Keyed by team abbreviation; each entry carries the club's full name, its
# home city and the name of its home stadium (matching a key in the stadium
# table inside scrape_nwsl_stadiums_hardcoded).
# NOTE(review): this table lists 13 clubs - confirm it still matches the
# current league membership.
NWSL_TEAMS = {
    'LA': {'name': 'Angel City FC', 'city': 'Los Angeles', 'stadium': 'BMO Stadium'},
    'SJ': {'name': 'Bay FC', 'city': 'San Jose', 'stadium': 'PayPal Park'},
    'CHI': {'name': 'Chicago Red Stars', 'city': 'Bridgeview', 'stadium': 'SeatGeek Stadium'},
    'HOU': {'name': 'Houston Dash', 'city': 'Houston', 'stadium': 'Shell Energy Stadium'},
    'KC': {'name': 'Kansas City Current', 'city': 'Kansas City', 'stadium': 'CPKC Stadium'},
    'NJ': {'name': 'NJ/NY Gotham FC', 'city': 'Harrison', 'stadium': 'Red Bull Arena'},
    'NC': {'name': 'North Carolina Courage', 'city': 'Cary', 'stadium': 'WakeMed Soccer Park'},
    'ORL': {'name': 'Orlando Pride', 'city': 'Orlando', 'stadium': 'Inter&Co Stadium'},
    'POR': {'name': 'Portland Thorns FC', 'city': 'Portland', 'stadium': 'Providence Park'},
    'SEA': {'name': 'Seattle Reign FC', 'city': 'Seattle', 'stadium': 'Lumen Field'},
    'SD': {'name': 'San Diego Wave FC', 'city': 'San Diego', 'stadium': 'Snapdragon Stadium'},
    'UTA': {'name': 'Utah Royals FC', 'city': 'Sandy', 'stadium': 'America First Field'},
    'WAS': {'name': 'Washington Spirit', 'city': 'Washington', 'stadium': 'Audi Field'},
}


def get_nwsl_team_abbrev(team_name: str) -> str:
    """Get NWSL team abbreviation from a full or partial team name.

    Matching is case-insensitive and ignores surrounding whitespace. An
    exact name match against ``NWSL_TEAMS`` always wins; only when no team
    matches exactly is the first team whose name contains ``team_name``
    returned.

    Args:
        team_name: Full or partial team name, e.g. ``'Bay FC'`` or
            ``'thorns'``.

    Returns:
        The matching key of ``NWSL_TEAMS``. When nothing matches, the first
        three characters of the stripped ``team_name``, upper-cased (an
        empty string for empty or whitespace-only input).
    """
    query = team_name.strip().lower()
    if not query:
        # The empty string is a substring of every name, so without this
        # guard blank input would resolve to the first team in the table.
        return ''

    lowered_names = {
        abbrev: info['name'].lower() for abbrev, info in NWSL_TEAMS.items()
    }

    # Pass 1: exact matches take priority over any partial match, no matter
    # where the team sits in the table.
    for abbrev, name in lowered_names.items():
        if name == query:
            return abbrev

    # Pass 2: first team (in table order) whose name contains the query.
    for abbrev, name in lowered_names.items():
        if query in name:
            return abbrev

    # Return first 3 letters as fallback
    return team_name.strip()[:3].upper()


# =============================================================================
# STADIUM SCRAPERS
# =============================================================================

def _nwsl_stadium_id(stadium_name: str) -> str:
    """Build the normalized stadium ID for a stadium display name.

    The name is lower-cased, spaces become underscores, ``&`` becomes
    ``and``, periods and apostrophes are dropped, and the result is cut to
    30 characters before the ``nwsl_`` prefix is added. For example
    ``'Inter&Co Stadium'`` becomes ``'nwsl_interandco_stadium'``.
    """
    normalized_name = (
        stadium_name.lower()
        .replace(' ', '_')
        .replace('&', 'and')
        .replace('.', '')
        .replace("'", '')
    )
    return f"nwsl_{normalized_name[:30]}"


def scrape_nwsl_stadiums_hardcoded() -> list[Stadium]:
    """
    Source 1: Hardcoded NWSL stadiums with complete data.

    All 13 NWSL stadiums with capacity (NWSL configuration) and year_opened.
    Shared stadium coordinates are cross-referenced from MLS module:
    - BMO Stadium (shared with LAFC)
    - PayPal Park (shared with SJ Earthquakes)
    - Shell Energy Stadium (shared with Houston Dynamo)
    - Red Bull Arena (shared with NY Red Bulls)
    - Inter&Co Stadium (shared with Orlando City SC)
    - Providence Park (shared with Portland Timbers)
    - Lumen Field (shared with Seattle Sounders/Seahawks)
    - Snapdragon Stadium (shared with San Diego FC)
    - America First Field (shared with Real Salt Lake)
    - Audi Field (shared with DC United)

    Returns:
        One ``Stadium`` per entry of the table below, in table order, with
        ``sport='NWSL'`` and ``source='nwsl_hardcoded'``.
    """
    # The table is rebuilt on every call, so each returned Stadium gets its
    # own fresh 'teams' list rather than sharing one across calls.
    nwsl_stadiums = {
        # Shared stadiums with MLS teams (coordinates from mls.py)
        'BMO Stadium': {
            'city': 'Los Angeles', 'state': 'CA',
            'lat': 34.0128, 'lng': -118.2841,
            'capacity': 22000, 'teams': ['LA'], 'year_opened': 2018
        },
        'PayPal Park': {
            'city': 'San Jose', 'state': 'CA',
            'lat': 37.3514, 'lng': -121.9250,
            'capacity': 18000, 'teams': ['SJ'], 'year_opened': 2015
        },
        'Shell Energy Stadium': {
            'city': 'Houston', 'state': 'TX',
            'lat': 29.7522, 'lng': -95.3524,
            'capacity': 22039, 'teams': ['HOU'], 'year_opened': 2012
        },
        'Red Bull Arena': {
            'city': 'Harrison', 'state': 'NJ',
            'lat': 40.7367, 'lng': -74.1503,
            'capacity': 25000, 'teams': ['NJ'], 'year_opened': 2010
        },
        'Inter&Co Stadium': {
            'city': 'Orlando', 'state': 'FL',
            'lat': 28.5411, 'lng': -81.3893,
            'capacity': 25500, 'teams': ['ORL'], 'year_opened': 2017
        },
        'Providence Park': {
            'city': 'Portland', 'state': 'OR',
            'lat': 45.5214, 'lng': -122.6917,
            'capacity': 25218, 'teams': ['POR'], 'year_opened': 1926
        },
        'Lumen Field': {
            'city': 'Seattle', 'state': 'WA',
            'lat': 47.5952, 'lng': -122.3316,
            'capacity': 37722, 'teams': ['SEA'], 'year_opened': 2002
        },
        'Snapdragon Stadium': {
            'city': 'San Diego', 'state': 'CA',
            'lat': 32.7844, 'lng': -117.1228,
            'capacity': 35000, 'teams': ['SD'], 'year_opened': 2022
        },
        'America First Field': {
            'city': 'Sandy', 'state': 'UT',
            'lat': 40.5829, 'lng': -111.8934,
            'capacity': 20213, 'teams': ['UTA'], 'year_opened': 2008
        },
        'Audi Field': {
            'city': 'Washington', 'state': 'DC',
            'lat': 38.8684, 'lng': -77.0129,
            'capacity': 20000, 'teams': ['WAS'], 'year_opened': 2018
        },
        # NWSL-specific stadiums
        'SeatGeek Stadium': {
            'city': 'Bridgeview', 'state': 'IL',
            'lat': 41.7653, 'lng': -87.8049,
            'capacity': 20000, 'teams': ['CHI'], 'year_opened': 2006
        },
        'CPKC Stadium': {
            'city': 'Kansas City', 'state': 'MO',
            'lat': 39.0975, 'lng': -94.5556,
            'capacity': 11500, 'teams': ['KC'], 'year_opened': 2024
        },
        'WakeMed Soccer Park': {
            'city': 'Cary', 'state': 'NC',
            'lat': 35.8018, 'lng': -78.7442,
            'capacity': 10000, 'teams': ['NC'], 'year_opened': 2002
        },
    }

    stadiums = []
    for name, info in nwsl_stadiums.items():
        stadium = Stadium(
            id=_nwsl_stadium_id(name),
            name=name,
            city=info['city'],
            state=info['state'],
            latitude=info['lat'],
            longitude=info['lng'],
            capacity=info['capacity'],
            sport='NWSL',
            team_abbrevs=info['teams'],
            source='nwsl_hardcoded',
            year_opened=info.get('year_opened')
        )
        stadiums.append(stadium)

    return stadiums


def scrape_nwsl_stadiums() -> list[Stadium]:
    """
    Fetch NWSL stadium data with multi-source fallback.
    Hardcoded source is primary (has complete data).

    Prints a short section banner to stdout, then hands a single
    'Hardcoded' source (priority 1, min_venues 10) to
    ``scrape_stadiums_with_fallback`` and returns whatever it returns.
    """
    print("\nNWSL STADIUMS")
    print("-" * 40)

    sources = [
        StadiumScraperSource('Hardcoded', scrape_nwsl_stadiums_hardcoded, priority=1, min_venues=10),
    ]

    return scrape_stadiums_with_fallback('NWSL', sources)


# =============================================================================
# SOURCE CONFIGURATIONS
# =============================================================================

# Stadium sources for NWSL, exported for callers that assemble their own
# fallback run; mirrors the list built inside scrape_nwsl_stadiums().
NWSL_STADIUM_SOURCES = [
    StadiumScraperSource('Hardcoded', scrape_nwsl_stadiums_hardcoded, priority=1, min_venues=10),
]