diff --git a/Scripts/scrape_schedules.py b/Scripts/scrape_schedules.py index 1bd7d76..a2dbb2d 100644 --- a/Scripts/scrape_schedules.py +++ b/Scripts/scrape_schedules.py @@ -67,11 +67,18 @@ from nfl import ( get_nfl_season_string, NFL_TEAMS, ) +from mls import ( + MLS_TEAMS, + get_mls_team_abbrev, + scrape_mls_stadiums, + MLS_STADIUM_SOURCES, +) # ============================================================================= # NON-CORE SPORT TEAM MAPPINGS -# TODO: Extract to separate modules (wnba.py, mls.py, nwsl.py, cbb.py) +# TODO: Extract to separate modules (wnba.py, nwsl.py, cbb.py) +# NOTE: MLS_TEAMS is now imported from mls.py # ============================================================================= WNBA_TEAMS = { @@ -90,39 +97,6 @@ WNBA_TEAMS = { 'WAS': {'name': 'Washington Mystics', 'city': 'Washington', 'arena': 'Entertainment & Sports Arena'}, } -MLS_TEAMS = { - 'ATL': {'name': 'Atlanta United FC', 'city': 'Atlanta', 'stadium': 'Mercedes-Benz Stadium'}, - 'AUS': {'name': 'Austin FC', 'city': 'Austin', 'stadium': 'Q2 Stadium'}, - 'CLT': {'name': 'Charlotte FC', 'city': 'Charlotte', 'stadium': 'Bank of America Stadium'}, - 'CHI': {'name': 'Chicago Fire FC', 'city': 'Chicago', 'stadium': 'Soldier Field'}, - 'CIN': {'name': 'FC Cincinnati', 'city': 'Cincinnati', 'stadium': 'TQL Stadium'}, - 'COL': {'name': 'Colorado Rapids', 'city': 'Commerce City', 'stadium': "Dick's Sporting Goods Park"}, - 'CLB': {'name': 'Columbus Crew', 'city': 'Columbus', 'stadium': 'Lower.com Field'}, - 'DAL': {'name': 'FC Dallas', 'city': 'Frisco', 'stadium': 'Toyota Stadium'}, - 'DC': {'name': 'D.C. United', 'city': 'Washington', 'stadium': 'Audi Field'}, - 'HOU': {'name': 'Houston Dynamo FC', 'city': 'Houston', 'stadium': 'Shell Energy Stadium'}, - 'LAG': {'name': 'LA Galaxy', 'city': 'Carson', 'stadium': 'Dignity Health Sports Park'}, - 'LAFC': {'name': 'Los Angeles FC', 'city': 'Los Angeles', 'stadium': 'BMO Stadium'}, - 'MIA': {'name': 'Inter Miami CF', 'city': 'Fort Lauderdale', 'stadium': 'Chase Stadium'}, - 'MIN': {'name': 'Minnesota United FC', 'city': 'Saint Paul', 'stadium': 'Allianz Field'}, - 'MTL': {'name': 'CF Montreal', 'city': 'Montreal', 'stadium': 'Stade Saputo'}, - 'NSH': {'name': 'Nashville SC', 'city': 'Nashville', 'stadium': 'Geodis Park'}, - 'NE': {'name': 'New England Revolution', 'city': 'Foxborough', 'stadium': 'Gillette Stadium'}, - 'NYCFC': {'name': 'New York City FC', 'city': 'New York', 'stadium': 'Yankee Stadium'}, - 'NYRB': {'name': 'New York Red Bulls', 'city': 'Harrison', 'stadium': 'Red Bull Arena'}, - 'ORL': {'name': 'Orlando City SC', 'city': 'Orlando', 'stadium': 'Inter&Co Stadium'}, - 'PHI': {'name': 'Philadelphia Union', 'city': 'Chester', 'stadium': 'Subaru Park'}, - 'POR': {'name': 'Portland Timbers', 'city': 'Portland', 'stadium': 'Providence Park'}, - 'RSL': {'name': 'Real Salt Lake', 'city': 'Sandy', 'stadium': 'America First Field'}, - 'SJ': {'name': 'San Jose Earthquakes', 'city': 'San Jose', 'stadium': 'PayPal Park'}, - 'SEA': {'name': 'Seattle Sounders FC', 'city': 'Seattle', 'stadium': 'Lumen Field'}, - 'SKC': {'name': 'Sporting Kansas City', 'city': 'Kansas City', 'stadium': "Children's Mercy Park"}, - 'STL': {'name': 'St. Louis City SC', 'city': 'St. Louis', 'stadium': 'CityPark'}, - 'TOR': {'name': 'Toronto FC', 'city': 'Toronto', 'stadium': 'BMO Field'}, - 'VAN': {'name': 'Vancouver Whitecaps FC', 'city': 'Vancouver', 'stadium': 'BC Place'}, - 'SD': {'name': 'San Diego FC', 'city': 'San Diego', 'stadium': 'Snapdragon Stadium'}, -} - NWSL_TEAMS = { 'LA': {'name': 'Angel City FC', 'city': 'Los Angeles', 'stadium': 'BMO Stadium'}, 'SJ': {'name': 'Bay FC', 'city': 'San Jose', 'stadium': 'PayPal Park'}, @@ -408,87 +382,10 @@ def scrape_cbb_cbssports(season: int) -> list[Game]: # ============================================================================= # NON-CORE STADIUM SCRAPERS -# TODO: Extract to separate modules +# TODO: Extract to separate modules (wnba.py, nwsl.py, cbb.py) +# NOTE: scrape_mls_stadiums() is now imported from mls.py # ============================================================================= -def scrape_mls_stadiums_geojson() -> list[Stadium]: - """Source 1: gavinr/usa-soccer GeoJSON.""" - stadiums = [] - url = "https://raw.githubusercontent.com/gavinr/usa-soccer/master/mls.geojson" - - response = requests.get(url, timeout=30) - response.raise_for_status() - data = response.json() - - for feature in data.get('features', []): - props = feature.get('properties', {}) - coords = feature.get('geometry', {}).get('coordinates', [0, 0]) - - stadium = Stadium( - id=f"mls_{props.get('stadium', '').lower().replace(' ', '_')[:30]}", - name=props.get('stadium', ''), - city=props.get('city', ''), - state=props.get('state', ''), - latitude=coords[1] if len(coords) > 1 else 0, - longitude=coords[0] if len(coords) > 0 else 0, - capacity=props.get('capacity', 0), - sport='MLS', - team_abbrevs=[props.get('team', '')], - source='github.com/gavinr' - ) - stadiums.append(stadium) - - return stadiums - - -def scrape_mls_stadiums_csv() -> list[Stadium]: - """Source 2: gavinr/usa-soccer CSV.""" - stadiums = [] - url = "https://raw.githubusercontent.com/gavinr/usa-soccer/master/mls.csv" - - response = requests.get(url, timeout=30) - response.raise_for_status() - - reader = csv.DictReader(StringIO(response.text)) - - for row in reader: - stadium = Stadium( - id=f"mls_{row.get('stadium', '').lower().replace(' ', '_')[:30]}", - name=row.get('stadium', ''), - city=row.get('city', ''), - state=row.get('state', ''), - latitude=float(row.get('lat', 0) or 0), - longitude=float(row.get('lng', 0) or 0), - capacity=int(row.get('capacity', 0) or 0), - sport='MLS', - team_abbrevs=[row.get('team', '')], - source='github.com/gavinr/csv' - ) - stadiums.append(stadium) - - return stadiums - - -def scrape_mls_stadiums_hardcoded() -> list[Stadium]: - """Source 3: Hardcoded MLS stadiums (fallback).""" - # Placeholder - would include full stadium list - return [] - - -def scrape_mls_stadiums() -> list[Stadium]: - """Fetch MLS stadium data with multi-source fallback.""" - print("\nMLS STADIUMS") - print("-" * 40) - - sources = [ - StadiumScraperSource('gavinr GeoJSON', scrape_mls_stadiums_geojson, priority=1, min_venues=20), - StadiumScraperSource('gavinr CSV', scrape_mls_stadiums_csv, priority=2, min_venues=20), - StadiumScraperSource('Hardcoded', scrape_mls_stadiums_hardcoded, priority=3, min_venues=20), - ] - - return scrape_stadiums_with_fallback('MLS', sources) - - def scrape_wnba_stadiums() -> list[Stadium]: """Fetch WNBA arena data (hardcoded).""" print("\nWNBA STADIUMS")