#!/usr/bin/env python3
"""
MLS team metadata and stadium scrapers for SportsTime.

This module provides:
- MLS team mappings and a team-name -> abbreviation lookup
- MLS stadium scrapers (hardcoded table, gavinr GeoJSON)
- Multi-source fallback configuration for stadium data

Note: no MLS game/schedule scrapers are defined in this module; the
game-related names imported from ``core`` below are currently unused here.
"""

from typing import Optional

import requests

# Support both direct execution and import from parent directory
try:
    from core import (
        Game,
        Stadium,
        ScraperSource,
        StadiumScraperSource,
        fetch_page,
        scrape_with_fallback,
        scrape_stadiums_with_fallback,
    )
except ImportError:
    from Scripts.core import (
        Game,
        Stadium,
        ScraperSource,
        StadiumScraperSource,
        fetch_page,
        scrape_with_fallback,
        scrape_stadiums_with_fallback,
    )


__all__ = [
    # Team data
    'MLS_TEAMS',
    # Stadium scrapers
    'scrape_mls_stadiums_hardcoded',
    'scrape_mls_stadiums_gavinr',
    'scrape_mls_stadiums',
    # Source configurations
    'MLS_STADIUM_SOURCES',
    # Convenience functions
    'get_mls_team_abbrev',
]


# =============================================================================
# TEAM MAPPINGS
# =============================================================================

# Keyed by team abbreviation; each value carries the team's full name, home
# city and home stadium name.
MLS_TEAMS = {
    'ATL': {'name': 'Atlanta United FC', 'city': 'Atlanta', 'stadium': 'Mercedes-Benz Stadium'},
    'AUS': {'name': 'Austin FC', 'city': 'Austin', 'stadium': 'Q2 Stadium'},
    'CLT': {'name': 'Charlotte FC', 'city': 'Charlotte', 'stadium': 'Bank of America Stadium'},
    'CHI': {'name': 'Chicago Fire FC', 'city': 'Chicago', 'stadium': 'Soldier Field'},
    'CIN': {'name': 'FC Cincinnati', 'city': 'Cincinnati', 'stadium': 'TQL Stadium'},
    'COL': {'name': 'Colorado Rapids', 'city': 'Commerce City', 'stadium': "Dick's Sporting Goods Park"},
    'CLB': {'name': 'Columbus Crew', 'city': 'Columbus', 'stadium': 'Lower.com Field'},
    'DAL': {'name': 'FC Dallas', 'city': 'Frisco', 'stadium': 'Toyota Stadium'},
    'DC': {'name': 'D.C. United', 'city': 'Washington', 'stadium': 'Audi Field'},
    'HOU': {'name': 'Houston Dynamo FC', 'city': 'Houston', 'stadium': 'Shell Energy Stadium'},
    'LAG': {'name': 'LA Galaxy', 'city': 'Carson', 'stadium': 'Dignity Health Sports Park'},
    'LAFC': {'name': 'Los Angeles FC', 'city': 'Los Angeles', 'stadium': 'BMO Stadium'},
    'MIA': {'name': 'Inter Miami CF', 'city': 'Fort Lauderdale', 'stadium': 'Chase Stadium'},
    'MIN': {'name': 'Minnesota United FC', 'city': 'Saint Paul', 'stadium': 'Allianz Field'},
    'MTL': {'name': 'CF Montreal', 'city': 'Montreal', 'stadium': 'Stade Saputo'},
    'NSH': {'name': 'Nashville SC', 'city': 'Nashville', 'stadium': 'Geodis Park'},
    'NE': {'name': 'New England Revolution', 'city': 'Foxborough', 'stadium': 'Gillette Stadium'},
    'NYCFC': {'name': 'New York City FC', 'city': 'New York', 'stadium': 'Yankee Stadium'},
    'NYRB': {'name': 'New York Red Bulls', 'city': 'Harrison', 'stadium': 'Red Bull Arena'},
    'ORL': {'name': 'Orlando City SC', 'city': 'Orlando', 'stadium': 'Inter&Co Stadium'},
    'PHI': {'name': 'Philadelphia Union', 'city': 'Chester', 'stadium': 'Subaru Park'},
    'POR': {'name': 'Portland Timbers', 'city': 'Portland', 'stadium': 'Providence Park'},
    'RSL': {'name': 'Real Salt Lake', 'city': 'Sandy', 'stadium': 'America First Field'},
    'SJ': {'name': 'San Jose Earthquakes', 'city': 'San Jose', 'stadium': 'PayPal Park'},
    'SEA': {'name': 'Seattle Sounders FC', 'city': 'Seattle', 'stadium': 'Lumen Field'},
    'SKC': {'name': 'Sporting Kansas City', 'city': 'Kansas City', 'stadium': "Children's Mercy Park"},
    'STL': {'name': 'St. Louis City SC', 'city': 'St. Louis', 'stadium': 'CityPark'},
    'TOR': {'name': 'Toronto FC', 'city': 'Toronto', 'stadium': 'BMO Field'},
    'VAN': {'name': 'Vancouver Whitecaps FC', 'city': 'Vancouver', 'stadium': 'BC Place'},
    'SD': {'name': 'San Diego FC', 'city': 'San Diego', 'stadium': 'Snapdragon Stadium'},
}


def get_mls_team_abbrev(team_name: str) -> str:
    """
    Get the MLS team abbreviation for a team name.

    Lookup order (first hit wins):
      1. The input already is a known abbreviation (case-insensitive).
      2. Exact, case-insensitive match on the full team name.
      3. Partial match in either direction, so both a shortened name
         ("Houston Dynamo") and an extended one ("Columbus Crew SC")
         resolve to the right club.
      4. Fallback: the first three characters of the input, upper-cased.

    Args:
        team_name: Team name (or abbreviation) to resolve. Surrounding
            whitespace is ignored.

    Returns:
        The matching key of ``MLS_TEAMS``, the 3-letter fallback when nothing
        matches, or ``''`` when the input is empty/blank.
    """
    cleaned = (team_name or '').strip()
    query = cleaned.lower()

    # An empty string is a substring of every name; without this guard a
    # missing team would silently resolve to the first table entry.
    if not query:
        return ''

    # Already an abbreviation? Checked first so that e.g. 'NE' is not
    # captured by the substring pass ("ne" occurs in "Minnesota").
    as_abbrev = cleaned.upper()
    if as_abbrev in MLS_TEAMS:
        return as_abbrev

    # Exact matches are resolved over the whole table before any partial
    # match is considered, so an earlier partial hit can never win over a
    # later exact one.
    for abbrev, info in MLS_TEAMS.items():
        if info['name'].lower() == query:
            return abbrev

    for abbrev, info in MLS_TEAMS.items():
        known = info['name'].lower()
        if query in known or known in query:
            return abbrev

    # Return first 3 letters as fallback
    return cleaned[:3].upper()


# =============================================================================
# STADIUM SCRAPERS
# =============================================================================

# Single-pass character mapping used to build stadium IDs: spaces become
# underscores, '&' is spelled out, and '.' / apostrophes are dropped.
_STADIUM_ID_TRANSLATION = str.maketrans({' ': '_', '&': 'and', '.': None, "'": None})


def _mls_stadium_id(name: str) -> str:
    """Build the ``mls_``-prefixed stadium ID from a stadium name.

    The name is lower-cased, normalized via ``_STADIUM_ID_TRANSLATION`` and
    truncated to 30 characters before the prefix is added. Shared by every
    stadium source so the same venue gets the same ID regardless of source.
    """
    normalized_name = name.lower().translate(_STADIUM_ID_TRANSLATION)
    return f"mls_{normalized_name[:30]}"


def scrape_mls_stadiums_hardcoded() -> list[Stadium]:
    """
    Source 1: Hardcoded MLS stadiums with complete data.

    All 30 MLS stadiums with capacity (soccer configuration) and year_opened.

    Returns:
        One ``Stadium`` per entry of the table below, with ``sport='MLS'``
        and ``source='mls_hardcoded'``.
    """
    mls_stadiums = {
        'Mercedes-Benz Stadium': {
            'city': 'Atlanta', 'state': 'GA',
            'lat': 33.7555, 'lng': -84.4000,
            'capacity': 42500, 'teams': ['ATL'], 'year_opened': 2017
        },
        'Q2 Stadium': {
            'city': 'Austin', 'state': 'TX',
            'lat': 30.3877, 'lng': -97.7195,
            'capacity': 20738, 'teams': ['AUS'], 'year_opened': 2021
        },
        'Bank of America Stadium': {
            'city': 'Charlotte', 'state': 'NC',
            'lat': 35.2258, 'lng': -80.8528,
            'capacity': 38000, 'teams': ['CLT'], 'year_opened': 1996
        },
        'Soldier Field': {
            'city': 'Chicago', 'state': 'IL',
            'lat': 41.8623, 'lng': -87.6167,
            'capacity': 24995, 'teams': ['CHI'], 'year_opened': 1924
        },
        'TQL Stadium': {
            'city': 'Cincinnati', 'state': 'OH',
            'lat': 39.1114, 'lng': -84.5222,
            'capacity': 26000, 'teams': ['CIN'], 'year_opened': 2021
        },
        "Dick's Sporting Goods Park": {
            'city': 'Commerce City', 'state': 'CO',
            'lat': 39.8056, 'lng': -104.8919,
            'capacity': 18061, 'teams': ['COL'], 'year_opened': 2007
        },
        'Lower.com Field': {
            'city': 'Columbus', 'state': 'OH',
            'lat': 39.9685, 'lng': -83.0171,
            'capacity': 20371, 'teams': ['CLB'], 'year_opened': 2021
        },
        'Toyota Stadium': {
            'city': 'Frisco', 'state': 'TX',
            'lat': 33.1544, 'lng': -96.8353,
            'capacity': 20500, 'teams': ['DAL'], 'year_opened': 2005
        },
        'Audi Field': {
            'city': 'Washington', 'state': 'DC',
            'lat': 38.8684, 'lng': -77.0129,
            'capacity': 20000, 'teams': ['DC'], 'year_opened': 2018
        },
        'Shell Energy Stadium': {
            'city': 'Houston', 'state': 'TX',
            'lat': 29.7522, 'lng': -95.3524,
            'capacity': 22039, 'teams': ['HOU'], 'year_opened': 2012
        },
        'Dignity Health Sports Park': {
            'city': 'Carson', 'state': 'CA',
            'lat': 33.8640, 'lng': -118.2610,
            'capacity': 27000, 'teams': ['LAG'], 'year_opened': 2003
        },
        'BMO Stadium': {
            'city': 'Los Angeles', 'state': 'CA',
            'lat': 34.0128, 'lng': -118.2841,
            'capacity': 22000, 'teams': ['LAFC'], 'year_opened': 2018
        },
        'Chase Stadium': {
            'city': 'Fort Lauderdale', 'state': 'FL',
            'lat': 26.1933, 'lng': -80.1607,
            'capacity': 21550, 'teams': ['MIA'], 'year_opened': 2020
        },
        'Allianz Field': {
            'city': 'Saint Paul', 'state': 'MN',
            'lat': 44.9531, 'lng': -93.1647,
            'capacity': 19400, 'teams': ['MIN'], 'year_opened': 2019
        },
        'Stade Saputo': {
            'city': 'Montreal', 'state': 'QC',
            'lat': 45.5631, 'lng': -73.5525,
            'capacity': 19619, 'teams': ['MTL'], 'year_opened': 2008
        },
        'Geodis Park': {
            'city': 'Nashville', 'state': 'TN',
            'lat': 36.1301, 'lng': -86.7660,
            'capacity': 30000, 'teams': ['NSH'], 'year_opened': 2022
        },
        'Gillette Stadium': {
            'city': 'Foxborough', 'state': 'MA',
            'lat': 42.0909, 'lng': -71.2643,
            'capacity': 22385, 'teams': ['NE'], 'year_opened': 2002
        },
        'Yankee Stadium': {
            'city': 'Bronx', 'state': 'NY',
            'lat': 40.8292, 'lng': -73.9264,
            'capacity': 28000, 'teams': ['NYCFC'], 'year_opened': 2009
        },
        'Red Bull Arena': {
            'city': 'Harrison', 'state': 'NJ',
            'lat': 40.7367, 'lng': -74.1503,
            'capacity': 25000, 'teams': ['NYRB'], 'year_opened': 2010
        },
        'Inter&Co Stadium': {
            'city': 'Orlando', 'state': 'FL',
            'lat': 28.5411, 'lng': -81.3893,
            'capacity': 25500, 'teams': ['ORL'], 'year_opened': 2017
        },
        'Subaru Park': {
            'city': 'Chester', 'state': 'PA',
            'lat': 39.8322, 'lng': -75.3789,
            'capacity': 18500, 'teams': ['PHI'], 'year_opened': 2010
        },
        'Providence Park': {
            'city': 'Portland', 'state': 'OR',
            'lat': 45.5214, 'lng': -122.6917,
            'capacity': 25218, 'teams': ['POR'], 'year_opened': 1926
        },
        'America First Field': {
            'city': 'Sandy', 'state': 'UT',
            'lat': 40.5829, 'lng': -111.8934,
            'capacity': 20213, 'teams': ['RSL'], 'year_opened': 2008
        },
        'PayPal Park': {
            'city': 'San Jose', 'state': 'CA',
            'lat': 37.3514, 'lng': -121.9250,
            'capacity': 18000, 'teams': ['SJ'], 'year_opened': 2015
        },
        'Lumen Field': {
            'city': 'Seattle', 'state': 'WA',
            'lat': 47.5952, 'lng': -122.3316,
            'capacity': 37722, 'teams': ['SEA'], 'year_opened': 2002
        },
        "Children's Mercy Park": {
            'city': 'Kansas City', 'state': 'KS',
            'lat': 39.1217, 'lng': -94.8232,
            'capacity': 18467, 'teams': ['SKC'], 'year_opened': 2011
        },
        'CityPark': {
            'city': 'St. Louis', 'state': 'MO',
            'lat': 38.6314, 'lng': -90.2103,
            'capacity': 22500, 'teams': ['STL'], 'year_opened': 2023
        },
        'BMO Field': {
            'city': 'Toronto', 'state': 'ON',
            'lat': 43.6332, 'lng': -79.4186,
            'capacity': 30000, 'teams': ['TOR'], 'year_opened': 2007
        },
        'BC Place': {
            'city': 'Vancouver', 'state': 'BC',
            'lat': 49.2767, 'lng': -123.1119,
            'capacity': 22120, 'teams': ['VAN'], 'year_opened': 1983
        },
        'Snapdragon Stadium': {
            'city': 'San Diego', 'state': 'CA',
            'lat': 32.7844, 'lng': -117.1228,
            'capacity': 35000, 'teams': ['SD'], 'year_opened': 2022
        },
    }

    stadiums = []
    for name, info in mls_stadiums.items():
        stadium = Stadium(
            id=_mls_stadium_id(name),
            name=name,
            city=info['city'],
            state=info['state'],
            latitude=info['lat'],
            longitude=info['lng'],
            capacity=info['capacity'],
            sport='MLS',
            team_abbrevs=info['teams'],
            source='mls_hardcoded',
            year_opened=info.get('year_opened')
        )
        stadiums.append(stadium)

    return stadiums


def scrape_mls_stadiums_gavinr() -> list[Stadium]:
    """
    Source 2: gavinr/usa-soccer GeoJSON (fallback for coordinates).

    Note: This source lacks capacity and year_opened data.

    Stadium IDs are built with the same normalization as the hardcoded
    source, so a venue keeps its ID no matter which source supplied it.

    Returns:
        One ``Stadium`` per GeoJSON feature, with ``sport='MLS'`` and
        ``source='github.com/gavinr'``. Missing coordinates default to 0,
        missing capacity to 0, and ``team_abbrevs`` is empty when the
        feature names no team.

    Raises:
        requests.RequestException: If the download fails, times out (30 s)
            or returns an HTTP error status.
    """
    url = "https://raw.githubusercontent.com/gavinr/usa-soccer/master/mls.geojson"

    response = requests.get(url, timeout=30)
    response.raise_for_status()
    data = response.json()

    stadiums = []
    # `or` fallbacks (instead of .get(key, default)) also cover keys that
    # are present but null, which GeoJSON permits for geometry/properties.
    for feature in data.get('features') or []:
        props = feature.get('properties') or {}
        geometry = feature.get('geometry') or {}
        coords = geometry.get('coordinates') or [0, 0]

        name = props.get('stadium') or ''
        abbrev = get_mls_team_abbrev(props.get('team') or '')

        stadium = Stadium(
            id=_mls_stadium_id(name),
            name=name,
            city=props.get('city', ''),
            state=props.get('state', ''),
            # GeoJSON positions are ordered [longitude, latitude].
            latitude=coords[1] if len(coords) > 1 else 0,
            longitude=coords[0] if len(coords) > 0 else 0,
            capacity=props.get('capacity', 0),
            sport='MLS',
            team_abbrevs=[abbrev] if abbrev else [],
            source='github.com/gavinr'
        )
        stadiums.append(stadium)

    return stadiums


def scrape_mls_stadiums() -> list[Stadium]:
    """
    Fetch MLS stadium data with multi-source fallback.

    Hardcoded source is primary (has complete data). The sources come from
    ``MLS_STADIUM_SOURCES`` so the function and the exported configuration
    cannot drift apart; a copy of the list is handed over so the callee
    cannot reorder the module-level constant.

    Returns:
        Whatever ``scrape_stadiums_with_fallback`` returns for the 'MLS'
        sources.
    """
    print("\nMLS STADIUMS")
    print("-" * 40)

    return scrape_stadiums_with_fallback('MLS', list(MLS_STADIUM_SOURCES))


# =============================================================================
# SOURCE CONFIGURATIONS
# =============================================================================

MLS_STADIUM_SOURCES = [
    StadiumScraperSource('Hardcoded', scrape_mls_stadiums_hardcoded, priority=1, min_venues=25),
    StadiumScraperSource('gavinr GeoJSON', scrape_mls_stadiums_gavinr, priority=2, min_venues=20),
]