feat(02.1-01): create MLS sport module with 30 hardcoded stadiums
Add complete MLS stadium data following established sport module pattern: - 30 MLS stadiums with capacity (soccer configuration) and year_opened - MLS_TEAMS dict with all 30 teams - get_mls_team_abbrev() function for team abbreviation lookup - scrape_mls_stadiums_hardcoded() as primary source - scrape_mls_stadiums_gavinr() as fallback source - MLS_STADIUM_SOURCES configuration for fallback system Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
343
Scripts/mls.py
Normal file
343
Scripts/mls.py
Normal file
@@ -0,0 +1,343 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
MLS schedule and stadium scrapers for SportsTime.
|
||||
|
||||
This module provides:
|
||||
- MLS game scrapers (ESPN, FBref, MLSSoccer.com) -- not yet implemented in this module
|
||||
- MLS stadium scrapers (gavinr GeoJSON, hardcoded)
|
||||
- Multi-source fallback configurations
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
|
||||
# Support both direct execution and import from parent directory
|
||||
try:
|
||||
from core import (
|
||||
Game,
|
||||
Stadium,
|
||||
ScraperSource,
|
||||
StadiumScraperSource,
|
||||
fetch_page,
|
||||
scrape_with_fallback,
|
||||
scrape_stadiums_with_fallback,
|
||||
)
|
||||
except ImportError:
|
||||
from Scripts.core import (
|
||||
Game,
|
||||
Stadium,
|
||||
ScraperSource,
|
||||
StadiumScraperSource,
|
||||
fetch_page,
|
||||
scrape_with_fallback,
|
||||
scrape_stadiums_with_fallback,
|
||||
)
|
||||
|
||||
|
||||
# Public API of this module; the trailing comments label each group of names.
__all__ = [
    'MLS_TEAMS',                      # team data
    'scrape_mls_stadiums_hardcoded',  # stadium scrapers
    'scrape_mls_stadiums_gavinr',
    'scrape_mls_stadiums',
    'MLS_STADIUM_SOURCES',            # source configurations
    'get_mls_team_abbrev',            # convenience functions
]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# TEAM MAPPINGS
|
||||
# =============================================================================
|
||||
|
||||
# MLS clubs keyed by abbreviation. Each value is a dict with the club's
# 'name', its 'city', and the name of its 'stadium'.
# Row layout below: (abbreviation, club name, city, stadium name).
MLS_TEAMS = {
    abbrev: {'name': club, 'city': city, 'stadium': venue}
    for abbrev, club, city, venue in (
        ('ATL', 'Atlanta United FC', 'Atlanta', 'Mercedes-Benz Stadium'),
        ('AUS', 'Austin FC', 'Austin', 'Q2 Stadium'),
        ('CLT', 'Charlotte FC', 'Charlotte', 'Bank of America Stadium'),
        ('CHI', 'Chicago Fire FC', 'Chicago', 'Soldier Field'),
        ('CIN', 'FC Cincinnati', 'Cincinnati', 'TQL Stadium'),
        ('COL', 'Colorado Rapids', 'Commerce City', "Dick's Sporting Goods Park"),
        ('CLB', 'Columbus Crew', 'Columbus', 'Lower.com Field'),
        ('DAL', 'FC Dallas', 'Frisco', 'Toyota Stadium'),
        ('DC', 'D.C. United', 'Washington', 'Audi Field'),
        ('HOU', 'Houston Dynamo FC', 'Houston', 'Shell Energy Stadium'),
        ('LAG', 'LA Galaxy', 'Carson', 'Dignity Health Sports Park'),
        ('LAFC', 'Los Angeles FC', 'Los Angeles', 'BMO Stadium'),
        ('MIA', 'Inter Miami CF', 'Fort Lauderdale', 'Chase Stadium'),
        ('MIN', 'Minnesota United FC', 'Saint Paul', 'Allianz Field'),
        ('MTL', 'CF Montreal', 'Montreal', 'Stade Saputo'),
        ('NSH', 'Nashville SC', 'Nashville', 'Geodis Park'),
        ('NE', 'New England Revolution', 'Foxborough', 'Gillette Stadium'),
        ('NYCFC', 'New York City FC', 'New York', 'Yankee Stadium'),
        ('NYRB', 'New York Red Bulls', 'Harrison', 'Red Bull Arena'),
        ('ORL', 'Orlando City SC', 'Orlando', 'Inter&Co Stadium'),
        ('PHI', 'Philadelphia Union', 'Chester', 'Subaru Park'),
        ('POR', 'Portland Timbers', 'Portland', 'Providence Park'),
        ('RSL', 'Real Salt Lake', 'Sandy', 'America First Field'),
        ('SJ', 'San Jose Earthquakes', 'San Jose', 'PayPal Park'),
        ('SEA', 'Seattle Sounders FC', 'Seattle', 'Lumen Field'),
        ('SKC', 'Sporting Kansas City', 'Kansas City', "Children's Mercy Park"),
        ('STL', 'St. Louis City SC', 'St. Louis', 'CityPark'),
        ('TOR', 'Toronto FC', 'Toronto', 'BMO Field'),
        ('VAN', 'Vancouver Whitecaps FC', 'Vancouver', 'BC Place'),
        ('SD', 'San Diego FC', 'San Diego', 'Snapdragon Stadium'),
    )
}
|
||||
|
||||
|
||||
def get_mls_team_abbrev(team_name: str) -> str:
    """Get MLS team abbreviation from a full or partial team name.

    Matching is case-insensitive and ignores surrounding whitespace. An exact
    match against a team's full name always wins; only when no team matches
    exactly is the input tried as a substring of each full name, in
    ``MLS_TEAMS`` iteration order.

    Args:
        team_name: Full or partial team name, e.g. ``'Seattle Sounders FC'``
            or ``'Sounders'``.

    Returns:
        The matching key of ``MLS_TEAMS``. If nothing matches, the first three
        characters of the cleaned input, upper-cased. Blank or missing input
        yields an empty string.
    """
    needle = (team_name or '').strip().lower()
    if not needle:
        # '' is a substring of every name, so without this guard blank input
        # would be reported as the first team in MLS_TEAMS.
        return ''

    # Lower-case every full name once instead of on each comparison.
    full_names = {abbrev: info['name'].lower() for abbrev, info in MLS_TEAMS.items()}

    # Pass 1: exact matches across ALL teams, so a partial hit on an earlier
    # entry can never shadow an exact hit on a later one.
    for abbrev, full_name in full_names.items():
        if full_name == needle:
            return abbrev

    # Pass 2: substring match; the first team in table order wins.
    for abbrev, full_name in full_names.items():
        if needle in full_name:
            return abbrev

    # Return first 3 letters as fallback
    return needle[:3].upper()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# STADIUM SCRAPERS
|
||||
# =============================================================================
|
||||
|
||||
def scrape_mls_stadiums_hardcoded() -> list[Stadium]:
    """
    Source 1: Hardcoded MLS stadiums with complete data.

    Builds one Stadium per row of the built-in table below (30 venues), each
    carrying a capacity (soccer configuration) and a year_opened. No network
    access is performed.

    Returns:
        Stadium records in table order, all tagged sport='MLS' and
        source='mls_hardcoded'.
    """
    # Columns: name, city, state/province, latitude, longitude,
    #          capacity, team abbreviation, year opened.
    venue_rows = (
        ('Mercedes-Benz Stadium', 'Atlanta', 'GA', 33.7555, -84.4000, 42500, 'ATL', 2017),
        ('Q2 Stadium', 'Austin', 'TX', 30.3877, -97.7195, 20738, 'AUS', 2021),
        ('Bank of America Stadium', 'Charlotte', 'NC', 35.2258, -80.8528, 38000, 'CLT', 1996),
        ('Soldier Field', 'Chicago', 'IL', 41.8623, -87.6167, 24995, 'CHI', 1924),
        ('TQL Stadium', 'Cincinnati', 'OH', 39.1114, -84.5222, 26000, 'CIN', 2021),
        ("Dick's Sporting Goods Park", 'Commerce City', 'CO', 39.8056, -104.8919, 18061, 'COL', 2007),
        ('Lower.com Field', 'Columbus', 'OH', 39.9685, -83.0171, 20371, 'CLB', 2021),
        ('Toyota Stadium', 'Frisco', 'TX', 33.1544, -96.8353, 20500, 'DAL', 2005),
        ('Audi Field', 'Washington', 'DC', 38.8684, -77.0129, 20000, 'DC', 2018),
        ('Shell Energy Stadium', 'Houston', 'TX', 29.7522, -95.3524, 22039, 'HOU', 2012),
        ('Dignity Health Sports Park', 'Carson', 'CA', 33.8640, -118.2610, 27000, 'LAG', 2003),
        ('BMO Stadium', 'Los Angeles', 'CA', 34.0128, -118.2841, 22000, 'LAFC', 2018),
        ('Chase Stadium', 'Fort Lauderdale', 'FL', 26.1933, -80.1607, 21550, 'MIA', 2020),
        ('Allianz Field', 'Saint Paul', 'MN', 44.9531, -93.1647, 19400, 'MIN', 2019),
        ('Stade Saputo', 'Montreal', 'QC', 45.5631, -73.5525, 19619, 'MTL', 2008),
        ('Geodis Park', 'Nashville', 'TN', 36.1301, -86.7660, 30000, 'NSH', 2022),
        ('Gillette Stadium', 'Foxborough', 'MA', 42.0909, -71.2643, 22385, 'NE', 2002),
        ('Yankee Stadium', 'Bronx', 'NY', 40.8292, -73.9264, 28000, 'NYCFC', 2009),
        ('Red Bull Arena', 'Harrison', 'NJ', 40.7367, -74.1503, 25000, 'NYRB', 2010),
        ('Inter&Co Stadium', 'Orlando', 'FL', 28.5411, -81.3893, 25500, 'ORL', 2017),
        ('Subaru Park', 'Chester', 'PA', 39.8322, -75.3789, 18500, 'PHI', 2010),
        ('Providence Park', 'Portland', 'OR', 45.5214, -122.6917, 25218, 'POR', 1926),
        ('America First Field', 'Sandy', 'UT', 40.5829, -111.8934, 20213, 'RSL', 2008),
        ('PayPal Park', 'San Jose', 'CA', 37.3514, -121.9250, 18000, 'SJ', 2015),
        ('Lumen Field', 'Seattle', 'WA', 47.5952, -122.3316, 37722, 'SEA', 2002),
        ("Children's Mercy Park", 'Kansas City', 'KS', 39.1217, -94.8232, 18467, 'SKC', 2011),
        ('CityPark', 'St. Louis', 'MO', 38.6314, -90.2103, 22500, 'STL', 2023),
        ('BMO Field', 'Toronto', 'ON', 43.6332, -79.4186, 30000, 'TOR', 2007),
        ('BC Place', 'Vancouver', 'BC', 49.2767, -123.1119, 22120, 'VAN', 1983),
        ('Snapdragon Stadium', 'San Diego', 'CA', 32.7844, -117.1228, 35000, 'SD', 2022),
    )

    # ID normalization applied to the lower-cased name: spaces become
    # underscores, '&' becomes 'and', dots and apostrophes are dropped.
    id_chars = str.maketrans({' ': '_', '&': 'and', '.': None, "'": None})

    return [
        Stadium(
            # The normalized name is capped at 30 characters after the prefix.
            id=f"mls_{venue.lower().translate(id_chars)[:30]}",
            name=venue,
            city=city,
            state=state,
            latitude=lat,
            longitude=lng,
            capacity=capacity,
            sport='MLS',
            team_abbrevs=[team],
            source='mls_hardcoded',
            year_opened=opened,
        )
        for venue, city, state, lat, lng, capacity, team, opened in venue_rows
    ]
|
||||
|
||||
|
||||
def scrape_mls_stadiums_gavinr() -> list[Stadium]:
    """
    Source 2: gavinr/usa-soccer GeoJSON (fallback for coordinates).

    Downloads the GeoJSON feature collection and builds one Stadium per
    feature that names a stadium. year_opened is never set from this source,
    and capacity falls back to 0 when a feature does not supply one.

    NOTE(review): the property names read here ('stadium', 'team', 'city',
    'state', 'capacity') are assumed -- confirm against the live GeoJSON.

    Returns:
        Stadium records tagged sport='MLS' and source='github.com/gavinr'.
        Features without a stadium name are skipped.

    Raises:
        requests.RequestException: the download failed or returned an HTTP
            error status.
        ValueError: the response body is not valid JSON.
    """
    url = "https://raw.githubusercontent.com/gavinr/usa-soccer/master/mls.geojson"

    response = requests.get(url, timeout=30)
    response.raise_for_status()
    data = response.json()

    # Same ID normalization as scrape_mls_stadiums_hardcoded, so one venue
    # gets the same ID whichever source produced it.
    id_chars = str.maketrans({' ': '_', '&': 'and', '.': None, "'": None})

    stadiums = []
    for feature in data.get('features') or []:
        # GeoJSON allows "properties" and "geometry" to be null; `or` covers
        # both a missing key and an explicit null.
        props = feature.get('properties') or {}
        geometry = feature.get('geometry') or {}
        coords = geometry.get('coordinates') or [0, 0]

        name = (props.get('stadium') or '').strip()
        if not name:
            # A nameless record would get the bare ID "mls_" and still count
            # toward the source's minimum-venue threshold.
            continue

        team = props.get('team') or ''

        stadiums.append(Stadium(
            id=f"mls_{name.lower().translate(id_chars)[:30]}",
            name=name,
            city=props.get('city') or '',
            state=props.get('state') or '',
            # GeoJSON positions are [longitude, latitude].
            latitude=coords[1] if len(coords) > 1 else 0,
            longitude=coords[0] if len(coords) > 0 else 0,
            capacity=props.get('capacity') or 0,
            sport='MLS',
            # Only look up an abbreviation when a team is actually given, so
            # a missing team is never attributed to an arbitrary club.
            team_abbrevs=[get_mls_team_abbrev(team)] if team else [],
            source='github.com/gavinr',
        ))

    return stadiums
|
||||
|
||||
|
||||
def scrape_mls_stadiums() -> list[Stadium]:
    """
    Fetch MLS stadium data with multi-source fallback.

    Prints a section banner, then hands the module's configured sources
    (MLS_STADIUM_SOURCES) to scrape_stadiums_with_fallback. The hardcoded
    source is listed first there with priority=1 (it has complete data).

    Returns:
        Whatever scrape_stadiums_with_fallback returns for sport 'MLS'.
    """
    print("\nMLS STADIUMS")
    print("-" * 40)

    # Use the module-level configuration rather than a second, hand-copied
    # list, so the two cannot drift apart. The shallow copy leaves that
    # configuration untouched should the fallback helper reorder its argument.
    return scrape_stadiums_with_fallback('MLS', list(MLS_STADIUM_SOURCES))
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SOURCE CONFIGURATIONS
|
||||
# =============================================================================
|
||||
|
||||
# Stadium sources for MLS: the hardcoded table first, the gavinr GeoJSON
# download second.
# NOTE(review): min_venues is presumably the minimum number of venues a source
# must return to be accepted -- confirm against StadiumScraperSource in core.
MLS_STADIUM_SOURCES = [
    StadiumScraperSource(
        'Hardcoded',
        scrape_mls_stadiums_hardcoded,
        priority=1,
        min_venues=25,
    ),
    StadiumScraperSource(
        'gavinr GeoJSON',
        scrape_mls_stadiums_gavinr,
        priority=2,
        min_venues=20,
    ),
]
|
||||
Reference in New Issue
Block a user