feat(02.1-01): integrate MLS module with scrape_schedules.py
- Import MLS_TEAMS, get_mls_team_abbrev, scrape_mls_stadiums from mls.py
- Remove inline MLS_TEAMS dict (now imported from module)
- Remove inline MLS stadium scraper functions (now in mls.py)
- Update TODO comments to reflect MLS extraction complete

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -67,11 +67,18 @@ from nfl import (
|
|||||||
get_nfl_season_string,
|
get_nfl_season_string,
|
||||||
NFL_TEAMS,
|
NFL_TEAMS,
|
||||||
)
|
)
|
||||||
|
from mls import (
|
||||||
|
MLS_TEAMS,
|
||||||
|
get_mls_team_abbrev,
|
||||||
|
scrape_mls_stadiums,
|
||||||
|
MLS_STADIUM_SOURCES,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# NON-CORE SPORT TEAM MAPPINGS
|
# NON-CORE SPORT TEAM MAPPINGS
|
||||||
# TODO: Extract to separate modules (wnba.py, mls.py, nwsl.py, cbb.py)
|
# TODO: Extract to separate modules (wnba.py, nwsl.py, cbb.py)
|
||||||
|
# NOTE: MLS_TEAMS is now imported from mls.py
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
WNBA_TEAMS = {
|
WNBA_TEAMS = {
|
||||||
@@ -90,39 +97,6 @@ WNBA_TEAMS = {
|
|||||||
'WAS': {'name': 'Washington Mystics', 'city': 'Washington', 'arena': 'Entertainment & Sports Arena'},
|
'WAS': {'name': 'Washington Mystics', 'city': 'Washington', 'arena': 'Entertainment & Sports Arena'},
|
||||||
}
|
}
|
||||||
|
|
||||||
MLS_TEAMS = {
|
|
||||||
'ATL': {'name': 'Atlanta United FC', 'city': 'Atlanta', 'stadium': 'Mercedes-Benz Stadium'},
|
|
||||||
'AUS': {'name': 'Austin FC', 'city': 'Austin', 'stadium': 'Q2 Stadium'},
|
|
||||||
'CLT': {'name': 'Charlotte FC', 'city': 'Charlotte', 'stadium': 'Bank of America Stadium'},
|
|
||||||
'CHI': {'name': 'Chicago Fire FC', 'city': 'Chicago', 'stadium': 'Soldier Field'},
|
|
||||||
'CIN': {'name': 'FC Cincinnati', 'city': 'Cincinnati', 'stadium': 'TQL Stadium'},
|
|
||||||
'COL': {'name': 'Colorado Rapids', 'city': 'Commerce City', 'stadium': "Dick's Sporting Goods Park"},
|
|
||||||
'CLB': {'name': 'Columbus Crew', 'city': 'Columbus', 'stadium': 'Lower.com Field'},
|
|
||||||
'DAL': {'name': 'FC Dallas', 'city': 'Frisco', 'stadium': 'Toyota Stadium'},
|
|
||||||
'DC': {'name': 'D.C. United', 'city': 'Washington', 'stadium': 'Audi Field'},
|
|
||||||
'HOU': {'name': 'Houston Dynamo FC', 'city': 'Houston', 'stadium': 'Shell Energy Stadium'},
|
|
||||||
'LAG': {'name': 'LA Galaxy', 'city': 'Carson', 'stadium': 'Dignity Health Sports Park'},
|
|
||||||
'LAFC': {'name': 'Los Angeles FC', 'city': 'Los Angeles', 'stadium': 'BMO Stadium'},
|
|
||||||
'MIA': {'name': 'Inter Miami CF', 'city': 'Fort Lauderdale', 'stadium': 'Chase Stadium'},
|
|
||||||
'MIN': {'name': 'Minnesota United FC', 'city': 'Saint Paul', 'stadium': 'Allianz Field'},
|
|
||||||
'MTL': {'name': 'CF Montreal', 'city': 'Montreal', 'stadium': 'Stade Saputo'},
|
|
||||||
'NSH': {'name': 'Nashville SC', 'city': 'Nashville', 'stadium': 'Geodis Park'},
|
|
||||||
'NE': {'name': 'New England Revolution', 'city': 'Foxborough', 'stadium': 'Gillette Stadium'},
|
|
||||||
'NYCFC': {'name': 'New York City FC', 'city': 'New York', 'stadium': 'Yankee Stadium'},
|
|
||||||
'NYRB': {'name': 'New York Red Bulls', 'city': 'Harrison', 'stadium': 'Red Bull Arena'},
|
|
||||||
'ORL': {'name': 'Orlando City SC', 'city': 'Orlando', 'stadium': 'Inter&Co Stadium'},
|
|
||||||
'PHI': {'name': 'Philadelphia Union', 'city': 'Chester', 'stadium': 'Subaru Park'},
|
|
||||||
'POR': {'name': 'Portland Timbers', 'city': 'Portland', 'stadium': 'Providence Park'},
|
|
||||||
'RSL': {'name': 'Real Salt Lake', 'city': 'Sandy', 'stadium': 'America First Field'},
|
|
||||||
'SJ': {'name': 'San Jose Earthquakes', 'city': 'San Jose', 'stadium': 'PayPal Park'},
|
|
||||||
'SEA': {'name': 'Seattle Sounders FC', 'city': 'Seattle', 'stadium': 'Lumen Field'},
|
|
||||||
'SKC': {'name': 'Sporting Kansas City', 'city': 'Kansas City', 'stadium': "Children's Mercy Park"},
|
|
||||||
'STL': {'name': 'St. Louis City SC', 'city': 'St. Louis', 'stadium': 'CityPark'},
|
|
||||||
'TOR': {'name': 'Toronto FC', 'city': 'Toronto', 'stadium': 'BMO Field'},
|
|
||||||
'VAN': {'name': 'Vancouver Whitecaps FC', 'city': 'Vancouver', 'stadium': 'BC Place'},
|
|
||||||
'SD': {'name': 'San Diego FC', 'city': 'San Diego', 'stadium': 'Snapdragon Stadium'},
|
|
||||||
}
|
|
||||||
|
|
||||||
NWSL_TEAMS = {
|
NWSL_TEAMS = {
|
||||||
'LA': {'name': 'Angel City FC', 'city': 'Los Angeles', 'stadium': 'BMO Stadium'},
|
'LA': {'name': 'Angel City FC', 'city': 'Los Angeles', 'stadium': 'BMO Stadium'},
|
||||||
'SJ': {'name': 'Bay FC', 'city': 'San Jose', 'stadium': 'PayPal Park'},
|
'SJ': {'name': 'Bay FC', 'city': 'San Jose', 'stadium': 'PayPal Park'},
|
||||||
@@ -408,87 +382,10 @@ def scrape_cbb_cbssports(season: int) -> list[Game]:
|
|||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# NON-CORE STADIUM SCRAPERS
|
# NON-CORE STADIUM SCRAPERS
|
||||||
# TODO: Extract to separate modules
|
# TODO: Extract to separate modules (wnba.py, nwsl.py, cbb.py)
|
||||||
|
# NOTE: scrape_mls_stadiums() is now imported from mls.py
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
def scrape_mls_stadiums_geojson() -> list[Stadium]:
|
|
||||||
"""Source 1: gavinr/usa-soccer GeoJSON."""
|
|
||||||
stadiums = []
|
|
||||||
url = "https://raw.githubusercontent.com/gavinr/usa-soccer/master/mls.geojson"
|
|
||||||
|
|
||||||
response = requests.get(url, timeout=30)
|
|
||||||
response.raise_for_status()
|
|
||||||
data = response.json()
|
|
||||||
|
|
||||||
for feature in data.get('features', []):
|
|
||||||
props = feature.get('properties', {})
|
|
||||||
coords = feature.get('geometry', {}).get('coordinates', [0, 0])
|
|
||||||
|
|
||||||
stadium = Stadium(
|
|
||||||
id=f"mls_{props.get('stadium', '').lower().replace(' ', '_')[:30]}",
|
|
||||||
name=props.get('stadium', ''),
|
|
||||||
city=props.get('city', ''),
|
|
||||||
state=props.get('state', ''),
|
|
||||||
latitude=coords[1] if len(coords) > 1 else 0,
|
|
||||||
longitude=coords[0] if len(coords) > 0 else 0,
|
|
||||||
capacity=props.get('capacity', 0),
|
|
||||||
sport='MLS',
|
|
||||||
team_abbrevs=[props.get('team', '')],
|
|
||||||
source='github.com/gavinr'
|
|
||||||
)
|
|
||||||
stadiums.append(stadium)
|
|
||||||
|
|
||||||
return stadiums
|
|
||||||
|
|
||||||
|
|
||||||
def scrape_mls_stadiums_csv() -> list[Stadium]:
|
|
||||||
"""Source 2: gavinr/usa-soccer CSV."""
|
|
||||||
stadiums = []
|
|
||||||
url = "https://raw.githubusercontent.com/gavinr/usa-soccer/master/mls.csv"
|
|
||||||
|
|
||||||
response = requests.get(url, timeout=30)
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
reader = csv.DictReader(StringIO(response.text))
|
|
||||||
|
|
||||||
for row in reader:
|
|
||||||
stadium = Stadium(
|
|
||||||
id=f"mls_{row.get('stadium', '').lower().replace(' ', '_')[:30]}",
|
|
||||||
name=row.get('stadium', ''),
|
|
||||||
city=row.get('city', ''),
|
|
||||||
state=row.get('state', ''),
|
|
||||||
latitude=float(row.get('lat', 0) or 0),
|
|
||||||
longitude=float(row.get('lng', 0) or 0),
|
|
||||||
capacity=int(row.get('capacity', 0) or 0),
|
|
||||||
sport='MLS',
|
|
||||||
team_abbrevs=[row.get('team', '')],
|
|
||||||
source='github.com/gavinr/csv'
|
|
||||||
)
|
|
||||||
stadiums.append(stadium)
|
|
||||||
|
|
||||||
return stadiums
|
|
||||||
|
|
||||||
|
|
||||||
def scrape_mls_stadiums_hardcoded() -> list[Stadium]:
|
|
||||||
"""Source 3: Hardcoded MLS stadiums (fallback)."""
|
|
||||||
# Placeholder - would include full stadium list
|
|
||||||
return []
|
|
||||||
|
|
||||||
|
|
||||||
def scrape_mls_stadiums() -> list[Stadium]:
|
|
||||||
"""Fetch MLS stadium data with multi-source fallback."""
|
|
||||||
print("\nMLS STADIUMS")
|
|
||||||
print("-" * 40)
|
|
||||||
|
|
||||||
sources = [
|
|
||||||
StadiumScraperSource('gavinr GeoJSON', scrape_mls_stadiums_geojson, priority=1, min_venues=20),
|
|
||||||
StadiumScraperSource('gavinr CSV', scrape_mls_stadiums_csv, priority=2, min_venues=20),
|
|
||||||
StadiumScraperSource('Hardcoded', scrape_mls_stadiums_hardcoded, priority=3, min_venues=20),
|
|
||||||
]
|
|
||||||
|
|
||||||
return scrape_stadiums_with_fallback('MLS', sources)
|
|
||||||
|
|
||||||
|
|
||||||
def scrape_wnba_stadiums() -> list[Stadium]:
|
def scrape_wnba_stadiums() -> list[Stadium]:
|
||||||
"""Fetch WNBA arena data (hardcoded)."""
|
"""Fetch WNBA arena data (hardcoded)."""
|
||||||
print("\nWNBA STADIUMS")
|
print("\nWNBA STADIUMS")
|
||||||
|
|||||||
Reference in New Issue
Block a user