Added year_opened field to stadium data across all 4 sport modules: - MLB: 30 ballparks (1912-2023) - NBA: 30 arenas (1968-2024) - NHL: 32 arenas (1968-2021) - NFL: 30 stadiums (1924-2020) Updated Stadium object creation in all modules to pass year_opened. Stadium dataclass already supported the field. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
575 lines
24 KiB
Python
575 lines
24 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
NFL schedule and stadium scrapers for SportsTime.
|
|
|
|
This module provides:
|
|
- NFL game scrapers (ESPN, Pro-Football-Reference, CBS Sports)
|
|
- NFL stadium scrapers (ScoreBot, GeoJSON, hardcoded)
|
|
- Multi-source fallback configurations
|
|
"""
|
|
|
|
from datetime import datetime
|
|
from typing import Optional
|
|
|
|
import requests
|
|
|
|
# Support both direct execution and import from parent directory
|
|
try:
|
|
from core import (
|
|
Game,
|
|
Stadium,
|
|
ScraperSource,
|
|
StadiumScraperSource,
|
|
fetch_page,
|
|
scrape_with_fallback,
|
|
scrape_stadiums_with_fallback,
|
|
)
|
|
except ImportError:
|
|
from Scripts.core import (
|
|
Game,
|
|
Stadium,
|
|
ScraperSource,
|
|
StadiumScraperSource,
|
|
fetch_page,
|
|
scrape_with_fallback,
|
|
scrape_stadiums_with_fallback,
|
|
)
|
|
|
|
|
|
# Public names exported by a star-import of this module.
__all__ = [
    # Team lookup table
    'NFL_TEAMS',

    # Schedule scrapers (one per upstream site)
    'scrape_nfl_espn',
    'scrape_nfl_pro_football_reference',
    'scrape_nfl_cbssports',

    # Stadium scrapers (fallback entry point first, then individual sources)
    'scrape_nfl_stadiums',
    'scrape_nfl_stadiums_scorebot',
    'scrape_nfl_stadiums_geojson',
    'scrape_nfl_stadiums_hardcoded',

    # Ordered source lists consumed by the fallback helpers
    'NFL_GAME_SOURCES',
    'NFL_STADIUM_SOURCES',

    # High-level helpers
    'scrape_nfl_games',
    'get_nfl_season_string',
]
|
|
|
|
|
|
# =============================================================================
|
|
# TEAM MAPPINGS
|
|
# =============================================================================
|
|
|
|
# Abbreviation -> full team name, home city and home stadium name.
NFL_TEAMS = {
    'ARI': {'name': 'Arizona Cardinals', 'city': 'Glendale', 'stadium': 'State Farm Stadium'},
    'ATL': {'name': 'Atlanta Falcons', 'city': 'Atlanta', 'stadium': 'Mercedes-Benz Stadium'},
    'BAL': {'name': 'Baltimore Ravens', 'city': 'Baltimore', 'stadium': 'M&T Bank Stadium'},
    'BUF': {'name': 'Buffalo Bills', 'city': 'Orchard Park', 'stadium': 'Highmark Stadium'},
    'CAR': {'name': 'Carolina Panthers', 'city': 'Charlotte', 'stadium': 'Bank of America Stadium'},
    'CHI': {'name': 'Chicago Bears', 'city': 'Chicago', 'stadium': 'Soldier Field'},
    'CIN': {'name': 'Cincinnati Bengals', 'city': 'Cincinnati', 'stadium': 'Paycor Stadium'},
    'CLE': {'name': 'Cleveland Browns', 'city': 'Cleveland', 'stadium': 'Cleveland Browns Stadium'},
    'DAL': {'name': 'Dallas Cowboys', 'city': 'Arlington', 'stadium': 'AT&T Stadium'},
    'DEN': {'name': 'Denver Broncos', 'city': 'Denver', 'stadium': 'Empower Field at Mile High'},
    'DET': {'name': 'Detroit Lions', 'city': 'Detroit', 'stadium': 'Ford Field'},
    'GB': {'name': 'Green Bay Packers', 'city': 'Green Bay', 'stadium': 'Lambeau Field'},
    'HOU': {'name': 'Houston Texans', 'city': 'Houston', 'stadium': 'NRG Stadium'},
    'IND': {'name': 'Indianapolis Colts', 'city': 'Indianapolis', 'stadium': 'Lucas Oil Stadium'},
    'JAX': {'name': 'Jacksonville Jaguars', 'city': 'Jacksonville', 'stadium': 'EverBank Stadium'},
    'KC': {'name': 'Kansas City Chiefs', 'city': 'Kansas City', 'stadium': 'GEHA Field at Arrowhead Stadium'},
    'LV': {'name': 'Las Vegas Raiders', 'city': 'Las Vegas', 'stadium': 'Allegiant Stadium'},
    'LAC': {'name': 'Los Angeles Chargers', 'city': 'Inglewood', 'stadium': 'SoFi Stadium'},
    'LAR': {'name': 'Los Angeles Rams', 'city': 'Inglewood', 'stadium': 'SoFi Stadium'},
    'MIA': {'name': 'Miami Dolphins', 'city': 'Miami Gardens', 'stadium': 'Hard Rock Stadium'},
    'MIN': {'name': 'Minnesota Vikings', 'city': 'Minneapolis', 'stadium': 'U.S. Bank Stadium'},
    'NE': {'name': 'New England Patriots', 'city': 'Foxborough', 'stadium': 'Gillette Stadium'},
    'NO': {'name': 'New Orleans Saints', 'city': 'New Orleans', 'stadium': 'Caesars Superdome'},
    'NYG': {'name': 'New York Giants', 'city': 'East Rutherford', 'stadium': 'MetLife Stadium'},
    'NYJ': {'name': 'New York Jets', 'city': 'East Rutherford', 'stadium': 'MetLife Stadium'},
    'PHI': {'name': 'Philadelphia Eagles', 'city': 'Philadelphia', 'stadium': 'Lincoln Financial Field'},
    'PIT': {'name': 'Pittsburgh Steelers', 'city': 'Pittsburgh', 'stadium': 'Acrisure Stadium'},
    'SF': {'name': 'San Francisco 49ers', 'city': 'Santa Clara', 'stadium': "Levi's Stadium"},
    'SEA': {'name': 'Seattle Seahawks', 'city': 'Seattle', 'stadium': 'Lumen Field'},
    'TB': {'name': 'Tampa Bay Buccaneers', 'city': 'Tampa', 'stadium': 'Raymond James Stadium'},
    'TEN': {'name': 'Tennessee Titans', 'city': 'Nashville', 'stadium': 'Nissan Stadium'},
    'WAS': {'name': 'Washington Commanders', 'city': 'Landover', 'stadium': 'Northwest Stadium'},
}


def get_nfl_team_abbrev(team_name: str) -> str:
    """
    Get the NFL team abbreviation for a team name.

    Resolution order:
      1. Blank input returns '' (nothing to look up).
      2. Input that already is a known abbreviation (any case) is returned
         upper-cased.
      3. Case-insensitive exact match on the full team name.
      4. Case-insensitive substring match on the full team name, first team
         in NFL_TEAMS order wins (so "Seahawks" resolves to 'SEA').
      5. Fallback: first three characters of the input, upper-cased.

    Args:
        team_name: Full name, partial name, nickname or abbreviation

    Returns:
        The matching key of NFL_TEAMS, or the fallback described above
    """
    query = team_name.strip()
    if not query:
        # An empty string is a substring of every name; without this guard
        # blank input would resolve to the first team in the table.
        return ''

    # Abbreviations must be checked before the substring pass, otherwise
    # e.g. 'CAR' matches "Arizona CARdinals" and 'PHI' matches "dolPHIns".
    as_abbrev = query.upper()
    if as_abbrev in NFL_TEAMS:
        return as_abbrev

    needle = query.lower()

    # Exact matches take priority over any substring match.
    for abbrev, info in NFL_TEAMS.items():
        if info['name'].lower() == needle:
            return abbrev

    for abbrev, info in NFL_TEAMS.items():
        if needle in info['name'].lower():
            return abbrev

    # Return first 3 letters as fallback
    return query[:3].upper()
|
|
|
|
|
|
def get_nfl_season_string(season: int) -> str:
    """
    Build the season label used on Game records, e.g. "2025-26".

    Args:
        season: Year in which the season ends (2026 means the 2025-26 season)

    Returns:
        The preceding year, a hyphen, then every character of ``season``
        after its first two (for a four-digit year, its last two digits)
    """
    starting_year = season - 1
    ending_suffix = str(season)[2:]
    return "-".join((str(starting_year), ending_suffix))
|
|
|
|
|
|
# =============================================================================
|
|
# GAME SCRAPERS
|
|
# =============================================================================
|
|
|
|
def _scrape_espn_schedule(sport: str, league: str, season: int, date_range: tuple[str, str]) -> list[Game]:
    """
    Fetch a schedule from the ESPN scoreboard API and convert it to Game objects.

    Args:
        sport: ESPN sport path segment, e.g. 'football'
        league: ESPN league path segment, e.g. 'nfl'
        season: Season ending year, passed to get_nfl_season_string()
        date_range: (start_date, end_date) in YYYYMMDD format

    Returns:
        Games parsed from the response. The list is empty when the request
        or JSON decoding fails; the error is printed rather than raised.
        Events that cannot be parsed are skipped and counted in the output.
    """
    games = []
    skipped = 0
    sport_upper = 'NFL'

    print(f"Fetching {sport_upper} {season} from ESPN API...")

    url = f"https://site.api.espn.com/apis/site/v2/sports/{sport}/{league}/scoreboard"
    params = {
        'dates': f"{date_range[0]}-{date_range[1]}",
        'limit': 1000
    }

    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
    }

    try:
        response = requests.get(url, params=params, headers=headers, timeout=30)
        response.raise_for_status()
        data = response.json()

        # Same label for every game, so compute it once.
        season_label = get_nfl_season_string(season)

        for event in data.get('events', []):
            try:
                # Timestamp is sliced positionally: chars 0-9 are the date
                # (YYYY-MM-DD), chars 11-15 the time (HH:MM).
                # NOTE(review): ESPN timestamps appear to be UTC, so these are
                # not local kickoff values - confirm before displaying them.
                raw_date = event.get('date') or ''
                if len(raw_date) < 10:
                    # No usable date: the game id and date field would be malformed.
                    skipped += 1
                    continue
                date_str = raw_date[:10]
                time_str = raw_date[11:16] if len(raw_date) > 11 else None

                # Only the first competition of an event is used.
                competitions = event.get('competitions') or []
                if not competitions:
                    continue

                comp = competitions[0]
                competitors = comp.get('competitors', [])
                if len(competitors) < 2:
                    continue

                home_team = None
                away_team = None
                home_abbrev = None
                away_abbrev = None

                for team in competitors:
                    team_data = team.get('team', {})
                    team_name = team_data.get('displayName', team_data.get('name', ''))
                    team_abbrev = team_data.get('abbreviation', '')

                    # Anything not flagged as home is treated as the away side.
                    if team.get('homeAway') == 'home':
                        home_team = team_name
                        home_abbrev = team_abbrev
                    else:
                        away_team = team_name
                        away_abbrev = team_abbrev

                if not home_team or not away_team:
                    continue

                # Resolve missing abbreviations BEFORE building the id so the
                # id and the Game's abbreviation fields always agree.
                home_abbrev = home_abbrev or get_nfl_team_abbrev(home_team)
                away_abbrev = away_abbrev or get_nfl_team_abbrev(away_team)

                venue = comp.get('venue', {}).get('fullName', '')

                game_id = f"nfl_{date_str}_{away_abbrev}_{home_abbrev}".lower()

                games.append(Game(
                    id=game_id,
                    sport='NFL',
                    season=season_label,
                    date=date_str,
                    time=time_str,
                    home_team=home_team,
                    away_team=away_team,
                    home_team_abbrev=home_abbrev,
                    away_team_abbrev=away_abbrev,
                    venue=venue,
                    source='espn.com'
                ))

            except Exception:
                # Best effort: one malformed event must not abort the whole
                # schedule, but it is counted so the loss is visible.
                skipped += 1
                continue

        print(f"  Found {len(games)} games from ESPN")
        if skipped:
            print(f"  Skipped {skipped} malformed events from ESPN")

    except Exception as e:
        print(f"Error fetching ESPN NFL: {e}")

    return games
|
|
|
|
|
|
def scrape_nfl_espn(season: int) -> list[Game]:
    """Fetch the NFL schedule for the season ending in ``season`` from the ESPN API."""
    # The query window spans two calendar years: September 1 of the previous
    # year through February 28 of the season's ending year.
    date_window = (f"{season-1}0901", f"{season}0228")
    return _scrape_espn_schedule('football', 'nfl', season, date_window)
|
|
|
|
|
|
def _parse_pfr_game_date(date_str: str, start_year: int) -> Optional[str]:
    """Convert a PFR date cell ("September 7" or "2025-09-07") to YYYY-MM-DD, or None if it is not a date."""
    try:
        if '-' in date_str:
            parsed_date = datetime.strptime(date_str, '%Y-%m-%d')
        else:
            # Month/day only: January and February games belong to the
            # calendar year after the season's starting year.
            month_str = date_str.split()[0]
            game_year = start_year + 1 if month_str in ('January', 'February') else start_year
            parsed_date = datetime.strptime(f"{date_str}, {game_year}", '%B %d, %Y')
    except (ValueError, IndexError):
        # ValueError: text is not a date (e.g. a separator row);
        # IndexError: the cell was empty.
        return None
    return parsed_date.strftime('%Y-%m-%d')


def scrape_nfl_pro_football_reference(season: int) -> list[Game]:
    """
    Scrape NFL schedule from Pro-Football-Reference.

    URL: https://www.pro-football-reference.com/years/{YEAR}/games.htm
    The URL's YEAR is the season's starting year, so it is ``season - 1``.

    Args:
        season: Season ending year (e.g., 2026 for 2025-26 season)

    Returns:
        List of Game objects (time is None and venue is '' for this source);
        empty when the page or its games table cannot be read
    """
    games = []
    year = season - 1  # PFR uses starting year
    url = f"https://www.pro-football-reference.com/years/{year}/games.htm"

    print(f"Scraping NFL {season} from Pro-Football-Reference...")
    soup = fetch_page(url, 'pro-football-reference.com')

    if not soup:
        return games

    table = soup.find('table', {'id': 'games'})
    if not table:
        print("  Could not find games table")
        return games

    tbody = table.find('tbody')
    if not tbody:
        return games

    season_label = get_nfl_season_string(season)

    for row in tbody.find_all('tr'):
        # Repeated header rows inside the body carry the 'thead' class.
        if 'thead' in (row.get('class') or []):
            continue

        try:
            date_cell = row.find('td', {'data-stat': 'game_date'})
            if not date_cell:
                continue
            date_str = date_cell.text.strip()

            winner_cell = row.find('td', {'data-stat': 'winner'})
            loser_cell = row.find('td', {'data-stat': 'loser'})
            home_cell = row.find('td', {'data-stat': 'game_location'})

            if not winner_cell or not loser_cell:
                continue

            # Prefer the link text when the team name is a hyperlink.
            winner_link = winner_cell.find('a')
            loser_link = loser_cell.find('a')
            winner = (winner_link.text if winner_link else winner_cell.text).strip()
            loser = (loser_link.text if loser_link else loser_cell.text).strip()

            # Determine home/away - '@' in game_location means winner was away
            if home_cell and '@' in home_cell.text:
                home_team, away_team = loser, winner
            else:
                home_team, away_team = winner, loser

            date_formatted = _parse_pfr_game_date(date_str, year)
            if date_formatted is None:
                continue

            away_abbrev = get_nfl_team_abbrev(away_team)
            home_abbrev = get_nfl_team_abbrev(home_team)
            game_id = f"nfl_{date_formatted}_{away_abbrev}_{home_abbrev}".lower().replace(' ', '')

            games.append(Game(
                id=game_id,
                sport='NFL',
                season=season_label,
                date=date_formatted,
                time=None,
                home_team=home_team,
                away_team=away_team,
                home_team_abbrev=home_abbrev,
                away_team_abbrev=away_abbrev,
                venue='',
                source='pro-football-reference.com'
            ))

        except Exception:
            # Best effort: skip a row that cannot be parsed rather than
            # abandoning the rest of the table.
            continue

    print(f"  Found {len(games)} games from Pro-Football-Reference")
    return games
|
|
|
|
|
|
def scrape_nfl_cbssports(season: int) -> list[Game]:
    """
    Scrape the NFL regular-season schedule page from CBS Sports.

    The first two cells of each table row are read as the away and home
    teams. Rows with fewer than three cells or an empty team cell are skipped.

    NOTE(review): every game is stamped with today's date as a placeholder;
    the real game date is never read from the page, so dates and ids from
    this source are not reliable.
    """
    games = []
    year = season - 1  # CBS uses starting year
    print(f"Fetching NFL {season} from CBS Sports...")

    # CBS Sports schedule endpoint
    url = f"https://www.cbssports.com/nfl/schedule/{year}/regular/"

    soup = fetch_page(url, 'cbssports.com')
    if not soup:
        return games

    for schedule_table in soup.find_all('table', class_='TableBase-table'):
        for schedule_row in schedule_table.find_all('tr'):
            try:
                cells = schedule_row.find_all('td')
                if len(cells) < 3:
                    continue

                # At least three cells exist here, so indexes 0 and 1 are safe.
                away_cell, home_cell = cells[0], cells[1]
                if not (away_cell and home_cell):
                    continue

                away_team = away_cell.get_text(strip=True)
                home_team = home_cell.get_text(strip=True)
                if not (away_team and home_team):
                    continue

                # CBS includes @ symbol
                away_team = away_team.replace('@', '').strip()

                # Placeholder: today's date stands in for the game date.
                date_formatted = datetime.now().strftime('%Y-%m-%d')

                away_abbrev = get_nfl_team_abbrev(away_team)
                home_abbrev = get_nfl_team_abbrev(home_team)
                game_id = f"nfl_{date_formatted}_{away_abbrev}_{home_abbrev}".lower().replace(' ', '')

                games.append(Game(
                    id=game_id,
                    sport='NFL',
                    season=get_nfl_season_string(season),
                    date=date_formatted,
                    time=None,
                    home_team=home_team,
                    away_team=away_team,
                    home_team_abbrev=home_abbrev,
                    away_team_abbrev=away_abbrev,
                    venue='',
                    source='cbssports.com'
                ))

            except Exception:
                # Skip rows that cannot be parsed.
                continue

    print(f"  Found {len(games)} games from CBS Sports")
    return games
|
|
|
|
|
|
# =============================================================================
|
|
# STADIUM SCRAPERS
|
|
# =============================================================================
|
|
|
|
def scrape_nfl_stadiums_scorebot() -> list[Stadium]:
    """
    Source 1: NFLScoreBot/stadiums GitHub (public domain).

    Downloads stadiums.json and builds one Stadium per top-level key.
    HTTP and JSON errors are not caught here; they propagate to the caller.
    """
    url = "https://raw.githubusercontent.com/NFLScoreBot/stadiums/main/stadiums.json"

    response = requests.get(url, timeout=30)
    response.raise_for_status()
    payload = response.json()

    def _scaled(raw_coord):
        # Coordinates are divided by 1,000,000; missing or zero values become 0.
        # NOTE(review): presumably the feed stores integer micro-degrees - confirm.
        return raw_coord / 1000000 if raw_coord else 0

    return [
        Stadium(
            # Id is the lower-cased, underscore-joined name cut to 30 characters.
            id=f"nfl_{venue_name.lower().replace(' ', '_')[:30]}",
            name=venue_name,
            city=details.get('city', ''),
            state=details.get('state', ''),
            latitude=_scaled(details.get('lat')),
            longitude=_scaled(details.get('long')),
            capacity=details.get('capacity', 0),
            sport='NFL',
            team_abbrevs=details.get('teams', []),
            source='github.com/NFLScoreBot'
        )
        for venue_name, details in payload.items()
    ]
|
|
|
|
|
|
def scrape_nfl_stadiums_geojson() -> list[Stadium]:
    """
    Source 2: brianhatchl/nfl-stadiums GeoJSON gist.

    Builds one Stadium per entry of the document's 'features' list.
    HTTP and JSON errors are not caught here; they propagate to the caller.
    """
    url = "https://gist.githubusercontent.com/brianhatchl/6265918/raw/dbe6acfe5deb48f51ce5a4c4f8f5dded4f02b9bd/nfl_stadiums.geojson"

    response = requests.get(url, timeout=30)
    response.raise_for_status()
    document = response.json()

    venues = []
    for feature in document.get('features', []):
        properties = feature.get('properties', {})
        position = feature.get('geometry', {}).get('coordinates', [0, 0])
        venue_name = properties.get('Stadium', '')

        # Index 0 is used as longitude and index 1 as latitude; a short list yields 0.
        lon = position[0] if len(position) > 0 else 0
        lat = position[1] if len(position) > 1 else 0

        venues.append(Stadium(
            id=f"nfl_{venue_name.lower().replace(' ', '_')[:30]}",
            name=venue_name,
            city=properties.get('City', ''),
            state=properties.get('State', ''),
            latitude=lat,
            longitude=lon,
            # ``or 0`` covers a present-but-empty Capacity before int().
            capacity=int(properties.get('Capacity', 0) or 0),
            sport='NFL',
            team_abbrevs=[properties.get('Team', '')],
            source='gist.github.com/brianhatchl'
        ))

    return venues
|
|
|
|
|
|
def scrape_nfl_stadiums_hardcoded() -> list[Stadium]:
    """
    Source 3: Hardcoded NFL stadiums (fallback).

    Returns one Stadium per entry of the built-in table below; no network
    access is involved. Two entries list two teams each.
    """
    venue_table = {
        'State Farm Stadium': {'city': 'Glendale', 'state': 'AZ', 'lat': 33.5276, 'lng': -112.2626, 'capacity': 63400, 'teams': ['ARI'], 'year_opened': 2006},
        'Mercedes-Benz Stadium': {'city': 'Atlanta', 'state': 'GA', 'lat': 33.7553, 'lng': -84.4006, 'capacity': 71000, 'teams': ['ATL'], 'year_opened': 2017},
        'M&T Bank Stadium': {'city': 'Baltimore', 'state': 'MD', 'lat': 39.2780, 'lng': -76.6227, 'capacity': 71008, 'teams': ['BAL'], 'year_opened': 1998},
        'Highmark Stadium': {'city': 'Orchard Park', 'state': 'NY', 'lat': 42.7738, 'lng': -78.7870, 'capacity': 71608, 'teams': ['BUF'], 'year_opened': 1973},
        'Bank of America Stadium': {'city': 'Charlotte', 'state': 'NC', 'lat': 35.2258, 'lng': -80.8528, 'capacity': 75523, 'teams': ['CAR'], 'year_opened': 1996},
        'Soldier Field': {'city': 'Chicago', 'state': 'IL', 'lat': 41.8623, 'lng': -87.6167, 'capacity': 61500, 'teams': ['CHI'], 'year_opened': 1924},
        'Paycor Stadium': {'city': 'Cincinnati', 'state': 'OH', 'lat': 39.0954, 'lng': -84.5160, 'capacity': 65515, 'teams': ['CIN'], 'year_opened': 2000},
        'Cleveland Browns Stadium': {'city': 'Cleveland', 'state': 'OH', 'lat': 41.5061, 'lng': -81.6995, 'capacity': 67895, 'teams': ['CLE'], 'year_opened': 1999},
        'AT&T Stadium': {'city': 'Arlington', 'state': 'TX', 'lat': 32.7480, 'lng': -97.0928, 'capacity': 80000, 'teams': ['DAL'], 'year_opened': 2009},
        'Empower Field at Mile High': {'city': 'Denver', 'state': 'CO', 'lat': 39.7439, 'lng': -105.0201, 'capacity': 76125, 'teams': ['DEN'], 'year_opened': 2001},
        'Ford Field': {'city': 'Detroit', 'state': 'MI', 'lat': 42.3400, 'lng': -83.0456, 'capacity': 65000, 'teams': ['DET'], 'year_opened': 2002},
        'Lambeau Field': {'city': 'Green Bay', 'state': 'WI', 'lat': 44.5013, 'lng': -88.0622, 'capacity': 81435, 'teams': ['GB'], 'year_opened': 1957},
        'NRG Stadium': {'city': 'Houston', 'state': 'TX', 'lat': 29.6847, 'lng': -95.4107, 'capacity': 72220, 'teams': ['HOU'], 'year_opened': 2002},
        'Lucas Oil Stadium': {'city': 'Indianapolis', 'state': 'IN', 'lat': 39.7601, 'lng': -86.1639, 'capacity': 67000, 'teams': ['IND'], 'year_opened': 2008},
        'EverBank Stadium': {'city': 'Jacksonville', 'state': 'FL', 'lat': 30.3239, 'lng': -81.6373, 'capacity': 67814, 'teams': ['JAX'], 'year_opened': 1995},
        'GEHA Field at Arrowhead Stadium': {'city': 'Kansas City', 'state': 'MO', 'lat': 39.0489, 'lng': -94.4839, 'capacity': 76416, 'teams': ['KC'], 'year_opened': 1972},
        'Allegiant Stadium': {'city': 'Las Vegas', 'state': 'NV', 'lat': 36.0909, 'lng': -115.1833, 'capacity': 65000, 'teams': ['LV'], 'year_opened': 2020},
        'SoFi Stadium': {'city': 'Inglewood', 'state': 'CA', 'lat': 33.9535, 'lng': -118.3392, 'capacity': 70240, 'teams': ['LAC', 'LAR'], 'year_opened': 2020},
        'Hard Rock Stadium': {'city': 'Miami Gardens', 'state': 'FL', 'lat': 25.9580, 'lng': -80.2389, 'capacity': 64767, 'teams': ['MIA'], 'year_opened': 1987},
        'U.S. Bank Stadium': {'city': 'Minneapolis', 'state': 'MN', 'lat': 44.9736, 'lng': -93.2575, 'capacity': 66655, 'teams': ['MIN'], 'year_opened': 2016},
        'Gillette Stadium': {'city': 'Foxborough', 'state': 'MA', 'lat': 42.0909, 'lng': -71.2643, 'capacity': 65878, 'teams': ['NE'], 'year_opened': 2002},
        'Caesars Superdome': {'city': 'New Orleans', 'state': 'LA', 'lat': 29.9511, 'lng': -90.0812, 'capacity': 73208, 'teams': ['NO'], 'year_opened': 1975},
        'MetLife Stadium': {'city': 'East Rutherford', 'state': 'NJ', 'lat': 40.8135, 'lng': -74.0745, 'capacity': 82500, 'teams': ['NYG', 'NYJ'], 'year_opened': 2010},
        'Lincoln Financial Field': {'city': 'Philadelphia', 'state': 'PA', 'lat': 39.9008, 'lng': -75.1675, 'capacity': 69596, 'teams': ['PHI'], 'year_opened': 2003},
        'Acrisure Stadium': {'city': 'Pittsburgh', 'state': 'PA', 'lat': 40.4468, 'lng': -80.0158, 'capacity': 68400, 'teams': ['PIT'], 'year_opened': 2001},
        "Levi's Stadium": {'city': 'Santa Clara', 'state': 'CA', 'lat': 37.4032, 'lng': -121.9698, 'capacity': 68500, 'teams': ['SF'], 'year_opened': 2014},
        'Lumen Field': {'city': 'Seattle', 'state': 'WA', 'lat': 47.5952, 'lng': -122.3316, 'capacity': 68740, 'teams': ['SEA'], 'year_opened': 2002},
        'Raymond James Stadium': {'city': 'Tampa', 'state': 'FL', 'lat': 27.9759, 'lng': -82.5033, 'capacity': 65618, 'teams': ['TB'], 'year_opened': 1998},
        'Nissan Stadium': {'city': 'Nashville', 'state': 'TN', 'lat': 36.1665, 'lng': -86.7713, 'capacity': 69143, 'teams': ['TEN'], 'year_opened': 1999},
        'Northwest Stadium': {'city': 'Landover', 'state': 'MD', 'lat': 38.9076, 'lng': -76.8645, 'capacity': 67617, 'teams': ['WAS'], 'year_opened': 1997},
    }

    return [
        Stadium(
            # Id is the lower-cased, underscore-joined name cut to 30 characters.
            id=f"nfl_{venue_name.lower().replace(' ', '_')[:30]}",
            name=venue_name,
            city=record['city'],
            state=record['state'],
            latitude=record['lat'],
            longitude=record['lng'],
            capacity=record['capacity'],
            sport='NFL',
            team_abbrevs=record['teams'],
            source='nfl_hardcoded',
            year_opened=record.get('year_opened')
        )
        for venue_name, record in venue_table.items()
    ]
|
|
|
|
|
|
def scrape_nfl_stadiums() -> list[Stadium]:
    """
    Print a section banner, then fetch NFL stadiums via the multi-source fallback.

    Returns:
        Whatever scrape_stadiums_with_fallback() returns for NFL_STADIUM_SOURCES
    """
    banner_title = "\nNFL STADIUMS"
    banner_rule = "-" * 40
    print(banner_title, banner_rule, sep="\n")

    return scrape_stadiums_with_fallback('NFL', NFL_STADIUM_SOURCES)
|
|
|
|
|
|
# =============================================================================
|
|
# SOURCE CONFIGURATIONS
|
|
# =============================================================================
|
|
|
|
# Schedule sources, listed by ascending priority number. Each entry carries
# the minimum game count configured for that source.
NFL_GAME_SOURCES = [
    ScraperSource(
        'ESPN',
        scrape_nfl_espn,
        priority=1,
        min_games=200,
    ),
    ScraperSource(
        'Pro-Football-Reference',
        scrape_nfl_pro_football_reference,
        priority=2,
        min_games=200,
    ),
    ScraperSource(
        'CBS Sports',
        scrape_nfl_cbssports,
        priority=3,
        min_games=100,
    ),
]

# Stadium sources, listed by ascending priority number; the hardcoded table
# is last. Every source is configured with a minimum of 28 venues.
NFL_STADIUM_SOURCES = [
    StadiumScraperSource(
        'NFLScoreBot',
        scrape_nfl_stadiums_scorebot,
        priority=1,
        min_venues=28,
    ),
    StadiumScraperSource(
        'GeoJSON-Gist',
        scrape_nfl_stadiums_geojson,
        priority=2,
        min_venues=28,
    ),
    StadiumScraperSource(
        'Hardcoded',
        scrape_nfl_stadiums_hardcoded,
        priority=3,
        min_venues=28,
    ),
]
|
|
|
|
|
|
# =============================================================================
|
|
# CONVENIENCE FUNCTIONS
|
|
# =============================================================================
|
|
|
|
def scrape_nfl_games(season: int) -> list[Game]:
    """
    Print a section banner, then scrape a season's NFL games via the multi-source fallback.

    Args:
        season: Season ending year (e.g., 2026 for 2025-26 season)

    Returns:
        Whatever scrape_with_fallback() returns for NFL_GAME_SOURCES
    """
    season_label = get_nfl_season_string(season)
    banner_title = f"\nNFL {season_label} SCHEDULE"
    banner_rule = "-" * 40
    print(banner_title, banner_rule, sep="\n")

    return scrape_with_fallback('NFL', season, NFL_GAME_SOURCES)
|