Files
Sportstime/Scripts/nfl.py
Trey t 90bdf1608c feat(02-01): add year_opened to all 122 hardcoded stadiums
Added year_opened field to stadium data across all 4 sport modules:
- MLB: 30 ballparks (1912-2023)
- NBA: 30 arenas (1968-2024)
- NHL: 32 arenas (1968-2021)
- NFL: 30 stadiums (1924-2020)

Updated Stadium object creation in all modules to pass year_opened.
Stadium dataclass already supported the field.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-10 00:31:45 -06:00

575 lines
24 KiB
Python

#!/usr/bin/env python3
"""
NFL schedule and stadium scrapers for SportsTime.
This module provides:
- NFL game scrapers (ESPN, Pro-Football-Reference, CBS Sports)
- NFL stadium scrapers (ScoreBot, GeoJSON, hardcoded)
- Multi-source fallback configurations
"""
from datetime import datetime
from typing import Optional
import requests
# Support both direct execution and import from parent directory
try:
from core import (
Game,
Stadium,
ScraperSource,
StadiumScraperSource,
fetch_page,
scrape_with_fallback,
scrape_stadiums_with_fallback,
)
except ImportError:
from Scripts.core import (
Game,
Stadium,
ScraperSource,
StadiumScraperSource,
fetch_page,
scrape_with_fallback,
scrape_stadiums_with_fallback,
)
# Public names exported by a star-import of this module, grouped by role.
__all__ = [
    # -- team lookup table --
    'NFL_TEAMS',
    # -- game scrapers, one per upstream site --
    'scrape_nfl_espn', 'scrape_nfl_pro_football_reference', 'scrape_nfl_cbssports',
    # -- stadium scrapers: the fallback entry point, then each individual source --
    'scrape_nfl_stadiums',
    'scrape_nfl_stadiums_scorebot', 'scrape_nfl_stadiums_geojson', 'scrape_nfl_stadiums_hardcoded',
    # -- ordered source lists used by the fallback helpers --
    'NFL_GAME_SOURCES', 'NFL_STADIUM_SOURCES',
    # -- convenience helpers --
    'scrape_nfl_games', 'get_nfl_season_string',
]
# =============================================================================
# TEAM MAPPINGS
# =============================================================================
# Team abbreviation -> {'name', 'city', 'stadium'}.
# Built from compact (abbrev, name, city, stadium) rows. Row order is preserved
# in the resulting dict and matters to get_nfl_team_abbrev(), which returns the
# first entry that matches.
NFL_TEAMS = {
    abbrev: {'name': name, 'city': city, 'stadium': stadium}
    for abbrev, name, city, stadium in (
        ('ARI', 'Arizona Cardinals', 'Glendale', 'State Farm Stadium'),
        ('ATL', 'Atlanta Falcons', 'Atlanta', 'Mercedes-Benz Stadium'),
        ('BAL', 'Baltimore Ravens', 'Baltimore', 'M&T Bank Stadium'),
        ('BUF', 'Buffalo Bills', 'Orchard Park', 'Highmark Stadium'),
        ('CAR', 'Carolina Panthers', 'Charlotte', 'Bank of America Stadium'),
        ('CHI', 'Chicago Bears', 'Chicago', 'Soldier Field'),
        ('CIN', 'Cincinnati Bengals', 'Cincinnati', 'Paycor Stadium'),
        ('CLE', 'Cleveland Browns', 'Cleveland', 'Cleveland Browns Stadium'),
        ('DAL', 'Dallas Cowboys', 'Arlington', 'AT&T Stadium'),
        ('DEN', 'Denver Broncos', 'Denver', 'Empower Field at Mile High'),
        ('DET', 'Detroit Lions', 'Detroit', 'Ford Field'),
        ('GB', 'Green Bay Packers', 'Green Bay', 'Lambeau Field'),
        ('HOU', 'Houston Texans', 'Houston', 'NRG Stadium'),
        ('IND', 'Indianapolis Colts', 'Indianapolis', 'Lucas Oil Stadium'),
        ('JAX', 'Jacksonville Jaguars', 'Jacksonville', 'EverBank Stadium'),
        ('KC', 'Kansas City Chiefs', 'Kansas City', 'GEHA Field at Arrowhead Stadium'),
        ('LV', 'Las Vegas Raiders', 'Las Vegas', 'Allegiant Stadium'),
        ('LAC', 'Los Angeles Chargers', 'Inglewood', 'SoFi Stadium'),
        ('LAR', 'Los Angeles Rams', 'Inglewood', 'SoFi Stadium'),
        ('MIA', 'Miami Dolphins', 'Miami Gardens', 'Hard Rock Stadium'),
        ('MIN', 'Minnesota Vikings', 'Minneapolis', 'U.S. Bank Stadium'),
        ('NE', 'New England Patriots', 'Foxborough', 'Gillette Stadium'),
        ('NO', 'New Orleans Saints', 'New Orleans', 'Caesars Superdome'),
        ('NYG', 'New York Giants', 'East Rutherford', 'MetLife Stadium'),
        ('NYJ', 'New York Jets', 'East Rutherford', 'MetLife Stadium'),
        ('PHI', 'Philadelphia Eagles', 'Philadelphia', 'Lincoln Financial Field'),
        ('PIT', 'Pittsburgh Steelers', 'Pittsburgh', 'Acrisure Stadium'),
        ('SF', 'San Francisco 49ers', 'Santa Clara', "Levi's Stadium"),
        ('SEA', 'Seattle Seahawks', 'Seattle', 'Lumen Field'),
        ('TB', 'Tampa Bay Buccaneers', 'Tampa', 'Raymond James Stadium'),
        ('TEN', 'Tennessee Titans', 'Nashville', 'Nissan Stadium'),
        ('WAS', 'Washington Commanders', 'Landover', 'Northwest Stadium'),
    )
}
def get_nfl_team_abbrev(team_name: str) -> str:
    """
    Get NFL team abbreviation from a full or partial team name.

    Matching is case-insensitive and ignores surrounding whitespace. An exact
    match against a team's full name is preferred; otherwise the first team
    (in NFL_TEAMS order) whose full name contains the input wins, so an
    ambiguous fragment such as "New York" resolves to the earlier entry.

    Args:
        team_name: Team name to look up, e.g. "Dallas Cowboys" or "Cowboys".

    Returns:
        The NFL_TEAMS key for the matched team. If nothing matches, the first
        three characters of the (stripped) input, upper-cased. Blank input
        returns an empty string.
    """
    wanted = team_name.strip().lower()
    if not wanted:
        # '' is a substring of every name, so without this guard blank input
        # would be reported as the first team in the table.
        return ''

    # Pass 1: exact full-name match.
    for abbrev, info in NFL_TEAMS.items():
        if info['name'].lower() == wanted:
            return abbrev

    # Pass 2: partial match (e.g. nickname only).
    for abbrev, info in NFL_TEAMS.items():
        if wanted in info['name'].lower():
            return abbrev

    # Return first 3 letters as fallback
    return team_name.strip()[:3].upper()
def get_nfl_season_string(season: int) -> str:
    """
    Build the two-year season label, e.g. "2025-26".

    Args:
        season: The ending year of the season (e.g., 2026 for 2025-26 season)

    Returns:
        The starting year (season - 1), a hyphen, then the ending year with
        its first two characters dropped.
    """
    starting_year = str(season - 1)
    ending_suffix = str(season)[2:]
    return "-".join((starting_year, ending_suffix))
# =============================================================================
# GAME SCRAPERS
# =============================================================================
def _scrape_espn_schedule(sport: str, league: str, season: int, date_range: tuple[str, str]) -> list[Game]:
    """
    Fetch a schedule from the ESPN scoreboard API and convert it to Game objects.

    Args:
        sport: ESPN sport path segment ('football')
        league: ESPN league path segment ('nfl')
        season: Season ending year; used for the log line and the Game season label
        date_range: (start_date, end_date) in YYYYMMDD format

    Returns:
        Games parsed from the response. The list is empty when the request or
        JSON decoding fails (the error is printed, not raised). Events that are
        incomplete or cannot be parsed are skipped and counted in the log output.
    """
    games = []
    skipped = 0
    sport_upper = 'NFL'
    print(f"Fetching {sport_upper} {season} from ESPN API...")
    url = f"https://site.api.espn.com/apis/site/v2/sports/{sport}/{league}/scoreboard"
    params = {
        'dates': f"{date_range[0]}-{date_range[1]}",
        'limit': 1000
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
    }
    try:
        response = requests.get(url, params=params, headers=headers, timeout=30)
        response.raise_for_status()
        data = response.json()
        events = data.get('events', [])
        for event in events:
            try:
                # The timestamp is sliced positionally: chars 0-9 are the
                # YYYY-MM-DD date, chars 11-15 the HH:MM time.
                # NOTE(review): if ESPN reports this timestamp in UTC, evening
                # US kickoffs land on the following calendar day - confirm and
                # convert to a local zone if that matters downstream.
                raw_date = event.get('date') or ''
                date_str = raw_date[:10]
                time_str = raw_date[11:16] if len(raw_date) > 11 else None
                if not date_str:
                    # A game without a date cannot be identified or scheduled.
                    skipped += 1
                    continue

                # Get teams
                competitions = event.get('competitions', [{}])
                if not competitions:
                    skipped += 1
                    continue
                comp = competitions[0]
                competitors = comp.get('competitors', [])
                if len(competitors) < 2:
                    skipped += 1
                    continue

                home_team = None
                away_team = None
                home_abbrev = None
                away_abbrev = None
                for team in competitors:
                    team_data = team.get('team', {})
                    team_name = team_data.get('displayName', team_data.get('name', ''))
                    team_abbrev = team_data.get('abbreviation', '')
                    if team.get('homeAway') == 'home':
                        home_team = team_name
                        home_abbrev = team_abbrev
                    else:
                        away_team = team_name
                        away_abbrev = team_abbrev
                if not home_team or not away_team:
                    skipped += 1
                    continue

                # Resolve missing abbreviations BEFORE building the id so the
                # id and the Game's abbreviation fields always agree.
                home_abbrev = home_abbrev or get_nfl_team_abbrev(home_team)
                away_abbrev = away_abbrev or get_nfl_team_abbrev(away_team)

                # Get venue
                venue = comp.get('venue', {}).get('fullName', '')

                # Spaces are stripped to match the id format of the other scrapers.
                game_id = f"nfl_{date_str}_{away_abbrev}_{home_abbrev}".lower().replace(' ', '')
                game = Game(
                    id=game_id,
                    sport='NFL',
                    season=get_nfl_season_string(season),
                    date=date_str,
                    time=time_str,
                    home_team=home_team,
                    away_team=away_team,
                    home_team_abbrev=home_abbrev,
                    away_team_abbrev=away_abbrev,
                    venue=venue,
                    source='espn.com'
                )
                games.append(game)
            except Exception:
                # Best effort: one malformed event must not abort the whole
                # schedule, but it is counted so the loss is visible.
                skipped += 1
                continue
        print(f" Found {len(games)} games from ESPN")
        if skipped:
            print(f" Skipped {skipped} incomplete or unparseable ESPN events")
    except Exception as e:
        # Boundary handler: report and return what we have so the caller's
        # multi-source fallback can move on to the next source.
        print(f"Error fetching ESPN NFL: {e}")
    return games
def scrape_nfl_espn(season: int) -> list[Game]:
    """
    Fetch the NFL schedule for one season from the ESPN API.

    The query window runs from September 1 of the year before `season`
    through February 28 of `season`, since an NFL season spans two years.
    """
    date_window = (f"{season-1}0901", f"{season}0228")
    return _scrape_espn_schedule('football', 'nfl', season, date_window)
def _parse_pfr_game_date(date_str: str, start_year: int) -> Optional[str]:
    """
    Normalize a Pro-Football-Reference date cell to 'YYYY-MM-DD'.

    Accepts either an ISO date ("2025-09-07") or a month and day without a
    year ("September 7"). In the latter case January and February are placed
    in start_year + 1, every other month in start_year.

    Returns:
        The formatted date, or None when the text cannot be parsed.
    """
    try:
        if '-' in date_str:
            parsed_date = datetime.strptime(date_str, '%Y-%m-%d')
        else:
            # Add year based on month
            month_str = date_str.split()[0]
            game_year = start_year + 1 if month_str in ('January', 'February') else start_year
            parsed_date = datetime.strptime(f"{date_str}, {game_year}", '%B %d, %Y')
    except (ValueError, IndexError):
        # ValueError: text does not match the expected format.
        # IndexError: the cell was empty, so there is no month token.
        return None
    return parsed_date.strftime('%Y-%m-%d')


def scrape_nfl_pro_football_reference(season: int) -> list[Game]:
    """
    Scrape NFL schedule from Pro-Football-Reference.

    URL: https://www.pro-football-reference.com/years/{YEAR}/games.htm

    Args:
        season: Season ending year (e.g., 2026 for the 2025-26 season). The
            PFR URL is keyed by the starting year, i.e. season - 1.

    Returns:
        Games parsed from the 'games' table. Empty if the page or table is
        missing. Rows without a parseable date or without both team cells
        are skipped. Kickoff time and venue are not populated by this source.
    """
    games = []
    skipped = 0
    year = season - 1  # PFR uses starting year
    url = f"https://www.pro-football-reference.com/years/{year}/games.htm"
    print(f"Scraping NFL {season} from Pro-Football-Reference...")
    soup = fetch_page(url, 'pro-football-reference.com')
    if not soup:
        return games
    table = soup.find('table', {'id': 'games'})
    if not table:
        print(" Could not find games table")
        return games
    tbody = table.find('tbody')
    if not tbody:
        return games

    for row in tbody.find_all('tr'):
        # Repeated header rows inside the body carry the 'thead' class.
        if 'thead' in (row.get('class') or []):
            continue
        try:
            # Parse date
            date_cell = row.find('td', {'data-stat': 'game_date'})
            if not date_cell:
                continue
            date_str = date_cell.text.strip()

            # Parse teams
            winner_cell = row.find('td', {'data-stat': 'winner'})
            loser_cell = row.find('td', {'data-stat': 'loser'})
            home_cell = row.find('td', {'data-stat': 'game_location'})
            if not winner_cell or not loser_cell:
                continue
            # Prefer the link text when the team name is wrapped in an anchor.
            winner = (winner_cell.find('a') or winner_cell).text.strip()
            loser = (loser_cell.find('a') or loser_cell).text.strip()

            # Determine home/away - '@' in game_location means winner was away
            is_at_loser = home_cell and '@' in home_cell.text
            if is_at_loser:
                home_team, away_team = loser, winner
            else:
                home_team, away_team = winner, loser

            # Convert date (e.g., "September 7" or "2025-09-07")
            date_formatted = _parse_pfr_game_date(date_str, year)
            if date_formatted is None:
                continue

            away_abbrev = get_nfl_team_abbrev(away_team)
            home_abbrev = get_nfl_team_abbrev(home_team)
            game_id = f"nfl_{date_formatted}_{away_abbrev}_{home_abbrev}".lower().replace(' ', '')
            game = Game(
                id=game_id,
                sport='NFL',
                season=get_nfl_season_string(season),
                date=date_formatted,
                time=None,
                home_team=home_team,
                away_team=away_team,
                home_team_abbrev=home_abbrev,
                away_team_abbrev=away_abbrev,
                venue='',
                source='pro-football-reference.com'
            )
            games.append(game)
        except Exception:
            # Best effort: a single malformed row must not abort the scrape,
            # but it is counted so the loss is visible in the log.
            skipped += 1
            continue
    print(f" Found {len(games)} games from Pro-Football-Reference")
    if skipped:
        print(f" Skipped {skipped} rows that raised while parsing")
    return games
def _cbs_table_date(table) -> Optional[str]:
    """
    Best-effort lookup of the game date for one CBS schedule table.

    Reads the nearest heading (h2-h5) that precedes the table in the document
    and tries a few date formats on its text.

    NOTE(review): assumes CBS prints the game day, including the year, in a
    heading above each table - confirm against the live markup.

    Returns:
        'YYYY-MM-DD', or None when there is no such heading or its text is
        not a recognizable date.
    """
    try:
        heading = table.find_previous(['h2', 'h3', 'h4', 'h5'])
    except (AttributeError, TypeError):
        return None
    if heading is None:
        return None
    # Collapse internal whitespace so nested tags/newlines do not break parsing.
    text = ' '.join(heading.get_text(' ', strip=True).split())
    for fmt in ('%A, %B %d, %Y', '%B %d, %Y', '%Y-%m-%d'):
        try:
            return datetime.strptime(text, fmt).strftime('%Y-%m-%d')
        except ValueError:
            continue
    return None


def scrape_nfl_cbssports(season: int) -> list[Game]:
    """
    Scrape NFL schedule from CBS Sports.

    Args:
        season: Season ending year (e.g., 2026 for the 2025-26 season). The
            CBS URL is keyed by the starting year, i.e. season - 1.

    Returns:
        Games found in the page's 'TableBase-table' tables; empty if the page
        cannot be fetched. Kickoff time and venue are not populated.

    NOTE(review): game dates are best-effort. When no date can be read from
    the page, the game is stamped with today's date as a placeholder (the
    previous behavior for every game) and the count is printed, so such
    results should not be trusted for scheduling.
    """
    games = []
    year = season - 1  # CBS uses starting year
    print(f"Fetching NFL {season} from CBS Sports...")
    # CBS Sports schedule endpoint
    url = f"https://www.cbssports.com/nfl/schedule/{year}/regular/"
    soup = fetch_page(url, 'cbssports.com')
    if not soup:
        return games

    # Computed once so every fallback row of a run carries the same value.
    placeholder_date = datetime.now().strftime('%Y-%m-%d')
    undated = 0

    # Find game tables
    tables = soup.find_all('table', class_='TableBase-table')
    for table in tables:
        table_date = _cbs_table_date(table)
        for row in table.find_all('tr'):
            try:
                cells = row.find_all('td')
                if len(cells) < 3:
                    continue
                # Parse matchup: first cell is the away team, second the home team.
                away_team = cells[0].get_text(strip=True)
                home_team = cells[1].get_text(strip=True)
                if not away_team or not home_team:
                    continue
                # CBS includes @ symbol
                away_team = away_team.replace('@', '').strip()

                date_formatted = table_date or placeholder_date
                if table_date is None:
                    undated += 1

                away_abbrev = get_nfl_team_abbrev(away_team)
                home_abbrev = get_nfl_team_abbrev(home_team)
                game_id = f"nfl_{date_formatted}_{away_abbrev}_{home_abbrev}".lower().replace(' ', '')
                game = Game(
                    id=game_id,
                    sport='NFL',
                    season=get_nfl_season_string(season),
                    date=date_formatted,
                    time=None,
                    home_team=home_team,
                    away_team=away_team,
                    home_team_abbrev=home_abbrev,
                    away_team_abbrev=away_abbrev,
                    venue='',
                    source='cbssports.com'
                )
                games.append(game)
            except Exception:
                # Best effort: skip a malformed row rather than abort the scrape.
                continue
    print(f" Found {len(games)} games from CBS Sports")
    if undated:
        print(f" Warning: {undated} CBS games have no parsed date; using placeholder {placeholder_date}")
    return games
# =============================================================================
# STADIUM SCRAPERS
# =============================================================================
def scrape_nfl_stadiums_scorebot() -> list[Stadium]:
    """
    Source 1: stadiums.json from the NFLScoreBot/stadiums GitHub repo (public domain).

    Returns:
        One Stadium per top-level entry of the downloaded JSON object, keyed
        by stadium name.

    Raises:
        Whatever `requests` raises on a network failure or non-success HTTP
        status; nothing is caught here.
    """
    feed_url = "https://raw.githubusercontent.com/NFLScoreBot/stadiums/main/stadiums.json"
    resp = requests.get(feed_url, timeout=30)
    resp.raise_for_status()
    payload = resp.json()

    def _scaled(raw):
        # Missing or zero stays 0; any other value is divided by one million.
        # NOTE(review): presumably the feed stores coordinates as integer
        # micro-degrees - confirm against the upstream file.
        return raw / 1000000 if raw else 0

    return [
        Stadium(
            # The slug part of the id is capped at 30 characters.
            id=f"nfl_{venue_name.lower().replace(' ', '_')[:30]}",
            name=venue_name,
            city=entry.get('city', ''),
            state=entry.get('state', ''),
            latitude=_scaled(entry.get('lat')),
            longitude=_scaled(entry.get('long')),
            capacity=entry.get('capacity', 0),
            sport='NFL',
            team_abbrevs=entry.get('teams', []),
            source='github.com/NFLScoreBot'
        )
        for venue_name, entry in payload.items()
    ]
def scrape_nfl_stadiums_geojson() -> list[Stadium]:
    """
    Source 2: brianhatchl/nfl-stadiums GeoJSON gist.

    Returns:
        One Stadium per entry in the document's 'features' list.

    Raises:
        Whatever `requests` raises on a network failure or non-success HTTP
        status; nothing is caught here.
    """
    gist_url = "https://gist.githubusercontent.com/brianhatchl/6265918/raw/dbe6acfe5deb48f51ce5a4c4f8f5dded4f02b9bd/nfl_stadiums.geojson"
    resp = requests.get(gist_url, timeout=30)
    resp.raise_for_status()
    document = resp.json()

    results = []
    for feature in document.get('features', []):
        props = feature.get('properties', {})
        coords = feature.get('geometry', {}).get('coordinates', [0, 0])
        stadium_name = props.get('Stadium', '')
        # The coordinate pair is read as [longitude, latitude]; a short list
        # falls back to 0 for whichever element is missing.
        lng = coords[0] if len(coords) > 0 else 0
        lat = coords[1] if len(coords) > 1 else 0
        results.append(
            Stadium(
                # The slug part of the id is capped at 30 characters.
                id=f"nfl_{stadium_name.lower().replace(' ', '_')[:30]}",
                name=stadium_name,
                city=props.get('City', ''),
                state=props.get('State', ''),
                latitude=lat,
                longitude=lng,
                # A missing or falsy capacity becomes 0.
                capacity=int(props.get('Capacity', 0) or 0),
                sport='NFL',
                team_abbrevs=[props.get('Team', '')],
                source='gist.github.com/brianhatchl'
            )
        )
    return results
def scrape_nfl_stadiums_hardcoded() -> list[Stadium]:
    """
    Source 3: Hardcoded NFL stadiums (fallback).

    Performs no network access: the venue table is embedded below.

    Returns:
        One Stadium per table row, in table order, each with year_opened set.
    """
    # Stadium name -> location, capacity, tenant abbreviations and opening year.
    # Venues shared by two teams list both abbreviations.
    venue_table = {
        'State Farm Stadium': {'city': 'Glendale', 'state': 'AZ', 'lat': 33.5276, 'lng': -112.2626, 'capacity': 63400, 'teams': ['ARI'], 'year_opened': 2006},
        'Mercedes-Benz Stadium': {'city': 'Atlanta', 'state': 'GA', 'lat': 33.7553, 'lng': -84.4006, 'capacity': 71000, 'teams': ['ATL'], 'year_opened': 2017},
        'M&T Bank Stadium': {'city': 'Baltimore', 'state': 'MD', 'lat': 39.2780, 'lng': -76.6227, 'capacity': 71008, 'teams': ['BAL'], 'year_opened': 1998},
        'Highmark Stadium': {'city': 'Orchard Park', 'state': 'NY', 'lat': 42.7738, 'lng': -78.7870, 'capacity': 71608, 'teams': ['BUF'], 'year_opened': 1973},
        'Bank of America Stadium': {'city': 'Charlotte', 'state': 'NC', 'lat': 35.2258, 'lng': -80.8528, 'capacity': 75523, 'teams': ['CAR'], 'year_opened': 1996},
        'Soldier Field': {'city': 'Chicago', 'state': 'IL', 'lat': 41.8623, 'lng': -87.6167, 'capacity': 61500, 'teams': ['CHI'], 'year_opened': 1924},
        'Paycor Stadium': {'city': 'Cincinnati', 'state': 'OH', 'lat': 39.0954, 'lng': -84.5160, 'capacity': 65515, 'teams': ['CIN'], 'year_opened': 2000},
        'Cleveland Browns Stadium': {'city': 'Cleveland', 'state': 'OH', 'lat': 41.5061, 'lng': -81.6995, 'capacity': 67895, 'teams': ['CLE'], 'year_opened': 1999},
        'AT&T Stadium': {'city': 'Arlington', 'state': 'TX', 'lat': 32.7480, 'lng': -97.0928, 'capacity': 80000, 'teams': ['DAL'], 'year_opened': 2009},
        'Empower Field at Mile High': {'city': 'Denver', 'state': 'CO', 'lat': 39.7439, 'lng': -105.0201, 'capacity': 76125, 'teams': ['DEN'], 'year_opened': 2001},
        'Ford Field': {'city': 'Detroit', 'state': 'MI', 'lat': 42.3400, 'lng': -83.0456, 'capacity': 65000, 'teams': ['DET'], 'year_opened': 2002},
        'Lambeau Field': {'city': 'Green Bay', 'state': 'WI', 'lat': 44.5013, 'lng': -88.0622, 'capacity': 81435, 'teams': ['GB'], 'year_opened': 1957},
        'NRG Stadium': {'city': 'Houston', 'state': 'TX', 'lat': 29.6847, 'lng': -95.4107, 'capacity': 72220, 'teams': ['HOU'], 'year_opened': 2002},
        'Lucas Oil Stadium': {'city': 'Indianapolis', 'state': 'IN', 'lat': 39.7601, 'lng': -86.1639, 'capacity': 67000, 'teams': ['IND'], 'year_opened': 2008},
        'EverBank Stadium': {'city': 'Jacksonville', 'state': 'FL', 'lat': 30.3239, 'lng': -81.6373, 'capacity': 67814, 'teams': ['JAX'], 'year_opened': 1995},
        'GEHA Field at Arrowhead Stadium': {'city': 'Kansas City', 'state': 'MO', 'lat': 39.0489, 'lng': -94.4839, 'capacity': 76416, 'teams': ['KC'], 'year_opened': 1972},
        'Allegiant Stadium': {'city': 'Las Vegas', 'state': 'NV', 'lat': 36.0909, 'lng': -115.1833, 'capacity': 65000, 'teams': ['LV'], 'year_opened': 2020},
        'SoFi Stadium': {'city': 'Inglewood', 'state': 'CA', 'lat': 33.9535, 'lng': -118.3392, 'capacity': 70240, 'teams': ['LAC', 'LAR'], 'year_opened': 2020},
        'Hard Rock Stadium': {'city': 'Miami Gardens', 'state': 'FL', 'lat': 25.9580, 'lng': -80.2389, 'capacity': 64767, 'teams': ['MIA'], 'year_opened': 1987},
        'U.S. Bank Stadium': {'city': 'Minneapolis', 'state': 'MN', 'lat': 44.9736, 'lng': -93.2575, 'capacity': 66655, 'teams': ['MIN'], 'year_opened': 2016},
        'Gillette Stadium': {'city': 'Foxborough', 'state': 'MA', 'lat': 42.0909, 'lng': -71.2643, 'capacity': 65878, 'teams': ['NE'], 'year_opened': 2002},
        'Caesars Superdome': {'city': 'New Orleans', 'state': 'LA', 'lat': 29.9511, 'lng': -90.0812, 'capacity': 73208, 'teams': ['NO'], 'year_opened': 1975},
        'MetLife Stadium': {'city': 'East Rutherford', 'state': 'NJ', 'lat': 40.8135, 'lng': -74.0745, 'capacity': 82500, 'teams': ['NYG', 'NYJ'], 'year_opened': 2010},
        'Lincoln Financial Field': {'city': 'Philadelphia', 'state': 'PA', 'lat': 39.9008, 'lng': -75.1675, 'capacity': 69596, 'teams': ['PHI'], 'year_opened': 2003},
        'Acrisure Stadium': {'city': 'Pittsburgh', 'state': 'PA', 'lat': 40.4468, 'lng': -80.0158, 'capacity': 68400, 'teams': ['PIT'], 'year_opened': 2001},
        "Levi's Stadium": {'city': 'Santa Clara', 'state': 'CA', 'lat': 37.4032, 'lng': -121.9698, 'capacity': 68500, 'teams': ['SF'], 'year_opened': 2014},
        'Lumen Field': {'city': 'Seattle', 'state': 'WA', 'lat': 47.5952, 'lng': -122.3316, 'capacity': 68740, 'teams': ['SEA'], 'year_opened': 2002},
        'Raymond James Stadium': {'city': 'Tampa', 'state': 'FL', 'lat': 27.9759, 'lng': -82.5033, 'capacity': 65618, 'teams': ['TB'], 'year_opened': 1998},
        'Nissan Stadium': {'city': 'Nashville', 'state': 'TN', 'lat': 36.1665, 'lng': -86.7713, 'capacity': 69143, 'teams': ['TEN'], 'year_opened': 1999},
        'Northwest Stadium': {'city': 'Landover', 'state': 'MD', 'lat': 38.9076, 'lng': -76.8645, 'capacity': 67617, 'teams': ['WAS'], 'year_opened': 1997},
    }
    return [
        Stadium(
            # The slug part of the id is capped at 30 characters, so long
            # names are truncated.
            id=f"nfl_{venue_name.lower().replace(' ', '_')[:30]}",
            name=venue_name,
            city=row['city'],
            state=row['state'],
            latitude=row['lat'],
            longitude=row['lng'],
            capacity=row['capacity'],
            sport='NFL',
            team_abbrevs=row['teams'],
            source='nfl_hardcoded',
            year_opened=row.get('year_opened')
        )
        for venue_name, row in venue_table.items()
    ]
def scrape_nfl_stadiums() -> list[Stadium]:
    """
    Print a section banner, then fetch NFL stadiums via the multi-source fallback.

    Returns:
        Whatever scrape_stadiums_with_fallback() yields for NFL_STADIUM_SOURCES.
    """
    divider = "-" * 40
    print("\nNFL STADIUMS", divider, sep="\n")
    return scrape_stadiums_with_fallback('NFL', NFL_STADIUM_SOURCES)
# =============================================================================
# SOURCE CONFIGURATIONS
# =============================================================================
# Game sources handed to scrape_with_fallback() by scrape_nfl_games().
# Each entry pairs a display name with the scraper function for that site.
# NOTE(review): `priority` presumably sets the order in which sources are
# tried and `min_games` the smallest result count accepted as a success -
# both are interpreted in core; confirm there.
NFL_GAME_SOURCES = [
ScraperSource('ESPN', scrape_nfl_espn, priority=1, min_games=200),
ScraperSource('Pro-Football-Reference', scrape_nfl_pro_football_reference, priority=2, min_games=200),
ScraperSource('CBS Sports', scrape_nfl_cbssports, priority=3, min_games=100),
]
# Stadium sources handed to scrape_stadiums_with_fallback() by scrape_nfl_stadiums().
# The two remote feeds are listed ahead of the embedded table, which needs no network.
# NOTE(review): `priority` presumably sets the order in which sources are
# tried and `min_venues` the smallest result count accepted as a success -
# both are interpreted in core; confirm there.
NFL_STADIUM_SOURCES = [
StadiumScraperSource('NFLScoreBot', scrape_nfl_stadiums_scorebot, priority=1, min_venues=28),
StadiumScraperSource('GeoJSON-Gist', scrape_nfl_stadiums_geojson, priority=2, min_venues=28),
StadiumScraperSource('Hardcoded', scrape_nfl_stadiums_hardcoded, priority=3, min_venues=28),
]
# =============================================================================
# CONVENIENCE FUNCTIONS
# =============================================================================
def scrape_nfl_games(season: int) -> list[Game]:
    """
    Print a section banner, then scrape one NFL season via the multi-source fallback.

    Args:
        season: Season ending year (e.g., 2026 for 2025-26 season)

    Returns:
        Whatever scrape_with_fallback() yields for NFL_GAME_SOURCES.
    """
    banner = f"\nNFL {get_nfl_season_string(season)} SCHEDULE"
    print(banner, "-" * 40, sep="\n")
    return scrape_with_fallback('NFL', season, NFL_GAME_SOURCES)