feat(01-03): create nfl.py sport module
Extract NFL scrapers from monolithic scrape_schedules.py into dedicated sport module following established pattern from nba.py/nhl.py: - NFL_TEAMS: 32 teams with stadiums - Game scrapers: ESPN API, Pro-Football-Reference, CBS Sports - Stadium scrapers: ScoreBot, GeoJSON gist, hardcoded fallback - NFL_GAME_SOURCES and NFL_STADIUM_SOURCES configurations - get_nfl_season_string() for cross-calendar-year format (2025-26) - scrape_nfl_games() convenience function with fallback Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
573
Scripts/nfl.py
Normal file
573
Scripts/nfl.py
Normal file
@@ -0,0 +1,573 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
NFL schedule and stadium scrapers for SportsTime.
|
||||
|
||||
This module provides:
|
||||
- NFL game scrapers (ESPN, Pro-Football-Reference, CBS Sports)
|
||||
- NFL stadium scrapers (ScoreBot, GeoJSON, hardcoded)
|
||||
- Multi-source fallback configurations
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
|
||||
# Support both direct execution and import from parent directory
|
||||
try:
|
||||
from core import (
|
||||
Game,
|
||||
Stadium,
|
||||
ScraperSource,
|
||||
StadiumScraperSource,
|
||||
fetch_page,
|
||||
scrape_with_fallback,
|
||||
scrape_stadiums_with_fallback,
|
||||
)
|
||||
except ImportError:
|
||||
from Scripts.core import (
|
||||
Game,
|
||||
Stadium,
|
||||
ScraperSource,
|
||||
StadiumScraperSource,
|
||||
fetch_page,
|
||||
scrape_with_fallback,
|
||||
scrape_stadiums_with_fallback,
|
||||
)
|
||||
|
||||
|
||||
# Names exported by `from nfl import *`, grouped by role.
__all__ = [
    # Team data
    'NFL_TEAMS',
    # Game scrapers
    'scrape_nfl_espn', 'scrape_nfl_pro_football_reference', 'scrape_nfl_cbssports',
    # Stadium scrapers
    'scrape_nfl_stadiums', 'scrape_nfl_stadiums_scorebot',
    'scrape_nfl_stadiums_geojson', 'scrape_nfl_stadiums_hardcoded',
    # Source configurations
    'NFL_GAME_SOURCES', 'NFL_STADIUM_SOURCES',
    # Convenience functions
    'scrape_nfl_games', 'get_nfl_season_string',
]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# TEAM MAPPINGS
|
||||
# =============================================================================
|
||||
|
||||
# All 32 NFL franchises keyed by abbreviation. Each value holds the team's
# full name, the city of its home stadium, and the stadium name.
# Insertion order is preserved on purpose: name lookups walk this mapping
# front to back and return the first hit.
NFL_TEAMS = {
    abbrev: {'name': name, 'city': city, 'stadium': stadium}
    for abbrev, name, city, stadium in (
        ('ARI', 'Arizona Cardinals', 'Glendale', 'State Farm Stadium'),
        ('ATL', 'Atlanta Falcons', 'Atlanta', 'Mercedes-Benz Stadium'),
        ('BAL', 'Baltimore Ravens', 'Baltimore', 'M&T Bank Stadium'),
        ('BUF', 'Buffalo Bills', 'Orchard Park', 'Highmark Stadium'),
        ('CAR', 'Carolina Panthers', 'Charlotte', 'Bank of America Stadium'),
        ('CHI', 'Chicago Bears', 'Chicago', 'Soldier Field'),
        ('CIN', 'Cincinnati Bengals', 'Cincinnati', 'Paycor Stadium'),
        ('CLE', 'Cleveland Browns', 'Cleveland', 'Cleveland Browns Stadium'),
        ('DAL', 'Dallas Cowboys', 'Arlington', 'AT&T Stadium'),
        ('DEN', 'Denver Broncos', 'Denver', 'Empower Field at Mile High'),
        ('DET', 'Detroit Lions', 'Detroit', 'Ford Field'),
        ('GB', 'Green Bay Packers', 'Green Bay', 'Lambeau Field'),
        ('HOU', 'Houston Texans', 'Houston', 'NRG Stadium'),
        ('IND', 'Indianapolis Colts', 'Indianapolis', 'Lucas Oil Stadium'),
        ('JAX', 'Jacksonville Jaguars', 'Jacksonville', 'EverBank Stadium'),
        ('KC', 'Kansas City Chiefs', 'Kansas City', 'GEHA Field at Arrowhead Stadium'),
        ('LV', 'Las Vegas Raiders', 'Las Vegas', 'Allegiant Stadium'),
        ('LAC', 'Los Angeles Chargers', 'Inglewood', 'SoFi Stadium'),
        ('LAR', 'Los Angeles Rams', 'Inglewood', 'SoFi Stadium'),
        ('MIA', 'Miami Dolphins', 'Miami Gardens', 'Hard Rock Stadium'),
        ('MIN', 'Minnesota Vikings', 'Minneapolis', 'U.S. Bank Stadium'),
        ('NE', 'New England Patriots', 'Foxborough', 'Gillette Stadium'),
        ('NO', 'New Orleans Saints', 'New Orleans', 'Caesars Superdome'),
        ('NYG', 'New York Giants', 'East Rutherford', 'MetLife Stadium'),
        ('NYJ', 'New York Jets', 'East Rutherford', 'MetLife Stadium'),
        ('PHI', 'Philadelphia Eagles', 'Philadelphia', 'Lincoln Financial Field'),
        ('PIT', 'Pittsburgh Steelers', 'Pittsburgh', 'Acrisure Stadium'),
        ('SF', 'San Francisco 49ers', 'Santa Clara', "Levi's Stadium"),
        ('SEA', 'Seattle Seahawks', 'Seattle', 'Lumen Field'),
        ('TB', 'Tampa Bay Buccaneers', 'Tampa', 'Raymond James Stadium'),
        ('TEN', 'Tennessee Titans', 'Nashville', 'Nissan Stadium'),
        ('WAS', 'Washington Commanders', 'Landover', 'Northwest Stadium'),
    )
}
|
||||
|
||||
|
||||
def get_nfl_team_abbrev(team_name: str) -> str:
    """
    Resolve an NFL team name to its abbreviation.

    Matching is case-insensitive and ignores surrounding whitespace.
    Candidates are tried from most to least specific:

    1. An abbreviation that is already a key of NFL_TEAMS (e.g. "NE").
    2. An exact full-name match (e.g. "New England Patriots").
    3. A partial match against the full name (e.g. "Patriots"); the first
       team in NFL_TEAMS order wins.

    Args:
        team_name: Full name, partial name, or abbreviation of a team.

    Returns:
        The matching NFL_TEAMS key. When nothing matches, the first three
        characters of the cleaned input, upper-cased. Blank input yields
        an empty string.
    """
    cleaned = (team_name or '').strip()
    if not cleaned:
        # An empty string is a substring of every name, so without this
        # guard blank input would resolve to the first team in the table.
        return ''

    # Known abbreviations must win over substring matching: "ne" occurs in
    # "Minnesota Vikings" and "car" in "Arizona Cardinals", so a plain
    # substring scan would map NE -> MIN and CAR -> ARI.
    as_abbrev = cleaned.upper()
    if as_abbrev in NFL_TEAMS:
        return as_abbrev

    needle = cleaned.lower()
    lowered_names = {abbrev: info['name'].lower() for abbrev, info in NFL_TEAMS.items()}

    # Exact names are checked across the whole table before any partial match.
    for abbrev, name in lowered_names.items():
        if name == needle:
            return abbrev

    for abbrev, name in lowered_names.items():
        if needle in name:
            return abbrev

    # Return first 3 letters as fallback
    return cleaned[:3].upper()
|
||||
|
||||
|
||||
def get_nfl_season_string(season: int) -> str:
    """
    Build the cross-year label for an NFL season, e.g. "2025-26".

    Args:
        season: Year in which the season ends (2026 means the 2025-26 season).

    Returns:
        The starting year, a dash, then the ending year with its first two
        characters removed.
    """
    start_year = season - 1
    end_suffix = str(season)[2:]
    return "{}-{}".format(start_year, end_suffix)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# GAME SCRAPERS
|
||||
# =============================================================================
|
||||
|
||||
def _parse_espn_event(event: dict, season: int) -> Optional[Game]:
    """Build a Game from one ESPN scoreboard event, or return None if required data is missing."""
    raw_date = event.get('date') or ''
    date_str = raw_date[:10]  # YYYY-MM-DD
    if not date_str:
        # A game without a date is unusable and would produce a malformed id.
        return None
    # NOTE(review): date and time are sliced straight out of ESPN's timestamp
    # with no timezone conversion. If ESPN reports UTC, late games may land on
    # the following calendar day - confirm against consumers of Game.date.
    time_str = raw_date[11:16] if len(raw_date) > 11 else None

    competitions = event.get('competitions') or []
    if not competitions:
        return None

    comp = competitions[0]
    competitors = comp.get('competitors') or []
    if len(competitors) < 2:
        return None

    home_team = away_team = None
    home_abbrev = away_abbrev = None
    for team in competitors:
        team_data = team.get('team', {})
        team_name = team_data.get('displayName', team_data.get('name', ''))
        team_abbrev = team_data.get('abbreviation', '')

        # Anything not flagged 'home' is treated as the away side.
        if team.get('homeAway') == 'home':
            home_team, home_abbrev = team_name, team_abbrev
        else:
            away_team, away_abbrev = team_name, team_abbrev

    if not home_team or not away_team:
        return None

    venue = (comp.get('venue') or {}).get('fullName', '')

    # Resolve missing abbreviations before building the id, so the id and the
    # Game fields always agree and the id never has an empty segment.
    home_abbrev = home_abbrev or get_nfl_team_abbrev(home_team)
    away_abbrev = away_abbrev or get_nfl_team_abbrev(away_team)
    game_id = f"nfl_{date_str}_{away_abbrev}_{home_abbrev}".lower()

    return Game(
        id=game_id,
        sport='NFL',
        season=get_nfl_season_string(season),
        date=date_str,
        time=time_str,
        home_team=home_team,
        away_team=away_team,
        home_team_abbrev=home_abbrev,
        away_team_abbrev=away_abbrev,
        venue=venue,
        source='espn.com'
    )


def _scrape_espn_schedule(sport: str, league: str, season: int, date_range: tuple[str, str]) -> list[Game]:
    """
    Fetch schedule from the ESPN scoreboard API.

    Args:
        sport: ESPN sport path segment, 'football'
        league: ESPN league path segment, 'nfl'
        season: Season ending year; passed to get_nfl_season_string() for the
            Game.season label
        date_range: (start_date, end_date) in YYYYMMDD format

    Returns:
        Games parsed from the response's 'events' list. Events that cannot be
        parsed are skipped and counted in the console output. Request or
        decoding failures are printed and whatever was collected so far
        (normally an empty list) is returned; this function does not raise.
    """
    games = []

    print(f"Fetching NFL {season} from ESPN API...")

    url = f"https://site.api.espn.com/apis/site/v2/sports/{sport}/{league}/scoreboard"
    params = {
        'dates': f"{date_range[0]}-{date_range[1]}",
        'limit': 1000
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
    }

    try:
        response = requests.get(url, params=params, headers=headers, timeout=30)
        response.raise_for_status()
        data = response.json()

        skipped = 0
        for event in data.get('events', []):
            try:
                game = _parse_espn_event(event, season)
            except Exception:
                # Best-effort: one malformed event must not abort the scrape.
                game = None

            if game is None:
                skipped += 1
                continue
            games.append(game)

        print(f"  Found {len(games)} games from ESPN")
        if skipped:
            print(f"  Skipped {skipped} ESPN events with missing or malformed data")

    except Exception as e:
        print(f"Error fetching ESPN NFL: {e}")

    return games
|
||||
|
||||
|
||||
def scrape_nfl_espn(season: int) -> list[Game]:
    """
    Pull the NFL schedule for one season from the ESPN API.

    Args:
        season: Season ending year (e.g., 2026 for the 2025-26 season).

    Returns:
        Whatever _scrape_espn_schedule() yields for the season's date window.
    """
    # The season straddles two calendar years: September 1 of the previous
    # year through February 28 of the ending year.
    window = (f"{season-1}0901", f"{season}0228")
    return _scrape_espn_schedule('football', 'nfl', season, window)
|
||||
|
||||
|
||||
def _parse_pfr_game_date(date_str: str, start_year: int) -> Optional[str]:
    """Normalize a PFR date cell ("2025-09-07" or "September 7") to YYYY-MM-DD, or None if unparseable."""
    try:
        if '-' in date_str:
            parsed_date = datetime.strptime(date_str, '%Y-%m-%d')
        else:
            # Month/day only: January and February games belong to the
            # calendar year after the season's starting year.
            month_str = date_str.split()[0]
            calendar_year = start_year + 1 if month_str in ('January', 'February') else start_year
            parsed_date = datetime.strptime(f"{date_str}, {calendar_year}", '%B %d, %Y')
    except (ValueError, IndexError):
        # ValueError: text is not a date; IndexError: the cell was empty.
        return None
    return parsed_date.strftime('%Y-%m-%d')


def scrape_nfl_pro_football_reference(season: int) -> list[Game]:
    """
    Scrape NFL schedule from Pro-Football-Reference.

    URL: https://www.pro-football-reference.com/years/{YEAR}/games.htm

    Args:
        season: Season ending year (e.g., 2026 for the 2025-26 season). The
            PFR URL is keyed by the starting year, i.e. season - 1.

    Returns:
        Games parsed from the table with id "games". Rows without a date
        cell, without winner/loser cells, or with an unparseable date are
        skipped. The list is empty when the page, table, or table body is
        not found. Games from this source have no time and an empty venue.
    """
    games = []
    year = season - 1  # PFR uses starting year
    url = f"https://www.pro-football-reference.com/years/{year}/games.htm"

    print(f"Scraping NFL {season} from Pro-Football-Reference...")
    soup = fetch_page(url, 'pro-football-reference.com')

    if not soup:
        return games

    table = soup.find('table', {'id': 'games'})
    if not table:
        print("  Could not find games table")
        return games

    tbody = table.find('tbody')
    if not tbody:
        return games

    for row in tbody.find_all('tr'):
        # Skip rows marked with the 'thead' class (header rows inside the body).
        if row.get('class') and 'thead' in row.get('class'):
            continue

        try:
            date_cell = row.find('td', {'data-stat': 'game_date'})
            winner_cell = row.find('td', {'data-stat': 'winner'})
            loser_cell = row.find('td', {'data-stat': 'loser'})
            home_cell = row.find('td', {'data-stat': 'game_location'})

            if date_cell is None or winner_cell is None or loser_cell is None:
                continue

            date_formatted = _parse_pfr_game_date(date_cell.text.strip(), year)
            if date_formatted is None:
                continue

            # Prefer the link text for the team name; fall back to the cell text.
            winner_link = winner_cell.find('a')
            loser_link = loser_cell.find('a')
            winner = winner_link.text if winner_link else winner_cell.text.strip()
            loser = loser_link.text if loser_link else loser_cell.text.strip()

            # Determine home/away - '@' in game_location means winner was away
            if home_cell is not None and '@' in home_cell.text:
                home_team, away_team = loser, winner
            else:
                home_team, away_team = winner, loser

            away_abbrev = get_nfl_team_abbrev(away_team)
            home_abbrev = get_nfl_team_abbrev(home_team)
            game_id = f"nfl_{date_formatted}_{away_abbrev}_{home_abbrev}".lower().replace(' ', '')

            games.append(Game(
                id=game_id,
                sport='NFL',
                season=get_nfl_season_string(season),
                date=date_formatted,
                time=None,
                home_team=home_team,
                away_team=away_team,
                home_team_abbrev=home_abbrev,
                away_team_abbrev=away_abbrev,
                venue='',
                source='pro-football-reference.com'
            ))

        except Exception:
            # Best-effort: a single malformed row must not abort the scrape.
            continue

    print(f"  Found {len(games)} games from Pro-Football-Reference")
    return games
|
||||
|
||||
|
||||
def scrape_nfl_cbssports(season: int) -> list[Game]:
    """
    Scrape NFL matchups from the CBS Sports schedule page.

    Args:
        season: Season ending year (e.g., 2026 for the 2025-26 season). The
            CBS URL is built from the starting year, i.e. season - 1.

    Returns:
        One Game per table row that has at least three cells and non-empty
        text in the first two (away, home). Empty when the page cannot be
        fetched. Games from this source have no time and an empty venue.

    NOTE: game dates are NOT read from the page. Every game is stamped with
    the date on which the scraper runs, purely as a placeholder, so the
    `date` field and the date part of `id` are not real game dates.
    TODO: extract the real date from the page section each table belongs to.
    """
    games = []
    year = season - 1  # CBS uses starting year
    print(f"Fetching NFL {season} from CBS Sports...")

    # CBS Sports schedule endpoint
    url = f"https://www.cbssports.com/nfl/schedule/{year}/regular/"

    soup = fetch_page(url, 'cbssports.com')
    if not soup:
        return games

    # Placeholder date (see docstring). Computed once so that every game from
    # a single run shares the same value even if the run crosses midnight.
    date_formatted = datetime.now().strftime('%Y-%m-%d')

    # Find game tables
    for table in soup.find_all('table', class_='TableBase-table'):
        for row in table.find_all('tr'):
            try:
                cells = row.find_all('td')
                if len(cells) < 3:
                    continue

                # Parse matchup: first cell is the away side, second the home side.
                away_team = cells[0].get_text(strip=True)
                home_team = cells[1].get_text(strip=True)

                if not away_team or not home_team:
                    continue

                # CBS includes @ symbol
                away_team = away_team.replace('@', '').strip()

                away_abbrev = get_nfl_team_abbrev(away_team)
                home_abbrev = get_nfl_team_abbrev(home_team)
                game_id = f"nfl_{date_formatted}_{away_abbrev}_{home_abbrev}".lower().replace(' ', '')

                games.append(Game(
                    id=game_id,
                    sport='NFL',
                    season=get_nfl_season_string(season),
                    date=date_formatted,
                    time=None,
                    home_team=home_team,
                    away_team=away_team,
                    home_team_abbrev=home_abbrev,
                    away_team_abbrev=away_abbrev,
                    venue='',
                    source='cbssports.com'
                ))

            except Exception:
                # Best-effort: a single malformed row must not abort the scrape.
                continue

    print(f"  Found {len(games)} games from CBS Sports")
    return games
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# STADIUM SCRAPERS
|
||||
# =============================================================================
|
||||
|
||||
def scrape_nfl_stadiums_scorebot() -> list[Stadium]:
    """
    Source 1: NFLScoreBot/stadiums GitHub (public domain).

    Downloads stadiums.json and turns each name -> info entry into a Stadium.
    There is no error handling here: a failed request, a non-2xx status, or
    an invalid JSON body raises to the caller.
    """
    url = "https://raw.githubusercontent.com/NFLScoreBot/stadiums/main/stadiums.json"

    response = requests.get(url, timeout=30)
    response.raise_for_status()
    payload = response.json()

    def _scaled(raw):
        # Truthy coordinates are divided by 1,000,000; missing or zero ones become 0.
        # NOTE(review): presumably the feed stores micro-degrees - confirm.
        return raw / 1000000 if raw else 0

    return [
        Stadium(
            id=f"nfl_{name.lower().replace(' ', '_')[:30]}",
            name=name,
            city=info.get('city', ''),
            state=info.get('state', ''),
            latitude=_scaled(info.get('lat')),
            longitude=_scaled(info.get('long')),
            capacity=info.get('capacity', 0),
            sport='NFL',
            team_abbrevs=info.get('teams', []),
            source='github.com/NFLScoreBot'
        )
        for name, info in payload.items()
    ]
|
||||
|
||||
|
||||
def scrape_nfl_stadiums_geojson() -> list[Stadium]:
    """
    Source 2: brianhatchl/nfl-stadiums GeoJSON gist.

    Downloads the GeoJSON file and builds one Stadium per entry in its
    'features' list. Request, status, and JSON errors are not caught here
    and raise to the caller.
    """
    url = "https://gist.githubusercontent.com/brianhatchl/6265918/raw/dbe6acfe5deb48f51ce5a4c4f8f5dded4f02b9bd/nfl_stadiums.geojson"

    response = requests.get(url, timeout=30)
    response.raise_for_status()
    payload = response.json()

    def _to_stadium(feature):
        props = feature.get('properties', {})
        coords = feature.get('geometry', {}).get('coordinates', [0, 0])
        stadium_name = props.get('Stadium', '')
        # Coordinates are read as [longitude, latitude]; absent slots become 0.
        lon = coords[0] if len(coords) > 0 else 0
        lat = coords[1] if len(coords) > 1 else 0
        return Stadium(
            id=f"nfl_{stadium_name.lower().replace(' ', '_')[:30]}",
            name=stadium_name,
            city=props.get('City', ''),
            state=props.get('State', ''),
            latitude=lat,
            longitude=lon,
            capacity=int(props.get('Capacity', 0) or 0),
            sport='NFL',
            team_abbrevs=[props.get('Team', '')],
            source='gist.github.com/brianhatchl'
        )

    return [_to_stadium(feature) for feature in payload.get('features', [])]
|
||||
|
||||
|
||||
def scrape_nfl_stadiums_hardcoded() -> list[Stadium]:
    """
    Source 3: Hardcoded NFL stadiums (fallback).

    Needs no network access: the 30 stadiums (two of them shared by two teams
    each) are listed inline and converted to Stadium objects in table order.
    """
    # Columns: name, city, state, latitude, longitude, capacity, team abbreviations
    rows = [
        ('State Farm Stadium', 'Glendale', 'AZ', 33.5276, -112.2626, 63400, ['ARI']),
        ('Mercedes-Benz Stadium', 'Atlanta', 'GA', 33.7553, -84.4006, 71000, ['ATL']),
        ('M&T Bank Stadium', 'Baltimore', 'MD', 39.2780, -76.6227, 71008, ['BAL']),
        ('Highmark Stadium', 'Orchard Park', 'NY', 42.7738, -78.7870, 71608, ['BUF']),
        ('Bank of America Stadium', 'Charlotte', 'NC', 35.2258, -80.8528, 75523, ['CAR']),
        ('Soldier Field', 'Chicago', 'IL', 41.8623, -87.6167, 61500, ['CHI']),
        ('Paycor Stadium', 'Cincinnati', 'OH', 39.0954, -84.5160, 65515, ['CIN']),
        ('Cleveland Browns Stadium', 'Cleveland', 'OH', 41.5061, -81.6995, 67895, ['CLE']),
        ('AT&T Stadium', 'Arlington', 'TX', 32.7480, -97.0928, 80000, ['DAL']),
        ('Empower Field at Mile High', 'Denver', 'CO', 39.7439, -105.0201, 76125, ['DEN']),
        ('Ford Field', 'Detroit', 'MI', 42.3400, -83.0456, 65000, ['DET']),
        ('Lambeau Field', 'Green Bay', 'WI', 44.5013, -88.0622, 81435, ['GB']),
        ('NRG Stadium', 'Houston', 'TX', 29.6847, -95.4107, 72220, ['HOU']),
        ('Lucas Oil Stadium', 'Indianapolis', 'IN', 39.7601, -86.1639, 67000, ['IND']),
        ('EverBank Stadium', 'Jacksonville', 'FL', 30.3239, -81.6373, 67814, ['JAX']),
        ('GEHA Field at Arrowhead Stadium', 'Kansas City', 'MO', 39.0489, -94.4839, 76416, ['KC']),
        ('Allegiant Stadium', 'Las Vegas', 'NV', 36.0909, -115.1833, 65000, ['LV']),
        ('SoFi Stadium', 'Inglewood', 'CA', 33.9535, -118.3392, 70240, ['LAC', 'LAR']),
        ('Hard Rock Stadium', 'Miami Gardens', 'FL', 25.9580, -80.2389, 64767, ['MIA']),
        ('U.S. Bank Stadium', 'Minneapolis', 'MN', 44.9736, -93.2575, 66655, ['MIN']),
        ('Gillette Stadium', 'Foxborough', 'MA', 42.0909, -71.2643, 65878, ['NE']),
        ('Caesars Superdome', 'New Orleans', 'LA', 29.9511, -90.0812, 73208, ['NO']),
        ('MetLife Stadium', 'East Rutherford', 'NJ', 40.8135, -74.0745, 82500, ['NYG', 'NYJ']),
        ('Lincoln Financial Field', 'Philadelphia', 'PA', 39.9008, -75.1675, 69596, ['PHI']),
        ('Acrisure Stadium', 'Pittsburgh', 'PA', 40.4468, -80.0158, 68400, ['PIT']),
        ("Levi's Stadium", 'Santa Clara', 'CA', 37.4032, -121.9698, 68500, ['SF']),
        ('Lumen Field', 'Seattle', 'WA', 47.5952, -122.3316, 68740, ['SEA']),
        ('Raymond James Stadium', 'Tampa', 'FL', 27.9759, -82.5033, 65618, ['TB']),
        ('Nissan Stadium', 'Nashville', 'TN', 36.1665, -86.7713, 69143, ['TEN']),
        ('Northwest Stadium', 'Landover', 'MD', 38.9076, -76.8645, 67617, ['WAS']),
    ]

    return [
        Stadium(
            id=f"nfl_{name.lower().replace(' ', '_')[:30]}",
            name=name,
            city=city,
            state=state,
            latitude=lat,
            longitude=lng,
            capacity=capacity,
            sport='NFL',
            team_abbrevs=teams,
            source='nfl_hardcoded'
        )
        for name, city, state, lat, lng, capacity, teams in rows
    ]
|
||||
|
||||
|
||||
def scrape_nfl_stadiums() -> list[Stadium]:
    """
    Fetch NFL stadium data with multi-source fallback.

    Prints a section banner, then hands NFL_STADIUM_SOURCES to
    scrape_stadiums_with_fallback() and returns its result unchanged.
    """
    for banner_line in ("\nNFL STADIUMS", "-" * 40):
        print(banner_line)

    stadiums = scrape_stadiums_with_fallback('NFL', NFL_STADIUM_SOURCES)
    return stadiums
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SOURCE CONFIGURATIONS
|
||||
# =============================================================================
|
||||
|
||||
# Game schedule sources handed to scrape_with_fallback().
# NOTE(review): presumably sources are tried in ascending `priority` and a
# result with fewer than `min_games` games counts as a failure - confirm in core.
NFL_GAME_SOURCES = [
    ScraperSource(
        'ESPN',
        scrape_nfl_espn,
        priority=1,
        min_games=200,
    ),
    ScraperSource(
        'Pro-Football-Reference',
        scrape_nfl_pro_football_reference,
        priority=2,
        min_games=200,
    ),
    ScraperSource(
        'CBS Sports',
        scrape_nfl_cbssports,
        priority=3,
        min_games=100,
    ),
]

# Stadium sources handed to scrape_stadiums_with_fallback(); the hardcoded
# table is listed last.
NFL_STADIUM_SOURCES = [
    StadiumScraperSource(
        'NFLScoreBot',
        scrape_nfl_stadiums_scorebot,
        priority=1,
        min_venues=28,
    ),
    StadiumScraperSource(
        'GeoJSON-Gist',
        scrape_nfl_stadiums_geojson,
        priority=2,
        min_venues=28,
    ),
    StadiumScraperSource(
        'Hardcoded',
        scrape_nfl_stadiums_hardcoded,
        priority=3,
        min_venues=28,
    ),
]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# CONVENIENCE FUNCTIONS
|
||||
# =============================================================================
|
||||
|
||||
def scrape_nfl_games(season: int) -> list[Game]:
    """
    Scrape one NFL season's games via the multi-source fallback chain.

    Args:
        season: Season ending year (e.g., 2026 for 2025-26 season)

    Returns:
        The list of Game objects returned by scrape_with_fallback() for
        NFL_GAME_SOURCES.
    """
    season_label = get_nfl_season_string(season)
    print(f"\nNFL {season_label} SCHEDULE")
    print("-" * 40)

    games = scrape_with_fallback('NFL', season, NFL_GAME_SOURCES)
    return games
|
||||
Reference in New Issue
Block a user