Files
Sportstime/Scripts/nhl.py
Trey t c229fa73fd feat(01-02): create nhl.py sport module
NHL team mappings, Hockey-Reference/NHL API/ESPN scrapers, stadium data with coordinates.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-10 00:07:38 -06:00

411 lines
17 KiB
Python

#!/usr/bin/env python3
"""
NHL schedule and stadium scrapers for SportsTime.
This module provides:
- NHL game scrapers (Hockey-Reference, NHL API, ESPN)
- NHL stadium scrapers (hardcoded with coordinates)
- Multi-source fallback configurations
"""
from datetime import datetime
from typing import Optional
import requests
# Support both direct execution and import from parent directory
try:
from core import (
Game,
Stadium,
ScraperSource,
StadiumScraperSource,
fetch_page,
scrape_with_fallback,
scrape_stadiums_with_fallback,
)
except ImportError:
from Scripts.core import (
Game,
Stadium,
ScraperSource,
StadiumScraperSource,
fetch_page,
scrape_with_fallback,
scrape_stadiums_with_fallback,
)
# Names exported by ``from nhl import *``, grouped by role.
__all__ = [
    # -- team lookup table --
    'NHL_TEAMS',

    # -- schedule scrapers, one per upstream source --
    'scrape_nhl_hockey_reference',
    'scrape_nhl_api',
    'scrape_nhl_espn',

    # -- arena data --
    'scrape_nhl_stadiums',

    # -- fallback source lists consumed by the core helpers --
    'NHL_GAME_SOURCES',
    'NHL_STADIUM_SOURCES',

    # -- high-level helpers --
    'scrape_nhl_games',
    'get_nhl_season_string',
]
# =============================================================================
# TEAM MAPPINGS
# =============================================================================
# Abbreviation -> display name, home city and home arena for each club.
# NOTE(review): Utah Hockey Club is keyed as 'ARI' here (and in the arena
# table in scrape_nhl_stadiums); external feeds may use a different code for
# this club -- confirm before matching on abbreviation across sources.
NHL_TEAMS = {
    'ANA': {'name': 'Anaheim Ducks', 'city': 'Anaheim', 'arena': 'Honda Center'},
    'ARI': {'name': 'Utah Hockey Club', 'city': 'Salt Lake City', 'arena': 'Delta Center'},
    'BOS': {'name': 'Boston Bruins', 'city': 'Boston', 'arena': 'TD Garden'},
    'BUF': {'name': 'Buffalo Sabres', 'city': 'Buffalo', 'arena': 'KeyBank Center'},
    'CGY': {'name': 'Calgary Flames', 'city': 'Calgary', 'arena': 'Scotiabank Saddledome'},
    'CAR': {'name': 'Carolina Hurricanes', 'city': 'Raleigh', 'arena': 'PNC Arena'},
    'CHI': {'name': 'Chicago Blackhawks', 'city': 'Chicago', 'arena': 'United Center'},
    'COL': {'name': 'Colorado Avalanche', 'city': 'Denver', 'arena': 'Ball Arena'},
    'CBJ': {'name': 'Columbus Blue Jackets', 'city': 'Columbus', 'arena': 'Nationwide Arena'},
    'DAL': {'name': 'Dallas Stars', 'city': 'Dallas', 'arena': 'American Airlines Center'},
    'DET': {'name': 'Detroit Red Wings', 'city': 'Detroit', 'arena': 'Little Caesars Arena'},
    'EDM': {'name': 'Edmonton Oilers', 'city': 'Edmonton', 'arena': 'Rogers Place'},
    'FLA': {'name': 'Florida Panthers', 'city': 'Sunrise', 'arena': 'Amerant Bank Arena'},
    'LAK': {'name': 'Los Angeles Kings', 'city': 'Los Angeles', 'arena': 'Crypto.com Arena'},
    'MIN': {'name': 'Minnesota Wild', 'city': 'St. Paul', 'arena': 'Xcel Energy Center'},
    'MTL': {'name': 'Montreal Canadiens', 'city': 'Montreal', 'arena': 'Bell Centre'},
    'NSH': {'name': 'Nashville Predators', 'city': 'Nashville', 'arena': 'Bridgestone Arena'},
    'NJD': {'name': 'New Jersey Devils', 'city': 'Newark', 'arena': 'Prudential Center'},
    'NYI': {'name': 'New York Islanders', 'city': 'Elmont', 'arena': 'UBS Arena'},
    'NYR': {'name': 'New York Rangers', 'city': 'New York', 'arena': 'Madison Square Garden'},
    'OTT': {'name': 'Ottawa Senators', 'city': 'Ottawa', 'arena': 'Canadian Tire Centre'},
    'PHI': {'name': 'Philadelphia Flyers', 'city': 'Philadelphia', 'arena': 'Wells Fargo Center'},
    'PIT': {'name': 'Pittsburgh Penguins', 'city': 'Pittsburgh', 'arena': 'PPG Paints Arena'},
    'SJS': {'name': 'San Jose Sharks', 'city': 'San Jose', 'arena': 'SAP Center'},
    'SEA': {'name': 'Seattle Kraken', 'city': 'Seattle', 'arena': 'Climate Pledge Arena'},
    'STL': {'name': 'St. Louis Blues', 'city': 'St. Louis', 'arena': 'Enterprise Center'},
    'TBL': {'name': 'Tampa Bay Lightning', 'city': 'Tampa', 'arena': 'Amalie Arena'},
    'TOR': {'name': 'Toronto Maple Leafs', 'city': 'Toronto', 'arena': 'Scotiabank Arena'},
    'VAN': {'name': 'Vancouver Canucks', 'city': 'Vancouver', 'arena': 'Rogers Arena'},
    'VGK': {'name': 'Vegas Golden Knights', 'city': 'Las Vegas', 'arena': 'T-Mobile Arena'},
    'WSH': {'name': 'Washington Capitals', 'city': 'Washington', 'arena': 'Capital One Arena'},
    'WPG': {'name': 'Winnipeg Jets', 'city': 'Winnipeg', 'arena': 'Canada Life Centre'},
}


def get_nhl_team_abbrev(team_name: str) -> str:
    """
    Get the NHL team abbreviation for a team name.

    Matching is case-insensitive and ignores surrounding whitespace. An exact
    match on the full team name always wins; only if no team matches exactly
    is a partial match tried (e.g. "Rangers" -> "NYR"), taking the first team
    in NHL_TEAMS order whose name contains the input.

    Args:
        team_name: Full or partial team name.
    Returns:
        The key from NHL_TEAMS for the matched team. If nothing matches, the
        first three characters of the stripped name, upper-cased. An empty or
        whitespace-only name returns '' (it would otherwise be a substring of
        every team name and wrongly resolve to the first team).
    """
    cleaned = (team_name or '').strip()
    if not cleaned:
        return ''
    wanted = cleaned.lower()

    # Pass 1: exact name match across the whole table.
    for abbrev, info in NHL_TEAMS.items():
        if info['name'].lower() == wanted:
            return abbrev

    # Pass 2: substring match, only reached when no exact match exists.
    for abbrev, info in NHL_TEAMS.items():
        if wanted in info['name'].lower():
            return abbrev

    # Return first 3 letters as fallback
    return cleaned[:3].upper()
def get_nhl_season_string(season: int) -> str:
    """
    Format a season's ending year as a "YYYY-YY" label.

    Args:
        season: Year in which the season ends (2025 means the 2024-25 season).
    Returns:
        The starting year, a hyphen, then the ending year with its first two
        characters dropped, e.g. "2024-25".
    """
    first_year = str(season - 1)
    # Everything after the first two characters of the ending year.
    closing_suffix = str(season)[2:]
    return "-".join((first_year, closing_suffix))
# =============================================================================
# GAME SCRAPERS
# =============================================================================
def _hockey_reference_cell_text(cell) -> str:
    """Return a table cell's text, preferring its first link, whitespace-stripped."""
    link = cell.find('a')
    return (link.text if link else cell.text).strip()


def scrape_nhl_hockey_reference(season: int) -> list[Game]:
    """
    Scrape NHL schedule from Hockey-Reference.

    URL: https://www.hockey-reference.com/leagues/NHL_{YEAR}_games.html

    Reads the rows of the table with id "games" and builds one Game per row
    that has a date cell, a visitor cell and a home cell. Game time and venue
    are not read from this page (time is None, venue is '').

    Args:
        season: Season ending year, used in the page URL.
    Returns:
        List of Game objects; empty if the page, the table or its body is
        missing. Rows that cannot be parsed are skipped, not raised.
    """
    games = []
    url = f"https://www.hockey-reference.com/leagues/NHL_{season}_games.html"
    print(f"Scraping NHL {season} from Hockey-Reference...")
    soup = fetch_page(url, 'hockey-reference.com')
    if not soup:
        return games
    table = soup.find('table', {'id': 'games'})
    if not table:
        print(" Could not find games table")
        return games
    tbody = table.find('tbody')
    if not tbody:
        return games

    skipped = 0
    for row in tbody.find_all('tr'):
        try:
            # Rows with fewer than five cells are not game rows.
            if len(row.find_all(['td', 'th'])) < 5:
                continue

            date_cell = row.find('th', {'data-stat': 'date_game'})
            if not date_cell:
                continue
            visitor_cell = row.find('td', {'data-stat': 'visitor_team_name'})
            home_cell = row.find('td', {'data-stat': 'home_team_name'})
            if not visitor_cell or not home_cell:
                continue

            date_str = _hockey_reference_cell_text(date_cell)
            away_team = _hockey_reference_cell_text(visitor_cell)
            home_team = _hockey_reference_cell_text(home_cell)

            # Validate and normalise the date; anything that is not
            # YYYY-MM-DD is skipped.
            try:
                parsed_date = datetime.strptime(date_str, '%Y-%m-%d')
            except ValueError:
                continue
            date_formatted = parsed_date.strftime('%Y-%m-%d')

            away_abbrev = get_nhl_team_abbrev(away_team)
            home_abbrev = get_nhl_team_abbrev(home_team)
            # The abbreviation fallback can contain spaces; keep ids compact.
            game_id = f"nhl_{date_formatted}_{away_abbrev}_{home_abbrev}".lower().replace(' ', '')
            games.append(Game(
                id=game_id,
                sport='NHL',
                season=get_nhl_season_string(season),
                date=date_formatted,
                time=None,
                home_team=home_team,
                away_team=away_team,
                home_team_abbrev=home_abbrev,
                away_team_abbrev=away_abbrev,
                venue='',
                source='hockey-reference.com'
            ))
        except Exception:
            # Best-effort per-row guard: one malformed row must not abort the
            # whole scrape, but count it so the loss is visible.
            skipped += 1
            continue

    if skipped:
        print(f" Skipped {skipped} unparseable rows from Hockey-Reference")
    print(f" Found {len(games)} games from Hockey-Reference")
    return games
def scrape_nhl_api(season: int) -> list[Game]:
    """
    Placeholder for fetching the NHL schedule from the official JSON API.

    Intended endpoint: https://api-web.nhle.com/v1/schedule/{YYYY-MM-DD}

    No request is made yet: the function announces itself and returns an
    empty list.

    Args:
        season: Season ending year (only used in the progress message).
    Returns:
        An empty list.
    """
    print(f"Fetching NHL {season} from NHL API...")
    # Not implemented: a real version would need to walk the schedule by date
    # or by club and collect the games.
    return []
def scrape_nhl_espn(season: int) -> list[Game]:
    """
    Fetch NHL schedule from ESPN API.

    Requests the scoreboard endpoint once for the range October 1 of the
    previous year through April 30 of ``season`` and builds one Game per
    event that has a date and both a home and an away competitor.

    Args:
        season: Season ending year (e.g., 2025 for 2024-25 season).
    Returns:
        List of Game objects. Any request/decoding failure is printed and the
        games collected so far (normally none) are returned; malformed
        individual events are skipped.
    """
    games = []
    print(f"Fetching NHL {season} from ESPN API...")
    # NHL regular season: October - April (spans calendar years)
    start = f"{season-1}1001"
    end = f"{season}0430"
    url = "https://site.api.espn.com/apis/site/v2/sports/hockey/nhl/scoreboard"
    # NOTE(review): a full 32-team, 82-game schedule is 1,312 games, which is
    # more than this limit -- confirm the endpoint is not truncating results;
    # if it is, the date range needs to be requested in smaller slices.
    params = {
        'dates': f"{start}-{end}",
        'limit': 1000
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
    }
    try:
        response = requests.get(url, params=params, headers=headers, timeout=30)
        response.raise_for_status()
        data = response.json()
        events = data.get('events', [])
        skipped = 0
        for event in events:
            try:
                # NOTE(review): presumably an ISO timestamp in UTC, so the
                # sliced date can differ from the local game date -- verify.
                raw_date = event.get('date', '')
                date_str = raw_date[:10]
                if not date_str:
                    # Without a date neither the id nor the Game is usable.
                    continue
                time_str = raw_date[11:16] if len(raw_date) > 11 else None

                competitions = event.get('competitions', [{}])
                if not competitions:
                    continue
                comp = competitions[0]
                competitors = comp.get('competitors', [])
                if len(competitors) < 2:
                    continue

                home_team = away_team = home_abbrev = away_abbrev = None
                for team in competitors:
                    team_data = team.get('team', {})
                    team_name = team_data.get('displayName', team_data.get('name', ''))
                    team_abbrev = team_data.get('abbreviation', '')
                    if team.get('homeAway') == 'home':
                        home_team = team_name
                        home_abbrev = team_abbrev
                    else:
                        away_team = team_name
                        away_abbrev = team_abbrev
                if not home_team or not away_team:
                    continue

                # Resolve missing abbreviations once so the id and the Game
                # fields always agree.
                home_abbrev = home_abbrev or get_nhl_team_abbrev(home_team)
                away_abbrev = away_abbrev or get_nhl_team_abbrev(away_team)

                venue = comp.get('venue', {}).get('fullName', '')
                # The abbreviation fallback can contain spaces; keep ids compact.
                game_id = f"nhl_{date_str}_{away_abbrev}_{home_abbrev}".lower().replace(' ', '')
                games.append(Game(
                    id=game_id,
                    sport='NHL',
                    season=get_nhl_season_string(season),
                    date=date_str,
                    time=time_str,
                    home_team=home_team,
                    away_team=away_team,
                    home_team_abbrev=home_abbrev,
                    away_team_abbrev=away_abbrev,
                    venue=venue,
                    source='espn.com'
                ))
            except Exception:
                # Best-effort per-event guard: one malformed event must not
                # abort the whole fetch, but count it so the loss is visible.
                skipped += 1
                continue
        if skipped:
            print(f" Skipped {skipped} malformed events from ESPN")
        print(f" Found {len(games)} games from ESPN")
    except Exception as e:
        # Boundary handler: callers expect a list back, never an exception.
        print(f"Error fetching ESPN NHL: {e}")
    return games
# =============================================================================
# STADIUM SCRAPERS
# =============================================================================
def scrape_nhl_stadiums() -> list[Stadium]:
    """
    Build Stadium records for the NHL arenas from a hardcoded table.

    No network access is performed; the data lives in this function.

    Returns:
        One Stadium per table row, in table order. Each id is "nhl_" followed
        by the lower-cased arena name with spaces turned into underscores,
        cut to 30 characters.
    """
    print("\nNHL STADIUMS")
    print("-" * 40)
    print(" Loading NHL arenas...")

    # (name, city, state/province, latitude, longitude, capacity, team codes)
    arena_rows = [
        ('TD Garden', 'Boston', 'MA', 42.3662, -71.0621, 17850, ['BOS']),
        ('KeyBank Center', 'Buffalo', 'NY', 42.8750, -78.8764, 19070, ['BUF']),
        ('Little Caesars Arena', 'Detroit', 'MI', 42.3411, -83.0553, 19515, ['DET']),
        ('Amerant Bank Arena', 'Sunrise', 'FL', 26.1584, -80.3256, 19250, ['FLA']),
        ('Bell Centre', 'Montreal', 'QC', 45.4961, -73.5693, 21302, ['MTL']),
        ('Canadian Tire Centre', 'Ottawa', 'ON', 45.2969, -75.9272, 18652, ['OTT']),
        ('Amalie Arena', 'Tampa', 'FL', 27.9426, -82.4519, 19092, ['TBL']),
        ('Scotiabank Arena', 'Toronto', 'ON', 43.6435, -79.3791, 18800, ['TOR']),
        ('PNC Arena', 'Raleigh', 'NC', 35.8033, -78.7220, 18680, ['CAR']),
        ('Nationwide Arena', 'Columbus', 'OH', 39.9692, -83.0061, 18500, ['CBJ']),
        ('Prudential Center', 'Newark', 'NJ', 40.7334, -74.1713, 16514, ['NJD']),
        ('UBS Arena', 'Elmont', 'NY', 40.7170, -73.7260, 17255, ['NYI']),
        ('Madison Square Garden', 'New York', 'NY', 40.7505, -73.9934, 18006, ['NYR']),
        ('Wells Fargo Center', 'Philadelphia', 'PA', 39.9012, -75.1720, 19500, ['PHI']),
        ('PPG Paints Arena', 'Pittsburgh', 'PA', 40.4395, -79.9892, 18387, ['PIT']),
        ('Capital One Arena', 'Washington', 'DC', 38.8982, -77.0209, 18573, ['WSH']),
        ('United Center', 'Chicago', 'IL', 41.8807, -87.6742, 19717, ['CHI']),
        ('Ball Arena', 'Denver', 'CO', 39.7487, -105.0077, 18007, ['COL']),
        ('American Airlines Center', 'Dallas', 'TX', 32.7905, -96.8103, 18532, ['DAL']),
        ('Xcel Energy Center', 'Saint Paul', 'MN', 44.9448, -93.1010, 17954, ['MIN']),
        ('Bridgestone Arena', 'Nashville', 'TN', 36.1592, -86.7785, 17159, ['NSH']),
        ('Enterprise Center', 'St. Louis', 'MO', 38.6268, -90.2025, 18096, ['STL']),
        ('Canada Life Centre', 'Winnipeg', 'MB', 49.8928, -97.1437, 15321, ['WPG']),
        ('Honda Center', 'Anaheim', 'CA', 33.8078, -117.8765, 17174, ['ANA']),
        ('Delta Center', 'Salt Lake City', 'UT', 40.7683, -111.9011, 16210, ['ARI']),
        ('SAP Center', 'San Jose', 'CA', 37.3327, -121.9012, 17562, ['SJS']),
        ('Rogers Arena', 'Vancouver', 'BC', 49.2778, -123.1089, 18910, ['VAN']),
        ('T-Mobile Arena', 'Las Vegas', 'NV', 36.1028, -115.1784, 17500, ['VGK']),
        ('Climate Pledge Arena', 'Seattle', 'WA', 47.6220, -122.3540, 17100, ['SEA']),
        ('Crypto.com Arena', 'Los Angeles', 'CA', 34.0430, -118.2673, 18230, ['LAK']),
        ('Rogers Place', 'Edmonton', 'AB', 53.5469, -113.4979, 18347, ['EDM']),
        ('Scotiabank Saddledome', 'Calgary', 'AB', 51.0374, -114.0519, 19289, ['CGY']),
    ]

    stadiums = [
        Stadium(
            # Slug the arena name, then cap it at 30 characters.
            id="nhl_" + arena_name.lower().replace(' ', '_')[:30],
            name=arena_name,
            city=city,
            state=region,
            latitude=lat,
            longitude=lng,
            capacity=seats,
            sport='NHL',
            team_abbrevs=team_codes,
            source='nhl_hardcoded'
        )
        for arena_name, city, region, lat, lng, seats, team_codes in arena_rows
    ]
    print(f" ✓ Found {len(stadiums)} NHL arenas")
    return stadiums
# =============================================================================
# SOURCE CONFIGURATIONS
# =============================================================================
# Schedule sources, listed in ascending ``priority`` number. Each entry pairs
# a display label with its scraper function and a ``min_games`` threshold.
NHL_GAME_SOURCES = [
    ScraperSource(
        'Hockey-Reference',
        scrape_nhl_hockey_reference,
        priority=1,
        min_games=100,
    ),
    ScraperSource(
        'ESPN',
        scrape_nhl_espn,
        priority=2,
        min_games=50,
    ),
    # scrape_nhl_api currently returns an empty list, so this entry
    # presumably can never reach its min_games threshold.
    ScraperSource(
        'NHL API',
        scrape_nhl_api,
        priority=3,
        min_games=50,
    ),
]

# Arena sources: only the built-in table exists.
NHL_STADIUM_SOURCES = [
    StadiumScraperSource(
        'Hardcoded',
        scrape_nhl_stadiums,
        priority=1,
        min_venues=25,
    ),
]
# =============================================================================
# CONVENIENCE FUNCTIONS
# =============================================================================
def scrape_nhl_games(season: int) -> list[Game]:
    """
    Print a schedule banner and scrape one NHL season via the fallback chain.

    Args:
        season: Season ending year (e.g., 2025 for 2024-25 season)
    Returns:
        Whatever ``scrape_with_fallback`` returns for the sources in
        NHL_GAME_SOURCES (presumably the games from the first source that
        succeeds -- see the core module).
    """
    season_label = get_nhl_season_string(season)
    banner = f"\nNHL {season_label} SCHEDULE"
    print(banner)
    print("-" * 40)
    games = scrape_with_fallback('NHL', season, NHL_GAME_SOURCES)
    return games