#!/usr/bin/env python3
"""
NHL schedule and stadium scrapers for SportsTime.

This module provides:
- NHL game scrapers (Hockey-Reference, NHL API, ESPN)
- NHL stadium scrapers (hardcoded with coordinates)
- Multi-source fallback configurations
"""

import calendar
from datetime import datetime
from typing import Optional

import requests

# Support both direct execution and import from parent directory
try:
    from core import (
        Game,
        Stadium,
        ScraperSource,
        StadiumScraperSource,
        fetch_page,
        scrape_with_fallback,
        scrape_stadiums_with_fallback,
    )
except ImportError:
    from Scripts.core import (
        Game,
        Stadium,
        ScraperSource,
        StadiumScraperSource,
        fetch_page,
        scrape_with_fallback,
        scrape_stadiums_with_fallback,
    )

__all__ = [
    # Team data
    'NHL_TEAMS',
    # Game scrapers
    'scrape_nhl_hockey_reference',
    'scrape_nhl_api',
    'scrape_nhl_espn',
    # Stadium scrapers
    'scrape_nhl_stadiums',
    # Source configurations
    'NHL_GAME_SOURCES',
    'NHL_STADIUM_SOURCES',
    # Convenience functions
    'scrape_nhl_games',
    'get_nhl_season_string',
]


# =============================================================================
# TEAM MAPPINGS
# =============================================================================

# Keyed by team abbreviation. Insertion order matters: the substring fallback
# in get_nhl_team_abbrev() returns the first entry that matches.
NHL_TEAMS = {
    'ANA': {'name': 'Anaheim Ducks', 'city': 'Anaheim', 'arena': 'Honda Center'},
    'ARI': {'name': 'Utah Hockey Club', 'city': 'Salt Lake City', 'arena': 'Delta Center'},
    'BOS': {'name': 'Boston Bruins', 'city': 'Boston', 'arena': 'TD Garden'},
    'BUF': {'name': 'Buffalo Sabres', 'city': 'Buffalo', 'arena': 'KeyBank Center'},
    'CGY': {'name': 'Calgary Flames', 'city': 'Calgary', 'arena': 'Scotiabank Saddledome'},
    'CAR': {'name': 'Carolina Hurricanes', 'city': 'Raleigh', 'arena': 'PNC Arena'},
    'CHI': {'name': 'Chicago Blackhawks', 'city': 'Chicago', 'arena': 'United Center'},
    'COL': {'name': 'Colorado Avalanche', 'city': 'Denver', 'arena': 'Ball Arena'},
    'CBJ': {'name': 'Columbus Blue Jackets', 'city': 'Columbus', 'arena': 'Nationwide Arena'},
    'DAL': {'name': 'Dallas Stars', 'city': 'Dallas', 'arena': 'American Airlines Center'},
    'DET': {'name': 'Detroit Red Wings', 'city': 'Detroit', 'arena': 'Little Caesars Arena'},
    'EDM': {'name': 'Edmonton Oilers', 'city': 'Edmonton', 'arena': 'Rogers Place'},
    'FLA': {'name': 'Florida Panthers', 'city': 'Sunrise', 'arena': 'Amerant Bank Arena'},
    'LAK': {'name': 'Los Angeles Kings', 'city': 'Los Angeles', 'arena': 'Crypto.com Arena'},
    'MIN': {'name': 'Minnesota Wild', 'city': 'St. Paul', 'arena': 'Xcel Energy Center'},
    'MTL': {'name': 'Montreal Canadiens', 'city': 'Montreal', 'arena': 'Bell Centre'},
    'NSH': {'name': 'Nashville Predators', 'city': 'Nashville', 'arena': 'Bridgestone Arena'},
    'NJD': {'name': 'New Jersey Devils', 'city': 'Newark', 'arena': 'Prudential Center'},
    'NYI': {'name': 'New York Islanders', 'city': 'Elmont', 'arena': 'UBS Arena'},
    'NYR': {'name': 'New York Rangers', 'city': 'New York', 'arena': 'Madison Square Garden'},
    'OTT': {'name': 'Ottawa Senators', 'city': 'Ottawa', 'arena': 'Canadian Tire Centre'},
    'PHI': {'name': 'Philadelphia Flyers', 'city': 'Philadelphia', 'arena': 'Wells Fargo Center'},
    'PIT': {'name': 'Pittsburgh Penguins', 'city': 'Pittsburgh', 'arena': 'PPG Paints Arena'},
    'SJS': {'name': 'San Jose Sharks', 'city': 'San Jose', 'arena': 'SAP Center'},
    'SEA': {'name': 'Seattle Kraken', 'city': 'Seattle', 'arena': 'Climate Pledge Arena'},
    'STL': {'name': 'St. Louis Blues', 'city': 'St. Louis', 'arena': 'Enterprise Center'},
    'TBL': {'name': 'Tampa Bay Lightning', 'city': 'Tampa', 'arena': 'Amalie Arena'},
    'TOR': {'name': 'Toronto Maple Leafs', 'city': 'Toronto', 'arena': 'Scotiabank Arena'},
    'VAN': {'name': 'Vancouver Canucks', 'city': 'Vancouver', 'arena': 'Rogers Arena'},
    'VGK': {'name': 'Vegas Golden Knights', 'city': 'Las Vegas', 'arena': 'T-Mobile Arena'},
    'WSH': {'name': 'Washington Capitals', 'city': 'Washington', 'arena': 'Capital One Arena'},
    'WPG': {'name': 'Winnipeg Jets', 'city': 'Winnipeg', 'arena': 'Canada Life Centre'},
}


def get_nhl_team_abbrev(team_name: str) -> str:
    """
    Get NHL team abbreviation from full name.

    Lookup is case-insensitive and ignores surrounding whitespace. An exact
    match on the full team name always wins; only if no team matches exactly
    is a substring match attempted (first hit in NHL_TEAMS order).

    Args:
        team_name: Full or partial team name (e.g., "Boston Bruins", "Bruins")

    Returns:
        The matching key of NHL_TEAMS, or the first three characters of the
        stripped name upper-cased when nothing matches. Blank input yields ''.
    """
    cleaned = team_name.strip()
    needle = cleaned.lower()

    # An empty string is a substring of every name; without this guard blank
    # input would silently resolve to the first team in the table.
    if not needle:
        return ''

    # Pass 1: exact match on the full name
    for abbrev, info in NHL_TEAMS.items():
        if info['name'].lower() == needle:
            return abbrev

    # Pass 2: partial match (e.g., nickname only)
    for abbrev, info in NHL_TEAMS.items():
        if needle in info['name'].lower():
            return abbrev

    # Return first 3 letters as fallback
    return cleaned[:3].upper()


def get_nhl_season_string(season: int) -> str:
    """
    Get NHL season string in "2024-25" format.

    Args:
        season: The ending year of the season (e.g., 2025 for 2024-25 season)

    Returns:
        Season string like "2024-25"
    """
    return f"{season-1}-{str(season)[2:]}"


# =============================================================================
# GAME SCRAPERS
# =============================================================================

def _cell_text(cell) -> str:
    """Return the stripped text of a table cell, preferring its <a> link text."""
    link = cell.find('a')
    return (link.text if link else cell.text).strip()


def _parse_hockey_reference_row(row, season: int) -> Optional[Game]:
    """
    Convert one row of the Hockey-Reference games table into a Game.

    Returns None for rows that are not games: too few cells, missing
    date/team cells, empty team names, or a date that is not YYYY-MM-DD.
    """
    if len(row.find_all(['td', 'th'])) < 5:
        return None

    # Parse date
    date_cell = row.find('th', {'data-stat': 'date_game'})
    if date_cell is None:
        return None

    # Parse teams
    visitor_cell = row.find('td', {'data-stat': 'visitor_team_name'})
    home_cell = row.find('td', {'data-stat': 'home_team_name'})
    if visitor_cell is None or home_cell is None:
        return None

    away_team = _cell_text(visitor_cell)
    home_team = _cell_text(home_cell)
    if not away_team or not home_team:
        return None

    # Convert date; strptime raises ValueError for anything not YYYY-MM-DD
    try:
        parsed_date = datetime.strptime(_cell_text(date_cell), '%Y-%m-%d')
    except ValueError:
        return None
    date_formatted = parsed_date.strftime('%Y-%m-%d')

    away_abbrev = get_nhl_team_abbrev(away_team)
    home_abbrev = get_nhl_team_abbrev(home_team)
    game_id = f"nhl_{date_formatted}_{away_abbrev}_{home_abbrev}".lower().replace(' ', '')

    return Game(
        id=game_id,
        sport='NHL',
        season=get_nhl_season_string(season),
        date=date_formatted,
        time=None,
        home_team=home_team,
        away_team=away_team,
        home_team_abbrev=home_abbrev,
        away_team_abbrev=away_abbrev,
        venue='',
        source='hockey-reference.com'
    )


def scrape_nhl_hockey_reference(season: int) -> list[Game]:
    """
    Scrape NHL schedule from Hockey-Reference.

    URL: https://www.hockey-reference.com/leagues/NHL_{YEAR}_games.html

    Args:
        season: Season ending year (e.g., 2025 for 2024-25 season)

    Returns:
        List of Game objects (time is None and venue is '' for this source).
        Empty when the page, the games table or its body cannot be found.
    """
    games: list[Game] = []
    url = f"https://www.hockey-reference.com/leagues/NHL_{season}_games.html"

    print(f"Scraping NHL {season} from Hockey-Reference...")
    soup = fetch_page(url, 'hockey-reference.com')
    if not soup:
        return games

    table = soup.find('table', {'id': 'games'})
    if not table:
        print(" Could not find games table")
        return games

    tbody = table.find('tbody')
    if not tbody:
        return games

    skipped = 0
    for row in tbody.find_all('tr'):
        # Best-effort: one malformed row must not abort the whole scrape,
        # but failures are counted so they are no longer invisible.
        try:
            game = _parse_hockey_reference_row(row, season)
        except Exception:
            skipped += 1
            continue
        if game is not None:
            games.append(game)

    if skipped:
        print(f" Skipped {skipped} unparseable rows from Hockey-Reference")
    print(f" Found {len(games)} games from Hockey-Reference")
    return games


def scrape_nhl_api(season: int) -> list[Game]:
    """
    Fetch NHL schedule from official API (JSON).

    URL: https://api-web.nhle.com/v1/schedule/{YYYY-MM-DD}

    Not implemented yet: this function performs no request and always
    returns an empty list.

    Args:
        season: Season ending year (e.g., 2025 for 2024-25 season)

    Returns:
        An empty list.
    """
    games: list[Game] = []
    print(f"Fetching NHL {season} from NHL API...")

    # NHL API provides club schedules
    # We'd need to iterate through dates or teams
    # Simplified implementation here

    return games


def _espn_month_windows(season: int) -> list[tuple[str, str]]:
    """Return (start, end) YYYYMMDD pairs, one per month from October to April."""
    months = [
        (season - 1, 10), (season - 1, 11), (season - 1, 12),
        (season, 1), (season, 2), (season, 3), (season, 4),
    ]
    windows = []
    for year, month in months:
        # monthrange()[1] is the number of days in the month (handles leap years)
        last_day = calendar.monthrange(year, month)[1]
        windows.append((f"{year}{month:02d}01", f"{year}{month:02d}{last_day:02d}"))
    return windows


def _parse_espn_event(event: dict, season: int) -> Optional[Game]:
    """
    Convert one ESPN scoreboard event into a Game.

    Returns None when the event has no competition, fewer than two
    competitors, or is missing a home or away team name.
    """
    # NOTE(review): the timestamp is sliced as-is with no timezone conversion.
    # If ESPN reports UTC, late local games would carry the next calendar
    # day - confirm against the feed.
    timestamp = event.get('date', '')
    date_str = timestamp[:10]
    time_str = timestamp[11:16] if len(timestamp) > 11 else None

    competitions = event.get('competitions', [{}])
    if not competitions:
        return None

    comp = competitions[0]
    competitors = comp.get('competitors', [])
    if len(competitors) < 2:
        return None

    home_team = away_team = home_abbrev = away_abbrev = None
    for team in competitors:
        team_data = team.get('team', {})
        team_name = team_data.get('displayName', team_data.get('name', ''))
        team_abbrev = team_data.get('abbreviation', '')

        if team.get('homeAway') == 'home':
            home_team = team_name
            home_abbrev = team_abbrev
        else:
            away_team = team_name
            away_abbrev = team_abbrev

    if not home_team or not away_team:
        return None

    # Resolve missing abbreviations before building the id so that the id
    # and the Game fields always agree.
    home_abbrev = home_abbrev or get_nhl_team_abbrev(home_team)
    away_abbrev = away_abbrev or get_nhl_team_abbrev(away_team)

    venue = comp.get('venue', {}).get('fullName', '')
    game_id = f"nhl_{date_str}_{away_abbrev}_{home_abbrev}".lower()

    return Game(
        id=game_id,
        sport='NHL',
        season=get_nhl_season_string(season),
        date=date_str,
        time=time_str,
        home_team=home_team,
        away_team=away_team,
        home_team_abbrev=home_abbrev,
        away_team_abbrev=away_abbrev,
        venue=venue,
        source='espn.com'
    )


def scrape_nhl_espn(season: int) -> list[Game]:
    """
    Fetch NHL schedule from ESPN API.

    The regular season (October - April, spanning calendar years) is
    requested one calendar month at a time. A single request for the whole
    range with limit=1000 cannot return a full season, which has
    32 teams x 82 games / 2 = 1312 games.

    Args:
        season: Season ending year (e.g., 2025 for 2024-25 season)

    Returns:
        List of Game objects. Empty if any request fails, so that a partial
        season is never reported as a successful scrape.
    """
    games: list[Game] = []
    print(f"Fetching NHL {season} from ESPN API...")

    url = "https://site.api.espn.com/apis/site/v2/sports/hockey/nhl/scoreboard"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
    }

    skipped = 0
    try:
        for start, end in _espn_month_windows(season):
            params = {
                'dates': f"{start}-{end}",
                'limit': 1000
            }
            response = requests.get(url, params=params, headers=headers, timeout=30)
            response.raise_for_status()
            data = response.json()

            for event in data.get('events', []):
                # Best-effort per event: count failures instead of hiding them.
                try:
                    game = _parse_espn_event(event, season)
                except Exception:
                    skipped += 1
                    continue
                if game is not None:
                    games.append(game)
    except Exception as e:
        # Boundary handler: report and let the multi-source fallback move on.
        print(f"Error fetching ESPN NHL: {e}")
        return []

    if skipped:
        print(f" Skipped {skipped} unparseable events from ESPN")
    print(f" Found {len(games)} games from ESPN")
    return games


# =============================================================================
# STADIUM SCRAPERS
# =============================================================================

def scrape_nhl_stadiums() -> list[Stadium]:
    """
    Fetch NHL arena data (hardcoded with accurate coordinates).

    Returns:
        One Stadium per entry of the built-in arena table. Each id is
        "nhl_" plus the lower-cased, underscore-joined arena name truncated
        to 30 characters.
    """
    print("\nNHL STADIUMS")
    print("-" * 40)
    print(" Loading NHL arenas...")

    nhl_arenas = {
        'TD Garden': {'city': 'Boston', 'state': 'MA', 'lat': 42.3662, 'lng': -71.0621, 'capacity': 17850, 'teams': ['BOS'], 'year_opened': 1995},
        'KeyBank Center': {'city': 'Buffalo', 'state': 'NY', 'lat': 42.8750, 'lng': -78.8764, 'capacity': 19070, 'teams': ['BUF'], 'year_opened': 1996},
        'Little Caesars Arena': {'city': 'Detroit', 'state': 'MI', 'lat': 42.3411, 'lng': -83.0553, 'capacity': 19515, 'teams': ['DET'], 'year_opened': 2017},
        'Amerant Bank Arena': {'city': 'Sunrise', 'state': 'FL', 'lat': 26.1584, 'lng': -80.3256, 'capacity': 19250, 'teams': ['FLA'], 'year_opened': 1998},
        'Bell Centre': {'city': 'Montreal', 'state': 'QC', 'lat': 45.4961, 'lng': -73.5693, 'capacity': 21302, 'teams': ['MTL'], 'year_opened': 1996},
        'Canadian Tire Centre': {'city': 'Ottawa', 'state': 'ON', 'lat': 45.2969, 'lng': -75.9272, 'capacity': 18652, 'teams': ['OTT'], 'year_opened': 1996},
        'Amalie Arena': {'city': 'Tampa', 'state': 'FL', 'lat': 27.9426, 'lng': -82.4519, 'capacity': 19092, 'teams': ['TBL'], 'year_opened': 1996},
        'Scotiabank Arena': {'city': 'Toronto', 'state': 'ON', 'lat': 43.6435, 'lng': -79.3791, 'capacity': 18800, 'teams': ['TOR'], 'year_opened': 1999},
        'PNC Arena': {'city': 'Raleigh', 'state': 'NC', 'lat': 35.8033, 'lng': -78.7220, 'capacity': 18680, 'teams': ['CAR'], 'year_opened': 1999},
        'Nationwide Arena': {'city': 'Columbus', 'state': 'OH', 'lat': 39.9692, 'lng': -83.0061, 'capacity': 18500, 'teams': ['CBJ'], 'year_opened': 2000},
        'Prudential Center': {'city': 'Newark', 'state': 'NJ', 'lat': 40.7334, 'lng': -74.1713, 'capacity': 16514, 'teams': ['NJD'], 'year_opened': 2007},
        'UBS Arena': {'city': 'Elmont', 'state': 'NY', 'lat': 40.7170, 'lng': -73.7260, 'capacity': 17255, 'teams': ['NYI'], 'year_opened': 2021},
        'Madison Square Garden': {'city': 'New York', 'state': 'NY', 'lat': 40.7505, 'lng': -73.9934, 'capacity': 18006, 'teams': ['NYR'], 'year_opened': 1968},
        'Wells Fargo Center': {'city': 'Philadelphia', 'state': 'PA', 'lat': 39.9012, 'lng': -75.1720, 'capacity': 19500, 'teams': ['PHI'], 'year_opened': 1996},
        'PPG Paints Arena': {'city': 'Pittsburgh', 'state': 'PA', 'lat': 40.4395, 'lng': -79.9892, 'capacity': 18387, 'teams': ['PIT'], 'year_opened': 2010},
        'Capital One Arena': {'city': 'Washington', 'state': 'DC', 'lat': 38.8982, 'lng': -77.0209, 'capacity': 18573, 'teams': ['WSH'], 'year_opened': 1997},
        'United Center': {'city': 'Chicago', 'state': 'IL', 'lat': 41.8807, 'lng': -87.6742, 'capacity': 19717, 'teams': ['CHI'], 'year_opened': 1994},
        'Ball Arena': {'city': 'Denver', 'state': 'CO', 'lat': 39.7487, 'lng': -105.0077, 'capacity': 18007, 'teams': ['COL'], 'year_opened': 1999},
        'American Airlines Center': {'city': 'Dallas', 'state': 'TX', 'lat': 32.7905, 'lng': -96.8103, 'capacity': 18532, 'teams': ['DAL'], 'year_opened': 2001},
        'Xcel Energy Center': {'city': 'Saint Paul', 'state': 'MN', 'lat': 44.9448, 'lng': -93.1010, 'capacity': 17954, 'teams': ['MIN'], 'year_opened': 2000},
        'Bridgestone Arena': {'city': 'Nashville', 'state': 'TN', 'lat': 36.1592, 'lng': -86.7785, 'capacity': 17159, 'teams': ['NSH'], 'year_opened': 1996},
        'Enterprise Center': {'city': 'St. Louis', 'state': 'MO', 'lat': 38.6268, 'lng': -90.2025, 'capacity': 18096, 'teams': ['STL'], 'year_opened': 1994},
        'Canada Life Centre': {'city': 'Winnipeg', 'state': 'MB', 'lat': 49.8928, 'lng': -97.1437, 'capacity': 15321, 'teams': ['WPG'], 'year_opened': 2004},
        'Honda Center': {'city': 'Anaheim', 'state': 'CA', 'lat': 33.8078, 'lng': -117.8765, 'capacity': 17174, 'teams': ['ANA'], 'year_opened': 1993},
        'Delta Center': {'city': 'Salt Lake City', 'state': 'UT', 'lat': 40.7683, 'lng': -111.9011, 'capacity': 16210, 'teams': ['ARI'], 'year_opened': 1991},
        'SAP Center': {'city': 'San Jose', 'state': 'CA', 'lat': 37.3327, 'lng': -121.9012, 'capacity': 17562, 'teams': ['SJS'], 'year_opened': 1993},
        'Rogers Arena': {'city': 'Vancouver', 'state': 'BC', 'lat': 49.2778, 'lng': -123.1089, 'capacity': 18910, 'teams': ['VAN'], 'year_opened': 1995},
        'T-Mobile Arena': {'city': 'Las Vegas', 'state': 'NV', 'lat': 36.1028, 'lng': -115.1784, 'capacity': 17500, 'teams': ['VGK'], 'year_opened': 2016},
        'Climate Pledge Arena': {'city': 'Seattle', 'state': 'WA', 'lat': 47.6220, 'lng': -122.3540, 'capacity': 17100, 'teams': ['SEA'], 'year_opened': 2021},
        'Crypto.com Arena': {'city': 'Los Angeles', 'state': 'CA', 'lat': 34.0430, 'lng': -118.2673, 'capacity': 18230, 'teams': ['LAK'], 'year_opened': 1999},
        'Rogers Place': {'city': 'Edmonton', 'state': 'AB', 'lat': 53.5469, 'lng': -113.4979, 'capacity': 18347, 'teams': ['EDM'], 'year_opened': 2016},
        'Scotiabank Saddledome': {'city': 'Calgary', 'state': 'AB', 'lat': 51.0374, 'lng': -114.0519, 'capacity': 19289, 'teams': ['CGY'], 'year_opened': 1983},
    }

    stadiums = [
        Stadium(
            id=f"nhl_{name.lower().replace(' ', '_')[:30]}",
            name=name,
            city=info['city'],
            state=info['state'],
            latitude=info['lat'],
            longitude=info['lng'],
            capacity=info['capacity'],
            sport='NHL',
            team_abbrevs=info['teams'],
            source='nhl_hardcoded',
            year_opened=info.get('year_opened')
        )
        for name, info in nhl_arenas.items()
    ]

    print(f" ✓ Found {len(stadiums)} NHL arenas")
    return stadiums


# =============================================================================
# SOURCE CONFIGURATIONS
# =============================================================================

# Game sources with their configured priority and minimum game count;
# consumed by scrape_with_fallback() in scrape_nhl_games().
NHL_GAME_SOURCES = [
    ScraperSource('Hockey-Reference', scrape_nhl_hockey_reference, priority=1, min_games=100),
    ScraperSource('ESPN', scrape_nhl_espn, priority=2, min_games=50),
    ScraperSource('NHL API', scrape_nhl_api, priority=3, min_games=50),
]

NHL_STADIUM_SOURCES = [
    StadiumScraperSource('Hardcoded', scrape_nhl_stadiums, priority=1, min_venues=25),
]


# =============================================================================
# CONVENIENCE FUNCTIONS
# =============================================================================

def scrape_nhl_games(season: int) -> list[Game]:
    """
    Scrape NHL games for a season using multi-source fallback.

    Args:
        season: Season ending year (e.g., 2025 for 2024-25 season)

    Returns:
        List of Game objects from the first successful source
    """
    print(f"\nNHL {get_nhl_season_string(season)} SCHEDULE")
    print("-" * 40)

    return scrape_with_fallback('NHL', season, NHL_GAME_SOURCES)