#!/usr/bin/env python3
"""
NHL schedule and stadium scrapers for SportsTime.

This module provides:
- NHL game scrapers (Hockey-Reference, NHL API, ESPN)
- NHL stadium scrapers (hardcoded with coordinates)
- Multi-source fallback configurations
"""

from datetime import datetime
from typing import Optional

import requests

# Support both direct execution and import from parent directory
try:
    from core import (
        Game,
        Stadium,
        ScraperSource,
        StadiumScraperSource,
        fetch_page,
        scrape_with_fallback,
        scrape_stadiums_with_fallback,
    )
except ImportError:
    from Scripts.core import (
        Game,
        Stadium,
        ScraperSource,
        StadiumScraperSource,
        fetch_page,
        scrape_with_fallback,
        scrape_stadiums_with_fallback,
    )


__all__ = [
    # Team data
    'NHL_TEAMS',
    # Game scrapers
    'scrape_nhl_hockey_reference',
    'scrape_nhl_api',
    'scrape_nhl_espn',
    # Stadium scrapers
    'scrape_nhl_stadiums',
    # Source configurations
    'NHL_GAME_SOURCES',
    'NHL_STADIUM_SOURCES',
    # Convenience functions
    'scrape_nhl_games',
    'get_nhl_season_string',
]


# =============================================================================
# TEAM MAPPINGS
# =============================================================================

# Maps team abbreviation -> display name, home city and home arena.
# NOTE(review): the Utah Hockey Club entry is keyed under the legacy 'ARI'
# abbreviation; confirm that downstream consumers expect this key.
# NOTE(review): 'MIN' uses city 'St. Paul' here while the arena table below
# uses 'Saint Paul'; confirm which spelling consumers rely on.
NHL_TEAMS = {
    'ANA': {'name': 'Anaheim Ducks', 'city': 'Anaheim', 'arena': 'Honda Center'},
    'ARI': {'name': 'Utah Hockey Club', 'city': 'Salt Lake City', 'arena': 'Delta Center'},
    'BOS': {'name': 'Boston Bruins', 'city': 'Boston', 'arena': 'TD Garden'},
    'BUF': {'name': 'Buffalo Sabres', 'city': 'Buffalo', 'arena': 'KeyBank Center'},
    'CGY': {'name': 'Calgary Flames', 'city': 'Calgary', 'arena': 'Scotiabank Saddledome'},
    'CAR': {'name': 'Carolina Hurricanes', 'city': 'Raleigh', 'arena': 'PNC Arena'},
    'CHI': {'name': 'Chicago Blackhawks', 'city': 'Chicago', 'arena': 'United Center'},
    'COL': {'name': 'Colorado Avalanche', 'city': 'Denver', 'arena': 'Ball Arena'},
    'CBJ': {'name': 'Columbus Blue Jackets', 'city': 'Columbus', 'arena': 'Nationwide Arena'},
    'DAL': {'name': 'Dallas Stars', 'city': 'Dallas', 'arena': 'American Airlines Center'},
    'DET': {'name': 'Detroit Red Wings', 'city': 'Detroit', 'arena': 'Little Caesars Arena'},
    'EDM': {'name': 'Edmonton Oilers', 'city': 'Edmonton', 'arena': 'Rogers Place'},
    'FLA': {'name': 'Florida Panthers', 'city': 'Sunrise', 'arena': 'Amerant Bank Arena'},
    'LAK': {'name': 'Los Angeles Kings', 'city': 'Los Angeles', 'arena': 'Crypto.com Arena'},
    'MIN': {'name': 'Minnesota Wild', 'city': 'St. Paul', 'arena': 'Xcel Energy Center'},
    'MTL': {'name': 'Montreal Canadiens', 'city': 'Montreal', 'arena': 'Bell Centre'},
    'NSH': {'name': 'Nashville Predators', 'city': 'Nashville', 'arena': 'Bridgestone Arena'},
    'NJD': {'name': 'New Jersey Devils', 'city': 'Newark', 'arena': 'Prudential Center'},
    'NYI': {'name': 'New York Islanders', 'city': 'Elmont', 'arena': 'UBS Arena'},
    'NYR': {'name': 'New York Rangers', 'city': 'New York', 'arena': 'Madison Square Garden'},
    'OTT': {'name': 'Ottawa Senators', 'city': 'Ottawa', 'arena': 'Canadian Tire Centre'},
    'PHI': {'name': 'Philadelphia Flyers', 'city': 'Philadelphia', 'arena': 'Wells Fargo Center'},
    'PIT': {'name': 'Pittsburgh Penguins', 'city': 'Pittsburgh', 'arena': 'PPG Paints Arena'},
    'SJS': {'name': 'San Jose Sharks', 'city': 'San Jose', 'arena': 'SAP Center'},
    'SEA': {'name': 'Seattle Kraken', 'city': 'Seattle', 'arena': 'Climate Pledge Arena'},
    'STL': {'name': 'St. Louis Blues', 'city': 'St. Louis', 'arena': 'Enterprise Center'},
    'TBL': {'name': 'Tampa Bay Lightning', 'city': 'Tampa', 'arena': 'Amalie Arena'},
    'TOR': {'name': 'Toronto Maple Leafs', 'city': 'Toronto', 'arena': 'Scotiabank Arena'},
    'VAN': {'name': 'Vancouver Canucks', 'city': 'Vancouver', 'arena': 'Rogers Arena'},
    'VGK': {'name': 'Vegas Golden Knights', 'city': 'Las Vegas', 'arena': 'T-Mobile Arena'},
    'WSH': {'name': 'Washington Capitals', 'city': 'Washington', 'arena': 'Capital One Arena'},
    'WPG': {'name': 'Winnipeg Jets', 'city': 'Winnipeg', 'arena': 'Canada Life Centre'},
}


def get_nhl_team_abbrev(team_name: str) -> str:
    """
    Get NHL team abbreviation from a full (or partial) team name.

    Matching is case-insensitive and ignores surrounding whitespace. An exact
    match against any entry of NHL_TEAMS always wins; only when no team
    matches exactly is a substring match attempted (first hit in NHL_TEAMS
    order).

    Args:
        team_name: Team name such as "Boston Bruins" or "Bruins"

    Returns:
        The matching NHL_TEAMS key, or the first three characters of the
        stripped name upper-cased when nothing matches (an empty string for
        empty input).
    """
    needle = team_name.strip().lower()

    # An empty needle is a substring of every name, so it must never reach
    # the substring pass below.
    if not needle:
        return ''

    # Pass 1: exact match, so a loose match on an earlier team can never
    # shadow the correct team appearing later in the table.
    for abbrev, info in NHL_TEAMS.items():
        if info['name'].lower() == needle:
            return abbrev

    # Pass 2: partial match (e.g. nickname only).
    for abbrev, info in NHL_TEAMS.items():
        if needle in info['name'].lower():
            return abbrev

    # Return first 3 letters as fallback
    return team_name.strip()[:3].upper()


def get_nhl_season_string(season: int) -> str:
    """
    Get NHL season string in "2024-25" format.

    Args:
        season: The ending year of the season (e.g., 2025 for 2024-25 season)

    Returns:
        Season string like "2024-25"
    """
    return f"{season - 1}-{season % 100:02d}"


# =============================================================================
# GAME SCRAPERS
# =============================================================================

def _parse_hockey_reference_row(row, season: int) -> Optional[Game]:
    """Convert one schedule-table row into a Game, or None if it is not a game row."""
    cells = row.find_all(['td', 'th'])
    if len(cells) < 5:
        return None

    # Parse date
    date_cell = row.find('th', {'data-stat': 'date_game'})
    if date_cell is None:
        return None
    date_link = date_cell.find('a')
    date_str = date_link.text if date_link else date_cell.text

    # Parse teams
    visitor_cell = row.find('td', {'data-stat': 'visitor_team_name'})
    home_cell = row.find('td', {'data-stat': 'home_team_name'})
    if visitor_cell is None or home_cell is None:
        return None

    visitor_link = visitor_cell.find('a')
    home_link = home_cell.find('a')
    away_team = (visitor_link.text if visitor_link else visitor_cell.text).strip()
    home_team = (home_link.text if home_link else home_cell.text).strip()

    # Convert date; rows whose date cell is not an ISO date are not games.
    try:
        parsed_date = datetime.strptime(date_str.strip(), '%Y-%m-%d')
    except ValueError:
        return None
    date_formatted = parsed_date.strftime('%Y-%m-%d')

    away_abbrev = get_nhl_team_abbrev(away_team)
    home_abbrev = get_nhl_team_abbrev(home_team)
    # Fallback abbreviations may contain spaces, so strip them from the id.
    game_id = f"nhl_{date_formatted}_{away_abbrev}_{home_abbrev}".lower().replace(' ', '')

    return Game(
        id=game_id,
        sport='NHL',
        season=get_nhl_season_string(season),
        date=date_formatted,
        time=None,
        home_team=home_team,
        away_team=away_team,
        home_team_abbrev=home_abbrev,
        away_team_abbrev=away_abbrev,
        venue='',
        source='hockey-reference.com'
    )


def scrape_nhl_hockey_reference(season: int) -> list[Game]:
    """
    Scrape NHL schedule from Hockey-Reference.
    URL: https://www.hockey-reference.com/leagues/NHL_{YEAR}_games.html

    Args:
        season: Season ending year (e.g., 2025 for 2024-25 season)

    Returns:
        List of Game objects; empty when the page or its games table
        cannot be obtained.
    """
    games: list[Game] = []
    url = f"https://www.hockey-reference.com/leagues/NHL_{season}_games.html"

    print(f"Scraping NHL {season} from Hockey-Reference...")
    soup = fetch_page(url, 'hockey-reference.com')

    if not soup:
        return games

    table = soup.find('table', {'id': 'games'})
    if not table:
        print(" Could not find games table")
        return games

    tbody = table.find('tbody')
    if not tbody:
        return games

    skipped = 0
    for row in tbody.find_all('tr'):
        # Best-effort: one malformed row must not abort the whole scrape,
        # but the number of dropped rows is reported instead of hidden.
        try:
            game = _parse_hockey_reference_row(row, season)
        except Exception:
            skipped += 1
            continue
        if game is not None:
            games.append(game)

    if skipped:
        print(f" Skipped {skipped} unparseable rows from Hockey-Reference")
    print(f" Found {len(games)} games from Hockey-Reference")
    return games


def scrape_nhl_api(season: int) -> list[Game]:
    """
    Fetch NHL schedule from official API (JSON).
    URL: https://api-web.nhle.com/v1/schedule/{YYYY-MM-DD}

    This is currently a placeholder: no request is made and an empty list
    is always returned.

    Args:
        season: Season ending year (e.g., 2025 for 2024-25 season)

    Returns:
        An empty list.
    """
    games: list[Game] = []
    print(f"Fetching NHL {season} from NHL API...")

    # NHL API provides club schedules
    # We'd need to iterate through dates or teams
    # Simplified implementation here

    return games


def _parse_espn_event(event: dict, season: int) -> Optional[Game]:
    """Convert one ESPN scoreboard event into a Game, or None if data is incomplete."""
    raw_date = event.get('date') or ''
    date_str = raw_date[:10]
    time_str = raw_date[11:16] if len(raw_date) > 11 else None
    if not date_str:
        # Without a date neither the id nor the Game would be meaningful.
        return None

    competitions = event.get('competitions') or []
    if not competitions:
        return None

    comp = competitions[0]
    competitors = comp.get('competitors') or []
    if len(competitors) < 2:
        return None

    home_team = away_team = home_abbrev = away_abbrev = None

    for team in competitors:
        team_data = team.get('team', {})
        team_name = team_data.get('displayName', team_data.get('name', ''))
        team_abbrev = team_data.get('abbreviation', '')

        if team.get('homeAway') == 'home':
            home_team = team_name
            home_abbrev = team_abbrev
        else:
            away_team = team_name
            away_abbrev = team_abbrev

    if not home_team or not away_team:
        return None

    # Resolve missing abbreviations BEFORE building the id so the id and the
    # Game's abbreviation fields always agree.
    home_abbrev = home_abbrev or get_nhl_team_abbrev(home_team)
    away_abbrev = away_abbrev or get_nhl_team_abbrev(away_team)

    venue = (comp.get('venue') or {}).get('fullName', '')

    # Fallback abbreviations may contain spaces, so strip them from the id.
    game_id = f"nhl_{date_str}_{away_abbrev}_{home_abbrev}".lower().replace(' ', '')

    return Game(
        id=game_id,
        sport='NHL',
        season=get_nhl_season_string(season),
        date=date_str,
        time=time_str,
        home_team=home_team,
        away_team=away_team,
        home_team_abbrev=home_abbrev,
        away_team_abbrev=away_abbrev,
        venue=venue,
        source='espn.com'
    )


def scrape_nhl_espn(season: int) -> list[Game]:
    """
    Fetch NHL schedule from ESPN API.

    Requests the scoreboard for October 1 of the previous year through
    April 30 of the season year.

    Args:
        season: Season ending year (e.g., 2025 for 2024-25 season)

    Returns:
        List of Game objects; empty when the request or JSON decoding fails
        (the error is printed rather than raised).
    """
    games: list[Game] = []
    print(f"Fetching NHL {season} from ESPN API...")

    # NHL regular season: October - April (spans calendar years)
    start = f"{season-1}1001"
    end = f"{season}0430"

    url = "https://site.api.espn.com/apis/site/v2/sports/hockey/nhl/scoreboard"
    # NOTE(review): a full 32-team, 82-game season is 1312 games, so a limit
    # of 1000 may truncate the result if ESPN honours it - confirm.
    params = {
        'dates': f"{start}-{end}",
        'limit': 1000
    }

    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
    }

    try:
        response = requests.get(url, params=params, headers=headers, timeout=30)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"Error fetching ESPN NHL: {exc}")
        return games

    events = (data.get('events') or []) if isinstance(data, dict) else []

    skipped = 0
    for event in events:
        # Best-effort: one malformed event must not abort the whole fetch,
        # but the number of dropped events is reported instead of hidden.
        try:
            game = _parse_espn_event(event, season)
        except Exception:
            skipped += 1
            continue
        if game is not None:
            games.append(game)

    if skipped:
        print(f" Skipped {skipped} unparseable events from ESPN")
    print(f" Found {len(games)} games from ESPN")

    return games


# =============================================================================
# STADIUM SCRAPERS
# =============================================================================

# Hardcoded arena data keyed by arena name: location, coordinates, capacity
# and the NHL_TEAMS abbreviations of the teams that play there.
_NHL_ARENAS = {
    'TD Garden': {'city': 'Boston', 'state': 'MA', 'lat': 42.3662, 'lng': -71.0621, 'capacity': 17850, 'teams': ['BOS']},
    'KeyBank Center': {'city': 'Buffalo', 'state': 'NY', 'lat': 42.8750, 'lng': -78.8764, 'capacity': 19070, 'teams': ['BUF']},
    'Little Caesars Arena': {'city': 'Detroit', 'state': 'MI', 'lat': 42.3411, 'lng': -83.0553, 'capacity': 19515, 'teams': ['DET']},
    'Amerant Bank Arena': {'city': 'Sunrise', 'state': 'FL', 'lat': 26.1584, 'lng': -80.3256, 'capacity': 19250, 'teams': ['FLA']},
    'Bell Centre': {'city': 'Montreal', 'state': 'QC', 'lat': 45.4961, 'lng': -73.5693, 'capacity': 21302, 'teams': ['MTL']},
    'Canadian Tire Centre': {'city': 'Ottawa', 'state': 'ON', 'lat': 45.2969, 'lng': -75.9272, 'capacity': 18652, 'teams': ['OTT']},
    'Amalie Arena': {'city': 'Tampa', 'state': 'FL', 'lat': 27.9426, 'lng': -82.4519, 'capacity': 19092, 'teams': ['TBL']},
    'Scotiabank Arena': {'city': 'Toronto', 'state': 'ON', 'lat': 43.6435, 'lng': -79.3791, 'capacity': 18800, 'teams': ['TOR']},
    'PNC Arena': {'city': 'Raleigh', 'state': 'NC', 'lat': 35.8033, 'lng': -78.7220, 'capacity': 18680, 'teams': ['CAR']},
    'Nationwide Arena': {'city': 'Columbus', 'state': 'OH', 'lat': 39.9692, 'lng': -83.0061, 'capacity': 18500, 'teams': ['CBJ']},
    'Prudential Center': {'city': 'Newark', 'state': 'NJ', 'lat': 40.7334, 'lng': -74.1713, 'capacity': 16514, 'teams': ['NJD']},
    'UBS Arena': {'city': 'Elmont', 'state': 'NY', 'lat': 40.7170, 'lng': -73.7260, 'capacity': 17255, 'teams': ['NYI']},
    'Madison Square Garden': {'city': 'New York', 'state': 'NY', 'lat': 40.7505, 'lng': -73.9934, 'capacity': 18006, 'teams': ['NYR']},
    'Wells Fargo Center': {'city': 'Philadelphia', 'state': 'PA', 'lat': 39.9012, 'lng': -75.1720, 'capacity': 19500, 'teams': ['PHI']},
    'PPG Paints Arena': {'city': 'Pittsburgh', 'state': 'PA', 'lat': 40.4395, 'lng': -79.9892, 'capacity': 18387, 'teams': ['PIT']},
    'Capital One Arena': {'city': 'Washington', 'state': 'DC', 'lat': 38.8982, 'lng': -77.0209, 'capacity': 18573, 'teams': ['WSH']},
    'United Center': {'city': 'Chicago', 'state': 'IL', 'lat': 41.8807, 'lng': -87.6742, 'capacity': 19717, 'teams': ['CHI']},
    'Ball Arena': {'city': 'Denver', 'state': 'CO', 'lat': 39.7487, 'lng': -105.0077, 'capacity': 18007, 'teams': ['COL']},
    'American Airlines Center': {'city': 'Dallas', 'state': 'TX', 'lat': 32.7905, 'lng': -96.8103, 'capacity': 18532, 'teams': ['DAL']},
    'Xcel Energy Center': {'city': 'Saint Paul', 'state': 'MN', 'lat': 44.9448, 'lng': -93.1010, 'capacity': 17954, 'teams': ['MIN']},
    'Bridgestone Arena': {'city': 'Nashville', 'state': 'TN', 'lat': 36.1592, 'lng': -86.7785, 'capacity': 17159, 'teams': ['NSH']},
    'Enterprise Center': {'city': 'St. Louis', 'state': 'MO', 'lat': 38.6268, 'lng': -90.2025, 'capacity': 18096, 'teams': ['STL']},
    'Canada Life Centre': {'city': 'Winnipeg', 'state': 'MB', 'lat': 49.8928, 'lng': -97.1437, 'capacity': 15321, 'teams': ['WPG']},
    'Honda Center': {'city': 'Anaheim', 'state': 'CA', 'lat': 33.8078, 'lng': -117.8765, 'capacity': 17174, 'teams': ['ANA']},
    'Delta Center': {'city': 'Salt Lake City', 'state': 'UT', 'lat': 40.7683, 'lng': -111.9011, 'capacity': 16210, 'teams': ['ARI']},
    'SAP Center': {'city': 'San Jose', 'state': 'CA', 'lat': 37.3327, 'lng': -121.9012, 'capacity': 17562, 'teams': ['SJS']},
    'Rogers Arena': {'city': 'Vancouver', 'state': 'BC', 'lat': 49.2778, 'lng': -123.1089, 'capacity': 18910, 'teams': ['VAN']},
    'T-Mobile Arena': {'city': 'Las Vegas', 'state': 'NV', 'lat': 36.1028, 'lng': -115.1784, 'capacity': 17500, 'teams': ['VGK']},
    'Climate Pledge Arena': {'city': 'Seattle', 'state': 'WA', 'lat': 47.6220, 'lng': -122.3540, 'capacity': 17100, 'teams': ['SEA']},
    'Crypto.com Arena': {'city': 'Los Angeles', 'state': 'CA', 'lat': 34.0430, 'lng': -118.2673, 'capacity': 18230, 'teams': ['LAK']},
    'Rogers Place': {'city': 'Edmonton', 'state': 'AB', 'lat': 53.5469, 'lng': -113.4979, 'capacity': 18347, 'teams': ['EDM']},
    'Scotiabank Saddledome': {'city': 'Calgary', 'state': 'AB', 'lat': 51.0374, 'lng': -114.0519, 'capacity': 19289, 'teams': ['CGY']},
}


def scrape_nhl_stadiums() -> list[Stadium]:
    """
    Build Stadium objects for all NHL arenas from the hardcoded arena table.

    No network access is performed.

    Returns:
        One Stadium per arena, with an id derived from the lower-cased arena
        name (spaces replaced by underscores, truncated to 30 characters).
    """
    print("\nNHL STADIUMS")
    print("-" * 40)
    print(" Loading NHL arenas...")

    stadiums = [
        Stadium(
            id=f"nhl_{name.lower().replace(' ', '_')[:30]}",
            name=name,
            city=info['city'],
            state=info['state'],
            latitude=info['lat'],
            longitude=info['lng'],
            capacity=info['capacity'],
            sport='NHL',
            team_abbrevs=info['teams'],
            source='nhl_hardcoded'
        )
        for name, info in _NHL_ARENAS.items()
    ]

    print(f" ✓ Found {len(stadiums)} NHL arenas")
    return stadiums


# =============================================================================
# SOURCE CONFIGURATIONS
# =============================================================================

# Game sources with their priority and minimum-games settings, passed to
# scrape_with_fallback by scrape_nhl_games.
NHL_GAME_SOURCES = [
    ScraperSource('Hockey-Reference', scrape_nhl_hockey_reference, priority=1, min_games=100),
    ScraperSource('ESPN', scrape_nhl_espn, priority=2, min_games=50),
    ScraperSource('NHL API', scrape_nhl_api, priority=3, min_games=50),
]

NHL_STADIUM_SOURCES = [
    StadiumScraperSource('Hardcoded', scrape_nhl_stadiums, priority=1, min_venues=25),
]


# =============================================================================
# CONVENIENCE FUNCTIONS
# =============================================================================

def scrape_nhl_games(season: int) -> list[Game]:
    """
    Scrape NHL games for a season using multi-source fallback.

    Args:
        season: Season ending year (e.g., 2025 for 2024-25 season)

    Returns:
        Whatever scrape_with_fallback returns for NHL_GAME_SOURCES
        (presumably the games from the first source that succeeds).
    """
    print(f"\nNHL {get_nhl_season_string(season)} SCHEDULE")
    print("-" * 40)

    return scrape_with_fallback('NHL', season, NHL_GAME_SOURCES)