#!/usr/bin/env python3
"""
NBA schedule and stadium scrapers for SportsTime.

This module provides:
- NBA game scrapers (Basketball-Reference, ESPN, CBS Sports)
- NBA stadium scrapers (hardcoded with coordinates)
- Multi-source fallback configurations
"""

from datetime import datetime, timedelta
from typing import Optional

import requests

# Support both direct execution and import from parent directory
try:
    from core import (
        Game,
        Stadium,
        ScraperSource,
        StadiumScraperSource,
        fetch_page,
        scrape_with_fallback,
        scrape_stadiums_with_fallback,
    )
except ImportError:
    from Scripts.core import (
        Game,
        Stadium,
        ScraperSource,
        StadiumScraperSource,
        fetch_page,
        scrape_with_fallback,
        scrape_stadiums_with_fallback,
    )


__all__ = [
    # Team data
    'NBA_TEAMS',
    # Game scrapers
    'scrape_nba_basketball_reference',
    'scrape_nba_espn',
    'scrape_nba_cbssports',
    # Stadium scrapers
    'scrape_nba_stadiums',
    # Source configurations
    'NBA_GAME_SOURCES',
    'NBA_STADIUM_SOURCES',
    # Convenience functions
    'scrape_nba_games',
    'get_nba_season_string',
]


# =============================================================================
# TEAM MAPPINGS
# =============================================================================

# Keyed by the abbreviation used throughout this module (note the
# Basketball-Reference style codes BRK / CHO / PHO).
NBA_TEAMS = {
    'ATL': {'name': 'Atlanta Hawks', 'city': 'Atlanta', 'arena': 'State Farm Arena'},
    'BOS': {'name': 'Boston Celtics', 'city': 'Boston', 'arena': 'TD Garden'},
    'BRK': {'name': 'Brooklyn Nets', 'city': 'Brooklyn', 'arena': 'Barclays Center'},
    'CHO': {'name': 'Charlotte Hornets', 'city': 'Charlotte', 'arena': 'Spectrum Center'},
    'CHI': {'name': 'Chicago Bulls', 'city': 'Chicago', 'arena': 'United Center'},
    'CLE': {'name': 'Cleveland Cavaliers', 'city': 'Cleveland', 'arena': 'Rocket Mortgage FieldHouse'},
    'DAL': {'name': 'Dallas Mavericks', 'city': 'Dallas', 'arena': 'American Airlines Center'},
    'DEN': {'name': 'Denver Nuggets', 'city': 'Denver', 'arena': 'Ball Arena'},
    'DET': {'name': 'Detroit Pistons', 'city': 'Detroit', 'arena': 'Little Caesars Arena'},
    'GSW': {'name': 'Golden State Warriors', 'city': 'San Francisco', 'arena': 'Chase Center'},
    'HOU': {'name': 'Houston Rockets', 'city': 'Houston', 'arena': 'Toyota Center'},
    'IND': {'name': 'Indiana Pacers', 'city': 'Indianapolis', 'arena': 'Gainbridge Fieldhouse'},
    'LAC': {'name': 'Los Angeles Clippers', 'city': 'Inglewood', 'arena': 'Intuit Dome'},
    'LAL': {'name': 'Los Angeles Lakers', 'city': 'Los Angeles', 'arena': 'Crypto.com Arena'},
    'MEM': {'name': 'Memphis Grizzlies', 'city': 'Memphis', 'arena': 'FedExForum'},
    'MIA': {'name': 'Miami Heat', 'city': 'Miami', 'arena': 'Kaseya Center'},
    'MIL': {'name': 'Milwaukee Bucks', 'city': 'Milwaukee', 'arena': 'Fiserv Forum'},
    'MIN': {'name': 'Minnesota Timberwolves', 'city': 'Minneapolis', 'arena': 'Target Center'},
    'NOP': {'name': 'New Orleans Pelicans', 'city': 'New Orleans', 'arena': 'Smoothie King Center'},
    'NYK': {'name': 'New York Knicks', 'city': 'New York', 'arena': 'Madison Square Garden'},
    'OKC': {'name': 'Oklahoma City Thunder', 'city': 'Oklahoma City', 'arena': 'Paycom Center'},
    'ORL': {'name': 'Orlando Magic', 'city': 'Orlando', 'arena': 'Kia Center'},
    'PHI': {'name': 'Philadelphia 76ers', 'city': 'Philadelphia', 'arena': 'Wells Fargo Center'},
    'PHO': {'name': 'Phoenix Suns', 'city': 'Phoenix', 'arena': 'Footprint Center'},
    'POR': {'name': 'Portland Trail Blazers', 'city': 'Portland', 'arena': 'Moda Center'},
    'SAC': {'name': 'Sacramento Kings', 'city': 'Sacramento', 'arena': 'Golden 1 Center'},
    'SAS': {'name': 'San Antonio Spurs', 'city': 'San Antonio', 'arena': 'Frost Bank Center'},
    'TOR': {'name': 'Toronto Raptors', 'city': 'Toronto', 'arena': 'Scotiabank Arena'},
    'UTA': {'name': 'Utah Jazz', 'city': 'Salt Lake City', 'arena': 'Delta Center'},
    'WAS': {'name': 'Washington Wizards', 'city': 'Washington', 'arena': 'Capital One Arena'},
}


def get_nba_team_abbrev(team_name: str) -> str:
    """
    Get NBA team abbreviation from a full (or partial) team name.

    Matching is case-insensitive and ignores surrounding whitespace. An exact
    match against any team's full name always wins; only if there is none is
    a substring match attempted, returning the first team (in NBA_TEAMS
    order) whose full name contains the input.

    Args:
        team_name: Team name as scraped, e.g. "Boston Celtics" or "Celtics"

    Returns:
        The NBA_TEAMS key for the matched team, otherwise the first three
        characters of the stripped input, upper-cased (an empty string for
        empty or whitespace-only input).
    """
    wanted = team_name.strip().lower()

    # An empty string is a substring of every name; without this guard a
    # blank cell would silently resolve to the first team in NBA_TEAMS.
    if not wanted:
        return ''

    # Pass 1: exact match, checked across ALL teams before any fuzzy match.
    for abbrev, info in NBA_TEAMS.items():
        if info['name'].lower() == wanted:
            return abbrev

    # Pass 2: substring match. Ambiguous fragments (e.g. "Los Angeles")
    # resolve to whichever team is listed first.
    for abbrev, info in NBA_TEAMS.items():
        if wanted in info['name'].lower():
            return abbrev

    # Return first 3 letters as fallback
    return team_name.strip()[:3].upper()


def get_nba_season_string(season: int) -> str:
    """
    Get NBA season string in "2024-25" format.

    Args:
        season: The ending year of the season (e.g., 2025 for 2024-25 season)

    Returns:
        Season string like "2024-25"
    """
    return f"{season-1}-{str(season)[2:]}"


def _build_nba_game(
    *,
    season: int,
    date_formatted: str,
    time: Optional[str],
    home_team: str,
    away_team: str,
    venue: str,
    source: str,
) -> Game:
    """Build a Game record, deriving team abbreviations and the game ID."""
    away_abbrev = get_nba_team_abbrev(away_team)
    home_abbrev = get_nba_team_abbrev(home_team)
    # ID shape: nba_<YYYY-MM-DD>_<away>_<home>, lower-cased, spaces removed.
    game_id = f"nba_{date_formatted}_{away_abbrev}_{home_abbrev}".lower().replace(' ', '')

    return Game(
        id=game_id,
        sport='NBA',
        season=get_nba_season_string(season),
        date=date_formatted,
        time=time,
        home_team=home_team,
        away_team=away_team,
        home_team_abbrev=home_abbrev,
        away_team_abbrev=away_abbrev,
        venue=venue,
        source=source,
    )


# =============================================================================
# GAME SCRAPERS
# =============================================================================

def scrape_nba_basketball_reference(season: int) -> list[Game]:
    """
    Scrape NBA schedule from Basketball-Reference.

    URL: https://www.basketball-reference.com/leagues/NBA_{YEAR}_games-{month}.html
    Season year is the ending year (e.g., 2025 for 2024-25 season)

    One page is requested per month from October through June. Months whose
    page cannot be fetched, or that lack the expected schedule table, are
    skipped. Rows whose date does not parse are skipped; any other per-row
    failure is printed and the row is skipped.

    Args:
        season: Season ending year

    Returns:
        List of Game objects parsed from all available month pages
    """
    games = []
    months = ['october', 'november', 'december', 'january', 'february', 'march', 'april', 'may', 'june']

    print(f"Scraping NBA {season} from Basketball-Reference...")

    for month in months:
        url = f"https://www.basketball-reference.com/leagues/NBA_{season}_games-{month}.html"
        soup = fetch_page(url, 'basketball-reference.com')

        if not soup:
            continue

        table = soup.find('table', {'id': 'schedule'})
        if not table:
            continue

        tbody = table.find('tbody')
        if not tbody:
            continue

        for row in tbody.find_all('tr'):
            # Skip header rows repeated inside the table body.
            if 'thead' in (row.get('class') or []):
                continue

            cells = row.find_all(['td', 'th'])
            if len(cells) < 6:
                continue

            try:
                # Parse date
                date_cell = row.find('th', {'data-stat': 'date_game'})
                if not date_cell:
                    continue
                date_link = date_cell.find('a')
                date_str = date_link.text if date_link else date_cell.text

                # Parse time
                time_cell = row.find('td', {'data-stat': 'game_start_time'})
                time_str = time_cell.text.strip() if time_cell else None

                # Parse teams
                visitor_cell = row.find('td', {'data-stat': 'visitor_team_name'})
                home_cell = row.find('td', {'data-stat': 'home_team_name'})

                if not visitor_cell or not home_cell:
                    continue

                visitor_link = visitor_cell.find('a')
                home_link = home_cell.find('a')

                # Stripped for consistency with the arena/time cells.
                away_team = (visitor_link.text if visitor_link else visitor_cell.text).strip()
                home_team = (home_link.text if home_link else home_cell.text).strip()

                # Parse arena
                arena_cell = row.find('td', {'data-stat': 'arena_name'})
                arena = arena_cell.text.strip() if arena_cell else ''

                # Convert date, e.g. "Tue, Oct 22, 2024" -> "2024-10-22".
                # Only ValueError is caught: that is what strptime raises on
                # a format mismatch, and a bare except would also swallow
                # KeyboardInterrupt / SystemExit.
                try:
                    parsed_date = datetime.strptime(date_str.strip(), '%a, %b %d, %Y')
                except ValueError:
                    continue
                date_formatted = parsed_date.strftime('%Y-%m-%d')

                games.append(_build_nba_game(
                    season=season,
                    date_formatted=date_formatted,
                    time=time_str,
                    home_team=home_team,
                    away_team=away_team,
                    venue=arena,
                    source='basketball-reference.com',
                ))

            except Exception as e:
                # Best-effort scraping: one malformed row must not abort the
                # whole season, but it is reported.
                print(f" Error parsing row: {e}")
                continue

    print(f" Found {len(games)} games from Basketball-Reference")
    return games


def scrape_nba_espn(season: int) -> list[Game]:
    """
    Scrape NBA schedule from ESPN.

    URL: https://www.espn.com/nba/schedule/_/date/{YYYYMMDD}

    NOTE(review): this is currently a stub. It requests one schedule page per
    week between October 1 of the previous year and June 30 of the season
    year, but never parses the response, so it always returns an empty list.
    Consider skipping the requests until parsing is implemented.

    Args:
        season: Season ending year (e.g., 2025 for 2024-25 season)

    Returns:
        List of Game objects (always empty at present)
    """
    games = []
    print(f"Scraping NBA {season} from ESPN...")

    # Determine date range for season
    start_date = datetime(season - 1, 10, 1)  # October of previous year
    end_date = datetime(season, 6, 30)  # June of season year

    current_date = start_date
    while current_date <= end_date:
        date_str = current_date.strftime('%Y%m%d')
        url = f"https://www.espn.com/nba/schedule/_/date/{date_str}"

        soup = fetch_page(url, 'espn.com')
        if soup:
            # ESPN uses JavaScript rendering, so we need to parse what's available
            # This is a simplified version - full implementation would need Selenium
            pass

        current_date += timedelta(days=7)  # Sample weekly to respect rate limits

    print(f" Found {len(games)} games from ESPN")
    return games


def scrape_nba_cbssports(season: int) -> list[Game]:
    """
    Scrape NBA schedule from the CBS Sports schedule page.

    Fetches the HTML schedule page and reads team pairs out of its
    'TableBase-table' tables; the first 'TeamName' link in a row is taken as
    the away team and the second as the home team.

    NOTE(review): the game date is a placeholder (today's date), so every
    game returned carries the same date and games between the same two teams
    share an ID. Time and venue are not extracted. Treat the output as
    incomplete until the date is read from the page.

    Args:
        season: Season ending year; used only for the season string, the
            request URL does not depend on it

    Returns:
        List of Game objects, empty if the page could not be fetched
    """
    games = []
    print(f"Fetching NBA {season} from CBS Sports...")

    # CBS Sports has a schedule endpoint
    url = "https://www.cbssports.com/nba/schedule/"

    soup = fetch_page(url, 'cbssports.com')
    if not soup:
        return games

    # Find all game rows
    tables = soup.find_all('table', class_='TableBase-table')

    for table in tables:
        for row in table.find_all('tr'):
            try:
                cells = row.find_all('td')
                if len(cells) < 2:
                    continue

                # Parse teams from row
                team_cells = row.find_all('a', class_='TeamName')
                if len(team_cells) < 2:
                    continue

                away_team = team_cells[0].get_text(strip=True)
                home_team = team_cells[1].get_text(strip=True)

                # Get date from table section
                date_formatted = datetime.now().strftime('%Y-%m-%d')  # Placeholder

                games.append(_build_nba_game(
                    season=season,
                    date_formatted=date_formatted,
                    time=None,
                    home_team=home_team,
                    away_team=away_team,
                    venue='',
                    source='cbssports.com',
                ))

            except Exception as e:
                # Still best-effort, but report the failure the same way the
                # Basketball-Reference scraper does instead of hiding it.
                print(f" Error parsing row: {e}")
                continue

    print(f" Found {len(games)} games from CBS Sports")
    return games


# =============================================================================
# STADIUM SCRAPERS
# =============================================================================

def scrape_nba_stadiums() -> list[Stadium]:
    """
    Fetch NBA arena data (hardcoded with accurate coordinates).

    No network access is performed; the arena table below is the only data
    source. Each stadium ID is "nba_" followed by the lower-cased arena name
    with spaces replaced by underscores, truncated to 30 characters.

    Returns:
        List of Stadium objects, one per entry in the arena table
    """
    print("\nNBA STADIUMS")
    print("-" * 40)
    print(" Loading NBA arenas...")

    nba_arenas = {
        'State Farm Arena': {'city': 'Atlanta', 'state': 'GA', 'lat': 33.7573, 'lng': -84.3963, 'capacity': 18118, 'teams': ['ATL']},
        'TD Garden': {'city': 'Boston', 'state': 'MA', 'lat': 42.3662, 'lng': -71.0621, 'capacity': 19156, 'teams': ['BOS']},
        'Barclays Center': {'city': 'Brooklyn', 'state': 'NY', 'lat': 40.6826, 'lng': -73.9754, 'capacity': 17732, 'teams': ['BRK']},
        'Spectrum Center': {'city': 'Charlotte', 'state': 'NC', 'lat': 35.2251, 'lng': -80.8392, 'capacity': 19077, 'teams': ['CHO']},
        'United Center': {'city': 'Chicago', 'state': 'IL', 'lat': 41.8807, 'lng': -87.6742, 'capacity': 20917, 'teams': ['CHI']},
        'Rocket Mortgage FieldHouse': {'city': 'Cleveland', 'state': 'OH', 'lat': 41.4965, 'lng': -81.6882, 'capacity': 19432, 'teams': ['CLE']},
        'American Airlines Center': {'city': 'Dallas', 'state': 'TX', 'lat': 32.7905, 'lng': -96.8103, 'capacity': 19200, 'teams': ['DAL']},
        'Ball Arena': {'city': 'Denver', 'state': 'CO', 'lat': 39.7487, 'lng': -105.0077, 'capacity': 19520, 'teams': ['DEN']},
        'Little Caesars Arena': {'city': 'Detroit', 'state': 'MI', 'lat': 42.3411, 'lng': -83.0553, 'capacity': 20332, 'teams': ['DET']},
        'Chase Center': {'city': 'San Francisco', 'state': 'CA', 'lat': 37.7680, 'lng': -122.3879, 'capacity': 18064, 'teams': ['GSW']},
        'Toyota Center': {'city': 'Houston', 'state': 'TX', 'lat': 29.7508, 'lng': -95.3621, 'capacity': 18055, 'teams': ['HOU']},
        'Gainbridge Fieldhouse': {'city': 'Indianapolis', 'state': 'IN', 'lat': 39.7640, 'lng': -86.1555, 'capacity': 17923, 'teams': ['IND']},
        'Intuit Dome': {'city': 'Inglewood', 'state': 'CA', 'lat': 33.9425, 'lng': -118.3419, 'capacity': 18000, 'teams': ['LAC']},
        'Crypto.com Arena': {'city': 'Los Angeles', 'state': 'CA', 'lat': 34.0430, 'lng': -118.2673, 'capacity': 18997, 'teams': ['LAL']},
        'FedExForum': {'city': 'Memphis', 'state': 'TN', 'lat': 35.1382, 'lng': -90.0506, 'capacity': 17794, 'teams': ['MEM']},
        'Kaseya Center': {'city': 'Miami', 'state': 'FL', 'lat': 25.7814, 'lng': -80.1870, 'capacity': 19600, 'teams': ['MIA']},
        'Fiserv Forum': {'city': 'Milwaukee', 'state': 'WI', 'lat': 43.0451, 'lng': -87.9174, 'capacity': 17341, 'teams': ['MIL']},
        'Target Center': {'city': 'Minneapolis', 'state': 'MN', 'lat': 44.9795, 'lng': -93.2761, 'capacity': 18978, 'teams': ['MIN']},
        'Smoothie King Center': {'city': 'New Orleans', 'state': 'LA', 'lat': 29.9490, 'lng': -90.0821, 'capacity': 16867, 'teams': ['NOP']},
        'Madison Square Garden': {'city': 'New York', 'state': 'NY', 'lat': 40.7505, 'lng': -73.9934, 'capacity': 19812, 'teams': ['NYK']},
        'Paycom Center': {'city': 'Oklahoma City', 'state': 'OK', 'lat': 35.4634, 'lng': -97.5151, 'capacity': 18203, 'teams': ['OKC']},
        'Kia Center': {'city': 'Orlando', 'state': 'FL', 'lat': 28.5392, 'lng': -81.3839, 'capacity': 18846, 'teams': ['ORL']},
        'Wells Fargo Center': {'city': 'Philadelphia', 'state': 'PA', 'lat': 39.9012, 'lng': -75.1720, 'capacity': 20478, 'teams': ['PHI']},
        'Footprint Center': {'city': 'Phoenix', 'state': 'AZ', 'lat': 33.4457, 'lng': -112.0712, 'capacity': 17071, 'teams': ['PHO']},
        'Moda Center': {'city': 'Portland', 'state': 'OR', 'lat': 45.5316, 'lng': -122.6668, 'capacity': 19393, 'teams': ['POR']},
        'Golden 1 Center': {'city': 'Sacramento', 'state': 'CA', 'lat': 38.5802, 'lng': -121.4997, 'capacity': 17608, 'teams': ['SAC']},
        'Frost Bank Center': {'city': 'San Antonio', 'state': 'TX', 'lat': 29.4270, 'lng': -98.4375, 'capacity': 18418, 'teams': ['SAS']},
        'Scotiabank Arena': {'city': 'Toronto', 'state': 'ON', 'lat': 43.6435, 'lng': -79.3791, 'capacity': 19800, 'teams': ['TOR']},
        'Delta Center': {'city': 'Salt Lake City', 'state': 'UT', 'lat': 40.7683, 'lng': -111.9011, 'capacity': 18306, 'teams': ['UTA']},
        'Capital One Arena': {'city': 'Washington', 'state': 'DC', 'lat': 38.8982, 'lng': -77.0209, 'capacity': 20356, 'teams': ['WAS']},
    }

    stadiums = [
        Stadium(
            id=f"nba_{name.lower().replace(' ', '_')[:30]}",
            name=name,
            city=info['city'],
            state=info['state'],
            latitude=info['lat'],
            longitude=info['lng'],
            capacity=info['capacity'],
            sport='NBA',
            team_abbrevs=info['teams'],
            source='nba_hardcoded',
        )
        for name, info in nba_arenas.items()
    ]

    print(f" ✓ Found {len(stadiums)} NBA arenas")
    return stadiums


# =============================================================================
# SOURCE CONFIGURATIONS
# =============================================================================

# Game sources, listed in priority order (1 = tried first, presumably; the
# exact fallback policy lives in core.scrape_with_fallback).
NBA_GAME_SOURCES = [
    ScraperSource('Basketball-Reference', scrape_nba_basketball_reference, priority=1, min_games=100),
    ScraperSource('CBS Sports', scrape_nba_cbssports, priority=2, min_games=50),
    ScraperSource('ESPN', scrape_nba_espn, priority=3, min_games=50),
]

NBA_STADIUM_SOURCES = [
    StadiumScraperSource('Hardcoded', scrape_nba_stadiums, priority=1, min_venues=25),
]


# =============================================================================
# CONVENIENCE FUNCTIONS
# =============================================================================

def scrape_nba_games(season: int) -> list[Game]:
    """
    Scrape NBA games for a season using multi-source fallback.

    Prints a section header, then delegates to scrape_with_fallback with the
    NBA_GAME_SOURCES configuration.

    Args:
        season: Season ending year (e.g., 2025 for 2024-25 season)

    Returns:
        List of Game objects from the first successful source
    """
    print(f"\nNBA {get_nba_season_string(season)} SCHEDULE")
    print("-" * 40)

    return scrape_with_fallback('NBA', season, NBA_GAME_SOURCES)