Files
Sportstime/Scripts/nba.py
Trey t 70acfb7bc6 feat(01-02): create nba.py sport module
NBA team mappings, Basketball-Reference/ESPN/CBS scrapers, stadium data with coordinates.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-10 00:07:37 -06:00

412 lines
17 KiB
Python

#!/usr/bin/env python3
"""
NBA schedule and stadium scrapers for SportsTime.
This module provides:
- NBA game scrapers (Basketball-Reference, ESPN, CBS Sports)
- NBA stadium scrapers (hardcoded with coordinates)
- Multi-source fallback configurations
"""
from datetime import datetime, timedelta
from typing import Optional
import requests
# Support both direct execution and import from parent directory
try:
from core import (
Game,
Stadium,
ScraperSource,
StadiumScraperSource,
fetch_page,
scrape_with_fallback,
scrape_stadiums_with_fallback,
)
except ImportError:
from Scripts.core import (
Game,
Stadium,
ScraperSource,
StadiumScraperSource,
fetch_page,
scrape_with_fallback,
scrape_stadiums_with_fallback,
)
# Names exported by `from <module> import *`.
# NOTE(review): get_nba_team_abbrev is defined in this module but is not
# listed here — confirm it is meant to stay module-private.
__all__ = [
    # Team data
    'NBA_TEAMS',
    # Game scrapers
    'scrape_nba_basketball_reference',
    'scrape_nba_espn',
    'scrape_nba_cbssports',
    # Stadium scrapers
    'scrape_nba_stadiums',
    # Source configurations
    'NBA_GAME_SOURCES',
    'NBA_STADIUM_SOURCES',
    # Convenience functions
    'scrape_nba_games',
    'get_nba_season_string',
]
# =============================================================================
# TEAM MAPPINGS
# =============================================================================
# Maps a three-letter team abbreviation to a dict with the keys
# 'name' (full team name), 'city' and 'arena' (home arena name).
# The 'name' values are what get_nba_team_abbrev matches scraped names against.
# NOTE(review): abbreviations such as BRK/CHO/PHO look like the
# Basketball-Reference spelling rather than the league's own — confirm before
# joining against other data sources.
# NOTE(review): arena names follow sponsorship deals and can go stale between
# seasons — verify them against the season being scraped.
NBA_TEAMS = {
    'ATL': {'name': 'Atlanta Hawks', 'city': 'Atlanta', 'arena': 'State Farm Arena'},
    'BOS': {'name': 'Boston Celtics', 'city': 'Boston', 'arena': 'TD Garden'},
    'BRK': {'name': 'Brooklyn Nets', 'city': 'Brooklyn', 'arena': 'Barclays Center'},
    'CHO': {'name': 'Charlotte Hornets', 'city': 'Charlotte', 'arena': 'Spectrum Center'},
    'CHI': {'name': 'Chicago Bulls', 'city': 'Chicago', 'arena': 'United Center'},
    'CLE': {'name': 'Cleveland Cavaliers', 'city': 'Cleveland', 'arena': 'Rocket Mortgage FieldHouse'},
    'DAL': {'name': 'Dallas Mavericks', 'city': 'Dallas', 'arena': 'American Airlines Center'},
    'DEN': {'name': 'Denver Nuggets', 'city': 'Denver', 'arena': 'Ball Arena'},
    'DET': {'name': 'Detroit Pistons', 'city': 'Detroit', 'arena': 'Little Caesars Arena'},
    'GSW': {'name': 'Golden State Warriors', 'city': 'San Francisco', 'arena': 'Chase Center'},
    'HOU': {'name': 'Houston Rockets', 'city': 'Houston', 'arena': 'Toyota Center'},
    'IND': {'name': 'Indiana Pacers', 'city': 'Indianapolis', 'arena': 'Gainbridge Fieldhouse'},
    'LAC': {'name': 'Los Angeles Clippers', 'city': 'Inglewood', 'arena': 'Intuit Dome'},
    'LAL': {'name': 'Los Angeles Lakers', 'city': 'Los Angeles', 'arena': 'Crypto.com Arena'},
    'MEM': {'name': 'Memphis Grizzlies', 'city': 'Memphis', 'arena': 'FedExForum'},
    'MIA': {'name': 'Miami Heat', 'city': 'Miami', 'arena': 'Kaseya Center'},
    'MIL': {'name': 'Milwaukee Bucks', 'city': 'Milwaukee', 'arena': 'Fiserv Forum'},
    'MIN': {'name': 'Minnesota Timberwolves', 'city': 'Minneapolis', 'arena': 'Target Center'},
    'NOP': {'name': 'New Orleans Pelicans', 'city': 'New Orleans', 'arena': 'Smoothie King Center'},
    'NYK': {'name': 'New York Knicks', 'city': 'New York', 'arena': 'Madison Square Garden'},
    'OKC': {'name': 'Oklahoma City Thunder', 'city': 'Oklahoma City', 'arena': 'Paycom Center'},
    'ORL': {'name': 'Orlando Magic', 'city': 'Orlando', 'arena': 'Kia Center'},
    'PHI': {'name': 'Philadelphia 76ers', 'city': 'Philadelphia', 'arena': 'Wells Fargo Center'},
    'PHO': {'name': 'Phoenix Suns', 'city': 'Phoenix', 'arena': 'Footprint Center'},
    'POR': {'name': 'Portland Trail Blazers', 'city': 'Portland', 'arena': 'Moda Center'},
    'SAC': {'name': 'Sacramento Kings', 'city': 'Sacramento', 'arena': 'Golden 1 Center'},
    'SAS': {'name': 'San Antonio Spurs', 'city': 'San Antonio', 'arena': 'Frost Bank Center'},
    'TOR': {'name': 'Toronto Raptors', 'city': 'Toronto', 'arena': 'Scotiabank Arena'},
    'UTA': {'name': 'Utah Jazz', 'city': 'Salt Lake City', 'arena': 'Delta Center'},
    'WAS': {'name': 'Washington Wizards', 'city': 'Washington', 'arena': 'Capital One Arena'},
}
def get_nba_team_abbrev(team_name: str) -> str:
    """
    Get NBA team abbreviation from a full or partial team name.
    Matching is case-insensitive and ignores surrounding whitespace. An exact
    match on a team's full name is preferred; otherwise the first team (in
    NBA_TEAMS order) whose full name contains the given text is used.
    Args:
        team_name: Team name as scraped, e.g. "Boston Celtics" or "Celtics"
    Returns:
        The matching key of NBA_TEAMS; if nothing matches, the first three
        characters of the trimmed input upper-cased; "" for blank input
    """
    wanted = (team_name or '').strip().lower()
    if not wanted:
        # An empty string is a substring of every name, so without this guard
        # blank input would silently resolve to the first team in NBA_TEAMS.
        return ''
    # Pass 1: exact full-name match.
    for abbrev, info in NBA_TEAMS.items():
        if info['name'].lower() == wanted:
            return abbrev
    # Pass 2: partial match (e.g. a nickname or city on its own).
    for abbrev, info in NBA_TEAMS.items():
        if wanted in info['name'].lower():
            return abbrev
    # Return first 3 letters as fallback
    return wanted[:3].upper()
def get_nba_season_string(season: int) -> str:
    """
    Build the season label for an NBA season, e.g. "2024-25".
    Args:
        season: The ending year of the season (e.g., 2025 for 2024-25 season)
    Returns:
        The starting year, a dash, then the ending year with its first two
        characters dropped (2025 -> "2024-25")
    """
    start_year = season - 1
    # Slice the decimal text of the year rather than doing arithmetic on it.
    end_suffix = str(season)[2:]
    return "{}-{}".format(start_year, end_suffix)
# =============================================================================
# GAME SCRAPERS
# =============================================================================
def _bref_cell_text(cell) -> str:
    """Return the text of the cell's first link, or of the cell itself when it has no link."""
    link = cell.find('a')
    return link.text if link else cell.text


def scrape_nba_basketball_reference(season: int) -> list[Game]:
    """
    Scrape NBA schedule from Basketball-Reference.
    URL: https://www.basketball-reference.com/leagues/NBA_{YEAR}_games-{month}.html
    One page is requested per month from October through June. Months whose
    page, schedule table or table body is missing are skipped. Rows whose date
    does not parse are skipped silently; any other per-row error is printed and
    the row is skipped.
    Args:
        season: Season ending year (e.g., 2025 for 2024-25 season)
    Returns:
        List of Game objects with source 'basketball-reference.com'
    """
    games = []
    months = ['october', 'november', 'december', 'january', 'february', 'march', 'april', 'may', 'june']
    # Same for every row, so compute it once.
    season_label = get_nba_season_string(season)
    print(f"Scraping NBA {season} from Basketball-Reference...")
    for month in months:
        url = f"https://www.basketball-reference.com/leagues/NBA_{season}_games-{month}.html"
        soup = fetch_page(url, 'basketball-reference.com')
        if not soup:
            continue
        table = soup.find('table', {'id': 'schedule'})
        if not table:
            continue
        tbody = table.find('tbody')
        if not tbody:
            continue
        for row in tbody.find_all('tr'):
            # Skip rows carrying the 'thead' class (header rows inside the body).
            row_classes = row.get('class')
            if row_classes and 'thead' in row_classes:
                continue
            if len(row.find_all(['td', 'th'])) < 6:
                continue
            try:
                # Parse date
                date_cell = row.find('th', {'data-stat': 'date_game'})
                if not date_cell:
                    continue
                date_str = _bref_cell_text(date_cell)
                # Parse time
                time_cell = row.find('td', {'data-stat': 'game_start_time'})
                time_str = time_cell.text.strip() if time_cell else None
                # Parse teams
                visitor_cell = row.find('td', {'data-stat': 'visitor_team_name'})
                home_cell = row.find('td', {'data-stat': 'home_team_name'})
                if not visitor_cell or not home_cell:
                    continue
                away_team = _bref_cell_text(visitor_cell)
                home_team = _bref_cell_text(home_cell)
                # Parse arena
                arena_cell = row.find('td', {'data-stat': 'arena_name'})
                arena = arena_cell.text.strip() if arena_cell else ''
                # Convert date. Only a malformed date string is expected here,
                # so catch ValueError rather than everything (a bare except
                # would also swallow KeyboardInterrupt/SystemExit).
                try:
                    parsed_date = datetime.strptime(date_str.strip(), '%a, %b %d, %Y')
                except ValueError:
                    continue
                date_formatted = parsed_date.strftime('%Y-%m-%d')
                # Generate game ID
                away_abbrev = get_nba_team_abbrev(away_team)
                home_abbrev = get_nba_team_abbrev(home_team)
                game_id = f"nba_{date_formatted}_{away_abbrev}_{home_abbrev}".lower().replace(' ', '')
                game = Game(
                    id=game_id,
                    sport='NBA',
                    season=season_label,
                    date=date_formatted,
                    time=time_str,
                    home_team=home_team,
                    away_team=away_team,
                    home_team_abbrev=home_abbrev,
                    away_team_abbrev=away_abbrev,
                    venue=arena,
                    source='basketball-reference.com'
                )
                games.append(game)
            except Exception as e:
                # Best effort: one bad row must not abort the whole scrape.
                print(f" Error parsing row: {e}")
                continue
    print(f" Found {len(games)} games from Basketball-Reference")
    return games
def scrape_nba_espn(season: int) -> list[Game]:
    """
    Placeholder scraper for the ESPN NBA schedule.
    URL: https://www.espn.com/nba/schedule/_/date/{YYYYMMDD}
    Requests one schedule page per week from October 1 of the previous year
    through June 30 of the season year, but does not parse the pages yet, so
    the returned list is always empty.
    Args:
        season: Season ending year (e.g., 2025 for 2024-25 season)
    Returns:
        An empty list (no Game objects are created in the current implementation)
    """
    games = []
    print(f"Scraping NBA {season} from ESPN...")
    # Season window: October of the previous year through June of the season year.
    season_start = datetime(season - 1, 10, 1)
    season_end = datetime(season, 6, 30)
    span_days = (season_end - season_start).days
    # Sample weekly to respect rate limits
    for offset in range(0, span_days + 1, 7):
        stamp = (season_start + timedelta(days=offset)).strftime('%Y%m%d')
        page = fetch_page(f"https://www.espn.com/nba/schedule/_/date/{stamp}", 'espn.com')
        if not page:
            continue
        # ESPN uses JavaScript rendering, so we need to parse what's available.
        # This is a simplified version - full implementation would need Selenium.
    print(f" Found {len(games)} games from ESPN")
    return games
def scrape_nba_cbssports(season: int) -> list[Game]:
    """
    Scrape NBA schedule from the CBS Sports schedule page (HTML tables).
    URL: https://www.cbssports.com/nba/schedule/
    Known limitations of the current implementation:
    - The URL does not depend on `season`; `season` is only used for the
      label stored on each Game.
    - The game date is a placeholder: every game gets the date on which the
      scrape runs, not the date shown on the page.
    - Time and venue are not extracted.
    Args:
        season: Season ending year (e.g., 2025 for 2024-25 season)
    Returns:
        List of Game objects with source 'cbssports.com'; empty if the page
        could not be fetched
    """
    games = []
    print(f"Fetching NBA {season} from CBS Sports...")
    # CBS Sports has a schedule endpoint
    url = "https://www.cbssports.com/nba/schedule/"
    soup = fetch_page(url, 'cbssports.com')
    if not soup:
        return games
    # Placeholder date, computed once so every game from this run carries the
    # same value even if the scrape crosses midnight.
    # TODO: read the real date from the section each table belongs to.
    date_formatted = datetime.now().strftime('%Y-%m-%d')
    season_label = get_nba_season_string(season)
    # Find all game rows
    for table in soup.find_all('table', class_='TableBase-table'):
        for row in table.find_all('tr'):
            try:
                cells = row.find_all('td')
                if len(cells) < 2:
                    continue
                # Parse teams from row: first team link is away, second is home
                team_cells = row.find_all('a', class_='TeamName')
                if len(team_cells) < 2:
                    continue
                away_team = team_cells[0].get_text(strip=True)
                home_team = team_cells[1].get_text(strip=True)
                away_abbrev = get_nba_team_abbrev(away_team)
                home_abbrev = get_nba_team_abbrev(home_team)
                game_id = f"nba_{date_formatted}_{away_abbrev}_{home_abbrev}".lower().replace(' ', '')
                game = Game(
                    id=game_id,
                    sport='NBA',
                    season=season_label,
                    date=date_formatted,
                    time=None,
                    home_team=home_team,
                    away_team=away_team,
                    home_team_abbrev=home_abbrev,
                    away_team_abbrev=away_abbrev,
                    venue='',
                    source='cbssports.com'
                )
                games.append(game)
            except Exception as e:
                # Best effort: skip the row, but report it instead of hiding
                # the failure (same handling as the Basketball-Reference scraper).
                print(f" Error parsing row: {e}")
                continue
    print(f" Found {len(games)} games from CBS Sports")
    return games
# =============================================================================
# STADIUM SCRAPERS
# =============================================================================
def scrape_nba_stadiums() -> list[Stadium]:
    """
    Build the list of NBA arenas from a hardcoded table.
    No network access is performed. Each table entry becomes one Stadium with
    sport 'NBA' and source 'nba_hardcoded'. The stadium id is "nba_" followed
    by the lower-cased arena name with spaces replaced by underscores, cut to
    30 characters.
    Returns:
        List of Stadium objects, one per entry in the table
    """
    print("\nNBA STADIUMS")
    print("-" * 40)
    print(" Loading NBA arenas...")
    # Arena name -> location, coordinates, capacity and home team abbreviation(s).
    arena_table = {
        'State Farm Arena': {'city': 'Atlanta', 'state': 'GA', 'lat': 33.7573, 'lng': -84.3963, 'capacity': 18118, 'teams': ['ATL']},
        'TD Garden': {'city': 'Boston', 'state': 'MA', 'lat': 42.3662, 'lng': -71.0621, 'capacity': 19156, 'teams': ['BOS']},
        'Barclays Center': {'city': 'Brooklyn', 'state': 'NY', 'lat': 40.6826, 'lng': -73.9754, 'capacity': 17732, 'teams': ['BRK']},
        'Spectrum Center': {'city': 'Charlotte', 'state': 'NC', 'lat': 35.2251, 'lng': -80.8392, 'capacity': 19077, 'teams': ['CHO']},
        'United Center': {'city': 'Chicago', 'state': 'IL', 'lat': 41.8807, 'lng': -87.6742, 'capacity': 20917, 'teams': ['CHI']},
        'Rocket Mortgage FieldHouse': {'city': 'Cleveland', 'state': 'OH', 'lat': 41.4965, 'lng': -81.6882, 'capacity': 19432, 'teams': ['CLE']},
        'American Airlines Center': {'city': 'Dallas', 'state': 'TX', 'lat': 32.7905, 'lng': -96.8103, 'capacity': 19200, 'teams': ['DAL']},
        'Ball Arena': {'city': 'Denver', 'state': 'CO', 'lat': 39.7487, 'lng': -105.0077, 'capacity': 19520, 'teams': ['DEN']},
        'Little Caesars Arena': {'city': 'Detroit', 'state': 'MI', 'lat': 42.3411, 'lng': -83.0553, 'capacity': 20332, 'teams': ['DET']},
        'Chase Center': {'city': 'San Francisco', 'state': 'CA', 'lat': 37.7680, 'lng': -122.3879, 'capacity': 18064, 'teams': ['GSW']},
        'Toyota Center': {'city': 'Houston', 'state': 'TX', 'lat': 29.7508, 'lng': -95.3621, 'capacity': 18055, 'teams': ['HOU']},
        'Gainbridge Fieldhouse': {'city': 'Indianapolis', 'state': 'IN', 'lat': 39.7640, 'lng': -86.1555, 'capacity': 17923, 'teams': ['IND']},
        'Intuit Dome': {'city': 'Inglewood', 'state': 'CA', 'lat': 33.9425, 'lng': -118.3419, 'capacity': 18000, 'teams': ['LAC']},
        'Crypto.com Arena': {'city': 'Los Angeles', 'state': 'CA', 'lat': 34.0430, 'lng': -118.2673, 'capacity': 18997, 'teams': ['LAL']},
        'FedExForum': {'city': 'Memphis', 'state': 'TN', 'lat': 35.1382, 'lng': -90.0506, 'capacity': 17794, 'teams': ['MEM']},
        'Kaseya Center': {'city': 'Miami', 'state': 'FL', 'lat': 25.7814, 'lng': -80.1870, 'capacity': 19600, 'teams': ['MIA']},
        'Fiserv Forum': {'city': 'Milwaukee', 'state': 'WI', 'lat': 43.0451, 'lng': -87.9174, 'capacity': 17341, 'teams': ['MIL']},
        'Target Center': {'city': 'Minneapolis', 'state': 'MN', 'lat': 44.9795, 'lng': -93.2761, 'capacity': 18978, 'teams': ['MIN']},
        'Smoothie King Center': {'city': 'New Orleans', 'state': 'LA', 'lat': 29.9490, 'lng': -90.0821, 'capacity': 16867, 'teams': ['NOP']},
        'Madison Square Garden': {'city': 'New York', 'state': 'NY', 'lat': 40.7505, 'lng': -73.9934, 'capacity': 19812, 'teams': ['NYK']},
        'Paycom Center': {'city': 'Oklahoma City', 'state': 'OK', 'lat': 35.4634, 'lng': -97.5151, 'capacity': 18203, 'teams': ['OKC']},
        'Kia Center': {'city': 'Orlando', 'state': 'FL', 'lat': 28.5392, 'lng': -81.3839, 'capacity': 18846, 'teams': ['ORL']},
        'Wells Fargo Center': {'city': 'Philadelphia', 'state': 'PA', 'lat': 39.9012, 'lng': -75.1720, 'capacity': 20478, 'teams': ['PHI']},
        'Footprint Center': {'city': 'Phoenix', 'state': 'AZ', 'lat': 33.4457, 'lng': -112.0712, 'capacity': 17071, 'teams': ['PHO']},
        'Moda Center': {'city': 'Portland', 'state': 'OR', 'lat': 45.5316, 'lng': -122.6668, 'capacity': 19393, 'teams': ['POR']},
        'Golden 1 Center': {'city': 'Sacramento', 'state': 'CA', 'lat': 38.5802, 'lng': -121.4997, 'capacity': 17608, 'teams': ['SAC']},
        'Frost Bank Center': {'city': 'San Antonio', 'state': 'TX', 'lat': 29.4270, 'lng': -98.4375, 'capacity': 18418, 'teams': ['SAS']},
        'Scotiabank Arena': {'city': 'Toronto', 'state': 'ON', 'lat': 43.6435, 'lng': -79.3791, 'capacity': 19800, 'teams': ['TOR']},
        'Delta Center': {'city': 'Salt Lake City', 'state': 'UT', 'lat': 40.7683, 'lng': -111.9011, 'capacity': 18306, 'teams': ['UTA']},
        'Capital One Arena': {'city': 'Washington', 'state': 'DC', 'lat': 38.8982, 'lng': -77.0209, 'capacity': 20356, 'teams': ['WAS']},
    }
    # One Stadium per table entry, in table order.
    stadiums = [
        Stadium(
            id=f"nba_{arena_name.lower().replace(' ', '_')[:30]}",
            name=arena_name,
            city=details['city'],
            state=details['state'],
            latitude=details['lat'],
            longitude=details['lng'],
            capacity=details['capacity'],
            sport='NBA',
            team_abbrevs=details['teams'],
            source='nba_hardcoded'
        )
        for arena_name, details in arena_table.items()
    ]
    print(f" ✓ Found {len(stadiums)} NBA arenas")
    return stadiums
# =============================================================================
# SOURCE CONFIGURATIONS
# =============================================================================
# Game schedule sources handed to scrape_with_fallback by scrape_nba_games.
# NOTE(review): presumably a lower `priority` is tried first and `min_games`
# is the minimum result count for a source to be accepted — both are defined
# by ScraperSource / scrape_with_fallback in core; confirm there.
NBA_GAME_SOURCES = [
    ScraperSource('Basketball-Reference', scrape_nba_basketball_reference, priority=1, min_games=100),
    ScraperSource('CBS Sports', scrape_nba_cbssports, priority=2, min_games=50),
    ScraperSource('ESPN', scrape_nba_espn, priority=3, min_games=50),
]
# Stadium sources; the only entry is the hardcoded arena table.
# NOTE(review): `min_venues` is presumably the minimum venue count for the
# source to be accepted — confirm against StadiumScraperSource in core.
NBA_STADIUM_SOURCES = [
    StadiumScraperSource('Hardcoded', scrape_nba_stadiums, priority=1, min_venues=25),
]
# =============================================================================
# CONVENIENCE FUNCTIONS
# =============================================================================
def scrape_nba_games(season: int) -> list[Game]:
    """
    Print a schedule banner, then scrape NBA games via the configured sources.
    Delegates to scrape_with_fallback with NBA_GAME_SOURCES.
    Args:
        season: Season ending year (e.g., 2025 for 2024-25 season)
    Returns:
        Whatever scrape_with_fallback returns for the 'NBA' sources
        (annotated as a list of Game objects)
    """
    season_label = get_nba_season_string(season)
    print(f"\nNBA {season_label} SCHEDULE")
    print("-" * 40)
    games = scrape_with_fallback('NBA', season, NBA_GAME_SOURCES)
    return games