feat(01-02): create nba.py sport module
NBA team mappings, Basketball-Reference/ESPN/CBS scrapers, stadium data with coordinates. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
411
Scripts/nba.py
Normal file
411
Scripts/nba.py
Normal file
@@ -0,0 +1,411 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
NBA schedule and stadium scrapers for SportsTime.
|
||||
|
||||
This module provides:
|
||||
- NBA game scrapers (Basketball-Reference, ESPN, CBS Sports)
|
||||
- NBA stadium scrapers (hardcoded with coordinates)
|
||||
- Multi-source fallback configurations
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
|
||||
# Support both direct execution and import from parent directory
|
||||
try:
|
||||
from core import (
|
||||
Game,
|
||||
Stadium,
|
||||
ScraperSource,
|
||||
StadiumScraperSource,
|
||||
fetch_page,
|
||||
scrape_with_fallback,
|
||||
scrape_stadiums_with_fallback,
|
||||
)
|
||||
except ImportError:
|
||||
from Scripts.core import (
|
||||
Game,
|
||||
Stadium,
|
||||
ScraperSource,
|
||||
StadiumScraperSource,
|
||||
fetch_page,
|
||||
scrape_with_fallback,
|
||||
scrape_stadiums_with_fallback,
|
||||
)
|
||||
|
||||
|
||||
# Names exported by `from nba import *`, annotated by role.
__all__ = [
    'NBA_TEAMS',                        # team data
    'scrape_nba_basketball_reference',  # game scraper
    'scrape_nba_espn',                  # game scraper
    'scrape_nba_cbssports',             # game scraper
    'scrape_nba_stadiums',              # stadium scraper
    'NBA_GAME_SOURCES',                 # source configuration
    'NBA_STADIUM_SOURCES',              # source configuration
    'scrape_nba_games',                 # convenience function
    'get_nba_season_string',            # convenience function
]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# TEAM MAPPINGS
|
||||
# =============================================================================
|
||||
|
||||
# Team metadata keyed by three-letter abbreviation. Each value holds the
# team's full name, its city, and the name of its home arena.
NBA_TEAMS = {
    'ATL': {'name': 'Atlanta Hawks', 'city': 'Atlanta', 'arena': 'State Farm Arena'},
    'BOS': {'name': 'Boston Celtics', 'city': 'Boston', 'arena': 'TD Garden'},
    'BRK': {'name': 'Brooklyn Nets', 'city': 'Brooklyn', 'arena': 'Barclays Center'},
    'CHO': {'name': 'Charlotte Hornets', 'city': 'Charlotte', 'arena': 'Spectrum Center'},
    'CHI': {'name': 'Chicago Bulls', 'city': 'Chicago', 'arena': 'United Center'},
    'CLE': {'name': 'Cleveland Cavaliers', 'city': 'Cleveland', 'arena': 'Rocket Mortgage FieldHouse'},
    'DAL': {'name': 'Dallas Mavericks', 'city': 'Dallas', 'arena': 'American Airlines Center'},
    'DEN': {'name': 'Denver Nuggets', 'city': 'Denver', 'arena': 'Ball Arena'},
    'DET': {'name': 'Detroit Pistons', 'city': 'Detroit', 'arena': 'Little Caesars Arena'},
    'GSW': {'name': 'Golden State Warriors', 'city': 'San Francisco', 'arena': 'Chase Center'},
    'HOU': {'name': 'Houston Rockets', 'city': 'Houston', 'arena': 'Toyota Center'},
    'IND': {'name': 'Indiana Pacers', 'city': 'Indianapolis', 'arena': 'Gainbridge Fieldhouse'},
    'LAC': {'name': 'Los Angeles Clippers', 'city': 'Inglewood', 'arena': 'Intuit Dome'},
    'LAL': {'name': 'Los Angeles Lakers', 'city': 'Los Angeles', 'arena': 'Crypto.com Arena'},
    'MEM': {'name': 'Memphis Grizzlies', 'city': 'Memphis', 'arena': 'FedExForum'},
    'MIA': {'name': 'Miami Heat', 'city': 'Miami', 'arena': 'Kaseya Center'},
    'MIL': {'name': 'Milwaukee Bucks', 'city': 'Milwaukee', 'arena': 'Fiserv Forum'},
    'MIN': {'name': 'Minnesota Timberwolves', 'city': 'Minneapolis', 'arena': 'Target Center'},
    'NOP': {'name': 'New Orleans Pelicans', 'city': 'New Orleans', 'arena': 'Smoothie King Center'},
    'NYK': {'name': 'New York Knicks', 'city': 'New York', 'arena': 'Madison Square Garden'},
    'OKC': {'name': 'Oklahoma City Thunder', 'city': 'Oklahoma City', 'arena': 'Paycom Center'},
    'ORL': {'name': 'Orlando Magic', 'city': 'Orlando', 'arena': 'Kia Center'},
    'PHI': {'name': 'Philadelphia 76ers', 'city': 'Philadelphia', 'arena': 'Wells Fargo Center'},
    'PHO': {'name': 'Phoenix Suns', 'city': 'Phoenix', 'arena': 'Footprint Center'},
    'POR': {'name': 'Portland Trail Blazers', 'city': 'Portland', 'arena': 'Moda Center'},
    'SAC': {'name': 'Sacramento Kings', 'city': 'Sacramento', 'arena': 'Golden 1 Center'},
    'SAS': {'name': 'San Antonio Spurs', 'city': 'San Antonio', 'arena': 'Frost Bank Center'},
    'TOR': {'name': 'Toronto Raptors', 'city': 'Toronto', 'arena': 'Scotiabank Arena'},
    'UTA': {'name': 'Utah Jazz', 'city': 'Salt Lake City', 'arena': 'Delta Center'},
    'WAS': {'name': 'Washington Wizards', 'city': 'Washington', 'arena': 'Capital One Arena'},
}


def get_nba_team_abbrev(team_name: str) -> str:
    """
    Get the NBA team abbreviation for a team name.

    Lookup order (case-insensitive, surrounding whitespace ignored):
      1. The input is itself a key of NBA_TEAMS (e.g. "DEN").
      2. The input equals a team's full name.
      3. The input is a substring of a team's full name (e.g. "Lakers");
         the first team in NBA_TEAMS order wins if several match.
      4. Fallback: the first three characters of the input, upper-cased.

    Args:
        team_name: Full name, partial name, or abbreviation of a team

    Returns:
        The matching abbreviation, the three-letter fallback when nothing
        matches, or "" for empty/blank input
    """
    cleaned = (team_name or '').strip()
    if not cleaned:
        # An empty string is a substring of every name; without this guard
        # blank input would be reported as the first team in the table.
        return ''

    # Already an abbreviation: return it before any substring matching, so
    # that e.g. "DEN" is not matched against "Golden State Warriors".
    as_abbrev = cleaned.upper()
    if as_abbrev in NBA_TEAMS:
        return as_abbrev

    query = cleaned.lower()

    # Exact matches take precedence over partial ones for every team.
    for abbrev, info in NBA_TEAMS.items():
        if info['name'].lower() == query:
            return abbrev

    for abbrev, info in NBA_TEAMS.items():
        if query in info['name'].lower():
            return abbrev

    # Return first 3 letters as fallback
    return cleaned[:3].upper()
|
||||
|
||||
|
||||
def get_nba_season_string(season: int) -> str:
    """
    Format a season's ending year as an NBA season label.

    Args:
        season: Year in which the season ends (2025 means the 2024-25 season)

    Returns:
        Label such as "2024-25": the previous year, a hyphen, then the
        characters of the ending year after its first two
    """
    start_year = season - 1
    end_suffix = str(season)[2:]
    return "{}-{}".format(start_year, end_suffix)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# GAME SCRAPERS
|
||||
# =============================================================================
|
||||
|
||||
def _parse_bbref_schedule_row(row, season_label: str) -> Optional[Game]:
    """
    Convert one row of a Basketball-Reference schedule table into a Game.

    Args:
        row: A <tr> element taken from the schedule table body
        season_label: Season string stored on the Game (e.g. "2024-25")

    Returns:
        A Game, or None when the row is not a game row (repeated header,
        too few cells, missing date/team cells, or a date cell whose text
        does not parse as a date)
    """
    row_classes = row.get('class')
    if row_classes and 'thead' in row_classes:
        return None

    if len(row.find_all(['td', 'th'])) < 6:
        return None

    date_cell = row.find('th', {'data-stat': 'date_game'})
    visitor_cell = row.find('td', {'data-stat': 'visitor_team_name'})
    home_cell = row.find('td', {'data-stat': 'home_team_name'})
    if not date_cell or not visitor_cell or not home_cell:
        return None

    # Prefer the text of the link inside a cell when there is one.
    date_link = date_cell.find('a')
    date_text = (date_link.text if date_link else date_cell.text).strip()
    try:
        parsed_date = datetime.strptime(date_text, '%a, %b %d, %Y')
    except ValueError:
        # Only a failed date parse is expected here; anything else should
        # surface to the caller's per-row error report.
        return None
    date_formatted = parsed_date.strftime('%Y-%m-%d')

    time_cell = row.find('td', {'data-stat': 'game_start_time'})
    time_str = time_cell.text.strip() if time_cell else None

    visitor_link = visitor_cell.find('a')
    home_link = home_cell.find('a')
    # Strip so stray whitespace cannot leak into the record or defeat the
    # name -> abbreviation lookup.
    away_team = (visitor_link.text if visitor_link else visitor_cell.text).strip()
    home_team = (home_link.text if home_link else home_cell.text).strip()

    arena_cell = row.find('td', {'data-stat': 'arena_name'})
    arena = arena_cell.text.strip() if arena_cell else ''

    away_abbrev = get_nba_team_abbrev(away_team)
    home_abbrev = get_nba_team_abbrev(home_team)
    game_id = f"nba_{date_formatted}_{away_abbrev}_{home_abbrev}".lower().replace(' ', '')

    return Game(
        id=game_id,
        sport='NBA',
        season=season_label,
        date=date_formatted,
        time=time_str,
        home_team=home_team,
        away_team=away_team,
        home_team_abbrev=home_abbrev,
        away_team_abbrev=away_abbrev,
        venue=arena,
        source='basketball-reference.com'
    )


def scrape_nba_basketball_reference(season: int) -> list[Game]:
    """
    Scrape NBA schedule from Basketball-Reference.

    URL: https://www.basketball-reference.com/leagues/NBA_{YEAR}_games-{month}.html
    Season year is the ending year (e.g., 2025 for 2024-25 season)

    One page is requested per month from October through June. Months whose
    page, schedule table, or table body is missing are skipped. A row that
    raises while being parsed is reported on stdout and skipped, so one bad
    row does not abort the scrape.

    Args:
        season: Season ending year

    Returns:
        All games parsed from the monthly pages, in page order
    """
    games = []
    months = ['october', 'november', 'december', 'january', 'february', 'march', 'april', 'may', 'june']
    season_label = get_nba_season_string(season)

    print(f"Scraping NBA {season} from Basketball-Reference...")

    for month in months:
        url = f"https://www.basketball-reference.com/leagues/NBA_{season}_games-{month}.html"
        soup = fetch_page(url, 'basketball-reference.com')
        if not soup:
            continue

        table = soup.find('table', {'id': 'schedule'})
        tbody = table.find('tbody') if table else None
        if not tbody:
            continue

        for row in tbody.find_all('tr'):
            try:
                game = _parse_bbref_schedule_row(row, season_label)
            except Exception as e:
                print(f" Error parsing row: {e}")
                continue
            if game is not None:
                games.append(game)

    print(f" Found {len(games)} games from Basketball-Reference")
    return games
|
||||
|
||||
|
||||
def scrape_nba_espn(season: int) -> list[Game]:
    """
    Placeholder for the ESPN schedule scraper; currently always returns [].

    URL: https://www.espn.com/nba/schedule/_/date/{YYYYMMDD}

    No parser exists for this source yet: the schedule page uses JavaScript
    rendering, and a full implementation would need a browser driver such
    as Selenium. Until one is written, no requests are sent, because
    downloading pages that are never parsed only spends time and rate limit.

    Args:
        season: Season ending year (e.g., 2025 for 2024-25 season)

    Returns:
        An empty list
    """
    games = []
    print(f"Scraping NBA {season} from ESPN...")

    # TODO: implement parsing. The intended walk is over dates from
    # October 1 of (season - 1) through June 30 of season, requesting the
    # URL above for each sampled date and building Game objects from it.

    print(f" Found {len(games)} games from ESPN")
    return games
|
||||
|
||||
|
||||
def scrape_nba_cbssports(season: int) -> list[Game]:
    """
    Scrape NBA matchups from the CBS Sports schedule page (HTML).

    A single page, https://www.cbssports.com/nba/schedule/, is fetched and
    every table with class "TableBase-table" is scanned for rows holding at
    least two "TeamName" links (away team first, home team second).

    Known limitations:
      - `season` is only used to label the returned games; it does not
        select which schedule is fetched.
      - Game dates are NOT read from the page. Every game is stamped with
        the date on which the scrape runs (placeholder), and time and
        venue are left empty.

    Args:
        season: Season ending year (e.g., 2025 for 2024-25 season)

    Returns:
        Games found on the page, or an empty list if the page could not be
        fetched
    """
    games = []
    print(f"Fetching NBA {season} from CBS Sports...")

    url = "https://www.cbssports.com/nba/schedule/"

    soup = fetch_page(url, 'cbssports.com')
    if not soup:
        return games

    season_label = get_nba_season_string(season)

    # FIXME: placeholder - the real game date still has to be extracted
    # from the page. Computed once so all rows of one scrape agree.
    date_formatted = datetime.now().strftime('%Y-%m-%d')

    for table in soup.find_all('table', class_='TableBase-table'):
        for row in table.find_all('tr'):
            try:
                if len(row.find_all('td')) < 2:
                    continue

                team_links = row.find_all('a', class_='TeamName')
                if len(team_links) < 2:
                    continue

                away_team = team_links[0].get_text(strip=True)
                home_team = team_links[1].get_text(strip=True)

                away_abbrev = get_nba_team_abbrev(away_team)
                home_abbrev = get_nba_team_abbrev(home_team)
                game_id = f"nba_{date_formatted}_{away_abbrev}_{home_abbrev}".lower().replace(' ', '')

                games.append(Game(
                    id=game_id,
                    sport='NBA',
                    season=season_label,
                    date=date_formatted,
                    time=None,
                    home_team=home_team,
                    away_team=away_team,
                    home_team_abbrev=home_abbrev,
                    away_team_abbrev=away_abbrev,
                    venue='',
                    source='cbssports.com'
                ))
            except Exception as e:
                # Skip the bad row but report it, as the
                # Basketball-Reference scraper does, so parse failures are
                # not silently hidden.
                print(f" Error parsing row: {e}")
                continue

    print(f" Found {len(games)} games from CBS Sports")
    return games
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# STADIUM SCRAPERS
|
||||
# =============================================================================
|
||||
|
||||
def scrape_nba_stadiums() -> list[Stadium]:
    """
    Build Stadium records for the NBA arenas from a hardcoded table.

    No network access is involved. Each record's id is "nba_" followed by
    the arena name lower-cased, with spaces turned into underscores and
    cut to 30 characters.

    Returns:
        One Stadium per table row, in table order
    """
    print("\nNBA STADIUMS")
    print("-" * 40)
    print(" Loading NBA arenas...")

    # (name, city, state, latitude, longitude, capacity, team abbreviations)
    arena_rows = [
        ('State Farm Arena', 'Atlanta', 'GA', 33.7573, -84.3963, 18118, ['ATL']),
        ('TD Garden', 'Boston', 'MA', 42.3662, -71.0621, 19156, ['BOS']),
        ('Barclays Center', 'Brooklyn', 'NY', 40.6826, -73.9754, 17732, ['BRK']),
        ('Spectrum Center', 'Charlotte', 'NC', 35.2251, -80.8392, 19077, ['CHO']),
        ('United Center', 'Chicago', 'IL', 41.8807, -87.6742, 20917, ['CHI']),
        ('Rocket Mortgage FieldHouse', 'Cleveland', 'OH', 41.4965, -81.6882, 19432, ['CLE']),
        ('American Airlines Center', 'Dallas', 'TX', 32.7905, -96.8103, 19200, ['DAL']),
        ('Ball Arena', 'Denver', 'CO', 39.7487, -105.0077, 19520, ['DEN']),
        ('Little Caesars Arena', 'Detroit', 'MI', 42.3411, -83.0553, 20332, ['DET']),
        ('Chase Center', 'San Francisco', 'CA', 37.7680, -122.3879, 18064, ['GSW']),
        ('Toyota Center', 'Houston', 'TX', 29.7508, -95.3621, 18055, ['HOU']),
        ('Gainbridge Fieldhouse', 'Indianapolis', 'IN', 39.7640, -86.1555, 17923, ['IND']),
        ('Intuit Dome', 'Inglewood', 'CA', 33.9425, -118.3419, 18000, ['LAC']),
        ('Crypto.com Arena', 'Los Angeles', 'CA', 34.0430, -118.2673, 18997, ['LAL']),
        ('FedExForum', 'Memphis', 'TN', 35.1382, -90.0506, 17794, ['MEM']),
        ('Kaseya Center', 'Miami', 'FL', 25.7814, -80.1870, 19600, ['MIA']),
        ('Fiserv Forum', 'Milwaukee', 'WI', 43.0451, -87.9174, 17341, ['MIL']),
        ('Target Center', 'Minneapolis', 'MN', 44.9795, -93.2761, 18978, ['MIN']),
        ('Smoothie King Center', 'New Orleans', 'LA', 29.9490, -90.0821, 16867, ['NOP']),
        ('Madison Square Garden', 'New York', 'NY', 40.7505, -73.9934, 19812, ['NYK']),
        ('Paycom Center', 'Oklahoma City', 'OK', 35.4634, -97.5151, 18203, ['OKC']),
        ('Kia Center', 'Orlando', 'FL', 28.5392, -81.3839, 18846, ['ORL']),
        ('Wells Fargo Center', 'Philadelphia', 'PA', 39.9012, -75.1720, 20478, ['PHI']),
        ('Footprint Center', 'Phoenix', 'AZ', 33.4457, -112.0712, 17071, ['PHO']),
        ('Moda Center', 'Portland', 'OR', 45.5316, -122.6668, 19393, ['POR']),
        ('Golden 1 Center', 'Sacramento', 'CA', 38.5802, -121.4997, 17608, ['SAC']),
        ('Frost Bank Center', 'San Antonio', 'TX', 29.4270, -98.4375, 18418, ['SAS']),
        ('Scotiabank Arena', 'Toronto', 'ON', 43.6435, -79.3791, 19800, ['TOR']),
        ('Delta Center', 'Salt Lake City', 'UT', 40.7683, -111.9011, 18306, ['UTA']),
        ('Capital One Arena', 'Washington', 'DC', 38.8982, -77.0209, 20356, ['WAS']),
    ]

    stadiums = [
        Stadium(
            id="nba_" + arena_name.lower().replace(' ', '_')[:30],
            name=arena_name,
            city=city,
            state=state,
            latitude=lat,
            longitude=lng,
            capacity=capacity,
            sport='NBA',
            team_abbrevs=teams,
            source='nba_hardcoded'
        )
        for arena_name, city, state, lat, lng, capacity, teams in arena_rows
    ]

    print(f" ✓ Found {len(stadiums)} NBA arenas")
    return stadiums
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SOURCE CONFIGURATIONS
|
||||
# =============================================================================
|
||||
|
||||
# Game sources handed to scrape_with_fallback, listed in ascending
# `priority`. NOTE(review): presumably a lower priority is tried first and
# `min_games` is the result size a source needs to count as successful -
# confirm against core.scrape_with_fallback.
NBA_GAME_SOURCES = [
    ScraperSource(
        'Basketball-Reference',
        scrape_nba_basketball_reference,
        priority=1,
        min_games=100,
    ),
    ScraperSource(
        'CBS Sports',
        scrape_nba_cbssports,
        priority=2,
        min_games=50,
    ),
    ScraperSource(
        'ESPN',
        scrape_nba_espn,
        priority=3,
        min_games=50,
    ),
]
|
||||
|
||||
# Stadium sources: only the hardcoded arena table.
# NOTE(review): `min_venues` is presumably the minimum number of venues a
# source must return to be accepted - confirm against core.
NBA_STADIUM_SOURCES = [
    StadiumScraperSource(
        'Hardcoded',
        scrape_nba_stadiums,
        priority=1,
        min_venues=25,
    ),
]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# CONVENIENCE FUNCTIONS
|
||||
# =============================================================================
|
||||
|
||||
def scrape_nba_games(season: int) -> list[Game]:
    """
    Print a schedule banner, then scrape the season's NBA games through
    the multi-source fallback.

    Args:
        season: Season ending year (e.g., 2025 for 2024-25 season)

    Returns:
        Whatever scrape_with_fallback returns for NBA_GAME_SOURCES
        (described upstream as the Game objects from the first successful
        source)
    """
    season_label = get_nba_season_string(season)
    banner = f"\nNBA {season_label} SCHEDULE"
    print(banner)
    print("-" * 40)

    games = scrape_with_fallback('NBA', season, NBA_GAME_SOURCES)
    return games
|
||||
Reference in New Issue
Block a user