Adds the full Django application layer on top of sportstime_parser: - core: Sport, Team, Stadium, Game models with aliases and league structure - scraper: orchestration engine, adapter, job management, Celery tasks - cloudkit: CloudKit sync client, sync state tracking, sync jobs - dashboard: staff dashboard for monitoring scrapers, sync, review queue - notifications: email reports for scrape/sync results - Docker setup for deployment (Dockerfile, docker-compose, entrypoint) Game exports now use game_datetime_utc (ISO 8601 UTC) instead of venue-local date+time strings, matching the canonical format used by the iOS app. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
688 lines
23 KiB
Python
688 lines
23 KiB
Python
"""NBA scraper implementation with multi-source fallback."""
|
|
|
|
from datetime import datetime, date, timezone
|
|
from typing import Optional
|
|
from zoneinfo import ZoneInfo
|
|
from bs4 import BeautifulSoup
|
|
import re
|
|
|
|
from .base import BaseScraper, RawGameData, ScrapeResult
|
|
from ..models.game import Game
|
|
from ..models.team import Team
|
|
from ..models.stadium import Stadium
|
|
from ..models.aliases import ManualReviewItem
|
|
from ..normalizers.canonical_id import generate_game_id
|
|
from ..normalizers.team_resolver import (
|
|
TeamResolver,
|
|
TEAM_MAPPINGS,
|
|
get_team_resolver,
|
|
)
|
|
from ..normalizers.stadium_resolver import (
|
|
StadiumResolver,
|
|
STADIUM_MAPPINGS,
|
|
get_stadium_resolver,
|
|
)
|
|
from ..normalizers.timezone import parse_datetime
|
|
from ..utils.logging import get_logger, log_game, log_warning
|
|
|
|
|
|
# Lookup table: lowercase English month name -> calendar month number (1-12).
MONTH_MAP = {
    name: number
    for number, name in enumerate(
        [
            "january", "february", "march", "april", "may", "june",
            "july", "august", "september", "october", "november", "december",
        ],
        start=1,
    )
}

# Basketball-Reference splits each season's schedule into per-month pages;
# these are the page slugs in season order (October through June).
BR_MONTHS = [
    "october", "november", "december",
    "january", "february", "march", "april", "may", "june",
]
|
|
|
|
|
|
class NBAScraper(BaseScraper):
    """NBA schedule scraper with multi-source fallback.

    Sources (in priority order):
        1. Basketball-Reference - Most reliable, complete historical data
        2. ESPN API - Good for current/future seasons
        3. CBS Sports - Backup option (not implemented)

    Source Timezones:
        - basketball_reference: Eastern Time (ET) - times displayed as "7:30p"
        - espn: UTC - ISO 8601 format with "Z" suffix
        - cbs: Not implemented
    """

    def __init__(self, season: int, **kwargs):
        """Initialize NBA scraper.

        Args:
            season: Season start year (e.g., 2025 for 2025-26)
        """
        super().__init__("nba", season, **kwargs)
        # Resolvers translate raw source strings into canonical IDs.
        self._team_resolver = get_team_resolver("nba")
        self._stadium_resolver = get_stadium_resolver("nba")
|
|
|
|
def _get_sources(self) -> list[str]:
|
|
"""Return source list in priority order."""
|
|
# CBS scraper not yet implemented - TODO for future
|
|
return ["basketball_reference", "espn"]
|
|
|
|
def _get_source_url(self, source: str, **kwargs) -> str:
|
|
"""Build URL for a source."""
|
|
if source == "basketball_reference":
|
|
month = kwargs.get("month", "october")
|
|
year = kwargs.get("year", self.season + 1)
|
|
return f"https://www.basketball-reference.com/leagues/NBA_{year}_games-{month}.html"
|
|
|
|
elif source == "espn":
|
|
date_str = kwargs.get("date", "")
|
|
return f"https://site.api.espn.com/apis/site/v2/sports/basketball/nba/scoreboard?dates={date_str}"
|
|
|
|
elif source == "cbs":
|
|
return "https://www.cbssports.com/nba/schedule/"
|
|
|
|
raise ValueError(f"Unknown source: {source}")
|
|
|
|
def _scrape_games_from_source(self, source: str) -> list[RawGameData]:
|
|
"""Scrape games from a specific source."""
|
|
if source == "basketball_reference":
|
|
return self._scrape_basketball_reference()
|
|
elif source == "espn":
|
|
return self._scrape_espn()
|
|
elif source == "cbs":
|
|
return self._scrape_cbs()
|
|
else:
|
|
raise ValueError(f"Unknown source: {source}")
|
|
|
|
def _scrape_basketball_reference(self) -> list[RawGameData]:
|
|
"""Scrape games from Basketball-Reference.
|
|
|
|
BR organizes games by month with separate pages.
|
|
Format: https://www.basketball-reference.com/leagues/NBA_YYYY_games-month.html
|
|
where YYYY is the ending year of the season.
|
|
Bails early if first few months have no data (season doesn't exist).
|
|
"""
|
|
all_games: list[RawGameData] = []
|
|
end_year = self.season + 1
|
|
consecutive_empty_months = 0
|
|
|
|
for month in BR_MONTHS:
|
|
url = self._get_source_url("basketball_reference", month=month, year=end_year)
|
|
|
|
try:
|
|
html = self.session.get_html(url)
|
|
games = self._parse_basketball_reference(html, url)
|
|
|
|
if games:
|
|
all_games.extend(games)
|
|
consecutive_empty_months = 0
|
|
self._logger.debug(f"Found {len(games)} games in {month}")
|
|
else:
|
|
consecutive_empty_months += 1
|
|
|
|
except Exception as e:
|
|
# Some months may not exist (e.g., no games in August)
|
|
self._logger.debug(f"No data for {month}: {e}")
|
|
consecutive_empty_months += 1
|
|
|
|
# If first 3 months (Oct, Nov, Dec) all have no data, season doesn't exist
|
|
if consecutive_empty_months >= 3 and not all_games:
|
|
self._logger.info(f"No games found in first {consecutive_empty_months} months, season likely doesn't exist")
|
|
break
|
|
|
|
return all_games
|
|
|
|
def _parse_basketball_reference(
|
|
self,
|
|
html: str,
|
|
source_url: str,
|
|
) -> list[RawGameData]:
|
|
"""Parse Basketball-Reference schedule HTML.
|
|
|
|
Table structure:
|
|
- th[data-stat="date_game"]: Date (e.g., "Tue, Oct 22, 2024")
|
|
- td[data-stat="visitor_team_name"]: Away team
|
|
- td[data-stat="home_team_name"]: Home team
|
|
- td[data-stat="visitor_pts"]: Away score
|
|
- td[data-stat="home_pts"]: Home score
|
|
- td[data-stat="arena_name"]: Arena/stadium name
|
|
"""
|
|
soup = BeautifulSoup(html, "lxml")
|
|
games: list[RawGameData] = []
|
|
|
|
# Find the schedule table
|
|
table = soup.find("table", id="schedule")
|
|
if not table:
|
|
return games
|
|
|
|
tbody = table.find("tbody")
|
|
if not tbody:
|
|
return games
|
|
|
|
for row in tbody.find_all("tr"):
|
|
# Skip header rows
|
|
if row.get("class") and "thead" in row.get("class", []):
|
|
continue
|
|
|
|
try:
|
|
game = self._parse_br_row(row, source_url)
|
|
if game:
|
|
games.append(game)
|
|
except Exception as e:
|
|
self._logger.debug(f"Failed to parse row: {e}")
|
|
continue
|
|
|
|
return games
|
|
|
|
def _parse_br_row(
|
|
self,
|
|
row,
|
|
source_url: str,
|
|
) -> Optional[RawGameData]:
|
|
"""Parse a single Basketball-Reference table row."""
|
|
# Get date
|
|
date_cell = row.find("th", {"data-stat": "date_game"})
|
|
if not date_cell:
|
|
return None
|
|
|
|
date_text = date_cell.get_text(strip=True)
|
|
if not date_text:
|
|
return None
|
|
|
|
# Parse date (format: "Tue, Oct 22, 2024")
|
|
try:
|
|
game_date = datetime.strptime(date_text, "%a, %b %d, %Y")
|
|
except ValueError:
|
|
# Try alternative format
|
|
try:
|
|
game_date = datetime.strptime(date_text, "%B %d, %Y")
|
|
except ValueError:
|
|
self._logger.debug(f"Could not parse date: {date_text}")
|
|
return None
|
|
|
|
# Get game start time (format: "7:30p" or "10:00p") - times are in ET
|
|
time_cell = row.find("td", {"data-stat": "game_start_time"})
|
|
if time_cell:
|
|
time_text = time_cell.get_text(strip=True)
|
|
if time_text:
|
|
try:
|
|
# Parse time like "7:30p" or "10:00p"
|
|
# Normalize: "7:30p" -> "7:30 PM", "10:00p" -> "10:00 PM"
|
|
time_normalized = time_text.replace("p", " PM").replace("a", " AM")
|
|
game_time = datetime.strptime(time_normalized, "%I:%M %p")
|
|
# Combine date and time with ET timezone (Basketball-Reference uses ET)
|
|
game_date = game_date.replace(
|
|
hour=game_time.hour,
|
|
minute=game_time.minute,
|
|
tzinfo=ZoneInfo("America/New_York"),
|
|
)
|
|
except ValueError:
|
|
self._logger.debug(f"Could not parse time: {time_text}, using midnight")
|
|
|
|
# Get teams
|
|
away_cell = row.find("td", {"data-stat": "visitor_team_name"})
|
|
home_cell = row.find("td", {"data-stat": "home_team_name"})
|
|
|
|
if not away_cell or not home_cell:
|
|
return None
|
|
|
|
away_team = away_cell.get_text(strip=True)
|
|
home_team = home_cell.get_text(strip=True)
|
|
|
|
if not away_team or not home_team:
|
|
return None
|
|
|
|
# Get scores (may be empty for future games)
|
|
away_score_cell = row.find("td", {"data-stat": "visitor_pts"})
|
|
home_score_cell = row.find("td", {"data-stat": "home_pts"})
|
|
|
|
away_score = None
|
|
home_score = None
|
|
|
|
if away_score_cell and away_score_cell.get_text(strip=True):
|
|
try:
|
|
away_score = int(away_score_cell.get_text(strip=True))
|
|
except ValueError:
|
|
pass
|
|
|
|
if home_score_cell and home_score_cell.get_text(strip=True):
|
|
try:
|
|
home_score = int(home_score_cell.get_text(strip=True))
|
|
except ValueError:
|
|
pass
|
|
|
|
# Get arena
|
|
arena_cell = row.find("td", {"data-stat": "arena_name"})
|
|
arena = arena_cell.get_text(strip=True) if arena_cell else None
|
|
|
|
# Determine status
|
|
status = "final" if home_score is not None else "scheduled"
|
|
|
|
# Check for postponed/cancelled
|
|
notes_cell = row.find("td", {"data-stat": "game_remarks"})
|
|
if notes_cell:
|
|
notes = notes_cell.get_text(strip=True).lower()
|
|
if "postponed" in notes:
|
|
status = "postponed"
|
|
elif "cancelled" in notes or "canceled" in notes:
|
|
status = "cancelled"
|
|
|
|
return RawGameData(
|
|
game_date=game_date,
|
|
home_team_raw=home_team,
|
|
away_team_raw=away_team,
|
|
stadium_raw=arena,
|
|
home_score=home_score,
|
|
away_score=away_score,
|
|
status=status,
|
|
source_url=source_url,
|
|
)
|
|
|
|
def _scrape_espn(self) -> list[RawGameData]:
|
|
"""Scrape games from ESPN API.
|
|
|
|
ESPN API returns games for a specific date range.
|
|
We iterate through each day of the season.
|
|
Bails out early if no games found after checking first month.
|
|
"""
|
|
all_games: list[RawGameData] = []
|
|
consecutive_empty_days = 0
|
|
max_empty_days = 45 # Bail after ~1.5 months of no games
|
|
|
|
for year, month in self._get_season_months():
|
|
# Get number of days in month
|
|
if month == 12:
|
|
next_month = date(year + 1, 1, 1)
|
|
else:
|
|
next_month = date(year, month + 1, 1)
|
|
|
|
days_in_month = (next_month - date(year, month, 1)).days
|
|
|
|
for day in range(1, days_in_month + 1):
|
|
try:
|
|
game_date = date(year, month, day)
|
|
date_str = game_date.strftime("%Y%m%d")
|
|
url = self._get_source_url("espn", date=date_str)
|
|
|
|
data = self.session.get_json(url)
|
|
games = self._parse_espn_response(data, url)
|
|
|
|
if games:
|
|
all_games.extend(games)
|
|
consecutive_empty_days = 0
|
|
else:
|
|
consecutive_empty_days += 1
|
|
|
|
# Bail early if no games found for a long stretch
|
|
if consecutive_empty_days >= max_empty_days:
|
|
self._logger.info(f"No games found for {max_empty_days} consecutive days, stopping ESPN scrape")
|
|
return all_games
|
|
|
|
except Exception as e:
|
|
self._logger.debug(f"ESPN error for {year}-{month}-{day}: {e}")
|
|
consecutive_empty_days += 1
|
|
|
|
if consecutive_empty_days >= max_empty_days:
|
|
self._logger.info(f"Too many consecutive failures, stopping ESPN scrape")
|
|
return all_games
|
|
continue
|
|
|
|
return all_games
|
|
|
|
def _parse_espn_response(
|
|
self,
|
|
data: dict,
|
|
source_url: str,
|
|
) -> list[RawGameData]:
|
|
"""Parse ESPN API response."""
|
|
games: list[RawGameData] = []
|
|
|
|
events = data.get("events", [])
|
|
|
|
for event in events:
|
|
try:
|
|
game = self._parse_espn_event(event, source_url)
|
|
if game:
|
|
games.append(game)
|
|
except Exception as e:
|
|
self._logger.debug(f"Failed to parse ESPN event: {e}")
|
|
continue
|
|
|
|
return games
|
|
|
|
def _parse_espn_event(
|
|
self,
|
|
event: dict,
|
|
source_url: str,
|
|
) -> Optional[RawGameData]:
|
|
"""Parse a single ESPN event."""
|
|
# Get date
|
|
date_str = event.get("date", "")
|
|
if not date_str:
|
|
return None
|
|
|
|
try:
|
|
# ESPN uses ISO format
|
|
game_date = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
|
|
except ValueError:
|
|
return None
|
|
|
|
# Get competitions (usually just one)
|
|
competitions = event.get("competitions", [])
|
|
if not competitions:
|
|
return None
|
|
|
|
competition = competitions[0]
|
|
|
|
# Get teams
|
|
competitors = competition.get("competitors", [])
|
|
if len(competitors) != 2:
|
|
return None
|
|
|
|
home_team = None
|
|
away_team = None
|
|
home_score = None
|
|
away_score = None
|
|
|
|
for competitor in competitors:
|
|
team_info = competitor.get("team", {})
|
|
team_name = team_info.get("displayName", "")
|
|
is_home = competitor.get("homeAway") == "home"
|
|
score = competitor.get("score")
|
|
|
|
if score:
|
|
try:
|
|
score = int(score)
|
|
except (ValueError, TypeError):
|
|
score = None
|
|
|
|
if is_home:
|
|
home_team = team_name
|
|
home_score = score
|
|
else:
|
|
away_team = team_name
|
|
away_score = score
|
|
|
|
if not home_team or not away_team:
|
|
return None
|
|
|
|
# Get venue
|
|
venue = competition.get("venue", {})
|
|
arena = venue.get("fullName")
|
|
|
|
# Get status
|
|
status_info = competition.get("status", {})
|
|
status_type = status_info.get("type", {})
|
|
status_name = status_type.get("name", "").lower()
|
|
|
|
if status_name == "status_final":
|
|
status = "final"
|
|
elif status_name == "status_postponed":
|
|
status = "postponed"
|
|
elif status_name == "status_canceled":
|
|
status = "cancelled"
|
|
else:
|
|
status = "scheduled"
|
|
|
|
return RawGameData(
|
|
game_date=game_date,
|
|
home_team_raw=home_team,
|
|
away_team_raw=away_team,
|
|
stadium_raw=arena,
|
|
home_score=home_score,
|
|
away_score=away_score,
|
|
status=status,
|
|
source_url=source_url,
|
|
)
|
|
|
|
def _scrape_cbs(self) -> list[RawGameData]:
|
|
"""Scrape games from CBS Sports.
|
|
|
|
CBS Sports is a backup source with less structured data.
|
|
"""
|
|
# CBS Sports scraping would go here
|
|
# For now, return empty to fall back to other sources
|
|
raise NotImplementedError("CBS scraper not implemented")
|
|
|
|
def _normalize_games(
|
|
self,
|
|
raw_games: list[RawGameData],
|
|
) -> tuple[list[Game], list[ManualReviewItem]]:
|
|
"""Normalize raw games to Game objects with canonical IDs."""
|
|
games: list[Game] = []
|
|
review_items: list[ManualReviewItem] = []
|
|
|
|
# Track games by date for doubleheader detection
|
|
games_by_date: dict[str, list[RawGameData]] = {}
|
|
|
|
for raw in raw_games:
|
|
date_key = raw.game_date.strftime("%Y%m%d")
|
|
matchup_key = f"{date_key}_{raw.away_team_raw}_{raw.home_team_raw}"
|
|
|
|
if matchup_key not in games_by_date:
|
|
games_by_date[matchup_key] = []
|
|
games_by_date[matchup_key].append(raw)
|
|
|
|
# Process games with doubleheader detection
|
|
for matchup_key, matchup_games in games_by_date.items():
|
|
is_doubleheader = len(matchup_games) > 1
|
|
|
|
for i, raw in enumerate(matchup_games):
|
|
game_number = (i + 1) if is_doubleheader else None
|
|
|
|
game, item_reviews = self._normalize_single_game(raw, game_number)
|
|
|
|
if game:
|
|
games.append(game)
|
|
log_game(
|
|
self.sport,
|
|
game.id,
|
|
game.home_team_id,
|
|
game.away_team_id,
|
|
game.game_date.strftime("%Y-%m-%d"),
|
|
game.status,
|
|
)
|
|
|
|
review_items.extend(item_reviews)
|
|
|
|
return games, review_items
|
|
|
|
def _normalize_single_game(
|
|
self,
|
|
raw: RawGameData,
|
|
game_number: Optional[int],
|
|
) -> tuple[Optional[Game], list[ManualReviewItem]]:
|
|
"""Normalize a single raw game."""
|
|
review_items: list[ManualReviewItem] = []
|
|
|
|
# Resolve home team
|
|
home_result = self._team_resolver.resolve(
|
|
raw.home_team_raw,
|
|
check_date=raw.game_date.date(),
|
|
source_url=raw.source_url,
|
|
)
|
|
|
|
if home_result.review_item:
|
|
review_items.append(home_result.review_item)
|
|
|
|
if not home_result.canonical_id:
|
|
log_warning(f"Could not resolve home team: {raw.home_team_raw}")
|
|
return None, review_items
|
|
|
|
# Resolve away team
|
|
away_result = self._team_resolver.resolve(
|
|
raw.away_team_raw,
|
|
check_date=raw.game_date.date(),
|
|
source_url=raw.source_url,
|
|
)
|
|
|
|
if away_result.review_item:
|
|
review_items.append(away_result.review_item)
|
|
|
|
if not away_result.canonical_id:
|
|
log_warning(f"Could not resolve away team: {raw.away_team_raw}")
|
|
return None, review_items
|
|
|
|
# Resolve stadium (optional - use home team's stadium if not found)
|
|
stadium_id = None
|
|
|
|
if raw.stadium_raw:
|
|
stadium_result = self._stadium_resolver.resolve(
|
|
raw.stadium_raw,
|
|
check_date=raw.game_date.date(),
|
|
source_url=raw.source_url,
|
|
)
|
|
|
|
if stadium_result.review_item:
|
|
review_items.append(stadium_result.review_item)
|
|
|
|
stadium_id = stadium_result.canonical_id
|
|
|
|
# If no stadium found, use home team's default stadium
|
|
if not stadium_id:
|
|
# Look up home team's stadium from mappings
|
|
home_abbrev = home_result.canonical_id.split("_")[-1].upper()
|
|
team_info = self._team_resolver.get_team_info(home_abbrev)
|
|
|
|
if team_info:
|
|
# Try to find stadium by team's home arena
|
|
for sid, sinfo in STADIUM_MAPPINGS.get("nba", {}).items():
|
|
# Match by city
|
|
if sinfo.city.lower() in team_info[2].lower():
|
|
stadium_id = sid
|
|
break
|
|
|
|
# Get abbreviations for game ID
|
|
home_abbrev = self._get_abbreviation(home_result.canonical_id)
|
|
away_abbrev = self._get_abbreviation(away_result.canonical_id)
|
|
|
|
# Generate canonical game ID
|
|
game_id = generate_game_id(
|
|
sport=self.sport,
|
|
season=self.season,
|
|
away_abbrev=away_abbrev,
|
|
home_abbrev=home_abbrev,
|
|
game_date=raw.game_date,
|
|
game_number=game_number,
|
|
)
|
|
|
|
game = Game(
|
|
id=game_id,
|
|
sport=self.sport,
|
|
season=self.season,
|
|
home_team_id=home_result.canonical_id,
|
|
away_team_id=away_result.canonical_id,
|
|
stadium_id=stadium_id or "",
|
|
game_date=raw.game_date,
|
|
game_number=game_number,
|
|
home_score=raw.home_score,
|
|
away_score=raw.away_score,
|
|
status=raw.status,
|
|
source_url=raw.source_url,
|
|
raw_home_team=raw.home_team_raw,
|
|
raw_away_team=raw.away_team_raw,
|
|
raw_stadium=raw.stadium_raw,
|
|
)
|
|
|
|
return game, review_items
|
|
|
|
def _get_abbreviation(self, team_id: str) -> str:
|
|
"""Extract abbreviation from team ID."""
|
|
# team_nba_okc -> okc
|
|
parts = team_id.split("_")
|
|
return parts[-1] if parts else ""
|
|
|
|
def scrape_teams(self) -> list[Team]:
|
|
"""Get all NBA teams from hardcoded mappings."""
|
|
teams: list[Team] = []
|
|
seen: set[str] = set()
|
|
|
|
# NBA conference/division structure
|
|
divisions = {
|
|
"Atlantic": ("Eastern", ["BOS", "BKN", "NYK", "PHI", "TOR"]),
|
|
"Central": ("Eastern", ["CHI", "CLE", "DET", "IND", "MIL"]),
|
|
"Southeast": ("Eastern", ["ATL", "CHA", "MIA", "ORL", "WAS"]),
|
|
"Northwest": ("Western", ["DEN", "MIN", "OKC", "POR", "UTA"]),
|
|
"Pacific": ("Western", ["GSW", "LAC", "LAL", "PHX", "SAC"]),
|
|
"Southwest": ("Western", ["DAL", "HOU", "MEM", "NOP", "SAS"]),
|
|
}
|
|
|
|
# Build reverse lookup
|
|
team_divisions: dict[str, tuple[str, str]] = {}
|
|
for div, (conf, abbrevs) in divisions.items():
|
|
for abbrev in abbrevs:
|
|
team_divisions[abbrev] = (conf, div)
|
|
|
|
for abbrev, (team_id, full_name, city, stadium_id) in TEAM_MAPPINGS.get("nba", {}).items():
|
|
if team_id in seen:
|
|
continue
|
|
seen.add(team_id)
|
|
|
|
# Parse full name into city and name parts
|
|
parts = full_name.split()
|
|
if len(parts) >= 2:
|
|
# Handle special cases like "Oklahoma City Thunder"
|
|
if city == "Oklahoma City":
|
|
team_name = "Thunder"
|
|
elif city == "Golden State":
|
|
team_name = "Warriors"
|
|
elif city == "San Antonio":
|
|
team_name = "Spurs"
|
|
elif city == "New York":
|
|
team_name = parts[-1] # Knicks
|
|
elif city == "New Orleans":
|
|
team_name = "Pelicans"
|
|
elif city == "Los Angeles":
|
|
team_name = parts[-1] # Lakers or Clippers
|
|
else:
|
|
team_name = parts[-1]
|
|
else:
|
|
team_name = full_name
|
|
|
|
# Get conference and division
|
|
conf, div = team_divisions.get(abbrev, (None, None))
|
|
|
|
team = Team(
|
|
id=team_id,
|
|
sport="nba",
|
|
city=city,
|
|
name=team_name,
|
|
full_name=full_name,
|
|
abbreviation=abbrev,
|
|
conference=conf,
|
|
division=div,
|
|
stadium_id=stadium_id,
|
|
)
|
|
teams.append(team)
|
|
|
|
return teams
|
|
|
|
def scrape_stadiums(self) -> list[Stadium]:
|
|
"""Get all NBA stadiums from hardcoded mappings."""
|
|
stadiums: list[Stadium] = []
|
|
|
|
for stadium_id, info in STADIUM_MAPPINGS.get("nba", {}).items():
|
|
stadium = Stadium(
|
|
id=stadium_id,
|
|
sport="nba",
|
|
name=info.name,
|
|
city=info.city,
|
|
state=info.state,
|
|
country=info.country,
|
|
latitude=info.latitude,
|
|
longitude=info.longitude,
|
|
timezone=info.timezone,
|
|
surface="hardwood",
|
|
roof_type="dome",
|
|
)
|
|
stadiums.append(stadium)
|
|
|
|
return stadiums
|
|
|
|
|
|
def create_nba_scraper(season: int) -> NBAScraper:
    """Factory: build an NBAScraper for the given season start year."""
    return NBAScraper(season=season)
|