feat(scripts): add sportstime-parser data pipeline

Complete Python package for scraping, normalizing, and uploading
sports schedule data to CloudKit. Includes:

- Multi-source scrapers for NBA, MLB, NFL, NHL, MLS, WNBA, NWSL
- Canonical ID system for teams, stadiums, and games
- Fuzzy matching with manual alias support
- CloudKit uploader with batch operations and deduplication
- Comprehensive test suite with fixtures
- WNBA abbreviation aliases for improved team resolution
- Alias validation script to detect orphan references

All 5 phases of the data remediation plan completed:
- Phase 1: Alias fixes (team/stadium alias additions)
- Phase 2: NHL stadium coordinate fixes
- Phase 3: Re-scrape validation
- Phase 4: iOS bundle update
- Phase 5: Code quality improvements (WNBA aliases)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-01-20 18:56:25 -06:00
parent ac78042a7e
commit 52d445bca4
76 changed files with 25065 additions and 0 deletions

View File

@@ -0,0 +1,46 @@
"""Scrapers for fetching sports data from various sources."""
from .base import (
BaseScraper,
RawGameData,
ScrapeResult,
ScraperError,
PartialDataError,
)
from .nba import NBAScraper, create_nba_scraper
from .mlb import MLBScraper, create_mlb_scraper
from .nfl import NFLScraper, create_nfl_scraper
from .nhl import NHLScraper, create_nhl_scraper
from .mls import MLSScraper, create_mls_scraper
from .wnba import WNBAScraper, create_wnba_scraper
from .nwsl import NWSLScraper, create_nwsl_scraper
__all__ = [
# Base
"BaseScraper",
"RawGameData",
"ScrapeResult",
"ScraperError",
"PartialDataError",
# NBA
"NBAScraper",
"create_nba_scraper",
# MLB
"MLBScraper",
"create_mlb_scraper",
# NFL
"NFLScraper",
"create_nfl_scraper",
# NHL
"NHLScraper",
"create_nhl_scraper",
# MLS
"MLSScraper",
"create_mls_scraper",
# WNBA
"WNBAScraper",
"create_wnba_scraper",
# NWSL
"NWSLScraper",
"create_nwsl_scraper",
]

View File

@@ -0,0 +1,335 @@
"""Base scraper class for all sport scrapers."""
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import date, datetime
from typing import Optional
from ..config import EXPECTED_GAME_COUNTS
from ..models.game import Game
from ..models.team import Team
from ..models.stadium import Stadium
from ..models.aliases import ManualReviewItem
from ..utils.http import RateLimitedSession, get_session
from ..utils.logging import get_logger, log_error, log_warning
from ..utils.progress import ScrapeProgress
@dataclass
class RawGameData:
    """Raw game data before normalization.

    This intermediate format holds data as scraped from sources,
    before team/stadium resolution and canonical ID generation.
    """

    game_date: datetime  # game start as parsed from the source
    home_team_raw: str  # home team name exactly as the source printed it
    away_team_raw: str  # away team name exactly as the source printed it
    stadium_raw: Optional[str] = None  # venue name, when the source provides one
    home_score: Optional[int] = None  # None when the source reports no score
    away_score: Optional[int] = None  # None when the source reports no score
    # Scrapers set one of: "scheduled", "final", "postponed", "cancelled".
    status: str = "scheduled"
    source_url: Optional[str] = None  # URL the row came from (for manual review)
    game_number: Optional[int] = None  # For doubleheaders
@dataclass
class ScrapeResult:
    """Result of a scraping operation.

    Attributes:
        games: List of normalized Game objects
        teams: List of Team objects
        stadiums: List of Stadium objects
        review_items: Items requiring manual review
        source: Name of the source used
        success: Whether scraping succeeded
        error_message: Error message if failed
    """

    games: list[Game] = field(default_factory=list)
    teams: list[Team] = field(default_factory=list)
    stadiums: list[Stadium] = field(default_factory=list)
    review_items: list[ManualReviewItem] = field(default_factory=list)
    source: str = ""
    success: bool = True
    error_message: Optional[str] = None

    @property
    def game_count(self) -> int:
        """Number of normalized games."""
        return len(self.games)

    @property
    def team_count(self) -> int:
        """Number of teams."""
        return len(self.teams)

    @property
    def stadium_count(self) -> int:
        """Number of stadiums."""
        return len(self.stadiums)

    @property
    def review_count(self) -> int:
        """Number of items flagged for manual review."""
        return len(self.review_items)
class BaseScraper(ABC):
    """Abstract base class for sport scrapers.

    Subclasses must implement:
    - _get_sources(): Return list of source names in priority order
    - _scrape_games_from_source(): Fetch raw games for one source
    - _normalize_games(): Turn raw games into Game objects
    - scrape_teams(): Fetch team information
    - scrape_stadiums(): Fetch stadium information

    (scrape_games() and scrape_all() are concrete and drive the hooks above.)

    Features:
    - Multi-source fallback (try sources in order)
    - Built-in rate limiting
    - Error handling with partial data discard
    - Progress tracking
    - Source URL tracking for manual review
    """

    def __init__(
        self,
        sport: str,
        season: int,
        session: Optional[RateLimitedSession] = None,
    ):
        """Initialize the scraper.

        Args:
            sport: Sport code (e.g., 'nba', 'mlb')
            season: Season start year (e.g., 2025 for 2025-26)
            session: Optional HTTP session (default: global session)
        """
        # Lowercased so EXPECTED_GAME_COUNTS lookups and IDs are consistent.
        self.sport = sport.lower()
        self.season = season
        # Fall back to the shared, rate-limited global session.
        self.session = session or get_session()
        self._logger = get_logger()
        # Created lazily in scrape_all(); None until then.
        self._progress: Optional[ScrapeProgress] = None

    @property
    def expected_game_count(self) -> int:
        """Get expected number of games for this sport (0 if unknown)."""
        return EXPECTED_GAME_COUNTS.get(self.sport, 0)

    @abstractmethod
    def _get_sources(self) -> list[str]:
        """Return list of source names in priority order.

        Returns:
            List of source identifiers (e.g., ['basketball_reference', 'espn', 'cbs'])
        """
        pass

    @abstractmethod
    def _scrape_games_from_source(
        self,
        source: str,
    ) -> list[RawGameData]:
        """Scrape games from a specific source.

        Args:
            source: Source identifier

        Returns:
            List of raw game data

        Raises:
            Exception: If scraping fails
        """
        pass

    @abstractmethod
    def _normalize_games(
        self,
        raw_games: list[RawGameData],
    ) -> tuple[list[Game], list[ManualReviewItem]]:
        """Normalize raw game data to Game objects.

        Args:
            raw_games: Raw scraped data

        Returns:
            Tuple of (normalized games, review items)
        """
        pass

    @abstractmethod
    def scrape_teams(self) -> list[Team]:
        """Fetch team information.

        Returns:
            List of Team objects
        """
        pass

    @abstractmethod
    def scrape_stadiums(self) -> list[Stadium]:
        """Fetch stadium information.

        Returns:
            List of Stadium objects
        """
        pass

    def scrape_games(self) -> ScrapeResult:
        """Scrape games with multi-source fallback.

        Tries each source in priority order. On failure, discards
        partial data and tries the next source.

        Returns:
            ScrapeResult with games, review items, and status
        """
        sources = self._get_sources()
        last_error: Optional[str] = None
        sources_tried = 0
        # Allow 3 sources to be tried. This enables NHL to fall back to NHL API
        # for venue data since Hockey Reference doesn't provide it.
        max_sources_to_try = 3
        for source in sources:
            self._logger.info(f"Trying source: {source}")
            sources_tried += 1
            try:
                # Scrape raw data
                raw_games = self._scrape_games_from_source(source)
                if not raw_games:
                    log_warning(f"No games found from {source}")
                    # If multiple sources return nothing, the schedule likely doesn't exist
                    if sources_tried >= max_sources_to_try:
                        return ScrapeResult(
                            success=False,
                            error_message=f"No schedule data available (tried {sources_tried} sources)",
                        )
                    continue
                self._logger.info(f"Found {len(raw_games)} raw games from {source}")
                # Normalize data
                games, review_items = self._normalize_games(raw_games)
                self._logger.info(
                    f"Normalized {len(games)} games, {len(review_items)} need review"
                )
                # First source that yields games wins; remaining sources are skipped.
                return ScrapeResult(
                    games=games,
                    review_items=review_items,
                    source=source,
                    success=True,
                )
            except Exception as e:
                # Partial data from this source is discarded entirely.
                last_error = str(e)
                log_error(f"Failed to scrape from {source}: {e}", exc_info=True)
                # If we've tried enough sources, bail out
                if sources_tried >= max_sources_to_try:
                    break
                continue
        # All sources failed (or fewer than max_sources_to_try all came up empty).
        return ScrapeResult(
            success=False,
            error_message=f"All sources failed. Last error: {last_error}",
        )

    def scrape_all(self) -> ScrapeResult:
        """Scrape games, teams, and stadiums.

        Returns:
            Complete ScrapeResult with all data
        """
        self._progress = ScrapeProgress(self.sport, self.season)
        self._progress.start()
        try:
            # Scrape games
            result = self.scrape_games()
            if not result.success:
                self._progress.log_error(result.error_message or "Unknown error")
                self._progress.finish()
                return result
            # Scrape teams
            teams = self.scrape_teams()
            result.teams = teams
            # Scrape stadiums
            stadiums = self.scrape_stadiums()
            result.stadiums = stadiums
            # Update progress
            self._progress.games_count = result.game_count
            self._progress.teams_count = result.team_count
            self._progress.stadiums_count = result.stadium_count
            self._progress.errors_count = result.review_count
            self._progress.finish()
            return result
        except Exception as e:
            log_error(f"Scraping failed: {e}", exc_info=True)
            # Always close out the progress display, even on failure.
            self._progress.finish()
            return ScrapeResult(
                success=False,
                error_message=str(e),
            )

    def _get_season_months(self) -> list[tuple[int, int]]:
        """Get the months to scrape for this sport's season.

        Returns:
            List of (year, month) tuples
        """
        # Default implementation for sports with fall-spring seasons
        # (NBA, NHL, etc.). Calendar-year sports override this.
        months = []
        # Fall months of season start year
        for month in range(10, 13):  # Oct-Dec
            months.append((self.season, month))
        # Winter-spring months of following year
        for month in range(1, 7):  # Jan-Jun
            months.append((self.season + 1, month))
        return months

    def _get_source_url(self, source: str, **kwargs) -> str:
        """Build a source URL with parameters.

        Subclasses should override this to build URLs for their sources.

        Args:
            source: Source identifier
            **kwargs: URL parameters

        Returns:
            Complete URL string

        Raises:
            NotImplementedError: If the subclass has not provided a builder.
        """
        raise NotImplementedError(f"URL builder not implemented for {source}")
class ScraperError(Exception):
    """Raised when a scraping operation fails for a given source."""

    def __init__(self, source: str, message: str):
        """Store the failing source and message; str(self) is "[source] message"."""
        self.source = source
        self.message = message
        formatted = "[{}] {}".format(source, message)
        super().__init__(formatted)
class PartialDataError(ScraperError):
    """Raised when a source returned some, but not all, expected data."""

    def __init__(self, source: str, message: str, partial_count: int):
        """Record how many items were retrieved before delegating to ScraperError."""
        self.partial_count = partial_count
        detail = "{} (got {} items)".format(message, partial_count)
        super().__init__(source, detail)

View File

@@ -0,0 +1,685 @@
"""MLB scraper implementation with multi-source fallback."""
from datetime import datetime, date, timedelta
from typing import Optional
from bs4 import BeautifulSoup
from .base import BaseScraper, RawGameData, ScrapeResult
from ..models.game import Game
from ..models.team import Team
from ..models.stadium import Stadium
from ..models.aliases import ManualReviewItem
from ..normalizers.canonical_id import generate_game_id
from ..normalizers.team_resolver import (
TeamResolver,
TEAM_MAPPINGS,
get_team_resolver,
)
from ..normalizers.stadium_resolver import (
StadiumResolver,
STADIUM_MAPPINGS,
get_stadium_resolver,
)
from ..normalizers.timezone import parse_datetime
from ..utils.logging import get_logger, log_game, log_warning
class MLBScraper(BaseScraper):
    """MLB schedule scraper with multi-source fallback.

    Sources (in priority order, matching _get_sources):
    1. MLB Stats API - Official MLB data, full season in one request
    2. ESPN API - Backup option
    3. Baseball-Reference - HTML-parsing fallback
    """

    def __init__(self, season: int, **kwargs):
        """Initialize MLB scraper.

        Args:
            season: Season year (e.g., 2026 for 2026 season)
            **kwargs: Forwarded to BaseScraper (e.g., a custom session)
        """
        super().__init__("mlb", season, **kwargs)
        # Resolvers translate raw source names into canonical team/stadium IDs.
        self._team_resolver = get_team_resolver("mlb")
        self._stadium_resolver = get_stadium_resolver("mlb")

    def _get_sources(self) -> list[str]:
        """Return source list in priority order."""
        # MLB API is best - returns full schedule in one request
        # ESPN caps at ~25 results for baseball
        # Baseball-Reference requires HTML parsing
        return ["mlb_api", "espn", "baseball_reference"]

    def _get_source_url(self, source: str, **kwargs) -> str:
        """Build URL for a source.

        Args:
            source: Source identifier
            **kwargs: URL parameters (start_date/end_date for mlb_api,
                date for espn)

        Raises:
            ValueError: If the source is not recognized.
        """
        if source == "baseball_reference":
            # NOTE(review): 'month' is read but never used in the URL below.
            month = kwargs.get("month", "april")
            # Baseball-Reference uses season year in URL
            return f"https://www.baseball-reference.com/leagues/majors/{self.season}-schedule.shtml"
        elif source == "mlb_api":
            start_date = kwargs.get("start_date", "")
            end_date = kwargs.get("end_date", "")
            return f"https://statsapi.mlb.com/api/v1/schedule?sportId=1&startDate={start_date}&endDate={end_date}"
        elif source == "espn":
            date_str = kwargs.get("date", "")
            return f"https://site.api.espn.com/apis/site/v2/sports/baseball/mlb/scoreboard?dates={date_str}"
        raise ValueError(f"Unknown source: {source}")

    def _get_season_months(self) -> list[tuple[int, int]]:
        """Get the months to scrape for MLB season.

        MLB season runs March/April through October/November.

        Returns:
            List of (year, month) tuples, all within the season year.
        """
        months = []
        # Spring training / early season
        for month in range(3, 12):  # March-November
            months.append((self.season, month))
        return months

    def _scrape_games_from_source(self, source: str) -> list[RawGameData]:
        """Scrape games from a specific source."""
        if source == "baseball_reference":
            return self._scrape_baseball_reference()
        elif source == "mlb_api":
            return self._scrape_mlb_api()
        elif source == "espn":
            return self._scrape_espn()
        else:
            raise ValueError(f"Unknown source: {source}")

    def _scrape_baseball_reference(self) -> list[RawGameData]:
        """Scrape games from Baseball-Reference.

        BR has a single schedule page per season.
        Format: https://www.baseball-reference.com/leagues/majors/YYYY-schedule.shtml
        """
        url = self._get_source_url("baseball_reference")
        try:
            html = self.session.get_html(url)
            games = self._parse_baseball_reference(html, url)
            return games
        except Exception as e:
            # Re-raise so the caller records the failure and moves on.
            self._logger.error(f"Failed to scrape Baseball-Reference: {e}")
            raise

    def _parse_baseball_reference(
        self,
        html: str,
        source_url: str,
    ) -> list[RawGameData]:
        """Parse Baseball-Reference schedule HTML.

        Structure: Games are organized by date in div elements.
        Each game row has: date, away team, away score, home team, home score, venue.

        Args:
            html: Raw schedule page HTML
            source_url: URL the HTML came from (attached to each game)
        """
        soup = BeautifulSoup(html, "lxml")
        games: list[RawGameData] = []
        # Find all game divs - they use class "game" or similar
        # Baseball-Reference uses <p class="game"> for each game
        # NOTE(review): 'game_paragraphs' is never used; the walk below
        # re-discovers the same <p class="game"> elements in document order.
        game_paragraphs = soup.find_all("p", class_="game")
        current_date = None
        # Walk headers and paragraphs in document order so each game row is
        # associated with the most recent date header seen.
        for elem in soup.find_all(["h3", "p"]):
            # H3 contains date headers
            if elem.name == "h3":
                date_text = elem.get_text(strip=True)
                try:
                    # Format: "Thursday, April 1, 2026"
                    current_date = datetime.strptime(date_text, "%A, %B %d, %Y")
                except ValueError:
                    continue
            elif elem.name == "p" and "game" in elem.get("class", []):
                if current_date is None:
                    # Game row before any date header: nothing to assign.
                    continue
                try:
                    game = self._parse_br_game(elem, current_date, source_url)
                    if game:
                        games.append(game)
                except Exception as e:
                    self._logger.debug(f"Failed to parse game: {e}")
                    continue
        return games

    def _parse_br_game(
        self,
        elem,
        game_date: datetime,
        source_url: str,
    ) -> Optional[RawGameData]:
        """Parse a single Baseball-Reference game element.

        Returns:
            RawGameData, or None when the element lacks two team links.
        """
        text = elem.get_text(" ", strip=True)
        # Parse game text - formats vary:
        # "Team A (5) @ Team B (3)" or "Team A @ Team B"
        # Also handles doubleheader notation
        # Find all links - usually team names
        links = elem.find_all("a")
        if len(links) < 2:
            return None
        # First link is away team, second is home team
        away_team = links[0].get_text(strip=True)
        home_team = links[1].get_text(strip=True)
        # Try to extract scores from text
        away_score = None
        home_score = None
        # Look for score pattern "(N)"
        import re  # NOTE(review): consider hoisting this to module level
        score_pattern = r"\((\d+)\)"
        scores = re.findall(score_pattern, text)
        if len(scores) >= 2:
            try:
                away_score = int(scores[0])
                home_score = int(scores[1])
            except (ValueError, IndexError):
                pass
        # Determine status
        status = "final" if home_score is not None else "scheduled"
        # Check for postponed/cancelled
        text_lower = text.lower()
        if "postponed" in text_lower:
            status = "postponed"
        elif "cancelled" in text_lower or "canceled" in text_lower:
            status = "cancelled"
        # Extract venue if present (usually after @ symbol)
        stadium = None
        if len(links) > 2:
            # Third link might be stadium
            stadium = links[2].get_text(strip=True)
        return RawGameData(
            game_date=game_date,
            home_team_raw=home_team,
            away_team_raw=away_team,
            stadium_raw=stadium,
            home_score=home_score,
            away_score=away_score,
            status=status,
            source_url=source_url,
        )

    def _scrape_mlb_api(self) -> list[RawGameData]:
        """Scrape games from MLB Stats API using full season query."""
        # Build date range for entire season (March-November)
        season_months = self._get_season_months()
        start_year, start_month = season_months[0]
        end_year, end_month = season_months[-1]
        # Get last day of end month
        if end_month == 12:
            end_date = date(end_year + 1, 1, 1) - timedelta(days=1)
        else:
            # Day before the 1st of the following month.
            end_date = date(end_year, end_month + 1, 1) - timedelta(days=1)
        start_date = date(start_year, start_month, 1)
        url = f"https://statsapi.mlb.com/api/v1/schedule?sportId=1&startDate={start_date.strftime('%Y-%m-%d')}&endDate={end_date.strftime('%Y-%m-%d')}"
        self._logger.info(f"Fetching MLB schedule: {start_date} to {end_date}")
        try:
            data = self.session.get_json(url)
            return self._parse_mlb_api_response(data, url)
        except Exception as e:
            # Empty list (not a raise): scrape_games() treats an empty result
            # as "try the next source".
            self._logger.error(f"MLB API error: {e}")
            return []

    def _parse_mlb_api_response(
        self,
        data: dict,
        source_url: str,
    ) -> list[RawGameData]:
        """Parse MLB Stats API response.

        Games that fail to parse are logged at debug level and skipped.
        """
        games: list[RawGameData] = []
        dates = data.get("dates", [])
        # Response groups games under per-date entries.
        for date_entry in dates:
            for game in date_entry.get("games", []):
                try:
                    raw_game = self._parse_mlb_api_game(game, source_url)
                    if raw_game:
                        games.append(raw_game)
                except Exception as e:
                    self._logger.debug(f"Failed to parse MLB API game: {e}")
                    continue
        return games

    def _parse_mlb_api_game(
        self,
        game: dict,
        source_url: str,
    ) -> Optional[RawGameData]:
        """Parse a single MLB API game.

        Returns:
            RawGameData, or None when the date or either team name is missing.
        """
        # Get game date/time
        game_date_str = game.get("gameDate", "")
        if not game_date_str:
            return None
        try:
            # API dates use a trailing "Z"; fromisoformat needs an explicit offset.
            game_date = datetime.fromisoformat(game_date_str.replace("Z", "+00:00"))
        except ValueError:
            return None
        # Get teams
        teams = game.get("teams", {})
        away_data = teams.get("away", {})
        home_data = teams.get("home", {})
        away_team_info = away_data.get("team", {})
        home_team_info = home_data.get("team", {})
        away_team = away_team_info.get("name", "")
        home_team = home_team_info.get("name", "")
        if not away_team or not home_team:
            return None
        # Get scores
        away_score = away_data.get("score")
        home_score = home_data.get("score")
        # Get venue
        venue = game.get("venue", {})
        stadium = venue.get("name")
        # Get status
        status_data = game.get("status", {})
        abstract_game_state = status_data.get("abstractGameState", "").lower()
        detailed_state = status_data.get("detailedState", "").lower()
        if abstract_game_state == "final":
            status = "final"
        elif "postponed" in detailed_state:
            status = "postponed"
        elif "cancelled" in detailed_state or "canceled" in detailed_state:
            status = "cancelled"
        else:
            status = "scheduled"
        # Check for doubleheader
        # NOTE(review): the first assignment is effectively dead — game_number
        # is only passed through below when doubleHeader == "Y".
        game_number = game.get("gameNumber")
        if game.get("doubleHeader") == "Y":
            game_number = game.get("gameNumber", 1)
        return RawGameData(
            game_date=game_date,
            home_team_raw=home_team,
            away_team_raw=away_team,
            stadium_raw=stadium,
            home_score=home_score,
            away_score=away_score,
            status=status,
            source_url=source_url,
            game_number=game_number if game.get("doubleHeader") == "Y" else None,
        )

    def _scrape_espn(self) -> list[RawGameData]:
        """Scrape games from ESPN API using date range query."""
        # Build date range for entire season (March-November)
        season_months = self._get_season_months()
        start_year, start_month = season_months[0]
        end_year, end_month = season_months[-1]
        # Get last day of end month
        if end_month == 12:
            end_date = date(end_year + 1, 1, 1) - timedelta(days=1)
        else:
            # Day before the 1st of the following month.
            end_date = date(end_year, end_month + 1, 1) - timedelta(days=1)
        start_date = date(start_year, start_month, 1)
        date_range = f"{start_date.strftime('%Y%m%d')}-{end_date.strftime('%Y%m%d')}"
        url = f"https://site.api.espn.com/apis/site/v2/sports/baseball/mlb/scoreboard?limit=3000&dates={date_range}"
        self._logger.info(f"Fetching MLB schedule: {date_range}")
        try:
            data = self.session.get_json(url)
            return self._parse_espn_response(data, url)
        except Exception as e:
            # Empty list signals "no data from this source" to scrape_games().
            self._logger.error(f"ESPN error: {e}")
            return []

    def _parse_espn_response(
        self,
        data: dict,
        source_url: str,
    ) -> list[RawGameData]:
        """Parse ESPN API response.

        Events that fail to parse are logged at debug level and skipped.
        """
        games: list[RawGameData] = []
        events = data.get("events", [])
        for event in events:
            try:
                game = self._parse_espn_event(event, source_url)
                if game:
                    games.append(game)
            except Exception as e:
                self._logger.debug(f"Failed to parse ESPN event: {e}")
                continue
        return games

    def _parse_espn_event(
        self,
        event: dict,
        source_url: str,
    ) -> Optional[RawGameData]:
        """Parse a single ESPN event.

        Returns:
            RawGameData, or None when date/competition/team data is missing.
        """
        # Get date
        date_str = event.get("date", "")
        if not date_str:
            return None
        try:
            # ESPN dates use a trailing "Z"; fromisoformat needs an offset.
            game_date = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
        except ValueError:
            return None
        # Get competitions
        competitions = event.get("competitions", [])
        if not competitions:
            return None
        competition = competitions[0]
        # Get teams
        competitors = competition.get("competitors", [])
        if len(competitors) != 2:
            return None
        home_team = None
        away_team = None
        home_score = None
        away_score = None
        for competitor in competitors:
            team_info = competitor.get("team", {})
            team_name = team_info.get("displayName", "")
            is_home = competitor.get("homeAway") == "home"
            score = competitor.get("score")
            # NOTE(review): falsy scores are treated as missing; ESPN scores
            # appear to be strings (int() below), so "0" still parses — confirm.
            if score:
                try:
                    score = int(score)
                except (ValueError, TypeError):
                    score = None
            if is_home:
                home_team = team_name
                home_score = score
            else:
                away_team = team_name
                away_score = score
        if not home_team or not away_team:
            return None
        # Get venue
        venue = competition.get("venue", {})
        stadium = venue.get("fullName")
        # Get status
        status_info = competition.get("status", {})
        status_type = status_info.get("type", {})
        status_name = status_type.get("name", "").lower()
        if status_name == "status_final":
            status = "final"
        elif status_name == "status_postponed":
            status = "postponed"
        elif status_name == "status_canceled":
            status = "cancelled"
        else:
            status = "scheduled"
        return RawGameData(
            game_date=game_date,
            home_team_raw=home_team,
            away_team_raw=away_team,
            stadium_raw=stadium,
            home_score=home_score,
            away_score=away_score,
            status=status,
            source_url=source_url,
        )

    def _normalize_games(
        self,
        raw_games: list[RawGameData],
    ) -> tuple[list[Game], list[ManualReviewItem]]:
        """Normalize raw games to Game objects with canonical IDs.

        Groups games by (date, matchup) first so doubleheaders receive
        distinct game numbers even when the source didn't supply them.
        """
        games: list[Game] = []
        review_items: list[ManualReviewItem] = []
        # Track games by date/matchup for doubleheader detection
        games_by_matchup: dict[str, list[RawGameData]] = {}
        for raw in raw_games:
            date_key = raw.game_date.strftime("%Y%m%d")
            matchup_key = f"{date_key}_{raw.away_team_raw}_{raw.home_team_raw}"
            if matchup_key not in games_by_matchup:
                games_by_matchup[matchup_key] = []
            games_by_matchup[matchup_key].append(raw)
        # Process games with doubleheader detection
        for matchup_key, matchup_games in games_by_matchup.items():
            is_doubleheader = len(matchup_games) > 1
            # Sort by time if doubleheader
            if is_doubleheader:
                matchup_games.sort(key=lambda g: g.game_date)
            for i, raw in enumerate(matchup_games):
                # Use provided game_number or calculate from order
                game_number = raw.game_number or ((i + 1) if is_doubleheader else None)
                game, item_reviews = self._normalize_single_game(raw, game_number)
                if game:
                    games.append(game)
                    # Log each accepted game for audit/debugging.
                    log_game(
                        self.sport,
                        game.id,
                        game.home_team_id,
                        game.away_team_id,
                        game.game_date.strftime("%Y-%m-%d"),
                        game.status,
                    )
                # Review items are kept even when the game itself is dropped.
                review_items.extend(item_reviews)
        return games, review_items

    def _normalize_single_game(
        self,
        raw: RawGameData,
        game_number: Optional[int],
    ) -> tuple[Optional[Game], list[ManualReviewItem]]:
        """Normalize a single raw game.

        Returns:
            Tuple of (Game, review items); Game is None when either team
            fails to resolve to a canonical ID.
        """
        review_items: list[ManualReviewItem] = []
        # Resolve home team
        home_result = self._team_resolver.resolve(
            raw.home_team_raw,
            check_date=raw.game_date.date(),
            source_url=raw.source_url,
        )
        if home_result.review_item:
            review_items.append(home_result.review_item)
        if not home_result.canonical_id:
            # Unresolvable team: drop the game, keep the review items.
            log_warning(f"Could not resolve home team: {raw.home_team_raw}")
            return None, review_items
        # Resolve away team
        away_result = self._team_resolver.resolve(
            raw.away_team_raw,
            check_date=raw.game_date.date(),
            source_url=raw.source_url,
        )
        if away_result.review_item:
            review_items.append(away_result.review_item)
        if not away_result.canonical_id:
            log_warning(f"Could not resolve away team: {raw.away_team_raw}")
            return None, review_items
        # Resolve stadium
        stadium_id = None
        if raw.stadium_raw:
            stadium_result = self._stadium_resolver.resolve(
                raw.stadium_raw,
                check_date=raw.game_date.date(),
                source_url=raw.source_url,
            )
            if stadium_result.review_item:
                review_items.append(stadium_result.review_item)
            # An unresolved stadium is not fatal; Game gets an empty stadium_id.
            stadium_id = stadium_result.canonical_id
        # Get abbreviations for game ID
        home_abbrev = self._get_abbreviation(home_result.canonical_id)
        away_abbrev = self._get_abbreviation(away_result.canonical_id)
        # Generate canonical game ID
        game_id = generate_game_id(
            sport=self.sport,
            season=self.season,
            away_abbrev=away_abbrev,
            home_abbrev=home_abbrev,
            game_date=raw.game_date,
            game_number=game_number,
        )
        game = Game(
            id=game_id,
            sport=self.sport,
            season=self.season,
            home_team_id=home_result.canonical_id,
            away_team_id=away_result.canonical_id,
            stadium_id=stadium_id or "",
            game_date=raw.game_date,
            game_number=game_number,
            home_score=raw.home_score,
            away_score=raw.away_score,
            status=raw.status,
            source_url=raw.source_url,
            raw_home_team=raw.home_team_raw,
            raw_away_team=raw.away_team_raw,
            raw_stadium=raw.stadium_raw,
        )
        return game, review_items

    def _get_abbreviation(self, team_id: str) -> str:
        """Extract abbreviation from team ID (last underscore segment)."""
        # team_mlb_nyy -> nyy
        parts = team_id.split("_")
        return parts[-1] if parts else ""

    def scrape_teams(self) -> list[Team]:
        """Get all MLB teams from hardcoded mappings."""
        teams: list[Team] = []
        seen: set[str] = set()
        # MLB league/division structure
        divisions = {
            "AL East": ("American", ["BAL", "BOS", "NYY", "TB", "TOR"]),
            "AL Central": ("American", ["CHW", "CLE", "DET", "KC", "MIN"]),
            "AL West": ("American", ["HOU", "LAA", "OAK", "SEA", "TEX"]),
            "NL East": ("National", ["ATL", "MIA", "NYM", "PHI", "WSN"]),
            "NL Central": ("National", ["CHC", "CIN", "MIL", "PIT", "STL"]),
            "NL West": ("National", ["ARI", "COL", "LAD", "SD", "SF"]),
        }
        # Build reverse lookup
        team_divisions: dict[str, tuple[str, str]] = {}
        for div, (league, abbrevs) in divisions.items():
            for abbrev in abbrevs:
                team_divisions[abbrev] = (league, div)
        for abbrev, (team_id, full_name, city, stadium_id) in TEAM_MAPPINGS.get("mlb", {}).items():
            if team_id in seen:
                # Multiple aliases can map to one team; emit each team once.
                continue
            seen.add(team_id)
            # Parse team name from full name
            parts = full_name.split()
            if len(parts) >= 2:
                team_name = parts[-1]
                # Handle multi-word team names
                # (e.g. "Red Sox", "White Sox", "Blue Jays")
                if team_name in ["Sox", "Jays"]:
                    team_name = " ".join(parts[-2:])
            else:
                team_name = full_name
            # Get league and division
            league, div = team_divisions.get(abbrev, (None, None))
            team = Team(
                id=team_id,
                sport="mlb",
                city=city,
                name=team_name,
                full_name=full_name,
                abbreviation=abbrev,
                conference=league,  # MLB uses "league" but we map to conference field
                division=div,
                stadium_id=stadium_id,
            )
            teams.append(team)
        return teams

    def scrape_stadiums(self) -> list[Stadium]:
        """Get all MLB stadiums from hardcoded mappings."""
        stadiums: list[Stadium] = []
        mlb_stadiums = STADIUM_MAPPINGS.get("mlb", {})
        for stadium_id, info in mlb_stadiums.items():
            stadium = Stadium(
                id=stadium_id,
                sport="mlb",
                name=info.name,
                city=info.city,
                state=info.state,
                country=info.country,
                latitude=info.latitude,
                longitude=info.longitude,
                surface="grass",  # Most MLB stadiums
                roof_type="open",  # Most MLB stadiums
            )
            stadiums.append(stadium)
        return stadiums
def create_mlb_scraper(season: int) -> MLBScraper:
    """Build and return an MLBScraper for the given season year."""
    scraper = MLBScraper(season=season)
    return scraper

View File

@@ -0,0 +1,400 @@
"""MLS scraper implementation with multi-source fallback."""
from datetime import datetime, date, timedelta
from typing import Optional
from .base import BaseScraper, RawGameData, ScrapeResult
from ..models.game import Game
from ..models.team import Team
from ..models.stadium import Stadium
from ..models.aliases import ManualReviewItem
from ..normalizers.canonical_id import generate_game_id
from ..normalizers.team_resolver import (
TeamResolver,
TEAM_MAPPINGS,
get_team_resolver,
)
from ..normalizers.stadium_resolver import (
StadiumResolver,
STADIUM_MAPPINGS,
get_stadium_resolver,
)
from ..utils.logging import get_logger, log_game, log_warning
class MLSScraper(BaseScraper):
"""MLS schedule scraper with multi-source fallback.
Sources (in priority order):
1. ESPN API - Most reliable for MLS
2. FBref - Backup option
"""
    def __init__(self, season: int, **kwargs):
        """Initialize MLS scraper.

        Args:
            season: Season year (e.g., 2026 for 2026 season)
            **kwargs: Forwarded to BaseScraper (e.g., a custom session)
        """
        super().__init__("mls", season, **kwargs)
        # Resolvers translate raw source names into canonical MLS IDs.
        self._team_resolver = get_team_resolver("mls")
        self._stadium_resolver = get_stadium_resolver("mls")
def _get_sources(self) -> list[str]:
"""Return source list in priority order."""
# FBref scraper not yet implemented - TODO for future
return ["espn"]
def _get_source_url(self, source: str, **kwargs) -> str:
"""Build URL for a source."""
if source == "espn":
date_str = kwargs.get("date", "")
return f"https://site.api.espn.com/apis/site/v2/sports/soccer/usa.1/scoreboard?dates={date_str}"
elif source == "fbref":
return f"https://fbref.com/en/comps/22/{self.season}/schedule/{self.season}-Major-League-Soccer-Scores-and-Fixtures"
raise ValueError(f"Unknown source: {source}")
def _get_season_months(self) -> list[tuple[int, int]]:
"""Get the months to scrape for MLS season.
MLS season runs February/March through October/November.
"""
months = []
# MLS runs within a calendar year
for month in range(2, 12): # Feb-Nov
months.append((self.season, month))
return months
def _scrape_games_from_source(self, source: str) -> list[RawGameData]:
"""Scrape games from a specific source."""
if source == "espn":
return self._scrape_espn()
elif source == "fbref":
return self._scrape_fbref()
else:
raise ValueError(f"Unknown source: {source}")
    def _scrape_espn(self) -> list[RawGameData]:
        """Scrape games from ESPN API using date range query."""
        # Build date range for entire season (Feb-November)
        season_months = self._get_season_months()
        start_year, start_month = season_months[0]
        end_year, end_month = season_months[-1]
        # Get last day of end month
        if end_month == 12:
            end_date = date(end_year + 1, 1, 1) - timedelta(days=1)
        else:
            # Day before the 1st of the following month.
            end_date = date(end_year, end_month + 1, 1) - timedelta(days=1)
        start_date = date(start_year, start_month, 1)
        date_range = f"{start_date.strftime('%Y%m%d')}-{end_date.strftime('%Y%m%d')}"
        url = f"https://site.api.espn.com/apis/site/v2/sports/soccer/usa.1/scoreboard?limit=1000&dates={date_range}"
        self._logger.info(f"Fetching MLS schedule: {date_range}")
        try:
            data = self.session.get_json(url)
            return self._parse_espn_response(data, url)
        except Exception as e:
            # Empty list (not a raise): scrape_games() treats an empty result
            # as "try the next source".
            self._logger.error(f"ESPN error: {e}")
            return []
def _parse_espn_response(
self,
data: dict,
source_url: str,
) -> list[RawGameData]:
"""Parse ESPN API response."""
games: list[RawGameData] = []
events = data.get("events", [])
for event in events:
try:
game = self._parse_espn_event(event, source_url)
if game:
games.append(game)
except Exception as e:
self._logger.debug(f"Failed to parse ESPN event: {e}")
continue
return games
    def _parse_espn_event(
        self,
        event: dict,
        source_url: str,
    ) -> Optional[RawGameData]:
        """Parse a single ESPN event.

        Returns:
            RawGameData, or None when date/competition/team data is missing.
        """
        # Get date
        date_str = event.get("date", "")
        if not date_str:
            return None
        try:
            # ESPN dates use a trailing "Z"; fromisoformat needs an offset.
            game_date = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
        except ValueError:
            return None
        # Get competitions
        competitions = event.get("competitions", [])
        if not competitions:
            return None
        competition = competitions[0]
        # Get teams
        competitors = competition.get("competitors", [])
        if len(competitors) != 2:
            return None
        home_team = None
        away_team = None
        home_score = None
        away_score = None
        for competitor in competitors:
            team_info = competitor.get("team", {})
            team_name = team_info.get("displayName", "")
            is_home = competitor.get("homeAway") == "home"
            score = competitor.get("score")
            # NOTE(review): falsy scores are treated as missing; ESPN scores
            # appear to be strings (int() below), so "0" still parses — confirm.
            if score:
                try:
                    score = int(score)
                except (ValueError, TypeError):
                    score = None
            if is_home:
                home_team = team_name
                home_score = score
            else:
                away_team = team_name
                away_score = score
        if not home_team or not away_team:
            return None
        # Get venue
        venue = competition.get("venue", {})
        stadium = venue.get("fullName")
        # Get status
        status_info = competition.get("status", {})
        status_type = status_info.get("type", {})
        status_name = status_type.get("name", "").lower()
        if status_name == "status_final":
            status = "final"
        elif status_name == "status_postponed":
            status = "postponed"
        elif status_name == "status_canceled":
            status = "cancelled"
        else:
            status = "scheduled"
        return RawGameData(
            game_date=game_date,
            home_team_raw=home_team,
            away_team_raw=away_team,
            stadium_raw=stadium,
            home_score=home_score,
            away_score=away_score,
            status=status,
            source_url=source_url,
        )
    def _scrape_fbref(self) -> list[RawGameData]:
        """Scrape games from FBref.

        Raises:
            NotImplementedError: always; FBref support is a stub so the
                multi-source fallback moves on to the next source.
        """
        # FBref scraping would go here
        raise NotImplementedError("FBref scraper not implemented")
def _normalize_games(
self,
raw_games: list[RawGameData],
) -> tuple[list[Game], list[ManualReviewItem]]:
"""Normalize raw games to Game objects with canonical IDs."""
games: list[Game] = []
review_items: list[ManualReviewItem] = []
for raw in raw_games:
game, item_reviews = self._normalize_single_game(raw)
if game:
games.append(game)
log_game(
self.sport,
game.id,
game.home_team_id,
game.away_team_id,
game.game_date.strftime("%Y-%m-%d"),
game.status,
)
review_items.extend(item_reviews)
return games, review_items
def _normalize_single_game(
self,
raw: RawGameData,
) -> tuple[Optional[Game], list[ManualReviewItem]]:
"""Normalize a single raw game."""
review_items: list[ManualReviewItem] = []
# Resolve home team
home_result = self._team_resolver.resolve(
raw.home_team_raw,
check_date=raw.game_date.date(),
source_url=raw.source_url,
)
if home_result.review_item:
review_items.append(home_result.review_item)
if not home_result.canonical_id:
log_warning(f"Could not resolve home team: {raw.home_team_raw}")
return None, review_items
# Resolve away team
away_result = self._team_resolver.resolve(
raw.away_team_raw,
check_date=raw.game_date.date(),
source_url=raw.source_url,
)
if away_result.review_item:
review_items.append(away_result.review_item)
if not away_result.canonical_id:
log_warning(f"Could not resolve away team: {raw.away_team_raw}")
return None, review_items
# Resolve stadium
stadium_id = None
if raw.stadium_raw:
stadium_result = self._stadium_resolver.resolve(
raw.stadium_raw,
check_date=raw.game_date.date(),
source_url=raw.source_url,
)
if stadium_result.review_item:
review_items.append(stadium_result.review_item)
stadium_id = stadium_result.canonical_id
# Get abbreviations for game ID
home_abbrev = self._get_abbreviation(home_result.canonical_id)
away_abbrev = self._get_abbreviation(away_result.canonical_id)
# Generate canonical game ID
game_id = generate_game_id(
sport=self.sport,
season=self.season,
away_abbrev=away_abbrev,
home_abbrev=home_abbrev,
game_date=raw.game_date,
game_number=None,
)
game = Game(
id=game_id,
sport=self.sport,
season=self.season,
home_team_id=home_result.canonical_id,
away_team_id=away_result.canonical_id,
stadium_id=stadium_id or "",
game_date=raw.game_date,
game_number=None,
home_score=raw.home_score,
away_score=raw.away_score,
status=raw.status,
source_url=raw.source_url,
raw_home_team=raw.home_team_raw,
raw_away_team=raw.away_team_raw,
raw_stadium=raw.stadium_raw,
)
return game, review_items
def _get_abbreviation(self, team_id: str) -> str:
"""Extract abbreviation from team ID."""
parts = team_id.split("_")
return parts[-1] if parts else ""
def scrape_teams(self) -> list[Team]:
"""Get all MLS teams from hardcoded mappings."""
teams: list[Team] = []
seen: set[str] = set()
# MLS conference structure
conferences = {
"Eastern": ["ATL", "CLT", "CHI", "CIN", "CLB", "DC", "MIA", "MTL", "NE", "NYC", "RB", "ORL", "PHI", "TOR"],
"Western": ["AUS", "COL", "DAL", "HOU", "LAG", "LAFC", "MIN", "NSH", "POR", "SLC", "SD", "SJ", "SEA", "SKC", "STL", "VAN"],
}
# Build reverse lookup
team_conferences: dict[str, str] = {}
for conf, abbrevs in conferences.items():
for abbrev in abbrevs:
team_conferences[abbrev] = conf
for abbrev, (team_id, full_name, city, stadium_id) in TEAM_MAPPINGS.get("mls", {}).items():
if team_id in seen:
continue
seen.add(team_id)
# Parse team name
team_name = full_name
# Get conference
conf = team_conferences.get(abbrev)
team = Team(
id=team_id,
sport="mls",
city=city,
name=team_name,
full_name=full_name,
abbreviation=abbrev,
conference=conf,
division=None, # MLS doesn't have divisions
stadium_id=stadium_id,
)
teams.append(team)
return teams
def scrape_stadiums(self) -> list[Stadium]:
"""Get all MLS stadiums from hardcoded mappings."""
stadiums: list[Stadium] = []
mls_stadiums = STADIUM_MAPPINGS.get("mls", {})
for stadium_id, info in mls_stadiums.items():
stadium = Stadium(
id=stadium_id,
sport="mls",
name=info.name,
city=info.city,
state=info.state,
country=info.country,
latitude=info.latitude,
longitude=info.longitude,
surface="grass",
roof_type="open",
)
stadiums.append(stadium)
return stadiums
def create_mls_scraper(season: int) -> MLSScraper:
    """Build an MLSScraper for the given season start year."""
    return MLSScraper(season=season)

View File

@@ -0,0 +1,661 @@
"""NBA scraper implementation with multi-source fallback."""
from datetime import datetime, date, timezone
from typing import Optional
from bs4 import BeautifulSoup
import re
from .base import BaseScraper, RawGameData, ScrapeResult
from ..models.game import Game
from ..models.team import Team
from ..models.stadium import Stadium
from ..models.aliases import ManualReviewItem
from ..normalizers.canonical_id import generate_game_id
from ..normalizers.team_resolver import (
TeamResolver,
TEAM_MAPPINGS,
get_team_resolver,
)
from ..normalizers.stadium_resolver import (
StadiumResolver,
STADIUM_MAPPINGS,
get_stadium_resolver,
)
from ..normalizers.timezone import parse_datetime
from ..utils.logging import get_logger, log_game, log_warning
# Month name to number mapping (lowercase name -> 1-12).
# NOTE(review): not referenced elsewhere in this module's visible code --
# confirm it is used by a date-parsing helper before removing.
MONTH_MAP = {
    "january": 1, "february": 2, "march": 3, "april": 4,
    "may": 5, "june": 6, "july": 7, "august": 8,
    "september": 9, "october": 10, "november": 11, "december": 12,
}
# Basketball Reference month URLs, in season order: an NBA season runs
# from the October tip-off through the June Finals.
BR_MONTHS = [
    "october", "november", "december",
    "january", "february", "march", "april", "may", "june",
]
class NBAScraper(BaseScraper):
"""NBA schedule scraper with multi-source fallback.
Sources (in priority order):
1. Basketball-Reference - Most reliable, complete historical data
2. ESPN API - Good for current/future seasons
3. CBS Sports - Backup option
"""
    def __init__(self, season: int, **kwargs):
        """Initialize NBA scraper.

        Args:
            season: Season start year (e.g., 2025 for 2025-26)
            **kwargs: Forwarded to BaseScraper (e.g. session options).
        """
        super().__init__("nba", season, **kwargs)
        # Resolvers map source-spelled team/arena names to canonical IDs.
        self._team_resolver = get_team_resolver("nba")
        self._stadium_resolver = get_stadium_resolver("nba")
def _get_sources(self) -> list[str]:
"""Return source list in priority order."""
# CBS scraper not yet implemented - TODO for future
return ["basketball_reference", "espn"]
def _get_source_url(self, source: str, **kwargs) -> str:
"""Build URL for a source."""
if source == "basketball_reference":
month = kwargs.get("month", "october")
year = kwargs.get("year", self.season + 1)
return f"https://www.basketball-reference.com/leagues/NBA_{year}_games-{month}.html"
elif source == "espn":
date_str = kwargs.get("date", "")
return f"https://site.api.espn.com/apis/site/v2/sports/basketball/nba/scoreboard?dates={date_str}"
elif source == "cbs":
return "https://www.cbssports.com/nba/schedule/"
raise ValueError(f"Unknown source: {source}")
def _scrape_games_from_source(self, source: str) -> list[RawGameData]:
"""Scrape games from a specific source."""
if source == "basketball_reference":
return self._scrape_basketball_reference()
elif source == "espn":
return self._scrape_espn()
elif source == "cbs":
return self._scrape_cbs()
else:
raise ValueError(f"Unknown source: {source}")
def _scrape_basketball_reference(self) -> list[RawGameData]:
"""Scrape games from Basketball-Reference.
BR organizes games by month with separate pages.
Format: https://www.basketball-reference.com/leagues/NBA_YYYY_games-month.html
where YYYY is the ending year of the season.
Bails early if first few months have no data (season doesn't exist).
"""
all_games: list[RawGameData] = []
end_year = self.season + 1
consecutive_empty_months = 0
for month in BR_MONTHS:
url = self._get_source_url("basketball_reference", month=month, year=end_year)
try:
html = self.session.get_html(url)
games = self._parse_basketball_reference(html, url)
if games:
all_games.extend(games)
consecutive_empty_months = 0
self._logger.debug(f"Found {len(games)} games in {month}")
else:
consecutive_empty_months += 1
except Exception as e:
# Some months may not exist (e.g., no games in August)
self._logger.debug(f"No data for {month}: {e}")
consecutive_empty_months += 1
# If first 3 months (Oct, Nov, Dec) all have no data, season doesn't exist
if consecutive_empty_months >= 3 and not all_games:
self._logger.info(f"No games found in first {consecutive_empty_months} months, season likely doesn't exist")
break
return all_games
def _parse_basketball_reference(
self,
html: str,
source_url: str,
) -> list[RawGameData]:
"""Parse Basketball-Reference schedule HTML.
Table structure:
- th[data-stat="date_game"]: Date (e.g., "Tue, Oct 22, 2024")
- td[data-stat="visitor_team_name"]: Away team
- td[data-stat="home_team_name"]: Home team
- td[data-stat="visitor_pts"]: Away score
- td[data-stat="home_pts"]: Home score
- td[data-stat="arena_name"]: Arena/stadium name
"""
soup = BeautifulSoup(html, "lxml")
games: list[RawGameData] = []
# Find the schedule table
table = soup.find("table", id="schedule")
if not table:
return games
tbody = table.find("tbody")
if not tbody:
return games
for row in tbody.find_all("tr"):
# Skip header rows
if row.get("class") and "thead" in row.get("class", []):
continue
try:
game = self._parse_br_row(row, source_url)
if game:
games.append(game)
except Exception as e:
self._logger.debug(f"Failed to parse row: {e}")
continue
return games
def _parse_br_row(
self,
row,
source_url: str,
) -> Optional[RawGameData]:
"""Parse a single Basketball-Reference table row."""
# Get date
date_cell = row.find("th", {"data-stat": "date_game"})
if not date_cell:
return None
date_text = date_cell.get_text(strip=True)
if not date_text:
return None
# Parse date (format: "Tue, Oct 22, 2024")
try:
game_date = datetime.strptime(date_text, "%a, %b %d, %Y")
except ValueError:
# Try alternative format
try:
game_date = datetime.strptime(date_text, "%B %d, %Y")
except ValueError:
self._logger.debug(f"Could not parse date: {date_text}")
return None
# Get teams
away_cell = row.find("td", {"data-stat": "visitor_team_name"})
home_cell = row.find("td", {"data-stat": "home_team_name"})
if not away_cell or not home_cell:
return None
away_team = away_cell.get_text(strip=True)
home_team = home_cell.get_text(strip=True)
if not away_team or not home_team:
return None
# Get scores (may be empty for future games)
away_score_cell = row.find("td", {"data-stat": "visitor_pts"})
home_score_cell = row.find("td", {"data-stat": "home_pts"})
away_score = None
home_score = None
if away_score_cell and away_score_cell.get_text(strip=True):
try:
away_score = int(away_score_cell.get_text(strip=True))
except ValueError:
pass
if home_score_cell and home_score_cell.get_text(strip=True):
try:
home_score = int(home_score_cell.get_text(strip=True))
except ValueError:
pass
# Get arena
arena_cell = row.find("td", {"data-stat": "arena_name"})
arena = arena_cell.get_text(strip=True) if arena_cell else None
# Determine status
status = "final" if home_score is not None else "scheduled"
# Check for postponed/cancelled
notes_cell = row.find("td", {"data-stat": "game_remarks"})
if notes_cell:
notes = notes_cell.get_text(strip=True).lower()
if "postponed" in notes:
status = "postponed"
elif "cancelled" in notes or "canceled" in notes:
status = "cancelled"
return RawGameData(
game_date=game_date,
home_team_raw=home_team,
away_team_raw=away_team,
stadium_raw=arena,
home_score=home_score,
away_score=away_score,
status=status,
source_url=source_url,
)
def _scrape_espn(self) -> list[RawGameData]:
"""Scrape games from ESPN API.
ESPN API returns games for a specific date range.
We iterate through each day of the season.
Bails out early if no games found after checking first month.
"""
all_games: list[RawGameData] = []
consecutive_empty_days = 0
max_empty_days = 45 # Bail after ~1.5 months of no games
for year, month in self._get_season_months():
# Get number of days in month
if month == 12:
next_month = date(year + 1, 1, 1)
else:
next_month = date(year, month + 1, 1)
days_in_month = (next_month - date(year, month, 1)).days
for day in range(1, days_in_month + 1):
try:
game_date = date(year, month, day)
date_str = game_date.strftime("%Y%m%d")
url = self._get_source_url("espn", date=date_str)
data = self.session.get_json(url)
games = self._parse_espn_response(data, url)
if games:
all_games.extend(games)
consecutive_empty_days = 0
else:
consecutive_empty_days += 1
# Bail early if no games found for a long stretch
if consecutive_empty_days >= max_empty_days:
self._logger.info(f"No games found for {max_empty_days} consecutive days, stopping ESPN scrape")
return all_games
except Exception as e:
self._logger.debug(f"ESPN error for {year}-{month}-{day}: {e}")
consecutive_empty_days += 1
if consecutive_empty_days >= max_empty_days:
self._logger.info(f"Too many consecutive failures, stopping ESPN scrape")
return all_games
continue
return all_games
def _parse_espn_response(
self,
data: dict,
source_url: str,
) -> list[RawGameData]:
"""Parse ESPN API response."""
games: list[RawGameData] = []
events = data.get("events", [])
for event in events:
try:
game = self._parse_espn_event(event, source_url)
if game:
games.append(game)
except Exception as e:
self._logger.debug(f"Failed to parse ESPN event: {e}")
continue
return games
def _parse_espn_event(
self,
event: dict,
source_url: str,
) -> Optional[RawGameData]:
"""Parse a single ESPN event."""
# Get date
date_str = event.get("date", "")
if not date_str:
return None
try:
# ESPN uses ISO format
game_date = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
except ValueError:
return None
# Get competitions (usually just one)
competitions = event.get("competitions", [])
if not competitions:
return None
competition = competitions[0]
# Get teams
competitors = competition.get("competitors", [])
if len(competitors) != 2:
return None
home_team = None
away_team = None
home_score = None
away_score = None
for competitor in competitors:
team_info = competitor.get("team", {})
team_name = team_info.get("displayName", "")
is_home = competitor.get("homeAway") == "home"
score = competitor.get("score")
if score:
try:
score = int(score)
except (ValueError, TypeError):
score = None
if is_home:
home_team = team_name
home_score = score
else:
away_team = team_name
away_score = score
if not home_team or not away_team:
return None
# Get venue
venue = competition.get("venue", {})
arena = venue.get("fullName")
# Get status
status_info = competition.get("status", {})
status_type = status_info.get("type", {})
status_name = status_type.get("name", "").lower()
if status_name == "status_final":
status = "final"
elif status_name == "status_postponed":
status = "postponed"
elif status_name == "status_canceled":
status = "cancelled"
else:
status = "scheduled"
return RawGameData(
game_date=game_date,
home_team_raw=home_team,
away_team_raw=away_team,
stadium_raw=arena,
home_score=home_score,
away_score=away_score,
status=status,
source_url=source_url,
)
    def _scrape_cbs(self) -> list[RawGameData]:
        """Scrape games from CBS Sports.

        CBS Sports is a backup source with less structured data.

        Raises:
            NotImplementedError: always; this stub lets the multi-source
                fallback move on to the next configured source.
        """
        # CBS Sports scraping would go here
        # For now, return empty to fall back to other sources
        raise NotImplementedError("CBS scraper not implemented")
def _normalize_games(
self,
raw_games: list[RawGameData],
) -> tuple[list[Game], list[ManualReviewItem]]:
"""Normalize raw games to Game objects with canonical IDs."""
games: list[Game] = []
review_items: list[ManualReviewItem] = []
# Track games by date for doubleheader detection
games_by_date: dict[str, list[RawGameData]] = {}
for raw in raw_games:
date_key = raw.game_date.strftime("%Y%m%d")
matchup_key = f"{date_key}_{raw.away_team_raw}_{raw.home_team_raw}"
if matchup_key not in games_by_date:
games_by_date[matchup_key] = []
games_by_date[matchup_key].append(raw)
# Process games with doubleheader detection
for matchup_key, matchup_games in games_by_date.items():
is_doubleheader = len(matchup_games) > 1
for i, raw in enumerate(matchup_games):
game_number = (i + 1) if is_doubleheader else None
game, item_reviews = self._normalize_single_game(raw, game_number)
if game:
games.append(game)
log_game(
self.sport,
game.id,
game.home_team_id,
game.away_team_id,
game.game_date.strftime("%Y-%m-%d"),
game.status,
)
review_items.extend(item_reviews)
return games, review_items
    def _normalize_single_game(
        self,
        raw: RawGameData,
        game_number: Optional[int],
    ) -> tuple[Optional[Game], list[ManualReviewItem]]:
        """Normalize a single raw game.

        Args:
            raw: Scraped record with source-spelled team/arena names.
            game_number: 1-based number for doubleheaders, else None.

        Returns:
            (game, review_items): game is None when either team fails to
            resolve; review_items carries fuzzy-match candidates that
            need manual confirmation.
        """
        review_items: list[ManualReviewItem] = []
        # Resolve home team
        home_result = self._team_resolver.resolve(
            raw.home_team_raw,
            check_date=raw.game_date.date(),
            source_url=raw.source_url,
        )
        if home_result.review_item:
            review_items.append(home_result.review_item)
        if not home_result.canonical_id:
            log_warning(f"Could not resolve home team: {raw.home_team_raw}")
            return None, review_items
        # Resolve away team
        away_result = self._team_resolver.resolve(
            raw.away_team_raw,
            check_date=raw.game_date.date(),
            source_url=raw.source_url,
        )
        if away_result.review_item:
            review_items.append(away_result.review_item)
        if not away_result.canonical_id:
            log_warning(f"Could not resolve away team: {raw.away_team_raw}")
            return None, review_items
        # Resolve stadium (optional - use home team's stadium if not found)
        stadium_id = None
        if raw.stadium_raw:
            stadium_result = self._stadium_resolver.resolve(
                raw.stadium_raw,
                check_date=raw.game_date.date(),
                source_url=raw.source_url,
            )
            if stadium_result.review_item:
                review_items.append(stadium_result.review_item)
            stadium_id = stadium_result.canonical_id
        # If no stadium found, use home team's default stadium
        if not stadium_id:
            # NOTE(review): assumes get_team_info() returns a tuple whose
            # index 2 is the city string -- confirm against TeamResolver.
            home_abbrev = home_result.canonical_id.split("_")[-1].upper()
            team_info = self._team_resolver.get_team_info(home_abbrev)
            if team_info:
                # First NBA venue whose city appears in the team's city wins.
                # NOTE(review): substring match may misfire for shared-city
                # teams (e.g. LAL/LAC) -- verify expected behavior.
                for sid, sinfo in STADIUM_MAPPINGS.get("nba", {}).items():
                    # Match by city
                    if sinfo.city.lower() in team_info[2].lower():
                        stadium_id = sid
                        break
        # Get abbreviations for game ID
        home_abbrev = self._get_abbreviation(home_result.canonical_id)
        away_abbrev = self._get_abbreviation(away_result.canonical_id)
        # Generate canonical game ID
        game_id = generate_game_id(
            sport=self.sport,
            season=self.season,
            away_abbrev=away_abbrev,
            home_abbrev=home_abbrev,
            game_date=raw.game_date,
            game_number=game_number,
        )
        game = Game(
            id=game_id,
            sport=self.sport,
            season=self.season,
            home_team_id=home_result.canonical_id,
            away_team_id=away_result.canonical_id,
            stadium_id=stadium_id or "",
            game_date=raw.game_date,
            game_number=game_number,
            home_score=raw.home_score,
            away_score=raw.away_score,
            status=raw.status,
            source_url=raw.source_url,
            raw_home_team=raw.home_team_raw,
            raw_away_team=raw.away_team_raw,
            raw_stadium=raw.stadium_raw,
        )
        return game, review_items
def _get_abbreviation(self, team_id: str) -> str:
"""Extract abbreviation from team ID."""
# team_nba_okc -> okc
parts = team_id.split("_")
return parts[-1] if parts else ""
def scrape_teams(self) -> list[Team]:
"""Get all NBA teams from hardcoded mappings."""
teams: list[Team] = []
seen: set[str] = set()
# NBA conference/division structure
divisions = {
"Atlantic": ("Eastern", ["BOS", "BKN", "NYK", "PHI", "TOR"]),
"Central": ("Eastern", ["CHI", "CLE", "DET", "IND", "MIL"]),
"Southeast": ("Eastern", ["ATL", "CHA", "MIA", "ORL", "WAS"]),
"Northwest": ("Western", ["DEN", "MIN", "OKC", "POR", "UTA"]),
"Pacific": ("Western", ["GSW", "LAC", "LAL", "PHX", "SAC"]),
"Southwest": ("Western", ["DAL", "HOU", "MEM", "NOP", "SAS"]),
}
# Build reverse lookup
team_divisions: dict[str, tuple[str, str]] = {}
for div, (conf, abbrevs) in divisions.items():
for abbrev in abbrevs:
team_divisions[abbrev] = (conf, div)
for abbrev, (team_id, full_name, city, stadium_id) in TEAM_MAPPINGS.get("nba", {}).items():
if team_id in seen:
continue
seen.add(team_id)
# Parse full name into city and name parts
parts = full_name.split()
if len(parts) >= 2:
# Handle special cases like "Oklahoma City Thunder"
if city == "Oklahoma City":
team_name = "Thunder"
elif city == "Golden State":
team_name = "Warriors"
elif city == "San Antonio":
team_name = "Spurs"
elif city == "New York":
team_name = parts[-1] # Knicks
elif city == "New Orleans":
team_name = "Pelicans"
elif city == "Los Angeles":
team_name = parts[-1] # Lakers or Clippers
else:
team_name = parts[-1]
else:
team_name = full_name
# Get conference and division
conf, div = team_divisions.get(abbrev, (None, None))
team = Team(
id=team_id,
sport="nba",
city=city,
name=team_name,
full_name=full_name,
abbreviation=abbrev,
conference=conf,
division=div,
stadium_id=stadium_id,
)
teams.append(team)
return teams
def scrape_stadiums(self) -> list[Stadium]:
"""Get all NBA stadiums from hardcoded mappings."""
stadiums: list[Stadium] = []
for stadium_id, info in STADIUM_MAPPINGS.get("nba", {}).items():
stadium = Stadium(
id=stadium_id,
sport="nba",
name=info.name,
city=info.city,
state=info.state,
country=info.country,
latitude=info.latitude,
longitude=info.longitude,
surface="hardwood",
roof_type="dome",
)
stadiums.append(stadium)
return stadiums
def create_nba_scraper(season: int) -> NBAScraper:
    """Build an NBAScraper for the given season start year."""
    return NBAScraper(season=season)

View File

@@ -0,0 +1,579 @@
"""NFL scraper implementation with multi-source fallback."""
from datetime import datetime, date
from typing import Optional
from bs4 import BeautifulSoup
from .base import BaseScraper, RawGameData, ScrapeResult
from ..models.game import Game
from ..models.team import Team
from ..models.stadium import Stadium
from ..models.aliases import ManualReviewItem
from ..normalizers.canonical_id import generate_game_id
from ..normalizers.team_resolver import (
TeamResolver,
TEAM_MAPPINGS,
get_team_resolver,
)
from ..normalizers.stadium_resolver import (
StadiumResolver,
STADIUM_MAPPINGS,
get_stadium_resolver,
)
from ..utils.logging import get_logger, log_game, log_warning
# International game locations to filter out. Matched by substring against
# the venue name (and by exact city for neutral-site events) during ESPN
# parsing. NOTE(review): this list may lag the NFL's international slate
# (new host cities) -- keep in sync with the league schedule.
INTERNATIONAL_LOCATIONS = {"London", "Mexico City", "Frankfurt", "Munich", "São Paulo"}
class NFLScraper(BaseScraper):
"""NFL schedule scraper with multi-source fallback.
Sources (in priority order):
1. ESPN API - Most reliable for NFL
2. Pro-Football-Reference - Complete historical data
3. CBS Sports - Backup option
"""
    def __init__(self, season: int, **kwargs):
        """Initialize NFL scraper.

        Args:
            season: Season year (e.g., 2025 for 2025 season)
            **kwargs: Forwarded to BaseScraper (e.g. session options).
        """
        super().__init__("nfl", season, **kwargs)
        # Resolvers map source-spelled team/stadium names to canonical IDs.
        self._team_resolver = get_team_resolver("nfl")
        self._stadium_resolver = get_stadium_resolver("nfl")
def _get_sources(self) -> list[str]:
"""Return source list in priority order."""
# CBS scraper not yet implemented - TODO for future
return ["espn", "pro_football_reference"]
def _get_source_url(self, source: str, **kwargs) -> str:
"""Build URL for a source."""
if source == "espn":
week = kwargs.get("week", 1)
season_type = kwargs.get("season_type", 2) # 1=preseason, 2=regular, 3=postseason
return f"https://site.api.espn.com/apis/site/v2/sports/football/nfl/scoreboard?seasontype={season_type}&week={week}"
elif source == "pro_football_reference":
return f"https://www.pro-football-reference.com/years/{self.season}/games.htm"
elif source == "cbs":
return "https://www.cbssports.com/nfl/schedule/"
raise ValueError(f"Unknown source: {source}")
def _get_season_months(self) -> list[tuple[int, int]]:
"""Get the months to scrape for NFL season.
NFL season runs September through February.
"""
months = []
# Regular season months
for month in range(9, 13): # Sept-Dec
months.append((self.season, month))
# Playoff months
for month in range(1, 3): # Jan-Feb
months.append((self.season + 1, month))
return months
def _scrape_games_from_source(self, source: str) -> list[RawGameData]:
"""Scrape games from a specific source."""
if source == "espn":
return self._scrape_espn()
elif source == "pro_football_reference":
return self._scrape_pro_football_reference()
elif source == "cbs":
return self._scrape_cbs()
else:
raise ValueError(f"Unknown source: {source}")
def _scrape_espn(self) -> list[RawGameData]:
"""Scrape games from ESPN API.
ESPN NFL API uses week numbers.
"""
all_games: list[RawGameData] = []
# Scrape preseason (4 weeks)
for week in range(1, 5):
try:
url = self._get_source_url("espn", week=week, season_type=1)
data = self.session.get_json(url)
games = self._parse_espn_response(data, url)
all_games.extend(games)
except Exception as e:
self._logger.debug(f"ESPN preseason week {week} error: {e}")
continue
# Scrape regular season (18 weeks)
for week in range(1, 19):
try:
url = self._get_source_url("espn", week=week, season_type=2)
data = self.session.get_json(url)
games = self._parse_espn_response(data, url)
all_games.extend(games)
self._logger.debug(f"Found {len(games)} games in week {week}")
except Exception as e:
self._logger.debug(f"ESPN regular season week {week} error: {e}")
continue
# Scrape postseason (4 rounds)
for week in range(1, 5):
try:
url = self._get_source_url("espn", week=week, season_type=3)
data = self.session.get_json(url)
games = self._parse_espn_response(data, url)
all_games.extend(games)
except Exception as e:
self._logger.debug(f"ESPN postseason week {week} error: {e}")
continue
return all_games
def _parse_espn_response(
self,
data: dict,
source_url: str,
) -> list[RawGameData]:
"""Parse ESPN API response."""
games: list[RawGameData] = []
events = data.get("events", [])
for event in events:
try:
game = self._parse_espn_event(event, source_url)
if game:
# Filter international games
if game.stadium_raw and any(loc in game.stadium_raw for loc in INTERNATIONAL_LOCATIONS):
self._logger.debug(f"Skipping international game: {game.stadium_raw}")
continue
games.append(game)
except Exception as e:
self._logger.debug(f"Failed to parse ESPN event: {e}")
continue
return games
def _parse_espn_event(
self,
event: dict,
source_url: str,
) -> Optional[RawGameData]:
"""Parse a single ESPN event."""
# Get date
date_str = event.get("date", "")
if not date_str:
return None
try:
game_date = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
except ValueError:
return None
# Get competitions
competitions = event.get("competitions", [])
if not competitions:
return None
competition = competitions[0]
# Check for neutral site (international games)
if competition.get("neutralSite"):
venue = competition.get("venue", {})
venue_city = venue.get("address", {}).get("city", "")
if venue_city in INTERNATIONAL_LOCATIONS:
return None
# Get teams
competitors = competition.get("competitors", [])
if len(competitors) != 2:
return None
home_team = None
away_team = None
home_score = None
away_score = None
for competitor in competitors:
team_info = competitor.get("team", {})
team_name = team_info.get("displayName", "")
is_home = competitor.get("homeAway") == "home"
score = competitor.get("score")
if score:
try:
score = int(score)
except (ValueError, TypeError):
score = None
if is_home:
home_team = team_name
home_score = score
else:
away_team = team_name
away_score = score
if not home_team or not away_team:
return None
# Get venue
venue = competition.get("venue", {})
stadium = venue.get("fullName")
# Get status
status_info = competition.get("status", {})
status_type = status_info.get("type", {})
status_name = status_type.get("name", "").lower()
if status_name == "status_final":
status = "final"
elif status_name == "status_postponed":
status = "postponed"
elif status_name == "status_canceled":
status = "cancelled"
else:
status = "scheduled"
return RawGameData(
game_date=game_date,
home_team_raw=home_team,
away_team_raw=away_team,
stadium_raw=stadium,
home_score=home_score,
away_score=away_score,
status=status,
source_url=source_url,
)
def _scrape_pro_football_reference(self) -> list[RawGameData]:
"""Scrape games from Pro-Football-Reference.
PFR has a single schedule page per season.
"""
url = self._get_source_url("pro_football_reference")
try:
html = self.session.get_html(url)
games = self._parse_pfr(html, url)
return games
except Exception as e:
self._logger.error(f"Failed to scrape Pro-Football-Reference: {e}")
raise
def _parse_pfr(
self,
html: str,
source_url: str,
) -> list[RawGameData]:
"""Parse Pro-Football-Reference schedule HTML."""
soup = BeautifulSoup(html, "lxml")
games: list[RawGameData] = []
# Find the schedule table
table = soup.find("table", id="games")
if not table:
return games
tbody = table.find("tbody")
if not tbody:
return games
for row in tbody.find_all("tr"):
# Skip header rows
if row.get("class") and "thead" in row.get("class", []):
continue
try:
game = self._parse_pfr_row(row, source_url)
if game:
games.append(game)
except Exception as e:
self._logger.debug(f"Failed to parse PFR row: {e}")
continue
return games
    def _parse_pfr_row(
        self,
        row,
        source_url: str,
    ) -> Optional[RawGameData]:
        """Parse a single Pro-Football-Reference table row.

        Returns None when the row lacks a parsable date or the
        winner/loser team cells.

        NOTE(review): this assumes the "winner"/"loser" data-stat columns
        are always present. For games not yet played PFR's schedule table
        lists teams differently, and tie games have no distinct winner --
        confirm how both cases render before relying on this for future
        schedules.
        """
        # Get date
        date_cell = row.find("td", {"data-stat": "game_date"})
        if not date_cell:
            return None
        date_text = date_cell.get_text(strip=True)
        if not date_text:
            return None
        # Parse date
        try:
            # PFR uses YYYY-MM-DD format
            game_date = datetime.strptime(date_text, "%Y-%m-%d")
        except ValueError:
            return None
        # Get teams
        winner_cell = row.find("td", {"data-stat": "winner"})
        loser_cell = row.find("td", {"data-stat": "loser"})
        if not winner_cell or not loser_cell:
            return None
        winner = winner_cell.get_text(strip=True)
        loser = loser_cell.get_text(strip=True)
        if not winner or not loser:
            return None
        # Determine home/away: an "@" in game_location means the winner
        # was the visiting team.
        game_location = row.find("td", {"data-stat": "game_location"})
        at_home = game_location and "@" in game_location.get_text()
        if at_home:
            home_team = loser
            away_team = winner
        else:
            home_team = winner
            away_team = loser
        # Get scores; winner/loser points are swapped back into
        # home/away orientation using the same "@" flag.
        pts_win_cell = row.find("td", {"data-stat": "pts_win"})
        pts_lose_cell = row.find("td", {"data-stat": "pts_lose"})
        home_score = None
        away_score = None
        if pts_win_cell and pts_lose_cell:
            try:
                winner_pts = int(pts_win_cell.get_text(strip=True))
                loser_pts = int(pts_lose_cell.get_text(strip=True))
                if at_home:
                    home_score = loser_pts
                    away_score = winner_pts
                else:
                    home_score = winner_pts
                    away_score = loser_pts
            except ValueError:
                pass
        # Missing scores imply the game has not been played yet.
        status = "final" if home_score is not None else "scheduled"
        return RawGameData(
            game_date=game_date,
            home_team_raw=home_team,
            away_team_raw=away_team,
            stadium_raw=None,  # PFR doesn't always have stadium
            home_score=home_score,
            away_score=away_score,
            status=status,
            source_url=source_url,
        )
    def _scrape_cbs(self) -> list[RawGameData]:
        """Scrape games from CBS Sports.

        Raises:
            NotImplementedError: always; this stub lets the multi-source
                fallback move on to the next configured source.
        """
        raise NotImplementedError("CBS scraper not implemented")
def _normalize_games(
self,
raw_games: list[RawGameData],
) -> tuple[list[Game], list[ManualReviewItem]]:
"""Normalize raw games to Game objects with canonical IDs."""
games: list[Game] = []
review_items: list[ManualReviewItem] = []
for raw in raw_games:
game, item_reviews = self._normalize_single_game(raw)
if game:
games.append(game)
log_game(
self.sport,
game.id,
game.home_team_id,
game.away_team_id,
game.game_date.strftime("%Y-%m-%d"),
game.status,
)
review_items.extend(item_reviews)
return games, review_items
def _normalize_single_game(
self,
raw: RawGameData,
) -> tuple[Optional[Game], list[ManualReviewItem]]:
"""Normalize a single raw game."""
review_items: list[ManualReviewItem] = []
# Resolve home team
home_result = self._team_resolver.resolve(
raw.home_team_raw,
check_date=raw.game_date.date(),
source_url=raw.source_url,
)
if home_result.review_item:
review_items.append(home_result.review_item)
if not home_result.canonical_id:
log_warning(f"Could not resolve home team: {raw.home_team_raw}")
return None, review_items
# Resolve away team
away_result = self._team_resolver.resolve(
raw.away_team_raw,
check_date=raw.game_date.date(),
source_url=raw.source_url,
)
if away_result.review_item:
review_items.append(away_result.review_item)
if not away_result.canonical_id:
log_warning(f"Could not resolve away team: {raw.away_team_raw}")
return None, review_items
# Resolve stadium
stadium_id = None
if raw.stadium_raw:
stadium_result = self._stadium_resolver.resolve(
raw.stadium_raw,
check_date=raw.game_date.date(),
source_url=raw.source_url,
)
if stadium_result.review_item:
review_items.append(stadium_result.review_item)
stadium_id = stadium_result.canonical_id
# Get abbreviations for game ID
home_abbrev = self._get_abbreviation(home_result.canonical_id)
away_abbrev = self._get_abbreviation(away_result.canonical_id)
# Generate canonical game ID
game_id = generate_game_id(
sport=self.sport,
season=self.season,
away_abbrev=away_abbrev,
home_abbrev=home_abbrev,
game_date=raw.game_date,
game_number=None, # NFL doesn't have doubleheaders
)
game = Game(
id=game_id,
sport=self.sport,
season=self.season,
home_team_id=home_result.canonical_id,
away_team_id=away_result.canonical_id,
stadium_id=stadium_id or "",
game_date=raw.game_date,
game_number=None,
home_score=raw.home_score,
away_score=raw.away_score,
status=raw.status,
source_url=raw.source_url,
raw_home_team=raw.home_team_raw,
raw_away_team=raw.away_team_raw,
raw_stadium=raw.stadium_raw,
)
return game, review_items
def _get_abbreviation(self, team_id: str) -> str:
"""Extract abbreviation from team ID."""
parts = team_id.split("_")
return parts[-1] if parts else ""
def scrape_teams(self) -> list[Team]:
    """Get all NFL teams from hardcoded mappings."""
    # NFL conference/division structure, keyed by division name.
    division_layout = {
        "AFC East": ("AFC", ["BUF", "MIA", "NE", "NYJ"]),
        "AFC North": ("AFC", ["BAL", "CIN", "CLE", "PIT"]),
        "AFC South": ("AFC", ["HOU", "IND", "JAX", "TEN"]),
        "AFC West": ("AFC", ["DEN", "KC", "LV", "LAC"]),
        "NFC East": ("NFC", ["DAL", "NYG", "PHI", "WAS"]),
        "NFC North": ("NFC", ["CHI", "DET", "GB", "MIN"]),
        "NFC South": ("NFC", ["ATL", "CAR", "NO", "TB"]),
        "NFC West": ("NFC", ["ARI", "LAR", "SF", "SEA"]),
    }
    # Invert into abbreviation -> (conference, division).
    placement: dict[str, tuple[str, str]] = {
        ab: (conference, division)
        for division, (conference, members) in division_layout.items()
        for ab in members
    }
    roster: list[Team] = []
    emitted: set[str] = set()
    for ab, (tid, full, town, venue) in TEAM_MAPPINGS.get("nfl", {}).items():
        # The mapping may alias several keys to one team; emit each team once.
        if tid in emitted:
            continue
        emitted.add(tid)
        # Nickname is the last word of the full name (e.g. "Chiefs").
        words = full.split()
        nickname = words[-1] if words else full
        conference, division = placement.get(ab, (None, None))
        roster.append(
            Team(
                id=tid,
                sport="nfl",
                city=town,
                name=nickname,
                full_name=full,
                abbreviation=ab,
                conference=conference,
                division=division,
                stadium_id=venue,
            )
        )
    return roster
def scrape_stadiums(self) -> list[Stadium]:
    """Get all NFL stadiums from hardcoded mappings."""
    return [
        Stadium(
            id=venue_id,
            sport="nfl",
            name=details.name,
            city=details.city,
            state=details.state,
            country=details.country,
            latitude=details.latitude,
            longitude=details.longitude,
            surface="turf",  # Many NFL stadiums
            roof_type="open",  # Most outdoor
        )
        for venue_id, details in STADIUM_MAPPINGS.get("nfl", {}).items()
    ]
def create_nfl_scraper(season: int) -> NFLScraper:
    """Factory function to create an NFL scraper."""
    scraper = NFLScraper(season=season)
    return scraper

View File

@@ -0,0 +1,657 @@
"""NHL scraper implementation with multi-source fallback."""
from datetime import datetime, date
from typing import Optional
from bs4 import BeautifulSoup
from .base import BaseScraper, RawGameData, ScrapeResult
from ..models.game import Game
from ..models.team import Team
from ..models.stadium import Stadium
from ..models.aliases import ManualReviewItem
from ..normalizers.canonical_id import generate_game_id
from ..normalizers.team_resolver import (
TeamResolver,
TEAM_MAPPINGS,
get_team_resolver,
)
from ..normalizers.stadium_resolver import (
StadiumResolver,
STADIUM_MAPPINGS,
get_stadium_resolver,
)
from ..utils.logging import get_logger, log_game, log_warning
# International game locations to filter out.
# These are NHL Global Series host cities; games played abroad are excluded
# because they have no canonical North American stadium mapping.
INTERNATIONAL_LOCATIONS = {"Prague", "Stockholm", "Helsinki", "Tampere", "Gothenburg"}
# Hockey Reference month URLs
# NOTE(review): not referenced by the visible scraper code — the
# Hockey-Reference path fetches a single season-wide page. Confirm this is
# dead before removing.
HR_MONTHS = [
    "october", "november", "december",
    "january", "february", "march", "april", "may", "june",
]
class NHLScraper(BaseScraper):
    """NHL schedule scraper with multi-source fallback.

    Sources (in priority order):
    1. Hockey-Reference - Most reliable for NHL
    2. NHL API - Official NHL data
    3. ESPN API - Backup option
    """

    def __init__(self, season: int, **kwargs):
        """Initialize NHL scraper.

        Args:
            season: Season start year (e.g., 2025 for 2025-26)
            **kwargs: Forwarded to BaseScraper (session/retry options).
        """
        super().__init__("nhl", season, **kwargs)
        # Resolvers translate raw source strings into canonical IDs.
        self._team_resolver = get_team_resolver("nhl")
        self._stadium_resolver = get_stadium_resolver("nhl")
def _get_sources(self) -> list[str]:
    """Return source list in priority order."""
    ordered_sources = ["hockey_reference", "nhl_api", "espn"]
    return ordered_sources
def _get_source_url(self, source: str, **kwargs) -> str:
    """Build URL for a source.

    Args:
        source: One of ``"hockey_reference"``, ``"nhl_api"``, ``"espn"``.
        **kwargs: Source-specific parameters: ``year`` (hockey_reference),
            ``start_date`` (nhl_api), ``date`` (espn).

    Returns:
        Fully-qualified URL for the requested source.

    Raises:
        ValueError: If ``source`` is not recognized.
    """
    # Removed unused locals: the old `month` (hockey_reference) and
    # `end_date` (nhl_api) kwargs were computed but never used.
    if source == "hockey_reference":
        # HR names a season by its *end* year (2025-26 -> NHL_2026).
        year = kwargs.get("year", self.season + 1)
        return f"https://www.hockey-reference.com/leagues/NHL_{year}_games.html"
    elif source == "nhl_api":
        start_date = kwargs.get("start_date", "")
        return f"https://api-web.nhle.com/v1/schedule/{start_date}"
    elif source == "espn":
        date_str = kwargs.get("date", "")
        return f"https://site.api.espn.com/apis/site/v2/sports/hockey/nhl/scoreboard?dates={date_str}"
    raise ValueError(f"Unknown source: {source}")
def _scrape_games_from_source(self, source: str) -> list[RawGameData]:
    """Scrape games from a specific source."""
    # Dispatch table keeps source -> handler mapping in one place.
    handlers = {
        "hockey_reference": self._scrape_hockey_reference,
        "nhl_api": self._scrape_nhl_api,
        "espn": self._scrape_espn,
    }
    handler = handlers.get(source)
    if handler is None:
        raise ValueError(f"Unknown source: {source}")
    return handler()
def _scrape_hockey_reference(self) -> list[RawGameData]:
    """Scrape games from Hockey-Reference.

    HR has a single schedule page per season.
    """
    # HR keys the season page on its end year.
    url = self._get_source_url("hockey_reference", year=self.season + 1)
    try:
        page = self.session.get_html(url)
        return self._parse_hockey_reference(page, url)
    except Exception as e:
        self._logger.error(f"Failed to scrape Hockey-Reference: {e}")
        raise
def _parse_hockey_reference(
    self,
    html: str,
    source_url: str,
) -> list[RawGameData]:
    """Parse Hockey-Reference schedule HTML."""
    document = BeautifulSoup(html, "lxml")
    parsed: list[RawGameData] = []
    # The season schedule lives in a single table with id="games".
    schedule = document.find("table", id="games")
    body = schedule.find("tbody") if schedule else None
    if body is None:
        return parsed
    for row in body.find_all("tr"):
        # Repeated header rows inside tbody carry the "thead" class.
        if "thead" in (row.get("class") or []):
            continue
        try:
            candidate = self._parse_hr_row(row, source_url)
        except Exception as e:
            self._logger.debug(f"Failed to parse HR row: {e}")
            continue
        if candidate is None:
            continue
        # Drop neutral-site international games (Global Series).
        venue = candidate.stadium_raw
        if venue and any(city in venue for city in INTERNATIONAL_LOCATIONS):
            continue
        parsed.append(candidate)
    return parsed
def _parse_hr_row(
    self,
    row,
    source_url: str,
) -> Optional[RawGameData]:
    """Parse a single Hockey-Reference table row.

    Args:
        row: BeautifulSoup ``<tr>`` element from the schedule table.
        source_url: URL the row was scraped from (recorded on the result).

    Returns:
        Parsed game data, or ``None`` when the row lacks a date or teams.
    """
    # Get date
    date_cell = row.find("th", {"data-stat": "date_game"})
    if not date_cell:
        return None
    date_text = date_cell.get_text(strip=True)
    if not date_text:
        return None
    # Parse date (format: "2025-10-15")
    try:
        game_date = datetime.strptime(date_text, "%Y-%m-%d")
    except ValueError:
        return None
    # Get teams
    visitor_cell = row.find("td", {"data-stat": "visitor_team_name"})
    home_cell = row.find("td", {"data-stat": "home_team_name"})
    if not visitor_cell or not home_cell:
        return None
    away_team = visitor_cell.get_text(strip=True)
    home_team = home_cell.get_text(strip=True)
    if not away_team or not home_team:
        return None
    # Get scores; blank cells mean the game has not been played yet.
    visitor_goals_cell = row.find("td", {"data-stat": "visitor_goals"})
    home_goals_cell = row.find("td", {"data-stat": "home_goals"})
    away_score = None
    home_score = None
    if visitor_goals_cell and visitor_goals_cell.get_text(strip=True):
        try:
            away_score = int(visitor_goals_cell.get_text(strip=True))
        except ValueError:
            pass
    if home_goals_cell and home_goals_cell.get_text(strip=True):
        try:
            home_score = int(home_goals_cell.get_text(strip=True))
        except ValueError:
            pass
    # Determine status: a recorded home score means the game is complete.
    status = "final" if home_score is not None else "scheduled"
    # Check for OT/SO
    # NOTE(review): a populated OT column forces "final" even when no score
    # was parsed — confirm that is intended and not an accidental override.
    overtimes_cell = row.find("td", {"data-stat": "overtimes"})
    if overtimes_cell:
        ot_text = overtimes_cell.get_text(strip=True)
        if ot_text:
            status = "final"  # OT games are still final
    return RawGameData(
        game_date=game_date,
        home_team_raw=home_team,
        away_team_raw=away_team,
        stadium_raw=None,  # HR doesn't have stadium
        home_score=home_score,
        away_score=away_score,
        status=status,
        source_url=source_url,
    )
def _scrape_nhl_api(self) -> list[RawGameData]:
    """Scrape games from NHL API, one schedule request per season month."""
    collected: list[RawGameData] = []
    for year, month in self._get_season_months():
        month_start = date(year, month, 1)
        url = self._get_source_url(
            "nhl_api", start_date=month_start.strftime("%Y-%m-%d")
        )
        try:
            payload = self.session.get_json(url)
            collected.extend(self._parse_nhl_api_response(payload, url))
        except Exception as e:
            # Individual month failures are tolerated; continue the sweep.
            self._logger.debug(f"NHL API error for {year}-{month}: {e}")
            continue
    return collected
def _parse_nhl_api_response(
    self,
    data: dict,
    source_url: str,
) -> list[RawGameData]:
    """Parse NHL API response (a list of game weeks, each holding games)."""
    parsed: list[RawGameData] = []
    for week in data.get("gameWeek", []):
        for entry in week.get("games", []):
            try:
                candidate = self._parse_nhl_api_game(entry, source_url)
            except Exception as e:
                self._logger.debug(f"Failed to parse NHL API game: {e}")
                continue
            if candidate is not None:
                parsed.append(candidate)
    return parsed
def _parse_nhl_api_game(
    self,
    game: dict,
    source_url: str,
) -> Optional[RawGameData]:
    """Parse a single NHL API game.

    Args:
        game: One game object from the api-web.nhle.com schedule feed.
        source_url: URL the game was fetched from (recorded on the result).

    Returns:
        Parsed game data, or ``None`` if the date or teams are missing.
    """
    # Get date (ISO 8601 with a Z suffix, converted to an aware datetime)
    start_time = game.get("startTimeUTC", "")
    if not start_time:
        return None
    try:
        game_date = datetime.fromisoformat(start_time.replace("Z", "+00:00"))
    except ValueError:
        return None
    # Get teams — prefer placeName, fall back to the full name field.
    away_team_data = game.get("awayTeam", {})
    home_team_data = game.get("homeTeam", {})
    away_team = away_team_data.get("placeName", {}).get("default", "")
    home_team = home_team_data.get("placeName", {}).get("default", "")
    if not away_team or not home_team:
        # Try full name
        away_team = away_team_data.get("name", {}).get("default", "")
        home_team = home_team_data.get("name", {}).get("default", "")
    if not away_team or not home_team:
        return None
    # Get scores (absent for unplayed games)
    away_score = away_team_data.get("score")
    home_score = home_team_data.get("score")
    # Get venue
    venue = game.get("venue", {})
    stadium = venue.get("default")
    # Map the feed's gameState onto our status vocabulary.
    game_state = game.get("gameState", "").lower()
    if game_state in ["final", "off"]:
        status = "final"
    elif game_state == "postponed":
        status = "postponed"
    elif game_state in ["cancelled", "canceled"]:
        status = "cancelled"
    else:
        status = "scheduled"
    return RawGameData(
        game_date=game_date,
        home_team_raw=home_team,
        away_team_raw=away_team,
        stadium_raw=stadium,
        home_score=home_score,
        away_score=away_score,
        status=status,
        source_url=source_url,
    )
def _scrape_espn(self) -> list[RawGameData]:
    """Scrape games from ESPN API.

    Queries the ESPN scoreboard endpoint one calendar day at a time for
    every month of the season.

    Returns:
        All games parsed from ESPN; days that fail are skipped with a
        debug log rather than aborting the whole scrape.
    """
    from calendar import monthrange  # stdlib; local import keeps module imports unchanged

    all_games: list[RawGameData] = []
    for year, month in self._get_season_months():
        # monthrange handles month lengths and leap years, replacing the
        # hand-rolled next-month subtraction.
        days_in_month = monthrange(year, month)[1]
        for day in range(1, days_in_month + 1):
            try:
                game_date = date(year, month, day)
                date_str = game_date.strftime("%Y%m%d")
                url = self._get_source_url("espn", date=date_str)
                data = self.session.get_json(url)
                games = self._parse_espn_response(data, url)
                all_games.extend(games)
            except Exception as e:
                self._logger.debug(f"ESPN error for {year}-{month}-{day}: {e}")
                continue
    return all_games
def _parse_espn_response(
    self,
    data: dict,
    source_url: str,
) -> list[RawGameData]:
    """Parse ESPN API response (scoreboard "events" list)."""
    parsed: list[RawGameData] = []
    for event in data.get("events", []):
        try:
            candidate = self._parse_espn_event(event, source_url)
        except Exception as e:
            self._logger.debug(f"Failed to parse ESPN event: {e}")
            continue
        if candidate is not None:
            parsed.append(candidate)
    return parsed
def _parse_espn_event(
    self,
    event: dict,
    source_url: str,
) -> Optional[RawGameData]:
    """Parse a single ESPN event.

    Args:
        event: One entry from the scoreboard "events" array.
        source_url: URL the event came from (recorded on the result).

    Returns:
        Parsed game data, or ``None`` for malformed events and for
        neutral-site international (Global Series) games.
    """
    # Get date
    date_str = event.get("date", "")
    if not date_str:
        return None
    try:
        game_date = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
    except ValueError:
        return None
    # Get competitions
    competitions = event.get("competitions", [])
    if not competitions:
        return None
    competition = competitions[0]
    # Check for neutral site (international games like Global Series)
    if competition.get("neutralSite"):
        venue = competition.get("venue", {})
        venue_city = venue.get("address", {}).get("city", "")
        if venue_city in INTERNATIONAL_LOCATIONS:
            return None
    # Get teams
    competitors = competition.get("competitors", [])
    if len(competitors) != 2:
        return None
    home_team = None
    away_team = None
    home_score = None
    away_score = None
    for competitor in competitors:
        team_info = competitor.get("team", {})
        team_name = team_info.get("displayName", "")
        is_home = competitor.get("homeAway") == "home"
        # Fix: the previous truthiness check (`if score:`) dropped a
        # legitimate 0 score and stored "" verbatim. Use an explicit
        # None check and normalize anything unparseable to None.
        score = competitor.get("score")
        if score is not None:
            try:
                score = int(score)
            except (ValueError, TypeError):
                score = None
        if is_home:
            home_team = team_name
            home_score = score
        else:
            away_team = team_name
            away_score = score
    if not home_team or not away_team:
        return None
    # Get venue
    venue = competition.get("venue", {})
    stadium = venue.get("fullName")
    # Get status
    status_info = competition.get("status", {})
    status_type = status_info.get("type", {})
    status_name = status_type.get("name", "").lower()
    if status_name == "status_final":
        status = "final"
    elif status_name == "status_postponed":
        status = "postponed"
    elif status_name == "status_canceled":
        status = "cancelled"
    else:
        status = "scheduled"
    return RawGameData(
        game_date=game_date,
        home_team_raw=home_team,
        away_team_raw=away_team,
        stadium_raw=stadium,
        home_score=home_score,
        away_score=away_score,
        status=status,
        source_url=source_url,
    )
def _normalize_games(
    self,
    raw_games: list[RawGameData],
) -> tuple[list[Game], list[ManualReviewItem]]:
    """Normalize raw games to Game objects with canonical IDs."""
    normalized: list[Game] = []
    pending_review: list[ManualReviewItem] = []
    for entry in raw_games:
        game, flagged = self._normalize_single_game(entry)
        pending_review.extend(flagged)
        if game is None:
            continue
        normalized.append(game)
        log_game(
            self.sport,
            game.id,
            game.home_team_id,
            game.away_team_id,
            game.game_date.strftime("%Y-%m-%d"),
            game.status,
        )
    return normalized, pending_review
def _normalize_single_game(
    self,
    raw: RawGameData,
) -> tuple[Optional[Game], list[ManualReviewItem]]:
    """Normalize a single raw game.

    Resolves team and stadium strings to canonical IDs and builds a Game.
    Sources without venue data (Hockey-Reference) fall back to the home
    team's default stadium.

    Args:
        raw: Scraped game data with source-specific strings.

    Returns:
        Tuple of (game, review_items); ``game`` is ``None`` when either
        team cannot be resolved.
    """
    review_items: list[ManualReviewItem] = []
    # Resolve home team
    home_result = self._team_resolver.resolve(
        raw.home_team_raw,
        check_date=raw.game_date.date(),
        source_url=raw.source_url,
    )
    if home_result.review_item:
        review_items.append(home_result.review_item)
    if not home_result.canonical_id:
        log_warning(f"Could not resolve home team: {raw.home_team_raw}")
        return None, review_items
    # Resolve away team
    away_result = self._team_resolver.resolve(
        raw.away_team_raw,
        check_date=raw.game_date.date(),
        source_url=raw.source_url,
    )
    if away_result.review_item:
        review_items.append(away_result.review_item)
    if not away_result.canonical_id:
        log_warning(f"Could not resolve away team: {raw.away_team_raw}")
        return None, review_items
    # Resolve stadium
    stadium_id = None
    if raw.stadium_raw:
        stadium_result = self._stadium_resolver.resolve(
            raw.stadium_raw,
            check_date=raw.game_date.date(),
            source_url=raw.source_url,
        )
        if stadium_result.review_item:
            review_items.append(stadium_result.review_item)
        stadium_id = stadium_result.canonical_id
    # Fallback: Use home team's default stadium if no venue provided
    # This is common for Hockey-Reference which doesn't have venue data
    # (removed a dead `home_abbrev` computation that was never used here)
    if not stadium_id:
        for team_id, _, _, default_stadium in TEAM_MAPPINGS.get("nhl", {}).values():
            if team_id == home_result.canonical_id:
                stadium_id = default_stadium
                break
    # Get abbreviations for game ID
    home_abbrev = self._get_abbreviation(home_result.canonical_id)
    away_abbrev = self._get_abbreviation(away_result.canonical_id)
    # Generate canonical game ID
    game_id = generate_game_id(
        sport=self.sport,
        season=self.season,
        away_abbrev=away_abbrev,
        home_abbrev=home_abbrev,
        game_date=raw.game_date,
        game_number=None,  # NHL doesn't have doubleheaders
    )
    game = Game(
        id=game_id,
        sport=self.sport,
        season=self.season,
        home_team_id=home_result.canonical_id,
        away_team_id=away_result.canonical_id,
        stadium_id=stadium_id or "",
        game_date=raw.game_date,
        game_number=None,
        home_score=raw.home_score,
        away_score=raw.away_score,
        status=raw.status,
        source_url=raw.source_url,
        raw_home_team=raw.home_team_raw,
        raw_away_team=raw.away_team_raw,
        raw_stadium=raw.stadium_raw,
    )
    return game, review_items
def _get_abbreviation(self, team_id: str) -> str:
    """Return the team abbreviation: the last ``_``-separated segment of the ID."""
    return team_id.rsplit("_", 1)[-1]
def scrape_teams(self) -> list[Team]:
    """Get all NHL teams from hardcoded mappings."""
    # NHL conference/division structure.
    # NOTE(review): verify this roster is current against TEAM_MAPPINGS.
    division_layout = {
        "Atlantic": ("Eastern", ["BOS", "BUF", "DET", "FLA", "MTL", "OTT", "TB", "TOR"]),
        "Metropolitan": ("Eastern", ["CAR", "CBJ", "NJ", "NYI", "NYR", "PHI", "PIT", "WAS"]),
        "Central": ("Western", ["ARI", "CHI", "COL", "DAL", "MIN", "NSH", "STL", "WPG"]),
        "Pacific": ("Western", ["ANA", "CGY", "EDM", "LA", "SJ", "SEA", "VAN", "VGK"]),
    }
    # Invert into abbreviation -> (conference, division).
    placement: dict[str, tuple[str, str]] = {
        ab: (conference, division)
        for division, (conference, members) in division_layout.items()
        for ab in members
    }
    roster: list[Team] = []
    emitted: set[str] = set()
    for ab, (tid, full, town, venue) in TEAM_MAPPINGS.get("nhl", {}).items():
        if tid in emitted:
            continue
        emitted.add(tid)
        # Nickname is the last word, widened to two words for the
        # two-word nicknames (Red Wings, Blue Jackets, Golden Knights,
        # Maple Leafs).
        words = full.split()
        nickname = words[-1] if words else full
        if nickname in ("Wings", "Jackets", "Knights", "Leafs"):
            nickname = " ".join(words[-2:])
        conference, division = placement.get(ab, (None, None))
        roster.append(
            Team(
                id=tid,
                sport="nhl",
                city=town,
                name=nickname,
                full_name=full,
                abbreviation=ab,
                conference=conference,
                division=division,
                stadium_id=venue,
            )
        )
    return roster
def scrape_stadiums(self) -> list[Stadium]:
    """Get all NHL stadiums from hardcoded mappings."""
    return [
        Stadium(
            id=venue_id,
            sport="nhl",
            name=details.name,
            city=details.city,
            state=details.state,
            country=details.country,
            latitude=details.latitude,
            longitude=details.longitude,
            surface="ice",
            roof_type="dome",
        )
        for venue_id, details in STADIUM_MAPPINGS.get("nhl", {}).items()
    ]
def create_nhl_scraper(season: int) -> NHLScraper:
    """Factory function to create an NHL scraper."""
    scraper = NHLScraper(season=season)
    return scraper

View File

@@ -0,0 +1,374 @@
"""NWSL scraper implementation with multi-source fallback."""
from datetime import datetime, date, timedelta
from typing import Optional
from .base import BaseScraper, RawGameData, ScrapeResult
from ..models.game import Game
from ..models.team import Team
from ..models.stadium import Stadium
from ..models.aliases import ManualReviewItem
from ..normalizers.canonical_id import generate_game_id
from ..normalizers.team_resolver import (
TeamResolver,
TEAM_MAPPINGS,
get_team_resolver,
)
from ..normalizers.stadium_resolver import (
StadiumResolver,
STADIUM_MAPPINGS,
get_stadium_resolver,
)
from ..utils.logging import get_logger, log_game, log_warning
class NWSLScraper(BaseScraper):
    """NWSL schedule scraper with multi-source fallback.

    Sources (in priority order):
    1. ESPN API - Most reliable for NWSL
    2. NWSL official (via ESPN) - Backup option
    """

    def __init__(self, season: int, **kwargs):
        """Initialize NWSL scraper.

        Args:
            season: Season year (e.g., 2026 for 2026 season)
            **kwargs: Forwarded to BaseScraper (session/retry options).
        """
        super().__init__("nwsl", season, **kwargs)
        # Resolvers translate raw source strings into canonical IDs.
        self._team_resolver = get_team_resolver("nwsl")
        self._stadium_resolver = get_stadium_resolver("nwsl")
def _get_sources(self) -> list[str]:
    """Return source list in priority order (ESPN is the only source)."""
    ordered_sources = ["espn"]
    return ordered_sources
def _get_source_url(self, source: str, **kwargs) -> str:
    """Build URL for a source."""
    if source != "espn":
        raise ValueError(f"Unknown source: {source}")
    date_str = kwargs.get("date", "")
    return f"https://site.api.espn.com/apis/site/v2/sports/soccer/usa.nwsl/scoreboard?dates={date_str}"
def _get_season_months(self) -> list[tuple[int, int]]:
    """Get the months to scrape for NWSL season.

    NWSL season (regular season + playoffs) runs March through November.
    """
    return [(self.season, month) for month in range(3, 12)]
def _scrape_games_from_source(self, source: str) -> list[RawGameData]:
    """Scrape games from a specific source."""
    if source != "espn":
        raise ValueError(f"Unknown source: {source}")
    return self._scrape_espn()
def _scrape_espn(self) -> list[RawGameData]:
    """Scrape games from ESPN API using date range query."""
    # Build one date window covering the whole season (March-November).
    months = self._get_season_months()
    first_year, first_month = months[0]
    last_year, last_month = months[-1]
    window_start = date(first_year, first_month, 1)
    # Last day of the final month: first day of the next month minus one.
    if last_month == 12:
        window_end = date(last_year + 1, 1, 1) - timedelta(days=1)
    else:
        window_end = date(last_year, last_month + 1, 1) - timedelta(days=1)
    date_range = f"{window_start.strftime('%Y%m%d')}-{window_end.strftime('%Y%m%d')}"
    url = f"https://site.api.espn.com/apis/site/v2/sports/soccer/usa.nwsl/scoreboard?limit=1000&dates={date_range}"
    self._logger.info(f"Fetching NWSL schedule: {date_range}")
    try:
        payload = self.session.get_json(url)
        return self._parse_espn_response(payload, url)
    except Exception as e:
        self._logger.error(f"ESPN error: {e}")
        return []
def _parse_espn_response(
    self,
    data: dict,
    source_url: str,
) -> list[RawGameData]:
    """Parse ESPN API response (scoreboard "events" list)."""
    parsed: list[RawGameData] = []
    for event in data.get("events", []):
        try:
            candidate = self._parse_espn_event(event, source_url)
        except Exception as e:
            self._logger.debug(f"Failed to parse ESPN event: {e}")
            continue
        if candidate is not None:
            parsed.append(candidate)
    return parsed
def _parse_espn_event(
    self,
    event: dict,
    source_url: str,
) -> Optional[RawGameData]:
    """Parse a single ESPN event.

    Args:
        event: One entry from the scoreboard "events" array.
        source_url: URL the event came from (recorded on the result).

    Returns:
        Parsed game data, or ``None`` for malformed events.
    """
    # Get date
    date_str = event.get("date", "")
    if not date_str:
        return None
    try:
        game_date = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
    except ValueError:
        return None
    # Get competitions
    competitions = event.get("competitions", [])
    if not competitions:
        return None
    competition = competitions[0]
    # Get teams
    competitors = competition.get("competitors", [])
    if len(competitors) != 2:
        return None
    home_team = None
    away_team = None
    home_score = None
    away_score = None
    for competitor in competitors:
        team_info = competitor.get("team", {})
        team_name = team_info.get("displayName", "")
        is_home = competitor.get("homeAway") == "home"
        # Fix: the previous truthiness check (`if score:`) dropped a
        # legitimate 0 score (common in soccer) and stored "" verbatim.
        # Use an explicit None check and normalize unparseable values.
        score = competitor.get("score")
        if score is not None:
            try:
                score = int(score)
            except (ValueError, TypeError):
                score = None
        if is_home:
            home_team = team_name
            home_score = score
        else:
            away_team = team_name
            away_score = score
    if not home_team or not away_team:
        return None
    # Get venue
    venue = competition.get("venue", {})
    stadium = venue.get("fullName")
    # Get status
    status_info = competition.get("status", {})
    status_type = status_info.get("type", {})
    status_name = status_type.get("name", "").lower()
    if status_name == "status_final":
        status = "final"
    elif status_name == "status_postponed":
        status = "postponed"
    elif status_name == "status_canceled":
        status = "cancelled"
    else:
        status = "scheduled"
    return RawGameData(
        game_date=game_date,
        home_team_raw=home_team,
        away_team_raw=away_team,
        stadium_raw=stadium,
        home_score=home_score,
        away_score=away_score,
        status=status,
        source_url=source_url,
    )
def _normalize_games(
    self,
    raw_games: list[RawGameData],
) -> tuple[list[Game], list[ManualReviewItem]]:
    """Normalize raw games to Game objects with canonical IDs."""
    normalized: list[Game] = []
    pending_review: list[ManualReviewItem] = []
    for entry in raw_games:
        game, flagged = self._normalize_single_game(entry)
        pending_review.extend(flagged)
        if game is None:
            continue
        normalized.append(game)
        log_game(
            self.sport,
            game.id,
            game.home_team_id,
            game.away_team_id,
            game.game_date.strftime("%Y-%m-%d"),
            game.status,
        )
    return normalized, pending_review
def _normalize_single_game(
    self,
    raw: RawGameData,
) -> tuple[Optional[Game], list[ManualReviewItem]]:
    """Normalize a single raw game.

    Resolves team and stadium strings to canonical IDs and builds a Game
    with a deterministic canonical game ID.

    Args:
        raw: Scraped game data carrying source-specific strings.

    Returns:
        Tuple of (game, review_items). ``game`` is ``None`` when either
        team cannot be resolved; fuzzy matches flagged for manual review
        are returned even on failure.
    """
    review_items: list[ManualReviewItem] = []
    # Resolve home team; check_date lets the resolver honor renames.
    home_result = self._team_resolver.resolve(
        raw.home_team_raw,
        check_date=raw.game_date.date(),
        source_url=raw.source_url,
    )
    if home_result.review_item:
        review_items.append(home_result.review_item)
    if not home_result.canonical_id:
        log_warning(f"Could not resolve home team: {raw.home_team_raw}")
        return None, review_items
    # Resolve away team
    away_result = self._team_resolver.resolve(
        raw.away_team_raw,
        check_date=raw.game_date.date(),
        source_url=raw.source_url,
    )
    if away_result.review_item:
        review_items.append(away_result.review_item)
    if not away_result.canonical_id:
        log_warning(f"Could not resolve away team: {raw.away_team_raw}")
        return None, review_items
    # Resolve stadium (optional — a missing venue does not reject the game)
    stadium_id = None
    if raw.stadium_raw:
        stadium_result = self._stadium_resolver.resolve(
            raw.stadium_raw,
            check_date=raw.game_date.date(),
            source_url=raw.source_url,
        )
        if stadium_result.review_item:
            review_items.append(stadium_result.review_item)
        stadium_id = stadium_result.canonical_id
    # Get abbreviations for game ID
    home_abbrev = self._get_abbreviation(home_result.canonical_id)
    away_abbrev = self._get_abbreviation(away_result.canonical_id)
    # Generate canonical game ID
    game_id = generate_game_id(
        sport=self.sport,
        season=self.season,
        away_abbrev=away_abbrev,
        home_abbrev=home_abbrev,
        game_date=raw.game_date,
        game_number=None,
    )
    game = Game(
        id=game_id,
        sport=self.sport,
        season=self.season,
        home_team_id=home_result.canonical_id,
        away_team_id=away_result.canonical_id,
        stadium_id=stadium_id or "",
        game_date=raw.game_date,
        game_number=None,
        home_score=raw.home_score,
        away_score=raw.away_score,
        status=raw.status,
        source_url=raw.source_url,
        # Raw strings are preserved for auditing resolution issues.
        raw_home_team=raw.home_team_raw,
        raw_away_team=raw.away_team_raw,
        raw_stadium=raw.stadium_raw,
    )
    return game, review_items
def _get_abbreviation(self, team_id: str) -> str:
    """Return the team abbreviation: the last ``_``-separated segment of the ID."""
    return team_id.rsplit("_", 1)[-1]
def scrape_teams(self) -> list[Team]:
    """Get all NWSL teams from hardcoded mappings."""
    roster: list[Team] = []
    emitted: set[str] = set()
    for ab, (tid, full, town, venue) in TEAM_MAPPINGS.get("nwsl", {}).items():
        # The mapping may alias several keys to one team; emit each once.
        if tid in emitted:
            continue
        emitted.add(tid)
        roster.append(
            Team(
                id=tid,
                sport="nwsl",
                city=town,
                name=full,  # NWSL club names are used whole
                full_name=full,
                abbreviation=ab,
                conference=None,  # NWSL uses single table
                division=None,
                stadium_id=venue,
            )
        )
    return roster
def scrape_stadiums(self) -> list[Stadium]:
    """Get all NWSL stadiums from hardcoded mappings."""
    return [
        Stadium(
            id=venue_id,
            sport="nwsl",
            name=details.name,
            city=details.city,
            state=details.state,
            country=details.country,
            latitude=details.latitude,
            longitude=details.longitude,
            surface="grass",
            roof_type="open",
        )
        for venue_id, details in STADIUM_MAPPINGS.get("nwsl", {}).items()
    ]
def create_nwsl_scraper(season: int) -> NWSLScraper:
    """Factory function to create an NWSL scraper."""
    scraper = NWSLScraper(season=season)
    return scraper

View File

@@ -0,0 +1,375 @@
"""WNBA scraper implementation with multi-source fallback."""
from datetime import datetime, date, timedelta
from typing import Optional
from .base import BaseScraper, RawGameData, ScrapeResult
from ..models.game import Game
from ..models.team import Team
from ..models.stadium import Stadium
from ..models.aliases import ManualReviewItem
from ..normalizers.canonical_id import generate_game_id
from ..normalizers.team_resolver import (
TeamResolver,
TEAM_MAPPINGS,
get_team_resolver,
)
from ..normalizers.stadium_resolver import (
StadiumResolver,
STADIUM_MAPPINGS,
get_stadium_resolver,
)
from ..utils.logging import get_logger, log_game, log_warning
class WNBAScraper(BaseScraper):
    """WNBA schedule scraper with multi-source fallback.

    Sources (in priority order):
    1. ESPN API - Most reliable for WNBA
    2. WNBA official (via ESPN) - Backup option
    """

    def __init__(self, season: int, **kwargs):
        """Initialize WNBA scraper.

        Args:
            season: Season year (e.g., 2026 for 2026 season)
            **kwargs: Forwarded to BaseScraper (session/retry options).
        """
        super().__init__("wnba", season, **kwargs)
        # Resolvers translate raw source strings into canonical IDs.
        self._team_resolver = get_team_resolver("wnba")
        self._stadium_resolver = get_stadium_resolver("wnba")
def _get_sources(self) -> list[str]:
    """Return source list in priority order (ESPN is the only source)."""
    ordered_sources = ["espn"]
    return ordered_sources
def _get_source_url(self, source: str, **kwargs) -> str:
    """Build URL for a source."""
    if source != "espn":
        raise ValueError(f"Unknown source: {source}")
    date_str = kwargs.get("date", "")
    return f"https://site.api.espn.com/apis/site/v2/sports/basketball/wnba/scoreboard?dates={date_str}"
def _get_season_months(self) -> list[tuple[int, int]]:
    """Get the months to scrape for WNBA season.

    WNBA season (regular season + playoffs) runs May through
    September/October.
    """
    return [(self.season, month) for month in range(5, 11)]
def _scrape_games_from_source(self, source: str) -> list[RawGameData]:
    """Scrape games from a specific source."""
    if source != "espn":
        raise ValueError(f"Unknown source: {source}")
    return self._scrape_espn()
def _scrape_espn(self) -> list[RawGameData]:
    """Scrape games from ESPN API using date range query."""
    # Build one date window covering the whole season (May-October).
    months = self._get_season_months()
    first_year, first_month = months[0]
    last_year, last_month = months[-1]
    window_start = date(first_year, first_month, 1)
    # Last day of the final month: first day of the next month minus one.
    if last_month == 12:
        window_end = date(last_year + 1, 1, 1) - timedelta(days=1)
    else:
        window_end = date(last_year, last_month + 1, 1) - timedelta(days=1)
    date_range = f"{window_start.strftime('%Y%m%d')}-{window_end.strftime('%Y%m%d')}"
    url = f"https://site.api.espn.com/apis/site/v2/sports/basketball/wnba/scoreboard?limit=1000&dates={date_range}"
    self._logger.info(f"Fetching WNBA schedule: {date_range}")
    try:
        payload = self.session.get_json(url)
        return self._parse_espn_response(payload, url)
    except Exception as e:
        self._logger.error(f"ESPN error: {e}")
        return []
def _parse_espn_response(
    self,
    data: dict,
    source_url: str,
) -> list[RawGameData]:
    """Parse ESPN API response (scoreboard "events" list)."""
    parsed: list[RawGameData] = []
    for event in data.get("events", []):
        try:
            candidate = self._parse_espn_event(event, source_url)
        except Exception as e:
            self._logger.debug(f"Failed to parse ESPN event: {e}")
            continue
        if candidate is not None:
            parsed.append(candidate)
    return parsed
def _parse_espn_event(
    self,
    event: dict,
    source_url: str,
) -> Optional[RawGameData]:
    """Parse a single ESPN event.

    Args:
        event: One entry from the scoreboard "events" array.
        source_url: URL the event came from (recorded on the result).

    Returns:
        Parsed game data, or ``None`` for malformed events.
    """
    # Get date
    date_str = event.get("date", "")
    if not date_str:
        return None
    try:
        game_date = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
    except ValueError:
        return None
    # Get competitions
    competitions = event.get("competitions", [])
    if not competitions:
        return None
    competition = competitions[0]
    # Get teams
    competitors = competition.get("competitors", [])
    if len(competitors) != 2:
        return None
    home_team = None
    away_team = None
    home_score = None
    away_score = None
    for competitor in competitors:
        team_info = competitor.get("team", {})
        team_name = team_info.get("displayName", "")
        is_home = competitor.get("homeAway") == "home"
        # Fix: the previous truthiness check (`if score:`) dropped a
        # legitimate 0 score and stored "" verbatim. Use an explicit
        # None check and normalize unparseable values to None.
        score = competitor.get("score")
        if score is not None:
            try:
                score = int(score)
            except (ValueError, TypeError):
                score = None
        if is_home:
            home_team = team_name
            home_score = score
        else:
            away_team = team_name
            away_score = score
    if not home_team or not away_team:
        return None
    # Get venue
    venue = competition.get("venue", {})
    stadium = venue.get("fullName")
    # Get status
    status_info = competition.get("status", {})
    status_type = status_info.get("type", {})
    status_name = status_type.get("name", "").lower()
    if status_name == "status_final":
        status = "final"
    elif status_name == "status_postponed":
        status = "postponed"
    elif status_name == "status_canceled":
        status = "cancelled"
    else:
        status = "scheduled"
    return RawGameData(
        game_date=game_date,
        home_team_raw=home_team,
        away_team_raw=away_team,
        stadium_raw=stadium,
        home_score=home_score,
        away_score=away_score,
        status=status,
        source_url=source_url,
    )
def _normalize_games(
    self,
    raw_games: list[RawGameData],
) -> tuple[list[Game], list[ManualReviewItem]]:
    """Normalize raw games to Game objects with canonical IDs."""
    normalized: list[Game] = []
    pending_review: list[ManualReviewItem] = []
    for entry in raw_games:
        game, flagged = self._normalize_single_game(entry)
        pending_review.extend(flagged)
        if game is None:
            continue
        normalized.append(game)
        log_game(
            self.sport,
            game.id,
            game.home_team_id,
            game.away_team_id,
            game.game_date.strftime("%Y-%m-%d"),
            game.status,
        )
    return normalized, pending_review
def _normalize_single_game(
self,
raw: RawGameData,
) -> tuple[Optional[Game], list[ManualReviewItem]]:
"""Normalize a single raw game."""
review_items: list[ManualReviewItem] = []
# Resolve home team
home_result = self._team_resolver.resolve(
raw.home_team_raw,
check_date=raw.game_date.date(),
source_url=raw.source_url,
)
if home_result.review_item:
review_items.append(home_result.review_item)
if not home_result.canonical_id:
log_warning(f"Could not resolve home team: {raw.home_team_raw}")
return None, review_items
# Resolve away team
away_result = self._team_resolver.resolve(
raw.away_team_raw,
check_date=raw.game_date.date(),
source_url=raw.source_url,
)
if away_result.review_item:
review_items.append(away_result.review_item)
if not away_result.canonical_id:
log_warning(f"Could not resolve away team: {raw.away_team_raw}")
return None, review_items
# Resolve stadium
stadium_id = None
if raw.stadium_raw:
stadium_result = self._stadium_resolver.resolve(
raw.stadium_raw,
check_date=raw.game_date.date(),
source_url=raw.source_url,
)
if stadium_result.review_item:
review_items.append(stadium_result.review_item)
stadium_id = stadium_result.canonical_id
# Get abbreviations for game ID
home_abbrev = self._get_abbreviation(home_result.canonical_id)
away_abbrev = self._get_abbreviation(away_result.canonical_id)
# Generate canonical game ID
game_id = generate_game_id(
sport=self.sport,
season=self.season,
away_abbrev=away_abbrev,
home_abbrev=home_abbrev,
game_date=raw.game_date,
game_number=None,
)
game = Game(
id=game_id,
sport=self.sport,
season=self.season,
home_team_id=home_result.canonical_id,
away_team_id=away_result.canonical_id,
stadium_id=stadium_id or "",
game_date=raw.game_date,
game_number=None,
home_score=raw.home_score,
away_score=raw.away_score,
status=raw.status,
source_url=raw.source_url,
raw_home_team=raw.home_team_raw,
raw_away_team=raw.away_team_raw,
raw_stadium=raw.stadium_raw,
)
return game, review_items
def _get_abbreviation(self, team_id: str) -> str:
"""Extract abbreviation from team ID."""
parts = team_id.split("_")
return parts[-1] if parts else ""
def scrape_teams(self) -> list[Team]:
"""Get all WNBA teams from hardcoded mappings."""
teams: list[Team] = []
seen: set[str] = set()
for abbrev, (team_id, full_name, city, stadium_id) in TEAM_MAPPINGS.get("wnba", {}).items():
if team_id in seen:
continue
seen.add(team_id)
# Parse team name
parts = full_name.split()
team_name = parts[-1] if parts else full_name
team = Team(
id=team_id,
sport="wnba",
city=city,
name=team_name,
full_name=full_name,
abbreviation=abbrev,
conference=None, # WNBA uses single table now
division=None,
stadium_id=stadium_id,
)
teams.append(team)
return teams
def scrape_stadiums(self) -> list[Stadium]:
"""Get all WNBA stadiums from hardcoded mappings."""
stadiums: list[Stadium] = []
wnba_stadiums = STADIUM_MAPPINGS.get("wnba", {})
for stadium_id, info in wnba_stadiums.items():
stadium = Stadium(
id=stadium_id,
sport="wnba",
name=info.name,
city=info.city,
state=info.state,
country=info.country,
latitude=info.latitude,
longitude=info.longitude,
surface="hardwood",
roof_type="dome",
)
stadiums.append(stadium)
return stadiums
def create_wnba_scraper(season: int) -> WNBAScraper:
    """Build a WNBAScraper configured for the given season year."""
    scraper = WNBAScraper(season=season)
    return scraper