"""Tests for NFL scraper.""" from datetime import datetime from unittest.mock import patch import pytest from sportstime_parser.scrapers.nfl import NFLScraper, create_nfl_scraper from sportstime_parser.scrapers.base import RawGameData from sportstime_parser.tests.fixtures import ( load_json_fixture, NFL_ESPN_SCOREBOARD_JSON, ) class TestNFLScraperInit: """Test NFLScraper initialization.""" def test_creates_scraper_with_season(self): """Test scraper initializes with correct season.""" scraper = NFLScraper(season=2025) assert scraper.sport == "nfl" assert scraper.season == 2025 def test_factory_function_creates_scraper(self): """Test factory function creates correct scraper.""" scraper = create_nfl_scraper(season=2025) assert isinstance(scraper, NFLScraper) assert scraper.season == 2025 def test_expected_game_count(self): """Test expected game count is correct for NFL.""" scraper = NFLScraper(season=2025) assert scraper.expected_game_count == 272 def test_sources_in_priority_order(self): """Test sources are returned in correct priority order.""" scraper = NFLScraper(season=2025) sources = scraper._get_sources() assert sources == ["espn", "pro_football_reference", "cbs"] class TestESPNParsing: """Test ESPN API response parsing.""" def test_parses_completed_games(self): """Test parsing completed games from ESPN.""" scraper = NFLScraper(season=2025) data = load_json_fixture(NFL_ESPN_SCOREBOARD_JSON) games = scraper._parse_espn_response(data, "http://espn.com/api") completed = [g for g in games if g.status == "final"] assert len(completed) == 2 # Chiefs @ Ravens kc_bal = next(g for g in completed if g.away_team_raw == "Kansas City Chiefs") assert kc_bal.home_team_raw == "Baltimore Ravens" assert kc_bal.away_score == 27 assert kc_bal.home_score == 20 assert kc_bal.stadium_raw == "M&T Bank Stadium" def test_parses_scheduled_games(self): """Test parsing scheduled games from ESPN.""" scraper = NFLScraper(season=2025) data = load_json_fixture(NFL_ESPN_SCOREBOARD_JSON) games = scraper._parse_espn_response(data, "http://espn.com/api") scheduled = [g for g in games if g.status == "scheduled"] assert len(scheduled) == 1 dal_cle = scheduled[0] assert dal_cle.away_team_raw == "Dallas Cowboys" assert dal_cle.home_team_raw == "Cleveland Browns" assert dal_cle.stadium_raw == "Cleveland Browns Stadium" def test_parses_venue_info(self): """Test venue information is extracted.""" scraper = NFLScraper(season=2025) data = load_json_fixture(NFL_ESPN_SCOREBOARD_JSON) games = scraper._parse_espn_response(data, "http://espn.com/api") for game in games: assert game.stadium_raw is not None class TestGameNormalization: """Test game normalization and canonical ID generation.""" def test_normalizes_games_with_canonical_ids(self): """Test games are normalized with correct canonical IDs.""" scraper = NFLScraper(season=2025) raw_games = [ RawGameData( game_date=datetime(2025, 9, 7), home_team_raw="Baltimore Ravens", away_team_raw="Kansas City Chiefs", stadium_raw="M&T Bank Stadium", home_score=20, away_score=27, status="final", source_url="http://example.com", ) ] games, review_items = scraper._normalize_games(raw_games) assert len(games) == 1 game = games[0] # Check canonical ID format assert game.id == "nfl_2025_kc_bal_0907" assert game.sport == "nfl" assert game.season == 2025 # Check team IDs assert game.home_team_id == "team_nfl_bal" assert game.away_team_id == "team_nfl_kc" # Check scores preserved assert game.home_score == 20 assert game.away_score == 27 def test_creates_review_items_for_unresolved_teams(self): """Test review items are created for unresolved teams.""" scraper = NFLScraper(season=2025) raw_games = [ RawGameData( game_date=datetime(2025, 9, 7), home_team_raw="Unknown Team XYZ", away_team_raw="Kansas City Chiefs", stadium_raw="Arrowhead Stadium", status="scheduled", ), ] games, review_items = scraper._normalize_games(raw_games) # Game should not be created due to unresolved team assert len(games) == 0 # But there should be a review item assert len(review_items) >= 1 class TestTeamAndStadiumScraping: """Test team and stadium data scraping.""" def test_scrapes_all_nfl_teams(self): """Test all 32 NFL teams are returned.""" scraper = NFLScraper(season=2025) teams = scraper.scrape_teams() # 32 NFL teams assert len(teams) == 32 # Check team IDs are unique team_ids = [t.id for t in teams] assert len(set(team_ids)) == 32 # Check all teams have required fields for team in teams: assert team.id.startswith("team_nfl_") assert team.sport == "nfl" assert team.city assert team.name assert team.full_name assert team.abbreviation def test_teams_have_conferences_and_divisions(self): """Test teams have conference and division info.""" scraper = NFLScraper(season=2025) teams = scraper.scrape_teams() # Count teams by conference afc = [t for t in teams if t.conference == "AFC"] nfc = [t for t in teams if t.conference == "NFC"] assert len(afc) == 16 assert len(nfc) == 16 def test_scrapes_all_nfl_stadiums(self): """Test all NFL stadiums are returned.""" scraper = NFLScraper(season=2025) stadiums = scraper.scrape_stadiums() # Should have stadiums for all teams (some share) assert len(stadiums) >= 30 # Check all stadiums have required fields for stadium in stadiums: assert stadium.id.startswith("stadium_nfl_") assert stadium.sport == "nfl" assert stadium.name assert stadium.city assert stadium.state assert stadium.country == "USA" assert stadium.latitude != 0 assert stadium.longitude != 0 class TestScrapeFallback: """Test multi-source fallback behavior.""" def test_falls_back_to_next_source_on_failure(self): """Test scraper tries next source when first fails.""" scraper = NFLScraper(season=2025) with patch.object(scraper, '_scrape_espn') as mock_espn, \ patch.object(scraper, '_scrape_pro_football_reference') as mock_pfr: # Make ESPN fail mock_espn.side_effect = Exception("Connection failed") # Make PFR return data mock_pfr.return_value = [ RawGameData( game_date=datetime(2025, 9, 7), home_team_raw="Baltimore Ravens", away_team_raw="Kansas City Chiefs", stadium_raw="M&T Bank Stadium", status="scheduled", ) ] result = scraper.scrape_games() assert result.success assert result.source == "pro_football_reference" assert mock_espn.called assert mock_pfr.called class TestSeasonMonths: """Test season month calculation.""" def test_gets_correct_season_months(self): """Test correct months are returned for NFL season.""" scraper = NFLScraper(season=2025) months = scraper._get_season_months() # NFL season is September-February assert len(months) == 6 # Sep, Oct, Nov, Dec, Jan, Feb # Check first month is September of season year assert months[0] == (2025, 9) # Check last month is February of following year assert months[-1] == (2026, 2) # Check transition to new year assert months[3] == (2025, 12) # December assert months[4] == (2026, 1) # January class TestInternationalFiltering: """Test international game filtering. Note: Filtering happens in _parse_espn_response, not _normalize_games. """ def test_filters_london_games_during_parsing(self): """Test London games are filtered out during ESPN parsing.""" scraper = NFLScraper(season=2025) # Create ESPN-like data with London game espn_data = { "events": [ { "date": "2025-10-15T09:30:00Z", "competitions": [ { "neutralSite": True, "venue": { "fullName": "London Stadium", "address": {"city": "London", "country": "UK"}, }, "competitors": [ {"homeAway": "home", "team": {"displayName": "Jacksonville Jaguars"}}, {"homeAway": "away", "team": {"displayName": "Buffalo Bills"}}, ], } ], } ] } games = scraper._parse_espn_response(espn_data, "http://espn.com/api") # London game should be filtered assert len(games) == 0 def test_keeps_us_games(self): """Test US games are kept.""" scraper = NFLScraper(season=2025) raw_games = [ RawGameData( game_date=datetime(2025, 9, 7), home_team_raw="Baltimore Ravens", away_team_raw="Kansas City Chiefs", stadium_raw="M&T Bank Stadium", status="scheduled", ), ] games, _ = scraper._normalize_games(raw_games) assert len(games) == 1