wip
This commit is contained in:
@@ -95,9 +95,11 @@ class NBAScraper(BaseScraper):
|
||||
BR organizes games by month with separate pages.
|
||||
Format: https://www.basketball-reference.com/leagues/NBA_YYYY_games-month.html
|
||||
where YYYY is the ending year of the season.
|
||||
Bails early if the first three months have no data (the season likely doesn't exist).
|
||||
"""
|
||||
all_games: list[RawGameData] = []
|
||||
end_year = self.season + 1
|
||||
consecutive_empty_months = 0
|
||||
|
||||
for month in BR_MONTHS:
|
||||
url = self._get_source_url("basketball_reference", month=month, year=end_year)
|
||||
@@ -105,13 +107,23 @@ class NBAScraper(BaseScraper):
|
||||
try:
|
||||
html = self.session.get_html(url)
|
||||
games = self._parse_basketball_reference(html, url)
|
||||
all_games.extend(games)
|
||||
self._logger.debug(f"Found {len(games)} games in {month}")
|
||||
|
||||
if games:
|
||||
all_games.extend(games)
|
||||
consecutive_empty_months = 0
|
||||
self._logger.debug(f"Found {len(games)} games in {month}")
|
||||
else:
|
||||
consecutive_empty_months += 1
|
||||
|
||||
except Exception as e:
|
||||
# Some months may not exist (e.g., no games in August)
|
||||
self._logger.debug(f"No data for {month}: {e}")
|
||||
continue
|
||||
consecutive_empty_months += 1
|
||||
|
||||
# If first 3 months (Oct, Nov, Dec) all have no data, season doesn't exist
|
||||
if consecutive_empty_months >= 3 and not all_games:
|
||||
self._logger.info(f"No games found in first {consecutive_empty_months} months, season likely doesn't exist")
|
||||
break
|
||||
|
||||
return all_games
|
||||
|
||||
@@ -247,8 +259,11 @@ class NBAScraper(BaseScraper):
|
||||
|
||||
ESPN API returns games for a specific date range.
|
||||
We iterate through each day of the season.
|
||||
Bails out early after 45 consecutive days with no games found or failed requests.
|
||||
"""
|
||||
all_games: list[RawGameData] = []
|
||||
consecutive_empty_days = 0
|
||||
max_empty_days = 45 # Bail after ~1.5 months of no games
|
||||
|
||||
for year, month in self._get_season_months():
|
||||
# Get number of days in month
|
||||
@@ -267,10 +282,25 @@ class NBAScraper(BaseScraper):
|
||||
|
||||
data = self.session.get_json(url)
|
||||
games = self._parse_espn_response(data, url)
|
||||
all_games.extend(games)
|
||||
|
||||
if games:
|
||||
all_games.extend(games)
|
||||
consecutive_empty_days = 0
|
||||
else:
|
||||
consecutive_empty_days += 1
|
||||
|
||||
# Bail early if no games found for a long stretch
|
||||
if consecutive_empty_days >= max_empty_days:
|
||||
self._logger.info(f"No games found for {max_empty_days} consecutive days, stopping ESPN scrape")
|
||||
return all_games
|
||||
|
||||
except Exception as e:
|
||||
self._logger.debug(f"ESPN error for {year}-{month}-{day}: {e}")
|
||||
consecutive_empty_days += 1
|
||||
|
||||
if consecutive_empty_days >= max_empty_days:
|
||||
self._logger.info(f"Too many consecutive failures, stopping ESPN scrape")
|
||||
return all_games
|
||||
continue
|
||||
|
||||
return all_games
|
||||
|
||||
Reference in New Issue
Block a user