fix(data): add timezone handling for Sports-Reference scrapers and new stadiums
- Add ET timezone (America/New_York) to all Sports-Reference scrapers:
  - NBA: Basketball-Reference times parsed as ET
  - NFL: Pro-Football-Reference times parsed as ET
  - NHL: Hockey-Reference times parsed as ET
  - MLB: Baseball-Reference times parsed as ET
- Document source timezones in scraper docstrings
- Add 11 new stadiums to STADIUM_MAPPINGS:
  - NFL: 5 international venues (Corinthians Arena, Croke Park,
    Olympic Stadium Berlin, Santiago Bernabéu, Tom Benson Hall of Fame)
  - MLS: 4 alternate venues (Miami Freedom Park, Citi Field,
    LA Memorial Coliseum, M&T Bank Stadium)
  - NWSL: 2 alternate venues (Northwestern Medicine Field, ONE Spokane)
- Add 15 stadium aliases for MLS/NWSL team-based lookups
- Fix CanonicalSyncService to sync timezone identifier to SwiftData
- Update debug logging to use stadium timezone for display
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
|
||||
from datetime import datetime, date, timezone
|
||||
from typing import Optional
|
||||
from zoneinfo import ZoneInfo
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
|
||||
@@ -45,7 +46,12 @@ class NBAScraper(BaseScraper):
|
||||
Sources (in priority order):
|
||||
1. Basketball-Reference - Most reliable, complete historical data
|
||||
2. ESPN API - Good for current/future seasons
|
||||
3. CBS Sports - Backup option
|
||||
3. CBS Sports - Backup option (not implemented)
|
||||
|
||||
Source Timezones:
|
||||
- basketball_reference: Eastern Time (ET) - times displayed as "7:30p"
|
||||
- espn: UTC - ISO 8601 format with "Z" suffix
|
||||
- cbs: Not implemented
|
||||
"""
|
||||
|
||||
def __init__(self, season: int, **kwargs):
|
||||
@@ -196,6 +202,25 @@ class NBAScraper(BaseScraper):
|
||||
self._logger.debug(f"Could not parse date: {date_text}")
|
||||
return None
|
||||
|
||||
# Get game start time (format: "7:30p" or "10:00p") - times are in ET
|
||||
time_cell = row.find("td", {"data-stat": "game_start_time"})
|
||||
if time_cell:
|
||||
time_text = time_cell.get_text(strip=True)
|
||||
if time_text:
|
||||
try:
|
||||
# Parse time like "7:30p" or "10:00p"
|
||||
# Normalize: "7:30p" -> "7:30 PM", "10:00p" -> "10:00 PM"
|
||||
time_normalized = time_text.replace("p", " PM").replace("a", " AM")
|
||||
game_time = datetime.strptime(time_normalized, "%I:%M %p")
|
||||
# Combine date and time with ET timezone (Basketball-Reference uses ET)
|
||||
game_date = game_date.replace(
|
||||
hour=game_time.hour,
|
||||
minute=game_time.minute,
|
||||
tzinfo=ZoneInfo("America/New_York"),
|
||||
)
|
||||
except ValueError:
|
||||
self._logger.debug(f"Could not parse time: {time_text}, using midnight")
|
||||
|
||||
# Get teams
|
||||
away_cell = row.find("td", {"data-stat": "visitor_team_name"})
|
||||
home_cell = row.find("td", {"data-stat": "home_team_name"})
|
||||
|
||||
Reference in New Issue
Block a user