feat: add Django web app, CloudKit sync, dashboard, and game_datetime_utc export

Adds the full Django application layer on top of sportstime_parser:
- core: Sport, Team, Stadium, Game models with aliases and league structure
- scraper: orchestration engine, adapter, job management, Celery tasks
- cloudkit: CloudKit sync client, sync state tracking, sync jobs
- dashboard: staff dashboard for monitoring scrapers, sync, review queue
- notifications: email reports for scrape/sync results
- Docker setup for deployment (Dockerfile, docker-compose, entrypoint)

Game exports now use game_datetime_utc (ISO 8601 UTC) instead of
venue-local date+time strings, matching the canonical format used
by the iOS app.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-02-19 14:04:27 -06:00
parent 4353d5943c
commit 63acf7accb
114 changed files with 13070 additions and 887 deletions

View File

@@ -2,6 +2,7 @@
from datetime import datetime, date, timedelta
from typing import Optional
from zoneinfo import ZoneInfo
from bs4 import BeautifulSoup
from .base import BaseScraper, RawGameData, ScrapeResult
@@ -28,9 +29,14 @@ class MLBScraper(BaseScraper):
"""MLB schedule scraper with multi-source fallback.
Sources (in priority order):
1. Baseball-Reference - Most reliable, complete historical data
2. MLB Stats API - Official MLB data
3. ESPN API - Backup option
1. MLB Stats API - Official MLB data (primary)
2. ESPN API - Backup option
3. Baseball-Reference - Complete historical data
Source Timezones:
- mlb_api: UTC - ISO 8601 format with "Z" suffix (gameDate field)
- espn: UTC - ISO 8601 format with "Z" suffix
- baseball_reference: Eastern Time (ET) - times displayed as "7:05 PM ET"
"""
def __init__(self, season: int, **kwargs):
@@ -143,7 +149,22 @@ class MLBScraper(BaseScraper):
continue
try:
game = self._parse_br_game(elem, current_date, source_url)
# Extract game time from the element if present
# Baseball-Reference may have time in a span or in the text
game_time_for_row = None
time_elem = elem.find("span", class_="game_time")
if time_elem:
time_text = time_elem.get_text(strip=True)
if time_text:
try:
# Parse time like "7:05 PM ET" or "1:10 PM"
# Remove timezone suffix if present
time_clean = time_text.replace(" ET", "").replace(" PT", "").replace(" CT", "").replace(" MT", "").strip()
game_time_for_row = datetime.strptime(time_clean, "%I:%M %p")
except ValueError:
pass
game = self._parse_br_game(elem, current_date, source_url, game_time_for_row)
if game:
games.append(game)
except Exception as e:
@@ -157,6 +178,7 @@ class MLBScraper(BaseScraper):
elem,
game_date: datetime,
source_url: str,
game_time: Optional[datetime] = None,
) -> Optional[RawGameData]:
"""Parse a single Baseball-Reference game element."""
text = elem.get_text(" ", strip=True)
@@ -206,8 +228,17 @@ class MLBScraper(BaseScraper):
# Third link might be stadium
stadium = links[2].get_text(strip=True)
# Combine date and time if time was provided, with ET timezone (Baseball-Reference uses ET)
final_game_date = game_date
if game_time:
final_game_date = game_date.replace(
hour=game_time.hour,
minute=game_time.minute,
tzinfo=ZoneInfo("America/New_York"),
)
return RawGameData(
game_date=game_date,
game_date=final_game_date,
home_team_raw=home_team,
away_team_raw=away_team,
stadium_raw=stadium,
@@ -672,6 +703,7 @@ class MLBScraper(BaseScraper):
country=info.country,
latitude=info.latitude,
longitude=info.longitude,
timezone=info.timezone,
surface="grass", # Most MLB stadiums
roof_type="open", # Most MLB stadiums
)