feat: add Django web app, CloudKit sync, dashboard, and game_datetime_utc export

Adds the full Django application layer on top of sportstime_parser:
- core: Sport, Team, Stadium, Game models with aliases and league structure
- scraper: orchestration engine, adapter, job management, Celery tasks
- cloudkit: CloudKit sync client, sync state tracking, sync jobs
- dashboard: staff dashboard for monitoring scrapers, sync, review queue
- notifications: email reports for scrape/sync results
- Docker setup for deployment (Dockerfile, docker-compose, entrypoint)

Game exports now use game_datetime_utc (ISO 8601 UTC) instead of
venue-local date+time strings, matching the canonical format used
by the iOS app.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-02-19 14:04:27 -06:00
parent 4353d5943c
commit 63acf7accb
114 changed files with 13070 additions and 887 deletions

199
scraper/models.py Normal file
View File

@@ -0,0 +1,199 @@
"""
Scraper models for tracking scraping jobs and manual reviews.
"""
from django.conf import settings
from django.db import models
from simple_history.models import HistoricalRecords
class ScraperConfig(models.Model):
    """
    Per-season scraping configuration for a single sport.

    One row pairs a sport with a season year and controls whether, where,
    and how often the schedule scraper runs for that pairing.
    """

    # Lazy string reference avoids importing the core app's models here.
    sport = models.ForeignKey(
        'core.Sport',
        on_delete=models.CASCADE,
        related_name='scraper_configs',
    )
    season = models.PositiveSmallIntegerField(
        help_text='Season year (start year for split seasons)',
    )
    is_active = models.BooleanField(
        default=True,
        help_text='Whether this config is actively scraping',
    )
    schedule_url = models.URLField(
        blank=True,
        help_text='Base URL for schedule scraping',
    )
    scrape_interval_hours = models.PositiveSmallIntegerField(
        default=24,
        help_text='How often to run the scraper (hours)',
    )
    last_scrape_at = models.DateTimeField(
        null=True,
        blank=True,
        help_text='When the last scrape completed',
    )
    next_scrape_at = models.DateTimeField(
        null=True,
        blank=True,
        help_text='When the next scrape is scheduled',
    )

    # Bookkeeping timestamps maintained automatically by Django.
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    # Full change history via django-simple-history.
    history = HistoricalRecords()

    class Meta:
        ordering = ['-season', 'sport']
        unique_together = ['sport', 'season']
        verbose_name = 'Scraper Config'
        verbose_name_plural = 'Scraper Configs'

    def __str__(self):
        """Human-readable label: sport short name plus season display."""
        season_label = self.sport.get_season_display(self.season)
        return f"{self.sport.short_name} {season_label}"
class ScrapeJob(models.Model):
    """
    Record of one execution of the scraper for a ScraperConfig.

    Tracks lifecycle status, start/end timing, per-run game counters,
    and captured error/log output.
    """

    STATUS_CHOICES = [
        ('pending', 'Pending'),
        ('running', 'Running'),
        ('completed', 'Completed'),
        ('failed', 'Failed'),
        ('cancelled', 'Cancelled'),
    ]

    # Deleting a config removes its job history as well.
    config = models.ForeignKey(
        ScraperConfig,
        on_delete=models.CASCADE,
        related_name='jobs',
    )
    status = models.CharField(
        max_length=20,
        choices=STATUS_CHOICES,
        default='pending',
    )
    started_at = models.DateTimeField(null=True, blank=True)
    completed_at = models.DateTimeField(null=True, blank=True)

    # Counters summarizing what the run produced.
    games_found = models.PositiveIntegerField(default=0)
    games_created = models.PositiveIntegerField(default=0)
    games_updated = models.PositiveIntegerField(default=0)
    errors = models.TextField(blank=True)
    log_output = models.TextField(blank=True)

    # Bookkeeping timestamps maintained automatically by Django.
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['-created_at']
        verbose_name = 'Scrape Job'
        verbose_name_plural = 'Scrape Jobs'

    def __str__(self):
        """Label combining config, status, and creation time."""
        when = self.created_at.strftime('%Y-%m-%d %H:%M')
        return f"{self.config} - {self.status} ({when})"

    @property
    def duration(self):
        """Elapsed seconds between start and completion, or None if
        either timestamp is unset."""
        if self.started_at is None or self.completed_at is None:
            return None
        return (self.completed_at - self.started_at).total_seconds()
class ManualReviewItem(models.Model):
    """
    An item flagged during scraping for human review.

    Covers fuzzy team/stadium matches, conflicting game data, and newly
    discovered aliases. Staff resolve items via the dashboard, recording
    the outcome, who resolved it, and when.
    """

    STATUS_CHOICES = [
        ('pending', 'Pending Review'),
        ('approved', 'Approved'),
        ('rejected', 'Rejected'),
        ('resolved', 'Resolved'),
    ]
    ITEM_TYPE_CHOICES = [
        ('team', 'Team Match'),
        ('stadium', 'Stadium Match'),
        ('game', 'Game Conflict'),
        ('alias', 'New Alias'),
    ]

    # Lazy string reference avoids importing the core app's models here.
    sport = models.ForeignKey(
        'core.Sport',
        on_delete=models.CASCADE,
        related_name='review_items'
    )
    # The scrape job that produced this item; kept (nulled) if the job
    # record is deleted so the review queue survives job cleanup.
    job = models.ForeignKey(
        ScrapeJob,
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name='review_items'
    )
    item_type = models.CharField(
        max_length=20,
        choices=ITEM_TYPE_CHOICES
    )
    status = models.CharField(
        max_length=20,
        choices=STATUS_CHOICES,
        default='pending'
    )
    raw_value = models.CharField(
        max_length=500,
        help_text='The raw scraped value'
    )
    matched_value = models.CharField(
        max_length=500,
        blank=True,
        help_text='The matched canonical value (if any)'
    )
    confidence = models.PositiveSmallIntegerField(
        default=0,
        help_text='Match confidence score (0-100)'
    )
    context = models.JSONField(
        default=dict,
        blank=True,
        help_text='Additional context (game date, opposing team, etc)'
    )
    resolution_notes = models.TextField(
        blank=True,
        help_text='Notes about the resolution'
    )
    resolved_at = models.DateTimeField(
        null=True,
        blank=True
    )
    # Reference the configured user model instead of hard-coding
    # 'auth.User', so projects that swap in a custom AUTH_USER_MODEL
    # still work (per Django's "Referencing the User model" guidance).
    resolved_by = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name='resolved_reviews'
    )

    # Metadata
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['-confidence', '-created_at']
        verbose_name = 'Manual Review Item'
        verbose_name_plural = 'Manual Review Items'

    def __str__(self):
        return f"{self.item_type}: {self.raw_value} ({self.confidence}%)"