Adds the full Django application layer on top of sportstime_parser: - core: Sport, Team, Stadium, Game models with aliases and league structure - scraper: orchestration engine, adapter, job management, Celery tasks - cloudkit: CloudKit sync client, sync state tracking, sync jobs - dashboard: staff dashboard for monitoring scrapers, sync, review queue - notifications: email reports for scrape/sync results - Docker setup for deployment (Dockerfile, docker-compose, entrypoint) Game exports now use game_datetime_utc (ISO 8601 UTC) instead of venue-local date+time strings, matching the canonical format used by the iOS app. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
160 lines
4.5 KiB
Python
160 lines
4.5 KiB
Python
from django.db import models
|
|
from simple_history.models import HistoricalRecords
|
|
|
|
|
|
class ScrapeJob(models.Model):
    """
    Record of a scraping job execution.

    Each row belongs to one ``scraper.ScraperConfig`` and stores the job's
    status, how it was triggered, start/finish timestamps, result counters,
    error details and the Celery task ID.
    """

    # Allowed values for ``status`` as (stored value, display label) pairs.
    STATUS_CHOICES = [
        ('pending', 'Pending'),
        ('running', 'Running'),
        ('completed', 'Completed'),
        ('failed', 'Failed'),
        ('cancelled', 'Cancelled'),
    ]

    # Deleting the config cascades to its jobs; reverse accessor is ``jobs``.
    config = models.ForeignKey(
        'scraper.ScraperConfig',
        on_delete=models.CASCADE,
        related_name='jobs'
    )
    status = models.CharField(
        max_length=20,
        choices=STATUS_CHOICES,
        default='pending'
    )
    triggered_by = models.CharField(
        max_length=50,
        default='manual',
        help_text='How the job was triggered (manual, scheduled, api)'
    )

    # Timing (both nullable: unset until the job starts / finishes)
    started_at = models.DateTimeField(null=True, blank=True)
    finished_at = models.DateTimeField(null=True, blank=True)

    # Results
    games_found = models.PositiveIntegerField(default=0)
    games_new = models.PositiveIntegerField(default=0)
    games_updated = models.PositiveIntegerField(default=0)
    games_unchanged = models.PositiveIntegerField(default=0)
    games_errors = models.PositiveIntegerField(default=0)

    teams_found = models.PositiveIntegerField(default=0)
    stadiums_found = models.PositiveIntegerField(default=0)
    review_items_created = models.PositiveIntegerField(default=0)

    # Error tracking
    error_message = models.TextField(blank=True)
    error_traceback = models.TextField(blank=True)

    # Celery task ID for tracking
    celery_task_id = models.CharField(
        max_length=255,
        blank=True,
        help_text='Celery task ID for this job'
    )

    # Metadata
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        # Newest jobs first by default.
        ordering = ['-created_at']
        verbose_name = 'Scrape Job'
        verbose_name_plural = 'Scrape Jobs'
        indexes = [
            models.Index(fields=['config', 'status']),
            models.Index(fields=['status', 'created_at']),
        ]

    def __str__(self):
        """Return ``"<config> - <created_at as YYYY-MM-DD HH:MM>"``.

        An instance that has not been saved yet has no creation timestamp
        and is labelled ``"<config> - (unsaved)"`` instead.
        """
        # created_at is populated by auto_now_add on first save, so it is
        # still None on a freshly constructed instance; calling strftime on
        # it would raise AttributeError.
        if self.created_at is None:
            return f"{self.config} - (unsaved)"
        return f"{self.config} - {self.created_at.strftime('%Y-%m-%d %H:%M')}"

    @property
    def duration(self):
        """Return job duration as timedelta, or None if either timestamp is unset."""
        if self.started_at and self.finished_at:
            return self.finished_at - self.started_at
        return None

    @property
    def duration_display(self):
        """Return formatted duration string.

        Produces ``"<m>m <s>s"`` when the duration is at least one minute,
        ``"<s>s"`` otherwise, and ``'-'`` when the duration is unknown.
        Fractions of a second are truncated.
        """
        duration = self.duration
        # Compare against None explicitly: a zero-length timedelta is falsy,
        # and a truthiness test would report it as '-' (no duration at all)
        # instead of "0s".
        if duration is None:
            return '-'
        total_seconds = int(duration.total_seconds())
        minutes, seconds = divmod(total_seconds, 60)
        if minutes > 0:
            return f"{minutes}m {seconds}s"
        return f"{seconds}s"

    @property
    def sport(self):
        """Return the ``sport`` of this job's config."""
        return self.config.sport

    @property
    def season(self):
        """Return the ``season`` of this job's config."""
        return self.config.season

    def get_summary(self):
        """Return summary dict for notifications.

        The dict carries the sport's ``short_name``, the season as rendered
        by the sport's ``get_season_display``, the job status, the formatted
        duration, the game counters, the number of review items created and
        the error message.
        """
        return {
            'sport': self.config.sport.short_name,
            'season': self.config.sport.get_season_display(self.config.season),
            'status': self.status,
            'duration': self.duration_display,
            'games_found': self.games_found,
            'games_new': self.games_new,
            'games_updated': self.games_updated,
            'games_unchanged': self.games_unchanged,
            'games_errors': self.games_errors,
            'review_items': self.review_items_created,
            'error_message': self.error_message,
        }
|
|
|
|
|
|
class ScrapeJobLog(models.Model):
    """
    A single log line attached to a scrape job.

    Holds a severity level, the message text, an optional source label and
    optional structured JSON data, plus the time the entry was created.
    """

    # Severity values accepted by ``level`` as (stored value, label) pairs.
    LEVEL_CHOICES = [
        ('debug', 'Debug'),
        ('info', 'Info'),
        ('warning', 'Warning'),
        ('error', 'Error'),
    ]

    # Owning job; entries are removed with it and reachable as ``job.logs``.
    job = models.ForeignKey(ScrapeJob, on_delete=models.CASCADE, related_name='logs')
    level = models.CharField(max_length=10, choices=LEVEL_CHOICES, default='info')
    message = models.TextField()
    source = models.CharField(
        max_length=100,
        blank=True,
        help_text='Source/component that generated this log',
    )
    extra_data = models.JSONField(
        null=True,
        blank=True,
        help_text='Additional structured data',
    )
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        # Oldest entries first by default.
        ordering = ['created_at']
        verbose_name = 'Scrape Job Log'
        verbose_name_plural = 'Scrape Job Logs'

    def __str__(self):
        """Return the upper-cased level in brackets and the first 50 characters of the message."""
        level_tag = self.level.upper()
        preview = self.message[:50]
        return f"[{level_tag}] {preview}"
|