from django.db import models
from simple_history.models import HistoricalRecords


class ScrapeJob(models.Model):
    """
    Record of a scraping job execution.

    Each row belongs to one ``scraper.ScraperConfig`` and stores the job's
    status, how it was triggered, start/finish timestamps, result counters,
    error details and the Celery task ID associated with it.
    """
    STATUS_CHOICES = [
        ('pending', 'Pending'),
        ('running', 'Running'),
        ('completed', 'Completed'),
        ('failed', 'Failed'),
        ('cancelled', 'Cancelled'),
    ]

    # Deleting the config cascades to its jobs; reverse accessor is `jobs`.
    config = models.ForeignKey(
        'scraper.ScraperConfig',
        on_delete=models.CASCADE,
        related_name='jobs'
    )
    status = models.CharField(
        max_length=20,
        choices=STATUS_CHOICES,
        default='pending'
    )
    triggered_by = models.CharField(
        max_length=50,
        default='manual',
        help_text='How the job was triggered (manual, scheduled, api)'
    )

    # Timing
    started_at = models.DateTimeField(null=True, blank=True)
    finished_at = models.DateTimeField(null=True, blank=True)

    # Results
    games_found = models.PositiveIntegerField(default=0)
    games_new = models.PositiveIntegerField(default=0)
    games_updated = models.PositiveIntegerField(default=0)
    games_unchanged = models.PositiveIntegerField(default=0)
    games_errors = models.PositiveIntegerField(default=0)
    teams_found = models.PositiveIntegerField(default=0)
    stadiums_found = models.PositiveIntegerField(default=0)
    review_items_created = models.PositiveIntegerField(default=0)

    # Error tracking
    error_message = models.TextField(blank=True)
    error_traceback = models.TextField(blank=True)

    # Celery task ID for tracking
    celery_task_id = models.CharField(
        max_length=255,
        blank=True,
        help_text='Celery task ID for this job'
    )

    # Metadata
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['-created_at']
        verbose_name = 'Scrape Job'
        verbose_name_plural = 'Scrape Jobs'
        indexes = [
            models.Index(fields=['config', 'status']),
            models.Index(fields=['status', 'created_at']),
        ]

    def __str__(self):
        """Return ``"<config> - <created_at as YYYY-MM-DD HH:MM>"``."""
        # created_at is filled in by auto_now_add only when the row is first
        # saved, so it is still None on an unsaved instance. Guard against
        # calling strftime on None so str()/repr() never raise.
        if self.created_at is None:
            return f"{self.config} - (unsaved)"
        return f"{self.config} - {self.created_at.strftime('%Y-%m-%d %H:%M')}"

    @property
    def duration(self):
        """Return job duration as timedelta or None.

        None is returned unless both ``started_at`` and ``finished_at``
        are set.
        """
        if self.started_at and self.finished_at:
            return self.finished_at - self.started_at
        return None

    @property
    def duration_display(self):
        """Return formatted duration string.

        The result is ``'-'`` when no duration is available, ``'<s>s'`` for
        durations under a minute, and ``'<m>m <s>s'`` otherwise. Fractional
        seconds are truncated.
        """
        duration = self.duration
        # Compare against None explicitly: a zero-length timedelta is falsy,
        # and an instantaneous job should read '0s' rather than '-'.
        if duration is None:
            return '-'
        total_seconds = int(duration.total_seconds())
        minutes, seconds = divmod(total_seconds, 60)
        if minutes > 0:
            return f"{minutes}m {seconds}s"
        return f"{seconds}s"

    @property
    def sport(self):
        """Return the sport of the related config."""
        return self.config.sport

    @property
    def season(self):
        """Return the season of the related config."""
        return self.config.season

    def get_summary(self):
        """Return summary dict for notifications.

        The dict holds the sport's short name, the season as rendered by the
        sport's ``get_season_display`` (defined outside this module), the job
        status, the formatted duration, the game counters, the number of
        review items created and the error message.
        """
        return {
            'sport': self.config.sport.short_name,
            'season': self.config.sport.get_season_display(self.config.season),
            'status': self.status,
            'duration': self.duration_display,
            'games_found': self.games_found,
            'games_new': self.games_new,
            'games_updated': self.games_updated,
            'games_unchanged': self.games_unchanged,
            'games_errors': self.games_errors,
            'review_items': self.review_items_created,
            'error_message': self.error_message,
        }


class ScrapeJobLog(models.Model):
    """
    Log entries for a scrape job.

    Each entry belongs to one ``ScrapeJob`` (reverse accessor ``logs``) and
    is removed together with it.
    """
    LEVEL_CHOICES = [
        ('debug', 'Debug'),
        ('info', 'Info'),
        ('warning', 'Warning'),
        ('error', 'Error'),
    ]

    job = models.ForeignKey(
        ScrapeJob,
        on_delete=models.CASCADE,
        related_name='logs'
    )
    level = models.CharField(
        max_length=10,
        choices=LEVEL_CHOICES,
        default='info'
    )
    message = models.TextField()
    source = models.CharField(
        max_length=100,
        blank=True,
        help_text='Source/component that generated this log'
    )
    extra_data = models.JSONField(
        null=True,
        blank=True,
        help_text='Additional structured data'
    )
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        # Oldest first, so entries read in the order they were written.
        ordering = ['created_at']
        verbose_name = 'Scrape Job Log'
        verbose_name_plural = 'Scrape Job Logs'

    def __str__(self):
        """Return ``"[LEVEL] "`` followed by the first 50 characters of the message."""
        return f"[{self.level.upper()}] {self.message[:50]}"