from django.db import models
from django.conf import settings
from simple_history.models import HistoricalRecords


class ScraperConfig(models.Model):
    """
    Per-sport, per-season settings for a scraper.

    The ``(sport, season)`` pair is declared unique in ``Meta``, so there
    is at most one configuration row for each combination. Every change
    to a row is tracked through the ``history`` attribute
    (``simple_history.HistoricalRecords``).

    The rate-limiting and fuzzy-matching defaults are read from Django
    settings when this module is imported, not when a row is created.
    """

    # --- Identity: which sport/season this configuration applies to ---
    sport = models.ForeignKey(
        'core.Sport',
        on_delete=models.CASCADE,
        related_name='scraper_configs'
    )
    season = models.PositiveSmallIntegerField(
        help_text='Season to scrape (e.g., 2025 for 2025-26 season)'
    )
    is_enabled = models.BooleanField(
        default=True,
        help_text='Whether this scraper is enabled for scheduling'
    )

    # --- Sources ---
    # An empty ``sources`` list means "use the per-sport fallback";
    # see ``get_sources_list``.
    sources = models.JSONField(
        default=list,
        help_text='Ordered list of sources to try (e.g., ["basketball_reference", "espn"])'
    )
    primary_source = models.CharField(
        max_length=100,
        blank=True,
        help_text='Primary source for this scraper'
    )

    # --- Request pacing / retry policy (defaults come from settings) ---
    request_delay = models.FloatField(
        default=settings.SCRAPER_REQUEST_DELAY,
        help_text='Seconds between requests'
    )
    max_retries = models.PositiveSmallIntegerField(
        default=settings.SCRAPER_MAX_RETRIES,
        help_text='Maximum retry attempts'
    )

    # --- Fuzzy matching ---
    # The 0-100 range is stated in the help text only; no validator is
    # attached to this field here.
    fuzzy_threshold = models.PositiveSmallIntegerField(
        default=settings.SCRAPER_FUZZY_THRESHOLD,
        help_text='Minimum fuzzy match confidence (0-100)'
    )

    # --- Outcome of the most recent run ---
    # NOTE(review): nothing in this class writes these fields; presumably
    # the scraper runner updates them -- confirm against the caller.
    last_run = models.DateTimeField(
        null=True,
        blank=True,
        help_text='Last successful run timestamp'
    )
    last_run_status = models.CharField(
        max_length=20,
        blank=True,
        help_text='Status of last run'
    )
    last_run_games = models.PositiveIntegerField(
        default=0,
        help_text='Games found in last run'
    )

    # --- Free-form operator notes ---
    notes = models.TextField(
        blank=True,
        help_text='Configuration notes'
    )

    # --- Row bookkeeping ---
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    # --- Change history (django-simple-history) ---
    history = HistoricalRecords()

    class Meta:
        ordering = ['sport', 'season']
        unique_together = ['sport', 'season']
        verbose_name = 'Scraper Configuration'
        verbose_name_plural = 'Scraper Configurations'

    def __str__(self):
        """Return ``"<sport short name> <season display>"`` for this row."""
        sport = self.sport
        short_name = sport.short_name
        season_label = sport.get_season_display(self.season)
        return f"{short_name} {season_label}"

    def get_sources_list(self):
        """
        Return the sources to try for this configuration.

        When ``sources`` is non-empty it is returned as stored (the same
        object, not a copy). Otherwise a built-in fallback is looked up by
        ``self.sport.code``; sport codes without an entry fall back to
        ``['espn']``.
        """
        configured = self.sources
        if not configured:
            # Nothing configured on the row: use the built-in per-sport list.
            fallback_by_sport = {
                'nba': ['basketball_reference', 'espn'],
                'mlb': ['baseball_reference', 'mlb_api', 'espn'],
                'nfl': ['espn', 'pro_football_reference'],
                'nhl': ['hockey_reference', 'nhl_api', 'espn'],
                'mls': ['espn'],
                'wnba': ['espn'],
                'nwsl': ['espn'],
            }
            return fallback_by_sport.get(self.sport.code, ['espn'])
        return configured