Adds the full Django application layer on top of sportstime_parser:
- core: Sport, Team, Stadium, Game models with aliases and league structure
- scraper: orchestration engine, adapter, job management, Celery tasks
- cloudkit: CloudKit sync client, sync state tracking, sync jobs
- dashboard: staff dashboard for monitoring scrapers, sync, review queue
- notifications: email reports for scrape/sync results
- Docker setup for deployment (Dockerfile, docker-compose, entrypoint)

Game exports now use game_datetime_utc (ISO 8601 UTC) instead of venue-local date+time strings, matching the canonical format used by the iOS app.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
103 lines
2.9 KiB
Python
103 lines
2.9 KiB
Python
from django.db import models
|
|
from django.conf import settings
|
|
from simple_history.models import HistoricalRecords
|
|
|
|
|
|
class ScraperConfig(models.Model):
    """
    Configuration for a sport's scraper.

    Each row configures scraping for one sport in one season; the
    ``unique_together`` constraint in ``Meta`` allows at most one row per
    (sport, season) pair. Rate-limit, retry and fuzzy-match defaults are
    read from Django settings when this module is imported.
    """

    # Source order used by ``get_sources_list`` when ``sources`` is empty,
    # keyed by ``sport.code``. Tuples keep the shared table immutable.
    _DEFAULT_SOURCES = {
        'nba': ('basketball_reference', 'espn'),
        'mlb': ('baseball_reference', 'mlb_api', 'espn'),
        'nfl': ('espn', 'pro_football_reference'),
        'nhl': ('hockey_reference', 'nhl_api', 'espn'),
        'mls': ('espn',),
        'wnba': ('espn',),
        'nwsl': ('espn',),
    }
    # Source order for any sport code missing from the table above.
    _FALLBACK_SOURCES = ('espn',)

    sport = models.ForeignKey(
        'core.Sport',
        on_delete=models.CASCADE,
        related_name='scraper_configs',
    )
    season = models.PositiveSmallIntegerField(
        help_text='Season to scrape (e.g., 2025 for 2025-26 season)'
    )
    is_enabled = models.BooleanField(
        default=True,
        help_text='Whether this scraper is enabled for scheduling'
    )

    # Source configuration
    sources = models.JSONField(
        default=list,
        help_text='Ordered list of sources to try (e.g., ["basketball_reference", "espn"])'
    )
    primary_source = models.CharField(
        max_length=100,
        blank=True,
        help_text='Primary source for this scraper'
    )

    # Rate limiting
    request_delay = models.FloatField(
        default=settings.SCRAPER_REQUEST_DELAY,
        help_text='Seconds between requests'
    )
    max_retries = models.PositiveSmallIntegerField(
        default=settings.SCRAPER_MAX_RETRIES,
        help_text='Maximum retry attempts'
    )

    # Fuzzy matching
    fuzzy_threshold = models.PositiveSmallIntegerField(
        default=settings.SCRAPER_FUZZY_THRESHOLD,
        help_text='Minimum fuzzy match confidence (0-100)'
    )

    # Scheduling
    last_run = models.DateTimeField(
        null=True,
        blank=True,
        help_text='Last successful run timestamp'
    )
    last_run_status = models.CharField(
        max_length=20,
        blank=True,
        help_text='Status of last run'
    )
    last_run_games = models.PositiveIntegerField(
        default=0,
        help_text='Games found in last run'
    )

    # Notes
    notes = models.TextField(
        blank=True,
        help_text='Configuration notes'
    )

    # Metadata
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    # Audit trail
    history = HistoricalRecords()

    class Meta:
        ordering = ['sport', 'season']
        unique_together = ['sport', 'season']
        verbose_name = 'Scraper Configuration'
        verbose_name_plural = 'Scraper Configurations'

    def __str__(self):
        """Return "<sport short name> <season display>" for this config.

        Falls back to a placeholder label when no sport has been assigned
        yet, so that printing or repr-ing an unsaved instance cannot raise.
        """
        # Accessing ``self.sport`` with no foreign key set raises, whereas
        # the raw ``sport_id`` value can always be inspected safely.
        if self.sport_id is None:
            return f"(no sport) {self.season}"
        return f"{self.sport.short_name} {self.sport.get_season_display(self.season)}"

    def get_sources_list(self):
        """Return sources as list, using defaults if empty.

        A non-empty ``sources`` value is returned as stored. Otherwise the
        default order for ``self.sport.code`` is returned, or the fallback
        order when that code has no entry.
        """
        if self.sources:
            return self.sources
        # Hand back a fresh list so callers may mutate the result without
        # touching the shared class-level defaults.
        return list(
            self._DEFAULT_SOURCES.get(self.sport.code, self._FALLBACK_SOURCES)
        )
|