feat: add Django web app, CloudKit sync, dashboard, and game_datetime_utc export
Adds the full Django application layer on top of sportstime_parser:

- core: Sport, Team, Stadium, Game models with aliases and league structure
- scraper: orchestration engine, adapter, job management, Celery tasks
- cloudkit: CloudKit sync client, sync state tracking, sync jobs
- dashboard: staff dashboard for monitoring scrapers, sync, review queue
- notifications: email reports for scrape/sync results
- Docker setup for deployment (Dockerfile, docker-compose, entrypoint)

Game exports now use game_datetime_utc (ISO 8601 UTC) instead of venue-local date+time strings, matching the canonical format used by the iOS app.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
102
scraper/models/config.py
Normal file
102
scraper/models/config.py
Normal file
@@ -0,0 +1,102 @@
|
||||
from django.db import models
|
||||
from django.conf import settings
|
||||
from simple_history.models import HistoricalRecords
|
||||
|
||||
|
||||
class ScraperConfig(models.Model):
    """
    Scraper settings for one sport in one season.

    A row links a ``core.Sport`` to a season and stores the sources to try,
    request pacing and retry limits, the fuzzy-match threshold, and a short
    summary of the most recent run. ``Meta.unique_together`` allows at most
    one row per (sport, season) pair.
    """

    # --- Identity -----------------------------------------------------------
    # CASCADE: deleting the referenced Sport also deletes its configurations.
    sport = models.ForeignKey(
        'core.Sport',
        on_delete=models.CASCADE,
        related_name='scraper_configs'
    )
    season = models.PositiveSmallIntegerField(
        help_text='Season to scrape (e.g., 2025 for 2025-26 season)'
    )
    is_enabled = models.BooleanField(
        default=True,
        help_text='Whether this scraper is enabled for scheduling'
    )

    # --- Source configuration -----------------------------------------------
    # `default=list` passes the callable, so each new row gets its own list.
    sources = models.JSONField(
        default=list,
        help_text='Ordered list of sources to try (e.g., ["basketball_reference", "espn"])'
    )
    primary_source = models.CharField(
        max_length=100,
        blank=True,
        help_text='Primary source for this scraper'
    )

    # --- Rate limiting ------------------------------------------------------
    # These defaults are read from Django settings when the class body runs.
    request_delay = models.FloatField(
        default=settings.SCRAPER_REQUEST_DELAY,
        help_text='Seconds between requests'
    )
    max_retries = models.PositiveSmallIntegerField(
        default=settings.SCRAPER_MAX_RETRIES,
        help_text='Maximum retry attempts'
    )

    # --- Fuzzy matching -----------------------------------------------------
    fuzzy_threshold = models.PositiveSmallIntegerField(
        default=settings.SCRAPER_FUZZY_THRESHOLD,
        help_text='Minimum fuzzy match confidence (0-100)'
    )

    # --- Scheduling / last-run summary --------------------------------------
    last_run = models.DateTimeField(
        null=True,
        blank=True,
        help_text='Last successful run timestamp'
    )
    last_run_status = models.CharField(
        max_length=20,
        blank=True,
        help_text='Status of last run'
    )
    last_run_games = models.PositiveIntegerField(
        default=0,
        help_text='Games found in last run'
    )

    # --- Notes --------------------------------------------------------------
    notes = models.TextField(
        blank=True,
        help_text='Configuration notes'
    )

    # --- Metadata -----------------------------------------------------------
    # auto_now_add stamps on insert; auto_now refreshes on every save().
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    # --- Audit trail (simple_history) ---------------------------------------
    history = HistoricalRecords()

    class Meta:
        ordering = ['sport', 'season']
        unique_together = ['sport', 'season']
        verbose_name = 'Scraper Configuration'
        verbose_name_plural = 'Scraper Configurations'

    def __str__(self):
        """Return the sport's short name followed by its season display text."""
        sport = self.sport
        short_name = sport.short_name
        season_label = sport.get_season_display(self.season)
        return f"{short_name} {season_label}"

    def get_sources_list(self):
        """
        Return the sources to try for this configuration.

        A non-empty ``sources`` value is returned as stored. Otherwise the
        result comes from a built-in table keyed by ``self.sport.code``,
        with ``['espn']`` used for any code not in that table.
        """
        if not self.sources:
            # Nothing configured: fall back to the per-sport default order.
            fallback_by_sport = {
                'nba': ['basketball_reference', 'espn'],
                'mlb': ['baseball_reference', 'mlb_api', 'espn'],
                'nfl': ['espn', 'pro_football_reference'],
                'nhl': ['hockey_reference', 'nhl_api', 'espn'],
                'mls': ['espn'],
                'wnba': ['espn'],
                'nwsl': ['espn'],
            }
            return fallback_by_sport.get(self.sport.code, ['espn'])
        return self.sources
|
||||
Reference in New Issue
Block a user