Files
SportstimeAPI/scraper/models/config.py
Trey t 63acf7accb feat: add Django web app, CloudKit sync, dashboard, and game_datetime_utc export
Adds the full Django application layer on top of sportstime_parser:
- core: Sport, Team, Stadium, Game models with aliases and league structure
- scraper: orchestration engine, adapter, job management, Celery tasks
- cloudkit: CloudKit sync client, sync state tracking, sync jobs
- dashboard: staff dashboard for monitoring scrapers, sync, review queue
- notifications: email reports for scrape/sync results
- Docker setup for deployment (Dockerfile, docker-compose, entrypoint)

Game exports now use game_datetime_utc (ISO 8601 UTC) instead of
venue-local date+time strings, matching the canonical format used
by the iOS app.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-19 14:04:27 -06:00

103 lines
2.9 KiB
Python

from django.db import models
from django.conf import settings
from simple_history.models import HistoricalRecords
class ScraperConfig(models.Model):
    """
    Per-sport, per-season scraper settings.

    Each row pairs a sport with a season (the pair is unique) and carries
    the source list, rate-limit and fuzzy-matching knobs, plus bookkeeping
    about the most recent run. The rate-limit and fuzzy-matching defaults
    are read from Django settings when this class is defined. Changes are
    recorded through ``simple_history``.
    """

    sport = models.ForeignKey(
        'core.Sport',
        on_delete=models.CASCADE,
        related_name='scraper_configs',
    )
    season = models.PositiveSmallIntegerField(
        help_text='Season to scrape (e.g., 2025 for 2025-26 season)',
    )
    is_enabled = models.BooleanField(
        default=True,
        help_text='Whether this scraper is enabled for scheduling',
    )

    # --- Sources -----------------------------------------------------------
    sources = models.JSONField(
        default=list,
        help_text='Ordered list of sources to try (e.g., ["basketball_reference", "espn"])',
    )
    primary_source = models.CharField(
        max_length=100,
        blank=True,
        help_text='Primary source for this scraper',
    )

    # --- Rate limiting -----------------------------------------------------
    request_delay = models.FloatField(
        default=settings.SCRAPER_REQUEST_DELAY,
        help_text='Seconds between requests',
    )
    max_retries = models.PositiveSmallIntegerField(
        default=settings.SCRAPER_MAX_RETRIES,
        help_text='Maximum retry attempts',
    )

    # --- Fuzzy matching ----------------------------------------------------
    fuzzy_threshold = models.PositiveSmallIntegerField(
        default=settings.SCRAPER_FUZZY_THRESHOLD,
        help_text='Minimum fuzzy match confidence (0-100)',
    )

    # --- Scheduling / last-run bookkeeping ---------------------------------
    last_run = models.DateTimeField(
        null=True,
        blank=True,
        help_text='Last successful run timestamp',
    )
    last_run_status = models.CharField(
        max_length=20,
        blank=True,
        help_text='Status of last run',
    )
    last_run_games = models.PositiveIntegerField(
        default=0,
        help_text='Games found in last run',
    )

    # --- Free-form notes ---------------------------------------------------
    notes = models.TextField(blank=True, help_text='Configuration notes')

    # --- Row metadata ------------------------------------------------------
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    # --- Audit trail -------------------------------------------------------
    history = HistoricalRecords()

    class Meta:
        ordering = ['sport', 'season']
        unique_together = ['sport', 'season']
        verbose_name = 'Scraper Configuration'
        verbose_name_plural = 'Scraper Configurations'

    def __str__(self):
        """Return the sport's short name followed by its season display."""
        sport = self.sport
        short_name = sport.short_name
        season_label = sport.get_season_display(self.season)
        return f"{short_name} {season_label}"

    def get_sources_list(self):
        """
        Return the sources to try for this configuration.

        The stored ``sources`` value is returned as-is when it is non-empty.
        Otherwise a built-in fallback keyed by ``sport.code`` is used, and
        any sport code without an entry falls back to ``['espn']``.
        """
        configured = self.sources
        if not configured:
            # Built-in fallbacks, only consulted when nothing is configured.
            fallback_by_sport = {
                'nba': ['basketball_reference', 'espn'],
                'mlb': ['baseball_reference', 'mlb_api', 'espn'],
                'nfl': ['espn', 'pro_football_reference'],
                'nhl': ['hockey_reference', 'nhl_api', 'espn'],
                'mls': ['espn'],
                'wnba': ['espn'],
                'nwsl': ['espn'],
            }
            return fallback_by_sport.get(self.sport.code, ['espn'])
        return configured