feat: add Django web app, CloudKit sync, dashboard, and game_datetime_utc export

Adds the full Django application layer on top of sportstime_parser:
- core: Sport, Team, Stadium, Game models with aliases and league structure
- scraper: orchestration engine, adapter, job management, Celery tasks
- cloudkit: CloudKit sync client, sync state tracking, sync jobs
- dashboard: staff dashboard for monitoring scrapers, sync, review queue
- notifications: email reports for scrape/sync results
- Docker setup for deployment (Dockerfile, docker-compose, entrypoint)

Game exports now use game_datetime_utc (ISO 8601 UTC) instead of
venue-local date+time strings, matching the canonical format used
by the iOS app.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-02-19 14:04:27 -06:00
parent 4353d5943c
commit 63acf7accb
114 changed files with 13070 additions and 887 deletions

394
cloudkit/models.py Normal file
View File

@@ -0,0 +1,394 @@
from django.db import models
from django.conf import settings
from simple_history.models import HistoricalRecords
class CloudKitConfiguration(models.Model):
    """
    CloudKit configuration for syncing.

    Stores the container, environment and signing-key details handed to
    ``CloudKitClient`` together with a few sync settings. ``save()`` keeps at
    most one row flagged ``is_active``: saving an active configuration
    deactivates every other active one.
    """

    ENVIRONMENT_CHOICES = [
        ('development', 'Development'),
        ('production', 'Production'),
    ]

    name = models.CharField(
        max_length=100,
        unique=True,
        help_text='Configuration name (e.g., "Production", "Development")'
    )
    environment = models.CharField(
        max_length=20,
        choices=ENVIRONMENT_CHOICES,
        default='development'
    )
    container_id = models.CharField(
        max_length=200,
        # Read from settings once, when this module is imported.
        default=settings.CLOUDKIT_CONTAINER,
        help_text='CloudKit container ID (e.g., iCloud.com.sportstime.app)'
    )
    key_id = models.CharField(
        max_length=200,
        blank=True,
        help_text='CloudKit API key ID'
    )
    private_key = models.TextField(
        blank=True,
        help_text='EC P-256 private key content (PEM format). Paste key here OR use path below.'
    )
    private_key_path = models.CharField(
        max_length=500,
        blank=True,
        help_text='Path to EC P-256 private key file (alternative to pasting key above)'
    )
    is_active = models.BooleanField(
        default=False,
        help_text='Whether this configuration is active for syncing'
    )

    # Sync settings
    batch_size = models.PositiveIntegerField(
        default=200,
        help_text='Maximum records per batch upload'
    )
    auto_sync_after_scrape = models.BooleanField(
        default=False,
        help_text='Automatically sync after scraper jobs complete'
    )

    # Metadata
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    # Audit trail
    # NOTE(review): unless fields are excluded, django-simple-history copies
    # each tracked field into the history table, so earlier private_key values
    # would be retained there. Confirm this is intended, or exclude the field
    # (that change needs a migration).
    history = HistoricalRecords()

    class Meta:
        verbose_name = 'CloudKit Configuration'
        verbose_name_plural = 'CloudKit Configurations'

    def __str__(self):
        return f"{self.name} ({self.environment})"

    def save(self, *args, **kwargs):
        """
        Save the configuration, keeping at most one configuration active.

        When this instance is active, every other active configuration is
        deactivated first. Both writes run inside one transaction so that a
        failed save cannot leave the table with no active configuration.
        """
        # Local import, matching the other method-level imports in this file.
        from django.db import transaction

        if not self.is_active:
            super().save(*args, **kwargs)
            return

        with transaction.atomic():
            # Ensure only one active configuration
            others = CloudKitConfiguration.objects.filter(is_active=True).exclude(pk=self.pk)
            others.update(is_active=False)
            super().save(*args, **kwargs)

    def get_client(self):
        """Create a CloudKitClient from this configuration."""
        # Imported lazily inside the method rather than at module level.
        from cloudkit.client import CloudKitClient

        return CloudKitClient(
            container_id=self.container_id,
            environment=self.environment,
            key_id=self.key_id,
            private_key=self.private_key,
            private_key_path=self.private_key_path,
        )

    @classmethod
    def get_active(cls):
        """Get the active CloudKit configuration, or None if there is none."""
        return cls.objects.filter(is_active=True).first()
class CloudKitSyncState(models.Model):
    """
    Tracks sync state for individual records.

    One row exists per ``(record_type, record_id)`` pair. The ``mark_*``
    helpers update the status fields on the instance and save it immediately.
    """

    RECORD_TYPE_CHOICES = [
        ('Sport', 'Sport'),
        ('Conference', 'Conference'),
        ('Division', 'Division'),
        ('Team', 'Team'),
        ('Stadium', 'Stadium'),
        ('TeamAlias', 'Team Alias'),
        ('StadiumAlias', 'Stadium Alias'),
        ('Game', 'Game'),
    ]
    SYNC_STATUS_CHOICES = [
        ('pending', 'Pending Sync'),
        ('synced', 'Synced'),
        ('failed', 'Failed'),
        ('deleted', 'Deleted'),
    ]

    record_type = models.CharField(
        max_length=20,
        choices=RECORD_TYPE_CHOICES
    )
    record_id = models.CharField(
        max_length=100,
        help_text='Local record ID (canonical ID)'
    )
    cloudkit_record_name = models.CharField(
        max_length=200,
        blank=True,
        help_text='CloudKit record name (may differ from local ID)'
    )
    local_hash = models.CharField(
        max_length=64,
        blank=True,
        help_text='Hash of local record data for change detection'
    )
    remote_change_tag = models.CharField(
        max_length=200,
        blank=True,
        help_text='CloudKit change tag for conflict detection'
    )
    sync_status = models.CharField(
        max_length=20,
        choices=SYNC_STATUS_CHOICES,
        default='pending'
    )
    last_synced = models.DateTimeField(
        null=True,
        blank=True
    )
    last_error = models.TextField(
        blank=True,
        help_text='Last sync error message'
    )
    retry_count = models.PositiveSmallIntegerField(
        default=0
    )

    # Metadata
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['-updated_at']
        unique_together = ['record_type', 'record_id']
        verbose_name = 'CloudKit Sync State'
        verbose_name_plural = 'CloudKit Sync States'
        indexes = [
            models.Index(fields=['sync_status', 'record_type']),
            models.Index(fields=['record_type', 'last_synced']),
        ]

    def __str__(self):
        return f"{self.record_type}:{self.record_id} ({self.sync_status})"

    def mark_synced(self, change_tag=''):
        """
        Mark record as successfully synced.

        Stores ``change_tag`` (``None`` is treated as an empty string), stamps
        ``last_synced`` with the current time, clears the last error, resets
        the retry counter and saves the row.
        """
        from django.utils import timezone

        self.sync_status = 'synced'
        # remote_change_tag is a non-null column; never hand it None.
        self.remote_change_tag = change_tag or ''
        self.last_synced = timezone.now()
        self.last_error = ''
        self.retry_count = 0
        self.save()

    def mark_failed(self, error_message):
        """
        Mark record as failed to sync.

        ``error_message`` may be a string or any object with a useful
        ``str()`` (for example an exception); ``None`` is stored as an empty
        string. The retry counter is incremented and the row is saved.
        """
        self.sync_status = 'failed'
        # Keep the in-memory value a plain string, matching what is stored.
        self.last_error = '' if error_message is None else str(error_message)
        self.retry_count += 1
        self.save()

    def mark_pending(self, new_hash=''):
        """
        Mark record as pending sync (e.g., after local change).

        ``local_hash`` is only replaced when a non-empty ``new_hash`` is given.
        """
        self.sync_status = 'pending'
        if new_hash:
            self.local_hash = new_hash
        self.save()
class CloudKitSyncJob(models.Model):
    """
    Record of a CloudKit sync job execution.

    Besides overall status, timing and result counters, the job keeps
    ``<group>_total`` / ``<group>_synced`` / ``<group>_failed`` counters for
    each record group; ``get_progress()`` summarises them.
    """

    STATUS_CHOICES = [
        ('pending', 'Pending'),
        ('running', 'Running'),
        ('completed', 'Completed'),
        ('completed_with_errors', 'Completed with Errors'),
        ('failed', 'Failed'),
        ('cancelled', 'Cancelled'),
    ]

    # Prefixes of the per-group progress counters declared below, in the
    # order get_progress() reports them.
    PROGRESS_GROUPS = (
        'sports',
        'conferences',
        'divisions',
        'teams',
        'stadiums',
        'team_aliases',
        'stadium_aliases',
        'games',
    )

    configuration = models.ForeignKey(
        CloudKitConfiguration,
        on_delete=models.CASCADE,
        related_name='sync_jobs'
    )
    status = models.CharField(
        max_length=25,
        choices=STATUS_CHOICES,
        default='pending'
    )
    triggered_by = models.CharField(
        max_length=50,
        default='manual',
        help_text='How the sync was triggered'
    )

    # Timing
    started_at = models.DateTimeField(null=True, blank=True)
    finished_at = models.DateTimeField(null=True, blank=True)

    # Results
    records_synced = models.PositiveIntegerField(default=0)
    records_created = models.PositiveIntegerField(default=0)
    records_updated = models.PositiveIntegerField(default=0)
    records_deleted = models.PositiveIntegerField(default=0)
    records_failed = models.PositiveIntegerField(default=0)

    # Filter (optional - sync specific records)
    sport_filter = models.ForeignKey(
        'core.Sport',
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        help_text='Only sync this sport (all if blank)'
    )
    record_type_filter = models.CharField(
        max_length=20,
        blank=True,
        help_text='Only sync this record type (all if blank)'
    )

    # Error tracking
    error_message = models.TextField(blank=True)

    # Progress tracking
    current_record_type = models.CharField(
        max_length=20,
        blank=True,
        help_text='Currently syncing record type'
    )
    sports_total = models.PositiveIntegerField(default=0)
    sports_synced = models.PositiveIntegerField(default=0)
    sports_failed = models.PositiveIntegerField(default=0)
    teams_total = models.PositiveIntegerField(default=0)
    teams_synced = models.PositiveIntegerField(default=0)
    teams_failed = models.PositiveIntegerField(default=0)
    stadiums_total = models.PositiveIntegerField(default=0)
    stadiums_synced = models.PositiveIntegerField(default=0)
    stadiums_failed = models.PositiveIntegerField(default=0)
    conferences_total = models.PositiveIntegerField(default=0)
    conferences_synced = models.PositiveIntegerField(default=0)
    conferences_failed = models.PositiveIntegerField(default=0)
    divisions_total = models.PositiveIntegerField(default=0)
    divisions_synced = models.PositiveIntegerField(default=0)
    divisions_failed = models.PositiveIntegerField(default=0)
    team_aliases_total = models.PositiveIntegerField(default=0)
    team_aliases_synced = models.PositiveIntegerField(default=0)
    team_aliases_failed = models.PositiveIntegerField(default=0)
    stadium_aliases_total = models.PositiveIntegerField(default=0)
    stadium_aliases_synced = models.PositiveIntegerField(default=0)
    stadium_aliases_failed = models.PositiveIntegerField(default=0)
    games_total = models.PositiveIntegerField(default=0)
    games_synced = models.PositiveIntegerField(default=0)
    games_failed = models.PositiveIntegerField(default=0)

    # Celery task ID
    celery_task_id = models.CharField(
        max_length=255,
        blank=True
    )

    # Metadata
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['-created_at']
        verbose_name = 'CloudKit Sync Job'
        verbose_name_plural = 'CloudKit Sync Jobs'

    def __str__(self):
        # created_at is filled in on first save (auto_now_add), so an unsaved
        # job has no timestamp to format yet.
        if self.created_at is None:
            created = 'unsaved'
        else:
            created = self.created_at.strftime('%Y-%m-%d %H:%M')
        return f"Sync {self.configuration.name} - {created}"

    @property
    def duration(self):
        """Elapsed time between start and finish, or None if either is unset."""
        if self.started_at and self.finished_at:
            return self.finished_at - self.started_at
        return None

    @property
    def duration_display(self):
        """
        Human-readable duration: ``"<m>m <s>s"``, ``"<s>s"`` or ``'-'``.

        ``'-'`` is returned only when no duration is available; a zero-length
        duration is shown as ``"0s"``.
        """
        duration = self.duration
        # Compare against None explicitly: a zero timedelta is falsy.
        if duration is None:
            return '-'
        total_seconds = int(duration.total_seconds())
        minutes, seconds = divmod(total_seconds, 60)
        if minutes > 0:
            return f"{minutes}m {seconds}s"
        return f"{seconds}s"

    def get_progress(self):
        """
        Get progress data for API/display.

        Returns a dict with the job ``status``, ``current_type``, overall
        ``total`` / ``synced`` / ``failed`` / ``remaining`` / ``percent``
        values, and one nested dict per entry in ``PROGRESS_GROUPS`` holding
        that group's ``total``, ``synced``, ``failed`` and ``remaining``.
        """
        groups = {}
        for prefix in self.PROGRESS_GROUPS:
            group_total = getattr(self, f'{prefix}_total')
            group_synced = getattr(self, f'{prefix}_synced')
            group_failed = getattr(self, f'{prefix}_failed')
            groups[prefix] = {
                'total': group_total,
                'synced': group_synced,
                'failed': group_failed,
                'remaining': group_total - group_synced - group_failed,
            }

        total = sum(group['total'] for group in groups.values())
        synced = sum(group['synced'] for group in groups.values())
        failed = sum(group['failed'] for group in groups.values())

        progress = {
            'status': self.status,
            'current_type': self.current_record_type,
            'total': total,
            'synced': synced,
            'failed': failed,
            'remaining': total - synced - failed,
            # Failed records count as processed for the percentage.
            'percent': round((synced + failed) / total * 100) if total > 0 else 0,
        }
        # Per-group entries follow the summary keys, in PROGRESS_GROUPS order.
        progress.update(groups)
        return progress