feat: add Django web app, CloudKit sync, dashboard, and game_datetime_utc export

Adds the full Django application layer on top of sportstime_parser:
- core: Sport, Team, Stadium, Game models with aliases and league structure
- scraper: orchestration engine, adapter, job management, Celery tasks
- cloudkit: CloudKit sync client, sync state tracking, sync jobs
- dashboard: staff dashboard for monitoring scrapers, sync, review queue
- notifications: email reports for scrape/sync results
- Docker setup for deployment (Dockerfile, docker-compose, entrypoint)

Game exports now use game_datetime_utc (ISO 8601 UTC) instead of
venue-local date+time strings, matching the canonical format used
by the iOS app.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-02-19 14:04:27 -06:00
parent 4353d5943c
commit 63acf7accb
114 changed files with 13070 additions and 887 deletions

View File

@@ -0,0 +1,201 @@
# Generated by Django 5.1.15 on 2026-01-26 08:59
import django.db.models.deletion
import simple_history.models
from django.conf import settings
from django.db import migrations, models
# Choice lists, hoisted so a live model and its history table reference one definition.
_JOB_STATUS_CHOICES = [
    ('pending', 'Pending'),
    ('running', 'Running'),
    ('completed', 'Completed'),
    ('failed', 'Failed'),
    ('cancelled', 'Cancelled'),
]
_LOG_LEVEL_CHOICES = [
    ('debug', 'Debug'),
    ('info', 'Info'),
    ('warning', 'Warning'),
    ('error', 'Error'),
]
_ITEM_TYPE_CHOICES = [
    ('team', 'Team'),
    ('stadium', 'Stadium'),
]
_REVIEW_REASON_CHOICES = [
    ('no_match', 'No Match Found'),
    ('low_confidence', 'Low Confidence Match'),
    ('ambiguous', 'Ambiguous Match'),
    ('new_entity', 'Potentially New Entity'),
]
_REVIEW_STATUS_CHOICES = [
    ('pending', 'Pending Review'),
    ('resolved', 'Resolved'),
    ('ignored', 'Ignored'),
    ('new_entity', 'Created New Entity'),
]
_HISTORY_TYPE_CHOICES = [
    ('+', 'Created'),
    ('~', 'Changed'),
    ('-', 'Deleted'),
]


def _scraper_config_columns():
    """Return new field instances shared by ScraperConfig and HistoricalScraperConfig."""
    return [
        ('season', models.PositiveSmallIntegerField(
            help_text='Season to scrape (e.g., 2025 for 2025-26 season)',
        )),
        ('is_enabled', models.BooleanField(
            default=True,
            help_text='Whether this scraper is enabled for scheduling',
        )),
        ('sources', models.JSONField(
            default=list,
            help_text='Ordered list of sources to try (e.g., ["basketball_reference", "espn"])',
        )),
        ('primary_source', models.CharField(
            blank=True,
            help_text='Primary source for this scraper',
            max_length=100,
        )),
        ('request_delay', models.FloatField(
            default=3.0,
            help_text='Seconds between requests',
        )),
        ('max_retries', models.PositiveSmallIntegerField(
            default=3,
            help_text='Maximum retry attempts',
        )),
        ('fuzzy_threshold', models.PositiveSmallIntegerField(
            default=85,
            help_text='Minimum fuzzy match confidence (0-100)',
        )),
        ('last_run', models.DateTimeField(
            blank=True,
            help_text='Last successful run timestamp',
            null=True,
        )),
        ('last_run_status', models.CharField(
            blank=True,
            help_text='Status of last run',
            max_length=20,
        )),
        ('last_run_games', models.PositiveIntegerField(
            default=0,
            help_text='Games found in last run',
        )),
        ('notes', models.TextField(
            blank=True,
            help_text='Configuration notes',
        )),
    ]


def _review_item_columns():
    """Return new field instances shared by ManualReviewItem and HistoricalManualReviewItem."""
    return [
        ('item_type', models.CharField(
            choices=_ITEM_TYPE_CHOICES,
            max_length=20,
        )),
        ('raw_value', models.CharField(
            help_text='Original scraped value',
            max_length=300,
        )),
        ('suggested_id', models.CharField(
            blank=True,
            help_text='Suggested canonical ID (if any match found)',
            max_length=100,
        )),
        ('confidence', models.FloatField(
            default=0.0,
            help_text='Match confidence (0.0 - 1.0)',
        )),
        ('reason', models.CharField(
            choices=_REVIEW_REASON_CHOICES,
            help_text='Why manual review is needed',
            max_length=20,
        )),
        ('source_url', models.URLField(
            blank=True,
            help_text='URL where this value was found',
        )),
        ('check_date', models.DateField(
            blank=True,
            help_text='Date context for alias resolution',
            null=True,
        )),
        ('context', models.JSONField(
            blank=True,
            help_text='Additional context (e.g., game info)',
            null=True,
        )),
        ('status', models.CharField(
            choices=_REVIEW_STATUS_CHOICES,
            default='pending',
            max_length=20,
        )),
        ('resolved_to', models.CharField(
            blank=True,
            help_text='Final resolved canonical ID',
            max_length=100,
        )),
        ('resolved_at', models.DateTimeField(blank=True, null=True)),
        ('resolution_notes', models.TextField(
            blank=True,
            help_text='Notes about the resolution',
        )),
        ('create_alias', models.BooleanField(
            default=False,
            help_text='Whether to create an alias from this resolution',
        )),
    ]


def _history_columns():
    """Return new instances of the history_* bookkeeping fields used by both history tables."""
    return [
        ('history_id', models.AutoField(primary_key=True, serialize=False)),
        ('history_date', models.DateTimeField(db_index=True)),
        ('history_change_reason', models.CharField(max_length=100, null=True)),
        ('history_type', models.CharField(choices=_HISTORY_TYPE_CHOICES, max_length=1)),
        ('history_user', models.ForeignKey(
            null=True,
            on_delete=models.SET_NULL,
            related_name='+',
            to=settings.AUTH_USER_MODEL,
        )),
    ]


class Migration(migrations.Migration):
    """Initial schema migration for the ``scraper`` app.

    Creates ScrapeJob, ScrapeJobLog, ScraperConfig and ManualReviewItem, plus
    the HistoricalScraperConfig and HistoricalManualReviewItem tables whose
    base class is ``simple_history.models.HistoricalChanges``.
    """

    initial = True

    dependencies = [
        ('core', '0001_initial'),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        migrations.CreateModel(
            name='ScrapeJob',
            fields=[
                ('id', models.BigAutoField(
                    auto_created=True,
                    primary_key=True,
                    serialize=False,
                    verbose_name='ID',
                )),
                ('status', models.CharField(
                    choices=_JOB_STATUS_CHOICES,
                    default='pending',
                    max_length=20,
                )),
                ('triggered_by', models.CharField(
                    default='manual',
                    help_text='How the job was triggered (manual, scheduled, api)',
                    max_length=50,
                )),
                ('started_at', models.DateTimeField(blank=True, null=True)),
                ('finished_at', models.DateTimeField(blank=True, null=True)),
                # Eight identical zero-defaulted counters, kept in their original order.
                *[
                    (counter_name, models.PositiveIntegerField(default=0))
                    for counter_name in (
                        'games_found',
                        'games_new',
                        'games_updated',
                        'games_unchanged',
                        'games_errors',
                        'teams_found',
                        'stadiums_found',
                        'review_items_created',
                    )
                ],
                ('error_message', models.TextField(blank=True)),
                ('error_traceback', models.TextField(blank=True)),
                ('celery_task_id', models.CharField(
                    blank=True,
                    help_text='Celery task ID for this job',
                    max_length=255,
                )),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
            ],
            options={
                'verbose_name': 'Scrape Job',
                'verbose_name_plural': 'Scrape Jobs',
                'ordering': ['-created_at'],
            },
        ),
        migrations.CreateModel(
            name='HistoricalScraperConfig',
            fields=[
                # In the history table 'id' is a plain indexed column; history_id is the PK.
                ('id', models.BigIntegerField(
                    auto_created=True,
                    blank=True,
                    db_index=True,
                    verbose_name='ID',
                )),
                *_scraper_config_columns(),
                ('created_at', models.DateTimeField(blank=True, editable=False)),
                ('updated_at', models.DateTimeField(blank=True, editable=False)),
                *_history_columns(),
                ('sport', models.ForeignKey(
                    blank=True,
                    db_constraint=False,
                    null=True,
                    on_delete=models.DO_NOTHING,
                    related_name='+',
                    to='core.sport',
                )),
            ],
            options={
                'verbose_name': 'historical Scraper Configuration',
                'verbose_name_plural': 'historical Scraper Configurations',
                'ordering': ('-history_date', '-history_id'),
                'get_latest_by': ('history_date', 'history_id'),
            },
            bases=(simple_history.models.HistoricalChanges, models.Model),
        ),
        migrations.CreateModel(
            name='HistoricalManualReviewItem',
            fields=[
                ('id', models.BigIntegerField(
                    auto_created=True,
                    blank=True,
                    db_index=True,
                    verbose_name='ID',
                )),
                *_review_item_columns(),
                ('created_at', models.DateTimeField(blank=True, editable=False)),
                ('updated_at', models.DateTimeField(blank=True, editable=False)),
                *_history_columns(),
                ('resolved_by', models.ForeignKey(
                    blank=True,
                    db_constraint=False,
                    null=True,
                    on_delete=models.DO_NOTHING,
                    related_name='+',
                    to=settings.AUTH_USER_MODEL,
                )),
                ('sport', models.ForeignKey(
                    blank=True,
                    db_constraint=False,
                    null=True,
                    on_delete=models.DO_NOTHING,
                    related_name='+',
                    to='core.sport',
                )),
                ('job', models.ForeignKey(
                    blank=True,
                    db_constraint=False,
                    help_text='Job that created this review item',
                    null=True,
                    on_delete=models.DO_NOTHING,
                    related_name='+',
                    to='scraper.scrapejob',
                )),
            ],
            options={
                'verbose_name': 'historical Manual Review Item',
                'verbose_name_plural': 'historical Manual Review Items',
                'ordering': ('-history_date', '-history_id'),
                'get_latest_by': ('history_date', 'history_id'),
            },
            bases=(simple_history.models.HistoricalChanges, models.Model),
        ),
        migrations.CreateModel(
            name='ScrapeJobLog',
            fields=[
                ('id', models.BigAutoField(
                    auto_created=True,
                    primary_key=True,
                    serialize=False,
                    verbose_name='ID',
                )),
                ('level', models.CharField(
                    choices=_LOG_LEVEL_CHOICES,
                    default='info',
                    max_length=10,
                )),
                ('message', models.TextField()),
                ('source', models.CharField(
                    blank=True,
                    help_text='Source/component that generated this log',
                    max_length=100,
                )),
                ('extra_data', models.JSONField(
                    blank=True,
                    help_text='Additional structured data',
                    null=True,
                )),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('job', models.ForeignKey(
                    on_delete=models.CASCADE,
                    related_name='logs',
                    to='scraper.scrapejob',
                )),
            ],
            options={
                'verbose_name': 'Scrape Job Log',
                'verbose_name_plural': 'Scrape Job Logs',
                'ordering': ['created_at'],
            },
        ),
        migrations.CreateModel(
            name='ScraperConfig',
            fields=[
                ('id', models.BigAutoField(
                    auto_created=True,
                    primary_key=True,
                    serialize=False,
                    verbose_name='ID',
                )),
                *_scraper_config_columns(),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
                ('sport', models.ForeignKey(
                    on_delete=models.CASCADE,
                    related_name='scraper_configs',
                    to='core.sport',
                )),
            ],
            options={
                'verbose_name': 'Scraper Configuration',
                'verbose_name_plural': 'Scraper Configurations',
                'ordering': ['sport', 'season'],
                'unique_together': {('sport', 'season')},
            },
        ),
        # ScrapeJob is created before ScraperConfig, so its FK is attached here.
        migrations.AddField(
            model_name='scrapejob',
            name='config',
            field=models.ForeignKey(
                on_delete=models.CASCADE,
                related_name='jobs',
                to='scraper.scraperconfig',
            ),
        ),
        migrations.CreateModel(
            name='ManualReviewItem',
            fields=[
                ('id', models.BigAutoField(
                    auto_created=True,
                    primary_key=True,
                    serialize=False,
                    verbose_name='ID',
                )),
                *_review_item_columns(),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
                ('resolved_by', models.ForeignKey(
                    blank=True,
                    null=True,
                    on_delete=models.SET_NULL,
                    related_name='resolved_review_items',
                    to=settings.AUTH_USER_MODEL,
                )),
                ('sport', models.ForeignKey(
                    on_delete=models.CASCADE,
                    related_name='review_items',
                    to='core.sport',
                )),
                ('job', models.ForeignKey(
                    blank=True,
                    help_text='Job that created this review item',
                    null=True,
                    on_delete=models.CASCADE,
                    related_name='review_items',
                    to='scraper.scrapejob',
                )),
            ],
            options={
                'verbose_name': 'Manual Review Item',
                'verbose_name_plural': 'Manual Review Items',
                'ordering': ['-created_at'],
                'indexes': [
                    models.Index(
                        fields=['status', 'item_type'],
                        name='scraper_man_status_5d06e2_idx',
                    ),
                    models.Index(
                        fields=['sport', 'status'],
                        name='scraper_man_sport_i_7af37b_idx',
                    ),
                    models.Index(
                        fields=['raw_value'],
                        name='scraper_man_raw_val_abdd0a_idx',
                    ),
                ],
            },
        ),
        migrations.AddIndex(
            model_name='scrapejob',
            index=models.Index(
                fields=['config', 'status'],
                name='scraper_scr_config__4c4058_idx',
            ),
        ),
        migrations.AddIndex(
            model_name='scrapejob',
            index=models.Index(
                fields=['status', 'created_at'],
                name='scraper_scr_status_f3978d_idx',
            ),
        ),
    ]