feat: add Django web app, CloudKit sync, dashboard, and game_datetime_utc export
Adds the full Django application layer on top of sportstime_parser:
- core: Sport, Team, Stadium, Game models with aliases and league structure
- scraper: orchestration engine, adapter, job management, Celery tasks
- cloudkit: CloudKit sync client, sync state tracking, sync jobs
- dashboard: staff dashboard for monitoring scrapers, sync, review queue
- notifications: email reports for scrape/sync results
- Docker setup for deployment (Dockerfile, docker-compose, entrypoint)

Game exports now use game_datetime_utc (ISO 8601 UTC) instead of venue-local date+time strings, matching the canonical format used by the iOS app.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
201
scraper/migrations/0001_initial.py
Normal file
201
scraper/migrations/0001_initial.py
Normal file
@@ -0,0 +1,201 @@
|
||||
# Generated by Django 5.1.15 on 2026-01-26 08:59
|
||||
|
||||
import django.db.models.deletion
|
||||
import simple_history.models
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Initial schema migration for the ``scraper`` app.

    Operations, in order:

    1. Create ``ScrapeJob``.
    2. Create ``HistoricalScraperConfig`` and ``HistoricalManualReviewItem``
       (history models based on ``simple_history.models.HistoricalChanges``).
    3. Create ``ScrapeJobLog`` and ``ScraperConfig``.
    4. Add the ``ScrapeJob.config`` foreign key to ``ScraperConfig``.
    5. Create ``ManualReviewItem`` (with its three indexes).
    6. Add two composite indexes on ``ScrapeJob``.

    The operation order is significant and must not be changed.
    """

    initial = True

    dependencies = [
        ('core', '0001_initial'),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        # ScrapeJob is created without its ``config`` foreign key; that field
        # is attached by the AddField operation further down, once
        # ScraperConfig exists.
        migrations.CreateModel(
            name='ScrapeJob',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                (
                    'status',
                    models.CharField(
                        choices=[
                            ('pending', 'Pending'),
                            ('running', 'Running'),
                            ('completed', 'Completed'),
                            ('failed', 'Failed'),
                            ('cancelled', 'Cancelled'),
                        ],
                        default='pending',
                        max_length=20,
                    ),
                ),
                (
                    'triggered_by',
                    models.CharField(
                        default='manual',
                        help_text='How the job was triggered (manual, scheduled, api)',
                        max_length=50,
                    ),
                ),
                ('started_at', models.DateTimeField(blank=True, null=True)),
                ('finished_at', models.DateTimeField(blank=True, null=True)),
                # Per-run counters, all defaulting to zero.
                ('games_found', models.PositiveIntegerField(default=0)),
                ('games_new', models.PositiveIntegerField(default=0)),
                ('games_updated', models.PositiveIntegerField(default=0)),
                ('games_unchanged', models.PositiveIntegerField(default=0)),
                ('games_errors', models.PositiveIntegerField(default=0)),
                ('teams_found', models.PositiveIntegerField(default=0)),
                ('stadiums_found', models.PositiveIntegerField(default=0)),
                ('review_items_created', models.PositiveIntegerField(default=0)),
                ('error_message', models.TextField(blank=True)),
                ('error_traceback', models.TextField(blank=True)),
                (
                    'celery_task_id',
                    models.CharField(blank=True, help_text='Celery task ID for this job', max_length=255),
                ),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
            ],
            options={
                'verbose_name': 'Scrape Job',
                'verbose_name_plural': 'Scrape Jobs',
                'ordering': ['-created_at'],
            },
        ),
        # History model for ScraperConfig. ``id`` is a plain indexed integer
        # here; ``history_id`` is the primary key of the history rows.
        migrations.CreateModel(
            name='HistoricalScraperConfig',
            fields=[
                ('id', models.BigIntegerField(auto_created=True, blank=True, db_index=True, verbose_name='ID')),
                (
                    'season',
                    models.PositiveSmallIntegerField(help_text='Season to scrape (e.g., 2025 for 2025-26 season)'),
                ),
                (
                    'is_enabled',
                    models.BooleanField(default=True, help_text='Whether this scraper is enabled for scheduling'),
                ),
                (
                    'sources',
                    models.JSONField(
                        default=list,
                        help_text='Ordered list of sources to try (e.g., ["basketball_reference", "espn"])',
                    ),
                ),
                (
                    'primary_source',
                    models.CharField(blank=True, help_text='Primary source for this scraper', max_length=100),
                ),
                ('request_delay', models.FloatField(default=3.0, help_text='Seconds between requests')),
                ('max_retries', models.PositiveSmallIntegerField(default=3, help_text='Maximum retry attempts')),
                (
                    'fuzzy_threshold',
                    models.PositiveSmallIntegerField(
                        default=85,
                        help_text='Minimum fuzzy match confidence (0-100)',
                    ),
                ),
                (
                    'last_run',
                    models.DateTimeField(blank=True, help_text='Last successful run timestamp', null=True),
                ),
                ('last_run_status', models.CharField(blank=True, help_text='Status of last run', max_length=20)),
                ('last_run_games', models.PositiveIntegerField(default=0, help_text='Games found in last run')),
                ('notes', models.TextField(blank=True, help_text='Configuration notes')),
                ('created_at', models.DateTimeField(blank=True, editable=False)),
                ('updated_at', models.DateTimeField(blank=True, editable=False)),
                ('history_id', models.AutoField(primary_key=True, serialize=False)),
                ('history_date', models.DateTimeField(db_index=True)),
                ('history_change_reason', models.CharField(max_length=100, null=True)),
                (
                    'history_type',
                    models.CharField(
                        choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')],
                        max_length=1,
                    ),
                ),
                (
                    'history_user',
                    models.ForeignKey(
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        related_name='+',
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
                (
                    'sport',
                    models.ForeignKey(
                        blank=True,
                        db_constraint=False,
                        null=True,
                        on_delete=django.db.models.deletion.DO_NOTHING,
                        related_name='+',
                        to='core.sport',
                    ),
                ),
            ],
            options={
                'verbose_name': 'historical Scraper Configuration',
                'verbose_name_plural': 'historical Scraper Configurations',
                'ordering': ('-history_date', '-history_id'),
                'get_latest_by': ('history_date', 'history_id'),
            },
            bases=(simple_history.models.HistoricalChanges, models.Model),
        ),
        # History model for ManualReviewItem; its references to user, sport and
        # job are declared with db_constraint=False and DO_NOTHING.
        migrations.CreateModel(
            name='HistoricalManualReviewItem',
            fields=[
                ('id', models.BigIntegerField(auto_created=True, blank=True, db_index=True, verbose_name='ID')),
                ('item_type', models.CharField(choices=[('team', 'Team'), ('stadium', 'Stadium')], max_length=20)),
                ('raw_value', models.CharField(help_text='Original scraped value', max_length=300)),
                (
                    'suggested_id',
                    models.CharField(
                        blank=True,
                        help_text='Suggested canonical ID (if any match found)',
                        max_length=100,
                    ),
                ),
                ('confidence', models.FloatField(default=0.0, help_text='Match confidence (0.0 - 1.0)')),
                (
                    'reason',
                    models.CharField(
                        choices=[
                            ('no_match', 'No Match Found'),
                            ('low_confidence', 'Low Confidence Match'),
                            ('ambiguous', 'Ambiguous Match'),
                            ('new_entity', 'Potentially New Entity'),
                        ],
                        help_text='Why manual review is needed',
                        max_length=20,
                    ),
                ),
                ('source_url', models.URLField(blank=True, help_text='URL where this value was found')),
                (
                    'check_date',
                    models.DateField(blank=True, help_text='Date context for alias resolution', null=True),
                ),
                (
                    'context',
                    models.JSONField(blank=True, help_text='Additional context (e.g., game info)', null=True),
                ),
                (
                    'status',
                    models.CharField(
                        choices=[
                            ('pending', 'Pending Review'),
                            ('resolved', 'Resolved'),
                            ('ignored', 'Ignored'),
                            ('new_entity', 'Created New Entity'),
                        ],
                        default='pending',
                        max_length=20,
                    ),
                ),
                (
                    'resolved_to',
                    models.CharField(blank=True, help_text='Final resolved canonical ID', max_length=100),
                ),
                ('resolved_at', models.DateTimeField(blank=True, null=True)),
                ('resolution_notes', models.TextField(blank=True, help_text='Notes about the resolution')),
                (
                    'create_alias',
                    models.BooleanField(
                        default=False,
                        help_text='Whether to create an alias from this resolution',
                    ),
                ),
                ('created_at', models.DateTimeField(blank=True, editable=False)),
                ('updated_at', models.DateTimeField(blank=True, editable=False)),
                ('history_id', models.AutoField(primary_key=True, serialize=False)),
                ('history_date', models.DateTimeField(db_index=True)),
                ('history_change_reason', models.CharField(max_length=100, null=True)),
                (
                    'history_type',
                    models.CharField(
                        choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')],
                        max_length=1,
                    ),
                ),
                (
                    'history_user',
                    models.ForeignKey(
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        related_name='+',
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
                (
                    'resolved_by',
                    models.ForeignKey(
                        blank=True,
                        db_constraint=False,
                        null=True,
                        on_delete=django.db.models.deletion.DO_NOTHING,
                        related_name='+',
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
                (
                    'sport',
                    models.ForeignKey(
                        blank=True,
                        db_constraint=False,
                        null=True,
                        on_delete=django.db.models.deletion.DO_NOTHING,
                        related_name='+',
                        to='core.sport',
                    ),
                ),
                (
                    'job',
                    models.ForeignKey(
                        blank=True,
                        db_constraint=False,
                        help_text='Job that created this review item',
                        null=True,
                        on_delete=django.db.models.deletion.DO_NOTHING,
                        related_name='+',
                        to='scraper.scrapejob',
                    ),
                ),
            ],
            options={
                'verbose_name': 'historical Manual Review Item',
                'verbose_name_plural': 'historical Manual Review Items',
                'ordering': ('-history_date', '-history_id'),
                'get_latest_by': ('history_date', 'history_id'),
            },
            bases=(simple_history.models.HistoricalChanges, models.Model),
        ),
        # Log lines attached to a ScrapeJob; deleted with the job (CASCADE).
        migrations.CreateModel(
            name='ScrapeJobLog',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                (
                    'level',
                    models.CharField(
                        choices=[
                            ('debug', 'Debug'),
                            ('info', 'Info'),
                            ('warning', 'Warning'),
                            ('error', 'Error'),
                        ],
                        default='info',
                        max_length=10,
                    ),
                ),
                ('message', models.TextField()),
                (
                    'source',
                    models.CharField(
                        blank=True,
                        help_text='Source/component that generated this log',
                        max_length=100,
                    ),
                ),
                (
                    'extra_data',
                    models.JSONField(blank=True, help_text='Additional structured data', null=True),
                ),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                (
                    'job',
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='logs',
                        to='scraper.scrapejob',
                    ),
                ),
            ],
            options={
                'verbose_name': 'Scrape Job Log',
                'verbose_name_plural': 'Scrape Job Logs',
                'ordering': ['created_at'],
            },
        ),
        # One configuration row per (sport, season), enforced by
        # unique_together below.
        migrations.CreateModel(
            name='ScraperConfig',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                (
                    'season',
                    models.PositiveSmallIntegerField(help_text='Season to scrape (e.g., 2025 for 2025-26 season)'),
                ),
                (
                    'is_enabled',
                    models.BooleanField(default=True, help_text='Whether this scraper is enabled for scheduling'),
                ),
                (
                    'sources',
                    models.JSONField(
                        default=list,
                        help_text='Ordered list of sources to try (e.g., ["basketball_reference", "espn"])',
                    ),
                ),
                (
                    'primary_source',
                    models.CharField(blank=True, help_text='Primary source for this scraper', max_length=100),
                ),
                ('request_delay', models.FloatField(default=3.0, help_text='Seconds between requests')),
                ('max_retries', models.PositiveSmallIntegerField(default=3, help_text='Maximum retry attempts')),
                (
                    'fuzzy_threshold',
                    models.PositiveSmallIntegerField(
                        default=85,
                        help_text='Minimum fuzzy match confidence (0-100)',
                    ),
                ),
                (
                    'last_run',
                    models.DateTimeField(blank=True, help_text='Last successful run timestamp', null=True),
                ),
                ('last_run_status', models.CharField(blank=True, help_text='Status of last run', max_length=20)),
                ('last_run_games', models.PositiveIntegerField(default=0, help_text='Games found in last run')),
                ('notes', models.TextField(blank=True, help_text='Configuration notes')),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
                (
                    'sport',
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='scraper_configs',
                        to='core.sport',
                    ),
                ),
            ],
            options={
                'verbose_name': 'Scraper Configuration',
                'verbose_name_plural': 'Scraper Configurations',
                'ordering': ['sport', 'season'],
                'unique_together': {('sport', 'season')},
            },
        ),
        # ScraperConfig now exists, so ScrapeJob can reference it.
        migrations.AddField(
            model_name='scrapejob',
            name='config',
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name='jobs',
                to='scraper.scraperconfig',
            ),
        ),
        # Review-queue entries for scraped values; ``job`` is optional
        # (blank/null) while ``sport`` is required.
        migrations.CreateModel(
            name='ManualReviewItem',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('item_type', models.CharField(choices=[('team', 'Team'), ('stadium', 'Stadium')], max_length=20)),
                ('raw_value', models.CharField(help_text='Original scraped value', max_length=300)),
                (
                    'suggested_id',
                    models.CharField(
                        blank=True,
                        help_text='Suggested canonical ID (if any match found)',
                        max_length=100,
                    ),
                ),
                ('confidence', models.FloatField(default=0.0, help_text='Match confidence (0.0 - 1.0)')),
                (
                    'reason',
                    models.CharField(
                        choices=[
                            ('no_match', 'No Match Found'),
                            ('low_confidence', 'Low Confidence Match'),
                            ('ambiguous', 'Ambiguous Match'),
                            ('new_entity', 'Potentially New Entity'),
                        ],
                        help_text='Why manual review is needed',
                        max_length=20,
                    ),
                ),
                ('source_url', models.URLField(blank=True, help_text='URL where this value was found')),
                (
                    'check_date',
                    models.DateField(blank=True, help_text='Date context for alias resolution', null=True),
                ),
                (
                    'context',
                    models.JSONField(blank=True, help_text='Additional context (e.g., game info)', null=True),
                ),
                (
                    'status',
                    models.CharField(
                        choices=[
                            ('pending', 'Pending Review'),
                            ('resolved', 'Resolved'),
                            ('ignored', 'Ignored'),
                            ('new_entity', 'Created New Entity'),
                        ],
                        default='pending',
                        max_length=20,
                    ),
                ),
                (
                    'resolved_to',
                    models.CharField(blank=True, help_text='Final resolved canonical ID', max_length=100),
                ),
                ('resolved_at', models.DateTimeField(blank=True, null=True)),
                ('resolution_notes', models.TextField(blank=True, help_text='Notes about the resolution')),
                (
                    'create_alias',
                    models.BooleanField(
                        default=False,
                        help_text='Whether to create an alias from this resolution',
                    ),
                ),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
                (
                    'resolved_by',
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        related_name='resolved_review_items',
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
                (
                    'sport',
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='review_items',
                        to='core.sport',
                    ),
                ),
                (
                    'job',
                    models.ForeignKey(
                        blank=True,
                        help_text='Job that created this review item',
                        null=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='review_items',
                        to='scraper.scrapejob',
                    ),
                ),
            ],
            options={
                'verbose_name': 'Manual Review Item',
                'verbose_name_plural': 'Manual Review Items',
                'ordering': ['-created_at'],
                'indexes': [
                    models.Index(fields=['status', 'item_type'], name='scraper_man_status_5d06e2_idx'),
                    models.Index(fields=['sport', 'status'], name='scraper_man_sport_i_7af37b_idx'),
                    models.Index(fields=['raw_value'], name='scraper_man_raw_val_abdd0a_idx'),
                ],
            },
        ),
        # Composite indexes on ScrapeJob; the first covers the ``config``
        # column introduced by the AddField operation above.
        migrations.AddIndex(
            model_name='scrapejob',
            index=models.Index(fields=['config', 'status'], name='scraper_scr_config__4c4058_idx'),
        ),
        migrations.AddIndex(
            model_name='scrapejob',
            index=models.Index(fields=['status', 'created_at'], name='scraper_scr_status_f3978d_idx'),
        ),
    ]
|
||||
Reference in New Issue
Block a user