Adds the full Django application layer on top of sportstime_parser: - core: Sport, Team, Stadium, Game models with aliases and league structure - scraper: orchestration engine, adapter, job management, Celery tasks - cloudkit: CloudKit sync client, sync state tracking, sync jobs - dashboard: staff dashboard for monitoring scrapers, sync, review queue - notifications: email reports for scrape/sync results - Docker setup for deployment (Dockerfile, docker-compose, entrypoint) Game exports now use game_datetime_utc (ISO 8601 UTC) instead of venue-local date+time strings, matching the canonical format used by the iOS app. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
193 lines
5.6 KiB
Python
193 lines
5.6 KiB
Python
from django.db import models
|
|
from simple_history.models import HistoricalRecords
|
|
|
|
|
|
class ManualReviewItem(models.Model):
    """
    Items that require manual review before resolution.

    Each row holds one raw scraped value for a team or stadium, an optional
    suggested canonical ID with a confidence score, the reason review was
    requested, and the outcome once the item has been resolved or ignored.
    Every change is recorded through the ``history`` audit trail.
    """

    ITEM_TYPE_CHOICES = [
        ('team', 'Team'),
        ('stadium', 'Stadium'),
    ]

    STATUS_CHOICES = [
        ('pending', 'Pending Review'),
        ('resolved', 'Resolved'),
        ('ignored', 'Ignored'),
        ('new_entity', 'Created New Entity'),
    ]

    REASON_CHOICES = [
        ('no_match', 'No Match Found'),
        ('low_confidence', 'Low Confidence Match'),
        ('ambiguous', 'Ambiguous Match'),
        ('new_entity', 'Potentially New Entity'),
    ]

    # Origin of the item; optional, and removed together with its job.
    job = models.ForeignKey(
        'scraper.ScrapeJob',
        on_delete=models.CASCADE,
        related_name='review_items',
        null=True,
        blank=True,
        help_text='Job that created this review item'
    )
    item_type = models.CharField(
        max_length=20,
        choices=ITEM_TYPE_CHOICES
    )
    sport = models.ForeignKey(
        'core.Sport',
        on_delete=models.CASCADE,
        related_name='review_items'
    )

    # Raw value from scraping
    raw_value = models.CharField(
        max_length=300,
        help_text='Original scraped value'
    )

    # Suggested resolution
    suggested_id = models.CharField(
        max_length=100,
        blank=True,
        help_text='Suggested canonical ID (if any match found)'
    )
    confidence = models.FloatField(
        default=0.0,
        help_text='Match confidence (0.0 - 1.0)'
    )
    reason = models.CharField(
        max_length=20,
        choices=REASON_CHOICES,
        help_text='Why manual review is needed'
    )

    # Context
    source_url = models.URLField(
        blank=True,
        help_text='URL where this value was found'
    )
    check_date = models.DateField(
        null=True,
        blank=True,
        help_text='Date context for alias resolution'
    )
    context = models.JSONField(
        null=True,
        blank=True,
        help_text='Additional context (e.g., game info)'
    )

    # Resolution
    status = models.CharField(
        max_length=20,
        choices=STATUS_CHOICES,
        default='pending'
    )
    resolved_to = models.CharField(
        max_length=100,
        blank=True,
        help_text='Final resolved canonical ID'
    )
    # Kept (as NULL) when the resolving user account is deleted.
    resolved_by = models.ForeignKey(
        'auth.User',
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name='resolved_review_items'
    )
    resolved_at = models.DateTimeField(null=True, blank=True)
    resolution_notes = models.TextField(
        blank=True,
        help_text='Notes about the resolution'
    )
    create_alias = models.BooleanField(
        default=False,
        help_text='Whether to create an alias from this resolution'
    )

    # Metadata
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    # Audit trail
    history = HistoricalRecords()

    class Meta:
        ordering = ['-created_at']
        verbose_name = 'Manual Review Item'
        verbose_name_plural = 'Manual Review Items'
        indexes = [
            models.Index(fields=['status', 'item_type']),
            models.Index(fields=['sport', 'status']),
            models.Index(fields=['raw_value']),
        ]

    def __str__(self):
        """Return ``"<item_type>: <raw_value> (<status label>)"``."""
        return f"{self.item_type}: {self.raw_value} ({self.get_status_display()})"

    @property
    def confidence_display(self):
        """Return confidence as percentage string."""
        return f"{self.confidence * 100:.0f}%"

    def resolve(self, canonical_id, user=None, notes='', create_alias=False):
        """
        Resolve this review item.

        Marks the item as ``'resolved'``, records who resolved it and when,
        saves it, and then optionally creates an alias for ``raw_value``.

        Args:
            canonical_id: Value stored in ``resolved_to``; also used to look
                up the alias target when an alias is requested.
            user: Stored in ``resolved_by``; may be ``None``.
            notes: Stored in ``resolution_notes``.
            create_alias: Stored on the item. When true and ``canonical_id``
                is truthy, an alias is created after the item is saved.
        """
        from django.utils import timezone

        self.status = 'resolved'
        self.resolved_to = canonical_id
        self.resolved_by = user
        self.resolved_at = timezone.now()
        self.resolution_notes = notes
        self.create_alias = create_alias
        self.save()

        # Optionally create alias. The item is already saved at this point,
        # so a missing alias target does not undo the resolution.
        if create_alias and canonical_id:
            self._create_alias(canonical_id)

    def _create_alias(self, canonical_id):
        """
        Create an alias from this resolution.

        Looks up the Team or Stadium whose ``id`` equals ``canonical_id``
        (chosen by ``item_type``) and get-or-creates an alias mapping
        ``raw_value`` to it. This is best effort: when the target does not
        exist, a warning is logged and no alias is created. Any other
        ``item_type`` is a no-op.
        """
        import logging

        from core.models import TeamAlias, StadiumAlias, Team, Stadium

        logger = logging.getLogger(__name__)

        if self.item_type == 'team':
            # Only the lookup is guarded; errors from get_or_create propagate.
            try:
                team = Team.objects.get(id=canonical_id)
            except Team.DoesNotExist:
                logger.warning(
                    "Review item #%s: no Team with id %r; alias %r not created",
                    self.id, canonical_id, self.raw_value,
                )
                return
            TeamAlias.objects.get_or_create(
                team=team,
                alias=self.raw_value,
                defaults={
                    'alias_type': 'historical',
                    'source': 'manual_review',
                    'notes': f'Created from review item #{self.id}',
                }
            )
        elif self.item_type == 'stadium':
            try:
                stadium = Stadium.objects.get(id=canonical_id)
            except Stadium.DoesNotExist:
                logger.warning(
                    "Review item #%s: no Stadium with id %r; alias %r not created",
                    self.id, canonical_id, self.raw_value,
                )
                return
            StadiumAlias.objects.get_or_create(
                stadium=stadium,
                alias=self.raw_value,
                defaults={
                    'alias_type': 'former',
                    'source': 'manual_review',
                    'notes': f'Created from review item #{self.id}',
                }
            )

    def ignore(self, user=None, notes=''):
        """
        Mark this review item as ignored.

        Args:
            user: Stored in ``resolved_by``; may be ``None``.
            notes: Stored in ``resolution_notes``.
        """
        from django.utils import timezone

        self.status = 'ignored'
        self.resolved_by = user
        self.resolved_at = timezone.now()
        self.resolution_notes = notes
        self.save()
|