"""Alias and manual review data models for sportstime-parser."""

from dataclasses import dataclass, field
from datetime import date, datetime
from enum import Enum
from typing import Optional
import json


def _parse_optional_date(value: Optional[str]) -> Optional[date]:
    """Parse an ISO-8601 date string, mapping missing/empty values to None."""
    return date.fromisoformat(value) if value else None


def _within_validity_window(
    check_date: date,
    valid_from: Optional[date],
    valid_until: Optional[date],
) -> bool:
    """Return True if check_date lies in the inclusive [valid_from, valid_until] range.

    A bound of None means the window is open on that side.
    """
    if valid_from is not None and check_date < valid_from:
        return False
    if valid_until is not None and check_date > valid_until:
        return False
    return True


class AliasType(Enum):
    """Type of team alias."""

    NAME = "name"
    ABBREVIATION = "abbreviation"
    CITY = "city"


class ReviewReason(Enum):
    """Reason an item requires manual review."""

    UNRESOLVED_TEAM = "unresolved_team"
    UNRESOLVED_STADIUM = "unresolved_stadium"
    LOW_CONFIDENCE_MATCH = "low_confidence_match"
    MISSING_DATA = "missing_data"
    DUPLICATE_GAME = "duplicate_game"
    TIMEZONE_UNKNOWN = "timezone_unknown"
    GEOGRAPHIC_FILTER = "geographic_filter"


@dataclass
class TeamAlias:
    """Represents a team alias with optional date validity.

    Attributes:
        id: Unique alias ID
        team_canonical_id: The canonical team ID this alias resolves to
        alias_type: Type of alias (name, abbreviation, city)
        alias_value: The alias value to match against
        valid_from: Start date of alias validity (None = always valid)
        valid_until: End date of alias validity (None = still valid)
    """

    id: str
    team_canonical_id: str
    alias_type: AliasType
    alias_value: str
    valid_from: Optional[date] = None
    valid_until: Optional[date] = None

    def is_valid_on(self, check_date: date) -> bool:
        """Check if this alias is valid on the given date (bounds inclusive)."""
        return _within_validity_window(
            check_date, self.valid_from, self.valid_until
        )

    def to_dict(self) -> dict:
        """Convert to dictionary for JSON serialization.

        Dates are emitted as ISO-8601 strings; the alias type as its enum value.
        """
        return {
            "id": self.id,
            "team_canonical_id": self.team_canonical_id,
            "alias_type": self.alias_type.value,
            "alias_value": self.alias_value,
            "valid_from": self.valid_from.isoformat() if self.valid_from else None,
            "valid_until": self.valid_until.isoformat() if self.valid_until else None,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "TeamAlias":
        """Create a TeamAlias from a dictionary.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If ``alias_type`` is not a known AliasType value or a
                date string is not valid ISO format.
        """
        return cls(
            id=data["id"],
            team_canonical_id=data["team_canonical_id"],
            alias_type=AliasType(data["alias_type"]),
            alias_value=data["alias_value"],
            valid_from=_parse_optional_date(data.get("valid_from")),
            valid_until=_parse_optional_date(data.get("valid_until")),
        )


@dataclass
class StadiumAlias:
    """Represents a stadium alias with optional date validity.

    Attributes:
        alias_name: The alias name to match against (lowercase)
        stadium_canonical_id: The canonical stadium ID this alias resolves to
        valid_from: Start date of alias validity (None = always valid)
        valid_until: End date of alias validity (None = still valid)
    """

    alias_name: str
    stadium_canonical_id: str
    valid_from: Optional[date] = None
    valid_until: Optional[date] = None

    def is_valid_on(self, check_date: date) -> bool:
        """Check if this alias is valid on the given date (bounds inclusive)."""
        return _within_validity_window(
            check_date, self.valid_from, self.valid_until
        )

    def to_dict(self) -> dict:
        """Convert to dictionary for JSON serialization.

        Dates are emitted as ISO-8601 strings, or None when unset.
        """
        return {
            "alias_name": self.alias_name,
            "stadium_canonical_id": self.stadium_canonical_id,
            "valid_from": self.valid_from.isoformat() if self.valid_from else None,
            "valid_until": self.valid_until.isoformat() if self.valid_until else None,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "StadiumAlias":
        """Create a StadiumAlias from a dictionary.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If a date string is not valid ISO format.
        """
        return cls(
            alias_name=data["alias_name"],
            stadium_canonical_id=data["stadium_canonical_id"],
            valid_from=_parse_optional_date(data.get("valid_from")),
            valid_until=_parse_optional_date(data.get("valid_until")),
        )


@dataclass
class FuzzyMatch:
    """Represents a fuzzy match suggestion with confidence score."""

    canonical_id: str
    canonical_name: str
    confidence: int  # 0-100

    def to_dict(self) -> dict:
        """Convert to dictionary for JSON serialization."""
        return {
            "canonical_id": self.canonical_id,
            "canonical_name": self.canonical_name,
            "confidence": self.confidence,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "FuzzyMatch":
        """Create a FuzzyMatch from a dictionary (inverse of ``to_dict``).

        Raises:
            KeyError: If a required key is missing.
        """
        return cls(
            canonical_id=data["canonical_id"],
            canonical_name=data["canonical_name"],
            confidence=data["confidence"],
        )


@dataclass
class ManualReviewItem:
    """Represents an item requiring manual review.

    Attributes:
        id: Unique review item ID
        reason: Why this item needs review
        sport: Sport code
        raw_value: The original unresolved value
        context: Additional context about the issue
        source_url: URL of the source page
        suggested_matches: List of potential matches with confidence scores
        game_date: Date of the game (if applicable)
        created_at: When this review item was created
    """

    id: str
    reason: ReviewReason
    sport: str
    raw_value: str
    context: dict = field(default_factory=dict)
    source_url: Optional[str] = None
    suggested_matches: list[FuzzyMatch] = field(default_factory=list)
    game_date: Optional[date] = None
    # datetime.now() without a tz argument yields a naive local timestamp.
    created_at: datetime = field(default_factory=datetime.now)

    def to_dict(self) -> dict:
        """Convert to dictionary for JSON serialization.

        Note that ``context`` is returned by reference, not copied.
        """
        return {
            "id": self.id,
            "reason": self.reason.value,
            "sport": self.sport,
            "raw_value": self.raw_value,
            "context": self.context,
            "source_url": self.source_url,
            "suggested_matches": [m.to_dict() for m in self.suggested_matches],
            "game_date": self.game_date.isoformat() if self.game_date else None,
            "created_at": self.created_at.isoformat(),
        }

    @classmethod
    def from_dict(cls, data: dict) -> "ManualReviewItem":
        """Create a ManualReviewItem from a dictionary.

        Optional keys may be absent or explicitly null: ``context`` then
        becomes an empty dict, ``suggested_matches`` an empty list, and
        ``created_at`` the current time.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If ``reason`` is not a known ReviewReason value or a
                date/datetime string is not valid ISO format.
        """
        raw_created_at = data.get("created_at")
        created_at = (
            datetime.fromisoformat(raw_created_at)
            if raw_created_at
            else datetime.now()
        )

        # ``or`` (rather than a .get default) also covers keys that are
        # present with a null value, which would otherwise break iteration.
        suggested_matches = [
            FuzzyMatch.from_dict(match_data)
            for match_data in (data.get("suggested_matches") or [])
        ]

        return cls(
            id=data["id"],
            reason=ReviewReason(data["reason"]),
            sport=data["sport"],
            raw_value=data["raw_value"],
            context=data.get("context") or {},
            source_url=data.get("source_url"),
            suggested_matches=suggested_matches,
            game_date=_parse_optional_date(data.get("game_date")),
            created_at=created_at,
        )

    def to_markdown(self) -> str:
        """Generate markdown representation for validation report.

        Sections for game date, context, suggested matches and source are
        emitted only when the corresponding field is set/non-empty. The
        output ends with a horizontal rule followed by a newline.
        """
        heading = self.reason.value.replace("_", " ").title()
        lines = [
            f"### {heading}: {self.raw_value}",
            "",
            f"**Sport**: {self.sport.upper()}",
        ]

        if self.game_date:
            lines.append(f"**Game Date**: {self.game_date.isoformat()}")

        if self.context:
            lines.extend(["", "**Context**:"])
            lines.extend(
                f"- {key}: {value}" for key, value in self.context.items()
            )

        if self.suggested_matches:
            lines.extend(["", "**Suggested Matches**:"])
            for i, match in enumerate(self.suggested_matches, 1):
                # Flag high-confidence suggestions for the reviewer.
                marker = " <- likely correct" if match.confidence >= 90 else ""
                lines.append(
                    f"{i}. `{match.canonical_id}` ({match.confidence}%){marker}"
                )

        if self.source_url:
            lines.extend(
                ["", f"**Source**: [{self.source_url}]({self.source_url})"]
            )

        lines.extend(["", "---", ""])
        return "\n".join(lines)