Replace monolithic scraping scripts with sportstime_parser package: - Multi-source scrapers with automatic fallback for 7 sports - Canonical ID generation for games, teams, and stadiums - Fuzzy matching with configurable thresholds for name resolution - CloudKit Web Services uploader with JWT auth, diff-based updates - Resumable uploads with checkpoint state persistence - Validation reports with manual review items and suggested matches - Comprehensive test suite (249 tests) CLI: sportstime-parser scrape|validate|upload|status|retry|clear Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
426 lines
14 KiB
Python
426 lines
14 KiB
Python
"""Record differ for CloudKit uploads.

This module compares local records with CloudKit records to determine
what needs to be created, updated, or deleted.
"""
|
|
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime
|
|
from enum import Enum
|
|
from typing import Any, Optional
|
|
|
|
from ..models.game import Game
|
|
from ..models.team import Team
|
|
from ..models.stadium import Stadium
|
|
from .cloudkit import CloudKitRecord, RecordType
|
|
|
|
|
|
class DiffAction(str, Enum):
    """Sync operation the differ assigns to a single record.

    Members also subclass ``str``, so each one compares equal to its
    lowercase value.
    """

    # Record exists locally but has no remote counterpart.
    CREATE = "create"
    # Record exists on both sides and at least one compared field differs.
    UPDATE = "update"
    # Record exists remotely but has no local counterpart.
    DELETE = "delete"
    # Record exists on both sides and every compared field matches.
    UNCHANGED = "unchanged"
|
|
|
|
|
|
@dataclass
class RecordDiff:
    """Outcome of comparing one record name across local data and CloudKit.

    Attributes:
        record_name: Canonical record ID used to match the two sides.
        record_type: CloudKit record type the record belongs to.
        action: Operation needed to reconcile the two sides
            (create, update, delete, unchanged).
        local_record: Locally built CloudKitRecord; None for a delete.
        remote_record: Raw remote record dict; None for a create.
        changed_fields: Names of the fields whose values differ
            (filled in for updates).
        record_change_tag: Change tag taken from the remote record
            (used when updating).
    """

    record_name: str
    record_type: RecordType
    action: DiffAction
    local_record: Optional[CloudKitRecord] = None
    remote_record: Optional[dict] = None
    changed_fields: list[str] = field(default_factory=list)
    record_change_tag: Optional[str] = None
|
|
|
|
|
|
@dataclass
class DiffResult:
    """Diffs produced by one comparison run, bucketed by action.

    Attributes:
        creates: Diffs for records that must be created remotely.
        updates: Diffs for records whose compared fields changed.
        deletes: Diffs for remote records with no local counterpart.
        unchanged: Diffs for records that match on every compared field.
    """

    creates: list[RecordDiff] = field(default_factory=list)
    updates: list[RecordDiff] = field(default_factory=list)
    deletes: list[RecordDiff] = field(default_factory=list)
    unchanged: list[RecordDiff] = field(default_factory=list)

    @property
    def create_count(self) -> int:
        """Number of diffs in ``creates``."""
        return len(self.creates)

    @property
    def update_count(self) -> int:
        """Number of diffs in ``updates``."""
        return len(self.updates)

    @property
    def delete_count(self) -> int:
        """Number of diffs in ``deletes``."""
        return len(self.deletes)

    @property
    def unchanged_count(self) -> int:
        """Number of diffs in ``unchanged``."""
        return len(self.unchanged)

    @property
    def total_changes(self) -> int:
        """Combined size of the create, update and delete buckets."""
        changing_buckets = (self.creates, self.updates, self.deletes)
        return sum(len(bucket) for bucket in changing_buckets)

    def get_records_to_upload(self) -> list[CloudKitRecord]:
        """Collect the local records behind every create and update.

        Creates come first, then updates. As a side effect, each update's
        local record has its ``record_change_tag`` overwritten in place
        with the tag stored on the diff.

        Returns:
            Local records to upload; diffs without a local record are
            skipped.
        """
        pending = [entry.local_record for entry in self.creates if entry.local_record]

        for entry in self.updates:
            record = entry.local_record
            if not record:
                continue
            # Carry the remote change tag along with the updated record.
            record.record_change_tag = entry.record_change_tag
            pending.append(record)

        return pending
|
|
|
|
|
|
class RecordDiffer:
    """Compares locally built records with records fetched from CloudKit.

    The public ``diff_*`` methods convert model objects to CloudKitRecord
    instances and delegate to ``_diff_records``, which matches the two
    sides by record name and sorts each record into a create, update,
    delete or unchanged bucket.
    """

    # Fields compared for each record type. A field that is not listed
    # here never causes a record to be reported as updated.
    GAME_FIELDS = [
        "sport", "season", "home_team_id", "away_team_id", "stadium_id",
        "game_date", "game_number", "home_score", "away_score", "status",
    ]

    TEAM_FIELDS = [
        "sport", "city", "name", "full_name", "abbreviation",
        "conference", "division", "primary_color", "secondary_color",
        "logo_url", "stadium_id",
    ]

    STADIUM_FIELDS = [
        "sport", "name", "city", "state", "country",
        "latitude", "longitude", "capacity", "surface",
        "roof_type", "opened_year", "image_url", "timezone",
    ]

    def diff_games(
        self,
        local_games: list[Game],
        remote_records: list[dict],
    ) -> DiffResult:
        """Diff local games against remote CloudKit records.

        Args:
            local_games: List of local Game objects
            remote_records: List of remote record dictionaries

        Returns:
            DiffResult with creates, updates, deletes and unchanged records
        """
        local_records = [self._game_to_record(g) for g in local_games]
        return self._diff_records(
            local_records,
            remote_records,
            RecordType.GAME,
            self.GAME_FIELDS,
        )

    def diff_teams(
        self,
        local_teams: list[Team],
        remote_records: list[dict],
    ) -> DiffResult:
        """Diff local teams against remote CloudKit records.

        Args:
            local_teams: List of local Team objects
            remote_records: List of remote record dictionaries

        Returns:
            DiffResult with creates, updates, deletes and unchanged records
        """
        local_records = [self._team_to_record(t) for t in local_teams]
        return self._diff_records(
            local_records,
            remote_records,
            RecordType.TEAM,
            self.TEAM_FIELDS,
        )

    def diff_stadiums(
        self,
        local_stadiums: list[Stadium],
        remote_records: list[dict],
    ) -> DiffResult:
        """Diff local stadiums against remote CloudKit records.

        Args:
            local_stadiums: List of local Stadium objects
            remote_records: List of remote record dictionaries

        Returns:
            DiffResult with creates, updates, deletes and unchanged records
        """
        local_records = [self._stadium_to_record(s) for s in local_stadiums]
        return self._diff_records(
            local_records,
            remote_records,
            RecordType.STADIUM,
            self.STADIUM_FIELDS,
        )

    def _diff_records(
        self,
        local_records: list[CloudKitRecord],
        remote_records: list[dict],
        record_type: RecordType,
        compare_fields: list[str],
    ) -> DiffResult:
        """Compare local and remote records.

        Records are matched by name. When several local records share a
        record name, only the last one is diffed, so a name appears at
        most once across the result buckets.

        Args:
            local_records: List of local CloudKitRecord objects
            remote_records: List of remote record dictionaries
            record_type: Type of records being compared
            compare_fields: List of field names to compare

        Returns:
            DiffResult with categorized differences
        """
        result = DiffResult()

        # Index remote records by name; entries without a usable
        # "recordName" cannot be matched and are ignored.
        remote_by_name: dict[str, dict] = {}
        for record in remote_records:
            name = record.get("recordName")
            if name:
                remote_by_name[name] = record

        # Index local records by name. Later duplicates replace earlier
        # ones while keeping the position of the first occurrence.
        local_by_name: dict[str, CloudKitRecord] = {
            record.record_name: record for record in local_records
        }

        # Find creates, updates and unchanged records. Iterating the index
        # (not the raw list) keeps duplicate names from being emitted twice.
        for record_name, local_record in local_by_name.items():
            remote = remote_by_name.get(record_name)

            if remote is None:
                # No remote counterpart: new record.
                result.creates.append(RecordDiff(
                    record_name=record_name,
                    record_type=record_type,
                    action=DiffAction.CREATE,
                    local_record=local_record,
                ))
                continue

            changed_fields = self._compare_fields(
                local_record.fields,
                # A missing or null "fields" entry is treated as no fields.
                remote.get("fields") or {},
                compare_fields,
            )

            if changed_fields:
                result.updates.append(RecordDiff(
                    record_name=record_name,
                    record_type=record_type,
                    action=DiffAction.UPDATE,
                    local_record=local_record,
                    remote_record=remote,
                    changed_fields=changed_fields,
                    record_change_tag=remote.get("recordChangeTag"),
                ))
            else:
                result.unchanged.append(RecordDiff(
                    record_name=record_name,
                    record_type=record_type,
                    action=DiffAction.UNCHANGED,
                    local_record=local_record,
                    remote_record=remote,
                    record_change_tag=remote.get("recordChangeTag"),
                ))

        # Find deletes (remote records not in local).
        for remote_name, remote in remote_by_name.items():
            if remote_name in local_by_name:
                continue
            result.deletes.append(RecordDiff(
                record_name=remote_name,
                record_type=record_type,
                action=DiffAction.DELETE,
                remote_record=remote,
                record_change_tag=remote.get("recordChangeTag"),
            ))

        return result

    def _compare_fields(
        self,
        local_fields: dict[str, Any],
        remote_fields: dict[str, dict],
        compare_fields: list[str],
    ) -> list[str]:
        """Compare field values between local and remote.

        A field missing on either side is treated as None on that side.

        Args:
            local_fields: Local field values keyed by field name
            remote_fields: Remote fields keyed by field name; each entry
                is read as a dict with "value" and optionally "type"
            compare_fields: Fields to compare

        Returns:
            List of field names that differ, in ``compare_fields`` order
        """
        changed = []

        for field_name in compare_fields:
            local_value = local_fields.get(field_name)
            remote_field = remote_fields.get(field_name) or {}
            remote_value = remote_field.get("value")

            # Normalize both sides so equal values compare equal.
            local_normalized = self._normalize_value(local_value)
            remote_normalized = self._normalize_remote_value(remote_value, remote_field)

            if local_normalized != remote_normalized:
                changed.append(field_name)

        return changed

    def _normalize_value(self, value: Any) -> Any:
        """Normalize a local value for comparison.

        datetimes become integer milliseconds since the epoch and floats
        are rounded to 6 decimal places; anything else is returned as is.
        """
        if value is None:
            return None
        if isinstance(value, datetime):
            # Convert to milliseconds since epoch (fraction truncated).
            # NOTE(review): datetime.timestamp() treats naive datetimes as
            # local time - confirm the models supply aware datetimes.
            return int(value.timestamp() * 1000)
        if isinstance(value, float):
            # Round to 6 decimal places for coordinate comparison.
            return round(value, 6)
        return value

    def _normalize_remote_value(self, value: Any, field_data: dict) -> Any:
        """Normalize a remote CloudKit value for comparison.

        Args:
            value: The remote field's "value" entry (may be None)
            field_data: The whole remote field dict; its "type" entry
                selects the normalization

        Returns:
            DOUBLE values rounded to 6 decimal places, LOCATION dicts as
            a (latitude, longitude) tuple rounded to 6 decimal places,
            and every other value unchanged
        """
        if value is None:
            return None

        field_type = field_data.get("type", "")

        if field_type == "DOUBLE":
            return round(value, 6)
        if field_type == "LOCATION" and isinstance(value, dict):
            # Return as tuple for comparison.
            return (
                round(value.get("latitude", 0), 6),
                round(value.get("longitude", 0), 6),
            )

        # TIMESTAMP values are taken to be in milliseconds already; they
        # and all other types pass through untouched.
        return value

    def _game_to_record(self, game: Game) -> CloudKitRecord:
        """Convert a Game to a CloudKitRecord named after ``game.id``."""
        return CloudKitRecord(
            record_name=game.id,
            record_type=RecordType.GAME,
            fields={
                "sport": game.sport,
                "season": game.season,
                "home_team_id": game.home_team_id,
                "away_team_id": game.away_team_id,
                "stadium_id": game.stadium_id,
                "game_date": game.game_date,
                "game_number": game.game_number,
                "home_score": game.home_score,
                "away_score": game.away_score,
                "status": game.status,
            },
        )

    def _team_to_record(self, team: Team) -> CloudKitRecord:
        """Convert a Team to a CloudKitRecord named after ``team.id``."""
        return CloudKitRecord(
            record_name=team.id,
            record_type=RecordType.TEAM,
            fields={
                "sport": team.sport,
                "city": team.city,
                "name": team.name,
                "full_name": team.full_name,
                "abbreviation": team.abbreviation,
                "conference": team.conference,
                "division": team.division,
                "primary_color": team.primary_color,
                "secondary_color": team.secondary_color,
                "logo_url": team.logo_url,
                "stadium_id": team.stadium_id,
            },
        )

    def _stadium_to_record(self, stadium: Stadium) -> CloudKitRecord:
        """Convert a Stadium to a CloudKitRecord named after ``stadium.id``."""
        return CloudKitRecord(
            record_name=stadium.id,
            record_type=RecordType.STADIUM,
            fields={
                "sport": stadium.sport,
                "name": stadium.name,
                "city": stadium.city,
                "state": stadium.state,
                "country": stadium.country,
                "latitude": stadium.latitude,
                "longitude": stadium.longitude,
                "capacity": stadium.capacity,
                "surface": stadium.surface,
                "roof_type": stadium.roof_type,
                "opened_year": stadium.opened_year,
                "image_url": stadium.image_url,
                "timezone": stadium.timezone,
            },
        )
|
|
|
|
|
|
def game_to_cloudkit_record(game: Game) -> CloudKitRecord:
    """Build the CloudKitRecord for a single Game.

    Module-level wrapper around ``RecordDiffer._game_to_record`` for
    callers outside this module.
    """
    return RecordDiffer()._game_to_record(game)
|
|
|
|
|
|
def team_to_cloudkit_record(team: Team) -> CloudKitRecord:
    """Build the CloudKitRecord for a single Team.

    Module-level wrapper around ``RecordDiffer._team_to_record`` for
    callers outside this module.
    """
    return RecordDiffer()._team_to_record(team)
|
|
|
|
|
|
def stadium_to_cloudkit_record(stadium: Stadium) -> CloudKitRecord:
    """Build the CloudKitRecord for a single Stadium.

    Module-level wrapper around ``RecordDiffer._stadium_to_record`` for
    callers outside this module.
    """
    return RecordDiffer()._stadium_to_record(stadium)
|