feat(scripts): add sportstime-parser data pipeline
Complete Python package for scraping, normalizing, and uploading sports schedule data to CloudKit. Includes: - Multi-source scrapers for NBA, MLB, NFL, NHL, MLS, WNBA, NWSL - Canonical ID system for teams, stadiums, and games - Fuzzy matching with manual alias support - CloudKit uploader with batch operations and deduplication - Comprehensive test suite with fixtures - WNBA abbreviation aliases for improved team resolution - Alias validation script to detect orphan references All 5 phases of data remediation plan completed: - Phase 1: Alias fixes (team/stadium alias additions) - Phase 2: NHL stadium coordinate fixes - Phase 3: Re-scrape validation - Phase 4: iOS bundle update - Phase 5: Code quality improvements (WNBA aliases) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
52
sportstime_parser/uploaders/__init__.py
Normal file
52
sportstime_parser/uploaders/__init__.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""CloudKit uploaders for sportstime-parser."""
|
||||
|
||||
from .cloudkit import (
|
||||
CloudKitClient,
|
||||
CloudKitRecord,
|
||||
CloudKitError,
|
||||
CloudKitAuthError,
|
||||
CloudKitRateLimitError,
|
||||
CloudKitServerError,
|
||||
RecordType,
|
||||
OperationResult,
|
||||
BatchResult,
|
||||
)
|
||||
from .state import (
|
||||
RecordState,
|
||||
UploadSession,
|
||||
StateManager,
|
||||
)
|
||||
from .diff import (
|
||||
DiffAction,
|
||||
RecordDiff,
|
||||
DiffResult,
|
||||
RecordDiffer,
|
||||
game_to_cloudkit_record,
|
||||
team_to_cloudkit_record,
|
||||
stadium_to_cloudkit_record,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
# CloudKit client
|
||||
"CloudKitClient",
|
||||
"CloudKitRecord",
|
||||
"CloudKitError",
|
||||
"CloudKitAuthError",
|
||||
"CloudKitRateLimitError",
|
||||
"CloudKitServerError",
|
||||
"RecordType",
|
||||
"OperationResult",
|
||||
"BatchResult",
|
||||
# State manager
|
||||
"RecordState",
|
||||
"UploadSession",
|
||||
"StateManager",
|
||||
# Differ
|
||||
"DiffAction",
|
||||
"RecordDiff",
|
||||
"DiffResult",
|
||||
"RecordDiffer",
|
||||
"game_to_cloudkit_record",
|
||||
"team_to_cloudkit_record",
|
||||
"stadium_to_cloudkit_record",
|
||||
]
|
||||
578
sportstime_parser/uploaders/cloudkit.py
Normal file
578
sportstime_parser/uploaders/cloudkit.py
Normal file
@@ -0,0 +1,578 @@
|
||||
"""CloudKit Web Services client for sportstime-parser.
|
||||
|
||||
This module provides a client for uploading data to CloudKit using the
|
||||
CloudKit Web Services API. It handles JWT authentication, request signing,
|
||||
and batch operations.
|
||||
|
||||
Reference: https://developer.apple.com/documentation/cloudkitwebservices
|
||||
"""
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
from enum import Enum
|
||||
|
||||
import jwt
|
||||
import requests
|
||||
from cryptography.hazmat.primitives import hashes, serialization
|
||||
from cryptography.hazmat.primitives.asymmetric import ec
|
||||
from cryptography.hazmat.backends import default_backend
|
||||
|
||||
from ..config import (
|
||||
CLOUDKIT_CONTAINER_ID,
|
||||
CLOUDKIT_ENVIRONMENT,
|
||||
CLOUDKIT_BATCH_SIZE,
|
||||
CLOUDKIT_KEY_ID,
|
||||
CLOUDKIT_PRIVATE_KEY_PATH,
|
||||
)
|
||||
from ..utils.logging import get_logger
|
||||
|
||||
|
||||
class RecordType(str, Enum):
|
||||
"""CloudKit record types for SportsTime.
|
||||
|
||||
Must match CKRecordType constants in CKModels.swift.
|
||||
"""
|
||||
GAME = "Game"
|
||||
TEAM = "Team"
|
||||
STADIUM = "Stadium"
|
||||
TEAM_ALIAS = "TeamAlias"
|
||||
STADIUM_ALIAS = "StadiumAlias"
|
||||
SPORT = "Sport"
|
||||
LEAGUE_STRUCTURE = "LeagueStructure"
|
||||
TRIP_POLL = "TripPoll"
|
||||
POLL_VOTE = "PollVote"
|
||||
ITINERARY_ITEM = "ItineraryItem"
|
||||
|
||||
|
||||
@dataclass
|
||||
class CloudKitRecord:
|
||||
"""Represents a CloudKit record for upload.
|
||||
|
||||
Attributes:
|
||||
record_name: Unique record identifier (canonical ID)
|
||||
record_type: CloudKit record type
|
||||
fields: Dictionary of field name -> field value
|
||||
record_change_tag: Version tag for conflict detection (None for new records)
|
||||
"""
|
||||
record_name: str
|
||||
record_type: RecordType
|
||||
fields: dict[str, Any]
|
||||
record_change_tag: Optional[str] = None
|
||||
|
||||
def to_cloudkit_dict(self) -> dict:
|
||||
"""Convert to CloudKit API format."""
|
||||
record = {
|
||||
"recordName": self.record_name,
|
||||
"recordType": self.record_type.value,
|
||||
"fields": self._format_fields(),
|
||||
}
|
||||
if self.record_change_tag:
|
||||
record["recordChangeTag"] = self.record_change_tag
|
||||
return record
|
||||
|
||||
def _format_fields(self) -> dict:
|
||||
"""Format fields for CloudKit API."""
|
||||
formatted = {}
|
||||
for key, value in self.fields.items():
|
||||
if value is None:
|
||||
continue
|
||||
formatted[key] = self._format_field_value(value)
|
||||
return formatted
|
||||
|
||||
def _format_field_value(self, value: Any) -> dict:
|
||||
"""Format a single field value for CloudKit API."""
|
||||
# Check bool BEFORE int (bool is a subclass of int in Python)
|
||||
if isinstance(value, bool):
|
||||
return {"value": 1 if value else 0, "type": "INT64"}
|
||||
elif isinstance(value, str):
|
||||
return {"value": value, "type": "STRING"}
|
||||
elif isinstance(value, int):
|
||||
return {"value": value, "type": "INT64"}
|
||||
elif isinstance(value, float):
|
||||
return {"value": value, "type": "DOUBLE"}
|
||||
elif isinstance(value, datetime):
|
||||
# CloudKit expects milliseconds since epoch
|
||||
timestamp_ms = int(value.timestamp() * 1000)
|
||||
return {"value": timestamp_ms, "type": "TIMESTAMP"}
|
||||
elif isinstance(value, list):
|
||||
return {"value": value, "type": "STRING_LIST"}
|
||||
elif isinstance(value, dict) and "latitude" in value and "longitude" in value:
|
||||
return {
|
||||
"value": {
|
||||
"latitude": value["latitude"],
|
||||
"longitude": value["longitude"],
|
||||
},
|
||||
"type": "LOCATION",
|
||||
}
|
||||
else:
|
||||
# Default to string
|
||||
return {"value": str(value), "type": "STRING"}
|
||||
|
||||
|
||||
@dataclass
|
||||
class OperationResult:
|
||||
"""Result of a CloudKit operation."""
|
||||
record_name: str
|
||||
success: bool
|
||||
record_change_tag: Optional[str] = None
|
||||
error_code: Optional[str] = None
|
||||
error_message: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class BatchResult:
|
||||
"""Result of a batch CloudKit operation."""
|
||||
successful: list[OperationResult] = field(default_factory=list)
|
||||
failed: list[OperationResult] = field(default_factory=list)
|
||||
|
||||
@property
|
||||
def all_succeeded(self) -> bool:
|
||||
return len(self.failed) == 0
|
||||
|
||||
@property
|
||||
def success_count(self) -> int:
|
||||
return len(self.successful)
|
||||
|
||||
@property
|
||||
def failure_count(self) -> int:
|
||||
return len(self.failed)
|
||||
|
||||
|
||||
class CloudKitClient:
|
||||
"""Client for CloudKit Web Services API.
|
||||
|
||||
Handles authentication via server-to-server JWT tokens and provides
|
||||
methods for CRUD operations on CloudKit records.
|
||||
|
||||
Authentication requires:
|
||||
- Key ID: CloudKit key identifier from Apple Developer Portal
|
||||
- Private Key: EC private key in PEM format
|
||||
|
||||
Environment variables:
|
||||
- CLOUDKIT_KEY_ID: The key identifier
|
||||
- CLOUDKIT_PRIVATE_KEY_PATH: Path to the private key file
|
||||
- CLOUDKIT_PRIVATE_KEY: The private key contents (alternative to path)
|
||||
"""
|
||||
|
||||
BASE_URL = "https://api.apple-cloudkit.com"
|
||||
TOKEN_EXPIRY_SECONDS = 3600 # 1 hour
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
container_id: str = CLOUDKIT_CONTAINER_ID,
|
||||
environment: str = CLOUDKIT_ENVIRONMENT,
|
||||
key_id: Optional[str] = None,
|
||||
private_key: Optional[str] = None,
|
||||
private_key_path: Optional[str] = None,
|
||||
):
|
||||
"""Initialize the CloudKit client.
|
||||
|
||||
Args:
|
||||
container_id: CloudKit container identifier
|
||||
environment: 'development' or 'production'
|
||||
key_id: CloudKit server-to-server key ID
|
||||
private_key: PEM-encoded EC private key contents
|
||||
private_key_path: Path to PEM-encoded EC private key file
|
||||
"""
|
||||
self.container_id = container_id
|
||||
self.environment = environment
|
||||
self.logger = get_logger()
|
||||
|
||||
# Load authentication credentials (config defaults > env vars > None)
|
||||
self.key_id = key_id or os.environ.get("CLOUDKIT_KEY_ID") or CLOUDKIT_KEY_ID
|
||||
|
||||
if private_key:
|
||||
self._private_key_pem = private_key
|
||||
elif private_key_path:
|
||||
self._private_key_pem = Path(private_key_path).read_text()
|
||||
elif os.environ.get("CLOUDKIT_PRIVATE_KEY"):
|
||||
self._private_key_pem = os.environ["CLOUDKIT_PRIVATE_KEY"]
|
||||
elif os.environ.get("CLOUDKIT_PRIVATE_KEY_PATH"):
|
||||
self._private_key_pem = Path(os.environ["CLOUDKIT_PRIVATE_KEY_PATH"]).read_text()
|
||||
elif CLOUDKIT_PRIVATE_KEY_PATH.exists():
|
||||
self._private_key_pem = CLOUDKIT_PRIVATE_KEY_PATH.read_text()
|
||||
else:
|
||||
self._private_key_pem = None
|
||||
|
||||
# Parse the private key if available
|
||||
self._private_key = None
|
||||
if self._private_key_pem:
|
||||
self._private_key = serialization.load_pem_private_key(
|
||||
self._private_key_pem.encode(),
|
||||
password=None,
|
||||
backend=default_backend(),
|
||||
)
|
||||
|
||||
# Token cache
|
||||
self._token: Optional[str] = None
|
||||
self._token_expiry: float = 0
|
||||
|
||||
# Session for connection pooling
|
||||
self._session = requests.Session()
|
||||
|
||||
@property
|
||||
def is_configured(self) -> bool:
|
||||
"""Check if the client has valid authentication credentials."""
|
||||
return bool(self.key_id and self._private_key)
|
||||
|
||||
def _get_api_path(self, operation: str) -> str:
|
||||
"""Build the full API path for an operation."""
|
||||
return f"/database/1/{self.container_id}/{self.environment}/public/{operation}"
|
||||
|
||||
def _get_token(self) -> str:
|
||||
"""Get a valid JWT token, generating a new one if needed."""
|
||||
if not self.is_configured:
|
||||
raise ValueError(
|
||||
"CloudKit client not configured. Set CLOUDKIT_KEY_ID and "
|
||||
"CLOUDKIT_PRIVATE_KEY_PATH environment variables."
|
||||
)
|
||||
|
||||
now = time.time()
|
||||
|
||||
# Return cached token if still valid (with 5 min buffer)
|
||||
if self._token and (self._token_expiry - now) > 300:
|
||||
return self._token
|
||||
|
||||
# Generate new token
|
||||
expiry = now + self.TOKEN_EXPIRY_SECONDS
|
||||
|
||||
payload = {
|
||||
"iss": self.key_id,
|
||||
"iat": int(now),
|
||||
"exp": int(expiry),
|
||||
"sub": self.container_id,
|
||||
}
|
||||
|
||||
self._token = jwt.encode(
|
||||
payload,
|
||||
self._private_key,
|
||||
algorithm="ES256",
|
||||
)
|
||||
self._token_expiry = expiry
|
||||
|
||||
return self._token
|
||||
|
||||
def _sign_request(self, method: str, path: str, body: Optional[bytes] = None) -> dict:
|
||||
"""Generate request headers with authentication.
|
||||
|
||||
Args:
|
||||
method: HTTP method
|
||||
path: API path
|
||||
body: Request body bytes
|
||||
|
||||
Returns:
|
||||
Dictionary of headers to include in the request
|
||||
"""
|
||||
token = self._get_token()
|
||||
|
||||
# CloudKit uses date in ISO format
|
||||
date_str = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
|
||||
# Calculate body hash
|
||||
if body:
|
||||
body_hash = base64.b64encode(
|
||||
hashlib.sha256(body).digest()
|
||||
).decode()
|
||||
else:
|
||||
body_hash = base64.b64encode(
|
||||
hashlib.sha256(b"").digest()
|
||||
).decode()
|
||||
|
||||
# Build the message to sign
|
||||
# Format: date:body_hash:path
|
||||
message = f"{date_str}:{body_hash}:{path}"
|
||||
|
||||
# Sign the message
|
||||
signature = self._private_key.sign(
|
||||
message.encode(),
|
||||
ec.ECDSA(hashes.SHA256()),
|
||||
)
|
||||
signature_b64 = base64.b64encode(signature).decode()
|
||||
|
||||
return {
|
||||
"Authorization": f"Bearer {token}",
|
||||
"X-Apple-CloudKit-Request-KeyID": self.key_id,
|
||||
"X-Apple-CloudKit-Request-ISO8601Date": date_str,
|
||||
"X-Apple-CloudKit-Request-SignatureV1": signature_b64,
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
|
||||
def _request(
|
||||
self,
|
||||
method: str,
|
||||
operation: str,
|
||||
body: Optional[dict] = None,
|
||||
) -> dict:
|
||||
"""Make a request to the CloudKit API.
|
||||
|
||||
Args:
|
||||
method: HTTP method
|
||||
operation: API operation path
|
||||
body: Request body as dictionary
|
||||
|
||||
Returns:
|
||||
Response data as dictionary
|
||||
|
||||
Raises:
|
||||
CloudKitError: If the request fails
|
||||
"""
|
||||
path = self._get_api_path(operation)
|
||||
url = f"{self.BASE_URL}{path}"
|
||||
|
||||
body_bytes = json.dumps(body).encode() if body else None
|
||||
headers = self._sign_request(method, path, body_bytes)
|
||||
|
||||
response = self._session.request(
|
||||
method=method,
|
||||
url=url,
|
||||
headers=headers,
|
||||
data=body_bytes,
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
return response.json()
|
||||
elif response.status_code == 421:
|
||||
# Authentication required - token may be expired
|
||||
self._token = None
|
||||
raise CloudKitAuthError("Authentication failed - check credentials")
|
||||
elif response.status_code == 429:
|
||||
raise CloudKitRateLimitError("Rate limit exceeded")
|
||||
elif response.status_code >= 500:
|
||||
raise CloudKitServerError(f"Server error: {response.status_code}")
|
||||
else:
|
||||
try:
|
||||
error_data = response.json()
|
||||
error_msg = error_data.get("serverErrorCode", str(response.status_code))
|
||||
except (json.JSONDecodeError, KeyError):
|
||||
error_msg = response.text
|
||||
raise CloudKitError(f"Request failed: {error_msg}")
|
||||
|
||||
def fetch_records(
|
||||
self,
|
||||
record_type: RecordType,
|
||||
record_names: Optional[list[str]] = None,
|
||||
limit: int = 200,
|
||||
) -> list[dict]:
|
||||
"""Fetch records from CloudKit.
|
||||
|
||||
Args:
|
||||
record_type: Type of records to fetch
|
||||
record_names: Specific record names to fetch (optional)
|
||||
limit: Maximum records to return (default 200)
|
||||
|
||||
Returns:
|
||||
List of record dictionaries
|
||||
"""
|
||||
if record_names:
|
||||
# Fetch specific records by name
|
||||
body = {
|
||||
"records": [{"recordName": name} for name in record_names],
|
||||
}
|
||||
response = self._request("POST", "records/lookup", body)
|
||||
else:
|
||||
# Query all records of type
|
||||
body = {
|
||||
"query": {
|
||||
"recordType": record_type.value,
|
||||
},
|
||||
"resultsLimit": limit,
|
||||
}
|
||||
response = self._request("POST", "records/query", body)
|
||||
|
||||
records = response.get("records", [])
|
||||
return [r for r in records if "recordName" in r]
|
||||
|
||||
def fetch_all_records(self, record_type: RecordType) -> list[dict]:
|
||||
"""Fetch all records of a type using pagination.
|
||||
|
||||
Args:
|
||||
record_type: Type of records to fetch
|
||||
|
||||
Returns:
|
||||
List of all record dictionaries
|
||||
"""
|
||||
all_records = []
|
||||
continuation_marker = None
|
||||
|
||||
while True:
|
||||
body = {
|
||||
"query": {
|
||||
"recordType": record_type.value,
|
||||
},
|
||||
"resultsLimit": 200,
|
||||
}
|
||||
|
||||
if continuation_marker:
|
||||
body["continuationMarker"] = continuation_marker
|
||||
|
||||
response = self._request("POST", "records/query", body)
|
||||
|
||||
records = response.get("records", [])
|
||||
all_records.extend([r for r in records if "recordName" in r])
|
||||
|
||||
continuation_marker = response.get("continuationMarker")
|
||||
if not continuation_marker:
|
||||
break
|
||||
|
||||
return all_records
|
||||
|
||||
def save_records(self, records: list[CloudKitRecord]) -> BatchResult:
|
||||
"""Save records to CloudKit (create or update).
|
||||
|
||||
Args:
|
||||
records: List of records to save
|
||||
|
||||
Returns:
|
||||
BatchResult with success/failure details
|
||||
"""
|
||||
result = BatchResult()
|
||||
|
||||
# Process in batches
|
||||
for i in range(0, len(records), CLOUDKIT_BATCH_SIZE):
|
||||
batch = records[i:i + CLOUDKIT_BATCH_SIZE]
|
||||
batch_result = self._save_batch(batch)
|
||||
result.successful.extend(batch_result.successful)
|
||||
result.failed.extend(batch_result.failed)
|
||||
|
||||
return result
|
||||
|
||||
def _save_batch(self, records: list[CloudKitRecord]) -> BatchResult:
|
||||
"""Save a single batch of records.
|
||||
|
||||
Args:
|
||||
records: List of records (max CLOUDKIT_BATCH_SIZE)
|
||||
|
||||
Returns:
|
||||
BatchResult with success/failure details
|
||||
"""
|
||||
result = BatchResult()
|
||||
|
||||
operations = []
|
||||
for record in records:
|
||||
op = {
|
||||
"operationType": "forceReplace",
|
||||
"record": record.to_cloudkit_dict(),
|
||||
}
|
||||
operations.append(op)
|
||||
|
||||
body = {"operations": operations}
|
||||
|
||||
try:
|
||||
response = self._request("POST", "records/modify", body)
|
||||
except CloudKitError as e:
|
||||
# Entire batch failed
|
||||
for record in records:
|
||||
result.failed.append(OperationResult(
|
||||
record_name=record.record_name,
|
||||
success=False,
|
||||
error_message=str(e),
|
||||
))
|
||||
return result
|
||||
|
||||
# Process individual results
|
||||
for record_data in response.get("records", []):
|
||||
record_name = record_data.get("recordName", "unknown")
|
||||
|
||||
if "serverErrorCode" in record_data:
|
||||
result.failed.append(OperationResult(
|
||||
record_name=record_name,
|
||||
success=False,
|
||||
error_code=record_data.get("serverErrorCode"),
|
||||
error_message=record_data.get("reason"),
|
||||
))
|
||||
else:
|
||||
result.successful.append(OperationResult(
|
||||
record_name=record_name,
|
||||
success=True,
|
||||
record_change_tag=record_data.get("recordChangeTag"),
|
||||
))
|
||||
|
||||
return result
|
||||
|
||||
def delete_records(
|
||||
self,
|
||||
record_type: RecordType,
|
||||
records: list[dict],
|
||||
) -> BatchResult:
|
||||
"""Delete records from CloudKit.
|
||||
|
||||
Args:
|
||||
record_type: Type of records to delete
|
||||
records: List of record dicts (must have recordName and recordChangeTag)
|
||||
|
||||
Returns:
|
||||
BatchResult with success/failure details
|
||||
"""
|
||||
result = BatchResult()
|
||||
|
||||
# Process in batches
|
||||
for i in range(0, len(records), CLOUDKIT_BATCH_SIZE):
|
||||
batch = records[i:i + CLOUDKIT_BATCH_SIZE]
|
||||
|
||||
operations = []
|
||||
for record in batch:
|
||||
operations.append({
|
||||
"operationType": "delete",
|
||||
"record": {
|
||||
"recordName": record["recordName"],
|
||||
"recordChangeTag": record.get("recordChangeTag"),
|
||||
},
|
||||
})
|
||||
|
||||
body = {"operations": operations}
|
||||
|
||||
try:
|
||||
response = self._request("POST", "records/modify", body)
|
||||
except CloudKitError as e:
|
||||
for record in batch:
|
||||
result.failed.append(OperationResult(
|
||||
record_name=record["recordName"],
|
||||
success=False,
|
||||
error_message=str(e),
|
||||
))
|
||||
continue
|
||||
|
||||
for record_data in response.get("records", []):
|
||||
record_name = record_data.get("recordName", "unknown")
|
||||
|
||||
if "serverErrorCode" in record_data:
|
||||
result.failed.append(OperationResult(
|
||||
record_name=record_name,
|
||||
success=False,
|
||||
error_code=record_data.get("serverErrorCode"),
|
||||
error_message=record_data.get("reason"),
|
||||
))
|
||||
else:
|
||||
result.successful.append(OperationResult(
|
||||
record_name=record_name,
|
||||
success=True,
|
||||
))
|
||||
|
||||
return result
|
||||
|
||||
|
||||
class CloudKitError(Exception):
|
||||
"""Base exception for CloudKit errors."""
|
||||
pass
|
||||
|
||||
|
||||
class CloudKitAuthError(CloudKitError):
|
||||
"""Authentication error."""
|
||||
pass
|
||||
|
||||
|
||||
class CloudKitRateLimitError(CloudKitError):
|
||||
"""Rate limit exceeded."""
|
||||
pass
|
||||
|
||||
|
||||
class CloudKitServerError(CloudKitError):
|
||||
"""Server-side error."""
|
||||
pass
|
||||
741
sportstime_parser/uploaders/diff.py
Normal file
741
sportstime_parser/uploaders/diff.py
Normal file
@@ -0,0 +1,741 @@
|
||||
"""Record differ for CloudKit uploads.
|
||||
|
||||
This module compares local records with CloudKit records to determine
|
||||
what needs to be created, updated, or deleted.
|
||||
|
||||
Field names must match CKModels.swift exactly:
|
||||
- Stadium: stadiumId, canonicalId, name, city, state, location (CLLocation),
|
||||
capacity, yearOpened, imageURL, sport
|
||||
- Team: teamId, canonicalId, name, abbreviation, sport, city, stadiumCanonicalId,
|
||||
logoURL, primaryColor, secondaryColor
|
||||
- Game: gameId, canonicalId, homeTeamCanonicalId, awayTeamCanonicalId,
|
||||
stadiumCanonicalId, dateTime, sport, season, isPlayoff, broadcastInfo
|
||||
- TeamAlias: aliasId, teamCanonicalId, aliasType, aliasValue, validFrom, validUntil
|
||||
- StadiumAlias: aliasName, stadiumCanonicalId, validFrom, validUntil
|
||||
- Sport: sportId, abbreviation, displayName, iconName, colorHex,
|
||||
seasonStartMonth, seasonEndMonth, isActive
|
||||
- LeagueStructure: structureId, sport, type, name, abbreviation, parentId, displayOrder
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, date
|
||||
from enum import Enum
|
||||
from typing import Any, Optional
|
||||
|
||||
from ..models.game import Game
|
||||
from ..models.team import Team
|
||||
from ..models.stadium import Stadium
|
||||
from ..models.aliases import TeamAlias, StadiumAlias, AliasType
|
||||
from ..models.sport import Sport, LeagueStructure
|
||||
from .cloudkit import CloudKitRecord, RecordType
|
||||
|
||||
|
||||
def _date_to_datetime(d: Optional[date]) -> Optional[datetime]:
|
||||
"""Convert a date to a datetime at midnight UTC.
|
||||
|
||||
CloudKit TIMESTAMP fields require datetime, not date.
|
||||
"""
|
||||
if d is None:
|
||||
return None
|
||||
return datetime(d.year, d.month, d.day, 0, 0, 0)
|
||||
|
||||
|
||||
class DiffAction(str, Enum):
|
||||
"""Action to take for a record."""
|
||||
CREATE = "create"
|
||||
UPDATE = "update"
|
||||
DELETE = "delete"
|
||||
UNCHANGED = "unchanged"
|
||||
|
||||
|
||||
@dataclass
|
||||
class RecordDiff:
|
||||
"""Represents the difference between local and remote records.
|
||||
|
||||
Attributes:
|
||||
record_name: Canonical record ID
|
||||
record_type: CloudKit record type
|
||||
action: Action to take (create, update, delete, unchanged)
|
||||
local_record: Local CloudKitRecord (None if delete)
|
||||
remote_record: Remote record dict (None if create)
|
||||
changed_fields: List of field names that changed (for update)
|
||||
record_change_tag: Remote record's change tag (for update)
|
||||
"""
|
||||
record_name: str
|
||||
record_type: RecordType
|
||||
action: DiffAction
|
||||
local_record: Optional[CloudKitRecord] = None
|
||||
remote_record: Optional[dict] = None
|
||||
changed_fields: list[str] = field(default_factory=list)
|
||||
record_change_tag: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class DiffResult:
|
||||
"""Result of diffing local and remote records.
|
||||
|
||||
Attributes:
|
||||
creates: Records to create
|
||||
updates: Records to update
|
||||
deletes: Records to delete (record names)
|
||||
unchanged: Records with no changes
|
||||
"""
|
||||
creates: list[RecordDiff] = field(default_factory=list)
|
||||
updates: list[RecordDiff] = field(default_factory=list)
|
||||
deletes: list[RecordDiff] = field(default_factory=list)
|
||||
unchanged: list[RecordDiff] = field(default_factory=list)
|
||||
|
||||
@property
|
||||
def create_count(self) -> int:
|
||||
return len(self.creates)
|
||||
|
||||
@property
|
||||
def update_count(self) -> int:
|
||||
return len(self.updates)
|
||||
|
||||
@property
|
||||
def delete_count(self) -> int:
|
||||
return len(self.deletes)
|
||||
|
||||
@property
|
||||
def unchanged_count(self) -> int:
|
||||
return len(self.unchanged)
|
||||
|
||||
@property
|
||||
def total_changes(self) -> int:
|
||||
return self.create_count + self.update_count + self.delete_count
|
||||
|
||||
def get_records_to_upload(self) -> list[CloudKitRecord]:
|
||||
"""Get all records that need to be uploaded (creates + updates)."""
|
||||
records = []
|
||||
|
||||
for diff in self.creates:
|
||||
if diff.local_record:
|
||||
records.append(diff.local_record)
|
||||
|
||||
for diff in self.updates:
|
||||
if diff.local_record:
|
||||
# Add change tag for update
|
||||
diff.local_record.record_change_tag = diff.record_change_tag
|
||||
records.append(diff.local_record)
|
||||
|
||||
return records
|
||||
|
||||
|
||||
class RecordDiffer:
|
||||
"""Compares local records with CloudKit records.
|
||||
|
||||
Field names must match CKModels.swift field keys exactly (camelCase).
|
||||
"""
|
||||
|
||||
# Fields to compare for each record type (matching CKModels.swift keys)
|
||||
GAME_FIELDS = [
|
||||
"gameId", "canonicalId", "sport", "season", "dateTime",
|
||||
"homeTeamCanonicalId", "awayTeamCanonicalId", "stadiumCanonicalId",
|
||||
"isPlayoff", "broadcastInfo",
|
||||
]
|
||||
|
||||
TEAM_FIELDS = [
|
||||
"teamId", "canonicalId", "sport", "city", "name", "abbreviation",
|
||||
"stadiumCanonicalId", "logoURL", "primaryColor", "secondaryColor",
|
||||
]
|
||||
|
||||
STADIUM_FIELDS = [
|
||||
"stadiumId", "canonicalId", "sport", "name", "city", "state",
|
||||
"location", "capacity", "yearOpened", "imageURL",
|
||||
]
|
||||
|
||||
TEAM_ALIAS_FIELDS = [
|
||||
"aliasId", "teamCanonicalId", "aliasType", "aliasValue",
|
||||
"validFrom", "validUntil",
|
||||
]
|
||||
|
||||
STADIUM_ALIAS_FIELDS = [
|
||||
"aliasName", "stadiumCanonicalId", "validFrom", "validUntil",
|
||||
]
|
||||
|
||||
SPORT_FIELDS = [
|
||||
"sportId", "abbreviation", "displayName", "iconName",
|
||||
"colorHex", "seasonStartMonth", "seasonEndMonth", "isActive",
|
||||
]
|
||||
|
||||
LEAGUE_STRUCTURE_FIELDS = [
|
||||
"structureId", "sport", "type", "name", "abbreviation",
|
||||
"parentId", "displayOrder",
|
||||
]
|
||||
|
||||
def diff_games(
|
||||
self,
|
||||
local_games: list[Game],
|
||||
remote_records: list[dict],
|
||||
) -> DiffResult:
|
||||
"""Diff local games against remote CloudKit records.
|
||||
|
||||
Args:
|
||||
local_games: List of local Game objects
|
||||
remote_records: List of remote record dictionaries
|
||||
|
||||
Returns:
|
||||
DiffResult with creates, updates, deletes
|
||||
"""
|
||||
local_records = [self._game_to_record(g) for g in local_games]
|
||||
return self._diff_records(
|
||||
local_records,
|
||||
remote_records,
|
||||
RecordType.GAME,
|
||||
self.GAME_FIELDS,
|
||||
)
|
||||
|
||||
def diff_teams(
|
||||
self,
|
||||
local_teams: list[Team],
|
||||
remote_records: list[dict],
|
||||
) -> DiffResult:
|
||||
"""Diff local teams against remote CloudKit records.
|
||||
|
||||
Args:
|
||||
local_teams: List of local Team objects
|
||||
remote_records: List of remote record dictionaries
|
||||
|
||||
Returns:
|
||||
DiffResult with creates, updates, deletes
|
||||
"""
|
||||
local_records = [self._team_to_record(t) for t in local_teams]
|
||||
return self._diff_records(
|
||||
local_records,
|
||||
remote_records,
|
||||
RecordType.TEAM,
|
||||
self.TEAM_FIELDS,
|
||||
)
|
||||
|
||||
def diff_stadiums(
|
||||
self,
|
||||
local_stadiums: list[Stadium],
|
||||
remote_records: list[dict],
|
||||
) -> DiffResult:
|
||||
"""Diff local stadiums against remote CloudKit records.
|
||||
|
||||
Args:
|
||||
local_stadiums: List of local Stadium objects
|
||||
remote_records: List of remote record dictionaries
|
||||
|
||||
Returns:
|
||||
DiffResult with creates, updates, deletes
|
||||
"""
|
||||
local_records = [self._stadium_to_record(s) for s in local_stadiums]
|
||||
return self._diff_records(
|
||||
local_records,
|
||||
remote_records,
|
||||
RecordType.STADIUM,
|
||||
self.STADIUM_FIELDS,
|
||||
)
|
||||
|
||||
def diff_team_aliases(
|
||||
self,
|
||||
local_aliases: list[TeamAlias],
|
||||
remote_records: list[dict],
|
||||
) -> DiffResult:
|
||||
"""Diff local team aliases against remote CloudKit records.
|
||||
|
||||
Args:
|
||||
local_aliases: List of local TeamAlias objects
|
||||
remote_records: List of remote record dictionaries
|
||||
|
||||
Returns:
|
||||
DiffResult with creates, updates, deletes
|
||||
"""
|
||||
local_records = [self._team_alias_to_record(a) for a in local_aliases]
|
||||
return self._diff_records(
|
||||
local_records,
|
||||
remote_records,
|
||||
RecordType.TEAM_ALIAS,
|
||||
self.TEAM_ALIAS_FIELDS,
|
||||
)
|
||||
|
||||
def diff_stadium_aliases(
|
||||
self,
|
||||
local_aliases: list[StadiumAlias],
|
||||
remote_records: list[dict],
|
||||
) -> DiffResult:
|
||||
"""Diff local stadium aliases against remote CloudKit records.
|
||||
|
||||
Args:
|
||||
local_aliases: List of local StadiumAlias objects
|
||||
remote_records: List of remote record dictionaries
|
||||
|
||||
Returns:
|
||||
DiffResult with creates, updates, deletes
|
||||
"""
|
||||
local_records = [self._stadium_alias_to_record(a) for a in local_aliases]
|
||||
return self._diff_records(
|
||||
local_records,
|
||||
remote_records,
|
||||
RecordType.STADIUM_ALIAS,
|
||||
self.STADIUM_ALIAS_FIELDS,
|
||||
)
|
||||
|
||||
def diff_sports(
|
||||
self,
|
||||
local_sports: list[Sport],
|
||||
remote_records: list[dict],
|
||||
) -> DiffResult:
|
||||
"""Diff local sports against remote CloudKit records.
|
||||
|
||||
Args:
|
||||
local_sports: List of local Sport objects
|
||||
remote_records: List of remote record dictionaries
|
||||
|
||||
Returns:
|
||||
DiffResult with creates, updates, deletes
|
||||
"""
|
||||
local_records = [self._sport_to_record(s) for s in local_sports]
|
||||
return self._diff_records(
|
||||
local_records,
|
||||
remote_records,
|
||||
RecordType.SPORT,
|
||||
self.SPORT_FIELDS,
|
||||
)
|
||||
|
||||
def diff_league_structures(
|
||||
self,
|
||||
local_structures: list[LeagueStructure],
|
||||
remote_records: list[dict],
|
||||
) -> DiffResult:
|
||||
"""Diff local league structures against remote CloudKit records.
|
||||
|
||||
Args:
|
||||
local_structures: List of local LeagueStructure objects
|
||||
remote_records: List of remote record dictionaries
|
||||
|
||||
Returns:
|
||||
DiffResult with creates, updates, deletes
|
||||
"""
|
||||
local_records = [self._league_structure_to_record(s) for s in local_structures]
|
||||
return self._diff_records(
|
||||
local_records,
|
||||
remote_records,
|
||||
RecordType.LEAGUE_STRUCTURE,
|
||||
self.LEAGUE_STRUCTURE_FIELDS,
|
||||
)
|
||||
|
||||
def _diff_records(
|
||||
self,
|
||||
local_records: list[CloudKitRecord],
|
||||
remote_records: list[dict],
|
||||
record_type: RecordType,
|
||||
compare_fields: list[str],
|
||||
) -> DiffResult:
|
||||
"""Compare local and remote records.
|
||||
|
||||
Args:
|
||||
local_records: List of local CloudKitRecord objects
|
||||
remote_records: List of remote record dictionaries
|
||||
record_type: Type of records being compared
|
||||
compare_fields: List of field names to compare
|
||||
|
||||
Returns:
|
||||
DiffResult with categorized differences
|
||||
"""
|
||||
result = DiffResult()
|
||||
|
||||
# Index remote records by name
|
||||
remote_by_name: dict[str, dict] = {}
|
||||
for record in remote_records:
|
||||
name = record.get("recordName")
|
||||
if name:
|
||||
remote_by_name[name] = record
|
||||
|
||||
# Index local records by name
|
||||
local_by_name: dict[str, CloudKitRecord] = {}
|
||||
for record in local_records:
|
||||
local_by_name[record.record_name] = record
|
||||
|
||||
# Find creates and updates
|
||||
for local_record in local_records:
|
||||
remote = remote_by_name.get(local_record.record_name)
|
||||
|
||||
if remote is None:
|
||||
# New record
|
||||
result.creates.append(RecordDiff(
|
||||
record_name=local_record.record_name,
|
||||
record_type=record_type,
|
||||
action=DiffAction.CREATE,
|
||||
local_record=local_record,
|
||||
))
|
||||
else:
|
||||
# Check for changes
|
||||
changed_fields = self._compare_fields(
|
||||
local_record.fields,
|
||||
remote.get("fields", {}),
|
||||
compare_fields,
|
||||
)
|
||||
|
||||
if changed_fields:
|
||||
result.updates.append(RecordDiff(
|
||||
record_name=local_record.record_name,
|
||||
record_type=record_type,
|
||||
action=DiffAction.UPDATE,
|
||||
local_record=local_record,
|
||||
remote_record=remote,
|
||||
changed_fields=changed_fields,
|
||||
record_change_tag=remote.get("recordChangeTag"),
|
||||
))
|
||||
else:
|
||||
result.unchanged.append(RecordDiff(
|
||||
record_name=local_record.record_name,
|
||||
record_type=record_type,
|
||||
action=DiffAction.UNCHANGED,
|
||||
local_record=local_record,
|
||||
remote_record=remote,
|
||||
record_change_tag=remote.get("recordChangeTag"),
|
||||
))
|
||||
|
||||
# Find deletes (remote records not in local)
|
||||
local_names = set(local_by_name.keys())
|
||||
for remote_name, remote in remote_by_name.items():
|
||||
if remote_name not in local_names:
|
||||
result.deletes.append(RecordDiff(
|
||||
record_name=remote_name,
|
||||
record_type=record_type,
|
||||
action=DiffAction.DELETE,
|
||||
remote_record=remote,
|
||||
record_change_tag=remote.get("recordChangeTag"),
|
||||
))
|
||||
|
||||
return result
|
||||
|
||||
def _compare_fields(
|
||||
self,
|
||||
local_fields: dict[str, Any],
|
||||
remote_fields: dict[str, dict],
|
||||
compare_fields: list[str],
|
||||
) -> list[str]:
|
||||
"""Compare field values between local and remote.
|
||||
|
||||
Args:
|
||||
local_fields: Local field values
|
||||
remote_fields: Remote field values (CloudKit format)
|
||||
compare_fields: Fields to compare
|
||||
|
||||
Returns:
|
||||
List of field names that differ
|
||||
"""
|
||||
changed = []
|
||||
|
||||
for field_name in compare_fields:
|
||||
local_value = local_fields.get(field_name)
|
||||
remote_field = remote_fields.get(field_name, {})
|
||||
remote_value = remote_field.get("value") if remote_field else None
|
||||
|
||||
# Normalize values for comparison
|
||||
local_normalized = self._normalize_value(local_value)
|
||||
remote_normalized = self._normalize_remote_value(remote_value, remote_field)
|
||||
|
||||
if local_normalized != remote_normalized:
|
||||
changed.append(field_name)
|
||||
|
||||
return changed
|
||||
|
||||
def _normalize_value(self, value: Any) -> Any:
|
||||
"""Normalize a local value for comparison."""
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, datetime):
|
||||
# Convert to milliseconds since epoch
|
||||
return int(value.timestamp() * 1000)
|
||||
if isinstance(value, float):
|
||||
# Round to 6 decimal places for coordinate comparison
|
||||
return round(value, 6)
|
||||
return value
|
||||
|
||||
def _normalize_remote_value(self, value: Any, field_data: dict) -> Any:
|
||||
"""Normalize a remote CloudKit value for comparison."""
|
||||
if value is None:
|
||||
return None
|
||||
|
||||
field_type = field_data.get("type", "")
|
||||
|
||||
if field_type == "TIMESTAMP":
|
||||
# Already in milliseconds
|
||||
return value
|
||||
if field_type == "DOUBLE":
|
||||
return round(value, 6)
|
||||
if field_type == "LOCATION":
|
||||
# Return as tuple for comparison
|
||||
if isinstance(value, dict):
|
||||
return (
|
||||
round(value.get("latitude", 0), 6),
|
||||
round(value.get("longitude", 0), 6),
|
||||
)
|
||||
|
||||
return value
|
||||
|
||||
def _game_to_record(self, game: Game) -> CloudKitRecord:
|
||||
"""Convert a Game to a CloudKitRecord.
|
||||
|
||||
Field names match CKGame keys in CKModels.swift:
|
||||
- gameId, canonicalId: Unique identifiers
|
||||
- homeTeamCanonicalId, awayTeamCanonicalId, stadiumCanonicalId: References as strings
|
||||
- dateTime: Game time as datetime (will be converted to TIMESTAMP)
|
||||
- sport: Sport code uppercase (e.g., "MLB")
|
||||
- season: Season string (e.g., "2025-26" or "2026")
|
||||
- isPlayoff: Boolean as int (1 or 0)
|
||||
- broadcastInfo: Optional broadcast network string
|
||||
"""
|
||||
# Format season as string
|
||||
sport_lower = game.sport.lower()
|
||||
if sport_lower in ("nba", "nhl"):
|
||||
season_str = f"{game.season}-{str(game.season + 1)[-2:]}"
|
||||
else:
|
||||
season_str = str(game.season)
|
||||
|
||||
return CloudKitRecord(
|
||||
record_name=game.id,
|
||||
record_type=RecordType.GAME,
|
||||
fields={
|
||||
"gameId": game.id,
|
||||
"canonicalId": game.id,
|
||||
"sport": game.sport.upper(),
|
||||
"season": season_str,
|
||||
"dateTime": game.game_date,
|
||||
"homeTeamCanonicalId": game.home_team_id,
|
||||
"awayTeamCanonicalId": game.away_team_id,
|
||||
"stadiumCanonicalId": game.stadium_id,
|
||||
"isPlayoff": False, # Default, can be overridden
|
||||
"broadcastInfo": None, # Default, can be overridden
|
||||
},
|
||||
)
|
||||
|
||||
def _team_to_record(self, team: Team) -> CloudKitRecord:
|
||||
"""Convert a Team to a CloudKitRecord.
|
||||
|
||||
Field names match CKTeam keys in CKModels.swift:
|
||||
- teamId, canonicalId: Unique identifiers
|
||||
- name, abbreviation, city: Team info
|
||||
- sport: Sport code uppercase (e.g., "NBA")
|
||||
- stadiumCanonicalId: Home stadium canonical ID string
|
||||
- logoURL: URL string for team logo
|
||||
- primaryColor, secondaryColor: Hex color strings
|
||||
"""
|
||||
return CloudKitRecord(
|
||||
record_name=team.id,
|
||||
record_type=RecordType.TEAM,
|
||||
fields={
|
||||
"teamId": team.id,
|
||||
"canonicalId": team.id,
|
||||
"sport": team.sport.upper(),
|
||||
"city": team.city,
|
||||
"name": team.name,
|
||||
"abbreviation": team.abbreviation,
|
||||
"stadiumCanonicalId": team.stadium_id,
|
||||
"logoURL": team.logo_url,
|
||||
"primaryColor": team.primary_color,
|
||||
"secondaryColor": team.secondary_color,
|
||||
},
|
||||
)
|
||||
|
||||
def _stadium_to_record(self, stadium: Stadium) -> CloudKitRecord:
|
||||
"""Convert a Stadium to a CloudKitRecord.
|
||||
|
||||
Field names match CKStadium keys in CKModels.swift:
|
||||
- stadiumId, canonicalId: Unique identifiers
|
||||
- name, city, state: Location info
|
||||
- location: CloudKit LOCATION type with latitude/longitude
|
||||
- capacity: Seating capacity as int
|
||||
- yearOpened: Year opened as int
|
||||
- imageURL: URL string for stadium image
|
||||
- sport: Sport code uppercase (e.g., "MLB")
|
||||
"""
|
||||
return CloudKitRecord(
|
||||
record_name=stadium.id,
|
||||
record_type=RecordType.STADIUM,
|
||||
fields={
|
||||
"stadiumId": stadium.id,
|
||||
"canonicalId": stadium.id,
|
||||
"sport": stadium.sport.upper(),
|
||||
"name": stadium.name,
|
||||
"city": stadium.city,
|
||||
"state": stadium.state,
|
||||
# CloudKit LOCATION type expects dict with latitude/longitude
|
||||
"location": {
|
||||
"latitude": stadium.latitude,
|
||||
"longitude": stadium.longitude,
|
||||
},
|
||||
"capacity": stadium.capacity,
|
||||
"yearOpened": stadium.opened_year,
|
||||
"imageURL": stadium.image_url,
|
||||
},
|
||||
)
|
||||
|
||||
def _team_alias_to_record(self, alias: TeamAlias) -> CloudKitRecord:
|
||||
"""Convert a TeamAlias to a CloudKitRecord.
|
||||
|
||||
Field names match CKTeamAlias keys in CKModels.swift:
|
||||
- aliasId: Unique identifier
|
||||
- teamCanonicalId: The canonical team this alias resolves to
|
||||
- aliasType: Type of alias ("abbreviation", "name", "city")
|
||||
- aliasValue: The alias value to match
|
||||
- validFrom, validUntil: Optional date bounds
|
||||
- schemaVersion, lastModified: Versioning fields
|
||||
"""
|
||||
return CloudKitRecord(
|
||||
record_name=alias.id,
|
||||
record_type=RecordType.TEAM_ALIAS,
|
||||
fields={
|
||||
"aliasId": alias.id,
|
||||
"teamCanonicalId": alias.team_canonical_id,
|
||||
"aliasType": alias.alias_type.value,
|
||||
"aliasValue": alias.alias_value,
|
||||
"validFrom": _date_to_datetime(alias.valid_from),
|
||||
"validUntil": _date_to_datetime(alias.valid_until),
|
||||
"schemaVersion": 1,
|
||||
"lastModified": datetime.utcnow(),
|
||||
},
|
||||
)
|
||||
|
||||
def _stadium_alias_to_record(self, alias: StadiumAlias) -> CloudKitRecord:
|
||||
"""Convert a StadiumAlias to a CloudKitRecord.
|
||||
|
||||
Field names match CKStadiumAlias keys in CKModels.swift:
|
||||
- aliasName: The alias name (used as record name/primary key)
|
||||
- stadiumCanonicalId: The canonical stadium this alias resolves to
|
||||
- validFrom, validUntil: Optional date bounds
|
||||
- schemaVersion, lastModified: Versioning fields
|
||||
"""
|
||||
# Record name must be unique - combine alias name with stadium ID
|
||||
# to handle cases like "yankee stadium" mapping to both MLB and MLS stadiums
|
||||
record_name = f"{alias.alias_name.lower()}|{alias.stadium_canonical_id}"
|
||||
return CloudKitRecord(
|
||||
record_name=record_name,
|
||||
record_type=RecordType.STADIUM_ALIAS,
|
||||
fields={
|
||||
"aliasName": alias.alias_name.lower(),
|
||||
"stadiumCanonicalId": alias.stadium_canonical_id,
|
||||
"validFrom": _date_to_datetime(alias.valid_from),
|
||||
"validUntil": _date_to_datetime(alias.valid_until),
|
||||
"schemaVersion": 1,
|
||||
"lastModified": datetime.utcnow(),
|
||||
},
|
||||
)
|
||||
|
||||
def _sport_to_record(self, sport: Sport) -> CloudKitRecord:
|
||||
"""Convert a Sport to a CloudKitRecord.
|
||||
|
||||
Field names match CKSport keys in CKModels.swift:
|
||||
- sportId: Unique identifier (e.g., 'MLB', 'NBA')
|
||||
- abbreviation: Sport abbreviation
|
||||
- displayName: Full display name
|
||||
- iconName: SF Symbol name
|
||||
- colorHex: Primary color as hex string
|
||||
- seasonStartMonth, seasonEndMonth: Season boundary months (1-12)
|
||||
- isActive: Whether sport is currently supported
|
||||
- schemaVersion, lastModified: Versioning fields
|
||||
"""
|
||||
return CloudKitRecord(
|
||||
record_name=sport.id,
|
||||
record_type=RecordType.SPORT,
|
||||
fields={
|
||||
"sportId": sport.id,
|
||||
"abbreviation": sport.abbreviation,
|
||||
"displayName": sport.display_name,
|
||||
"iconName": sport.icon_name,
|
||||
"colorHex": sport.color_hex,
|
||||
"seasonStartMonth": sport.season_start_month,
|
||||
"seasonEndMonth": sport.season_end_month,
|
||||
"isActive": sport.is_active,
|
||||
"schemaVersion": 1,
|
||||
"lastModified": datetime.utcnow(),
|
||||
},
|
||||
)
|
||||
|
||||
def _league_structure_to_record(self, structure: LeagueStructure) -> CloudKitRecord:
|
||||
"""Convert a LeagueStructure to a CloudKitRecord.
|
||||
|
||||
Field names match CKLeagueStructure keys in CKModels.swift:
|
||||
- structureId: Unique identifier (e.g., 'nba_eastern', 'mlb_al_east')
|
||||
- sport: Sport code (e.g., 'NBA', 'MLB')
|
||||
- type: Structure type ('conference', 'division', 'league')
|
||||
- name: Full name
|
||||
- abbreviation: Optional abbreviation
|
||||
- parentId: Parent structure ID (e.g., division's parent is conference)
|
||||
- displayOrder: Order for display (0-indexed)
|
||||
- schemaVersion, lastModified: Versioning fields
|
||||
"""
|
||||
return CloudKitRecord(
|
||||
record_name=structure.id,
|
||||
record_type=RecordType.LEAGUE_STRUCTURE,
|
||||
fields={
|
||||
"structureId": structure.id,
|
||||
"sport": structure.sport.upper(),
|
||||
"type": structure.structure_type.value,
|
||||
"name": structure.name,
|
||||
"abbreviation": structure.abbreviation,
|
||||
"parentId": structure.parent_id,
|
||||
"displayOrder": structure.display_order,
|
||||
"schemaVersion": 1,
|
||||
"lastModified": datetime.utcnow(),
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def game_to_cloudkit_record(game: Game) -> CloudKitRecord:
|
||||
"""Convert a Game to a CloudKitRecord.
|
||||
|
||||
Convenience function for external use.
|
||||
"""
|
||||
differ = RecordDiffer()
|
||||
return differ._game_to_record(game)
|
||||
|
||||
|
||||
def team_to_cloudkit_record(team: Team) -> CloudKitRecord:
|
||||
"""Convert a Team to a CloudKitRecord.
|
||||
|
||||
Convenience function for external use.
|
||||
"""
|
||||
differ = RecordDiffer()
|
||||
return differ._team_to_record(team)
|
||||
|
||||
|
||||
def stadium_to_cloudkit_record(stadium: Stadium) -> CloudKitRecord:
|
||||
"""Convert a Stadium to a CloudKitRecord.
|
||||
|
||||
Convenience function for external use.
|
||||
"""
|
||||
differ = RecordDiffer()
|
||||
return differ._stadium_to_record(stadium)
|
||||
|
||||
|
||||
def team_alias_to_cloudkit_record(alias: TeamAlias) -> CloudKitRecord:
|
||||
"""Convert a TeamAlias to a CloudKitRecord.
|
||||
|
||||
Convenience function for external use.
|
||||
"""
|
||||
differ = RecordDiffer()
|
||||
return differ._team_alias_to_record(alias)
|
||||
|
||||
|
||||
def stadium_alias_to_cloudkit_record(alias: StadiumAlias) -> CloudKitRecord:
|
||||
"""Convert a StadiumAlias to a CloudKitRecord.
|
||||
|
||||
Convenience function for external use.
|
||||
"""
|
||||
differ = RecordDiffer()
|
||||
return differ._stadium_alias_to_record(alias)
|
||||
|
||||
|
||||
def sport_to_cloudkit_record(sport: Sport) -> CloudKitRecord:
|
||||
"""Convert a Sport to a CloudKitRecord.
|
||||
|
||||
Convenience function for external use.
|
||||
"""
|
||||
differ = RecordDiffer()
|
||||
return differ._sport_to_record(sport)
|
||||
|
||||
|
||||
def league_structure_to_cloudkit_record(structure: LeagueStructure) -> CloudKitRecord:
|
||||
"""Convert a LeagueStructure to a CloudKitRecord.
|
||||
|
||||
Convenience function for external use.
|
||||
"""
|
||||
differ = RecordDiffer()
|
||||
return differ._league_structure_to_record(structure)
|
||||
384
sportstime_parser/uploaders/state.py
Normal file
384
sportstime_parser/uploaders/state.py
Normal file
@@ -0,0 +1,384 @@
|
||||
"""Upload state manager for resumable uploads.
|
||||
|
||||
This module tracks upload progress to enable resuming interrupted uploads.
|
||||
State is persisted to JSON files in the .parser_state directory.
|
||||
"""
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from ..config import STATE_DIR
|
||||
|
||||
|
||||
@dataclass
|
||||
class RecordState:
|
||||
"""State of an individual record upload.
|
||||
|
||||
Attributes:
|
||||
record_name: Canonical record ID
|
||||
record_type: CloudKit record type
|
||||
uploaded_at: Timestamp when successfully uploaded
|
||||
record_change_tag: CloudKit version tag
|
||||
status: 'pending', 'uploaded', 'failed'
|
||||
error_message: Error message if failed
|
||||
retry_count: Number of retry attempts
|
||||
"""
|
||||
record_name: str
|
||||
record_type: str
|
||||
uploaded_at: Optional[datetime] = None
|
||||
record_change_tag: Optional[str] = None
|
||||
status: str = "pending"
|
||||
error_message: Optional[str] = None
|
||||
retry_count: int = 0
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Convert to dictionary for JSON serialization."""
|
||||
return {
|
||||
"record_name": self.record_name,
|
||||
"record_type": self.record_type,
|
||||
"uploaded_at": self.uploaded_at.isoformat() if self.uploaded_at else None,
|
||||
"record_change_tag": self.record_change_tag,
|
||||
"status": self.status,
|
||||
"error_message": self.error_message,
|
||||
"retry_count": self.retry_count,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> "RecordState":
|
||||
"""Create RecordState from dictionary."""
|
||||
uploaded_at = data.get("uploaded_at")
|
||||
if uploaded_at:
|
||||
uploaded_at = datetime.fromisoformat(uploaded_at)
|
||||
|
||||
return cls(
|
||||
record_name=data["record_name"],
|
||||
record_type=data["record_type"],
|
||||
uploaded_at=uploaded_at,
|
||||
record_change_tag=data.get("record_change_tag"),
|
||||
status=data.get("status", "pending"),
|
||||
error_message=data.get("error_message"),
|
||||
retry_count=data.get("retry_count", 0),
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class UploadSession:
|
||||
"""Tracks the state of an upload session.
|
||||
|
||||
Attributes:
|
||||
sport: Sport code
|
||||
season: Season start year
|
||||
environment: CloudKit environment
|
||||
started_at: When the upload session started
|
||||
last_updated: When the state was last updated
|
||||
records: Dictionary of record_name -> RecordState
|
||||
total_count: Total number of records to upload
|
||||
"""
|
||||
sport: str
|
||||
season: int
|
||||
environment: str
|
||||
started_at: datetime = field(default_factory=datetime.utcnow)
|
||||
last_updated: datetime = field(default_factory=datetime.utcnow)
|
||||
records: dict[str, RecordState] = field(default_factory=dict)
|
||||
total_count: int = 0
|
||||
|
||||
@property
|
||||
def uploaded_count(self) -> int:
|
||||
"""Count of successfully uploaded records."""
|
||||
return sum(1 for r in self.records.values() if r.status == "uploaded")
|
||||
|
||||
@property
|
||||
def pending_count(self) -> int:
|
||||
"""Count of pending records."""
|
||||
return sum(1 for r in self.records.values() if r.status == "pending")
|
||||
|
||||
@property
|
||||
def failed_count(self) -> int:
|
||||
"""Count of failed records."""
|
||||
return sum(1 for r in self.records.values() if r.status == "failed")
|
||||
|
||||
@property
|
||||
def is_complete(self) -> bool:
|
||||
"""Check if all records have been processed."""
|
||||
return self.pending_count == 0
|
||||
|
||||
@property
|
||||
def progress_percent(self) -> float:
|
||||
"""Calculate upload progress as percentage."""
|
||||
if self.total_count == 0:
|
||||
return 100.0
|
||||
return (self.uploaded_count / self.total_count) * 100
|
||||
|
||||
def get_pending_records(self) -> list[str]:
|
||||
"""Get list of record names that still need to be uploaded."""
|
||||
return [
|
||||
name for name, state in self.records.items()
|
||||
if state.status == "pending"
|
||||
]
|
||||
|
||||
def get_failed_records(self) -> list[str]:
|
||||
"""Get list of record names that failed to upload."""
|
||||
return [
|
||||
name for name, state in self.records.items()
|
||||
if state.status == "failed"
|
||||
]
|
||||
|
||||
def get_retryable_records(self, max_retries: int = 3) -> list[str]:
|
||||
"""Get failed records that can be retried."""
|
||||
return [
|
||||
name for name, state in self.records.items()
|
||||
if state.status == "failed" and state.retry_count < max_retries
|
||||
]
|
||||
|
||||
def mark_uploaded(
|
||||
self,
|
||||
record_name: str,
|
||||
record_change_tag: Optional[str] = None,
|
||||
) -> None:
|
||||
"""Mark a record as successfully uploaded."""
|
||||
if record_name in self.records:
|
||||
state = self.records[record_name]
|
||||
state.status = "uploaded"
|
||||
state.uploaded_at = datetime.utcnow()
|
||||
state.record_change_tag = record_change_tag
|
||||
state.error_message = None
|
||||
self.last_updated = datetime.utcnow()
|
||||
|
||||
def mark_failed(self, record_name: str, error_message: str) -> None:
|
||||
"""Mark a record as failed."""
|
||||
if record_name in self.records:
|
||||
state = self.records[record_name]
|
||||
state.status = "failed"
|
||||
state.error_message = error_message
|
||||
state.retry_count += 1
|
||||
self.last_updated = datetime.utcnow()
|
||||
|
||||
def mark_pending(self, record_name: str) -> None:
|
||||
"""Mark a record as pending (for retry)."""
|
||||
if record_name in self.records:
|
||||
state = self.records[record_name]
|
||||
state.status = "pending"
|
||||
state.error_message = None
|
||||
self.last_updated = datetime.utcnow()
|
||||
|
||||
def add_record(self, record_name: str, record_type: str) -> None:
|
||||
"""Add a new record to track."""
|
||||
if record_name not in self.records:
|
||||
self.records[record_name] = RecordState(
|
||||
record_name=record_name,
|
||||
record_type=record_type,
|
||||
)
|
||||
self.total_count = len(self.records)
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Convert to dictionary for JSON serialization."""
|
||||
return {
|
||||
"sport": self.sport,
|
||||
"season": self.season,
|
||||
"environment": self.environment,
|
||||
"started_at": self.started_at.isoformat(),
|
||||
"last_updated": self.last_updated.isoformat(),
|
||||
"total_count": self.total_count,
|
||||
"records": {
|
||||
name: state.to_dict()
|
||||
for name, state in self.records.items()
|
||||
},
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> "UploadSession":
|
||||
"""Create UploadSession from dictionary."""
|
||||
session = cls(
|
||||
sport=data["sport"],
|
||||
season=data["season"],
|
||||
environment=data["environment"],
|
||||
started_at=datetime.fromisoformat(data["started_at"]),
|
||||
last_updated=datetime.fromisoformat(data["last_updated"]),
|
||||
total_count=data.get("total_count", 0),
|
||||
)
|
||||
|
||||
for name, record_data in data.get("records", {}).items():
|
||||
session.records[name] = RecordState.from_dict(record_data)
|
||||
|
||||
return session
|
||||
|
||||
|
||||
class StateManager:
|
||||
"""Manages upload state persistence.
|
||||
|
||||
State files are stored in .parser_state/ with naming convention:
|
||||
upload_state_{sport}_{season}_{environment}.json
|
||||
"""
|
||||
|
||||
def __init__(self, state_dir: Optional[Path] = None):
|
||||
"""Initialize the state manager.
|
||||
|
||||
Args:
|
||||
state_dir: Directory for state files (default: .parser_state/)
|
||||
"""
|
||||
self.state_dir = state_dir or STATE_DIR
|
||||
self.state_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def _get_state_file(self, sport: str, season: int, environment: str) -> Path:
|
||||
"""Get the path to a state file."""
|
||||
return self.state_dir / f"upload_state_{sport}_{season}_{environment}.json"
|
||||
|
||||
def load_session(
|
||||
self,
|
||||
sport: str,
|
||||
season: int,
|
||||
environment: str,
|
||||
) -> Optional[UploadSession]:
|
||||
"""Load an existing upload session.
|
||||
|
||||
Args:
|
||||
sport: Sport code
|
||||
season: Season start year
|
||||
environment: CloudKit environment
|
||||
|
||||
Returns:
|
||||
UploadSession if exists, None otherwise
|
||||
"""
|
||||
state_file = self._get_state_file(sport, season, environment)
|
||||
|
||||
if not state_file.exists():
|
||||
return None
|
||||
|
||||
try:
|
||||
with open(state_file, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
return UploadSession.from_dict(data)
|
||||
except (json.JSONDecodeError, KeyError) as e:
|
||||
# Corrupted state file
|
||||
return None
|
||||
|
||||
def save_session(self, session: UploadSession) -> None:
|
||||
"""Save an upload session to disk.
|
||||
|
||||
Args:
|
||||
session: The session to save
|
||||
"""
|
||||
state_file = self._get_state_file(
|
||||
session.sport,
|
||||
session.season,
|
||||
session.environment,
|
||||
)
|
||||
|
||||
session.last_updated = datetime.utcnow()
|
||||
|
||||
with open(state_file, "w", encoding="utf-8") as f:
|
||||
json.dump(session.to_dict(), f, indent=2)
|
||||
|
||||
def create_session(
|
||||
self,
|
||||
sport: str,
|
||||
season: int,
|
||||
environment: str,
|
||||
record_names: list[tuple[str, str]], # (record_name, record_type)
|
||||
) -> UploadSession:
|
||||
"""Create a new upload session.
|
||||
|
||||
Args:
|
||||
sport: Sport code
|
||||
season: Season start year
|
||||
environment: CloudKit environment
|
||||
record_names: List of (record_name, record_type) tuples
|
||||
|
||||
Returns:
|
||||
New UploadSession
|
||||
"""
|
||||
session = UploadSession(
|
||||
sport=sport,
|
||||
season=season,
|
||||
environment=environment,
|
||||
)
|
||||
|
||||
for record_name, record_type in record_names:
|
||||
session.add_record(record_name, record_type)
|
||||
|
||||
self.save_session(session)
|
||||
return session
|
||||
|
||||
def delete_session(self, sport: str, season: int, environment: str) -> bool:
|
||||
"""Delete an upload session state file.
|
||||
|
||||
Args:
|
||||
sport: Sport code
|
||||
season: Season start year
|
||||
environment: CloudKit environment
|
||||
|
||||
Returns:
|
||||
True if deleted, False if not found
|
||||
"""
|
||||
state_file = self._get_state_file(sport, season, environment)
|
||||
|
||||
if state_file.exists():
|
||||
state_file.unlink()
|
||||
return True
|
||||
return False
|
||||
|
||||
def list_sessions(self) -> list[dict]:
|
||||
"""List all upload sessions.
|
||||
|
||||
Returns:
|
||||
List of session summaries
|
||||
"""
|
||||
sessions = []
|
||||
|
||||
for state_file in self.state_dir.glob("upload_state_*.json"):
|
||||
try:
|
||||
with open(state_file, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
|
||||
session = UploadSession.from_dict(data)
|
||||
sessions.append({
|
||||
"sport": session.sport,
|
||||
"season": session.season,
|
||||
"environment": session.environment,
|
||||
"started_at": session.started_at.isoformat(),
|
||||
"last_updated": session.last_updated.isoformat(),
|
||||
"progress": f"{session.uploaded_count}/{session.total_count}",
|
||||
"progress_percent": f"{session.progress_percent:.1f}%",
|
||||
"status": "complete" if session.is_complete else "in_progress",
|
||||
"failed_count": session.failed_count,
|
||||
})
|
||||
except (json.JSONDecodeError, KeyError):
|
||||
continue
|
||||
|
||||
return sessions
|
||||
|
||||
def get_session_or_create(
|
||||
self,
|
||||
sport: str,
|
||||
season: int,
|
||||
environment: str,
|
||||
record_names: list[tuple[str, str]],
|
||||
resume: bool = False,
|
||||
) -> UploadSession:
|
||||
"""Get existing session or create new one.
|
||||
|
||||
Args:
|
||||
sport: Sport code
|
||||
season: Season start year
|
||||
environment: CloudKit environment
|
||||
record_names: List of (record_name, record_type) tuples
|
||||
resume: Whether to resume existing session
|
||||
|
||||
Returns:
|
||||
UploadSession (existing or new)
|
||||
"""
|
||||
if resume:
|
||||
existing = self.load_session(sport, season, environment)
|
||||
if existing:
|
||||
# Add any new records not in existing session
|
||||
existing_names = set(existing.records.keys())
|
||||
for record_name, record_type in record_names:
|
||||
if record_name not in existing_names:
|
||||
existing.add_record(record_name, record_type)
|
||||
return existing
|
||||
|
||||
# Create new session (overwrites existing)
|
||||
return self.create_session(sport, season, environment, record_names)
|
||||
Reference in New Issue
Block a user