Complete Python package for scraping, normalizing, and uploading sports schedule data to CloudKit.

Includes:
- Multi-source scrapers for NBA, MLB, NFL, NHL, MLS, WNBA, NWSL
- Canonical ID system for teams, stadiums, and games
- Fuzzy matching with manual alias support
- CloudKit uploader with batch operations and deduplication
- Comprehensive test suite with fixtures
- WNBA abbreviation aliases for improved team resolution
- Alias validation script to detect orphan references

All 5 phases of the data remediation plan are completed:
- Phase 1: Alias fixes (team/stadium alias additions)
- Phase 2: NHL stadium coordinate fixes
- Phase 3: Re-scrape validation
- Phase 4: iOS bundle update
- Phase 5: Code quality improvements (WNBA aliases)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
462 lines
16 KiB
Python
462 lines
16 KiB
Python
"""Tests for the CloudKit client."""
|
|
|
|
import json
|
|
import pytest
|
|
from datetime import datetime
|
|
from unittest.mock import Mock, patch, MagicMock
|
|
|
|
from sportstime_parser.uploaders.cloudkit import (
|
|
CloudKitClient,
|
|
CloudKitRecord,
|
|
CloudKitError,
|
|
CloudKitAuthError,
|
|
CloudKitRateLimitError,
|
|
CloudKitServerError,
|
|
RecordType,
|
|
OperationResult,
|
|
BatchResult,
|
|
)
|
|
|
|
|
|
class TestCloudKitRecord:
    """Checks on CloudKitRecord construction and ``to_cloudkit_dict()`` output."""

    @staticmethod
    def _serialized(record_type, fields, record_name="test", **record_kwargs):
        """Build a CloudKitRecord and return what ``to_cloudkit_dict()`` gives.

        Extra keyword arguments are forwarded to the CloudKitRecord
        constructor unchanged, so a test only passes ``record_change_tag``
        when it actually wants one set.
        """
        record = CloudKitRecord(
            record_name=record_name,
            record_type=record_type,
            fields=fields,
            **record_kwargs,
        )
        return record.to_cloudkit_dict()

    def test_create_record(self):
        """Constructor arguments are readable back; change tag defaults to None."""
        game_fields = {
            "sport": "nba",
            "season": 2025,
        }

        game = CloudKitRecord(
            record_name="nba_2025_hou_okc_1021",
            record_type=RecordType.GAME,
            fields=game_fields,
        )

        assert game.record_name == "nba_2025_hou_okc_1021"
        assert game.record_type == RecordType.GAME
        assert game.fields["sport"] == "nba"
        assert game.record_change_tag is None

    def test_to_cloudkit_dict(self):
        """Serialized form carries name, type and fields, but no change tag."""
        payload = self._serialized(
            RecordType.GAME,
            {
                "sport": "nba",
                "season": 2025,
            },
            record_name="nba_2025_hou_okc_1021",
        )

        assert payload["recordName"] == "nba_2025_hou_okc_1021"
        assert payload["recordType"] == "Game"
        assert "fields" in payload
        assert "recordChangeTag" not in payload

    def test_to_cloudkit_dict_with_change_tag(self):
        """A record built with a change tag serializes it as ``recordChangeTag``."""
        payload = self._serialized(
            RecordType.GAME,
            {"sport": "nba"},
            record_name="nba_2025_hou_okc_1021",
            record_change_tag="abc123",
        )

        assert payload["recordChangeTag"] == "abc123"

    def test_format_string_field(self):
        """A ``str`` value is emitted with type STRING."""
        name_field = self._serialized(
            RecordType.GAME, {"name": "Test Name"}
        )["fields"]["name"]

        assert name_field["value"] == "Test Name"
        assert name_field["type"] == "STRING"

    def test_format_int_field(self):
        """An ``int`` value is emitted with type INT64."""
        count_field = self._serialized(
            RecordType.GAME, {"count": 42}
        )["fields"]["count"]

        assert count_field["value"] == 42
        assert count_field["type"] == "INT64"

    def test_format_float_field(self):
        """A ``float`` value is emitted with type DOUBLE."""
        latitude_field = self._serialized(
            RecordType.STADIUM, {"latitude": 35.4634}
        )["fields"]["latitude"]

        assert latitude_field["value"] == 35.4634
        assert latitude_field["type"] == "DOUBLE"

    def test_format_datetime_field(self):
        """A ``datetime`` value is emitted as a millisecond TIMESTAMP."""
        tipoff = datetime(2025, 10, 21, 19, 0, 0)
        # Expected value is derived from the same naive datetime the record
        # receives: its POSIX timestamp scaled to whole milliseconds.
        tipoff_ms = int(tipoff.timestamp() * 1000)

        date_field = self._serialized(
            RecordType.GAME, {"game_date": tipoff}
        )["fields"]["game_date"]

        assert date_field["value"] == tipoff_ms
        assert date_field["type"] == "TIMESTAMP"

    def test_format_location_field(self):
        """A latitude/longitude dict is emitted with type LOCATION."""
        coordinates = {"latitude": 35.4634, "longitude": -97.5151}

        location_field = self._serialized(
            RecordType.STADIUM,
            {
                "location": coordinates,
            },
        )["fields"]["location"]

        assert location_field["type"] == "LOCATION"
        assert location_field["value"]["latitude"] == 35.4634
        assert location_field["value"]["longitude"] == -97.5151

    def test_skip_none_fields(self):
        """Fields whose value is None do not appear in the serialized fields."""
        serialized_fields = self._serialized(
            RecordType.GAME,
            {
                "sport": "nba",
                "score": None,  # expected to be left out of the payload
            },
        )["fields"]

        assert "sport" in serialized_fields
        assert "score" not in serialized_fields
|
|
|
|
|
|
class TestOperationResult:
    """Checks on the OperationResult dataclass."""

    def test_successful_result(self):
        """A success keeps its name and change tag and has no error code."""
        outcome = OperationResult(
            record_name="test_record",
            success=True,
            record_change_tag="new_tag",
        )

        assert outcome.record_name == "test_record"
        assert outcome.success is True
        assert outcome.record_change_tag == "new_tag"
        assert outcome.error_code is None

    def test_failed_result(self):
        """A failure exposes the error code and message it was built with."""
        failure_details = {
            "error_code": "SERVER_ERROR",
            "error_message": "Internal server error",
        }

        outcome = OperationResult(
            record_name="test_record",
            success=False,
            **failure_details,
        )

        assert outcome.success is False
        assert outcome.error_code == "SERVER_ERROR"
        assert outcome.error_message == "Internal server error"
|
|
|
|
|
|
class TestBatchResult:
    """Checks on BatchResult counters and its ``all_succeeded`` flag."""

    def test_empty_batch_result(self):
        """A batch with nothing recorded counts zero and reports success."""
        batch = BatchResult()

        assert batch.all_succeeded is True
        assert batch.success_count == 0
        assert batch.failure_count == 0

    def test_batch_with_successes(self):
        """Two recorded successes and no failures still report success."""
        batch = BatchResult()
        for record_name in ("rec1", "rec2"):
            batch.successful.append(OperationResult(record_name, True))

        assert batch.all_succeeded is True
        assert batch.success_count == 2
        assert batch.failure_count == 0

    def test_batch_with_failures(self):
        """One recorded failure flips ``all_succeeded`` to False."""
        succeeded = OperationResult("rec1", True)
        errored = OperationResult("rec2", False, error_message="Error")

        batch = BatchResult()
        batch.successful.append(succeeded)
        batch.failed.append(errored)

        assert batch.all_succeeded is False
        assert batch.success_count == 1
        assert batch.failure_count == 1
|
|
|
|
|
|
class TestCloudKitClient:
    """Tests for CloudKitClient.

    Wherever a test needs credentials or HTTP traffic, the environment,
    ``load_pem_private_key``, ``jwt.encode`` and ``requests.Session`` are
    replaced with mocks. The shared setup lives in ``_mocked_credentials``
    and ``_stub_response`` so each test only states what is specific to it.
    """

    # PEM-shaped placeholder. Its content is never parsed: it is only ever
    # set together with a patched ``load_pem_private_key``.
    _FAKE_PRIVATE_KEY = (
        "-----BEGIN EC PRIVATE KEY-----\ntest\n-----END EC PRIVATE KEY-----"
    )

    def _mocked_credentials(self, key_id="test_key"):
        """Return a context manager that fakes CloudKit credentials.

        While the returned context is active:

        * ``CLOUDKIT_KEY_ID`` and ``CLOUDKIT_PRIVATE_KEY`` are set in
          ``os.environ`` (other variables are left untouched),
        * ``load_pem_private_key`` returns a mock key whose ``sign()`` yields
          ``b"signature"``,
        * ``jwt.encode`` returns ``"test_token"``.

        Args:
            key_id: Value to expose as ``CLOUDKIT_KEY_ID``.

        Returns:
            An entered-on-``with`` ``ExitStack`` that undoes all three patches
            on exit, in reverse order of application.
        """
        # Function-scope import keeps this helper independent of the
        # module's import block.
        from contextlib import ExitStack

        signing_key = MagicMock()
        signing_key.sign.return_value = b"signature"

        with ExitStack() as stack:
            stack.enter_context(
                patch.dict(
                    "os.environ",
                    {
                        "CLOUDKIT_KEY_ID": key_id,
                        "CLOUDKIT_PRIVATE_KEY": self._FAKE_PRIVATE_KEY,
                    },
                )
            )
            mock_load = stack.enter_context(
                patch(
                    "sportstime_parser.uploaders.cloudkit"
                    ".serialization.load_pem_private_key"
                )
            )
            mock_jwt = stack.enter_context(
                patch("sportstime_parser.uploaders.cloudkit.jwt.encode")
            )
            mock_load.return_value = signing_key
            mock_jwt.return_value = "test_token"
            # Hand the active patches to the caller; if anything above had
            # raised, the enclosing ``with`` would have undone them instead.
            return stack.pop_all()

    @staticmethod
    def _stub_response(mock_session_class, status_code, payload=None):
        """Make the patched ``requests.Session`` answer with a canned response.

        Args:
            mock_session_class: The mock injected by
                ``@patch("...requests.Session")``.
            status_code: Value for the response's ``status_code``.
            payload: Optional value returned by ``response.json()``. When
                omitted, ``json()`` keeps MagicMock's default behaviour.

        Returns:
            The mock session instance the class mock now returns.
        """
        session = MagicMock()
        mock_session_class.return_value = session

        response = MagicMock()
        response.status_code = status_code
        if payload is not None:
            response.json.return_value = payload
        session.request.return_value = response
        return session

    def test_not_configured_without_credentials(self):
        """Test that client reports not configured without credentials."""
        # clear=True empties os.environ for the duration of the block.
        with patch.dict("os.environ", {}, clear=True):
            client = CloudKitClient()
            assert client.is_configured is False

    def test_configured_with_credentials(self):
        """Test that client reports configured with credentials."""
        with self._mocked_credentials(key_id="test_key_id"):
            client = CloudKitClient()
            assert client.is_configured is True

    def test_get_api_path(self):
        """Test API path construction."""
        client = CloudKitClient(
            container_id="iCloud.com.test.app",
            environment="development",
        )

        path = client._get_api_path("records/query")

        assert path == (
            "/database/1/iCloud.com.test.app/development/public/records/query"
        )

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_fetch_records_query(self, mock_session_class):
        """Test fetching records with query."""
        self._stub_response(
            mock_session_class,
            200,
            {
                "records": [
                    {"recordName": "rec1", "recordType": "Game"},
                    {"recordName": "rec2", "recordType": "Game"},
                ]
            },
        )

        with self._mocked_credentials():
            client = CloudKitClient()
            records = client.fetch_records(RecordType.GAME)

        assert len(records) == 2
        assert records[0]["recordName"] == "rec1"

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_save_records_success(self, mock_session_class):
        """Test saving records successfully."""
        self._stub_response(
            mock_session_class,
            200,
            {
                "records": [
                    {"recordName": "rec1", "recordChangeTag": "tag1"},
                    {"recordName": "rec2", "recordChangeTag": "tag2"},
                ]
            },
        )
        records = [
            CloudKitRecord("rec1", RecordType.GAME, {"sport": "nba"}),
            CloudKitRecord("rec2", RecordType.GAME, {"sport": "nba"}),
        ]

        with self._mocked_credentials():
            client = CloudKitClient()
            result = client.save_records(records)

        assert result.success_count == 2
        assert result.failure_count == 0

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_save_records_partial_failure(self, mock_session_class):
        """Test saving records with some failures."""
        self._stub_response(
            mock_session_class,
            200,
            {
                "records": [
                    {"recordName": "rec1", "recordChangeTag": "tag1"},
                    {
                        "recordName": "rec2",
                        "serverErrorCode": "QUOTA_EXCEEDED",
                        "reason": "Quota exceeded",
                    },
                ]
            },
        )
        records = [
            CloudKitRecord("rec1", RecordType.GAME, {"sport": "nba"}),
            CloudKitRecord("rec2", RecordType.GAME, {"sport": "nba"}),
        ]

        with self._mocked_credentials():
            client = CloudKitClient()
            result = client.save_records(records)

        assert result.success_count == 1
        assert result.failure_count == 1
        assert result.failed[0].error_code == "QUOTA_EXCEEDED"

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_auth_error(self, mock_session_class):
        """Test handling authentication error."""
        # The stubbed response carries HTTP status 421.
        self._stub_response(mock_session_class, 421)

        with self._mocked_credentials():
            client = CloudKitClient()
            with pytest.raises(CloudKitAuthError):
                client.fetch_records(RecordType.GAME)

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_rate_limit_error(self, mock_session_class):
        """Test handling rate limit error."""
        self._stub_response(mock_session_class, 429)

        with self._mocked_credentials():
            client = CloudKitClient()
            with pytest.raises(CloudKitRateLimitError):
                client.fetch_records(RecordType.GAME)

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_server_error(self, mock_session_class):
        """Test handling server error."""
        self._stub_response(mock_session_class, 503)

        with self._mocked_credentials():
            client = CloudKitClient()
            with pytest.raises(CloudKitServerError):
                client.fetch_records(RecordType.GAME)
|
|
|
|
|
|
class TestRecordType:
    """Checks on the RecordType enum."""

    def test_record_type_values(self):
        """Each member's value equals the record type name expected here."""
        expected_values = (
            (RecordType.GAME, "Game"),
            (RecordType.TEAM, "Team"),
            (RecordType.STADIUM, "Stadium"),
            (RecordType.TEAM_ALIAS, "TeamAlias"),
            (RecordType.STADIUM_ALIAS, "StadiumAlias"),
        )

        for member, schema_name in expected_values:
            assert member.value == schema_name
|