Files
SportstimeAPI/sportstime_parser/tests/test_uploaders/test_cloudkit.py
Trey t 52d445bca4 feat(scripts): add sportstime-parser data pipeline
Complete Python package for scraping, normalizing, and uploading
sports schedule data to CloudKit. Includes:

- Multi-source scrapers for NBA, MLB, NFL, NHL, MLS, WNBA, NWSL
- Canonical ID system for teams, stadiums, and games
- Fuzzy matching with manual alias support
- CloudKit uploader with batch operations and deduplication
- Comprehensive test suite with fixtures
- WNBA abbreviation aliases for improved team resolution
- Alias validation script to detect orphan references

All 5 phases of data remediation plan completed:
- Phase 1: Alias fixes (team/stadium alias additions)
- Phase 2: NHL stadium coordinate fixes
- Phase 3: Re-scrape validation
- Phase 4: iOS bundle update
- Phase 5: Code quality improvements (WNBA aliases)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-20 18:56:25 -06:00

462 lines
16 KiB
Python

"""Tests for the CloudKit client."""
import json
import pytest
from datetime import datetime
from unittest.mock import Mock, patch, MagicMock
from sportstime_parser.uploaders.cloudkit import (
CloudKitClient,
CloudKitRecord,
CloudKitError,
CloudKitAuthError,
CloudKitRateLimitError,
CloudKitServerError,
RecordType,
OperationResult,
BatchResult,
)
class TestCloudKitRecord:
    """Checks for CloudKitRecord construction and its to_cloudkit_dict() output."""

    @staticmethod
    def _serialized_fields(record_type, fields):
        """Serialize a record named "test" and return the "fields" part of the result."""
        payload = CloudKitRecord(
            record_name="test",
            record_type=record_type,
            fields=fields,
        ).to_cloudkit_dict()
        return payload["fields"]

    def test_create_record(self):
        """Constructor arguments are readable back; the change tag defaults to None."""
        game_fields = {
            "sport": "nba",
            "season": 2025,
        }
        game = CloudKitRecord(
            record_name="nba_2025_hou_okc_1021",
            record_type=RecordType.GAME,
            fields=game_fields,
        )

        assert game.record_name == "nba_2025_hou_okc_1021"
        assert game.record_type == RecordType.GAME
        assert game.fields["sport"] == "nba"
        assert game.record_change_tag is None

    def test_to_cloudkit_dict(self):
        """Serialized output carries name, type and fields, and no change tag key."""
        game = CloudKitRecord(
            record_name="nba_2025_hou_okc_1021",
            record_type=RecordType.GAME,
            fields={
                "sport": "nba",
                "season": 2025,
            },
        )

        payload = game.to_cloudkit_dict()

        assert payload["recordName"] == "nba_2025_hou_okc_1021"
        assert payload["recordType"] == "Game"
        assert "fields" in payload
        assert "recordChangeTag" not in payload

    def test_to_cloudkit_dict_with_change_tag(self):
        """A record built with a change tag serializes it under "recordChangeTag"."""
        tagged = CloudKitRecord(
            record_name="nba_2025_hou_okc_1021",
            record_type=RecordType.GAME,
            fields={"sport": "nba"},
            record_change_tag="abc123",
        )

        payload = tagged.to_cloudkit_dict()

        assert payload["recordChangeTag"] == "abc123"

    def test_format_string_field(self):
        """A str value is emitted with type "STRING"."""
        name_field = self._serialized_fields(
            RecordType.GAME, {"name": "Test Name"}
        )["name"]

        assert name_field["value"] == "Test Name"
        assert name_field["type"] == "STRING"

    def test_format_int_field(self):
        """An int value is emitted with type "INT64"."""
        count_field = self._serialized_fields(
            RecordType.GAME, {"count": 42}
        )["count"]

        assert count_field["value"] == 42
        assert count_field["type"] == "INT64"

    def test_format_float_field(self):
        """A float value is emitted with type "DOUBLE"."""
        latitude_field = self._serialized_fields(
            RecordType.STADIUM, {"latitude": 35.4634}
        )["latitude"]

        assert latitude_field["value"] == 35.4634
        assert latitude_field["type"] == "DOUBLE"

    def test_format_datetime_field(self):
        """A datetime value is emitted as integer epoch milliseconds, type "TIMESTAMP"."""
        tipoff = datetime(2025, 10, 21, 19, 0, 0)

        date_field = self._serialized_fields(
            RecordType.GAME, {"game_date": tipoff}
        )["game_date"]

        # Expected value is derived from the same datetime object the record received.
        expected_ms = int(tipoff.timestamp() * 1000)
        assert date_field["value"] == expected_ms
        assert date_field["type"] == "TIMESTAMP"

    def test_format_location_field(self):
        """A latitude/longitude dict is emitted with type "LOCATION"."""
        coordinates = {"latitude": 35.4634, "longitude": -97.5151}

        location_field = self._serialized_fields(
            RecordType.STADIUM, {"location": coordinates}
        )["location"]

        assert location_field["type"] == "LOCATION"
        assert location_field["value"]["latitude"] == 35.4634
        assert location_field["value"]["longitude"] == -97.5151

    def test_skip_none_fields(self):
        """Fields whose value is None are left out of the serialized output."""
        emitted = self._serialized_fields(
            RecordType.GAME,
            {
                "sport": "nba",
                "score": None,  # Should be skipped
            },
        )

        assert "sport" in emitted
        assert "score" not in emitted
class TestOperationResult:
    """Checks for the OperationResult dataclass."""

    def test_successful_result(self):
        """A successful result keeps its name and tag and has no error code."""
        outcome = OperationResult(
            record_name="test_record",
            success=True,
            record_change_tag="new_tag",
        )

        assert outcome.record_name == "test_record"
        assert outcome.success is True
        assert outcome.record_change_tag == "new_tag"
        assert outcome.error_code is None

    def test_failed_result(self):
        """A failed result exposes the error code and message it was built with."""
        outcome = OperationResult(
            record_name="test_record",
            success=False,
            error_code="SERVER_ERROR",
            error_message="Internal server error",
        )

        assert outcome.success is False
        assert outcome.error_code == "SERVER_ERROR"
        assert outcome.error_message == "Internal server error"
class TestBatchResult:
    """Checks for the BatchResult dataclass and its summary attributes."""

    @staticmethod
    def _assert_summary(batch, all_succeeded, successes, failures):
        """Assert all_succeeded, success_count and failure_count, in that order."""
        assert batch.all_succeeded is all_succeeded
        assert batch.success_count == successes
        assert batch.failure_count == failures

    def test_empty_batch_result(self):
        """A batch with no operations counts as fully succeeded with zero counts."""
        batch = BatchResult()

        self._assert_summary(batch, True, 0, 0)

    def test_batch_with_successes(self):
        """Two successful operations give success_count 2 and no failures."""
        batch = BatchResult()
        for record_name in ("rec1", "rec2"):
            batch.successful.append(OperationResult(record_name, True))

        self._assert_summary(batch, True, 2, 0)

    def test_batch_with_failures(self):
        """One success plus one failure makes all_succeeded False."""
        batch = BatchResult()
        batch.successful.append(OperationResult("rec1", True))
        batch.failed.append(OperationResult("rec2", False, error_message="Error"))

        self._assert_summary(batch, False, 1, 1)
class TestCloudKitClient:
    """Tests for CloudKitClient.

    The request-level tests share the same scaffolding: a patched
    ``requests.Session`` that returns one canned response, placeholder
    credentials in the environment, and patched ``load_pem_private_key`` /
    ``jwt.encode`` in the client module. That scaffolding lives in the
    private helpers below so each test only states its response and its
    expectations.
    """

    # Placeholder credentials for the request-level tests. The PEM text is
    # not a real key; the key loader is patched wherever these are used.
    _FAKE_CREDENTIALS = {
        "CLOUDKIT_KEY_ID": "test_key",
        "CLOUDKIT_PRIVATE_KEY": "-----BEGIN EC PRIVATE KEY-----\ntest\n-----END EC PRIVATE KEY-----",
    }

    @staticmethod
    def _stub_response(mock_session_class, status_code, payload=None):
        """Make the patched Session class hand back one canned response.

        Args:
            mock_session_class: The mock injected by
                ``@patch("...cloudkit.requests.Session")``.
            status_code: Value assigned to the response's ``status_code``.
            payload: Value returned by ``response.json()``. When None the
                ``json`` mock is left unconfigured.
        """
        session = MagicMock()
        mock_session_class.return_value = session
        response = MagicMock()
        response.status_code = status_code
        if payload is not None:
            response.json.return_value = payload
        session.request.return_value = response

    def _call_with_mock_auth(self, action):
        """Build a client under mocked auth and return ``action(client)``.

        The client is constructed, and ``action`` is invoked, while the
        environment holds the placeholder credentials and both
        ``serialization.load_pem_private_key`` and ``jwt.encode`` are patched
        in the client module, so any call the client makes to them during
        the action hits the mocks.

        Args:
            action: Callable taking the CloudKitClient instance.

        Returns:
            Whatever ``action`` returns.
        """
        mock_key = MagicMock()
        mock_key.sign.return_value = b"signature"
        with patch.dict("os.environ", self._FAKE_CREDENTIALS):
            with patch("sportstime_parser.uploaders.cloudkit.serialization.load_pem_private_key") as mock_load:
                with patch("sportstime_parser.uploaders.cloudkit.jwt.encode") as mock_jwt:
                    mock_load.return_value = mock_key
                    mock_jwt.return_value = "test_token"
                    return action(CloudKitClient())

    def _assert_fetch_raises(self, mock_session_class, status_code, expected_error):
        """Assert that fetching games against ``status_code`` raises ``expected_error``."""
        self._stub_response(mock_session_class, status_code)

        def fetch(client):
            # Only the fetch call is wrapped, so an error raised while
            # constructing the client is not mistaken for the expected one.
            with pytest.raises(expected_error):
                client.fetch_records(RecordType.GAME)

        self._call_with_mock_auth(fetch)

    def _save_two_games(self, mock_session_class, response_records):
        """Save records "rec1" and "rec2" against a 200 response and return the result."""
        self._stub_response(mock_session_class, 200, {"records": response_records})
        records = [
            CloudKitRecord("rec1", RecordType.GAME, {"sport": "nba"}),
            CloudKitRecord("rec2", RecordType.GAME, {"sport": "nba"}),
        ]
        return self._call_with_mock_auth(lambda client: client.save_records(records))

    def test_not_configured_without_credentials(self):
        """Test that client reports not configured without credentials."""
        with patch.dict("os.environ", {}, clear=True):
            client = CloudKitClient()
            assert client.is_configured is False

    def test_configured_with_credentials(self):
        """Test that client reports configured with credentials."""
        # Create a minimal mock for the private key
        mock_key = MagicMock()
        with patch.dict("os.environ", {
            "CLOUDKIT_KEY_ID": "test_key_id",
            "CLOUDKIT_PRIVATE_KEY": "-----BEGIN EC PRIVATE KEY-----\ntest\n-----END EC PRIVATE KEY-----",
        }):
            with patch("sportstime_parser.uploaders.cloudkit.serialization.load_pem_private_key") as mock_load:
                mock_load.return_value = mock_key
                client = CloudKitClient()
                assert client.is_configured is True

    def test_get_api_path(self):
        """Test API path construction."""
        client = CloudKitClient(
            container_id="iCloud.com.test.app",
            environment="development",
        )
        path = client._get_api_path("records/query")
        assert path == "/database/1/iCloud.com.test.app/development/public/records/query"

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_fetch_records_query(self, mock_session_class):
        """Test fetching records with query."""
        self._stub_response(mock_session_class, 200, {
            "records": [
                {"recordName": "rec1", "recordType": "Game"},
                {"recordName": "rec2", "recordType": "Game"},
            ]
        })

        records = self._call_with_mock_auth(
            lambda client: client.fetch_records(RecordType.GAME)
        )

        assert len(records) == 2
        assert records[0]["recordName"] == "rec1"

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_save_records_success(self, mock_session_class):
        """Test saving records successfully."""
        result = self._save_two_games(mock_session_class, [
            {"recordName": "rec1", "recordChangeTag": "tag1"},
            {"recordName": "rec2", "recordChangeTag": "tag2"},
        ])

        assert result.success_count == 2
        assert result.failure_count == 0

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_save_records_partial_failure(self, mock_session_class):
        """Test saving records with some failures."""
        result = self._save_two_games(mock_session_class, [
            {"recordName": "rec1", "recordChangeTag": "tag1"},
            {"recordName": "rec2", "serverErrorCode": "QUOTA_EXCEEDED", "reason": "Quota exceeded"},
        ])

        assert result.success_count == 1
        assert result.failure_count == 1
        assert result.failed[0].error_code == "QUOTA_EXCEEDED"

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_auth_error(self, mock_session_class):
        """Test handling authentication error."""
        self._assert_fetch_raises(mock_session_class, 421, CloudKitAuthError)

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_rate_limit_error(self, mock_session_class):
        """Test handling rate limit error."""
        self._assert_fetch_raises(mock_session_class, 429, CloudKitRateLimitError)

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_server_error(self, mock_session_class):
        """Test handling server error."""
        self._assert_fetch_raises(mock_session_class, 503, CloudKitServerError)
class TestRecordType:
    """Checks for the RecordType enum."""

    def test_record_type_values(self):
        """Each enum member's value equals the expected CloudKit record type name."""
        expected_names = [
            (RecordType.GAME, "Game"),
            (RecordType.TEAM, "Team"),
            (RecordType.STADIUM, "Stadium"),
            (RecordType.TEAM_ALIAS, "TeamAlias"),
            (RecordType.STADIUM_ALIAS, "StadiumAlias"),
        ]

        for member, schema_name in expected_names:
            assert member.value == schema_name