feat(scripts): add sportstime-parser data pipeline
Complete Python package for scraping, normalizing, and uploading sports schedule data to CloudKit.

Includes:
- Multi-source scrapers for NBA, MLB, NFL, NHL, MLS, WNBA, NWSL
- Canonical ID system for teams, stadiums, and games
- Fuzzy matching with manual alias support
- CloudKit uploader with batch operations and deduplication
- Comprehensive test suite with fixtures
- WNBA abbreviation aliases for improved team resolution
- Alias validation script to detect orphan references

All 5 phases of the data remediation plan are completed:
- Phase 1: Alias fixes (team/stadium alias additions)
- Phase 2: NHL stadium coordinate fixes
- Phase 3: Re-scrape validation
- Phase 4: iOS bundle update
- Phase 5: Code quality improvements (WNBA aliases)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
461
sportstime_parser/tests/test_uploaders/test_cloudkit.py
Normal file
461
sportstime_parser/tests/test_uploaders/test_cloudkit.py
Normal file
@@ -0,0 +1,461 @@
|
||||
"""Tests for the CloudKit client."""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
from datetime import datetime
|
||||
from unittest.mock import Mock, patch, MagicMock
|
||||
|
||||
from sportstime_parser.uploaders.cloudkit import (
|
||||
CloudKitClient,
|
||||
CloudKitRecord,
|
||||
CloudKitError,
|
||||
CloudKitAuthError,
|
||||
CloudKitRateLimitError,
|
||||
CloudKitServerError,
|
||||
RecordType,
|
||||
OperationResult,
|
||||
BatchResult,
|
||||
)
|
||||
|
||||
|
||||
class TestCloudKitRecord:
    """Exercise construction and serialization of ``CloudKitRecord``."""

    @staticmethod
    def _serialized_fields(record_type, fields):
        """Serialize a record named ``"test"`` and return its ``fields`` mapping.

        Args:
            record_type: ``RecordType`` member to build the record with.
            fields: Raw Python field values handed to ``CloudKitRecord``.

        Returns:
            The ``"fields"`` entry of ``CloudKitRecord.to_cloudkit_dict()``.
        """
        record = CloudKitRecord(
            record_name="test",
            record_type=record_type,
            fields=fields,
        )
        return record.to_cloudkit_dict()["fields"]

    def test_create_record(self):
        """Constructor arguments are exposed as attributes; change tag defaults to None."""
        game = CloudKitRecord(
            record_name="nba_2025_hou_okc_1021",
            record_type=RecordType.GAME,
            fields={"sport": "nba", "season": 2025},
        )

        assert game.record_name == "nba_2025_hou_okc_1021"
        assert game.record_type == RecordType.GAME
        assert game.fields["sport"] == "nba"
        assert game.record_change_tag is None

    def test_to_cloudkit_dict(self):
        """Serialization emits name, type and fields, and omits an unset change tag."""
        game = CloudKitRecord(
            record_name="nba_2025_hou_okc_1021",
            record_type=RecordType.GAME,
            fields={"sport": "nba", "season": 2025},
        )

        payload = game.to_cloudkit_dict()

        assert payload["recordName"] == "nba_2025_hou_okc_1021"
        assert payload["recordType"] == "Game"
        assert "fields" in payload
        assert "recordChangeTag" not in payload

    def test_to_cloudkit_dict_with_change_tag(self):
        """A record carrying a change tag serializes it as ``recordChangeTag``."""
        tagged = CloudKitRecord(
            record_name="nba_2025_hou_okc_1021",
            record_type=RecordType.GAME,
            fields={"sport": "nba"},
            record_change_tag="abc123",
        )

        payload = tagged.to_cloudkit_dict()

        assert payload["recordChangeTag"] == "abc123"

    def test_format_string_field(self):
        """A ``str`` value is serialized with type ``STRING``."""
        name_field = self._serialized_fields(RecordType.GAME, {"name": "Test Name"})["name"]

        assert name_field["value"] == "Test Name"
        assert name_field["type"] == "STRING"

    def test_format_int_field(self):
        """An ``int`` value is serialized with type ``INT64``."""
        count_field = self._serialized_fields(RecordType.GAME, {"count": 42})["count"]

        assert count_field["value"] == 42
        assert count_field["type"] == "INT64"

    def test_format_float_field(self):
        """A ``float`` value is serialized with type ``DOUBLE``."""
        latitude_field = self._serialized_fields(
            RecordType.STADIUM, {"latitude": 35.4634}
        )["latitude"]

        assert latitude_field["value"] == 35.4634
        assert latitude_field["type"] == "DOUBLE"

    def test_format_datetime_field(self):
        """A ``datetime`` value is serialized as a millisecond ``TIMESTAMP``."""
        # Naive datetime: the expected value is derived with the same
        # ``timestamp()`` call, so the comparison does not hard-code an epoch.
        tipoff = datetime(2025, 10, 21, 19, 0, 0)
        expected_ms = int(tipoff.timestamp() * 1000)

        date_field = self._serialized_fields(
            RecordType.GAME, {"game_date": tipoff}
        )["game_date"]

        assert date_field["value"] == expected_ms
        assert date_field["type"] == "TIMESTAMP"

    def test_format_location_field(self):
        """A latitude/longitude dict is serialized with type ``LOCATION``."""
        coordinates = {"latitude": 35.4634, "longitude": -97.5151}

        location_field = self._serialized_fields(
            RecordType.STADIUM, {"location": coordinates}
        )["location"]

        assert location_field["type"] == "LOCATION"
        assert location_field["value"]["latitude"] == 35.4634
        assert location_field["value"]["longitude"] == -97.5151

    def test_skip_none_fields(self):
        """Fields whose value is ``None`` are left out of the serialized output."""
        raw_fields = {
            "sport": "nba",
            "score": None,  # expected to be dropped during serialization
        }

        serialized = self._serialized_fields(RecordType.GAME, raw_fields)

        assert "sport" in serialized
        assert "score" not in serialized
|
||||
|
||||
|
||||
class TestOperationResult:
    """Exercise the ``OperationResult`` dataclass."""

    def test_successful_result(self):
        """A successful result keeps its name and change tag and has no error code."""
        outcome = OperationResult(
            record_name="test_record",
            success=True,
            record_change_tag="new_tag",
        )

        assert outcome.record_name == "test_record"
        assert outcome.success is True
        assert outcome.record_change_tag == "new_tag"
        assert outcome.error_code is None

    def test_failed_result(self):
        """A failed result exposes the error code and message it was built with."""
        outcome = OperationResult(
            record_name="test_record",
            success=False,
            error_code="SERVER_ERROR",
            error_message="Internal server error",
        )

        assert outcome.success is False
        assert outcome.error_code == "SERVER_ERROR"
        assert outcome.error_message == "Internal server error"
|
||||
|
||||
|
||||
class TestBatchResult:
    """Exercise the counters and ``all_succeeded`` flag of ``BatchResult``."""

    def test_empty_batch_result(self):
        """A batch with no operations counts zero of each and reports success."""
        batch = BatchResult()

        assert batch.all_succeeded is True
        assert batch.success_count == 0
        assert batch.failure_count == 0

    def test_batch_with_successes(self):
        """A batch holding only successful operations reports ``all_succeeded``."""
        batch = BatchResult()
        for name in ("rec1", "rec2"):
            batch.successful.append(OperationResult(name, True))

        assert batch.all_succeeded is True
        assert batch.success_count == 2
        assert batch.failure_count == 0

    def test_batch_with_failures(self):
        """One failed operation is enough to clear ``all_succeeded``."""
        batch = BatchResult()
        succeeded = OperationResult("rec1", True)
        errored = OperationResult("rec2", False, error_message="Error")
        batch.successful.append(succeeded)
        batch.failed.append(errored)

        assert batch.all_succeeded is False
        assert batch.success_count == 1
        assert batch.failure_count == 1
|
||||
|
||||
|
||||
class TestCloudKitClient:
    """Tests for CloudKitClient.

    No test here performs real network or crypto work: request-issuing tests
    receive the patched ``requests.Session`` class, and credentials are faked
    through a patched environment with key loading and JWT encoding mocked.
    The repeated setup lives in the private helpers below.
    """

    # Placeholder PEM text; it is never parsed because the tests patch
    # ``serialization.load_pem_private_key``.
    _FAKE_PEM = "-----BEGIN EC PRIVATE KEY-----\ntest\n-----END EC PRIVATE KEY-----"

    @classmethod
    def _mocked_auth(cls, key_id="test_key"):
        """Return a context manager that fakes CloudKit credentials.

        While the returned context is active:

        * ``CLOUDKIT_KEY_ID`` and ``CLOUDKIT_PRIVATE_KEY`` are set in
          ``os.environ`` (other variables are left untouched),
        * ``serialization.load_pem_private_key`` returns a mock key whose
          ``sign`` yields ``b"signature"``,
        * ``jwt.encode`` returns ``"test_token"``.

        Args:
            key_id: Value placed in ``CLOUDKIT_KEY_ID``.

        Returns:
            An ``ExitStack`` holding the active patches; use it in a ``with``
            statement so the patches are undone on exit.
        """
        # Function-scope import so the helper brings its own dependency.
        from contextlib import ExitStack

        fake_key = MagicMock()
        fake_key.sign.return_value = b"signature"

        with ExitStack() as stack:
            stack.enter_context(patch.dict("os.environ", {
                "CLOUDKIT_KEY_ID": key_id,
                "CLOUDKIT_PRIVATE_KEY": cls._FAKE_PEM,
            }))
            mock_load = stack.enter_context(
                patch("sportstime_parser.uploaders.cloudkit.serialization.load_pem_private_key")
            )
            mock_jwt = stack.enter_context(
                patch("sportstime_parser.uploaders.cloudkit.jwt.encode")
            )
            mock_load.return_value = fake_key
            mock_jwt.return_value = "test_token"
            # Transfer the still-open patches to the caller. If anything above
            # had raised, this ``with`` would already have unwound them.
            return stack.pop_all()

    @staticmethod
    def _stub_response(mock_session_class, status_code, payload=None):
        """Make the patched ``Session`` answer ``request()`` with a canned response.

        Args:
            mock_session_class: The mock injected by ``@patch`` for
                ``requests.Session``.
            status_code: Value for the response's ``status_code``.
            payload: Optional return value for ``response.json()``; when
                omitted, ``json()`` keeps its default ``MagicMock`` behaviour.

        Returns:
            The mock session instance that ``Session()`` will return.
        """
        mock_session = MagicMock()
        mock_session_class.return_value = mock_session

        mock_response = MagicMock()
        mock_response.status_code = status_code
        if payload is not None:
            mock_response.json.return_value = payload
        mock_session.request.return_value = mock_response
        return mock_session

    def _assert_fetch_raises(self, mock_session_class, status_code, expected_error):
        """Assert ``fetch_records`` raises ``expected_error`` for ``status_code``."""
        self._stub_response(mock_session_class, status_code)

        with self._mocked_auth():
            client = CloudKitClient()

            with pytest.raises(expected_error):
                client.fetch_records(RecordType.GAME)

    def test_not_configured_without_credentials(self):
        """Test that client reports not configured without credentials."""
        with patch.dict("os.environ", {}, clear=True):
            client = CloudKitClient()
            assert client.is_configured is False

    def test_configured_with_credentials(self):
        """Test that client reports configured with credentials."""
        with self._mocked_auth(key_id="test_key_id"):
            client = CloudKitClient()
            assert client.is_configured is True

    def test_get_api_path(self):
        """Test API path construction."""
        # Cleared environment keeps the test hermetic: constructing the client
        # must not depend on whatever CloudKit variables the host has set.
        with patch.dict("os.environ", {}, clear=True):
            client = CloudKitClient(
                container_id="iCloud.com.test.app",
                environment="development",
            )

            path = client._get_api_path("records/query")

        assert path == "/database/1/iCloud.com.test.app/development/public/records/query"

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_fetch_records_query(self, mock_session_class):
        """Test fetching records with query."""
        self._stub_response(mock_session_class, 200, {
            "records": [
                {"recordName": "rec1", "recordType": "Game"},
                {"recordName": "rec2", "recordType": "Game"},
            ]
        })

        with self._mocked_auth():
            client = CloudKitClient()
            records = client.fetch_records(RecordType.GAME)

        assert len(records) == 2
        assert records[0]["recordName"] == "rec1"

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_save_records_success(self, mock_session_class):
        """Test saving records successfully."""
        self._stub_response(mock_session_class, 200, {
            "records": [
                {"recordName": "rec1", "recordChangeTag": "tag1"},
                {"recordName": "rec2", "recordChangeTag": "tag2"},
            ]
        })

        with self._mocked_auth():
            client = CloudKitClient()

            records = [
                CloudKitRecord("rec1", RecordType.GAME, {"sport": "nba"}),
                CloudKitRecord("rec2", RecordType.GAME, {"sport": "nba"}),
            ]

            result = client.save_records(records)

        assert result.success_count == 2
        assert result.failure_count == 0

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_save_records_partial_failure(self, mock_session_class):
        """Test saving records with some failures."""
        self._stub_response(mock_session_class, 200, {
            "records": [
                {"recordName": "rec1", "recordChangeTag": "tag1"},
                {"recordName": "rec2", "serverErrorCode": "QUOTA_EXCEEDED", "reason": "Quota exceeded"},
            ]
        })

        with self._mocked_auth():
            client = CloudKitClient()

            records = [
                CloudKitRecord("rec1", RecordType.GAME, {"sport": "nba"}),
                CloudKitRecord("rec2", RecordType.GAME, {"sport": "nba"}),
            ]

            result = client.save_records(records)

        assert result.success_count == 1
        assert result.failure_count == 1
        assert result.failed[0].error_code == "QUOTA_EXCEEDED"

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_auth_error(self, mock_session_class):
        """Test handling authentication error."""
        self._assert_fetch_raises(mock_session_class, 421, CloudKitAuthError)

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_rate_limit_error(self, mock_session_class):
        """Test handling rate limit error."""
        self._assert_fetch_raises(mock_session_class, 429, CloudKitRateLimitError)

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_server_error(self, mock_session_class):
        """Test handling server error."""
        self._assert_fetch_raises(mock_session_class, 503, CloudKitServerError)
|
||||
|
||||
|
||||
class TestRecordType:
    """Exercise the ``RecordType`` enum."""

    def test_record_type_values(self):
        """Each member's value is the expected CloudKit record type string."""
        expected_values = [
            (RecordType.GAME, "Game"),
            (RecordType.TEAM, "Team"),
            (RecordType.STADIUM, "Stadium"),
            (RecordType.TEAM_ALIAS, "TeamAlias"),
            (RecordType.STADIUM_ALIAS, "StadiumAlias"),
        ]

        for member, schema_name in expected_values:
            assert member.value == schema_name
|
||||
Reference in New Issue
Block a user