Complete Python package for scraping, normalizing, and uploading sports schedule data to CloudKit.

Includes:
- Multi-source scrapers for NBA, MLB, NFL, NHL, MLS, WNBA, NWSL
- Canonical ID system for teams, stadiums, and games
- Fuzzy matching with manual alias support
- CloudKit uploader with batch operations and deduplication
- Comprehensive test suite with fixtures
- WNBA abbreviation aliases for improved team resolution
- Alias validation script to detect orphan references

All 5 phases of the data remediation plan completed:
- Phase 1: Alias fixes (team/stadium alias additions)
- Phase 2: NHL stadium coordinate fixes
- Phase 3: Re-scrape validation
- Phase 4: iOS bundle update
- Phase 5: Code quality improvements (WNBA aliases)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
351 lines
12 KiB
Python
351 lines
12 KiB
Python
"""Tests for the record differ."""
|
|
|
|
import pytest
|
|
from datetime import datetime
|
|
|
|
from sportstime_parser.models.game import Game
|
|
from sportstime_parser.models.team import Team
|
|
from sportstime_parser.models.stadium import Stadium
|
|
from sportstime_parser.uploaders.diff import (
|
|
DiffAction,
|
|
RecordDiff,
|
|
DiffResult,
|
|
RecordDiffer,
|
|
game_to_cloudkit_record,
|
|
team_to_cloudkit_record,
|
|
stadium_to_cloudkit_record,
|
|
)
|
|
from sportstime_parser.uploaders.cloudkit import RecordType
|
|
|
|
|
|
class TestRecordDiff:
    """Checks that a RecordDiff exposes the values it was constructed with."""

    def test_create_record_diff(self):
        """Build a CREATE diff for a game record and read its fields back."""
        expected_name = "nba_2025_hou_okc_1021"

        created = RecordDiff(
            record_name=expected_name,
            record_type=RecordType.GAME,
            action=DiffAction.CREATE,
        )

        assert created.record_name == expected_name
        assert created.record_type == RecordType.GAME
        assert created.action == DiffAction.CREATE
|
|
|
|
|
|
class TestDiffResult:
    """Checks the counters that DiffResult derives from its record lists."""

    def test_empty_result(self):
        """A DiffResult built with no arguments reports zero for every counter."""
        empty = DiffResult()

        counters = (
            "create_count",
            "update_count",
            "delete_count",
            "unchanged_count",
            "total_changes",
        )
        for counter in counters:
            assert getattr(empty, counter) == 0

    def test_counts(self):
        """Populate each list and verify the per-action counters and the total."""
        tally = DiffResult()

        # (list attribute, record name, action), in the order entries are added.
        planned = (
            ("creates", "game_1", DiffAction.CREATE),
            ("creates", "game_2", DiffAction.CREATE),
            ("updates", "game_3", DiffAction.UPDATE),
            ("deletes", "game_4", DiffAction.DELETE),
            ("unchanged", "game_5", DiffAction.UNCHANGED),
        )
        for bucket, name, action in planned:
            entry = RecordDiff(
                record_name=name,
                record_type=RecordType.GAME,
                action=action,
            )
            getattr(tally, bucket).append(entry)

        assert tally.create_count == 2
        assert tally.update_count == 1
        assert tally.delete_count == 1
        assert tally.unchanged_count == 1
        # The unchanged entry is not counted as a change: 2 + 1 + 1.
        assert tally.total_changes == 4
|
|
|
|
|
|
class TestRecordDiffer:
    """Exercises RecordDiffer with local models and dict-shaped remote records."""

    # ------------------------------------------------------------------
    # Private builders for the remote-record dictionaries used below.
    # Names start with an underscore so they do not match the test_* pattern.
    # ------------------------------------------------------------------

    @staticmethod
    def _field(value, field_type):
        """Return one remote field entry: ``{"value": ..., "type": ...}``."""
        return {"value": value, "type": field_type}

    @staticmethod
    def _remote_record(record_name, fields, change_tag):
        """Wrap ``fields`` in a remote "Game" record with the given name and tag."""
        return {
            "recordName": record_name,
            "recordType": "Game",
            "fields": fields,
            "recordChangeTag": change_tag,
        }

    @classmethod
    def _schedule_fields(cls, home_team_id, away_team_id, stadium_id, game_date):
        """Return sport/season/teams/stadium/date fields for an NBA 2025 game.

        ``game_date`` is converted to integer milliseconds via ``timestamp()``.
        """
        epoch_ms = int(game_date.timestamp() * 1000)
        return {
            "sport": cls._field("nba", "STRING"),
            "season": cls._field(2025, "INT64"),
            "home_team_id": cls._field(home_team_id, "STRING"),
            "away_team_id": cls._field(away_team_id, "STRING"),
            "stadium_id": cls._field(stadium_id, "STRING"),
            "game_date": cls._field(epoch_ms, "TIMESTAMP"),
        }

    @classmethod
    def _okc_fields(cls, game, status):
        """Return the full remote field set for the HOU @ OKC game with ``status``.

        The game number and both scores are present with a ``None`` value.
        """
        fields = cls._schedule_fields(
            "team_nba_okc",
            "team_nba_hou",
            "stadium_nba_paycom_center",
            game.game_date,
        )
        for empty_key in ("game_number", "home_score", "away_score"):
            fields[empty_key] = cls._field(None, "INT64")
        fields["status"] = cls._field(status, "STRING")
        return fields

    # ------------------------------------------------------------------
    # Fixtures
    # ------------------------------------------------------------------

    @pytest.fixture
    def differ(self):
        """Provide a RecordDiffer built with no arguments."""
        return RecordDiffer()

    @pytest.fixture
    def sample_game(self):
        """Provide a scheduled 2025 NBA game, Houston at Oklahoma City."""
        tipoff = datetime(2025, 10, 21, 19, 0, 0)
        return Game(
            id="nba_2025_hou_okc_1021",
            sport="nba",
            season=2025,
            home_team_id="team_nba_okc",
            away_team_id="team_nba_hou",
            stadium_id="stadium_nba_paycom_center",
            game_date=tipoff,
            status="scheduled",
        )

    @pytest.fixture
    def sample_team(self):
        """Provide the Oklahoma City Thunder as a Team."""
        return Team(
            id="team_nba_okc",
            sport="nba",
            city="Oklahoma City",
            name="Thunder",
            full_name="Oklahoma City Thunder",
            abbreviation="OKC",
            conference="Western",
            division="Northwest",
        )

    @pytest.fixture
    def sample_stadium(self):
        """Provide Paycom Center as a Stadium."""
        return Stadium(
            id="stadium_nba_paycom_center",
            sport="nba",
            name="Paycom Center",
            city="Oklahoma City",
            state="OK",
            country="USA",
            latitude=35.4634,
            longitude=-97.5151,
            capacity=18203,
        )

    # ------------------------------------------------------------------
    # Games
    # ------------------------------------------------------------------

    def test_diff_games_create(self, differ, sample_game):
        """A local game with no remote records is reported as a create."""
        outcome = differ.diff_games([sample_game], [])

        assert outcome.create_count == 1
        assert outcome.update_count == 0
        assert outcome.delete_count == 0
        assert outcome.creates[0].record_name == sample_game.id

    def test_diff_games_delete(self, differ, sample_game):
        """A remote record with no local game is reported as a delete."""
        sparse_fields = {
            "sport": self._field("nba", "STRING"),
            "season": self._field(2025, "INT64"),
        }
        orphan = self._remote_record(sample_game.id, sparse_fields, "abc123")

        outcome = differ.diff_games([], [orphan])

        assert outcome.create_count == 0
        assert outcome.delete_count == 1
        assert outcome.deletes[0].record_name == sample_game.id

    def test_diff_games_unchanged(self, differ, sample_game):
        """A remote record matching the local game is reported as unchanged."""
        mirror = self._remote_record(
            sample_game.id,
            self._okc_fields(sample_game, "scheduled"),
            "abc123",
        )

        outcome = differ.diff_games([sample_game], [mirror])

        assert outcome.create_count == 0
        assert outcome.update_count == 0
        assert outcome.unchanged_count == 1

    def test_diff_games_update(self, differ, sample_game):
        """A remote record with a different status is reported as an update."""
        # The remote says "postponed" while the local game is "scheduled".
        stale = self._remote_record(
            sample_game.id,
            self._okc_fields(sample_game, "postponed"),
            "abc123",
        )

        outcome = differ.diff_games([sample_game], [stale])

        assert outcome.update_count == 1
        pending = outcome.updates[0]
        assert "status" in pending.changed_fields
        assert pending.record_change_tag == "abc123"

    # ------------------------------------------------------------------
    # Teams and stadiums
    # ------------------------------------------------------------------

    def test_diff_teams_create(self, differ, sample_team):
        """A local team with no remote records is reported as a create."""
        outcome = differ.diff_teams([sample_team], [])

        assert outcome.create_count == 1
        assert outcome.creates[0].record_name == sample_team.id

    def test_diff_stadiums_create(self, differ, sample_stadium):
        """A local stadium with no remote records is reported as a create."""
        outcome = differ.diff_stadiums([sample_stadium], [])

        assert outcome.create_count == 1
        assert outcome.creates[0].record_name == sample_stadium.id

    # ------------------------------------------------------------------
    # Upload selection
    # ------------------------------------------------------------------

    def test_get_records_to_upload(self, differ, sample_game):
        """get_records_to_upload returns both the created and the updated game."""
        second_game = Game(
            id="nba_2025_lal_lac_1022",
            sport="nba",
            season=2025,
            home_team_id="team_nba_lac",
            away_team_id="team_nba_lal",
            stadium_id="stadium_nba_crypto_com",
            game_date=datetime(2025, 10, 22, 19, 0, 0),
            status="scheduled",
        )

        # Only the second game exists remotely, and with a different status.
        remote_fields = self._schedule_fields(
            "team_nba_lac",
            "team_nba_lal",
            "stadium_nba_crypto_com",
            second_game.game_date,
        )
        remote_fields["status"] = self._field("postponed", "STRING")
        remote = self._remote_record(second_game.id, remote_fields, "xyz789")

        outcome = differ.diff_games([sample_game, second_game], [remote])
        to_upload = outcome.get_records_to_upload()

        assert len(to_upload) == 2  # 1 create + 1 update
        uploaded_names = [record.record_name for record in to_upload]
        assert sample_game.id in uploaded_names
        assert second_game.id in uploaded_names
|
|
|
|
|
|
class TestConvenienceFunctions:
    """Checks the module-level model-to-record conversion functions."""

    def test_game_to_cloudkit_record(self):
        """game_to_cloudkit_record carries over the id, record type and fields."""
        tipoff = datetime(2025, 10, 21, 19, 0, 0)
        source = Game(
            id="nba_2025_hou_okc_1021",
            sport="nba",
            season=2025,
            home_team_id="team_nba_okc",
            away_team_id="team_nba_hou",
            stadium_id="stadium_nba_paycom_center",
            game_date=tipoff,
            status="scheduled",
        )

        converted = game_to_cloudkit_record(source)

        assert converted.record_name == source.id
        assert converted.record_type == RecordType.GAME
        payload = converted.fields
        assert payload["sport"] == "nba"
        assert payload["season"] == 2025

    def test_team_to_cloudkit_record(self):
        """team_to_cloudkit_record carries over the id, record type and fields."""
        source = Team(
            id="team_nba_okc",
            sport="nba",
            city="Oklahoma City",
            name="Thunder",
            full_name="Oklahoma City Thunder",
            abbreviation="OKC",
        )

        converted = team_to_cloudkit_record(source)

        assert converted.record_name == source.id
        assert converted.record_type == RecordType.TEAM
        payload = converted.fields
        assert payload["city"] == "Oklahoma City"
        assert payload["name"] == "Thunder"

    def test_stadium_to_cloudkit_record(self):
        """stadium_to_cloudkit_record carries over the id, record type and fields."""
        source = Stadium(
            id="stadium_nba_paycom_center",
            sport="nba",
            name="Paycom Center",
            city="Oklahoma City",
            state="OK",
            country="USA",
            latitude=35.4634,
            longitude=-97.5151,
        )

        converted = stadium_to_cloudkit_record(source)

        assert converted.record_name == source.id
        assert converted.record_type == RecordType.STADIUM
        payload = converted.fields
        assert payload["name"] == "Paycom Center"
        assert payload["latitude"] == 35.4634
|