feat(scripts): add sportstime-parser data pipeline
Complete Python package for scraping, normalizing, and uploading sports schedule data to CloudKit.

Includes:
- Multi-source scrapers for NBA, MLB, NFL, NHL, MLS, WNBA, NWSL
- Canonical ID system for teams, stadiums, and games
- Fuzzy matching with manual alias support
- CloudKit uploader with batch operations and deduplication
- Comprehensive test suite with fixtures
- WNBA abbreviation aliases for improved team resolution
- Alias validation script to detect orphan references

All 5 phases of the data remediation plan completed:
- Phase 1: Alias fixes (team/stadium alias additions)
- Phase 2: NHL stadium coordinate fixes
- Phase 3: Re-scrape validation
- Phase 4: iOS bundle update
- Phase 5: Code quality improvements (WNBA aliases)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
350
sportstime_parser/tests/test_uploaders/test_diff.py
Normal file
350
sportstime_parser/tests/test_uploaders/test_diff.py
Normal file
@@ -0,0 +1,350 @@
|
||||
"""Tests for the record differ."""
|
||||
|
||||
import pytest
|
||||
from datetime import datetime
|
||||
|
||||
from sportstime_parser.models.game import Game
|
||||
from sportstime_parser.models.team import Team
|
||||
from sportstime_parser.models.stadium import Stadium
|
||||
from sportstime_parser.uploaders.diff import (
|
||||
DiffAction,
|
||||
RecordDiff,
|
||||
DiffResult,
|
||||
RecordDiffer,
|
||||
game_to_cloudkit_record,
|
||||
team_to_cloudkit_record,
|
||||
stadium_to_cloudkit_record,
|
||||
)
|
||||
from sportstime_parser.uploaders.cloudkit import RecordType
|
||||
|
||||
|
||||
class TestRecordDiff:
    """Exercise construction of the RecordDiff dataclass."""

    def test_create_record_diff(self):
        """A RecordDiff exposes the name, type and action it was built with."""
        expected_name = "nba_2025_hou_okc_1021"

        created = RecordDiff(
            record_name=expected_name,
            record_type=RecordType.GAME,
            action=DiffAction.CREATE,
        )

        assert created.record_name == expected_name
        assert created.record_type == RecordType.GAME
        assert created.action == DiffAction.CREATE
|
||||
|
||||
|
||||
class TestDiffResult:
    """Checks the per-action counters exposed by DiffResult."""

    def test_empty_result(self):
        """A DiffResult built with no arguments reports zero for every counter."""
        empty = DiffResult()

        for counter in (
            "create_count",
            "update_count",
            "delete_count",
            "unchanged_count",
            "total_changes",
        ):
            assert getattr(empty, counter) == 0, counter

    def test_counts(self):
        """Each counter matches the number of diffs appended to its list."""
        populated = DiffResult()

        # (target list, record name, action), listed in append order.
        planned = (
            (populated.creates, "game_1", DiffAction.CREATE),
            (populated.creates, "game_2", DiffAction.CREATE),
            (populated.updates, "game_3", DiffAction.UPDATE),
            (populated.deletes, "game_4", DiffAction.DELETE),
            (populated.unchanged, "game_5", DiffAction.UNCHANGED),
        )
        for bucket, name, action in planned:
            bucket.append(
                RecordDiff(
                    record_name=name,
                    record_type=RecordType.GAME,
                    action=action,
                )
            )

        assert populated.create_count == 2
        assert populated.update_count == 1
        assert populated.delete_count == 1
        assert populated.unchanged_count == 1
        # The single unchanged entry is not part of the total.
        assert populated.total_changes == 4
|
||||
|
||||
|
||||
class TestRecordDiffer:
    """Scenario tests for RecordDiffer's diff_games, diff_teams and diff_stadiums."""

    @staticmethod
    def _remote_game_record(record_name, fields, change_tag):
        """Wrap ``fields`` in the remote "Game" record dict passed to diff_games."""
        return {
            "recordName": record_name,
            "recordType": "Game",
            "fields": fields,
            "recordChangeTag": change_tag,
        }

    @pytest.fixture
    def differ(self):
        """Provide a RecordDiffer built with no arguments."""
        return RecordDiffer()

    @pytest.fixture
    def sample_game(self):
        """Provide a scheduled 2025 NBA Game (home team OKC, away team HOU)."""
        scheduled_at = datetime(2025, 10, 21, 19, 0, 0)
        game = Game(
            id="nba_2025_hou_okc_1021",
            sport="nba",
            season=2025,
            home_team_id="team_nba_okc",
            away_team_id="team_nba_hou",
            stadium_id="stadium_nba_paycom_center",
            game_date=scheduled_at,
            status="scheduled",
        )
        return game

    @pytest.fixture
    def sample_team(self):
        """Provide the Oklahoma City Thunder as a Team."""
        team = Team(
            id="team_nba_okc",
            sport="nba",
            city="Oklahoma City",
            name="Thunder",
            full_name="Oklahoma City Thunder",
            abbreviation="OKC",
            conference="Western",
            division="Northwest",
        )
        return team

    @pytest.fixture
    def sample_stadium(self):
        """Provide Paycom Center as a Stadium."""
        stadium = Stadium(
            id="stadium_nba_paycom_center",
            sport="nba",
            name="Paycom Center",
            city="Oklahoma City",
            state="OK",
            country="USA",
            latitude=35.4634,
            longitude=-97.5151,
            capacity=18203,
        )
        return stadium

    def test_diff_games_create(self, differ, sample_game):
        """A local game with no remote records is reported as a create."""
        outcome = differ.diff_games([sample_game], [])

        assert outcome.create_count == 1
        assert outcome.update_count == 0
        assert outcome.delete_count == 0
        assert outcome.creates[0].record_name == sample_game.id

    def test_diff_games_delete(self, differ, sample_game):
        """A remote record with no local game is reported as a delete."""
        remote_only = self._remote_game_record(
            sample_game.id,
            {
                "sport": {"value": "nba", "type": "STRING"},
                "season": {"value": 2025, "type": "INT64"},
            },
            "abc123",
        )

        outcome = differ.diff_games([], [remote_only])

        assert outcome.create_count == 0
        assert outcome.delete_count == 1
        assert outcome.deletes[0].record_name == sample_game.id

    def test_diff_games_unchanged(self, differ, sample_game):
        """A remote record whose fields mirror the local game is unchanged."""
        # The remote game_date is integer milliseconds derived from the fixture.
        game_date_ms = int(sample_game.game_date.timestamp() * 1000)
        mirrored_fields = {
            "sport": {"value": "nba", "type": "STRING"},
            "season": {"value": 2025, "type": "INT64"},
            "home_team_id": {"value": "team_nba_okc", "type": "STRING"},
            "away_team_id": {"value": "team_nba_hou", "type": "STRING"},
            "stadium_id": {"value": "stadium_nba_paycom_center", "type": "STRING"},
            "game_date": {"value": game_date_ms, "type": "TIMESTAMP"},
            "game_number": {"value": None, "type": "INT64"},
            "home_score": {"value": None, "type": "INT64"},
            "away_score": {"value": None, "type": "INT64"},
            "status": {"value": "scheduled", "type": "STRING"},
        }
        remote = self._remote_game_record(sample_game.id, mirrored_fields, "abc123")

        outcome = differ.diff_games([sample_game], [remote])

        assert outcome.create_count == 0
        assert outcome.update_count == 0
        assert outcome.unchanged_count == 1

    def test_diff_games_update(self, differ, sample_game):
        """A remote record with a different status is reported as an update."""
        game_date_ms = int(sample_game.game_date.timestamp() * 1000)
        stale_fields = {
            "sport": {"value": "nba", "type": "STRING"},
            "season": {"value": 2025, "type": "INT64"},
            "home_team_id": {"value": "team_nba_okc", "type": "STRING"},
            "away_team_id": {"value": "team_nba_hou", "type": "STRING"},
            "stadium_id": {"value": "stadium_nba_paycom_center", "type": "STRING"},
            "game_date": {"value": game_date_ms, "type": "TIMESTAMP"},
            "game_number": {"value": None, "type": "INT64"},
            "home_score": {"value": None, "type": "INT64"},
            "away_score": {"value": None, "type": "INT64"},
            # The only field that differs from the local game ("scheduled").
            "status": {"value": "postponed", "type": "STRING"},
        }
        remote = self._remote_game_record(sample_game.id, stale_fields, "abc123")

        outcome = differ.diff_games([sample_game], [remote])

        assert outcome.update_count == 1
        assert "status" in outcome.updates[0].changed_fields
        assert outcome.updates[0].record_change_tag == "abc123"

    def test_diff_teams_create(self, differ, sample_team):
        """A local team with no remote records is reported as a create."""
        outcome = differ.diff_teams([sample_team], [])

        assert outcome.create_count == 1
        assert outcome.creates[0].record_name == sample_team.id

    def test_diff_stadiums_create(self, differ, sample_stadium):
        """A local stadium with no remote records is reported as a create."""
        outcome = differ.diff_stadiums([sample_stadium], [])

        assert outcome.create_count == 1
        assert outcome.creates[0].record_name == sample_stadium.id

    def test_get_records_to_upload(self, differ, sample_game):
        """get_records_to_upload returns both the created and the updated game."""
        second_game = Game(
            id="nba_2025_lal_lac_1022",
            sport="nba",
            season=2025,
            home_team_id="team_nba_lac",
            away_team_id="team_nba_lal",
            stadium_id="stadium_nba_crypto_com",
            game_date=datetime(2025, 10, 22, 19, 0, 0),
            status="scheduled",
        )

        # Only the second game exists remotely, and its status differs.
        second_date_ms = int(second_game.game_date.timestamp() * 1000)
        remote = self._remote_game_record(
            second_game.id,
            {
                "sport": {"value": "nba", "type": "STRING"},
                "season": {"value": 2025, "type": "INT64"},
                "home_team_id": {"value": "team_nba_lac", "type": "STRING"},
                "away_team_id": {"value": "team_nba_lal", "type": "STRING"},
                "stadium_id": {"value": "stadium_nba_crypto_com", "type": "STRING"},
                "game_date": {"value": second_date_ms, "type": "TIMESTAMP"},
                "status": {"value": "postponed", "type": "STRING"},
            },
            "xyz789",
        )

        outcome = differ.diff_games([sample_game, second_game], [remote])
        to_upload = outcome.get_records_to_upload()

        # One create (sample_game) plus one update (second_game).
        assert len(to_upload) == 2
        uploaded_names = [rec.record_name for rec in to_upload]
        assert sample_game.id in uploaded_names
        assert second_game.id in uploaded_names
|
||||
|
||||
|
||||
class TestConvenienceFunctions:
    """Checks for the module-level ``*_to_cloudkit_record`` helpers."""

    def test_game_to_cloudkit_record(self):
        """The Game conversion keeps the id, record type, sport and season."""
        scheduled_at = datetime(2025, 10, 21, 19, 0, 0)
        source = Game(
            id="nba_2025_hou_okc_1021",
            sport="nba",
            season=2025,
            home_team_id="team_nba_okc",
            away_team_id="team_nba_hou",
            stadium_id="stadium_nba_paycom_center",
            game_date=scheduled_at,
            status="scheduled",
        )

        converted = game_to_cloudkit_record(source)

        assert converted.record_name == source.id
        assert converted.record_type == RecordType.GAME
        game_fields = converted.fields
        assert game_fields["sport"] == "nba"
        assert game_fields["season"] == 2025

    def test_team_to_cloudkit_record(self):
        """The Team conversion keeps the id, record type, city and name."""
        source = Team(
            id="team_nba_okc",
            sport="nba",
            city="Oklahoma City",
            name="Thunder",
            full_name="Oklahoma City Thunder",
            abbreviation="OKC",
        )

        converted = team_to_cloudkit_record(source)

        assert converted.record_name == source.id
        assert converted.record_type == RecordType.TEAM
        team_fields = converted.fields
        assert team_fields["city"] == "Oklahoma City"
        assert team_fields["name"] == "Thunder"

    def test_stadium_to_cloudkit_record(self):
        """The Stadium conversion keeps the id, record type, name and latitude."""
        source = Stadium(
            id="stadium_nba_paycom_center",
            sport="nba",
            name="Paycom Center",
            city="Oklahoma City",
            state="OK",
            country="USA",
            latitude=35.4634,
            longitude=-97.5151,
        )

        converted = stadium_to_cloudkit_record(source)

        assert converted.record_name == source.id
        assert converted.record_type == RecordType.STADIUM
        stadium_fields = converted.fields
        assert stadium_fields["name"] == "Paycom Center"
        assert stadium_fields["latitude"] == 35.4634
|
||||
Reference in New Issue
Block a user