feat(scripts): add sportstime-parser data pipeline

Complete Python package for scraping, normalizing, and uploading
sports schedule data to CloudKit. Includes:

- Multi-source scrapers for NBA, MLB, NFL, NHL, MLS, WNBA, NWSL
- Canonical ID system for teams, stadiums, and games
- Fuzzy matching with manual alias support
- CloudKit uploader with batch operations and deduplication
- Comprehensive test suite with fixtures
- WNBA abbreviation aliases for improved team resolution
- Alias validation script to detect orphan references

All 5 phases of the data remediation plan are complete:
- Phase 1: Alias fixes (team/stadium alias additions)
- Phase 2: NHL stadium coordinate fixes
- Phase 3: Re-scrape validation
- Phase 4: iOS bundle update
- Phase 5: Code quality improvements (WNBA aliases)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-01-20 18:56:25 -06:00
parent ac78042a7e
commit 52d445bca4
76 changed files with 25065 additions and 0 deletions

View File

@@ -0,0 +1 @@
"""Unit tests for sportstime_parser."""

View File

@@ -0,0 +1,48 @@
"""Test fixtures for sportstime-parser tests."""
from pathlib import Path
# Root of the fixture tree: the directory that contains this module.
FIXTURES_DIR = Path(__file__).parent

# --- NBA: Basketball-Reference HTML pages plus an ESPN scoreboard payload ---
NBA_FIXTURES_DIR = FIXTURES_DIR.joinpath("nba")
NBA_BR_OCTOBER_HTML = NBA_FIXTURES_DIR.joinpath("basketball_reference_october.html")
NBA_BR_EDGE_CASES_HTML = NBA_FIXTURES_DIR.joinpath("basketball_reference_edge_cases.html")
NBA_ESPN_SCOREBOARD_JSON = NBA_FIXTURES_DIR.joinpath("espn_scoreboard.json")

# --- MLB ---
MLB_FIXTURES_DIR = FIXTURES_DIR.joinpath("mlb")
MLB_ESPN_SCOREBOARD_JSON = MLB_FIXTURES_DIR.joinpath("espn_scoreboard.json")

# --- NFL ---
NFL_FIXTURES_DIR = FIXTURES_DIR.joinpath("nfl")
NFL_ESPN_SCOREBOARD_JSON = NFL_FIXTURES_DIR.joinpath("espn_scoreboard.json")

# --- NHL ---
NHL_FIXTURES_DIR = FIXTURES_DIR.joinpath("nhl")
NHL_ESPN_SCOREBOARD_JSON = NHL_FIXTURES_DIR.joinpath("espn_scoreboard.json")

# --- MLS ---
MLS_FIXTURES_DIR = FIXTURES_DIR.joinpath("mls")
MLS_ESPN_SCOREBOARD_JSON = MLS_FIXTURES_DIR.joinpath("espn_scoreboard.json")

# --- WNBA ---
WNBA_FIXTURES_DIR = FIXTURES_DIR.joinpath("wnba")
WNBA_ESPN_SCOREBOARD_JSON = WNBA_FIXTURES_DIR.joinpath("espn_scoreboard.json")

# --- NWSL ---
NWSL_FIXTURES_DIR = FIXTURES_DIR.joinpath("nwsl")
NWSL_ESPN_SCOREBOARD_JSON = NWSL_FIXTURES_DIR.joinpath("espn_scoreboard.json")
def load_fixture(path: Path) -> str:
    """Return the entire contents of the fixture at *path* as UTF-8 text."""
    # Path(...) is a no-op for Path inputs and also accepts plain string paths.
    return Path(path).read_text(encoding="utf-8")
def load_json_fixture(path: Path) -> dict:
    """Read the UTF-8 fixture at *path* and return its parsed JSON content."""
    # Imported locally, as in the original, so the module itself has no
    # top-level dependency on json.
    import json

    raw_text = Path(path).read_text(encoding="utf-8")
    return json.loads(raw_text)

View File

@@ -0,0 +1,245 @@
{
"leagues": [
{
"id": "10",
"uid": "s:1~l:10",
"name": "Major League Baseball",
"abbreviation": "MLB"
}
],
"season": {
"type": 2,
"year": 2026
},
"day": {
"date": "2026-04-15T00:00:00Z"
},
"events": [
{
"id": "401584801",
"uid": "s:1~l:10~e:401584801",
"date": "2026-04-15T23:05:00Z",
"name": "New York Yankees at Boston Red Sox",
"shortName": "NYY @ BOS",
"competitions": [
{
"id": "401584801",
"uid": "s:1~l:10~e:401584801~c:401584801",
"date": "2026-04-15T23:05:00Z",
"attendance": 37435,
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "3",
"fullName": "Fenway Park",
"address": {
"city": "Boston",
"state": "MA"
},
"capacity": 37755,
"indoor": false
},
"competitors": [
{
"id": "2",
"uid": "s:1~l:10~t:2",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "2",
"uid": "s:1~l:10~t:2",
"location": "Boston",
"name": "Red Sox",
"abbreviation": "BOS",
"displayName": "Boston Red Sox"
},
"score": "5",
"winner": true
},
{
"id": "10",
"uid": "s:1~l:10~t:10",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "10",
"uid": "s:1~l:10~t:10",
"location": "New York",
"name": "Yankees",
"abbreviation": "NYY",
"displayName": "New York Yankees"
},
"score": "3",
"winner": false
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 9,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401584802",
"uid": "s:1~l:10~e:401584802",
"date": "2026-04-15T20:10:00Z",
"name": "Chicago Cubs at St. Louis Cardinals",
"shortName": "CHC @ STL",
"competitions": [
{
"id": "401584802",
"uid": "s:1~l:10~e:401584802~c:401584802",
"date": "2026-04-15T20:10:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "87",
"fullName": "Busch Stadium",
"address": {
"city": "St. Louis",
"state": "MO"
},
"capacity": 45538,
"indoor": false
},
"competitors": [
{
"id": "24",
"uid": "s:1~l:10~t:24",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "24",
"uid": "s:1~l:10~t:24",
"location": "St. Louis",
"name": "Cardinals",
"abbreviation": "STL",
"displayName": "St. Louis Cardinals"
},
"score": "7",
"winner": true
},
{
"id": "16",
"uid": "s:1~l:10~t:16",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "16",
"uid": "s:1~l:10~t:16",
"location": "Chicago",
"name": "Cubs",
"abbreviation": "CHC",
"displayName": "Chicago Cubs"
},
"score": "4",
"winner": false
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 9,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401584803",
"uid": "s:1~l:10~e:401584803",
"date": "2026-04-16T00:10:00Z",
"name": "Los Angeles Dodgers at San Francisco Giants",
"shortName": "LAD @ SF",
"competitions": [
{
"id": "401584803",
"uid": "s:1~l:10~e:401584803~c:401584803",
"date": "2026-04-16T00:10:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "116",
"fullName": "Oracle Park",
"address": {
"city": "San Francisco",
"state": "CA"
},
"capacity": 41915,
"indoor": false
},
"competitors": [
{
"id": "26",
"uid": "s:1~l:10~t:26",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "26",
"uid": "s:1~l:10~t:26",
"location": "San Francisco",
"name": "Giants",
"abbreviation": "SF",
"displayName": "San Francisco Giants"
},
"score": null,
"winner": null
},
{
"id": "19",
"uid": "s:1~l:10~t:19",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "19",
"uid": "s:1~l:10~t:19",
"location": "Los Angeles",
"name": "Dodgers",
"abbreviation": "LAD",
"displayName": "Los Angeles Dodgers"
},
"score": null,
"winner": null
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 0,
"type": {
"id": "1",
"name": "STATUS_SCHEDULED",
"state": "pre",
"completed": false
}
}
}
]
}
]
}

View File

@@ -0,0 +1,245 @@
{
"leagues": [
{
"id": "19",
"uid": "s:600~l:19",
"name": "Major League Soccer",
"abbreviation": "MLS"
}
],
"season": {
"type": 2,
"year": 2026
},
"day": {
"date": "2026-03-15T00:00:00Z"
},
"events": [
{
"id": "401672001",
"uid": "s:600~l:19~e:401672001",
"date": "2026-03-15T22:00:00Z",
"name": "LA Galaxy at LAFC",
"shortName": "LA @ LAFC",
"competitions": [
{
"id": "401672001",
"uid": "s:600~l:19~e:401672001~c:401672001",
"date": "2026-03-15T22:00:00Z",
"attendance": 22000,
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "8909",
"fullName": "BMO Stadium",
"address": {
"city": "Los Angeles",
"state": "CA"
},
"capacity": 22000,
"indoor": false
},
"competitors": [
{
"id": "21295",
"uid": "s:600~l:19~t:21295",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "21295",
"uid": "s:600~l:19~t:21295",
"location": "Los Angeles",
"name": "FC",
"abbreviation": "LAFC",
"displayName": "Los Angeles FC"
},
"score": "3",
"winner": true
},
{
"id": "3610",
"uid": "s:600~l:19~t:3610",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "3610",
"uid": "s:600~l:19~t:3610",
"location": "Los Angeles",
"name": "Galaxy",
"abbreviation": "LA",
"displayName": "LA Galaxy"
},
"score": "2",
"winner": false
}
],
"status": {
"clock": 90,
"displayClock": "90'",
"period": 2,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401672002",
"uid": "s:600~l:19~e:401672002",
"date": "2026-03-15T23:00:00Z",
"name": "Seattle Sounders at Portland Timbers",
"shortName": "SEA @ POR",
"competitions": [
{
"id": "401672002",
"uid": "s:600~l:19~e:401672002~c:401672002",
"date": "2026-03-15T23:00:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "8070",
"fullName": "Providence Park",
"address": {
"city": "Portland",
"state": "OR"
},
"capacity": 25218,
"indoor": false
},
"competitors": [
{
"id": "5282",
"uid": "s:600~l:19~t:5282",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "5282",
"uid": "s:600~l:19~t:5282",
"location": "Portland",
"name": "Timbers",
"abbreviation": "POR",
"displayName": "Portland Timbers"
},
"score": "2",
"winner": false
},
{
"id": "4687",
"uid": "s:600~l:19~t:4687",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "4687",
"uid": "s:600~l:19~t:4687",
"location": "Seattle",
"name": "Sounders FC",
"abbreviation": "SEA",
"displayName": "Seattle Sounders FC"
},
"score": "2",
"winner": false
}
],
"status": {
"clock": 90,
"displayClock": "90'",
"period": 2,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401672003",
"uid": "s:600~l:19~e:401672003",
"date": "2026-03-16T00:00:00Z",
"name": "New York Red Bulls at Atlanta United",
"shortName": "NY @ ATL",
"competitions": [
{
"id": "401672003",
"uid": "s:600~l:19~e:401672003~c:401672003",
"date": "2026-03-16T00:00:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "8904",
"fullName": "Mercedes-Benz Stadium",
"address": {
"city": "Atlanta",
"state": "GA"
},
"capacity": 42500,
"indoor": true
},
"competitors": [
{
"id": "18626",
"uid": "s:600~l:19~t:18626",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "18626",
"uid": "s:600~l:19~t:18626",
"location": "Atlanta",
"name": "United FC",
"abbreviation": "ATL",
"displayName": "Atlanta United FC"
},
"score": null,
"winner": null
},
{
"id": "399",
"uid": "s:600~l:19~t:399",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "399",
"uid": "s:600~l:19~t:399",
"location": "New York",
"name": "Red Bulls",
"abbreviation": "NY",
"displayName": "New York Red Bulls"
},
"score": null,
"winner": null
}
],
"status": {
"clock": 0,
"displayClock": "0'",
"period": 0,
"type": {
"id": "1",
"name": "STATUS_SCHEDULED",
"state": "pre",
"completed": false
}
}
}
]
}
]
}

View File

@@ -0,0 +1,79 @@
<!DOCTYPE html>
<html>
<head>
<title>2025-26 NBA Schedule - Edge Cases | Basketball-Reference.com</title>
</head>
<body>
<table id="schedule" class="stats_table">
<thead>
<tr>
<th data-stat="date_game">Date</th>
<th data-stat="game_start_time">Start (ET)</th>
<th data-stat="visitor_team_name">Visitor/Neutral</th>
<th data-stat="visitor_pts">PTS</th>
<th data-stat="home_team_name">Home/Neutral</th>
<th data-stat="home_pts">PTS</th>
<th data-stat="arena_name">Arena</th>
<th data-stat="game_remarks">Notes</th>
</tr>
</thead>
<tbody>
<!-- Postponed game -->
<tr>
<th data-stat="date_game">Sat, Jan 11, 2026</th>
<td data-stat="game_start_time">7:30p</td>
<td data-stat="visitor_team_name">Los Angeles Lakers</td>
<td data-stat="visitor_pts"></td>
<td data-stat="home_team_name">Phoenix Suns</td>
<td data-stat="home_pts"></td>
<td data-stat="arena_name">Footprint Center</td>
<td data-stat="game_remarks">Postponed - Weather</td>
</tr>
<!-- Neutral site game (Mexico City) -->
<tr>
<th data-stat="date_game">Sat, Nov 8, 2025</th>
<td data-stat="game_start_time">7:00p</td>
<td data-stat="visitor_team_name">Miami Heat</td>
<td data-stat="visitor_pts">105</td>
<td data-stat="home_team_name">Washington Wizards</td>
<td data-stat="home_pts">99</td>
<td data-stat="arena_name">Arena CDMX</td>
<td data-stat="game_remarks">NBA Mexico City Games</td>
</tr>
<!-- Cancelled game -->
<tr>
<th data-stat="date_game">Wed, Dec 3, 2025</th>
<td data-stat="game_start_time">8:00p</td>
<td data-stat="visitor_team_name">Portland Trail Blazers</td>
<td data-stat="visitor_pts"></td>
<td data-stat="home_team_name">Sacramento Kings</td>
<td data-stat="home_pts"></td>
<td data-stat="arena_name">Golden 1 Center</td>
<td data-stat="game_remarks">Cancelled</td>
</tr>
<!-- Regular completed game with high scores -->
<tr>
<th data-stat="date_game">Sun, Mar 15, 2026</th>
<td data-stat="game_start_time">3:30p</td>
<td data-stat="visitor_team_name">Indiana Pacers</td>
<td data-stat="visitor_pts">147</td>
<td data-stat="home_team_name">Atlanta Hawks</td>
<td data-stat="home_pts">150</td>
<td data-stat="arena_name">State Farm Arena</td>
<td data-stat="game_remarks">OT</td>
</tr>
<!-- Game at arena with special characters (NOTE: "Intuit Dome" contains none; confirm the intended arena name) -->
<tr>
<th data-stat="date_game">Mon, Feb 2, 2026</th>
<td data-stat="game_start_time">10:30p</td>
<td data-stat="visitor_team_name">Golden State Warriors</td>
<td data-stat="visitor_pts">118</td>
<td data-stat="home_team_name">Los Angeles Clippers</td>
<td data-stat="home_pts">115</td>
<td data-stat="arena_name">Intuit Dome</td>
<td data-stat="game_remarks"></td>
</tr>
</tbody>
</table>
</body>
</html>

View File

@@ -0,0 +1,94 @@
<!DOCTYPE html>
<html>
<head>
<title>2025-26 NBA Schedule - October | Basketball-Reference.com</title>
</head>
<body>
<table id="schedule" class="stats_table">
<thead>
<tr>
<th data-stat="date_game">Date</th>
<th data-stat="game_start_time">Start (ET)</th>
<th data-stat="visitor_team_name">Visitor/Neutral</th>
<th data-stat="visitor_pts">PTS</th>
<th data-stat="home_team_name">Home/Neutral</th>
<th data-stat="home_pts">PTS</th>
<th data-stat="arena_name">Arena</th>
<th data-stat="game_remarks">Notes</th>
</tr>
</thead>
<tbody>
<tr>
<th data-stat="date_game">Tue, Oct 22, 2025</th>
<td data-stat="game_start_time">7:30p</td>
<td data-stat="visitor_team_name">Boston Celtics</td>
<td data-stat="visitor_pts">112</td>
<td data-stat="home_team_name">Cleveland Cavaliers</td>
<td data-stat="home_pts">108</td>
<td data-stat="arena_name">Rocket Mortgage FieldHouse</td>
<td data-stat="game_remarks"></td>
</tr>
<tr>
<th data-stat="date_game">Tue, Oct 22, 2025</th>
<td data-stat="game_start_time">10:00p</td>
<td data-stat="visitor_team_name">Denver Nuggets</td>
<td data-stat="visitor_pts">119</td>
<td data-stat="home_team_name">Los Angeles Lakers</td>
<td data-stat="home_pts">127</td>
<td data-stat="arena_name">Crypto.com Arena</td>
<td data-stat="game_remarks"></td>
</tr>
<tr>
<th data-stat="date_game">Wed, Oct 23, 2025</th>
<td data-stat="game_start_time">7:00p</td>
<td data-stat="visitor_team_name">Houston Rockets</td>
<td data-stat="visitor_pts"></td>
<td data-stat="home_team_name">Oklahoma City Thunder</td>
<td data-stat="home_pts"></td>
<td data-stat="arena_name">Paycom Center</td>
<td data-stat="game_remarks"></td>
</tr>
<tr>
<th data-stat="date_game">Wed, Oct 23, 2025</th>
<td data-stat="game_start_time">7:30p</td>
<td data-stat="visitor_team_name">New York Knicks</td>
<td data-stat="visitor_pts"></td>
<td data-stat="home_team_name">Brooklyn Nets</td>
<td data-stat="home_pts"></td>
<td data-stat="arena_name">Barclays Center</td>
<td data-stat="game_remarks"></td>
</tr>
<tr>
<th data-stat="date_game">Thu, Oct 24, 2025</th>
<td data-stat="game_start_time">7:00p</td>
<td data-stat="visitor_team_name">Chicago Bulls</td>
<td data-stat="visitor_pts"></td>
<td data-stat="home_team_name">Miami Heat</td>
<td data-stat="home_pts"></td>
<td data-stat="arena_name">Kaseya Center</td>
<td data-stat="game_remarks"></td>
</tr>
<tr>
<th data-stat="date_game">Fri, Oct 25, 2025</th>
<td data-stat="game_start_time">7:30p</td>
<td data-stat="visitor_team_name">Toronto Raptors</td>
<td data-stat="visitor_pts"></td>
<td data-stat="home_team_name">Boston Celtics</td>
<td data-stat="home_pts"></td>
<td data-stat="arena_name">TD Garden</td>
<td data-stat="game_remarks"></td>
</tr>
<tr>
<th data-stat="date_game">Sat, Oct 26, 2025</th>
<td data-stat="game_start_time">8:00p</td>
<td data-stat="visitor_team_name">Minnesota Timberwolves</td>
<td data-stat="visitor_pts"></td>
<td data-stat="home_team_name">Dallas Mavericks</td>
<td data-stat="home_pts"></td>
<td data-stat="arena_name">American Airlines Center</td>
<td data-stat="game_remarks"></td>
</tr>
</tbody>
</table>
</body>
</html>

View File

@@ -0,0 +1,245 @@
{
"leagues": [
{
"id": "46",
"uid": "s:40~l:46",
"name": "National Basketball Association",
"abbreviation": "NBA"
}
],
"season": {
"type": 2,
"year": 2026
},
"day": {
"date": "2025-10-22T00:00:00Z"
},
"events": [
{
"id": "401584721",
"uid": "s:40~l:46~e:401584721",
"date": "2025-10-22T23:30:00Z",
"name": "Boston Celtics at Cleveland Cavaliers",
"shortName": "BOS @ CLE",
"competitions": [
{
"id": "401584721",
"uid": "s:40~l:46~e:401584721~c:401584721",
"date": "2025-10-22T23:30:00Z",
"attendance": 20562,
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "5064",
"fullName": "Rocket Mortgage FieldHouse",
"address": {
"city": "Cleveland",
"state": "OH"
},
"capacity": 19432,
"indoor": true
},
"competitors": [
{
"id": "5",
"uid": "s:40~l:46~t:5",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "5",
"uid": "s:40~l:46~t:5",
"location": "Cleveland",
"name": "Cavaliers",
"abbreviation": "CLE",
"displayName": "Cleveland Cavaliers"
},
"score": "108",
"winner": false
},
{
"id": "2",
"uid": "s:40~l:46~t:2",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "2",
"uid": "s:40~l:46~t:2",
"location": "Boston",
"name": "Celtics",
"abbreviation": "BOS",
"displayName": "Boston Celtics"
},
"score": "112",
"winner": true
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 4,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401584722",
"uid": "s:40~l:46~e:401584722",
"date": "2025-10-23T02:00:00Z",
"name": "Denver Nuggets at Los Angeles Lakers",
"shortName": "DEN @ LAL",
"competitions": [
{
"id": "401584722",
"uid": "s:40~l:46~e:401584722~c:401584722",
"date": "2025-10-23T02:00:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "5091",
"fullName": "Crypto.com Arena",
"address": {
"city": "Los Angeles",
"state": "CA"
},
"capacity": 19068,
"indoor": true
},
"competitors": [
{
"id": "13",
"uid": "s:40~l:46~t:13",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "13",
"uid": "s:40~l:46~t:13",
"location": "Los Angeles",
"name": "Lakers",
"abbreviation": "LAL",
"displayName": "Los Angeles Lakers"
},
"score": "127",
"winner": true
},
{
"id": "7",
"uid": "s:40~l:46~t:7",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "7",
"uid": "s:40~l:46~t:7",
"location": "Denver",
"name": "Nuggets",
"abbreviation": "DEN",
"displayName": "Denver Nuggets"
},
"score": "119",
"winner": false
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 4,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401584723",
"uid": "s:40~l:46~e:401584723",
"date": "2025-10-24T00:00:00Z",
"name": "Houston Rockets at Oklahoma City Thunder",
"shortName": "HOU @ OKC",
"competitions": [
{
"id": "401584723",
"uid": "s:40~l:46~e:401584723~c:401584723",
"date": "2025-10-24T00:00:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "4922",
"fullName": "Paycom Center",
"address": {
"city": "Oklahoma City",
"state": "OK"
},
"capacity": 18203,
"indoor": true
},
"competitors": [
{
"id": "25",
"uid": "s:40~l:46~t:25",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "25",
"uid": "s:40~l:46~t:25",
"location": "Oklahoma City",
"name": "Thunder",
"abbreviation": "OKC",
"displayName": "Oklahoma City Thunder"
},
"score": null,
"winner": null
},
{
"id": "10",
"uid": "s:40~l:46~t:10",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "10",
"uid": "s:40~l:46~t:10",
"location": "Houston",
"name": "Rockets",
"abbreviation": "HOU",
"displayName": "Houston Rockets"
},
"score": null,
"winner": null
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 0,
"type": {
"id": "1",
"name": "STATUS_SCHEDULED",
"state": "pre",
"completed": false
}
}
}
]
}
]
}

View File

@@ -0,0 +1,245 @@
{
"leagues": [
{
"id": "28",
"uid": "s:20~l:28",
"name": "National Football League",
"abbreviation": "NFL"
}
],
"season": {
"type": 2,
"year": 2025
},
"week": {
"number": 1
},
"events": [
{
"id": "401671801",
"uid": "s:20~l:28~e:401671801",
"date": "2025-09-07T20:00:00Z",
"name": "Kansas City Chiefs at Baltimore Ravens",
"shortName": "KC @ BAL",
"competitions": [
{
"id": "401671801",
"uid": "s:20~l:28~e:401671801~c:401671801",
"date": "2025-09-07T20:00:00Z",
"attendance": 71547,
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "3814",
"fullName": "M&T Bank Stadium",
"address": {
"city": "Baltimore",
"state": "MD"
},
"capacity": 71008,
"indoor": false
},
"competitors": [
{
"id": "33",
"uid": "s:20~l:28~t:33",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "33",
"uid": "s:20~l:28~t:33",
"location": "Baltimore",
"name": "Ravens",
"abbreviation": "BAL",
"displayName": "Baltimore Ravens"
},
"score": "20",
"winner": false
},
{
"id": "12",
"uid": "s:20~l:28~t:12",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "12",
"uid": "s:20~l:28~t:12",
"location": "Kansas City",
"name": "Chiefs",
"abbreviation": "KC",
"displayName": "Kansas City Chiefs"
},
"score": "27",
"winner": true
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 4,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401671802",
"uid": "s:20~l:28~e:401671802",
"date": "2025-09-08T17:00:00Z",
"name": "Philadelphia Eagles at Green Bay Packers",
"shortName": "PHI @ GB",
"competitions": [
{
"id": "401671802",
"uid": "s:20~l:28~e:401671802~c:401671802",
"date": "2025-09-08T17:00:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "3798",
"fullName": "Lambeau Field",
"address": {
"city": "Green Bay",
"state": "WI"
},
"capacity": 81441,
"indoor": false
},
"competitors": [
{
"id": "9",
"uid": "s:20~l:28~t:9",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "9",
"uid": "s:20~l:28~t:9",
"location": "Green Bay",
"name": "Packers",
"abbreviation": "GB",
"displayName": "Green Bay Packers"
},
"score": "34",
"winner": true
},
{
"id": "21",
"uid": "s:20~l:28~t:21",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "21",
"uid": "s:20~l:28~t:21",
"location": "Philadelphia",
"name": "Eagles",
"abbreviation": "PHI",
"displayName": "Philadelphia Eagles"
},
"score": "29",
"winner": false
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 4,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401671803",
"uid": "s:20~l:28~e:401671803",
"date": "2025-09-08T20:25:00Z",
"name": "Dallas Cowboys at Cleveland Browns",
"shortName": "DAL @ CLE",
"competitions": [
{
"id": "401671803",
"uid": "s:20~l:28~e:401671803~c:401671803",
"date": "2025-09-08T20:25:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "3653",
"fullName": "Cleveland Browns Stadium",
"address": {
"city": "Cleveland",
"state": "OH"
},
"capacity": 67431,
"indoor": false
},
"competitors": [
{
"id": "5",
"uid": "s:20~l:28~t:5",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "5",
"uid": "s:20~l:28~t:5",
"location": "Cleveland",
"name": "Browns",
"abbreviation": "CLE",
"displayName": "Cleveland Browns"
},
"score": null,
"winner": null
},
{
"id": "6",
"uid": "s:20~l:28~t:6",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "6",
"uid": "s:20~l:28~t:6",
"location": "Dallas",
"name": "Cowboys",
"abbreviation": "DAL",
"displayName": "Dallas Cowboys"
},
"score": null,
"winner": null
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 0,
"type": {
"id": "1",
"name": "STATUS_SCHEDULED",
"state": "pre",
"completed": false
}
}
}
]
}
]
}

View File

@@ -0,0 +1,245 @@
{
"leagues": [
{
"id": "90",
"uid": "s:70~l:90",
"name": "National Hockey League",
"abbreviation": "NHL"
}
],
"season": {
"type": 2,
"year": 2026
},
"day": {
"date": "2025-10-08T00:00:00Z"
},
"events": [
{
"id": "401671901",
"uid": "s:70~l:90~e:401671901",
"date": "2025-10-08T23:00:00Z",
"name": "Pittsburgh Penguins at Boston Bruins",
"shortName": "PIT @ BOS",
"competitions": [
{
"id": "401671901",
"uid": "s:70~l:90~e:401671901~c:401671901",
"date": "2025-10-08T23:00:00Z",
"attendance": 17850,
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "1823",
"fullName": "TD Garden",
"address": {
"city": "Boston",
"state": "MA"
},
"capacity": 17850,
"indoor": true
},
"competitors": [
{
"id": "1",
"uid": "s:70~l:90~t:1",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "1",
"uid": "s:70~l:90~t:1",
"location": "Boston",
"name": "Bruins",
"abbreviation": "BOS",
"displayName": "Boston Bruins"
},
"score": "4",
"winner": true
},
{
"id": "5",
"uid": "s:70~l:90~t:5",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "5",
"uid": "s:70~l:90~t:5",
"location": "Pittsburgh",
"name": "Penguins",
"abbreviation": "PIT",
"displayName": "Pittsburgh Penguins"
},
"score": "2",
"winner": false
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 3,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401671902",
"uid": "s:70~l:90~e:401671902",
"date": "2025-10-09T00:00:00Z",
"name": "Toronto Maple Leafs at Montreal Canadiens",
"shortName": "TOR @ MTL",
"competitions": [
{
"id": "401671902",
"uid": "s:70~l:90~e:401671902~c:401671902",
"date": "2025-10-09T00:00:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "1918",
"fullName": "Bell Centre",
"address": {
"city": "Montreal",
"state": "QC"
},
"capacity": 21302,
"indoor": true
},
"competitors": [
{
"id": "8",
"uid": "s:70~l:90~t:8",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "8",
"uid": "s:70~l:90~t:8",
"location": "Montreal",
"name": "Canadiens",
"abbreviation": "MTL",
"displayName": "Montreal Canadiens"
},
"score": "3",
"winner": false
},
{
"id": "10",
"uid": "s:70~l:90~t:10",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "10",
"uid": "s:70~l:90~t:10",
"location": "Toronto",
"name": "Maple Leafs",
"abbreviation": "TOR",
"displayName": "Toronto Maple Leafs"
},
"score": "5",
"winner": true
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 3,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401671903",
"uid": "s:70~l:90~e:401671903",
"date": "2025-10-09T02:00:00Z",
"name": "Vegas Golden Knights at Los Angeles Kings",
"shortName": "VGK @ LAK",
"competitions": [
{
"id": "401671903",
"uid": "s:70~l:90~e:401671903~c:401671903",
"date": "2025-10-09T02:00:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "1816",
"fullName": "Crypto.com Arena",
"address": {
"city": "Los Angeles",
"state": "CA"
},
"capacity": 18230,
"indoor": true
},
"competitors": [
{
"id": "26",
"uid": "s:70~l:90~t:26",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "26",
"uid": "s:70~l:90~t:26",
"location": "Los Angeles",
"name": "Kings",
"abbreviation": "LAK",
"displayName": "Los Angeles Kings"
},
"score": null,
"winner": null
},
{
"id": "54",
"uid": "s:70~l:90~t:54",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "54",
"uid": "s:70~l:90~t:54",
"location": "Vegas",
"name": "Golden Knights",
"abbreviation": "VGK",
"displayName": "Vegas Golden Knights"
},
"score": null,
"winner": null
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 0,
"type": {
"id": "1",
"name": "STATUS_SCHEDULED",
"state": "pre",
"completed": false
}
}
}
]
}
]
}

View File

@@ -0,0 +1,245 @@
{
"leagues": [
{
"id": "761",
"uid": "s:600~l:761",
"name": "National Women's Soccer League",
"abbreviation": "NWSL"
}
],
"season": {
"type": 2,
"year": 2026
},
"day": {
"date": "2026-04-10T00:00:00Z"
},
"events": [
{
"id": "401672201",
"uid": "s:600~l:761~e:401672201",
"date": "2026-04-10T23:00:00Z",
"name": "Angel City FC at Portland Thorns",
"shortName": "LA @ POR",
"competitions": [
{
"id": "401672201",
"uid": "s:600~l:761~e:401672201~c:401672201",
"date": "2026-04-10T23:00:00Z",
"attendance": 22000,
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "8070",
"fullName": "Providence Park",
"address": {
"city": "Portland",
"state": "OR"
},
"capacity": 25218,
"indoor": false
},
"competitors": [
{
"id": "15625",
"uid": "s:600~l:761~t:15625",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "15625",
"uid": "s:600~l:761~t:15625",
"location": "Portland",
"name": "Thorns FC",
"abbreviation": "POR",
"displayName": "Portland Thorns FC"
},
"score": "2",
"winner": true
},
{
"id": "19934",
"uid": "s:600~l:761~t:19934",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "19934",
"uid": "s:600~l:761~t:19934",
"location": "Los Angeles",
"name": "Angel City",
"abbreviation": "LA",
"displayName": "Angel City FC"
},
"score": "1",
"winner": false
}
],
"status": {
"clock": 90,
"displayClock": "90'",
"period": 2,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401672202",
"uid": "s:600~l:761~e:401672202",
"date": "2026-04-11T00:00:00Z",
"name": "Orlando Pride at North Carolina Courage",
"shortName": "ORL @ NC",
"competitions": [
{
"id": "401672202",
"uid": "s:600~l:761~e:401672202~c:401672202",
"date": "2026-04-11T00:00:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "8073",
"fullName": "WakeMed Soccer Park",
"address": {
"city": "Cary",
"state": "NC"
},
"capacity": 10000,
"indoor": false
},
"competitors": [
{
"id": "15618",
"uid": "s:600~l:761~t:15618",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "15618",
"uid": "s:600~l:761~t:15618",
"location": "North Carolina",
"name": "Courage",
"abbreviation": "NC",
"displayName": "North Carolina Courage"
},
"score": "3",
"winner": true
},
{
"id": "15626",
"uid": "s:600~l:761~t:15626",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "15626",
"uid": "s:600~l:761~t:15626",
"location": "Orlando",
"name": "Pride",
"abbreviation": "ORL",
"displayName": "Orlando Pride"
},
"score": "1",
"winner": false
}
],
"status": {
"clock": 90,
"displayClock": "90'",
"period": 2,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401672203",
"uid": "s:600~l:761~e:401672203",
"date": "2026-04-11T02:00:00Z",
"name": "San Diego Wave at Bay FC",
"shortName": "SD @ BAY",
"competitions": [
{
"id": "401672203",
"uid": "s:600~l:761~e:401672203~c:401672203",
"date": "2026-04-11T02:00:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "3945",
"fullName": "PayPal Park",
"address": {
"city": "San Jose",
"state": "CA"
},
"capacity": 18000,
"indoor": false
},
"competitors": [
{
"id": "25645",
"uid": "s:600~l:761~t:25645",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "25645",
"uid": "s:600~l:761~t:25645",
"location": "Bay Area",
"name": "FC",
"abbreviation": "BAY",
"displayName": "Bay FC"
},
"score": null,
"winner": null
},
{
"id": "22638",
"uid": "s:600~l:761~t:22638",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "22638",
"uid": "s:600~l:761~t:22638",
"location": "San Diego",
"name": "Wave FC",
"abbreviation": "SD",
"displayName": "San Diego Wave FC"
},
"score": null,
"winner": null
}
],
"status": {
"clock": 0,
"displayClock": "0'",
"period": 0,
"type": {
"id": "1",
"name": "STATUS_SCHEDULED",
"state": "pre",
"completed": false
}
}
}
]
}
]
}

View File

@@ -0,0 +1,245 @@
{
"leagues": [
{
"id": "59",
"uid": "s:40~l:59",
"name": "Women's National Basketball Association",
"abbreviation": "WNBA"
}
],
"season": {
"type": 2,
"year": 2026
},
"day": {
"date": "2026-05-20T00:00:00Z"
},
"events": [
{
"id": "401672101",
"uid": "s:40~l:59~e:401672101",
"date": "2026-05-20T23:00:00Z",
"name": "Las Vegas Aces at New York Liberty",
"shortName": "LV @ NY",
"competitions": [
{
"id": "401672101",
"uid": "s:40~l:59~e:401672101~c:401672101",
"date": "2026-05-20T23:00:00Z",
"attendance": 17732,
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "4346",
"fullName": "Barclays Center",
"address": {
"city": "Brooklyn",
"state": "NY"
},
"capacity": 17732,
"indoor": true
},
"competitors": [
{
"id": "9",
"uid": "s:40~l:59~t:9",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "9",
"uid": "s:40~l:59~t:9",
"location": "New York",
"name": "Liberty",
"abbreviation": "NY",
"displayName": "New York Liberty"
},
"score": "92",
"winner": true
},
{
"id": "20",
"uid": "s:40~l:59~t:20",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "20",
"uid": "s:40~l:59~t:20",
"location": "Las Vegas",
"name": "Aces",
"abbreviation": "LV",
"displayName": "Las Vegas Aces"
},
"score": "88",
"winner": false
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 4,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401672102",
"uid": "s:40~l:59~e:401672102",
"date": "2026-05-21T00:00:00Z",
"name": "Connecticut Sun at Chicago Sky",
"shortName": "CONN @ CHI",
"competitions": [
{
"id": "401672102",
"uid": "s:40~l:59~e:401672102~c:401672102",
"date": "2026-05-21T00:00:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "8086",
"fullName": "Wintrust Arena",
"address": {
"city": "Chicago",
"state": "IL"
},
"capacity": 10387,
"indoor": true
},
"competitors": [
{
"id": "6",
"uid": "s:40~l:59~t:6",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "6",
"uid": "s:40~l:59~t:6",
"location": "Chicago",
"name": "Sky",
"abbreviation": "CHI",
"displayName": "Chicago Sky"
},
"score": "78",
"winner": false
},
{
"id": "5",
"uid": "s:40~l:59~t:5",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "5",
"uid": "s:40~l:59~t:5",
"location": "Connecticut",
"name": "Sun",
"abbreviation": "CONN",
"displayName": "Connecticut Sun"
},
"score": "85",
"winner": true
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 4,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401672103",
"uid": "s:40~l:59~e:401672103",
"date": "2026-05-21T02:00:00Z",
"name": "Phoenix Mercury at Seattle Storm",
"shortName": "PHX @ SEA",
"competitions": [
{
"id": "401672103",
"uid": "s:40~l:59~e:401672103~c:401672103",
"date": "2026-05-21T02:00:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "3097",
"fullName": "Climate Pledge Arena",
"address": {
"city": "Seattle",
"state": "WA"
},
"capacity": 18100,
"indoor": true
},
"competitors": [
{
"id": "11",
"uid": "s:40~l:59~t:11",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "11",
"uid": "s:40~l:59~t:11",
"location": "Seattle",
"name": "Storm",
"abbreviation": "SEA",
"displayName": "Seattle Storm"
},
"score": null,
"winner": null
},
{
"id": "8",
"uid": "s:40~l:59~t:8",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "8",
"uid": "s:40~l:59~t:8",
"location": "Phoenix",
"name": "Mercury",
"abbreviation": "PHX",
"displayName": "Phoenix Mercury"
},
"score": null,
"winner": null
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 0,
"type": {
"id": "1",
"name": "STATUS_SCHEDULED",
"state": "pre",
"completed": false
}
}
}
]
}
]
}

View File

@@ -0,0 +1,269 @@
"""Tests for alias loaders."""
import pytest
import json
import tempfile
from datetime import date
from pathlib import Path
from sportstime_parser.normalizers.alias_loader import (
TeamAliasLoader,
StadiumAliasLoader,
)
from sportstime_parser.models.aliases import AliasType
class TestTeamAliasLoader:
    """Tests for TeamAliasLoader class."""

    @pytest.fixture
    def sample_aliases_file(self, tmp_path):
        """Write a three-entry team alias file and return its path.

        The file is created under pytest's per-test ``tmp_path`` directory
        rather than with ``NamedTemporaryFile(delete=False)``, so no stray
        file is left behind in the system temp directory after each test.
        """
        data = [
            {
                "id": "1",
                "team_canonical_id": "nba_okc",
                "alias_type": "name",
                "alias_value": "Seattle SuperSonics",
                "valid_from": "1967-01-01",
                "valid_until": "2008-07-02",
            },
            {
                "id": "2",
                "team_canonical_id": "nba_okc",
                "alias_type": "name",
                "alias_value": "Oklahoma City Thunder",
                "valid_from": "2008-07-03",
                "valid_until": None,
            },
            {
                "id": "3",
                "team_canonical_id": "nba_okc",
                "alias_type": "abbreviation",
                "alias_value": "OKC",
                "valid_from": "2008-07-03",
                "valid_until": None,
            },
        ]
        aliases_path = tmp_path / "team_aliases.json"
        aliases_path.write_text(json.dumps(data), encoding="utf-8")
        return aliases_path

    def test_load_aliases(self, sample_aliases_file):
        """Test loading aliases from file."""
        loader = TeamAliasLoader(sample_aliases_file)
        loader.load()
        assert len(loader._aliases) == 3

    def test_resolve_current_alias(self, sample_aliases_file):
        """Test resolving a current alias."""
        loader = TeamAliasLoader(sample_aliases_file)
        # No check_date is passed, so the open-ended Thunder alias must match.
        result = loader.resolve("Oklahoma City Thunder")
        assert result == "nba_okc"
        # Abbreviation should also work
        result = loader.resolve("OKC")
        assert result == "nba_okc"

    def test_resolve_historical_alias(self, sample_aliases_file):
        """Test resolving a historical alias with date."""
        loader = TeamAliasLoader(sample_aliases_file)
        # Historical date should resolve SuperSonics
        result = loader.resolve("Seattle SuperSonics", check_date=date(2007, 1, 1))
        assert result == "nba_okc"
        # After relocation, SuperSonics shouldn't resolve
        result = loader.resolve("Seattle SuperSonics", check_date=date(2010, 1, 1))
        assert result is None

    def test_resolve_case_insensitive(self, sample_aliases_file):
        """Test case insensitive resolution."""
        loader = TeamAliasLoader(sample_aliases_file)
        result = loader.resolve("oklahoma city thunder")
        assert result == "nba_okc"
        result = loader.resolve("okc")
        assert result == "nba_okc"

    def test_resolve_with_type_filter(self, sample_aliases_file):
        """Test filtering by alias type."""
        loader = TeamAliasLoader(sample_aliases_file)
        # Should find when searching all types
        result = loader.resolve("OKC")
        assert result == "nba_okc"
        # Should not find when filtering to name only
        result = loader.resolve("OKC", alias_types=[AliasType.NAME])
        assert result is None

    def test_get_aliases_for_team(self, sample_aliases_file):
        """Test getting all aliases for a team."""
        loader = TeamAliasLoader(sample_aliases_file)
        aliases = loader.get_aliases_for_team("nba_okc")
        assert len(aliases) == 3
        # Filter by current date
        aliases = loader.get_aliases_for_team(
            "nba_okc", check_date=date(2020, 1, 1)
        )
        assert len(aliases) == 2  # Thunder name + OKC abbreviation

    def test_missing_file(self):
        """Test handling of missing file."""
        loader = TeamAliasLoader(Path("/nonexistent/file.json"))
        loader.load()  # Should not raise
        assert len(loader._aliases) == 0
class TestStadiumAliasLoader:
    """Tests for StadiumAliasLoader class."""

    @pytest.fixture
    def sample_stadium_aliases(self, tmp_path):
        """Write a two-entry stadium alias file and return its path.

        Uses pytest's ``tmp_path`` so the file is owned by the test run
        instead of being leaked by ``NamedTemporaryFile(delete=False)``.
        """
        data = [
            {
                "alias_name": "Crypto.com Arena",
                "stadium_canonical_id": "crypto_arena_los_angeles_ca",
                "valid_from": "2021-12-25",
                "valid_until": None,
            },
            {
                "alias_name": "Staples Center",
                "stadium_canonical_id": "crypto_arena_los_angeles_ca",
                "valid_from": "1999-10-17",
                "valid_until": "2021-12-24",
            },
        ]
        aliases_path = tmp_path / "stadium_aliases.json"
        aliases_path.write_text(json.dumps(data), encoding="utf-8")
        return aliases_path

    def test_load_stadium_aliases(self, sample_stadium_aliases):
        """Test loading stadium aliases."""
        loader = StadiumAliasLoader(sample_stadium_aliases)
        loader.load()
        assert len(loader._aliases) == 2

    def test_resolve_current_name(self, sample_stadium_aliases):
        """Test resolving current stadium name."""
        loader = StadiumAliasLoader(sample_stadium_aliases)
        result = loader.resolve("Crypto.com Arena")
        assert result == "crypto_arena_los_angeles_ca"

    def test_resolve_historical_name(self, sample_stadium_aliases):
        """Test resolving historical stadium name."""
        loader = StadiumAliasLoader(sample_stadium_aliases)
        # Staples Center in 2020
        result = loader.resolve("Staples Center", check_date=date(2020, 1, 1))
        assert result == "crypto_arena_los_angeles_ca"
        # Staples Center after rename shouldn't resolve
        result = loader.resolve("Staples Center", check_date=date(2023, 1, 1))
        assert result is None

    def test_date_boundary(self, sample_stadium_aliases):
        """Test exact date boundaries (both ends are expected to be inclusive)."""
        loader = StadiumAliasLoader(sample_stadium_aliases)
        # Last day of Staples Center
        result = loader.resolve("Staples Center", check_date=date(2021, 12, 24))
        assert result == "crypto_arena_los_angeles_ca"
        # First day of Crypto.com Arena
        result = loader.resolve("Crypto.com Arena", check_date=date(2021, 12, 25))
        assert result == "crypto_arena_los_angeles_ca"

    def test_get_all_names(self, sample_stadium_aliases):
        """Test getting all stadium names."""
        loader = StadiumAliasLoader(sample_stadium_aliases)
        names = loader.get_all_names()
        assert len(names) == 2
        assert "Crypto.com Arena" in names
        assert "Staples Center" in names
class TestDateRangeHandling:
    """Tests for date range edge cases in aliases."""

    @pytest.fixture
    def date_range_aliases(self, tmp_path):
        """Write aliases covering open-ended, future-only and past-only ranges.

        The file is written under pytest's ``tmp_path`` so it is not leaked
        into the system temp directory the way
        ``NamedTemporaryFile(delete=False)`` would leave it.
        """
        data = [
            {
                "id": "1",
                "team_canonical_id": "test_team",
                "alias_type": "name",
                "alias_value": "Always Valid",
                "valid_from": None,
                "valid_until": None,
            },
            {
                "id": "2",
                "team_canonical_id": "test_team",
                "alias_type": "name",
                "alias_value": "Future Only",
                "valid_from": "2030-01-01",
                "valid_until": None,
            },
            {
                "id": "3",
                "team_canonical_id": "test_team",
                "alias_type": "name",
                "alias_value": "Past Only",
                "valid_from": None,
                "valid_until": "2000-01-01",
            },
        ]
        aliases_path = tmp_path / "date_range_aliases.json"
        aliases_path.write_text(json.dumps(data), encoding="utf-8")
        return aliases_path

    def test_always_valid_alias(self, date_range_aliases):
        """Test alias with no date restrictions."""
        loader = TeamAliasLoader(date_range_aliases)
        result = loader.resolve("Always Valid", check_date=date(2025, 1, 1))
        assert result == "test_team"
        result = loader.resolve("Always Valid", check_date=date(1990, 1, 1))
        assert result == "test_team"

    def test_future_only_alias(self, date_range_aliases):
        """Test alias that starts in the future."""
        loader = TeamAliasLoader(date_range_aliases)
        # Before valid_from
        result = loader.resolve("Future Only", check_date=date(2025, 1, 1))
        assert result is None
        # After valid_from
        result = loader.resolve("Future Only", check_date=date(2035, 1, 1))
        assert result == "test_team"

    def test_past_only_alias(self, date_range_aliases):
        """Test alias that expired in the past."""
        loader = TeamAliasLoader(date_range_aliases)
        # Before valid_until
        result = loader.resolve("Past Only", check_date=date(1990, 1, 1))
        assert result == "test_team"
        # After valid_until
        result = loader.resolve("Past Only", check_date=date(2025, 1, 1))
        assert result is None

View File

@@ -0,0 +1,187 @@
"""Tests for canonical ID generation."""
import pytest
from datetime import datetime, date
from sportstime_parser.normalizers.canonical_id import (
generate_game_id,
generate_team_id,
generate_team_id_from_abbrev,
generate_stadium_id,
parse_game_id,
normalize_string,
)
class TestNormalizeString:
    """Tests for normalize_string function."""

    def test_basic_normalization(self):
        """Test basic string normalization."""
        assert normalize_string("New York") == "new_york"
        assert normalize_string("Los Angeles") == "los_angeles"

    def test_removes_special_characters(self):
        """Test that special characters are removed."""
        assert normalize_string("AT&T Stadium") == "att_stadium"
        assert normalize_string("St. Louis") == "st_louis"
        assert normalize_string("O'Brien Field") == "obrien_field"

    def test_collapses_whitespace(self):
        """Test that multiple spaces are collapsed."""
        # The inputs must contain *runs* of whitespace; a single space would
        # only repeat test_basic_normalization and never exercise collapsing.
        assert normalize_string("New   York") == "new_york"
        assert normalize_string("  Los Angeles  ") == "los_angeles"

    def test_empty_string(self):
        """Test empty and whitespace-only string handling."""
        assert normalize_string("") == ""
        assert normalize_string("   ") == ""

    def test_unicode_normalization(self):
        """Test unicode characters are handled."""
        assert normalize_string("Café") == "cafe"
        assert normalize_string("José") == "jose"
class TestGenerateGameId:
    """Exercises generate_game_id across date types, doubleheaders and casing."""

    def test_basic_game_id(self):
        """A plain ``date`` yields the sport/season/date/away/home layout."""
        christmas = date(2025, 12, 25)
        result = generate_game_id(
            sport="nba",
            season=2025,
            away_abbrev="bos",
            home_abbrev="lal",
            game_date=christmas,
        )
        assert result == "game_nba_2025_20251225_bos_lal"

    def test_game_id_with_datetime(self):
        """A ``datetime`` is accepted; the expected ID carries only the date part."""
        first_pitch = datetime(2026, 4, 1, 19, 0)
        result = generate_game_id(
            sport="mlb",
            season=2026,
            away_abbrev="nyy",
            home_abbrev="bos",
            game_date=first_pitch,
        )
        assert result == "game_mlb_2026_20260401_nyy_bos"

    def test_game_id_with_game_number(self):
        """Doubleheader games are told apart by a trailing game number."""
        shared = dict(
            sport="mlb",
            season=2026,
            away_abbrev="nyy",
            home_abbrev="bos",
            game_date=date(2026, 7, 4),
        )
        opener = generate_game_id(**shared, game_number=1)
        nightcap = generate_game_id(**shared, game_number=2)
        assert opener == "game_mlb_2026_20260704_nyy_bos_1"
        assert nightcap == "game_mlb_2026_20260704_nyy_bos_2"

    def test_sport_lowercased(self):
        """Upper-case sport and abbreviations come out lower-cased."""
        result = generate_game_id(
            sport="NBA",
            season=2025,
            away_abbrev="BOS",
            home_abbrev="LAL",
            game_date=date(2025, 12, 25),
        )
        assert result == "game_nba_2025_20251225_bos_lal"
class TestParseGameId:
    """Exercises parse_game_id on well-formed and malformed IDs."""

    def test_parse_basic_game_id(self):
        """An ID without a game number parses into its individual fields."""
        fields = parse_game_id("game_nba_2025_20251225_bos_lal")
        expected = {
            "sport": "nba",
            "season": 2025,
            "away_abbrev": "bos",
            "home_abbrev": "lal",
            "year": 2025,
            "month": 12,
            "day": 25,
        }
        for key, value in expected.items():
            assert fields[key] == value
        assert fields["game_number"] is None

    def test_parse_game_id_with_game_number(self):
        """A trailing game number is returned as an integer."""
        fields = parse_game_id("game_mlb_2026_20260704_nyy_bos_2")
        expected = {
            "sport": "mlb",
            "season": 2026,
            "away_abbrev": "nyy",
            "home_abbrev": "bos",
            "year": 2026,
            "month": 7,
            "day": 4,
            "game_number": 2,
        }
        for key, value in expected.items():
            assert fields[key] == value

    def test_parse_invalid_game_id(self):
        """Malformed IDs are rejected with ValueError."""
        malformed_ids = (
            "invalid",
            "nba_2025_bos",  # Missing game_ prefix
            "",
            "game_nba_2025_bos_lal",  # Missing date
        )
        for bad_id in malformed_ids:
            with pytest.raises(ValueError):
                parse_game_id(bad_id)
class TestGenerateTeamId:
    """Exercises generate_team_id (city + name form)."""

    def test_basic_team_id(self):
        """City and name are joined under the sport prefix."""
        result = generate_team_id(sport="nba", city="Los Angeles", name="Lakers")
        expected = "team_nba_los_angeles_lakers"
        assert result == expected

    def test_team_id_normalizes_input(self):
        """Mixed-case sport, city and name all come out lower-cased."""
        result = generate_team_id(sport="NBA", city="New York", name="Yankees")
        expected = "team_nba_new_york_yankees"
        assert result == expected
class TestGenerateTeamIdFromAbbrev:
    """Exercises generate_team_id_from_abbrev."""

    def test_basic_team_id_from_abbrev(self):
        """The abbreviation is appended to the sport prefix."""
        result = generate_team_id_from_abbrev(sport="nba", abbreviation="LAL")
        expected = "team_nba_lal"
        assert result == expected

    def test_lowercases_abbreviation(self):
        """Upper-case sport and abbreviation come out lower-cased."""
        result = generate_team_id_from_abbrev(sport="MLB", abbreviation="NYY")
        expected = "team_mlb_nyy"
        assert result == expected
class TestGenerateStadiumId:
    """Exercises generate_stadium_id."""

    def test_basic_stadium_id(self):
        """A plain two-word name becomes an underscore-joined slug."""
        result = generate_stadium_id(sport="mlb", name="Fenway Park")
        expected = "stadium_mlb_fenway_park"
        assert result == expected

    def test_stadium_id_special_characters(self):
        """An ampersand in the name does not appear in the ID."""
        result = generate_stadium_id(sport="nfl", name="AT&T Stadium")
        expected = "stadium_nfl_att_stadium"
        assert result == expected

    def test_stadium_id_with_sponsor(self):
        """A dot inside a sponsor name does not appear in the ID."""
        result = generate_stadium_id(sport="nba", name="Crypto.com Arena")
        expected = "stadium_nba_cryptocom_arena"
        assert result == expected

View File

@@ -0,0 +1,194 @@
"""Tests for fuzzy string matching utilities."""
import pytest
from sportstime_parser.normalizers.fuzzy import (
normalize_for_matching,
fuzzy_match_team,
fuzzy_match_stadium,
exact_match,
best_match,
calculate_similarity,
MatchCandidate,
)
class TestNormalizeForMatching:
    """Exercises normalize_for_matching."""

    def test_basic_normalization(self):
        """Input is lower-cased and surrounding whitespace is dropped."""
        lakers = normalize_for_matching("Los Angeles Lakers")
        celtics = normalize_for_matching(" Boston Celtics ")
        assert lakers == "los angeles lakers"
        assert celtics == "boston celtics"

    def test_removes_common_prefixes(self):
        """Leading "The" / "Team" words are stripped."""
        with_article = normalize_for_matching("The Boston Celtics")
        with_team_word = normalize_for_matching("Team Lakers")
        assert with_article == "boston celtics"
        assert with_team_word == "lakers"

    def test_removes_stadium_suffixes(self):
        """Trailing venue words (Park, Arena, Field, Center) are stripped."""
        expectations = [
            ("Fenway Park", "fenway"),
            ("Madison Square Garden Arena", "madison square garden"),
            ("Wrigley Field", "wrigley"),
            ("TD Garden Center", "td garden"),
        ]
        for raw_name, normalized in expectations:
            assert normalize_for_matching(raw_name) == normalized
class TestExactMatch:
    """Exercises exact_match against primary names and aliases."""

    def test_exact_match_primary_name(self):
        """A candidate's primary name resolves to its canonical ID."""
        lakers = MatchCandidate("nba_lal", "Los Angeles Lakers", ["Lakers", "LAL"])
        celtics = MatchCandidate("nba_bos", "Boston Celtics", ["Celtics", "BOS"])
        pool = [lakers, celtics]
        assert exact_match("Los Angeles Lakers", pool) == "nba_lal"
        assert exact_match("Boston Celtics", pool) == "nba_bos"

    def test_exact_match_alias(self):
        """Any listed alias resolves to the candidate's canonical ID."""
        pool = [MatchCandidate("nba_lal", "Los Angeles Lakers", ["Lakers", "LAL"])]
        for alias in ("Lakers", "LAL"):
            assert exact_match(alias, pool) == "nba_lal"

    def test_case_insensitive(self):
        """Letter case of the query does not matter."""
        pool = [MatchCandidate("nba_lal", "Los Angeles Lakers", ["Lakers"])]
        for query in ("los angeles lakers", "LAKERS"):
            assert exact_match(query, pool) == "nba_lal"

    def test_no_match(self):
        """An unknown name yields None."""
        pool = [MatchCandidate("nba_lal", "Los Angeles Lakers", ["Lakers"])]
        outcome = exact_match("New York Knicks", pool)
        assert outcome is None
class TestFuzzyMatchTeam:
    """Exercises fuzzy_match_team ranking, thresholding and result limits."""

    def test_close_match(self):
        """The best-ranked hit for "LA Lakers" is the Lakers, not the Clippers."""
        pool = [
            MatchCandidate("nba_lal", "Los Angeles Lakers", ["Lakers", "LA Lakers"]),
            MatchCandidate("nba_lac", "Los Angeles Clippers", ["Clippers", "LA Clippers"]),
        ]
        hits = fuzzy_match_team("LA Lakers", pool, threshold=70)
        assert len(hits) > 0
        top_hit = hits[0]
        assert top_hit.canonical_id == "nba_lal"

    def test_partial_name_match(self):
        """A bare nickname still finds its team."""
        pool = [MatchCandidate("nba_bos", "Boston Celtics", ["Celtics", "BOS"])]
        hits = fuzzy_match_team("Celtics", pool, threshold=80)
        assert len(hits) > 0
        top_hit = hits[0]
        assert top_hit.canonical_id == "nba_bos"

    def test_threshold_filtering(self):
        """An unrelated query produces no hits at a high threshold."""
        pool = [MatchCandidate("nba_bos", "Boston Celtics", [])]
        # Very different string should not match at high threshold
        hits = fuzzy_match_team("xyz123", pool, threshold=90)
        assert len(hits) == 0

    def test_returns_top_n(self):
        """``top_n`` caps the number of returned hits."""
        pool = [
            MatchCandidate(canonical_id, full_name, [])
            for canonical_id, full_name in (
                ("nba_lal", "Los Angeles Lakers"),
                ("nba_lac", "Los Angeles Clippers"),
                ("mlb_lad", "Los Angeles Dodgers"),
            )
        ]
        hits = fuzzy_match_team("Los Angeles", pool, threshold=50, top_n=2)
        assert len(hits) <= 2
class TestFuzzyMatchStadium:
    """Exercises fuzzy_match_stadium."""

    def test_stadium_match(self):
        """A stadium name with an extra trailing city still ranks the right venue first."""
        fenway = MatchCandidate("fenway", "Fenway Park", ["Fenway"])
        garden = MatchCandidate("td_garden", "TD Garden", ["Boston Garden"])
        hits = fuzzy_match_stadium("Fenway Park Boston", [fenway, garden], threshold=70)
        assert len(hits) > 0
        top_hit = hits[0]
        assert top_hit.canonical_id == "fenway"

    def test_naming_rights_change(self):
        """A former venue name listed as an alias produces at least one hit."""
        former_names = ["Oracle Arena", "Oakland Coliseum Arena"]
        pool = [MatchCandidate("chase_center", "Chase Center", former_names)]
        # Should match on alias
        hits = fuzzy_match_stadium("Oracle Arena", pool, threshold=70)
        assert len(hits) > 0
class TestBestMatch:
    """Exercises best_match: exact hit first, fuzzy fallback, threshold cut-off."""

    def test_prefers_exact_match(self):
        """An exact alias hit is returned with confidence 100."""
        pool = [
            MatchCandidate("nba_lal", "Los Angeles Lakers", ["Lakers"]),
            MatchCandidate("nba_bos", "Boston Celtics", ["Celtics"]),
        ]
        outcome = best_match("Lakers", pool)
        assert outcome is not None
        assert outcome.canonical_id == "nba_lal"
        assert outcome.confidence == 100  # Exact match

    def test_falls_back_to_fuzzy(self):
        """A near-miss query still resolves, with confidence below 100."""
        pool = [MatchCandidate("nba_lal", "Los Angeles Lakers", ["Lakers"])]
        outcome = best_match("LA Laker", pool, threshold=70)
        assert outcome is not None
        assert outcome.confidence < 100  # Fuzzy match

    def test_no_match_below_threshold(self):
        """An unrelated query under a high threshold yields None."""
        pool = [MatchCandidate("nba_lal", "Los Angeles Lakers", [])]
        outcome = best_match("xyz123", pool, threshold=90)
        assert outcome is None
class TestCalculateSimilarity:
    """Exercises calculate_similarity score ranges."""

    def test_identical_strings(self):
        """Two equal strings score exactly 100."""
        name = "Boston Celtics"
        assert calculate_similarity(name, "Boston Celtics") == 100

    def test_similar_strings(self):
        """The same words in swapped order score at least 90."""
        swapped_score = calculate_similarity("Boston Celtics", "Celtics Boston")
        assert swapped_score >= 90

    def test_different_strings(self):
        """Unrelated team names score below 50."""
        unrelated_score = calculate_similarity("Boston Celtics", "Los Angeles Lakers")
        assert unrelated_score < 50

    def test_empty_string(self):
        """An empty first argument scores 0."""
        empty_score = calculate_similarity("", "Boston Celtics")
        assert empty_score == 0

View File

@@ -0,0 +1 @@
"""Tests for scrapers module."""

View File

@@ -0,0 +1,257 @@
"""Tests for MLB scraper."""
from datetime import datetime
from unittest.mock import patch
import pytest
from sportstime_parser.scrapers.mlb import MLBScraper, create_mlb_scraper
from sportstime_parser.scrapers.base import RawGameData
from sportstime_parser.tests.fixtures import (
load_json_fixture,
MLB_ESPN_SCOREBOARD_JSON,
)
class TestMLBScraperInit:
    """Construction-time properties of MLBScraper."""

    def test_creates_scraper_with_season(self):
        """Direct construction records the sport and season."""
        mlb = MLBScraper(season=2026)
        assert (mlb.sport, mlb.season) == ("mlb", 2026)

    def test_factory_function_creates_scraper(self):
        """create_mlb_scraper returns an MLBScraper for the requested season."""
        mlb = create_mlb_scraper(season=2026)
        assert isinstance(mlb, MLBScraper)
        assert mlb.season == 2026

    def test_expected_game_count(self):
        """The expected game count for MLB is 2430."""
        expected_total = 2430
        assert MLBScraper(season=2026).expected_game_count == expected_total

    def test_sources_in_priority_order(self):
        """Sources come back as Baseball Reference, then MLB API, then ESPN."""
        priority = MLBScraper(season=2026)._get_sources()
        assert priority == ["baseball_reference", "mlb_api", "espn"]
class TestESPNParsing:
    """Parsing of the MLB ESPN scoreboard fixture."""

    def test_parses_completed_games(self):
        """Both final games are parsed, including scores and venue."""
        payload = load_json_fixture(MLB_ESPN_SCOREBOARD_JSON)
        parsed = MLBScraper(season=2026)._parse_espn_response(
            payload, "http://espn.com/api"
        )
        finals = [game for game in parsed if game.status == "final"]
        assert len(finals) == 2
        # Yankees @ Red Sox
        yankees_at_sox = next(
            game for game in finals if game.away_team_raw == "New York Yankees"
        )
        assert yankees_at_sox.home_team_raw == "Boston Red Sox"
        assert yankees_at_sox.away_score == 3
        assert yankees_at_sox.home_score == 5
        assert yankees_at_sox.stadium_raw == "Fenway Park"

    def test_parses_scheduled_games(self):
        """The single scheduled game is parsed with teams and venue."""
        payload = load_json_fixture(MLB_ESPN_SCOREBOARD_JSON)
        parsed = MLBScraper(season=2026)._parse_espn_response(
            payload, "http://espn.com/api"
        )
        upcoming = [game for game in parsed if game.status == "scheduled"]
        assert len(upcoming) == 1
        dodgers_at_giants = upcoming[0]
        assert dodgers_at_giants.away_team_raw == "Los Angeles Dodgers"
        assert dodgers_at_giants.home_team_raw == "San Francisco Giants"
        assert dodgers_at_giants.stadium_raw == "Oracle Park"

    def test_parses_venue_info(self):
        """Every parsed game carries a raw stadium name."""
        payload = load_json_fixture(MLB_ESPN_SCOREBOARD_JSON)
        parsed = MLBScraper(season=2026)._parse_espn_response(
            payload, "http://espn.com/api"
        )
        assert all(game.stadium_raw is not None for game in parsed)
class TestGameNormalization:
    """Output of MLBScraper._normalize_games for resolvable and unresolvable input."""

    def test_normalizes_games_with_canonical_ids(self):
        """A resolvable final game yields one game with canonical IDs and scores."""
        yankees_at_sox = RawGameData(
            game_date=datetime(2026, 4, 15),
            home_team_raw="Boston Red Sox",
            away_team_raw="New York Yankees",
            stadium_raw="Fenway Park",
            home_score=5,
            away_score=3,
            status="final",
            source_url="http://example.com",
        )
        normalized, _review = MLBScraper(season=2026)._normalize_games(
            [yankees_at_sox]
        )
        assert len(normalized) == 1
        game = normalized[0]
        # Check canonical ID format
        # NOTE(review): this shape differs from the
        # "game_<sport>_<season>_<yyyymmdd>_<away>_<home>" IDs asserted in the
        # generate_game_id tests - confirm which format the scraper emits.
        assert game.id == "mlb_2026_nyy_bos_0415"
        assert (game.sport, game.season) == ("mlb", 2026)
        # Check team IDs
        assert game.home_team_id == "team_mlb_bos"
        assert game.away_team_id == "team_mlb_nyy"
        # Check scores preserved
        assert (game.home_score, game.away_score) == (5, 3)

    def test_creates_review_items_for_unresolved_teams(self):
        """An unknown home team yields no game but at least one review item."""
        unknown_home = RawGameData(
            game_date=datetime(2026, 4, 15),
            home_team_raw="Unknown Team XYZ",
            away_team_raw="Boston Red Sox",
            stadium_raw="Fenway Park",
            status="scheduled",
        )
        normalized, pending_review = MLBScraper(season=2026)._normalize_games(
            [unknown_home]
        )
        # Game should not be created due to unresolved team
        assert len(normalized) == 0
        # But there should be a review item
        assert len(pending_review) >= 1
class TestTeamAndStadiumScraping:
    """Static team and stadium data returned by MLBScraper."""

    def test_scrapes_all_mlb_teams(self):
        """Thirty teams come back, with unique IDs and all key fields populated."""
        roster = MLBScraper(season=2026).scrape_teams()
        # 30 MLB teams
        assert len(roster) == 30
        # Check team IDs are unique
        assert len({club.id for club in roster}) == 30
        # Check all teams have required fields
        for club in roster:
            assert club.id.startswith("team_mlb_")
            assert club.sport == "mlb"
            assert club.city
            assert club.name
            assert club.full_name
            assert club.abbreviation

    def test_teams_have_leagues_and_divisions(self):
        """The ``conference`` field splits teams 15/15 between American and National."""
        roster = MLBScraper(season=2026).scrape_teams()
        # Count teams by league
        american_total = sum(1 for club in roster if club.conference == "American")
        national_total = sum(1 for club in roster if club.conference == "National")
        assert american_total == 15
        assert national_total == 15

    def test_scrapes_all_mlb_stadiums(self):
        """Thirty stadiums come back, with unique IDs, location and coordinates."""
        venues = MLBScraper(season=2026).scrape_stadiums()
        # Should have stadiums for all teams
        assert len(venues) == 30
        # Check stadium IDs are unique
        assert len({venue.id for venue in venues}) == 30
        # Check all stadiums have required fields
        for venue in venues:
            assert venue.id.startswith("stadium_mlb_")
            assert venue.sport == "mlb"
            assert venue.name
            assert venue.city
            assert venue.state
            assert venue.country in ["USA", "Canada"]
            # A zero coordinate is treated as "not filled in".
            assert venue.latitude != 0
            assert venue.longitude != 0
class TestScrapeFallback:
    """Source fallback behaviour of MLBScraper.scrape_games."""

    def test_falls_back_to_next_source_on_failure(self):
        """When the first two sources raise, the ESPN result is used."""
        scraper = MLBScraper(season=2026)
        espn_games = [
            RawGameData(
                game_date=datetime(2026, 4, 15),
                home_team_raw="Boston Red Sox",
                away_team_raw="New York Yankees",
                stadium_raw="Fenway Park",
                status="scheduled",
            )
        ]
        # BR and the MLB API are made to fail; ESPN is made to return data.
        with patch.object(
            scraper,
            '_scrape_baseball_reference',
            side_effect=Exception("Connection failed"),
        ) as mock_br, patch.object(
            scraper, '_scrape_mlb_api', side_effect=Exception("API error")
        ) as mock_mlb, patch.object(
            scraper, '_scrape_espn', return_value=espn_games
        ) as mock_espn:
            result = scraper.scrape_games()
        assert result.success
        assert result.source == "espn"
        # Every source must have been attempted on the way to the fallback.
        assert mock_br.called
        assert mock_mlb.called
        assert mock_espn.called
class TestSeasonMonths:
    """Month list produced by MLBScraper._get_season_months."""

    def test_gets_correct_season_months(self):
        """The 2026 season spans nine (year, month) pairs, March to November."""
        season_months = MLBScraper(season=2026)._get_season_months()
        # MLB season is March-November
        assert len(season_months) == 9  # Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov
        opening, closing = season_months[0], season_months[-1]
        # Check first month is March of season year
        assert opening == (2026, 3)
        # Check last month is November
        assert closing == (2026, 11)

View File

@@ -0,0 +1,251 @@
"""Tests for MLS scraper."""
from datetime import datetime
from unittest.mock import patch
import pytest
from sportstime_parser.scrapers.mls import MLSScraper, create_mls_scraper
from sportstime_parser.scrapers.base import RawGameData
from sportstime_parser.tests.fixtures import (
load_json_fixture,
MLS_ESPN_SCOREBOARD_JSON,
)
class TestMLSScraperInit:
    """Construction-time properties of MLSScraper."""

    def test_creates_scraper_with_season(self):
        """Direct construction records the sport and season."""
        mls = MLSScraper(season=2026)
        assert (mls.sport, mls.season) == ("mls", 2026)

    def test_factory_function_creates_scraper(self):
        """create_mls_scraper returns an MLSScraper for the requested season."""
        mls = create_mls_scraper(season=2026)
        assert isinstance(mls, MLSScraper)
        assert mls.season == 2026

    def test_expected_game_count(self):
        """The expected game count for MLS is 493."""
        expected_total = 493
        assert MLSScraper(season=2026).expected_game_count == expected_total

    def test_sources_in_priority_order(self):
        """Sources come back as ESPN first, then FBref."""
        priority = MLSScraper(season=2026)._get_sources()
        assert priority == ["espn", "fbref"]
class TestESPNParsing:
    """Parsing of the MLS ESPN scoreboard fixture."""

    def test_parses_completed_games(self):
        """Both final matches are parsed, including scores and venue."""
        payload = load_json_fixture(MLS_ESPN_SCOREBOARD_JSON)
        parsed = MLSScraper(season=2026)._parse_espn_response(
            payload, "http://espn.com/api"
        )
        finals = [match for match in parsed if match.status == "final"]
        assert len(finals) == 2
        # Galaxy @ LAFC
        galaxy_at_lafc = next(
            match for match in finals if match.away_team_raw == "LA Galaxy"
        )
        assert galaxy_at_lafc.home_team_raw == "Los Angeles FC"
        assert galaxy_at_lafc.away_score == 2
        assert galaxy_at_lafc.home_score == 3
        assert galaxy_at_lafc.stadium_raw == "BMO Stadium"

    def test_parses_scheduled_games(self):
        """The single scheduled match is parsed with teams and venue."""
        payload = load_json_fixture(MLS_ESPN_SCOREBOARD_JSON)
        parsed = MLSScraper(season=2026)._parse_espn_response(
            payload, "http://espn.com/api"
        )
        upcoming = [match for match in parsed if match.status == "scheduled"]
        assert len(upcoming) == 1
        red_bulls_at_atlanta = upcoming[0]
        assert red_bulls_at_atlanta.away_team_raw == "New York Red Bulls"
        assert red_bulls_at_atlanta.home_team_raw == "Atlanta United FC"
        assert red_bulls_at_atlanta.stadium_raw == "Mercedes-Benz Stadium"

    def test_parses_venue_info(self):
        """Every parsed match carries a raw stadium name."""
        payload = load_json_fixture(MLS_ESPN_SCOREBOARD_JSON)
        parsed = MLSScraper(season=2026)._parse_espn_response(
            payload, "http://espn.com/api"
        )
        assert all(match.stadium_raw is not None for match in parsed)
class TestGameNormalization:
    """Checks that raw MLS games are normalized into canonical game records."""

    def test_normalizes_games_with_canonical_ids(self):
        """A resolvable raw game yields one record with the expected IDs and scores."""
        mls = MLSScraper(season=2026)
        finished = RawGameData(
            game_date=datetime(2026, 3, 15),
            home_team_raw="Los Angeles FC",
            away_team_raw="LA Galaxy",
            stadium_raw="BMO Stadium",
            home_score=3,
            away_score=2,
            status="final",
            source_url="http://example.com",
        )

        normalized, _ = mls._normalize_games([finished])
        assert len(normalized) == 1
        record = normalized[0]

        # Canonical game ID plus sport/season tags
        assert record.id == "mls_2026_lag_lafc_0315"
        assert record.sport == "mls"
        assert record.season == 2026

        # Canonical team IDs
        assert record.home_team_id == "team_mls_lafc"
        assert record.away_team_id == "team_mls_lag"

        # Scores carried through unchanged
        assert record.home_score == 3
        assert record.away_score == 2

    def test_creates_review_items_for_unresolved_teams(self):
        """An unknown home team produces no game but at least one review item."""
        mls = MLSScraper(season=2026)
        unresolved = RawGameData(
            game_date=datetime(2026, 3, 15),
            home_team_raw="Unknown Team XYZ",
            away_team_raw="LA Galaxy",
            stadium_raw="BMO Stadium",
            status="scheduled",
        )

        normalized, flagged = mls._normalize_games([unresolved])

        # The unresolved team prevents a game from being emitted ...
        assert len(normalized) == 0
        # ... and the problem is surfaced for manual review instead.
        assert len(flagged) >= 1
class TestTeamAndStadiumScraping:
    """Checks on the MLS team and stadium listings returned by the scraper."""

    def test_scrapes_all_mls_teams(self):
        """At least 29 uniquely identified, fully populated teams are returned."""
        clubs = MLSScraper(season=2026).scrape_teams()

        # At least 29 clubs are expected
        assert len(clubs) >= 29

        # No two clubs may share an ID
        distinct_ids = {club.id for club in clubs}
        assert len(distinct_ids) == len(clubs)

        # Required fields are present on every club
        for club in clubs:
            assert club.id.startswith("team_mls_")
            assert club.sport == "mls"
            assert club.city
            assert club.name
            assert club.full_name
            assert club.abbreviation

    def test_teams_have_conferences(self):
        """Both conferences hold at least 14 teams."""
        clubs = MLSScraper(season=2026).scrape_teams()

        per_conference = {
            label: [club for club in clubs if club.conference == label]
            for label in ("Eastern", "Western")
        }

        assert len(per_conference["Eastern"]) >= 14
        assert len(per_conference["Western"]) >= 14

    def test_scrapes_all_mls_stadiums(self):
        """At least 29 fully populated stadiums are returned."""
        venues = MLSScraper(season=2026).scrape_stadiums()

        # One venue per club is expected at minimum
        assert len(venues) >= 29

        # Required fields are present on every venue
        for venue in venues:
            assert venue.id.startswith("stadium_mls_")
            assert venue.sport == "mls"
            assert venue.name
            assert venue.city
            assert venue.state
            assert venue.country in ("USA", "Canada")
            assert venue.latitude != 0
            assert venue.longitude != 0
class TestScrapeFallback:
    """Checks how scrape_games moves between MLS sources."""

    def test_falls_back_to_next_source_on_failure(self):
        """When ESPN raises, the FBref result is used and reported as the source."""
        mls = MLSScraper(season=2026)

        with patch.object(mls, '_scrape_espn') as espn_stub:
            with patch.object(mls, '_scrape_fbref') as fbref_stub:
                # First source blows up
                espn_stub.side_effect = Exception("Connection failed")

                # Second source delivers one game
                fbref_stub.return_value = [
                    RawGameData(
                        game_date=datetime(2026, 3, 15),
                        home_team_raw="Los Angeles FC",
                        away_team_raw="LA Galaxy",
                        stadium_raw="BMO Stadium",
                        status="scheduled",
                    )
                ]

                outcome = mls.scrape_games()

                assert outcome.success
                assert outcome.source == "fbref"
                assert espn_stub.called
                assert fbref_stub.called
class TestSeasonMonths:
    """Checks the (year, month) span reported for an MLS season."""

    def test_gets_correct_season_months(self):
        """The 2026 season covers ten months, February through November."""
        span = MLSScraper(season=2026)._get_season_months()

        # Feb, Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov
        assert len(span) == 10

        # Opens in February of the season year
        assert span[0] == (2026, 2)

        # Closes in November of the same year
        assert span[-1] == (2026, 11)

View File

@@ -0,0 +1,428 @@
"""Tests for NBA scraper."""
import json
from datetime import datetime
from unittest.mock import MagicMock, patch
import pytest
from sportstime_parser.scrapers.nba import NBAScraper, create_nba_scraper
from sportstime_parser.scrapers.base import RawGameData
from sportstime_parser.tests.fixtures import (
load_fixture,
load_json_fixture,
NBA_BR_OCTOBER_HTML,
NBA_BR_EDGE_CASES_HTML,
NBA_ESPN_SCOREBOARD_JSON,
)
class TestNBAScraperInit:
    """Checks on a freshly constructed NBAScraper."""

    def test_creates_scraper_with_season(self):
        """Direct construction sets sport and season."""
        nba = NBAScraper(season=2025)
        assert nba.sport == "nba"
        assert nba.season == 2025

    def test_factory_function_creates_scraper(self):
        """create_nba_scraper returns an NBAScraper carrying the requested season."""
        requested_season = 2025
        built = create_nba_scraper(season=requested_season)
        assert isinstance(built, NBAScraper)
        assert built.season == requested_season

    def test_expected_game_count(self):
        """An NBA scraper reports 1230 expected games."""
        expected_total = 1230
        assert NBAScraper(season=2025).expected_game_count == expected_total

    def test_sources_in_priority_order(self):
        """_get_sources lists Basketball-Reference, then ESPN, then CBS."""
        priority = NBAScraper(season=2025)._get_sources()
        assert priority == ["basketball_reference", "espn", "cbs"]
class TestBasketballReferenceParsing:
    """Checks that the Basketball-Reference October fixture is parsed into raw games."""

    @staticmethod
    def _parse_october(source_url="http://example.com"):
        """Parse the October HTML fixture with a fresh 2025 scraper."""
        nba = NBAScraper(season=2025)
        markup = load_fixture(NBA_BR_OCTOBER_HTML)
        return nba._parse_basketball_reference(markup, source_url)

    def test_parses_completed_games(self):
        """Finished games expose teams, scores, venue and a final status."""
        rows = self._parse_october()

        # Every row in the fixture is picked up
        assert len(rows) == 7

        # Two of them are already finished
        finals = [row for row in rows if row.status == "final"]
        assert len(finals) == 2

        # Boston visiting Cleveland
        opener = next(row for row in rows if row.away_team_raw == "Boston Celtics")
        assert opener.home_team_raw == "Cleveland Cavaliers"
        assert opener.away_score == 112
        assert opener.home_score == 108
        assert opener.stadium_raw == "Rocket Mortgage FieldHouse"
        assert opener.status == "final"

    def test_parses_scheduled_games(self):
        """Upcoming games expose teams and venue but no scores."""
        upcoming = [row for row in self._parse_october() if row.status == "scheduled"]
        assert len(upcoming) == 5

        # Houston visiting Oklahoma City
        matchup = next(row for row in upcoming if row.away_team_raw == "Houston Rockets")
        assert matchup.home_team_raw == "Oklahoma City Thunder"
        assert matchup.away_score is None
        assert matchup.home_score is None
        assert matchup.stadium_raw == "Paycom Center"

    def test_parses_game_dates_correctly(self):
        """The first parsed game falls on 2025-10-22."""
        rows = self._parse_october()

        when = rows[0].game_date
        assert when.year == 2025
        assert when.month == 10
        assert when.day == 22

    def test_tracks_source_url(self):
        """Every parsed game records the URL passed to the parser."""
        origin = "http://basketball-reference.com/test"
        for row in self._parse_october(origin):
            assert row.source_url == origin
class TestBasketballReferenceEdgeCases:
    """Checks on unusual rows in the Basketball-Reference edge-case fixture."""

    @staticmethod
    def _parse_edge_cases():
        """Parse the edge-case HTML fixture with a fresh 2025 scraper."""
        nba = NBAScraper(season=2025)
        markup = load_fixture(NBA_BR_EDGE_CASES_HTML)
        return nba._parse_basketball_reference(markup, "http://example.com")

    def test_parses_postponed_games(self):
        """Exactly one game is marked postponed: Lakers at Suns."""
        delayed = [row for row in self._parse_edge_cases() if row.status == "postponed"]
        assert len(delayed) == 1

        only = delayed[0]
        assert only.away_team_raw == "Los Angeles Lakers"
        assert only.home_team_raw == "Phoenix Suns"

    def test_parses_cancelled_games(self):
        """Exactly one game is marked cancelled, with Portland as the visitor."""
        scrapped = [row for row in self._parse_edge_cases() if row.status == "cancelled"]
        assert len(scrapped) == 1
        assert scrapped[0].away_team_raw == "Portland Trail Blazers"

    def test_parses_neutral_site_games(self):
        """The Arena CDMX game is parsed with teams and final status."""
        rows = self._parse_edge_cases()

        # Mexico City game
        neutral = next(row for row in rows if row.stadium_raw == "Arena CDMX")
        assert neutral.away_team_raw == "Miami Heat"
        assert neutral.home_team_raw == "Washington Wizards"
        assert neutral.status == "final"

    def test_parses_overtime_games(self):
        """The high-scoring overtime game keeps both scores and final status."""
        rows = self._parse_edge_cases()

        # Located by its unusually high away score
        marathon = next(row for row in rows if row.away_score == 147)
        assert marathon.home_score == 150
        assert marathon.status == "final"
class TestESPNParsing:
    """Checks that the NBA ESPN scoreboard fixture is parsed into raw games."""

    @staticmethod
    def _parsed_fixture():
        """Parse the NBA ESPN scoreboard fixture with a fresh 2025 scraper."""
        nba = NBAScraper(season=2025)
        payload = load_json_fixture(NBA_ESPN_SCOREBOARD_JSON)
        return nba._parse_espn_response(payload, "http://espn.com/api")

    def test_parses_completed_games(self):
        """Finished games expose teams, scores and venue."""
        finals = [entry for entry in self._parsed_fixture() if entry.status == "final"]
        assert len(finals) == 2

        # Boston visiting Cleveland
        opener = next(entry for entry in finals if entry.away_team_raw == "Boston Celtics")
        assert opener.home_team_raw == "Cleveland Cavaliers"
        assert opener.away_score == 112
        assert opener.home_score == 108
        assert opener.stadium_raw == "Rocket Mortgage FieldHouse"

    def test_parses_scheduled_games(self):
        """The single upcoming game exposes teams and venue."""
        upcoming = [entry for entry in self._parsed_fixture() if entry.status == "scheduled"]
        assert len(upcoming) == 1

        matchup = upcoming[0]
        assert matchup.away_team_raw == "Houston Rockets"
        assert matchup.home_team_raw == "Oklahoma City Thunder"
        assert matchup.stadium_raw == "Paycom Center"

    def test_parses_venue_info(self):
        """Every parsed game carries a raw stadium value."""
        for entry in self._parsed_fixture():
            assert entry.stadium_raw is not None
class TestGameNormalization:
    """Checks that raw NBA games are normalized into canonical game records."""

    def test_normalizes_games_with_canonical_ids(self):
        """A resolvable raw game yields one record with the expected IDs and scores."""
        nba = NBAScraper(season=2025)
        finished = RawGameData(
            game_date=datetime(2025, 10, 22),
            home_team_raw="Cleveland Cavaliers",
            away_team_raw="Boston Celtics",
            stadium_raw="Rocket Mortgage FieldHouse",
            home_score=108,
            away_score=112,
            status="final",
            source_url="http://example.com",
        )

        normalized, _ = nba._normalize_games([finished])
        assert len(normalized) == 1
        record = normalized[0]

        # Canonical game ID plus sport/season tags
        assert record.id == "nba_2025_bos_cle_1022"
        assert record.sport == "nba"
        assert record.season == 2025

        # Canonical team IDs
        assert record.home_team_id == "team_nba_cle"
        assert record.away_team_id == "team_nba_bos"

        # Scores carried through unchanged
        assert record.home_score == 108
        assert record.away_score == 112

    def test_detects_doubleheaders(self):
        """Two same-day games between the same teams get game numbers 1 and 2."""
        nba = NBAScraper(season=2025)
        afternoon = RawGameData(
            game_date=datetime(2025, 4, 1, 13, 0),
            home_team_raw="Boston Celtics",
            away_team_raw="New York Knicks",
            stadium_raw="TD Garden",
            status="final",
            home_score=105,
            away_score=98,
        )
        evening = RawGameData(
            game_date=datetime(2025, 4, 1, 19, 0),
            home_team_raw="Boston Celtics",
            away_team_raw="New York Knicks",
            stadium_raw="TD Garden",
            status="final",
            home_score=110,
            away_score=102,
        )

        normalized, _ = nba._normalize_games([afternoon, evening])
        assert len(normalized) == 2

        numbers = sorted(record.game_number for record in normalized)
        assert numbers == [1, 2]

        # The game number is appended to each canonical ID
        identifiers = sorted(record.id for record in normalized)
        assert identifiers == ["nba_2025_nyk_bos_0401_1", "nba_2025_nyk_bos_0401_2"]

    def test_creates_review_items_for_unresolved_teams(self):
        """An unknown home team produces no game but at least one review item."""
        nba = NBAScraper(season=2025)
        unresolved = RawGameData(
            game_date=datetime(2025, 10, 22),
            home_team_raw="Unknown Team XYZ",
            away_team_raw="Boston Celtics",
            stadium_raw="TD Garden",
            status="scheduled",
        )

        normalized, flagged = nba._normalize_games([unresolved])

        # The unresolved team prevents a game from being emitted ...
        assert len(normalized) == 0
        # ... and the problem is surfaced for manual review instead.
        assert len(flagged) >= 1
class TestTeamAndStadiumScraping:
    """Checks on the NBA team and stadium listings returned by the scraper."""

    def test_scrapes_all_nba_teams(self):
        """Exactly 30 uniquely identified, fully populated teams are returned."""
        franchises = NBAScraper(season=2025).scrape_teams()

        # 30 NBA teams
        assert len(franchises) == 30

        # No two franchises may share an ID
        distinct_ids = {franchise.id for franchise in franchises}
        assert len(distinct_ids) == 30

        # Required fields are present on every franchise
        for franchise in franchises:
            assert franchise.id.startswith("team_nba_")
            assert franchise.sport == "nba"
            assert franchise.city
            assert franchise.name
            assert franchise.full_name
            assert franchise.abbreviation

    def test_teams_have_conferences_and_divisions(self):
        """Teams split 15/15 between the Eastern and Western conferences.

        NOTE(review): despite the name, no division assertions are made here.
        """
        franchises = NBAScraper(season=2025).scrape_teams()

        per_conference = {
            label: [franchise for franchise in franchises if franchise.conference == label]
            for label in ("Eastern", "Western")
        }

        assert len(per_conference["Eastern"]) == 15
        assert len(per_conference["Western"]) == 15

    def test_scrapes_all_nba_stadiums(self):
        """Exactly 30 uniquely identified, fully populated stadiums are returned."""
        arenas = NBAScraper(season=2025).scrape_stadiums()

        # One arena per franchise
        assert len(arenas) == 30

        # No two arenas may share an ID
        distinct_ids = {arena.id for arena in arenas}
        assert len(distinct_ids) == 30

        # Required fields are present on every arena
        for arena in arenas:
            assert arena.id.startswith("stadium_nba_")
            assert arena.sport == "nba"
            assert arena.name
            assert arena.city
            assert arena.state
            assert arena.country in ("USA", "Canada")
            assert arena.latitude != 0
            assert arena.longitude != 0
class TestScrapeFallback:
    """Checks how scrape_games moves between NBA sources."""

    def test_falls_back_to_next_source_on_failure(self):
        """When Basketball-Reference raises, the ESPN result is used and reported."""
        nba = NBAScraper(season=2025)

        with patch.object(nba, '_scrape_basketball_reference') as br_stub:
            with patch.object(nba, '_scrape_espn') as espn_stub:
                # First source blows up
                br_stub.side_effect = Exception("Connection failed")

                # Second source delivers one game
                espn_stub.return_value = [
                    RawGameData(
                        game_date=datetime(2025, 10, 22),
                        home_team_raw="Cleveland Cavaliers",
                        away_team_raw="Boston Celtics",
                        stadium_raw="Rocket Mortgage FieldHouse",
                        status="scheduled",
                    )
                ]

                outcome = nba.scrape_games()

                # ESPN supplied the winning result
                assert outcome.success
                assert outcome.source == "espn"
                assert br_stub.called
                assert espn_stub.called

    def test_returns_failure_when_all_sources_fail(self):
        """If every source raises, the result is a failure naming the errors."""
        nba = NBAScraper(season=2025)

        with patch.object(nba, '_scrape_basketball_reference') as br_stub:
            with patch.object(nba, '_scrape_espn') as espn_stub:
                with patch.object(nba, '_scrape_cbs') as cbs_stub:
                    br_stub.side_effect = Exception("BR failed")
                    espn_stub.side_effect = Exception("ESPN failed")
                    cbs_stub.side_effect = Exception("CBS failed")

                    outcome = nba.scrape_games()

                    assert not outcome.success
                    assert "All sources failed" in outcome.error_message
                    assert "CBS failed" in outcome.error_message
class TestSeasonMonths:
    """Checks the (year, month) span reported for an NBA season."""

    def test_gets_correct_season_months(self):
        """The 2025 season covers nine months, October 2025 through June 2026."""
        span = NBAScraper(season=2025)._get_season_months()

        # Oct, Nov, Dec, Jan, Feb, Mar, Apr, May, Jun
        assert len(span) == 9

        # Opens in October of the season year
        assert span[0] == (2025, 10)

        # Closes in June of the following year
        assert span[-1] == (2026, 6)

        # The year rolls over between December and January
        december, january = span[2], span[3]
        assert december == (2025, 12)
        assert january == (2026, 1)

View File

@@ -0,0 +1,310 @@
"""Tests for NFL scraper."""
from datetime import datetime
from unittest.mock import patch
import pytest
from sportstime_parser.scrapers.nfl import NFLScraper, create_nfl_scraper
from sportstime_parser.scrapers.base import RawGameData
from sportstime_parser.tests.fixtures import (
load_json_fixture,
NFL_ESPN_SCOREBOARD_JSON,
)
class TestNFLScraperInit:
    """Checks on a freshly constructed NFLScraper."""

    def test_creates_scraper_with_season(self):
        """Direct construction sets sport and season."""
        nfl = NFLScraper(season=2025)
        assert nfl.sport == "nfl"
        assert nfl.season == 2025

    def test_factory_function_creates_scraper(self):
        """create_nfl_scraper returns an NFLScraper carrying the requested season."""
        requested_season = 2025
        built = create_nfl_scraper(season=requested_season)
        assert isinstance(built, NFLScraper)
        assert built.season == requested_season

    def test_expected_game_count(self):
        """An NFL scraper reports 272 expected games."""
        expected_total = 272
        assert NFLScraper(season=2025).expected_game_count == expected_total

    def test_sources_in_priority_order(self):
        """_get_sources lists ESPN, then Pro-Football-Reference, then CBS."""
        priority = NFLScraper(season=2025)._get_sources()
        assert priority == ["espn", "pro_football_reference", "cbs"]
class TestESPNParsing:
    """Checks that the NFL ESPN scoreboard fixture is parsed into raw games."""

    @staticmethod
    def _parsed_fixture():
        """Parse the NFL ESPN scoreboard fixture with a fresh 2025 scraper."""
        nfl = NFLScraper(season=2025)
        payload = load_json_fixture(NFL_ESPN_SCOREBOARD_JSON)
        return nfl._parse_espn_response(payload, "http://espn.com/api")

    def test_parses_completed_games(self):
        """Finished games expose teams, scores and venue."""
        finals = [entry for entry in self._parsed_fixture() if entry.status == "final"]
        assert len(finals) == 2

        # Chiefs visiting the Ravens
        opener = next(entry for entry in finals if entry.away_team_raw == "Kansas City Chiefs")
        assert opener.home_team_raw == "Baltimore Ravens"
        assert opener.away_score == 27
        assert opener.home_score == 20
        assert opener.stadium_raw == "M&T Bank Stadium"

    def test_parses_scheduled_games(self):
        """The single upcoming game exposes teams and venue."""
        upcoming = [entry for entry in self._parsed_fixture() if entry.status == "scheduled"]
        assert len(upcoming) == 1

        matchup = upcoming[0]
        assert matchup.away_team_raw == "Dallas Cowboys"
        assert matchup.home_team_raw == "Cleveland Browns"
        assert matchup.stadium_raw == "Cleveland Browns Stadium"

    def test_parses_venue_info(self):
        """Every parsed game carries a raw stadium value."""
        for entry in self._parsed_fixture():
            assert entry.stadium_raw is not None
class TestGameNormalization:
    """Checks that raw NFL games are normalized into canonical game records."""

    def test_normalizes_games_with_canonical_ids(self):
        """A resolvable raw game yields one record with the expected IDs and scores."""
        nfl = NFLScraper(season=2025)
        finished = RawGameData(
            game_date=datetime(2025, 9, 7),
            home_team_raw="Baltimore Ravens",
            away_team_raw="Kansas City Chiefs",
            stadium_raw="M&T Bank Stadium",
            home_score=20,
            away_score=27,
            status="final",
            source_url="http://example.com",
        )

        normalized, _ = nfl._normalize_games([finished])
        assert len(normalized) == 1
        record = normalized[0]

        # Canonical game ID plus sport/season tags
        assert record.id == "nfl_2025_kc_bal_0907"
        assert record.sport == "nfl"
        assert record.season == 2025

        # Canonical team IDs
        assert record.home_team_id == "team_nfl_bal"
        assert record.away_team_id == "team_nfl_kc"

        # Scores carried through unchanged
        assert record.home_score == 20
        assert record.away_score == 27

    def test_creates_review_items_for_unresolved_teams(self):
        """An unknown home team produces no game but at least one review item."""
        nfl = NFLScraper(season=2025)
        unresolved = RawGameData(
            game_date=datetime(2025, 9, 7),
            home_team_raw="Unknown Team XYZ",
            away_team_raw="Kansas City Chiefs",
            stadium_raw="Arrowhead Stadium",
            status="scheduled",
        )

        normalized, flagged = nfl._normalize_games([unresolved])

        # The unresolved team prevents a game from being emitted ...
        assert len(normalized) == 0
        # ... and the problem is surfaced for manual review instead.
        assert len(flagged) >= 1
class TestTeamAndStadiumScraping:
    """Checks on the NFL team and stadium listings returned by the scraper."""

    def test_scrapes_all_nfl_teams(self):
        """Exactly 32 uniquely identified, fully populated teams are returned."""
        franchises = NFLScraper(season=2025).scrape_teams()

        # 32 NFL teams
        assert len(franchises) == 32

        # No two franchises may share an ID
        distinct_ids = {franchise.id for franchise in franchises}
        assert len(distinct_ids) == 32

        # Required fields are present on every franchise
        for franchise in franchises:
            assert franchise.id.startswith("team_nfl_")
            assert franchise.sport == "nfl"
            assert franchise.city
            assert franchise.name
            assert franchise.full_name
            assert franchise.abbreviation

    def test_teams_have_conferences_and_divisions(self):
        """Teams split 16/16 between the AFC and NFC.

        NOTE(review): despite the name, no division assertions are made here.
        """
        franchises = NFLScraper(season=2025).scrape_teams()

        per_conference = {
            label: [franchise for franchise in franchises if franchise.conference == label]
            for label in ("AFC", "NFC")
        }

        assert len(per_conference["AFC"]) == 16
        assert len(per_conference["NFC"]) == 16

    def test_scrapes_all_nfl_stadiums(self):
        """At least 30 fully populated, US-based stadiums are returned."""
        venues = NFLScraper(season=2025).scrape_stadiums()

        # Fewer venues than teams is acceptable because some teams share one
        assert len(venues) >= 30

        # Required fields are present on every venue
        for venue in venues:
            assert venue.id.startswith("stadium_nfl_")
            assert venue.sport == "nfl"
            assert venue.name
            assert venue.city
            assert venue.state
            assert venue.country == "USA"
            assert venue.latitude != 0
            assert venue.longitude != 0
class TestScrapeFallback:
    """Checks how scrape_games moves between NFL sources."""

    def test_falls_back_to_next_source_on_failure(self):
        """When ESPN raises, the Pro-Football-Reference result is used and reported."""
        nfl = NFLScraper(season=2025)

        with patch.object(nfl, '_scrape_espn') as espn_stub:
            with patch.object(nfl, '_scrape_pro_football_reference') as pfr_stub:
                # First source blows up
                espn_stub.side_effect = Exception("Connection failed")

                # Second source delivers one game
                pfr_stub.return_value = [
                    RawGameData(
                        game_date=datetime(2025, 9, 7),
                        home_team_raw="Baltimore Ravens",
                        away_team_raw="Kansas City Chiefs",
                        stadium_raw="M&T Bank Stadium",
                        status="scheduled",
                    )
                ]

                outcome = nfl.scrape_games()

                assert outcome.success
                assert outcome.source == "pro_football_reference"
                assert espn_stub.called
                assert pfr_stub.called
class TestSeasonMonths:
    """Checks the (year, month) span reported for an NFL season."""

    def test_gets_correct_season_months(self):
        """The 2025 season covers six months, September 2025 through February 2026."""
        span = NFLScraper(season=2025)._get_season_months()

        # Sep, Oct, Nov, Dec, Jan, Feb
        assert len(span) == 6

        # Opens in September of the season year
        assert span[0] == (2025, 9)

        # Closes in February of the following year
        assert span[-1] == (2026, 2)

        # The year rolls over between December and January
        december, january = span[3], span[4]
        assert december == (2025, 12)
        assert january == (2026, 1)
class TestInternationalFiltering:
    """Checks on how NFL games played abroad are handled.

    Note: Filtering happens in _parse_espn_response, not _normalize_games.
    """

    def test_filters_london_games_during_parsing(self):
        """A neutral-site game at London Stadium is dropped by the ESPN parser."""
        nfl = NFLScraper(season=2025)

        # Hand-built ESPN-style payload holding a single London game
        london_venue = {
            "fullName": "London Stadium",
            "address": {"city": "London", "country": "UK"},
        }
        sides = [
            {"homeAway": "home", "team": {"displayName": "Jacksonville Jaguars"}},
            {"homeAway": "away", "team": {"displayName": "Buffalo Bills"}},
        ]
        competition = {
            "neutralSite": True,
            "venue": london_venue,
            "competitors": sides,
        }
        payload = {
            "events": [
                {
                    "date": "2025-10-15T09:30:00Z",
                    "competitions": [competition],
                }
            ]
        }

        parsed = nfl._parse_espn_response(payload, "http://espn.com/api")

        # Nothing survives: the only event was the London game
        assert len(parsed) == 0

    def test_keeps_us_games(self):
        """A domestic raw game still yields one normalized game.

        NOTE(review): this goes through _normalize_games, whereas the class
        docstring says filtering happens in _parse_espn_response, so the
        filter itself is not exercised here.
        """
        nfl = NFLScraper(season=2025)
        domestic = RawGameData(
            game_date=datetime(2025, 9, 7),
            home_team_raw="Baltimore Ravens",
            away_team_raw="Kansas City Chiefs",
            stadium_raw="M&T Bank Stadium",
            status="scheduled",
        )

        normalized, _ = nfl._normalize_games([domestic])
        assert len(normalized) == 1

View File

@@ -0,0 +1,317 @@
"""Tests for NHL scraper."""
from datetime import datetime
from unittest.mock import patch
import pytest
from sportstime_parser.scrapers.nhl import NHLScraper, create_nhl_scraper
from sportstime_parser.scrapers.base import RawGameData
from sportstime_parser.tests.fixtures import (
load_json_fixture,
NHL_ESPN_SCOREBOARD_JSON,
)
class TestNHLScraperInit:
    """Checks on a freshly constructed NHLScraper."""

    def test_creates_scraper_with_season(self):
        """Direct construction sets sport and season."""
        nhl = NHLScraper(season=2025)
        assert nhl.sport == "nhl"
        assert nhl.season == 2025

    def test_factory_function_creates_scraper(self):
        """create_nhl_scraper returns an NHLScraper carrying the requested season."""
        requested_season = 2025
        built = create_nhl_scraper(season=requested_season)
        assert isinstance(built, NHLScraper)
        assert built.season == requested_season

    def test_expected_game_count(self):
        """An NHL scraper reports 1312 expected games."""
        expected_total = 1312
        assert NHLScraper(season=2025).expected_game_count == expected_total

    def test_sources_in_priority_order(self):
        """_get_sources lists Hockey-Reference, then the NHL API, then ESPN."""
        priority = NHLScraper(season=2025)._get_sources()
        assert priority == ["hockey_reference", "nhl_api", "espn"]
class TestESPNParsing:
    """Checks that the NHL ESPN scoreboard fixture is parsed into raw games."""

    @staticmethod
    def _parsed_fixture():
        """Parse the NHL ESPN scoreboard fixture with a fresh 2025 scraper."""
        nhl = NHLScraper(season=2025)
        payload = load_json_fixture(NHL_ESPN_SCOREBOARD_JSON)
        return nhl._parse_espn_response(payload, "http://espn.com/api")

    def test_parses_completed_games(self):
        """Finished games expose teams, scores and venue."""
        finals = [entry for entry in self._parsed_fixture() if entry.status == "final"]
        assert len(finals) == 2

        # Penguins visiting the Bruins
        opener = next(entry for entry in finals if entry.away_team_raw == "Pittsburgh Penguins")
        assert opener.home_team_raw == "Boston Bruins"
        assert opener.away_score == 2
        assert opener.home_score == 4
        assert opener.stadium_raw == "TD Garden"

    def test_parses_scheduled_games(self):
        """The single upcoming game exposes teams and venue."""
        upcoming = [entry for entry in self._parsed_fixture() if entry.status == "scheduled"]
        assert len(upcoming) == 1

        matchup = upcoming[0]
        assert matchup.away_team_raw == "Vegas Golden Knights"
        assert matchup.home_team_raw == "Los Angeles Kings"
        assert matchup.stadium_raw == "Crypto.com Arena"

    def test_parses_venue_info(self):
        """Every parsed game carries a raw stadium value."""
        for entry in self._parsed_fixture():
            assert entry.stadium_raw is not None
class TestGameNormalization:
    """Checks that raw NHL games are normalized into canonical game records."""

    def test_normalizes_games_with_canonical_ids(self):
        """A resolvable raw game yields one record with the expected IDs and scores."""
        nhl = NHLScraper(season=2025)
        finished = RawGameData(
            game_date=datetime(2025, 10, 8),
            home_team_raw="Boston Bruins",
            away_team_raw="Pittsburgh Penguins",
            stadium_raw="TD Garden",
            home_score=4,
            away_score=2,
            status="final",
            source_url="http://example.com",
        )

        normalized, _ = nhl._normalize_games([finished])
        assert len(normalized) == 1
        record = normalized[0]

        # Canonical game ID plus sport/season tags
        assert record.id == "nhl_2025_pit_bos_1008"
        assert record.sport == "nhl"
        assert record.season == 2025

        # Canonical team IDs
        assert record.home_team_id == "team_nhl_bos"
        assert record.away_team_id == "team_nhl_pit"

        # Scores carried through unchanged
        assert record.home_score == 4
        assert record.away_score == 2

    def test_creates_review_items_for_unresolved_teams(self):
        """An unknown home team produces no game but at least one review item."""
        nhl = NHLScraper(season=2025)
        unresolved = RawGameData(
            game_date=datetime(2025, 10, 8),
            home_team_raw="Unknown Team XYZ",
            away_team_raw="Boston Bruins",
            stadium_raw="TD Garden",
            status="scheduled",
        )

        normalized, flagged = nhl._normalize_games([unresolved])

        # The unresolved team prevents a game from being emitted ...
        assert len(normalized) == 0
        # ... and the problem is surfaced for manual review instead.
        assert len(flagged) >= 1
class TestTeamAndStadiumScraping:
    """Checks on the NHL team and stadium listings returned by the scraper."""

    def test_scrapes_all_nhl_teams(self):
        """Exactly 32 uniquely identified, fully populated teams are returned."""
        franchises = NHLScraper(season=2025).scrape_teams()

        # 32 NHL teams
        assert len(franchises) == 32

        # No two franchises may share an ID
        distinct_ids = {franchise.id for franchise in franchises}
        assert len(distinct_ids) == 32

        # Required fields are present on every franchise
        for franchise in franchises:
            assert franchise.id.startswith("team_nhl_")
            assert franchise.sport == "nhl"
            assert franchise.city
            assert franchise.name
            assert franchise.full_name
            assert franchise.abbreviation

    def test_teams_have_conferences_and_divisions(self):
        """Teams split 16/16 between the Eastern and Western conferences.

        NOTE(review): despite the name, no division assertions are made here.
        """
        franchises = NHLScraper(season=2025).scrape_teams()

        per_conference = {
            label: [franchise for franchise in franchises if franchise.conference == label]
            for label in ("Eastern", "Western")
        }

        assert len(per_conference["Eastern"]) == 16
        assert len(per_conference["Western"]) == 16

    def test_scrapes_all_nhl_stadiums(self):
        """Exactly 32 uniquely identified, fully populated stadiums are returned."""
        arenas = NHLScraper(season=2025).scrape_stadiums()

        # One arena per franchise
        assert len(arenas) == 32

        # No two arenas may share an ID
        distinct_ids = {arena.id for arena in arenas}
        assert len(distinct_ids) == 32

        # Required fields are present on every arena
        for arena in arenas:
            assert arena.id.startswith("stadium_nhl_")
            assert arena.sport == "nhl"
            assert arena.name
            assert arena.city
            assert arena.state
            assert arena.country in ("USA", "Canada")
            assert arena.latitude != 0
            assert arena.longitude != 0
class TestScrapeFallback:
    """Checks how scrape_games moves between NHL sources."""

    def test_falls_back_to_next_source_on_failure(self):
        """When Hockey-Reference and the NHL API raise, the ESPN result is used."""
        nhl = NHLScraper(season=2025)

        with patch.object(nhl, '_scrape_hockey_reference') as hr_stub:
            with patch.object(nhl, '_scrape_nhl_api') as api_stub:
                with patch.object(nhl, '_scrape_espn') as espn_stub:
                    # The first two sources blow up
                    hr_stub.side_effect = Exception("Connection failed")
                    api_stub.side_effect = Exception("API error")

                    # The third source delivers one game
                    espn_stub.return_value = [
                        RawGameData(
                            game_date=datetime(2025, 10, 8),
                            home_team_raw="Boston Bruins",
                            away_team_raw="Pittsburgh Penguins",
                            stadium_raw="TD Garden",
                            status="scheduled",
                        )
                    ]

                    outcome = nhl.scrape_games()

                    assert outcome.success
                    assert outcome.source == "espn"
                    assert hr_stub.called
                    assert api_stub.called
                    assert espn_stub.called
class TestSeasonMonths:
    """Checks the (year, month) span reported for an NHL season."""

    def test_gets_correct_season_months(self):
        """The 2025 season covers nine months, October 2025 through June 2026."""
        span = NHLScraper(season=2025)._get_season_months()

        # Oct, Nov, Dec, Jan, Feb, Mar, Apr, May, Jun
        assert len(span) == 9

        # Opens in October of the season year
        assert span[0] == (2025, 10)

        # Closes in June of the following year
        assert span[-1] == (2026, 6)

        # The year rolls over between December and January
        december, january = span[2], span[3]
        assert december == (2025, 12)
        assert january == (2026, 1)
class TestInternationalFiltering:
    """Checks on how NHL games played abroad are handled.

    Note: Filtering happens in _parse_espn_response, not _normalize_games.
    """

    def test_filters_european_games_during_parsing(self):
        """A neutral-site game in Prague is dropped by the ESPN parser."""
        nhl = NHLScraper(season=2025)

        # Hand-built ESPN-style payload holding a single Prague game (Global Series)
        prague_venue = {
            "fullName": "O2 Arena, Prague",
            "address": {"city": "Prague", "country": "Czech Republic"},
        }
        sides = [
            {"homeAway": "home", "team": {"displayName": "Florida Panthers"}},
            {"homeAway": "away", "team": {"displayName": "Dallas Stars"}},
        ]
        competition = {
            "neutralSite": True,
            "venue": prague_venue,
            "competitors": sides,
        }
        payload = {
            "events": [
                {
                    "date": "2025-10-10T18:00:00Z",
                    "competitions": [competition],
                }
            ]
        }

        parsed = nhl._parse_espn_response(payload, "http://espn.com/api")

        # Nothing survives: the only event was the Prague game
        assert len(parsed) == 0

    def test_keeps_north_american_games(self):
        """A domestic raw game still yields one normalized game.

        NOTE(review): this goes through _normalize_games, whereas the class
        docstring says filtering happens in _parse_espn_response, so the
        filter itself is not exercised here.
        """
        nhl = NHLScraper(season=2025)
        domestic = RawGameData(
            game_date=datetime(2025, 10, 8),
            home_team_raw="Boston Bruins",
            away_team_raw="Pittsburgh Penguins",
            stadium_raw="TD Garden",
            status="scheduled",
        )

        normalized, _ = nhl._normalize_games([domestic])
        assert len(normalized) == 1

View File

@@ -0,0 +1,226 @@
"""Tests for NWSL scraper."""
from datetime import datetime
from unittest.mock import patch
import pytest
from sportstime_parser.scrapers.nwsl import NWSLScraper, create_nwsl_scraper
from sportstime_parser.scrapers.base import RawGameData
from sportstime_parser.tests.fixtures import (
load_json_fixture,
NWSL_ESPN_SCOREBOARD_JSON,
)
class TestNWSLScraperInit:
    """Construction-time behaviour of NWSLScraper."""

    def test_creates_scraper_with_season(self):
        """The constructor records the sport key and the requested season."""
        nwsl = NWSLScraper(season=2026)
        assert nwsl.sport == "nwsl"
        assert nwsl.season == 2026

    def test_factory_function_creates_scraper(self):
        """create_nwsl_scraper returns an NWSLScraper for the given season."""
        built = create_nwsl_scraper(season=2026)
        assert isinstance(built, NWSLScraper)
        assert built.season == 2026

    def test_expected_game_count(self):
        """expected_game_count is 182 for the NWSL."""
        nwsl = NWSLScraper(season=2026)
        assert nwsl.expected_game_count == 182

    def test_sources_in_priority_order(self):
        """ESPN is the only source reported by _get_sources."""
        source_order = NWSLScraper(season=2026)._get_sources()
        assert source_order == ["espn"]
class TestESPNParsing:
    """Parsing of the recorded NWSL ESPN scoreboard fixture."""

    @staticmethod
    def _parse_fixture():
        """Run the NWSL ESPN fixture through a fresh scraper's parser."""
        scraper = NWSLScraper(season=2026)
        payload = load_json_fixture(NWSL_ESPN_SCOREBOARD_JSON)
        return scraper._parse_espn_response(payload, "http://espn.com/api")

    def test_parses_completed_games(self):
        """Two games are final; Angel City at Portland carries scores and venue."""
        finals = [game for game in self._parse_fixture() if game.status == "final"]
        assert len(finals) == 2

        # Angel City @ Thorns
        angel_city_visit = next(
            game for game in finals if game.away_team_raw == "Angel City FC"
        )
        assert angel_city_visit.home_team_raw == "Portland Thorns FC"
        assert angel_city_visit.away_score == 1
        assert angel_city_visit.home_score == 2
        assert angel_city_visit.stadium_raw == "Providence Park"

    def test_parses_scheduled_games(self):
        """Exactly one game is still scheduled: San Diego at Bay FC."""
        upcoming = [
            game for game in self._parse_fixture() if game.status == "scheduled"
        ]
        assert len(upcoming) == 1

        only_game = upcoming[0]
        assert only_game.away_team_raw == "San Diego Wave FC"
        assert only_game.home_team_raw == "Bay FC"
        assert only_game.stadium_raw == "PayPal Park"

    def test_parses_venue_info(self):
        """Every parsed game carries a raw stadium name."""
        for parsed_game in self._parse_fixture():
            assert parsed_game.stadium_raw is not None
class TestGameNormalization:
    """Normalization of raw NWSL games into canonical records."""

    def test_normalizes_games_with_canonical_ids(self):
        """A resolvable game gets canonical game/team IDs and keeps its scores."""
        scraper = NWSLScraper(season=2026)
        thorns_home_game = RawGameData(
            game_date=datetime(2026, 4, 10),
            home_team_raw="Portland Thorns FC",
            away_team_raw="Angel City FC",
            stadium_raw="Providence Park",
            home_score=2,
            away_score=1,
            status="final",
            source_url="http://example.com",
        )

        normalized, _ = scraper._normalize_games([thorns_home_game])

        assert len(normalized) == 1
        canonical = normalized[0]

        # Canonical ID format
        assert canonical.id == "nwsl_2026_anf_por_0410"
        assert canonical.sport == "nwsl"
        assert canonical.season == 2026

        # Team IDs
        assert canonical.home_team_id == "team_nwsl_por"
        assert canonical.away_team_id == "team_nwsl_anf"

        # Scores are carried over unchanged
        assert canonical.home_score == 2
        assert canonical.away_score == 1

    def test_creates_review_items_for_unresolved_teams(self):
        """An unknown home team yields no game but at least one review item."""
        scraper = NWSLScraper(season=2026)
        unresolvable_game = RawGameData(
            game_date=datetime(2026, 4, 10),
            home_team_raw="Unknown Team XYZ",
            away_team_raw="Portland Thorns FC",
            stadium_raw="Providence Park",
            status="scheduled",
        )

        normalized, pending_review = scraper._normalize_games([unresolvable_game])

        # No game is produced because one team could not be resolved ...
        assert len(normalized) == 0
        # ... but the problem is surfaced for manual review.
        assert len(pending_review) >= 1
class TestTeamAndStadiumScraping:
    """Completeness checks for the NWSL team and stadium listings."""

    def test_scrapes_all_nwsl_teams(self):
        """scrape_teams returns 14 distinct, fully populated teams."""
        all_teams = NWSLScraper(season=2026).scrape_teams()

        # NWSL has 14 teams
        assert len(all_teams) == 14

        # No two teams may share an ID
        distinct_ids = {team.id for team in all_teams}
        assert len(distinct_ids) == 14

        # Required fields must be present on every team
        for club in all_teams:
            assert club.id.startswith("team_nwsl_")
            assert club.sport == "nwsl"
            assert club.city
            assert club.name
            assert club.full_name
            assert club.abbreviation

    def test_scrapes_all_nwsl_stadiums(self):
        """scrape_stadiums returns 14 distinct, fully populated stadiums."""
        all_stadiums = NWSLScraper(season=2026).scrape_stadiums()

        # One stadium per team
        assert len(all_stadiums) == 14

        # No two stadiums may share an ID
        distinct_ids = {venue.id for venue in all_stadiums}
        assert len(distinct_ids) == 14

        # Required fields must be present on every stadium
        for venue in all_stadiums:
            assert venue.id.startswith("stadium_nwsl_")
            assert venue.sport == "nwsl"
            assert venue.name
            assert venue.city
            assert venue.state
            assert venue.country == "USA"
            assert venue.latitude != 0
            assert venue.longitude != 0
class TestScrapeFallback:
    """Failure handling when the NWSL's single source (ESPN) is unavailable."""

    def test_returns_failure_when_espn_fails(self):
        """scrape_games reports failure when the ESPN scrape raises."""
        scraper = NWSLScraper(season=2026)
        espn_down = patch.object(
            scraper, '_scrape_espn', side_effect=Exception("ESPN failed")
        )

        with espn_down:
            outcome = scraper.scrape_games()

        assert not outcome.success
        assert "All sources failed" in outcome.error_message
class TestSeasonMonths:
    """Check the (year, month) sequence the NWSL scraper produces for a season."""

    def test_gets_correct_season_months(self):
        """The 2026 season runs from March through November of 2026."""
        season_months = NWSLScraper(season=2026)._get_season_months()

        # NWSL season is March-November:
        # Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov
        assert len(season_months) == 9

        # Opens in March of the season year ...
        assert season_months[0] == (2026, 3)
        # ... and closes in November.
        assert season_months[-1] == (2026, 11)

View File

@@ -0,0 +1,226 @@
"""Tests for WNBA scraper."""
from datetime import datetime
from unittest.mock import patch
import pytest
from sportstime_parser.scrapers.wnba import WNBAScraper, create_wnba_scraper
from sportstime_parser.scrapers.base import RawGameData
from sportstime_parser.tests.fixtures import (
load_json_fixture,
WNBA_ESPN_SCOREBOARD_JSON,
)
class TestWNBAScraperInit:
    """Construction-time behaviour of WNBAScraper."""

    def test_creates_scraper_with_season(self):
        """The constructor records the sport key and the requested season."""
        wnba = WNBAScraper(season=2026)
        assert wnba.sport == "wnba"
        assert wnba.season == 2026

    def test_factory_function_creates_scraper(self):
        """create_wnba_scraper returns a WNBAScraper for the given season."""
        built = create_wnba_scraper(season=2026)
        assert isinstance(built, WNBAScraper)
        assert built.season == 2026

    def test_expected_game_count(self):
        """expected_game_count is 220 for the WNBA."""
        wnba = WNBAScraper(season=2026)
        assert wnba.expected_game_count == 220

    def test_sources_in_priority_order(self):
        """ESPN is the only source reported by _get_sources."""
        source_order = WNBAScraper(season=2026)._get_sources()
        assert source_order == ["espn"]
class TestESPNParsing:
    """Parsing of the recorded WNBA ESPN scoreboard fixture."""

    @staticmethod
    def _parse_fixture():
        """Run the WNBA ESPN fixture through a fresh scraper's parser."""
        scraper = WNBAScraper(season=2026)
        payload = load_json_fixture(WNBA_ESPN_SCOREBOARD_JSON)
        return scraper._parse_espn_response(payload, "http://espn.com/api")

    def test_parses_completed_games(self):
        """Two games are final; Aces at Liberty carries scores and venue."""
        finals = [game for game in self._parse_fixture() if game.status == "final"]
        assert len(finals) == 2

        # Aces @ Liberty
        aces_visit = next(
            game for game in finals if game.away_team_raw == "Las Vegas Aces"
        )
        assert aces_visit.home_team_raw == "New York Liberty"
        assert aces_visit.away_score == 88
        assert aces_visit.home_score == 92
        assert aces_visit.stadium_raw == "Barclays Center"

    def test_parses_scheduled_games(self):
        """Exactly one game is still scheduled: Phoenix at Seattle."""
        upcoming = [
            game for game in self._parse_fixture() if game.status == "scheduled"
        ]
        assert len(upcoming) == 1

        only_game = upcoming[0]
        assert only_game.away_team_raw == "Phoenix Mercury"
        assert only_game.home_team_raw == "Seattle Storm"
        assert only_game.stadium_raw == "Climate Pledge Arena"

    def test_parses_venue_info(self):
        """Every parsed game carries a raw stadium name."""
        for parsed_game in self._parse_fixture():
            assert parsed_game.stadium_raw is not None
class TestGameNormalization:
    """Normalization of raw WNBA games into canonical records."""

    def test_normalizes_games_with_canonical_ids(self):
        """A resolvable game gets canonical game/team IDs and keeps its scores."""
        scraper = WNBAScraper(season=2026)
        liberty_home_game = RawGameData(
            game_date=datetime(2026, 5, 20),
            home_team_raw="New York Liberty",
            away_team_raw="Las Vegas Aces",
            stadium_raw="Barclays Center",
            home_score=92,
            away_score=88,
            status="final",
            source_url="http://example.com",
        )

        normalized, _ = scraper._normalize_games([liberty_home_game])

        assert len(normalized) == 1
        canonical = normalized[0]

        # Canonical ID format
        assert canonical.id == "wnba_2026_lv_ny_0520"
        assert canonical.sport == "wnba"
        assert canonical.season == 2026

        # Team IDs
        assert canonical.home_team_id == "team_wnba_ny"
        assert canonical.away_team_id == "team_wnba_lv"

        # Scores are carried over unchanged
        assert canonical.home_score == 92
        assert canonical.away_score == 88

    def test_creates_review_items_for_unresolved_teams(self):
        """An unknown home team yields no game but at least one review item."""
        scraper = WNBAScraper(season=2026)
        unresolvable_game = RawGameData(
            game_date=datetime(2026, 5, 20),
            home_team_raw="Unknown Team XYZ",
            away_team_raw="Las Vegas Aces",
            stadium_raw="Barclays Center",
            status="scheduled",
        )

        normalized, pending_review = scraper._normalize_games([unresolvable_game])

        # No game is produced because one team could not be resolved ...
        assert len(normalized) == 0
        # ... but the problem is surfaced for manual review.
        assert len(pending_review) >= 1
class TestTeamAndStadiumScraping:
    """Completeness checks for the WNBA team and stadium listings."""

    def test_scrapes_all_wnba_teams(self):
        """scrape_teams returns 13 distinct, fully populated teams."""
        all_teams = WNBAScraper(season=2026).scrape_teams()

        # WNBA has 13 teams (including Golden State Valkyries)
        assert len(all_teams) == 13

        # No two teams may share an ID
        distinct_ids = {team.id for team in all_teams}
        assert len(distinct_ids) == 13

        # Required fields must be present on every team
        for club in all_teams:
            assert club.id.startswith("team_wnba_")
            assert club.sport == "wnba"
            assert club.city
            assert club.name
            assert club.full_name
            assert club.abbreviation

    def test_scrapes_all_wnba_stadiums(self):
        """scrape_stadiums returns 13 distinct, fully populated stadiums."""
        all_stadiums = WNBAScraper(season=2026).scrape_stadiums()

        # One stadium per team
        assert len(all_stadiums) == 13

        # No two stadiums may share an ID
        distinct_ids = {venue.id for venue in all_stadiums}
        assert len(distinct_ids) == 13

        # Required fields must be present on every stadium
        for venue in all_stadiums:
            assert venue.id.startswith("stadium_wnba_")
            assert venue.sport == "wnba"
            assert venue.name
            assert venue.city
            assert venue.state
            assert venue.country == "USA"
            assert venue.latitude != 0
            assert venue.longitude != 0
class TestScrapeFallback:
    """Failure handling when the WNBA's single source (ESPN) is unavailable."""

    def test_returns_failure_when_espn_fails(self):
        """scrape_games reports failure when the ESPN scrape raises."""
        scraper = WNBAScraper(season=2026)
        espn_down = patch.object(
            scraper, '_scrape_espn', side_effect=Exception("ESPN failed")
        )

        with espn_down:
            outcome = scraper.scrape_games()

        assert not outcome.success
        assert "All sources failed" in outcome.error_message
class TestSeasonMonths:
    """Check the (year, month) sequence the WNBA scraper produces for a season."""

    def test_gets_correct_season_months(self):
        """The 2026 season runs from May through October of 2026."""
        season_months = WNBAScraper(season=2026)._get_season_months()

        # WNBA season is May-October:
        # May, Jun, Jul, Aug, Sep, Oct
        assert len(season_months) == 6

        # Opens in May of the season year ...
        assert season_months[0] == (2026, 5)
        # ... and closes in October.
        assert season_months[-1] == (2026, 10)

View File

@@ -0,0 +1,187 @@
"""Tests for timezone conversion utilities."""
import pytest
from datetime import datetime, date
from zoneinfo import ZoneInfo
from sportstime_parser.normalizers.timezone import (
detect_timezone_from_string,
detect_timezone_from_location,
parse_datetime,
convert_to_utc,
get_stadium_timezone,
TimezoneResult,
)
class TestDetectTimezoneFromString:
    """Tests for detect_timezone_from_string."""

    def test_eastern_time(self):
        """ET, EST and EDT all map to America/New_York."""
        for text in ("7:00 PM ET", "7:00 PM EST", "7:00 PM EDT"):
            assert detect_timezone_from_string(text) == "America/New_York"

    def test_central_time(self):
        """CT, CST and CDT all map to America/Chicago."""
        for text in ("8:00 PM CT", "8:00 PM CST", "8:00 PM CDT"):
            assert detect_timezone_from_string(text) == "America/Chicago"

    def test_mountain_time(self):
        """MT and MST map to America/Denver."""
        for text in ("7:00 PM MT", "7:00 PM MST"):
            assert detect_timezone_from_string(text) == "America/Denver"

    def test_pacific_time(self):
        """PT, PST and PDT all map to America/Los_Angeles."""
        for text in ("7:00 PM PT", "7:00 PM PST", "7:00 PM PDT"):
            assert detect_timezone_from_string(text) == "America/Los_Angeles"

    def test_no_timezone(self):
        """A time string without a zone abbreviation yields None."""
        for text in ("7:00 PM", "19:00"):
            assert detect_timezone_from_string(text) is None

    def test_case_insensitive(self):
        """The abbreviation is matched regardless of letter case."""
        for text in ("7:00 PM et", "7:00 PM Et"):
            assert detect_timezone_from_string(text) == "America/New_York"
class TestDetectTimezoneFromLocation:
    """Tests for detect_timezone_from_location."""

    def test_eastern_states(self):
        """NY, MA and FL resolve to America/New_York."""
        for code in ("NY", "MA", "FL"):
            assert detect_timezone_from_location(state=code) == "America/New_York"

    def test_central_states(self):
        """TX and IL resolve to America/Chicago."""
        for code in ("TX", "IL"):
            assert detect_timezone_from_location(state=code) == "America/Chicago"

    def test_mountain_states(self):
        """CO resolves to America/Denver; AZ has its own America/Phoenix zone."""
        expected_zones = (("CO", "America/Denver"), ("AZ", "America/Phoenix"))
        for code, zone in expected_zones:
            assert detect_timezone_from_location(state=code) == zone

    def test_pacific_states(self):
        """CA and WA resolve to America/Los_Angeles."""
        for code in ("CA", "WA"):
            assert detect_timezone_from_location(state=code) == "America/Los_Angeles"

    def test_canadian_provinces(self):
        """ON, BC and AB resolve to their Canadian zones."""
        expected_zones = (
            ("ON", "America/Toronto"),
            ("BC", "America/Vancouver"),
            ("AB", "America/Edmonton"),
        )
        for code, zone in expected_zones:
            assert detect_timezone_from_location(state=code) == zone

    def test_case_insensitive(self):
        """State codes are matched regardless of letter case."""
        for code in ("ny", "Ny"):
            assert detect_timezone_from_location(state=code) == "America/New_York"

    def test_unknown_state(self):
        """An unrecognised code, or no state at all, yields None."""
        for code in ("XX", None):
            assert detect_timezone_from_location(state=code) is None
class TestParseDatetime:
    """Tests for parse_datetime."""

    def test_basic_date_time(self):
        """An explicit ET time is converted to UTC with high confidence."""
        parsed = parse_datetime("2025-12-25", "7:00 PM ET")
        utc_moment = parsed.datetime_utc

        assert utc_moment.year == 2025
        assert utc_moment.month == 12
        # 7:00 PM Eastern in December is UTC-5, so UTC is already the 26th.
        assert utc_moment.day == 26
        assert parsed.source_timezone == "America/New_York"
        assert parsed.confidence == "high"

    def test_date_only(self):
        """A date without a time keeps its calendar day in UTC."""
        parsed = parse_datetime("2025-10-21")
        utc_moment = parsed.datetime_utc

        assert utc_moment.year == 2025
        assert utc_moment.month == 10
        assert utc_moment.day == 21

    def test_timezone_hint(self):
        """The hint is used when the time string names no zone."""
        parsed = parse_datetime(
            "2025-10-21",
            "7:00 PM",
            timezone_hint="America/Chicago",
        )

        assert parsed.source_timezone == "America/Chicago"
        assert parsed.confidence == "medium"

    def test_location_inference(self):
        """The zone is inferred from the state when nothing else is given."""
        parsed = parse_datetime(
            "2025-10-21",
            "7:00 PM",
            location_state="CA",
        )

        assert parsed.source_timezone == "America/Los_Angeles"
        assert parsed.confidence == "medium"

    def test_default_to_eastern(self):
        """With no zone information, Eastern is assumed and a warning is set."""
        parsed = parse_datetime("2025-10-21", "7:00 PM")

        assert parsed.source_timezone == "America/New_York"
        assert parsed.confidence == "low"
        assert parsed.warning is not None

    def test_invalid_date(self):
        """An unparseable date yields low confidence and a warning."""
        parsed = parse_datetime("not a date")

        assert parsed.confidence == "low"
        assert parsed.warning is not None
class TestConvertToUtc:
    """Tests for convert_to_utc."""

    def test_convert_naive_datetime(self):
        """A naive 7:00 PM is read as Eastern and shifted to UTC."""
        seven_pm_naive = datetime(2025, 12, 25, 19, 0)

        as_utc = convert_to_utc(seven_pm_naive, "America/New_York")

        # Eastern Time is UTC-5 in December, so this lands on
        # 00:00 UTC the next day.
        assert as_utc.hour == 0
        assert as_utc.day == 26

    def test_convert_aware_datetime(self):
        """A datetime that already carries Pacific tzinfo is shifted to UTC."""
        pacific = ZoneInfo("America/Los_Angeles")
        seven_pm_pacific = datetime(2025, 7, 4, 19, 0, tzinfo=pacific)

        as_utc = convert_to_utc(seven_pm_pacific, "America/Los_Angeles")

        # Pacific Time is UTC-7 in July, so this lands on
        # 02:00 UTC the next day.
        assert as_utc.hour == 2
        assert as_utc.day == 5
class TestGetStadiumTimezone:
    """Tests for get_stadium_timezone."""

    def test_explicit_timezone(self):
        """An explicitly supplied stadium timezone is returned as given."""
        resolved = get_stadium_timezone("AZ", stadium_timezone="America/Phoenix")
        assert resolved == "America/Phoenix"

    def test_state_inference(self):
        """Without an explicit zone, the state decides."""
        resolved = get_stadium_timezone("NY")
        assert resolved == "America/New_York"

    def test_default_eastern(self):
        """An unknown state falls back to Eastern."""
        resolved = get_stadium_timezone("XX")
        assert resolved == "America/New_York"

View File

@@ -0,0 +1 @@
"""Tests for the uploaders module."""

View File

@@ -0,0 +1,461 @@
"""Tests for the CloudKit client."""
import json
import pytest
from datetime import datetime
from unittest.mock import Mock, patch, MagicMock
from sportstime_parser.uploaders.cloudkit import (
CloudKitClient,
CloudKitRecord,
CloudKitError,
CloudKitAuthError,
CloudKitRateLimitError,
CloudKitServerError,
RecordType,
OperationResult,
BatchResult,
)
class TestCloudKitRecord:
    """Tests for the CloudKitRecord dataclass and its API serialisation."""

    @staticmethod
    def _serialize(record_type, fields):
        """Build a record named "test" and return its CloudKit dict form."""
        record = CloudKitRecord(
            record_name="test",
            record_type=record_type,
            fields=fields,
        )
        return record.to_cloudkit_dict()

    def test_create_record(self):
        """Constructor arguments are stored; the change tag defaults to None."""
        game_record = CloudKitRecord(
            record_name="nba_2025_hou_okc_1021",
            record_type=RecordType.GAME,
            fields={
                "sport": "nba",
                "season": 2025,
            },
        )

        assert game_record.record_name == "nba_2025_hou_okc_1021"
        assert game_record.record_type == RecordType.GAME
        assert game_record.fields["sport"] == "nba"
        assert game_record.record_change_tag is None

    def test_to_cloudkit_dict(self):
        """The dict form uses CloudKit key names and omits an unset change tag."""
        game_record = CloudKitRecord(
            record_name="nba_2025_hou_okc_1021",
            record_type=RecordType.GAME,
            fields={
                "sport": "nba",
                "season": 2025,
            },
        )

        payload = game_record.to_cloudkit_dict()

        assert payload["recordName"] == "nba_2025_hou_okc_1021"
        assert payload["recordType"] == "Game"
        assert "fields" in payload
        assert "recordChangeTag" not in payload

    def test_to_cloudkit_dict_with_change_tag(self):
        """A change tag, needed for updates, is included when present."""
        tagged_record = CloudKitRecord(
            record_name="nba_2025_hou_okc_1021",
            record_type=RecordType.GAME,
            fields={"sport": "nba"},
            record_change_tag="abc123",
        )

        payload = tagged_record.to_cloudkit_dict()

        assert payload["recordChangeTag"] == "abc123"

    def test_format_string_field(self):
        """A str value is emitted with type STRING."""
        payload = self._serialize(RecordType.GAME, {"name": "Test Name"})
        name_field = payload["fields"]["name"]

        assert name_field["value"] == "Test Name"
        assert name_field["type"] == "STRING"

    def test_format_int_field(self):
        """An int value is emitted with type INT64."""
        payload = self._serialize(RecordType.GAME, {"count": 42})
        count_field = payload["fields"]["count"]

        assert count_field["value"] == 42
        assert count_field["type"] == "INT64"

    def test_format_float_field(self):
        """A float value is emitted with type DOUBLE."""
        payload = self._serialize(RecordType.STADIUM, {"latitude": 35.4634})
        latitude_field = payload["fields"]["latitude"]

        assert latitude_field["value"] == 35.4634
        assert latitude_field["type"] == "DOUBLE"

    def test_format_datetime_field(self):
        """A datetime is emitted as epoch milliseconds with type TIMESTAMP."""
        tipoff = datetime(2025, 10, 21, 19, 0, 0)
        payload = self._serialize(RecordType.GAME, {"game_date": tipoff})
        date_field = payload["fields"]["game_date"]

        # Same conversion the test expects from the serialiser.
        epoch_millis = int(tipoff.timestamp() * 1000)
        assert date_field["value"] == epoch_millis
        assert date_field["type"] == "TIMESTAMP"

    def test_format_location_field(self):
        """A latitude/longitude dict is emitted with type LOCATION."""
        payload = self._serialize(
            RecordType.STADIUM,
            {
                "location": {"latitude": 35.4634, "longitude": -97.5151},
            },
        )
        location_field = payload["fields"]["location"]

        assert location_field["type"] == "LOCATION"
        assert location_field["value"]["latitude"] == 35.4634
        assert location_field["value"]["longitude"] == -97.5151

    def test_skip_none_fields(self):
        """Fields whose value is None are left out of the payload."""
        payload = self._serialize(
            RecordType.GAME,
            {
                "sport": "nba",
                "score": None,  # Should be skipped
            },
        )
        emitted_fields = payload["fields"]

        assert "sport" in emitted_fields
        assert "score" not in emitted_fields
class TestOperationResult:
    """Tests for the OperationResult dataclass."""

    def test_successful_result(self):
        """A successful result keeps its change tag and has no error code."""
        outcome = OperationResult(
            record_name="test_record",
            success=True,
            record_change_tag="new_tag",
        )

        assert outcome.record_name == "test_record"
        assert outcome.success is True
        assert outcome.record_change_tag == "new_tag"
        assert outcome.error_code is None

    def test_failed_result(self):
        """A failed result exposes the error code and message it was given."""
        outcome = OperationResult(
            record_name="test_record",
            success=False,
            error_code="SERVER_ERROR",
            error_message="Internal server error",
        )

        assert outcome.success is False
        assert outcome.error_code == "SERVER_ERROR"
        assert outcome.error_message == "Internal server error"
class TestBatchResult:
    """Tests for the BatchResult dataclass and its derived counters."""

    def test_empty_batch_result(self):
        """A fresh batch counts zero operations and reports all succeeded."""
        batch = BatchResult()

        assert batch.all_succeeded is True
        assert batch.success_count == 0
        assert batch.failure_count == 0

    def test_batch_with_successes(self):
        """Only successful operations: all_succeeded stays True."""
        batch = BatchResult()
        for name in ("rec1", "rec2"):
            batch.successful.append(OperationResult(name, True))

        assert batch.all_succeeded is True
        assert batch.success_count == 2
        assert batch.failure_count == 0

    def test_batch_with_failures(self):
        """One failure flips all_succeeded and is counted separately."""
        batch = BatchResult()
        batch.successful.append(OperationResult("rec1", True))
        batch.failed.append(OperationResult("rec2", False, error_message="Error"))

        assert batch.all_succeeded is False
        assert batch.success_count == 1
        assert batch.failure_count == 1
class TestCloudKitClient:
    """Tests for CloudKitClient configuration, requests and error mapping.

    Every request-level test needs the same three patches (environment
    credentials, PEM key loading, JWT encoding) and the same stubbed
    ``requests.Session``. That setup lives in the two private helpers
    below instead of being repeated in each test.
    """

    @staticmethod
    def _mocked_auth():
        """Return a context manager that fakes CloudKit credentials.

        While the returned context is active:

        * ``CLOUDKIT_KEY_ID`` and ``CLOUDKIT_PRIVATE_KEY`` are set in
          ``os.environ``;
        * ``serialization.load_pem_private_key`` returns a mock key whose
          ``sign()`` yields ``b"signature"``;
        * ``jwt.encode`` returns ``"test_token"``.

        Use it as ``with self._mocked_auth(): ...``; leaving the block
        undoes all three patches.
        """
        # Local import: only this helper needs contextlib.
        from contextlib import ExitStack

        signing_key = MagicMock()
        signing_key.sign.return_value = b"signature"

        with ExitStack() as stack:
            stack.enter_context(patch.dict("os.environ", {
                "CLOUDKIT_KEY_ID": "test_key",
                "CLOUDKIT_PRIVATE_KEY": "-----BEGIN EC PRIVATE KEY-----\ntest\n-----END EC PRIVATE KEY-----",
            }))
            stack.enter_context(patch(
                "sportstime_parser.uploaders.cloudkit.serialization.load_pem_private_key",
                return_value=signing_key,
            ))
            stack.enter_context(patch(
                "sportstime_parser.uploaders.cloudkit.jwt.encode",
                return_value="test_token",
            ))
            # pop_all() moves the live patches onto a new stack so they
            # outlive this ``with``. If an enter_context call above raised,
            # the patches already applied are rolled back here instead.
            return stack.pop_all()

    @staticmethod
    def _stub_session(mock_session_class, status_code, payload=None):
        """Make the patched Session class yield a session with a canned reply.

        Args:
            mock_session_class: The mock injected by
                ``@patch("...cloudkit.requests.Session")``.
            status_code: HTTP status the stubbed response reports.
            payload: Value returned by ``response.json()``; left as the
                MagicMock default when None.

        Returns:
            The mock session whose ``request()`` returns the stubbed response.
        """
        session = MagicMock()
        mock_session_class.return_value = session

        response = MagicMock()
        response.status_code = status_code
        if payload is not None:
            response.json.return_value = payload
        session.request.return_value = response
        return session

    def test_not_configured_without_credentials(self):
        """Test that client reports not configured without credentials."""
        with patch.dict("os.environ", {}, clear=True):
            client = CloudKitClient()
            assert client.is_configured is False

    def test_configured_with_credentials(self):
        """Test that client reports configured with credentials."""
        # A minimal mock stands in for the private key.
        mock_key = MagicMock()
        with patch.dict("os.environ", {
            "CLOUDKIT_KEY_ID": "test_key_id",
            "CLOUDKIT_PRIVATE_KEY": "-----BEGIN EC PRIVATE KEY-----\ntest\n-----END EC PRIVATE KEY-----",
        }):
            with patch("sportstime_parser.uploaders.cloudkit.serialization.load_pem_private_key") as mock_load:
                mock_load.return_value = mock_key
                client = CloudKitClient()
                assert client.is_configured is True

    def test_get_api_path(self):
        """Test API path construction."""
        client = CloudKitClient(
            container_id="iCloud.com.test.app",
            environment="development",
        )
        path = client._get_api_path("records/query")
        assert path == "/database/1/iCloud.com.test.app/development/public/records/query"

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_fetch_records_query(self, mock_session_class):
        """Test fetching records with query."""
        self._stub_session(mock_session_class, 200, {
            "records": [
                {"recordName": "rec1", "recordType": "Game"},
                {"recordName": "rec2", "recordType": "Game"},
            ]
        })

        with self._mocked_auth():
            client = CloudKitClient()
            records = client.fetch_records(RecordType.GAME)

        assert len(records) == 2
        assert records[0]["recordName"] == "rec1"

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_save_records_success(self, mock_session_class):
        """Test saving records successfully."""
        self._stub_session(mock_session_class, 200, {
            "records": [
                {"recordName": "rec1", "recordChangeTag": "tag1"},
                {"recordName": "rec2", "recordChangeTag": "tag2"},
            ]
        })

        with self._mocked_auth():
            client = CloudKitClient()
            records = [
                CloudKitRecord("rec1", RecordType.GAME, {"sport": "nba"}),
                CloudKitRecord("rec2", RecordType.GAME, {"sport": "nba"}),
            ]
            result = client.save_records(records)

        assert result.success_count == 2
        assert result.failure_count == 0

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_save_records_partial_failure(self, mock_session_class):
        """Test saving records with some failures."""
        self._stub_session(mock_session_class, 200, {
            "records": [
                {"recordName": "rec1", "recordChangeTag": "tag1"},
                {"recordName": "rec2", "serverErrorCode": "QUOTA_EXCEEDED", "reason": "Quota exceeded"},
            ]
        })

        with self._mocked_auth():
            client = CloudKitClient()
            records = [
                CloudKitRecord("rec1", RecordType.GAME, {"sport": "nba"}),
                CloudKitRecord("rec2", RecordType.GAME, {"sport": "nba"}),
            ]
            result = client.save_records(records)

        assert result.success_count == 1
        assert result.failure_count == 1
        assert result.failed[0].error_code == "QUOTA_EXCEEDED"

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_auth_error(self, mock_session_class):
        """Test handling authentication error (HTTP 421)."""
        self._stub_session(mock_session_class, 421)

        with self._mocked_auth():
            client = CloudKitClient()
            with pytest.raises(CloudKitAuthError):
                client.fetch_records(RecordType.GAME)

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_rate_limit_error(self, mock_session_class):
        """Test handling rate limit error (HTTP 429)."""
        self._stub_session(mock_session_class, 429)

        with self._mocked_auth():
            client = CloudKitClient()
            with pytest.raises(CloudKitRateLimitError):
                client.fetch_records(RecordType.GAME)

    @patch("sportstime_parser.uploaders.cloudkit.requests.Session")
    def test_server_error(self, mock_session_class):
        """Test handling server error (HTTP 503)."""
        self._stub_session(mock_session_class, 503)

        with self._mocked_auth():
            client = CloudKitClient()
            with pytest.raises(CloudKitServerError):
                client.fetch_records(RecordType.GAME)
class TestRecordType:
    """Checks on the RecordType enum."""

    def test_record_type_values(self):
        """Every member's value must equal the record type name the tests expect."""
        # (member, expected value) pairs, checked in declaration order.
        expectations = [
            (RecordType.GAME, "Game"),
            (RecordType.TEAM, "Team"),
            (RecordType.STADIUM, "Stadium"),
            (RecordType.TEAM_ALIAS, "TeamAlias"),
            (RecordType.STADIUM_ALIAS, "StadiumAlias"),
        ]
        for member, wire_name in expectations:
            assert member.value == wire_name

View File

@@ -0,0 +1,350 @@
"""Tests for the record differ."""
import pytest
from datetime import datetime
from sportstime_parser.models.game import Game
from sportstime_parser.models.team import Team
from sportstime_parser.models.stadium import Stadium
from sportstime_parser.uploaders.diff import (
DiffAction,
RecordDiff,
DiffResult,
RecordDiffer,
game_to_cloudkit_record,
team_to_cloudkit_record,
stadium_to_cloudkit_record,
)
from sportstime_parser.uploaders.cloudkit import RecordType
class TestRecordDiff:
    """Checks on the RecordDiff dataclass."""

    def test_create_record_diff(self):
        """Constructor arguments must be readable back from the instance."""
        name = "nba_2025_hou_okc_1021"
        kind = RecordType.GAME
        operation = DiffAction.CREATE

        entry = RecordDiff(record_name=name, record_type=kind, action=operation)

        assert entry.record_name == name
        assert entry.record_type == kind
        assert entry.action == operation
class TestDiffResult:
    """Checks on the DiffResult dataclass."""

    def test_empty_result(self):
        """A freshly constructed DiffResult reports zero for every counter."""
        blank = DiffResult()

        assert blank.create_count == 0
        assert blank.update_count == 0
        assert blank.delete_count == 0
        assert blank.unchanged_count == 0
        assert blank.total_changes == 0

    def test_counts(self):
        """Counters reflect the entries appended to each bucket."""
        result = DiffResult()

        # (target bucket, record name, action) in the order they are appended.
        planned = [
            (result.creates, "game_1", DiffAction.CREATE),
            (result.creates, "game_2", DiffAction.CREATE),
            (result.updates, "game_3", DiffAction.UPDATE),
            (result.deletes, "game_4", DiffAction.DELETE),
            (result.unchanged, "game_5", DiffAction.UNCHANGED),
        ]
        for bucket, name, action in planned:
            bucket.append(
                RecordDiff(
                    record_name=name,
                    record_type=RecordType.GAME,
                    action=action,
                )
            )

        assert result.create_count == 2
        assert result.update_count == 1
        assert result.delete_count == 1
        assert result.unchanged_count == 1
        # The unchanged entry is not counted as a change.
        assert result.total_changes == 4
class TestRecordDiffer:
    """Checks on RecordDiffer."""

    @staticmethod
    def _epoch_millis(moment):
        """Return ``moment.timestamp()`` scaled to milliseconds and truncated to int."""
        return int(moment.timestamp() * 1000)

    @staticmethod
    def _remote_game(name, fields, change_tag):
        """Build the remote Game record dict these tests pass to the differ."""
        return {
            "recordName": name,
            "recordType": "Game",
            "fields": fields,
            "recordChangeTag": change_tag,
        }

    @classmethod
    def _okc_fields(cls, game, status):
        """Full remote field set for the HOU @ OKC game with the given status."""
        return {
            "sport": {"value": "nba", "type": "STRING"},
            "season": {"value": 2025, "type": "INT64"},
            "home_team_id": {"value": "team_nba_okc", "type": "STRING"},
            "away_team_id": {"value": "team_nba_hou", "type": "STRING"},
            "stadium_id": {"value": "stadium_nba_paycom_center", "type": "STRING"},
            "game_date": {"value": cls._epoch_millis(game.game_date), "type": "TIMESTAMP"},
            "game_number": {"value": None, "type": "INT64"},
            "home_score": {"value": None, "type": "INT64"},
            "away_score": {"value": None, "type": "INT64"},
            "status": {"value": status, "type": "STRING"},
        }

    @pytest.fixture
    def differ(self):
        """Provide a fresh RecordDiffer."""
        return RecordDiffer()

    @pytest.fixture
    def sample_game(self):
        """Provide a scheduled HOU @ OKC game on 2025-10-21."""
        tipoff = datetime(2025, 10, 21, 19, 0, 0)
        return Game(
            id="nba_2025_hou_okc_1021",
            sport="nba",
            season=2025,
            home_team_id="team_nba_okc",
            away_team_id="team_nba_hou",
            stadium_id="stadium_nba_paycom_center",
            game_date=tipoff,
            status="scheduled",
        )

    @pytest.fixture
    def sample_team(self):
        """Provide the Oklahoma City Thunder team record."""
        return Team(
            id="team_nba_okc",
            sport="nba",
            city="Oklahoma City",
            name="Thunder",
            full_name="Oklahoma City Thunder",
            abbreviation="OKC",
            conference="Western",
            division="Northwest",
        )

    @pytest.fixture
    def sample_stadium(self):
        """Provide the Paycom Center stadium record."""
        return Stadium(
            id="stadium_nba_paycom_center",
            sport="nba",
            name="Paycom Center",
            city="Oklahoma City",
            state="OK",
            country="USA",
            latitude=35.4634,
            longitude=-97.5151,
            capacity=18203,
        )

    def test_diff_games_create(self, differ, sample_game):
        """A local game with no remote counterpart is reported as a create."""
        outcome = differ.diff_games([sample_game], [])

        assert outcome.create_count == 1
        assert outcome.update_count == 0
        assert outcome.delete_count == 0
        assert outcome.creates[0].record_name == sample_game.id

    def test_diff_games_delete(self, differ, sample_game):
        """A remote game with no local counterpart is reported as a delete."""
        partial_fields = {
            "sport": {"value": "nba", "type": "STRING"},
            "season": {"value": 2025, "type": "INT64"},
        }
        remote = [self._remote_game(sample_game.id, partial_fields, "abc123")]

        outcome = differ.diff_games([], remote)

        assert outcome.create_count == 0
        assert outcome.delete_count == 1
        assert outcome.deletes[0].record_name == sample_game.id

    def test_diff_games_unchanged(self, differ, sample_game):
        """A remote record mirroring the local game is reported as unchanged."""
        remote = [
            self._remote_game(
                sample_game.id,
                self._okc_fields(sample_game, "scheduled"),
                "abc123",
            )
        ]

        outcome = differ.diff_games([sample_game], remote)

        assert outcome.create_count == 0
        assert outcome.update_count == 0
        assert outcome.unchanged_count == 1

    def test_diff_games_update(self, differ, sample_game):
        """A remote record with a different status is reported as an update."""
        # Remote says "postponed" while the local game is "scheduled".
        remote = [
            self._remote_game(
                sample_game.id,
                self._okc_fields(sample_game, "postponed"),
                "abc123",
            )
        ]

        outcome = differ.diff_games([sample_game], remote)

        assert outcome.update_count == 1
        pending_update = outcome.updates[0]
        assert "status" in pending_update.changed_fields
        assert pending_update.record_change_tag == "abc123"

    def test_diff_teams_create(self, differ, sample_team):
        """A local team with no remote counterpart is reported as a create."""
        outcome = differ.diff_teams([sample_team], [])

        assert outcome.create_count == 1
        assert outcome.creates[0].record_name == sample_team.id

    def test_diff_stadiums_create(self, differ, sample_stadium):
        """A local stadium with no remote counterpart is reported as a create."""
        outcome = differ.diff_stadiums([sample_stadium], [])

        assert outcome.create_count == 1
        assert outcome.creates[0].record_name == sample_stadium.id

    def test_get_records_to_upload(self, differ, sample_game):
        """get_records_to_upload returns both the created and the updated record."""
        second_game = Game(
            id="nba_2025_lal_lac_1022",
            sport="nba",
            season=2025,
            home_team_id="team_nba_lac",
            away_team_id="team_nba_lal",
            stadium_id="stadium_nba_crypto_com",
            game_date=datetime(2025, 10, 22, 19, 0, 0),
            status="scheduled",
        )

        # Only the second game exists remotely, and with a different status.
        remote_fields = {
            "sport": {"value": "nba", "type": "STRING"},
            "season": {"value": 2025, "type": "INT64"},
            "home_team_id": {"value": "team_nba_lac", "type": "STRING"},
            "away_team_id": {"value": "team_nba_lal", "type": "STRING"},
            "stadium_id": {"value": "stadium_nba_crypto_com", "type": "STRING"},
            "game_date": {"value": self._epoch_millis(second_game.game_date), "type": "TIMESTAMP"},
            "status": {"value": "postponed", "type": "STRING"},
        }
        remote = [self._remote_game(second_game.id, remote_fields, "xyz789")]

        outcome = differ.diff_games([sample_game, second_game], remote)
        uploads = outcome.get_records_to_upload()

        # One create (sample_game) plus one update (second_game).
        assert len(uploads) == 2
        uploaded_names = [entry.record_name for entry in uploads]
        assert sample_game.id in uploaded_names
        assert second_game.id in uploaded_names
class TestConvenienceFunctions:
    """Checks on the module-level *_to_cloudkit_record helpers."""

    def test_game_to_cloudkit_record(self):
        """A converted Game keeps its id, is typed GAME and carries sport/season fields."""
        source = Game(
            id="nba_2025_hou_okc_1021",
            sport="nba",
            season=2025,
            home_team_id="team_nba_okc",
            away_team_id="team_nba_hou",
            stadium_id="stadium_nba_paycom_center",
            game_date=datetime(2025, 10, 21, 19, 0, 0),
            status="scheduled",
        )

        converted = game_to_cloudkit_record(source)

        assert converted.record_name == source.id
        assert converted.record_type == RecordType.GAME
        payload = converted.fields
        assert payload["sport"] == "nba"
        assert payload["season"] == 2025

    def test_team_to_cloudkit_record(self):
        """A converted Team keeps its id, is typed TEAM and carries city/name fields."""
        source = Team(
            id="team_nba_okc",
            sport="nba",
            city="Oklahoma City",
            name="Thunder",
            full_name="Oklahoma City Thunder",
            abbreviation="OKC",
        )

        converted = team_to_cloudkit_record(source)

        assert converted.record_name == source.id
        assert converted.record_type == RecordType.TEAM
        payload = converted.fields
        assert payload["city"] == "Oklahoma City"
        assert payload["name"] == "Thunder"

    def test_stadium_to_cloudkit_record(self):
        """A converted Stadium keeps its id, is typed STADIUM and carries name/latitude."""
        source = Stadium(
            id="stadium_nba_paycom_center",
            sport="nba",
            name="Paycom Center",
            city="Oklahoma City",
            state="OK",
            country="USA",
            latitude=35.4634,
            longitude=-97.5151,
        )

        converted = stadium_to_cloudkit_record(source)

        assert converted.record_name == source.id
        assert converted.record_type == RecordType.STADIUM
        payload = converted.fields
        assert payload["name"] == "Paycom Center"
        assert payload["latitude"] == 35.4634

View File

@@ -0,0 +1,472 @@
"""Tests for the upload state manager."""
import json
import pytest
from datetime import datetime, timedelta
from pathlib import Path
from tempfile import TemporaryDirectory
from sportstime_parser.uploaders.state import (
RecordState,
UploadSession,
StateManager,
)
class TestRecordState:
    """Tests for RecordState dataclass."""

    def test_create_record_state(self):
        """Test creating a RecordState with default values."""
        state = RecordState(
            record_name="nba_2025_hou_okc_1021",
            record_type="Game",
        )

        assert state.record_name == "nba_2025_hou_okc_1021"
        assert state.record_type == "Game"
        # Everything not passed to the constructor should be at its default.
        assert state.status == "pending"
        assert state.uploaded_at is None
        assert state.record_change_tag is None
        assert state.error_message is None
        assert state.retry_count == 0

    def test_record_state_to_dict(self):
        """Test serializing RecordState to dictionary."""
        # Fixed naive timestamp instead of datetime.utcnow(): utcnow() is
        # deprecated since Python 3.12 and made the input differ on every run.
        # Non-zero microseconds keep the fractional-second part of the ISO
        # string under test, as a wall-clock value normally would.
        uploaded_at = datetime(2026, 1, 10, 12, 0, 0, 123456)
        state = RecordState(
            record_name="nba_2025_hou_okc_1021",
            record_type="Game",
            uploaded_at=uploaded_at,
            record_change_tag="abc123",
            status="uploaded",
        )

        data = state.to_dict()

        assert data["record_name"] == "nba_2025_hou_okc_1021"
        assert data["record_type"] == "Game"
        assert data["status"] == "uploaded"
        assert data["uploaded_at"] == uploaded_at.isoformat()
        assert data["record_change_tag"] == "abc123"

    def test_record_state_from_dict(self):
        """Test deserializing RecordState from dictionary."""
        data = {
            "record_name": "nba_2025_hou_okc_1021",
            "record_type": "Game",
            "uploaded_at": "2026-01-10T12:00:00",
            "record_change_tag": "abc123",
            "status": "uploaded",
            "error_message": None,
            "retry_count": 0,
        }

        state = RecordState.from_dict(data)

        assert state.record_name == "nba_2025_hou_okc_1021"
        assert state.record_type == "Game"
        assert state.status == "uploaded"
        # The ISO string must come back as the equivalent datetime object.
        assert state.uploaded_at == datetime.fromisoformat("2026-01-10T12:00:00")
        assert state.record_change_tag == "abc123"
class TestUploadSession:
    """Checks on the UploadSession dataclass."""

    @staticmethod
    def _new_session():
        """Build the empty NBA 2025 development session every test starts from."""
        return UploadSession(
            sport="nba",
            season=2025,
            environment="development",
        )

    def test_create_upload_session(self):
        """A new session exposes its constructor arguments and holds no records."""
        fresh = self._new_session()

        assert fresh.sport == "nba"
        assert fresh.season == 2025
        assert fresh.environment == "development"
        assert fresh.total_count == 0
        assert len(fresh.records) == 0

    def test_add_record(self):
        """Added records are counted and retrievable by name."""
        session = self._new_session()
        for name, kind in (("game_1", "Game"), ("game_2", "Game"), ("team_1", "Team")):
            session.add_record(name, kind)

        assert session.total_count == 3
        assert len(session.records) == 3
        assert "game_1" in session.records
        assert session.records["game_1"].record_type == "Game"

    def test_mark_uploaded(self):
        """mark_uploaded sets status, change tag and an upload timestamp."""
        session = self._new_session()
        session.add_record("game_1", "Game")

        session.mark_uploaded("game_1", "change_tag_123")

        entry = session.records["game_1"]
        assert entry.status == "uploaded"
        assert entry.record_change_tag == "change_tag_123"
        assert entry.uploaded_at is not None

    def test_mark_failed(self):
        """mark_failed sets status and error message and counts one retry."""
        session = self._new_session()
        session.add_record("game_1", "Game")

        session.mark_failed("game_1", "Server error")

        entry = session.records["game_1"]
        assert entry.status == "failed"
        assert entry.error_message == "Server error"
        assert entry.retry_count == 1

    def test_mark_failed_increments_retry_count(self):
        """Each mark_failed call adds one to the record's retry count."""
        session = self._new_session()
        session.add_record("game_1", "Game")

        for message in ("Error 1", "Error 2", "Error 3"):
            session.mark_failed("game_1", message)

        assert session.records["game_1"].retry_count == 3

    def test_counts(self):
        """Uploaded, failed and pending counters each track their own records."""
        session = self._new_session()
        for name in ("game_1", "game_2", "game_3"):
            session.add_record(name, "Game")

        session.mark_uploaded("game_1")
        session.mark_failed("game_2", "Error")

        assert session.uploaded_count == 1
        assert session.failed_count == 1
        assert session.pending_count == 1

    def test_is_complete(self):
        """is_complete only turns truthy once every record has been uploaded."""
        session = self._new_session()
        for name in ("game_1", "game_2"):
            session.add_record(name, "Game")
        assert not session.is_complete

        session.mark_uploaded("game_1")
        assert not session.is_complete

        session.mark_uploaded("game_2")
        assert session.is_complete

    def test_progress_percent(self):
        """One uploaded record out of four reports 25 percent progress."""
        session = self._new_session()
        for index in range(1, 5):
            session.add_record(f"game_{index}", "Game")

        session.mark_uploaded("game_1")

        assert session.progress_percent == 25.0

    def test_get_pending_records(self):
        """Only records that were neither uploaded nor failed are pending."""
        session = self._new_session()
        for name in ("game_1", "game_2", "game_3"):
            session.add_record(name, "Game")
        session.mark_uploaded("game_1")
        session.mark_failed("game_2", "Error")

        still_pending = session.get_pending_records()

        assert still_pending == ["game_3"]

    def test_get_failed_records(self):
        """get_failed_records returns exactly the records marked failed."""
        session = self._new_session()
        for name in ("game_1", "game_2", "game_3"):
            session.add_record(name, "Game")
        session.mark_failed("game_1", "Error 1")
        session.mark_failed("game_3", "Error 3")

        failures = session.get_failed_records()

        # Compared as a set, so the returned order is not part of the check.
        assert set(failures) == {"game_1", "game_3"}

    def test_get_retryable_records(self):
        """A record that already failed max_retries times is not retryable."""
        session = self._new_session()
        for name in ("game_1", "game_2", "game_3"):
            session.add_record(name, "Game")

        # game_1 fails once; game_2 fails three times, reaching max_retries.
        session.mark_failed("game_1", "Error")
        for _ in range(3):
            session.mark_failed("game_2", "Error")

        eligible = session.get_retryable_records(max_retries=3)

        assert eligible == ["game_1"]

    def test_to_dict_and_from_dict(self):
        """A session survives a to_dict / from_dict round trip."""
        session = self._new_session()
        for name in ("game_1", "game_2"):
            session.add_record(name, "Game")
        session.mark_uploaded("game_1", "tag_123")

        restored = UploadSession.from_dict(session.to_dict())

        assert restored.sport == session.sport
        assert restored.season == session.season
        assert restored.environment == session.environment
        assert restored.total_count == session.total_count
        assert restored.uploaded_count == session.uploaded_count
        assert restored.records["game_1"].status == "uploaded"
class TestStateManager:
    """Checks on StateManager, each run against a throwaway state directory."""

    def test_create_session(self):
        """create_session returns the populated session and a state file appears."""
        with TemporaryDirectory() as workdir:
            state_dir = Path(workdir)
            mgr = StateManager(state_dir=state_dir)
            wanted = [
                ("game_1", "Game"),
                ("game_2", "Game"),
                ("team_1", "Team"),
            ]

            created = mgr.create_session(
                sport="nba",
                season=2025,
                environment="development",
                record_names=wanted,
            )

            assert created.sport == "nba"
            assert created.season == 2025
            assert created.total_count == 3

            # The state file is expected under this name in the state directory.
            expected_file = state_dir / "upload_state_nba_2025_development.json"
            assert expected_file.exists()

    def test_load_session(self):
        """A saved session can be loaded back with its progress intact."""
        with TemporaryDirectory() as workdir:
            mgr = StateManager(state_dir=Path(workdir))

            # Create a session, record progress on it and save it.
            saved = mgr.create_session(
                sport="nba",
                season=2025,
                environment="development",
                record_names=[("game_1", "Game")],
            )
            saved.mark_uploaded("game_1", "tag_123")
            mgr.save_session(saved)

            reloaded = mgr.load_session("nba", 2025, "development")

            assert reloaded is not None
            assert reloaded.sport == "nba"
            assert reloaded.records["game_1"].status == "uploaded"

    def test_load_nonexistent_session(self):
        """Loading a session that was never created yields None."""
        with TemporaryDirectory() as workdir:
            mgr = StateManager(state_dir=Path(workdir))

            missing = mgr.load_session("nba", 2025, "development")

            assert missing is None

    def test_delete_session(self):
        """Deleting an existing session returns True and it can no longer be loaded."""
        with TemporaryDirectory() as workdir:
            mgr = StateManager(state_dir=Path(workdir))
            mgr.create_session(
                sport="nba",
                season=2025,
                environment="development",
                record_names=[("game_1", "Game")],
            )

            removed = mgr.delete_session("nba", 2025, "development")
            assert removed is True

            # A later load must not find the deleted session.
            assert mgr.load_session("nba", 2025, "development") is None

    def test_delete_nonexistent_session(self):
        """Deleting a session that was never created returns False."""
        with TemporaryDirectory() as workdir:
            mgr = StateManager(state_dir=Path(workdir))

            removed = mgr.delete_session("nba", 2025, "development")

            assert removed is False

    def test_list_sessions(self):
        """list_sessions reports every session that was created."""
        with TemporaryDirectory() as workdir:
            mgr = StateManager(state_dir=Path(workdir))

            # Two sessions that differ in sport, season and environment.
            mgr.create_session(
                sport="nba",
                season=2025,
                environment="development",
                record_names=[("game_1", "Game")],
            )
            mgr.create_session(
                sport="mlb",
                season=2026,
                environment="production",
                record_names=[("game_2", "Game"), ("game_3", "Game")],
            )

            listed = mgr.list_sessions()

            assert len(listed) == 2
            listed_sports = {entry["sport"] for entry in listed}
            assert listed_sports == {"nba", "mlb"}

    def test_get_session_or_create_new(self):
        """With no stored session, get_session_or_create builds a new one."""
        with TemporaryDirectory() as workdir:
            mgr = StateManager(state_dir=Path(workdir))

            obtained = mgr.get_session_or_create(
                sport="nba",
                season=2025,
                environment="development",
                record_names=[("game_1", "Game")],
                resume=False,
            )

            assert obtained.sport == "nba"
            assert obtained.total_count == 1

    def test_get_session_or_create_resume(self):
        """resume=True keeps stored progress and adds records not seen before."""
        with TemporaryDirectory() as workdir:
            mgr = StateManager(state_dir=Path(workdir))

            # Initial session with one of its two records already uploaded.
            first_run = mgr.create_session(
                sport="nba",
                season=2025,
                environment="development",
                record_names=[("game_1", "Game"), ("game_2", "Game")],
            )
            first_run.mark_uploaded("game_1", "tag_123")
            mgr.save_session(first_run)

            # Resume with the original records plus game_3.
            resumed = mgr.get_session_or_create(
                sport="nba",
                season=2025,
                environment="development",
                record_names=[("game_1", "Game"), ("game_2", "Game"), ("game_3", "Game")],
                resume=True,
            )

            assert resumed.records["game_1"].status == "uploaded"
            assert "game_3" in resumed.records
            assert resumed.total_count == 3

    def test_get_session_or_create_overwrite(self):
        """resume=False discards the stored session and starts a fresh one."""
        with TemporaryDirectory() as workdir:
            mgr = StateManager(state_dir=Path(workdir))

            # Initial session with one of its two records already uploaded.
            first_run = mgr.create_session(
                sport="nba",
                season=2025,
                environment="development",
                record_names=[("game_1", "Game"), ("game_2", "Game")],
            )
            first_run.mark_uploaded("game_1", "tag_123")
            mgr.save_session(first_run)

            # Ask for a session again without resuming.
            replacement = mgr.get_session_or_create(
                sport="nba",
                season=2025,
                environment="development",
                record_names=[("game_3", "Game")],
                resume=False,
            )

            # Only the newly requested record may be present.
            assert replacement.total_count == 1
            assert "game_1" not in replacement.records
            assert "game_3" in replacement.records