feat(scripts): add sportstime-parser data pipeline
Complete Python package for scraping, normalizing, and uploading sports schedule data to CloudKit.

Includes:
- Multi-source scrapers for NBA, MLB, NFL, NHL, MLS, WNBA, NWSL
- Canonical ID system for teams, stadiums, and games
- Fuzzy matching with manual alias support
- CloudKit uploader with batch operations and deduplication
- Comprehensive test suite with fixtures
- WNBA abbreviation aliases for improved team resolution
- Alias validation script to detect orphan references

All 5 phases of data remediation plan completed:
- Phase 1: Alias fixes (team/stadium alias additions)
- Phase 2: NHL stadium coordinate fixes
- Phase 3: Re-scrape validation
- Phase 4: iOS bundle update
- Phase 5: Code quality improvements (WNBA aliases)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
1
sportstime_parser/tests/__init__.py
Normal file
1
sportstime_parser/tests/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Unit tests for sportstime_parser."""
|
||||
48
sportstime_parser/tests/fixtures/__init__.py
vendored
Normal file
48
sportstime_parser/tests/fixtures/__init__.py
vendored
Normal file
@@ -0,0 +1,48 @@
|
||||
"""Test fixtures for sportstime-parser tests."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
# Root of the fixture tree: the directory that contains this module.
FIXTURES_DIR = Path(__file__).parent

# NBA: two Basketball-Reference schedule pages plus an ESPN scoreboard payload.
NBA_FIXTURES_DIR = FIXTURES_DIR.joinpath("nba")
NBA_BR_OCTOBER_HTML = NBA_FIXTURES_DIR.joinpath("basketball_reference_october.html")
NBA_BR_EDGE_CASES_HTML = NBA_FIXTURES_DIR.joinpath("basketball_reference_edge_cases.html")
NBA_ESPN_SCOREBOARD_JSON = NBA_FIXTURES_DIR.joinpath("espn_scoreboard.json")

# MLB: ESPN scoreboard payload.
MLB_FIXTURES_DIR = FIXTURES_DIR.joinpath("mlb")
MLB_ESPN_SCOREBOARD_JSON = MLB_FIXTURES_DIR.joinpath("espn_scoreboard.json")

# NFL: ESPN scoreboard payload.
NFL_FIXTURES_DIR = FIXTURES_DIR.joinpath("nfl")
NFL_ESPN_SCOREBOARD_JSON = NFL_FIXTURES_DIR.joinpath("espn_scoreboard.json")

# NHL: ESPN scoreboard payload.
NHL_FIXTURES_DIR = FIXTURES_DIR.joinpath("nhl")
NHL_ESPN_SCOREBOARD_JSON = NHL_FIXTURES_DIR.joinpath("espn_scoreboard.json")

# MLS: ESPN scoreboard payload.
MLS_FIXTURES_DIR = FIXTURES_DIR.joinpath("mls")
MLS_ESPN_SCOREBOARD_JSON = MLS_FIXTURES_DIR.joinpath("espn_scoreboard.json")

# WNBA: ESPN scoreboard payload.
WNBA_FIXTURES_DIR = FIXTURES_DIR.joinpath("wnba")
WNBA_ESPN_SCOREBOARD_JSON = WNBA_FIXTURES_DIR.joinpath("espn_scoreboard.json")

# NWSL: ESPN scoreboard payload.
NWSL_FIXTURES_DIR = FIXTURES_DIR.joinpath("nwsl")
NWSL_ESPN_SCOREBOARD_JSON = NWSL_FIXTURES_DIR.joinpath("espn_scoreboard.json")
|
||||
|
||||
|
||||
def load_fixture(path: Path) -> str:
    """Return the entire contents of a fixture file as text.

    Args:
        path: Location of the fixture file to read.

    Returns:
        The file's contents decoded as UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding.
    with open(path, encoding="utf-8") as stream:
        contents = stream.read()
    return contents
|
||||
|
||||
|
||||
def load_json_fixture(path: Path) -> dict:
    """Read a JSON fixture file and return the decoded document.

    Args:
        path: Location of the JSON fixture file to read.

    Returns:
        The value produced by parsing the file's UTF-8 text as JSON.
    """
    # Local import: json is only needed by this helper.
    import json

    with open(path, encoding="utf-8") as stream:
        raw_text = stream.read()
    return json.loads(raw_text)
|
||||
245
sportstime_parser/tests/fixtures/mlb/espn_scoreboard.json
vendored
Normal file
245
sportstime_parser/tests/fixtures/mlb/espn_scoreboard.json
vendored
Normal file
@@ -0,0 +1,245 @@
|
||||
{
|
||||
"leagues": [
|
||||
{
|
||||
"id": "10",
|
||||
"uid": "s:1~l:10",
|
||||
"name": "Major League Baseball",
|
||||
"abbreviation": "MLB"
|
||||
}
|
||||
],
|
||||
"season": {
|
||||
"type": 2,
|
||||
"year": 2026
|
||||
},
|
||||
"day": {
|
||||
"date": "2026-04-15T00:00:00Z"
|
||||
},
|
||||
"events": [
|
||||
{
|
||||
"id": "401584801",
|
||||
"uid": "s:1~l:10~e:401584801",
|
||||
"date": "2026-04-15T23:05:00Z",
|
||||
"name": "New York Yankees at Boston Red Sox",
|
||||
"shortName": "NYY @ BOS",
|
||||
"competitions": [
|
||||
{
|
||||
"id": "401584801",
|
||||
"uid": "s:1~l:10~e:401584801~c:401584801",
|
||||
"date": "2026-04-15T23:05:00Z",
|
||||
"attendance": 37435,
|
||||
"type": {
|
||||
"id": "1",
|
||||
"abbreviation": "STD"
|
||||
},
|
||||
"venue": {
|
||||
"id": "3",
|
||||
"fullName": "Fenway Park",
|
||||
"address": {
|
||||
"city": "Boston",
|
||||
"state": "MA"
|
||||
},
|
||||
"capacity": 37755,
|
||||
"indoor": false
|
||||
},
|
||||
"competitors": [
|
||||
{
|
||||
"id": "2",
|
||||
"uid": "s:1~l:10~t:2",
|
||||
"type": "team",
|
||||
"order": 0,
|
||||
"homeAway": "home",
|
||||
"team": {
|
||||
"id": "2",
|
||||
"uid": "s:1~l:10~t:2",
|
||||
"location": "Boston",
|
||||
"name": "Red Sox",
|
||||
"abbreviation": "BOS",
|
||||
"displayName": "Boston Red Sox"
|
||||
},
|
||||
"score": "5",
|
||||
"winner": true
|
||||
},
|
||||
{
|
||||
"id": "10",
|
||||
"uid": "s:1~l:10~t:10",
|
||||
"type": "team",
|
||||
"order": 1,
|
||||
"homeAway": "away",
|
||||
"team": {
|
||||
"id": "10",
|
||||
"uid": "s:1~l:10~t:10",
|
||||
"location": "New York",
|
||||
"name": "Yankees",
|
||||
"abbreviation": "NYY",
|
||||
"displayName": "New York Yankees"
|
||||
},
|
||||
"score": "3",
|
||||
"winner": false
|
||||
}
|
||||
],
|
||||
"status": {
|
||||
"clock": 0,
|
||||
"displayClock": "0:00",
|
||||
"period": 9,
|
||||
"type": {
|
||||
"id": "3",
|
||||
"name": "STATUS_FINAL",
|
||||
"state": "post",
|
||||
"completed": true
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "401584802",
|
||||
"uid": "s:1~l:10~e:401584802",
|
||||
"date": "2026-04-15T20:10:00Z",
|
||||
"name": "Chicago Cubs at St. Louis Cardinals",
|
||||
"shortName": "CHC @ STL",
|
||||
"competitions": [
|
||||
{
|
||||
"id": "401584802",
|
||||
"uid": "s:1~l:10~e:401584802~c:401584802",
|
||||
"date": "2026-04-15T20:10:00Z",
|
||||
"type": {
|
||||
"id": "1",
|
||||
"abbreviation": "STD"
|
||||
},
|
||||
"venue": {
|
||||
"id": "87",
|
||||
"fullName": "Busch Stadium",
|
||||
"address": {
|
||||
"city": "St. Louis",
|
||||
"state": "MO"
|
||||
},
|
||||
"capacity": 45538,
|
||||
"indoor": false
|
||||
},
|
||||
"competitors": [
|
||||
{
|
||||
"id": "24",
|
||||
"uid": "s:1~l:10~t:24",
|
||||
"type": "team",
|
||||
"order": 0,
|
||||
"homeAway": "home",
|
||||
"team": {
|
||||
"id": "24",
|
||||
"uid": "s:1~l:10~t:24",
|
||||
"location": "St. Louis",
|
||||
"name": "Cardinals",
|
||||
"abbreviation": "STL",
|
||||
"displayName": "St. Louis Cardinals"
|
||||
},
|
||||
"score": "7",
|
||||
"winner": true
|
||||
},
|
||||
{
|
||||
"id": "16",
|
||||
"uid": "s:1~l:10~t:16",
|
||||
"type": "team",
|
||||
"order": 1,
|
||||
"homeAway": "away",
|
||||
"team": {
|
||||
"id": "16",
|
||||
"uid": "s:1~l:10~t:16",
|
||||
"location": "Chicago",
|
||||
"name": "Cubs",
|
||||
"abbreviation": "CHC",
|
||||
"displayName": "Chicago Cubs"
|
||||
},
|
||||
"score": "4",
|
||||
"winner": false
|
||||
}
|
||||
],
|
||||
"status": {
|
||||
"clock": 0,
|
||||
"displayClock": "0:00",
|
||||
"period": 9,
|
||||
"type": {
|
||||
"id": "3",
|
||||
"name": "STATUS_FINAL",
|
||||
"state": "post",
|
||||
"completed": true
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "401584803",
|
||||
"uid": "s:1~l:10~e:401584803",
|
||||
"date": "2026-04-16T00:10:00Z",
|
||||
"name": "Los Angeles Dodgers at San Francisco Giants",
|
||||
"shortName": "LAD @ SF",
|
||||
"competitions": [
|
||||
{
|
||||
"id": "401584803",
|
||||
"uid": "s:1~l:10~e:401584803~c:401584803",
|
||||
"date": "2026-04-16T00:10:00Z",
|
||||
"type": {
|
||||
"id": "1",
|
||||
"abbreviation": "STD"
|
||||
},
|
||||
"venue": {
|
||||
"id": "116",
|
||||
"fullName": "Oracle Park",
|
||||
"address": {
|
||||
"city": "San Francisco",
|
||||
"state": "CA"
|
||||
},
|
||||
"capacity": 41915,
|
||||
"indoor": false
|
||||
},
|
||||
"competitors": [
|
||||
{
|
||||
"id": "26",
|
||||
"uid": "s:1~l:10~t:26",
|
||||
"type": "team",
|
||||
"order": 0,
|
||||
"homeAway": "home",
|
||||
"team": {
|
||||
"id": "26",
|
||||
"uid": "s:1~l:10~t:26",
|
||||
"location": "San Francisco",
|
||||
"name": "Giants",
|
||||
"abbreviation": "SF",
|
||||
"displayName": "San Francisco Giants"
|
||||
},
|
||||
"score": null,
|
||||
"winner": null
|
||||
},
|
||||
{
|
||||
"id": "19",
|
||||
"uid": "s:1~l:10~t:19",
|
||||
"type": "team",
|
||||
"order": 1,
|
||||
"homeAway": "away",
|
||||
"team": {
|
||||
"id": "19",
|
||||
"uid": "s:1~l:10~t:19",
|
||||
"location": "Los Angeles",
|
||||
"name": "Dodgers",
|
||||
"abbreviation": "LAD",
|
||||
"displayName": "Los Angeles Dodgers"
|
||||
},
|
||||
"score": null,
|
||||
"winner": null
|
||||
}
|
||||
],
|
||||
"status": {
|
||||
"clock": 0,
|
||||
"displayClock": "0:00",
|
||||
"period": 0,
|
||||
"type": {
|
||||
"id": "1",
|
||||
"name": "STATUS_SCHEDULED",
|
||||
"state": "pre",
|
||||
"completed": false
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
245
sportstime_parser/tests/fixtures/mls/espn_scoreboard.json
vendored
Normal file
245
sportstime_parser/tests/fixtures/mls/espn_scoreboard.json
vendored
Normal file
@@ -0,0 +1,245 @@
|
||||
{
|
||||
"leagues": [
|
||||
{
|
||||
"id": "19",
|
||||
"uid": "s:600~l:19",
|
||||
"name": "Major League Soccer",
|
||||
"abbreviation": "MLS"
|
||||
}
|
||||
],
|
||||
"season": {
|
||||
"type": 2,
|
||||
"year": 2026
|
||||
},
|
||||
"day": {
|
||||
"date": "2026-03-15T00:00:00Z"
|
||||
},
|
||||
"events": [
|
||||
{
|
||||
"id": "401672001",
|
||||
"uid": "s:600~l:19~e:401672001",
|
||||
"date": "2026-03-15T22:00:00Z",
|
||||
"name": "LA Galaxy at LAFC",
|
||||
"shortName": "LA @ LAFC",
|
||||
"competitions": [
|
||||
{
|
||||
"id": "401672001",
|
||||
"uid": "s:600~l:19~e:401672001~c:401672001",
|
||||
"date": "2026-03-15T22:00:00Z",
|
||||
"attendance": 22000,
|
||||
"type": {
|
||||
"id": "1",
|
||||
"abbreviation": "STD"
|
||||
},
|
||||
"venue": {
|
||||
"id": "8909",
|
||||
"fullName": "BMO Stadium",
|
||||
"address": {
|
||||
"city": "Los Angeles",
|
||||
"state": "CA"
|
||||
},
|
||||
"capacity": 22000,
|
||||
"indoor": false
|
||||
},
|
||||
"competitors": [
|
||||
{
|
||||
"id": "21295",
|
||||
"uid": "s:600~l:19~t:21295",
|
||||
"type": "team",
|
||||
"order": 0,
|
||||
"homeAway": "home",
|
||||
"team": {
|
||||
"id": "21295",
|
||||
"uid": "s:600~l:19~t:21295",
|
||||
"location": "Los Angeles",
|
||||
"name": "FC",
|
||||
"abbreviation": "LAFC",
|
||||
"displayName": "Los Angeles FC"
|
||||
},
|
||||
"score": "3",
|
||||
"winner": true
|
||||
},
|
||||
{
|
||||
"id": "3610",
|
||||
"uid": "s:600~l:19~t:3610",
|
||||
"type": "team",
|
||||
"order": 1,
|
||||
"homeAway": "away",
|
||||
"team": {
|
||||
"id": "3610",
|
||||
"uid": "s:600~l:19~t:3610",
|
||||
"location": "Los Angeles",
|
||||
"name": "Galaxy",
|
||||
"abbreviation": "LA",
|
||||
"displayName": "LA Galaxy"
|
||||
},
|
||||
"score": "2",
|
||||
"winner": false
|
||||
}
|
||||
],
|
||||
"status": {
|
||||
"clock": 90,
|
||||
"displayClock": "90'",
|
||||
"period": 2,
|
||||
"type": {
|
||||
"id": "3",
|
||||
"name": "STATUS_FINAL",
|
||||
"state": "post",
|
||||
"completed": true
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "401672002",
|
||||
"uid": "s:600~l:19~e:401672002",
|
||||
"date": "2026-03-15T23:00:00Z",
|
||||
"name": "Seattle Sounders at Portland Timbers",
|
||||
"shortName": "SEA @ POR",
|
||||
"competitions": [
|
||||
{
|
||||
"id": "401672002",
|
||||
"uid": "s:600~l:19~e:401672002~c:401672002",
|
||||
"date": "2026-03-15T23:00:00Z",
|
||||
"type": {
|
||||
"id": "1",
|
||||
"abbreviation": "STD"
|
||||
},
|
||||
"venue": {
|
||||
"id": "8070",
|
||||
"fullName": "Providence Park",
|
||||
"address": {
|
||||
"city": "Portland",
|
||||
"state": "OR"
|
||||
},
|
||||
"capacity": 25218,
|
||||
"indoor": false
|
||||
},
|
||||
"competitors": [
|
||||
{
|
||||
"id": "5282",
|
||||
"uid": "s:600~l:19~t:5282",
|
||||
"type": "team",
|
||||
"order": 0,
|
||||
"homeAway": "home",
|
||||
"team": {
|
||||
"id": "5282",
|
||||
"uid": "s:600~l:19~t:5282",
|
||||
"location": "Portland",
|
||||
"name": "Timbers",
|
||||
"abbreviation": "POR",
|
||||
"displayName": "Portland Timbers"
|
||||
},
|
||||
"score": "2",
|
||||
"winner": false
|
||||
},
|
||||
{
|
||||
"id": "4687",
|
||||
"uid": "s:600~l:19~t:4687",
|
||||
"type": "team",
|
||||
"order": 1,
|
||||
"homeAway": "away",
|
||||
"team": {
|
||||
"id": "4687",
|
||||
"uid": "s:600~l:19~t:4687",
|
||||
"location": "Seattle",
|
||||
"name": "Sounders FC",
|
||||
"abbreviation": "SEA",
|
||||
"displayName": "Seattle Sounders FC"
|
||||
},
|
||||
"score": "2",
|
||||
"winner": false
|
||||
}
|
||||
],
|
||||
"status": {
|
||||
"clock": 90,
|
||||
"displayClock": "90'",
|
||||
"period": 2,
|
||||
"type": {
|
||||
"id": "3",
|
||||
"name": "STATUS_FINAL",
|
||||
"state": "post",
|
||||
"completed": true
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "401672003",
|
||||
"uid": "s:600~l:19~e:401672003",
|
||||
"date": "2026-03-16T00:00:00Z",
|
||||
"name": "New York Red Bulls at Atlanta United",
|
||||
"shortName": "NY @ ATL",
|
||||
"competitions": [
|
||||
{
|
||||
"id": "401672003",
|
||||
"uid": "s:600~l:19~e:401672003~c:401672003",
|
||||
"date": "2026-03-16T00:00:00Z",
|
||||
"type": {
|
||||
"id": "1",
|
||||
"abbreviation": "STD"
|
||||
},
|
||||
"venue": {
|
||||
"id": "8904",
|
||||
"fullName": "Mercedes-Benz Stadium",
|
||||
"address": {
|
||||
"city": "Atlanta",
|
||||
"state": "GA"
|
||||
},
|
||||
"capacity": 42500,
|
||||
"indoor": true
|
||||
},
|
||||
"competitors": [
|
||||
{
|
||||
"id": "18626",
|
||||
"uid": "s:600~l:19~t:18626",
|
||||
"type": "team",
|
||||
"order": 0,
|
||||
"homeAway": "home",
|
||||
"team": {
|
||||
"id": "18626",
|
||||
"uid": "s:600~l:19~t:18626",
|
||||
"location": "Atlanta",
|
||||
"name": "United FC",
|
||||
"abbreviation": "ATL",
|
||||
"displayName": "Atlanta United FC"
|
||||
},
|
||||
"score": null,
|
||||
"winner": null
|
||||
},
|
||||
{
|
||||
"id": "399",
|
||||
"uid": "s:600~l:19~t:399",
|
||||
"type": "team",
|
||||
"order": 1,
|
||||
"homeAway": "away",
|
||||
"team": {
|
||||
"id": "399",
|
||||
"uid": "s:600~l:19~t:399",
|
||||
"location": "New York",
|
||||
"name": "Red Bulls",
|
||||
"abbreviation": "NY",
|
||||
"displayName": "New York Red Bulls"
|
||||
},
|
||||
"score": null,
|
||||
"winner": null
|
||||
}
|
||||
],
|
||||
"status": {
|
||||
"clock": 0,
|
||||
"displayClock": "0'",
|
||||
"period": 0,
|
||||
"type": {
|
||||
"id": "1",
|
||||
"name": "STATUS_SCHEDULED",
|
||||
"state": "pre",
|
||||
"completed": false
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
79
sportstime_parser/tests/fixtures/nba/basketball_reference_edge_cases.html
vendored
Normal file
79
sportstime_parser/tests/fixtures/nba/basketball_reference_edge_cases.html
vendored
Normal file
@@ -0,0 +1,79 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>2025-26 NBA Schedule - Edge Cases | Basketball-Reference.com</title>
|
||||
</head>
|
||||
<body>
|
||||
<table id="schedule" class="stats_table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th data-stat="date_game">Date</th>
|
||||
<th data-stat="game_start_time">Start (ET)</th>
|
||||
<th data-stat="visitor_team_name">Visitor/Neutral</th>
|
||||
<th data-stat="visitor_pts">PTS</th>
|
||||
<th data-stat="home_team_name">Home/Neutral</th>
|
||||
<th data-stat="home_pts">PTS</th>
|
||||
<th data-stat="arena_name">Arena</th>
|
||||
<th data-stat="game_remarks">Notes</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<!-- Postponed game -->
|
||||
<tr>
|
||||
<th data-stat="date_game">Sat, Jan 11, 2026</th>
|
||||
<td data-stat="game_start_time">7:30p</td>
|
||||
<td data-stat="visitor_team_name">Los Angeles Lakers</td>
|
||||
<td data-stat="visitor_pts"></td>
|
||||
<td data-stat="home_team_name">Phoenix Suns</td>
|
||||
<td data-stat="home_pts"></td>
|
||||
<td data-stat="arena_name">Footprint Center</td>
|
||||
<td data-stat="game_remarks">Postponed - Weather</td>
|
||||
</tr>
|
||||
<!-- Neutral site game (Mexico City) -->
|
||||
<tr>
|
||||
<th data-stat="date_game">Sat, Nov 8, 2025</th>
|
||||
<td data-stat="game_start_time">7:00p</td>
|
||||
<td data-stat="visitor_team_name">Miami Heat</td>
|
||||
<td data-stat="visitor_pts">105</td>
|
||||
<td data-stat="home_team_name">Washington Wizards</td>
|
||||
<td data-stat="home_pts">99</td>
|
||||
<td data-stat="arena_name">Arena CDMX</td>
|
||||
<td data-stat="game_remarks">NBA Mexico City Games</td>
|
||||
</tr>
|
||||
<!-- Cancelled game -->
|
||||
<tr>
|
||||
<th data-stat="date_game">Wed, Dec 3, 2025</th>
|
||||
<td data-stat="game_start_time">8:00p</td>
|
||||
<td data-stat="visitor_team_name">Portland Trail Blazers</td>
|
||||
<td data-stat="visitor_pts"></td>
|
||||
<td data-stat="home_team_name">Sacramento Kings</td>
|
||||
<td data-stat="home_pts"></td>
|
||||
<td data-stat="arena_name">Golden 1 Center</td>
|
||||
<td data-stat="game_remarks">Cancelled</td>
|
||||
</tr>
|
||||
<!-- Regular completed game with high scores -->
|
||||
<tr>
|
||||
<th data-stat="date_game">Sun, Mar 15, 2026</th>
|
||||
<td data-stat="game_start_time">3:30p</td>
|
||||
<td data-stat="visitor_team_name">Indiana Pacers</td>
|
||||
<td data-stat="visitor_pts">147</td>
|
||||
<td data-stat="home_team_name">Atlanta Hawks</td>
|
||||
<td data-stat="home_pts">150</td>
|
||||
<td data-stat="arena_name">State Farm Arena</td>
|
||||
<td data-stat="game_remarks">OT</td>
|
||||
</tr>
|
||||
<!-- Game at arena with special characters -->
|
||||
<tr>
|
||||
<th data-stat="date_game">Mon, Feb 2, 2026</th>
|
||||
<td data-stat="game_start_time">10:30p</td>
|
||||
<td data-stat="visitor_team_name">Golden State Warriors</td>
|
||||
<td data-stat="visitor_pts">118</td>
|
||||
<td data-stat="home_team_name">Los Angeles Clippers</td>
|
||||
<td data-stat="home_pts">115</td>
|
||||
<td data-stat="arena_name">Intuit Dome</td>
|
||||
<td data-stat="game_remarks"></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
94
sportstime_parser/tests/fixtures/nba/basketball_reference_october.html
vendored
Normal file
94
sportstime_parser/tests/fixtures/nba/basketball_reference_october.html
vendored
Normal file
@@ -0,0 +1,94 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>2025-26 NBA Schedule - October | Basketball-Reference.com</title>
|
||||
</head>
|
||||
<body>
|
||||
<table id="schedule" class="stats_table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th data-stat="date_game">Date</th>
|
||||
<th data-stat="game_start_time">Start (ET)</th>
|
||||
<th data-stat="visitor_team_name">Visitor/Neutral</th>
|
||||
<th data-stat="visitor_pts">PTS</th>
|
||||
<th data-stat="home_team_name">Home/Neutral</th>
|
||||
<th data-stat="home_pts">PTS</th>
|
||||
<th data-stat="arena_name">Arena</th>
|
||||
<th data-stat="game_remarks">Notes</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<th data-stat="date_game">Tue, Oct 22, 2025</th>
|
||||
<td data-stat="game_start_time">7:30p</td>
|
||||
<td data-stat="visitor_team_name">Boston Celtics</td>
|
||||
<td data-stat="visitor_pts">112</td>
|
||||
<td data-stat="home_team_name">Cleveland Cavaliers</td>
|
||||
<td data-stat="home_pts">108</td>
|
||||
<td data-stat="arena_name">Rocket Mortgage FieldHouse</td>
|
||||
<td data-stat="game_remarks"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th data-stat="date_game">Tue, Oct 22, 2025</th>
|
||||
<td data-stat="game_start_time">10:00p</td>
|
||||
<td data-stat="visitor_team_name">Denver Nuggets</td>
|
||||
<td data-stat="visitor_pts">119</td>
|
||||
<td data-stat="home_team_name">Los Angeles Lakers</td>
|
||||
<td data-stat="home_pts">127</td>
|
||||
<td data-stat="arena_name">Crypto.com Arena</td>
|
||||
<td data-stat="game_remarks"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th data-stat="date_game">Wed, Oct 23, 2025</th>
|
||||
<td data-stat="game_start_time">7:00p</td>
|
||||
<td data-stat="visitor_team_name">Houston Rockets</td>
|
||||
<td data-stat="visitor_pts"></td>
|
||||
<td data-stat="home_team_name">Oklahoma City Thunder</td>
|
||||
<td data-stat="home_pts"></td>
|
||||
<td data-stat="arena_name">Paycom Center</td>
|
||||
<td data-stat="game_remarks"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th data-stat="date_game">Wed, Oct 23, 2025</th>
|
||||
<td data-stat="game_start_time">7:30p</td>
|
||||
<td data-stat="visitor_team_name">New York Knicks</td>
|
||||
<td data-stat="visitor_pts"></td>
|
||||
<td data-stat="home_team_name">Brooklyn Nets</td>
|
||||
<td data-stat="home_pts"></td>
|
||||
<td data-stat="arena_name">Barclays Center</td>
|
||||
<td data-stat="game_remarks"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th data-stat="date_game">Thu, Oct 24, 2025</th>
|
||||
<td data-stat="game_start_time">7:00p</td>
|
||||
<td data-stat="visitor_team_name">Chicago Bulls</td>
|
||||
<td data-stat="visitor_pts"></td>
|
||||
<td data-stat="home_team_name">Miami Heat</td>
|
||||
<td data-stat="home_pts"></td>
|
||||
<td data-stat="arena_name">Kaseya Center</td>
|
||||
<td data-stat="game_remarks"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th data-stat="date_game">Fri, Oct 25, 2025</th>
|
||||
<td data-stat="game_start_time">7:30p</td>
|
||||
<td data-stat="visitor_team_name">Toronto Raptors</td>
|
||||
<td data-stat="visitor_pts"></td>
|
||||
<td data-stat="home_team_name">Boston Celtics</td>
|
||||
<td data-stat="home_pts"></td>
|
||||
<td data-stat="arena_name">TD Garden</td>
|
||||
<td data-stat="game_remarks"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th data-stat="date_game">Sat, Oct 26, 2025</th>
|
||||
<td data-stat="game_start_time">8:00p</td>
|
||||
<td data-stat="visitor_team_name">Minnesota Timberwolves</td>
|
||||
<td data-stat="visitor_pts"></td>
|
||||
<td data-stat="home_team_name">Dallas Mavericks</td>
|
||||
<td data-stat="home_pts"></td>
|
||||
<td data-stat="arena_name">American Airlines Center</td>
|
||||
<td data-stat="game_remarks"></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
245
sportstime_parser/tests/fixtures/nba/espn_scoreboard.json
vendored
Normal file
245
sportstime_parser/tests/fixtures/nba/espn_scoreboard.json
vendored
Normal file
@@ -0,0 +1,245 @@
|
||||
{
|
||||
"leagues": [
|
||||
{
|
||||
"id": "46",
|
||||
"uid": "s:40~l:46",
|
||||
"name": "National Basketball Association",
|
||||
"abbreviation": "NBA"
|
||||
}
|
||||
],
|
||||
"season": {
|
||||
"type": 2,
|
||||
"year": 2026
|
||||
},
|
||||
"day": {
|
||||
"date": "2025-10-22T00:00:00Z"
|
||||
},
|
||||
"events": [
|
||||
{
|
||||
"id": "401584721",
|
||||
"uid": "s:40~l:46~e:401584721",
|
||||
"date": "2025-10-22T23:30:00Z",
|
||||
"name": "Boston Celtics at Cleveland Cavaliers",
|
||||
"shortName": "BOS @ CLE",
|
||||
"competitions": [
|
||||
{
|
||||
"id": "401584721",
|
||||
"uid": "s:40~l:46~e:401584721~c:401584721",
|
||||
"date": "2025-10-22T23:30:00Z",
|
||||
"attendance": 20562,
|
||||
"type": {
|
||||
"id": "1",
|
||||
"abbreviation": "STD"
|
||||
},
|
||||
"venue": {
|
||||
"id": "5064",
|
||||
"fullName": "Rocket Mortgage FieldHouse",
|
||||
"address": {
|
||||
"city": "Cleveland",
|
||||
"state": "OH"
|
||||
},
|
||||
"capacity": 19432,
|
||||
"indoor": true
|
||||
},
|
||||
"competitors": [
|
||||
{
|
||||
"id": "5",
|
||||
"uid": "s:40~l:46~t:5",
|
||||
"type": "team",
|
||||
"order": 0,
|
||||
"homeAway": "home",
|
||||
"team": {
|
||||
"id": "5",
|
||||
"uid": "s:40~l:46~t:5",
|
||||
"location": "Cleveland",
|
||||
"name": "Cavaliers",
|
||||
"abbreviation": "CLE",
|
||||
"displayName": "Cleveland Cavaliers"
|
||||
},
|
||||
"score": "108",
|
||||
"winner": false
|
||||
},
|
||||
{
|
||||
"id": "2",
|
||||
"uid": "s:40~l:46~t:2",
|
||||
"type": "team",
|
||||
"order": 1,
|
||||
"homeAway": "away",
|
||||
"team": {
|
||||
"id": "2",
|
||||
"uid": "s:40~l:46~t:2",
|
||||
"location": "Boston",
|
||||
"name": "Celtics",
|
||||
"abbreviation": "BOS",
|
||||
"displayName": "Boston Celtics"
|
||||
},
|
||||
"score": "112",
|
||||
"winner": true
|
||||
}
|
||||
],
|
||||
"status": {
|
||||
"clock": 0,
|
||||
"displayClock": "0:00",
|
||||
"period": 4,
|
||||
"type": {
|
||||
"id": "3",
|
||||
"name": "STATUS_FINAL",
|
||||
"state": "post",
|
||||
"completed": true
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "401584722",
|
||||
"uid": "s:40~l:46~e:401584722",
|
||||
"date": "2025-10-23T02:00:00Z",
|
||||
"name": "Denver Nuggets at Los Angeles Lakers",
|
||||
"shortName": "DEN @ LAL",
|
||||
"competitions": [
|
||||
{
|
||||
"id": "401584722",
|
||||
"uid": "s:40~l:46~e:401584722~c:401584722",
|
||||
"date": "2025-10-23T02:00:00Z",
|
||||
"type": {
|
||||
"id": "1",
|
||||
"abbreviation": "STD"
|
||||
},
|
||||
"venue": {
|
||||
"id": "5091",
|
||||
"fullName": "Crypto.com Arena",
|
||||
"address": {
|
||||
"city": "Los Angeles",
|
||||
"state": "CA"
|
||||
},
|
||||
"capacity": 19068,
|
||||
"indoor": true
|
||||
},
|
||||
"competitors": [
|
||||
{
|
||||
"id": "13",
|
||||
"uid": "s:40~l:46~t:13",
|
||||
"type": "team",
|
||||
"order": 0,
|
||||
"homeAway": "home",
|
||||
"team": {
|
||||
"id": "13",
|
||||
"uid": "s:40~l:46~t:13",
|
||||
"location": "Los Angeles",
|
||||
"name": "Lakers",
|
||||
"abbreviation": "LAL",
|
||||
"displayName": "Los Angeles Lakers"
|
||||
},
|
||||
"score": "127",
|
||||
"winner": true
|
||||
},
|
||||
{
|
||||
"id": "7",
|
||||
"uid": "s:40~l:46~t:7",
|
||||
"type": "team",
|
||||
"order": 1,
|
||||
"homeAway": "away",
|
||||
"team": {
|
||||
"id": "7",
|
||||
"uid": "s:40~l:46~t:7",
|
||||
"location": "Denver",
|
||||
"name": "Nuggets",
|
||||
"abbreviation": "DEN",
|
||||
"displayName": "Denver Nuggets"
|
||||
},
|
||||
"score": "119",
|
||||
"winner": false
|
||||
}
|
||||
],
|
||||
"status": {
|
||||
"clock": 0,
|
||||
"displayClock": "0:00",
|
||||
"period": 4,
|
||||
"type": {
|
||||
"id": "3",
|
||||
"name": "STATUS_FINAL",
|
||||
"state": "post",
|
||||
"completed": true
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "401584723",
|
||||
"uid": "s:40~l:46~e:401584723",
|
||||
"date": "2025-10-24T00:00:00Z",
|
||||
"name": "Houston Rockets at Oklahoma City Thunder",
|
||||
"shortName": "HOU @ OKC",
|
||||
"competitions": [
|
||||
{
|
||||
"id": "401584723",
|
||||
"uid": "s:40~l:46~e:401584723~c:401584723",
|
||||
"date": "2025-10-24T00:00:00Z",
|
||||
"type": {
|
||||
"id": "1",
|
||||
"abbreviation": "STD"
|
||||
},
|
||||
"venue": {
|
||||
"id": "4922",
|
||||
"fullName": "Paycom Center",
|
||||
"address": {
|
||||
"city": "Oklahoma City",
|
||||
"state": "OK"
|
||||
},
|
||||
"capacity": 18203,
|
||||
"indoor": true
|
||||
},
|
||||
"competitors": [
|
||||
{
|
||||
"id": "25",
|
||||
"uid": "s:40~l:46~t:25",
|
||||
"type": "team",
|
||||
"order": 0,
|
||||
"homeAway": "home",
|
||||
"team": {
|
||||
"id": "25",
|
||||
"uid": "s:40~l:46~t:25",
|
||||
"location": "Oklahoma City",
|
||||
"name": "Thunder",
|
||||
"abbreviation": "OKC",
|
||||
"displayName": "Oklahoma City Thunder"
|
||||
},
|
||||
"score": null,
|
||||
"winner": null
|
||||
},
|
||||
{
|
||||
"id": "10",
|
||||
"uid": "s:40~l:46~t:10",
|
||||
"type": "team",
|
||||
"order": 1,
|
||||
"homeAway": "away",
|
||||
"team": {
|
||||
"id": "10",
|
||||
"uid": "s:40~l:46~t:10",
|
||||
"location": "Houston",
|
||||
"name": "Rockets",
|
||||
"abbreviation": "HOU",
|
||||
"displayName": "Houston Rockets"
|
||||
},
|
||||
"score": null,
|
||||
"winner": null
|
||||
}
|
||||
],
|
||||
"status": {
|
||||
"clock": 0,
|
||||
"displayClock": "0:00",
|
||||
"period": 0,
|
||||
"type": {
|
||||
"id": "1",
|
||||
"name": "STATUS_SCHEDULED",
|
||||
"state": "pre",
|
||||
"completed": false
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
245
sportstime_parser/tests/fixtures/nfl/espn_scoreboard.json
vendored
Normal file
245
sportstime_parser/tests/fixtures/nfl/espn_scoreboard.json
vendored
Normal file
@@ -0,0 +1,245 @@
|
||||
{
|
||||
"leagues": [
|
||||
{
|
||||
"id": "28",
|
||||
"uid": "s:20~l:28",
|
||||
"name": "National Football League",
|
||||
"abbreviation": "NFL"
|
||||
}
|
||||
],
|
||||
"season": {
|
||||
"type": 2,
|
||||
"year": 2025
|
||||
},
|
||||
"week": {
|
||||
"number": 1
|
||||
},
|
||||
"events": [
|
||||
{
|
||||
"id": "401671801",
|
||||
"uid": "s:20~l:28~e:401671801",
|
||||
"date": "2025-09-07T20:00:00Z",
|
||||
"name": "Kansas City Chiefs at Baltimore Ravens",
|
||||
"shortName": "KC @ BAL",
|
||||
"competitions": [
|
||||
{
|
||||
"id": "401671801",
|
||||
"uid": "s:20~l:28~e:401671801~c:401671801",
|
||||
"date": "2025-09-07T20:00:00Z",
|
||||
"attendance": 71547,
|
||||
"type": {
|
||||
"id": "1",
|
||||
"abbreviation": "STD"
|
||||
},
|
||||
"venue": {
|
||||
"id": "3814",
|
||||
"fullName": "M&T Bank Stadium",
|
||||
"address": {
|
||||
"city": "Baltimore",
|
||||
"state": "MD"
|
||||
},
|
||||
"capacity": 71008,
|
||||
"indoor": false
|
||||
},
|
||||
"competitors": [
|
||||
{
|
||||
"id": "33",
|
||||
"uid": "s:20~l:28~t:33",
|
||||
"type": "team",
|
||||
"order": 0,
|
||||
"homeAway": "home",
|
||||
"team": {
|
||||
"id": "33",
|
||||
"uid": "s:20~l:28~t:33",
|
||||
"location": "Baltimore",
|
||||
"name": "Ravens",
|
||||
"abbreviation": "BAL",
|
||||
"displayName": "Baltimore Ravens"
|
||||
},
|
||||
"score": "20",
|
||||
"winner": false
|
||||
},
|
||||
{
|
||||
"id": "12",
|
||||
"uid": "s:20~l:28~t:12",
|
||||
"type": "team",
|
||||
"order": 1,
|
||||
"homeAway": "away",
|
||||
"team": {
|
||||
"id": "12",
|
||||
"uid": "s:20~l:28~t:12",
|
||||
"location": "Kansas City",
|
||||
"name": "Chiefs",
|
||||
"abbreviation": "KC",
|
||||
"displayName": "Kansas City Chiefs"
|
||||
},
|
||||
"score": "27",
|
||||
"winner": true
|
||||
}
|
||||
],
|
||||
"status": {
|
||||
"clock": 0,
|
||||
"displayClock": "0:00",
|
||||
"period": 4,
|
||||
"type": {
|
||||
"id": "3",
|
||||
"name": "STATUS_FINAL",
|
||||
"state": "post",
|
||||
"completed": true
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "401671802",
|
||||
"uid": "s:20~l:28~e:401671802",
|
||||
"date": "2025-09-08T17:00:00Z",
|
||||
"name": "Philadelphia Eagles at Green Bay Packers",
|
||||
"shortName": "PHI @ GB",
|
||||
"competitions": [
|
||||
{
|
||||
"id": "401671802",
|
||||
"uid": "s:20~l:28~e:401671802~c:401671802",
|
||||
"date": "2025-09-08T17:00:00Z",
|
||||
"type": {
|
||||
"id": "1",
|
||||
"abbreviation": "STD"
|
||||
},
|
||||
"venue": {
|
||||
"id": "3798",
|
||||
"fullName": "Lambeau Field",
|
||||
"address": {
|
||||
"city": "Green Bay",
|
||||
"state": "WI"
|
||||
},
|
||||
"capacity": 81441,
|
||||
"indoor": false
|
||||
},
|
||||
"competitors": [
|
||||
{
|
||||
"id": "9",
|
||||
"uid": "s:20~l:28~t:9",
|
||||
"type": "team",
|
||||
"order": 0,
|
||||
"homeAway": "home",
|
||||
"team": {
|
||||
"id": "9",
|
||||
"uid": "s:20~l:28~t:9",
|
||||
"location": "Green Bay",
|
||||
"name": "Packers",
|
||||
"abbreviation": "GB",
|
||||
"displayName": "Green Bay Packers"
|
||||
},
|
||||
"score": "34",
|
||||
"winner": true
|
||||
},
|
||||
{
|
||||
"id": "21",
|
||||
"uid": "s:20~l:28~t:21",
|
||||
"type": "team",
|
||||
"order": 1,
|
||||
"homeAway": "away",
|
||||
"team": {
|
||||
"id": "21",
|
||||
"uid": "s:20~l:28~t:21",
|
||||
"location": "Philadelphia",
|
||||
"name": "Eagles",
|
||||
"abbreviation": "PHI",
|
||||
"displayName": "Philadelphia Eagles"
|
||||
},
|
||||
"score": "29",
|
||||
"winner": false
|
||||
}
|
||||
],
|
||||
"status": {
|
||||
"clock": 0,
|
||||
"displayClock": "0:00",
|
||||
"period": 4,
|
||||
"type": {
|
||||
"id": "3",
|
||||
"name": "STATUS_FINAL",
|
||||
"state": "post",
|
||||
"completed": true
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "401671803",
|
||||
"uid": "s:20~l:28~e:401671803",
|
||||
"date": "2025-09-08T20:25:00Z",
|
||||
"name": "Dallas Cowboys at Cleveland Browns",
|
||||
"shortName": "DAL @ CLE",
|
||||
"competitions": [
|
||||
{
|
||||
"id": "401671803",
|
||||
"uid": "s:20~l:28~e:401671803~c:401671803",
|
||||
"date": "2025-09-08T20:25:00Z",
|
||||
"type": {
|
||||
"id": "1",
|
||||
"abbreviation": "STD"
|
||||
},
|
||||
"venue": {
|
||||
"id": "3653",
|
||||
"fullName": "Cleveland Browns Stadium",
|
||||
"address": {
|
||||
"city": "Cleveland",
|
||||
"state": "OH"
|
||||
},
|
||||
"capacity": 67431,
|
||||
"indoor": false
|
||||
},
|
||||
"competitors": [
|
||||
{
|
||||
"id": "5",
|
||||
"uid": "s:20~l:28~t:5",
|
||||
"type": "team",
|
||||
"order": 0,
|
||||
"homeAway": "home",
|
||||
"team": {
|
||||
"id": "5",
|
||||
"uid": "s:20~l:28~t:5",
|
||||
"location": "Cleveland",
|
||||
"name": "Browns",
|
||||
"abbreviation": "CLE",
|
||||
"displayName": "Cleveland Browns"
|
||||
},
|
||||
"score": null,
|
||||
"winner": null
|
||||
},
|
||||
{
|
||||
"id": "6",
|
||||
"uid": "s:20~l:28~t:6",
|
||||
"type": "team",
|
||||
"order": 1,
|
||||
"homeAway": "away",
|
||||
"team": {
|
||||
"id": "6",
|
||||
"uid": "s:20~l:28~t:6",
|
||||
"location": "Dallas",
|
||||
"name": "Cowboys",
|
||||
"abbreviation": "DAL",
|
||||
"displayName": "Dallas Cowboys"
|
||||
},
|
||||
"score": null,
|
||||
"winner": null
|
||||
}
|
||||
],
|
||||
"status": {
|
||||
"clock": 0,
|
||||
"displayClock": "0:00",
|
||||
"period": 0,
|
||||
"type": {
|
||||
"id": "1",
|
||||
"name": "STATUS_SCHEDULED",
|
||||
"state": "pre",
|
||||
"completed": false
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
245
sportstime_parser/tests/fixtures/nhl/espn_scoreboard.json
vendored
Normal file
245
sportstime_parser/tests/fixtures/nhl/espn_scoreboard.json
vendored
Normal file
@@ -0,0 +1,245 @@
|
||||
{
|
||||
"leagues": [
|
||||
{
|
||||
"id": "90",
|
||||
"uid": "s:70~l:90",
|
||||
"name": "National Hockey League",
|
||||
"abbreviation": "NHL"
|
||||
}
|
||||
],
|
||||
"season": {
|
||||
"type": 2,
|
||||
"year": 2026
|
||||
},
|
||||
"day": {
|
||||
"date": "2025-10-08T00:00:00Z"
|
||||
},
|
||||
"events": [
|
||||
{
|
||||
"id": "401671901",
|
||||
"uid": "s:70~l:90~e:401671901",
|
||||
"date": "2025-10-08T23:00:00Z",
|
||||
"name": "Pittsburgh Penguins at Boston Bruins",
|
||||
"shortName": "PIT @ BOS",
|
||||
"competitions": [
|
||||
{
|
||||
"id": "401671901",
|
||||
"uid": "s:70~l:90~e:401671901~c:401671901",
|
||||
"date": "2025-10-08T23:00:00Z",
|
||||
"attendance": 17850,
|
||||
"type": {
|
||||
"id": "1",
|
||||
"abbreviation": "STD"
|
||||
},
|
||||
"venue": {
|
||||
"id": "1823",
|
||||
"fullName": "TD Garden",
|
||||
"address": {
|
||||
"city": "Boston",
|
||||
"state": "MA"
|
||||
},
|
||||
"capacity": 17850,
|
||||
"indoor": true
|
||||
},
|
||||
"competitors": [
|
||||
{
|
||||
"id": "1",
|
||||
"uid": "s:70~l:90~t:1",
|
||||
"type": "team",
|
||||
"order": 0,
|
||||
"homeAway": "home",
|
||||
"team": {
|
||||
"id": "1",
|
||||
"uid": "s:70~l:90~t:1",
|
||||
"location": "Boston",
|
||||
"name": "Bruins",
|
||||
"abbreviation": "BOS",
|
||||
"displayName": "Boston Bruins"
|
||||
},
|
||||
"score": "4",
|
||||
"winner": true
|
||||
},
|
||||
{
|
||||
"id": "5",
|
||||
"uid": "s:70~l:90~t:5",
|
||||
"type": "team",
|
||||
"order": 1,
|
||||
"homeAway": "away",
|
||||
"team": {
|
||||
"id": "5",
|
||||
"uid": "s:70~l:90~t:5",
|
||||
"location": "Pittsburgh",
|
||||
"name": "Penguins",
|
||||
"abbreviation": "PIT",
|
||||
"displayName": "Pittsburgh Penguins"
|
||||
},
|
||||
"score": "2",
|
||||
"winner": false
|
||||
}
|
||||
],
|
||||
"status": {
|
||||
"clock": 0,
|
||||
"displayClock": "0:00",
|
||||
"period": 3,
|
||||
"type": {
|
||||
"id": "3",
|
||||
"name": "STATUS_FINAL",
|
||||
"state": "post",
|
||||
"completed": true
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "401671902",
|
||||
"uid": "s:70~l:90~e:401671902",
|
||||
"date": "2025-10-09T00:00:00Z",
|
||||
"name": "Toronto Maple Leafs at Montreal Canadiens",
|
||||
"shortName": "TOR @ MTL",
|
||||
"competitions": [
|
||||
{
|
||||
"id": "401671902",
|
||||
"uid": "s:70~l:90~e:401671902~c:401671902",
|
||||
"date": "2025-10-09T00:00:00Z",
|
||||
"type": {
|
||||
"id": "1",
|
||||
"abbreviation": "STD"
|
||||
},
|
||||
"venue": {
|
||||
"id": "1918",
|
||||
"fullName": "Bell Centre",
|
||||
"address": {
|
||||
"city": "Montreal",
|
||||
"state": "QC"
|
||||
},
|
||||
"capacity": 21302,
|
||||
"indoor": true
|
||||
},
|
||||
"competitors": [
|
||||
{
|
||||
"id": "8",
|
||||
"uid": "s:70~l:90~t:8",
|
||||
"type": "team",
|
||||
"order": 0,
|
||||
"homeAway": "home",
|
||||
"team": {
|
||||
"id": "8",
|
||||
"uid": "s:70~l:90~t:8",
|
||||
"location": "Montreal",
|
||||
"name": "Canadiens",
|
||||
"abbreviation": "MTL",
|
||||
"displayName": "Montreal Canadiens"
|
||||
},
|
||||
"score": "3",
|
||||
"winner": false
|
||||
},
|
||||
{
|
||||
"id": "10",
|
||||
"uid": "s:70~l:90~t:10",
|
||||
"type": "team",
|
||||
"order": 1,
|
||||
"homeAway": "away",
|
||||
"team": {
|
||||
"id": "10",
|
||||
"uid": "s:70~l:90~t:10",
|
||||
"location": "Toronto",
|
||||
"name": "Maple Leafs",
|
||||
"abbreviation": "TOR",
|
||||
"displayName": "Toronto Maple Leafs"
|
||||
},
|
||||
"score": "5",
|
||||
"winner": true
|
||||
}
|
||||
],
|
||||
"status": {
|
||||
"clock": 0,
|
||||
"displayClock": "0:00",
|
||||
"period": 3,
|
||||
"type": {
|
||||
"id": "3",
|
||||
"name": "STATUS_FINAL",
|
||||
"state": "post",
|
||||
"completed": true
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "401671903",
|
||||
"uid": "s:70~l:90~e:401671903",
|
||||
"date": "2025-10-09T02:00:00Z",
|
||||
"name": "Vegas Golden Knights at Los Angeles Kings",
|
||||
"shortName": "VGK @ LAK",
|
||||
"competitions": [
|
||||
{
|
||||
"id": "401671903",
|
||||
"uid": "s:70~l:90~e:401671903~c:401671903",
|
||||
"date": "2025-10-09T02:00:00Z",
|
||||
"type": {
|
||||
"id": "1",
|
||||
"abbreviation": "STD"
|
||||
},
|
||||
"venue": {
|
||||
"id": "1816",
|
||||
"fullName": "Crypto.com Arena",
|
||||
"address": {
|
||||
"city": "Los Angeles",
|
||||
"state": "CA"
|
||||
},
|
||||
"capacity": 18230,
|
||||
"indoor": true
|
||||
},
|
||||
"competitors": [
|
||||
{
|
||||
"id": "26",
|
||||
"uid": "s:70~l:90~t:26",
|
||||
"type": "team",
|
||||
"order": 0,
|
||||
"homeAway": "home",
|
||||
"team": {
|
||||
"id": "26",
|
||||
"uid": "s:70~l:90~t:26",
|
||||
"location": "Los Angeles",
|
||||
"name": "Kings",
|
||||
"abbreviation": "LAK",
|
||||
"displayName": "Los Angeles Kings"
|
||||
},
|
||||
"score": null,
|
||||
"winner": null
|
||||
},
|
||||
{
|
||||
"id": "54",
|
||||
"uid": "s:70~l:90~t:54",
|
||||
"type": "team",
|
||||
"order": 1,
|
||||
"homeAway": "away",
|
||||
"team": {
|
||||
"id": "54",
|
||||
"uid": "s:70~l:90~t:54",
|
||||
"location": "Vegas",
|
||||
"name": "Golden Knights",
|
||||
"abbreviation": "VGK",
|
||||
"displayName": "Vegas Golden Knights"
|
||||
},
|
||||
"score": null,
|
||||
"winner": null
|
||||
}
|
||||
],
|
||||
"status": {
|
||||
"clock": 0,
|
||||
"displayClock": "0:00",
|
||||
"period": 0,
|
||||
"type": {
|
||||
"id": "1",
|
||||
"name": "STATUS_SCHEDULED",
|
||||
"state": "pre",
|
||||
"completed": false
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
245
sportstime_parser/tests/fixtures/nwsl/espn_scoreboard.json
vendored
Normal file
245
sportstime_parser/tests/fixtures/nwsl/espn_scoreboard.json
vendored
Normal file
@@ -0,0 +1,245 @@
|
||||
{
|
||||
"leagues": [
|
||||
{
|
||||
"id": "761",
|
||||
"uid": "s:600~l:761",
|
||||
"name": "National Women's Soccer League",
|
||||
"abbreviation": "NWSL"
|
||||
}
|
||||
],
|
||||
"season": {
|
||||
"type": 2,
|
||||
"year": 2026
|
||||
},
|
||||
"day": {
|
||||
"date": "2026-04-10T00:00:00Z"
|
||||
},
|
||||
"events": [
|
||||
{
|
||||
"id": "401672201",
|
||||
"uid": "s:600~l:761~e:401672201",
|
||||
"date": "2026-04-10T23:00:00Z",
|
||||
"name": "Angel City FC at Portland Thorns",
|
||||
"shortName": "LA @ POR",
|
||||
"competitions": [
|
||||
{
|
||||
"id": "401672201",
|
||||
"uid": "s:600~l:761~e:401672201~c:401672201",
|
||||
"date": "2026-04-10T23:00:00Z",
|
||||
"attendance": 22000,
|
||||
"type": {
|
||||
"id": "1",
|
||||
"abbreviation": "STD"
|
||||
},
|
||||
"venue": {
|
||||
"id": "8070",
|
||||
"fullName": "Providence Park",
|
||||
"address": {
|
||||
"city": "Portland",
|
||||
"state": "OR"
|
||||
},
|
||||
"capacity": 25218,
|
||||
"indoor": false
|
||||
},
|
||||
"competitors": [
|
||||
{
|
||||
"id": "15625",
|
||||
"uid": "s:600~l:761~t:15625",
|
||||
"type": "team",
|
||||
"order": 0,
|
||||
"homeAway": "home",
|
||||
"team": {
|
||||
"id": "15625",
|
||||
"uid": "s:600~l:761~t:15625",
|
||||
"location": "Portland",
|
||||
"name": "Thorns FC",
|
||||
"abbreviation": "POR",
|
||||
"displayName": "Portland Thorns FC"
|
||||
},
|
||||
"score": "2",
|
||||
"winner": true
|
||||
},
|
||||
{
|
||||
"id": "19934",
|
||||
"uid": "s:600~l:761~t:19934",
|
||||
"type": "team",
|
||||
"order": 1,
|
||||
"homeAway": "away",
|
||||
"team": {
|
||||
"id": "19934",
|
||||
"uid": "s:600~l:761~t:19934",
|
||||
"location": "Los Angeles",
|
||||
"name": "Angel City",
|
||||
"abbreviation": "LA",
|
||||
"displayName": "Angel City FC"
|
||||
},
|
||||
"score": "1",
|
||||
"winner": false
|
||||
}
|
||||
],
|
||||
"status": {
|
||||
"clock": 90,
|
||||
"displayClock": "90'",
|
||||
"period": 2,
|
||||
"type": {
|
||||
"id": "3",
|
||||
"name": "STATUS_FINAL",
|
||||
"state": "post",
|
||||
"completed": true
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "401672202",
|
||||
"uid": "s:600~l:761~e:401672202",
|
||||
"date": "2026-04-11T00:00:00Z",
|
||||
"name": "Orlando Pride at North Carolina Courage",
|
||||
"shortName": "ORL @ NC",
|
||||
"competitions": [
|
||||
{
|
||||
"id": "401672202",
|
||||
"uid": "s:600~l:761~e:401672202~c:401672202",
|
||||
"date": "2026-04-11T00:00:00Z",
|
||||
"type": {
|
||||
"id": "1",
|
||||
"abbreviation": "STD"
|
||||
},
|
||||
"venue": {
|
||||
"id": "8073",
|
||||
"fullName": "WakeMed Soccer Park",
|
||||
"address": {
|
||||
"city": "Cary",
|
||||
"state": "NC"
|
||||
},
|
||||
"capacity": 10000,
|
||||
"indoor": false
|
||||
},
|
||||
"competitors": [
|
||||
{
|
||||
"id": "15618",
|
||||
"uid": "s:600~l:761~t:15618",
|
||||
"type": "team",
|
||||
"order": 0,
|
||||
"homeAway": "home",
|
||||
"team": {
|
||||
"id": "15618",
|
||||
"uid": "s:600~l:761~t:15618",
|
||||
"location": "North Carolina",
|
||||
"name": "Courage",
|
||||
"abbreviation": "NC",
|
||||
"displayName": "North Carolina Courage"
|
||||
},
|
||||
"score": "3",
|
||||
"winner": true
|
||||
},
|
||||
{
|
||||
"id": "15626",
|
||||
"uid": "s:600~l:761~t:15626",
|
||||
"type": "team",
|
||||
"order": 1,
|
||||
"homeAway": "away",
|
||||
"team": {
|
||||
"id": "15626",
|
||||
"uid": "s:600~l:761~t:15626",
|
||||
"location": "Orlando",
|
||||
"name": "Pride",
|
||||
"abbreviation": "ORL",
|
||||
"displayName": "Orlando Pride"
|
||||
},
|
||||
"score": "1",
|
||||
"winner": false
|
||||
}
|
||||
],
|
||||
"status": {
|
||||
"clock": 90,
|
||||
"displayClock": "90'",
|
||||
"period": 2,
|
||||
"type": {
|
||||
"id": "3",
|
||||
"name": "STATUS_FINAL",
|
||||
"state": "post",
|
||||
"completed": true
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "401672203",
|
||||
"uid": "s:600~l:761~e:401672203",
|
||||
"date": "2026-04-11T02:00:00Z",
|
||||
"name": "San Diego Wave at Bay FC",
|
||||
"shortName": "SD @ BAY",
|
||||
"competitions": [
|
||||
{
|
||||
"id": "401672203",
|
||||
"uid": "s:600~l:761~e:401672203~c:401672203",
|
||||
"date": "2026-04-11T02:00:00Z",
|
||||
"type": {
|
||||
"id": "1",
|
||||
"abbreviation": "STD"
|
||||
},
|
||||
"venue": {
|
||||
"id": "3945",
|
||||
"fullName": "PayPal Park",
|
||||
"address": {
|
||||
"city": "San Jose",
|
||||
"state": "CA"
|
||||
},
|
||||
"capacity": 18000,
|
||||
"indoor": false
|
||||
},
|
||||
"competitors": [
|
||||
{
|
||||
"id": "25645",
|
||||
"uid": "s:600~l:761~t:25645",
|
||||
"type": "team",
|
||||
"order": 0,
|
||||
"homeAway": "home",
|
||||
"team": {
|
||||
"id": "25645",
|
||||
"uid": "s:600~l:761~t:25645",
|
||||
"location": "Bay Area",
|
||||
"name": "FC",
|
||||
"abbreviation": "BAY",
|
||||
"displayName": "Bay FC"
|
||||
},
|
||||
"score": null,
|
||||
"winner": null
|
||||
},
|
||||
{
|
||||
"id": "22638",
|
||||
"uid": "s:600~l:761~t:22638",
|
||||
"type": "team",
|
||||
"order": 1,
|
||||
"homeAway": "away",
|
||||
"team": {
|
||||
"id": "22638",
|
||||
"uid": "s:600~l:761~t:22638",
|
||||
"location": "San Diego",
|
||||
"name": "Wave FC",
|
||||
"abbreviation": "SD",
|
||||
"displayName": "San Diego Wave FC"
|
||||
},
|
||||
"score": null,
|
||||
"winner": null
|
||||
}
|
||||
],
|
||||
"status": {
|
||||
"clock": 0,
|
||||
"displayClock": "0'",
|
||||
"period": 0,
|
||||
"type": {
|
||||
"id": "1",
|
||||
"name": "STATUS_SCHEDULED",
|
||||
"state": "pre",
|
||||
"completed": false
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
245
sportstime_parser/tests/fixtures/wnba/espn_scoreboard.json
vendored
Normal file
245
sportstime_parser/tests/fixtures/wnba/espn_scoreboard.json
vendored
Normal file
@@ -0,0 +1,245 @@
|
||||
{
|
||||
"leagues": [
|
||||
{
|
||||
"id": "59",
|
||||
"uid": "s:40~l:59",
|
||||
"name": "Women's National Basketball Association",
|
||||
"abbreviation": "WNBA"
|
||||
}
|
||||
],
|
||||
"season": {
|
||||
"type": 2,
|
||||
"year": 2026
|
||||
},
|
||||
"day": {
|
||||
"date": "2026-05-20T00:00:00Z"
|
||||
},
|
||||
"events": [
|
||||
{
|
||||
"id": "401672101",
|
||||
"uid": "s:40~l:59~e:401672101",
|
||||
"date": "2026-05-20T23:00:00Z",
|
||||
"name": "Las Vegas Aces at New York Liberty",
|
||||
"shortName": "LV @ NY",
|
||||
"competitions": [
|
||||
{
|
||||
"id": "401672101",
|
||||
"uid": "s:40~l:59~e:401672101~c:401672101",
|
||||
"date": "2026-05-20T23:00:00Z",
|
||||
"attendance": 17732,
|
||||
"type": {
|
||||
"id": "1",
|
||||
"abbreviation": "STD"
|
||||
},
|
||||
"venue": {
|
||||
"id": "4346",
|
||||
"fullName": "Barclays Center",
|
||||
"address": {
|
||||
"city": "Brooklyn",
|
||||
"state": "NY"
|
||||
},
|
||||
"capacity": 17732,
|
||||
"indoor": true
|
||||
},
|
||||
"competitors": [
|
||||
{
|
||||
"id": "9",
|
||||
"uid": "s:40~l:59~t:9",
|
||||
"type": "team",
|
||||
"order": 0,
|
||||
"homeAway": "home",
|
||||
"team": {
|
||||
"id": "9",
|
||||
"uid": "s:40~l:59~t:9",
|
||||
"location": "New York",
|
||||
"name": "Liberty",
|
||||
"abbreviation": "NY",
|
||||
"displayName": "New York Liberty"
|
||||
},
|
||||
"score": "92",
|
||||
"winner": true
|
||||
},
|
||||
{
|
||||
"id": "20",
|
||||
"uid": "s:40~l:59~t:20",
|
||||
"type": "team",
|
||||
"order": 1,
|
||||
"homeAway": "away",
|
||||
"team": {
|
||||
"id": "20",
|
||||
"uid": "s:40~l:59~t:20",
|
||||
"location": "Las Vegas",
|
||||
"name": "Aces",
|
||||
"abbreviation": "LV",
|
||||
"displayName": "Las Vegas Aces"
|
||||
},
|
||||
"score": "88",
|
||||
"winner": false
|
||||
}
|
||||
],
|
||||
"status": {
|
||||
"clock": 0,
|
||||
"displayClock": "0:00",
|
||||
"period": 4,
|
||||
"type": {
|
||||
"id": "3",
|
||||
"name": "STATUS_FINAL",
|
||||
"state": "post",
|
||||
"completed": true
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "401672102",
|
||||
"uid": "s:40~l:59~e:401672102",
|
||||
"date": "2026-05-21T00:00:00Z",
|
||||
"name": "Connecticut Sun at Chicago Sky",
|
||||
"shortName": "CONN @ CHI",
|
||||
"competitions": [
|
||||
{
|
||||
"id": "401672102",
|
||||
"uid": "s:40~l:59~e:401672102~c:401672102",
|
||||
"date": "2026-05-21T00:00:00Z",
|
||||
"type": {
|
||||
"id": "1",
|
||||
"abbreviation": "STD"
|
||||
},
|
||||
"venue": {
|
||||
"id": "8086",
|
||||
"fullName": "Wintrust Arena",
|
||||
"address": {
|
||||
"city": "Chicago",
|
||||
"state": "IL"
|
||||
},
|
||||
"capacity": 10387,
|
||||
"indoor": true
|
||||
},
|
||||
"competitors": [
|
||||
{
|
||||
"id": "6",
|
||||
"uid": "s:40~l:59~t:6",
|
||||
"type": "team",
|
||||
"order": 0,
|
||||
"homeAway": "home",
|
||||
"team": {
|
||||
"id": "6",
|
||||
"uid": "s:40~l:59~t:6",
|
||||
"location": "Chicago",
|
||||
"name": "Sky",
|
||||
"abbreviation": "CHI",
|
||||
"displayName": "Chicago Sky"
|
||||
},
|
||||
"score": "78",
|
||||
"winner": false
|
||||
},
|
||||
{
|
||||
"id": "5",
|
||||
"uid": "s:40~l:59~t:5",
|
||||
"type": "team",
|
||||
"order": 1,
|
||||
"homeAway": "away",
|
||||
"team": {
|
||||
"id": "5",
|
||||
"uid": "s:40~l:59~t:5",
|
||||
"location": "Connecticut",
|
||||
"name": "Sun",
|
||||
"abbreviation": "CONN",
|
||||
"displayName": "Connecticut Sun"
|
||||
},
|
||||
"score": "85",
|
||||
"winner": true
|
||||
}
|
||||
],
|
||||
"status": {
|
||||
"clock": 0,
|
||||
"displayClock": "0:00",
|
||||
"period": 4,
|
||||
"type": {
|
||||
"id": "3",
|
||||
"name": "STATUS_FINAL",
|
||||
"state": "post",
|
||||
"completed": true
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "401672103",
|
||||
"uid": "s:40~l:59~e:401672103",
|
||||
"date": "2026-05-21T02:00:00Z",
|
||||
"name": "Phoenix Mercury at Seattle Storm",
|
||||
"shortName": "PHX @ SEA",
|
||||
"competitions": [
|
||||
{
|
||||
"id": "401672103",
|
||||
"uid": "s:40~l:59~e:401672103~c:401672103",
|
||||
"date": "2026-05-21T02:00:00Z",
|
||||
"type": {
|
||||
"id": "1",
|
||||
"abbreviation": "STD"
|
||||
},
|
||||
"venue": {
|
||||
"id": "3097",
|
||||
"fullName": "Climate Pledge Arena",
|
||||
"address": {
|
||||
"city": "Seattle",
|
||||
"state": "WA"
|
||||
},
|
||||
"capacity": 18100,
|
||||
"indoor": true
|
||||
},
|
||||
"competitors": [
|
||||
{
|
||||
"id": "11",
|
||||
"uid": "s:40~l:59~t:11",
|
||||
"type": "team",
|
||||
"order": 0,
|
||||
"homeAway": "home",
|
||||
"team": {
|
||||
"id": "11",
|
||||
"uid": "s:40~l:59~t:11",
|
||||
"location": "Seattle",
|
||||
"name": "Storm",
|
||||
"abbreviation": "SEA",
|
||||
"displayName": "Seattle Storm"
|
||||
},
|
||||
"score": null,
|
||||
"winner": null
|
||||
},
|
||||
{
|
||||
"id": "8",
|
||||
"uid": "s:40~l:59~t:8",
|
||||
"type": "team",
|
||||
"order": 1,
|
||||
"homeAway": "away",
|
||||
"team": {
|
||||
"id": "8",
|
||||
"uid": "s:40~l:59~t:8",
|
||||
"location": "Phoenix",
|
||||
"name": "Mercury",
|
||||
"abbreviation": "PHX",
|
||||
"displayName": "Phoenix Mercury"
|
||||
},
|
||||
"score": null,
|
||||
"winner": null
|
||||
}
|
||||
],
|
||||
"status": {
|
||||
"clock": 0,
|
||||
"displayClock": "0:00",
|
||||
"period": 0,
|
||||
"type": {
|
||||
"id": "1",
|
||||
"name": "STATUS_SCHEDULED",
|
||||
"state": "pre",
|
||||
"completed": false
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
269
sportstime_parser/tests/test_alias_loader.py
Normal file
269
sportstime_parser/tests/test_alias_loader.py
Normal file
@@ -0,0 +1,269 @@
|
||||
"""Tests for alias loaders."""
|
||||
|
||||
import pytest
|
||||
import json
|
||||
import tempfile
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
|
||||
from sportstime_parser.normalizers.alias_loader import (
|
||||
TeamAliasLoader,
|
||||
StadiumAliasLoader,
|
||||
)
|
||||
from sportstime_parser.models.aliases import AliasType
|
||||
|
||||
|
||||
class TestTeamAliasLoader:
    """Tests for TeamAliasLoader class.

    Every test runs against a small JSON alias file that maps two names and
    one abbreviation onto the single canonical team ID ``nba_okc``.
    """

    @pytest.fixture
    def sample_aliases_file(self, tmp_path):
        """Write a sample team-alias JSON file and return its path.

        The file lives under pytest's ``tmp_path`` so pytest manages its
        lifetime; the previous ``NamedTemporaryFile(delete=False)`` approach
        left a stray file behind after every test.
        """
        data = [
            {
                "id": "1",
                "team_canonical_id": "nba_okc",
                "alias_type": "name",
                "alias_value": "Seattle SuperSonics",
                "valid_from": "1967-01-01",
                "valid_until": "2008-07-02",
            },
            {
                "id": "2",
                "team_canonical_id": "nba_okc",
                "alias_type": "name",
                "alias_value": "Oklahoma City Thunder",
                "valid_from": "2008-07-03",
                "valid_until": None,
            },
            {
                "id": "3",
                "team_canonical_id": "nba_okc",
                "alias_type": "abbreviation",
                "alias_value": "OKC",
                "valid_from": "2008-07-03",
                "valid_until": None,
            },
        ]
        aliases_path = tmp_path / "team_aliases.json"
        aliases_path.write_text(json.dumps(data), encoding="utf-8")
        return aliases_path

    def test_load_aliases(self, sample_aliases_file):
        """Test loading aliases from file."""
        loader = TeamAliasLoader(sample_aliases_file)
        loader.load()
        assert len(loader._aliases) == 3

    def test_resolve_current_alias(self, sample_aliases_file):
        """Test resolving a current alias."""
        loader = TeamAliasLoader(sample_aliases_file)

        # No check_date is passed, so the open-ended Thunder alias must match
        result = loader.resolve("Oklahoma City Thunder")
        assert result == "nba_okc"

        # Abbreviation should also work
        result = loader.resolve("OKC")
        assert result == "nba_okc"

    def test_resolve_historical_alias(self, sample_aliases_file):
        """Test resolving a historical alias with date."""
        loader = TeamAliasLoader(sample_aliases_file)

        # Historical date should resolve SuperSonics
        result = loader.resolve("Seattle SuperSonics", check_date=date(2007, 1, 1))
        assert result == "nba_okc"

        # After relocation, SuperSonics shouldn't resolve
        result = loader.resolve("Seattle SuperSonics", check_date=date(2010, 1, 1))
        assert result is None

    def test_resolve_case_insensitive(self, sample_aliases_file):
        """Test case insensitive resolution."""
        loader = TeamAliasLoader(sample_aliases_file)

        result = loader.resolve("oklahoma city thunder")
        assert result == "nba_okc"

        result = loader.resolve("okc")
        assert result == "nba_okc"

    def test_resolve_with_type_filter(self, sample_aliases_file):
        """Test filtering by alias type."""
        loader = TeamAliasLoader(sample_aliases_file)

        # Should find when searching all types
        result = loader.resolve("OKC")
        assert result == "nba_okc"

        # Should not find when filtering to name only
        result = loader.resolve("OKC", alias_types=[AliasType.NAME])
        assert result is None

    def test_get_aliases_for_team(self, sample_aliases_file):
        """Test getting all aliases for a team."""
        loader = TeamAliasLoader(sample_aliases_file)

        aliases = loader.get_aliases_for_team("nba_okc")
        assert len(aliases) == 3

        # Filter by a date after the relocation
        aliases = loader.get_aliases_for_team(
            "nba_okc", check_date=date(2020, 1, 1)
        )
        assert len(aliases) == 2  # Thunder name + OKC abbreviation

    def test_missing_file(self, tmp_path):
        """Test handling of missing file."""
        # A path inside the fresh tmp_path directory is guaranteed not to exist,
        # unlike a hard-coded absolute path.
        loader = TeamAliasLoader(tmp_path / "does_not_exist.json")
        loader.load()  # Should not raise
        assert len(loader._aliases) == 0
|
||||
|
||||
|
||||
class TestStadiumAliasLoader:
    """Tests for StadiumAliasLoader class.

    The sample data describes one venue under two consecutive names:
    "Staples Center" until 2021-12-24 and "Crypto.com Arena" from 2021-12-25.
    """

    @pytest.fixture
    def sample_stadium_aliases(self, tmp_path):
        """Write a sample stadium-alias JSON file and return its path.

        Uses pytest's ``tmp_path`` so the file is managed by pytest rather
        than leaked by ``NamedTemporaryFile(delete=False)``.
        """
        data = [
            {
                "alias_name": "Crypto.com Arena",
                "stadium_canonical_id": "crypto_arena_los_angeles_ca",
                "valid_from": "2021-12-25",
                "valid_until": None,
            },
            {
                "alias_name": "Staples Center",
                "stadium_canonical_id": "crypto_arena_los_angeles_ca",
                "valid_from": "1999-10-17",
                "valid_until": "2021-12-24",
            },
        ]
        aliases_path = tmp_path / "stadium_aliases.json"
        aliases_path.write_text(json.dumps(data), encoding="utf-8")
        return aliases_path

    def test_load_stadium_aliases(self, sample_stadium_aliases):
        """Test loading stadium aliases."""
        loader = StadiumAliasLoader(sample_stadium_aliases)
        loader.load()
        assert len(loader._aliases) == 2

    def test_resolve_current_name(self, sample_stadium_aliases):
        """Test resolving current stadium name."""
        loader = StadiumAliasLoader(sample_stadium_aliases)

        result = loader.resolve("Crypto.com Arena")
        assert result == "crypto_arena_los_angeles_ca"

    def test_resolve_historical_name(self, sample_stadium_aliases):
        """Test resolving historical stadium name."""
        loader = StadiumAliasLoader(sample_stadium_aliases)

        # Staples Center in 2020
        result = loader.resolve("Staples Center", check_date=date(2020, 1, 1))
        assert result == "crypto_arena_los_angeles_ca"

        # Staples Center after rename shouldn't resolve
        result = loader.resolve("Staples Center", check_date=date(2023, 1, 1))
        assert result is None

    def test_date_boundary(self, sample_stadium_aliases):
        """Test exact date boundaries (both ends of a range are inclusive)."""
        loader = StadiumAliasLoader(sample_stadium_aliases)

        # Last day of Staples Center
        result = loader.resolve("Staples Center", check_date=date(2021, 12, 24))
        assert result == "crypto_arena_los_angeles_ca"

        # First day of Crypto.com Arena
        result = loader.resolve("Crypto.com Arena", check_date=date(2021, 12, 25))
        assert result == "crypto_arena_los_angeles_ca"

    def test_get_all_names(self, sample_stadium_aliases):
        """Test getting all stadium names."""
        loader = StadiumAliasLoader(sample_stadium_aliases)

        names = loader.get_all_names()
        assert len(names) == 2
        assert "Crypto.com Arena" in names
        assert "Staples Center" in names
|
||||
|
||||
|
||||
class TestDateRangeHandling:
    """Tests for date range edge cases in aliases.

    The sample data holds three aliases for ``test_team``: one with no date
    bounds, one with only a start date and one with only an end date.
    """

    @pytest.fixture
    def date_range_aliases(self, tmp_path):
        """Write aliases with various date range scenarios and return the path.

        Uses pytest's ``tmp_path`` so the file is managed by pytest rather
        than leaked by ``NamedTemporaryFile(delete=False)``.
        """
        data = [
            {
                "id": "1",
                "team_canonical_id": "test_team",
                "alias_type": "name",
                "alias_value": "Always Valid",
                "valid_from": None,
                "valid_until": None,
            },
            {
                "id": "2",
                "team_canonical_id": "test_team",
                "alias_type": "name",
                "alias_value": "Future Only",
                "valid_from": "2030-01-01",
                "valid_until": None,
            },
            {
                "id": "3",
                "team_canonical_id": "test_team",
                "alias_type": "name",
                "alias_value": "Past Only",
                "valid_from": None,
                "valid_until": "2000-01-01",
            },
        ]
        aliases_path = tmp_path / "date_range_aliases.json"
        aliases_path.write_text(json.dumps(data), encoding="utf-8")
        return aliases_path

    def test_always_valid_alias(self, date_range_aliases):
        """Test alias with no date restrictions."""
        loader = TeamAliasLoader(date_range_aliases)

        result = loader.resolve("Always Valid", check_date=date(2025, 1, 1))
        assert result == "test_team"

        result = loader.resolve("Always Valid", check_date=date(1990, 1, 1))
        assert result == "test_team"

    def test_future_only_alias(self, date_range_aliases):
        """Test alias that starts in the future."""
        loader = TeamAliasLoader(date_range_aliases)

        # Before valid_from
        result = loader.resolve("Future Only", check_date=date(2025, 1, 1))
        assert result is None

        # After valid_from
        result = loader.resolve("Future Only", check_date=date(2035, 1, 1))
        assert result == "test_team"

    def test_past_only_alias(self, date_range_aliases):
        """Test alias that expired in the past."""
        loader = TeamAliasLoader(date_range_aliases)

        # Before valid_until
        result = loader.resolve("Past Only", check_date=date(1990, 1, 1))
        assert result == "test_team"

        # After valid_until
        result = loader.resolve("Past Only", check_date=date(2025, 1, 1))
        assert result is None
|
||||
187
sportstime_parser/tests/test_canonical_id.py
Normal file
187
sportstime_parser/tests/test_canonical_id.py
Normal file
@@ -0,0 +1,187 @@
|
||||
"""Tests for canonical ID generation."""
|
||||
|
||||
import pytest
|
||||
from datetime import datetime, date
|
||||
|
||||
from sportstime_parser.normalizers.canonical_id import (
|
||||
generate_game_id,
|
||||
generate_team_id,
|
||||
generate_team_id_from_abbrev,
|
||||
generate_stadium_id,
|
||||
parse_game_id,
|
||||
normalize_string,
|
||||
)
|
||||
|
||||
|
||||
class TestNormalizeString:
    """Tests for normalize_string function."""

    def test_basic_normalization(self):
        """Test basic string normalization (lowercase, spaces to underscores)."""
        assert normalize_string("New York") == "new_york"
        assert normalize_string("Los Angeles") == "los_angeles"

    def test_removes_special_characters(self):
        """Test that special characters are removed."""
        assert normalize_string("AT&T Stadium") == "att_stadium"
        assert normalize_string("St. Louis") == "st_louis"
        assert normalize_string("O'Brien Field") == "obrien_field"

    def test_collapses_whitespace(self):
        """Test that multiple spaces are collapsed."""
        # Inputs deliberately contain runs of spaces; single-space inputs
        # would only repeat test_basic_normalization.
        assert normalize_string("New   York") == "new_york"
        assert normalize_string("  Los   Angeles  ") == "los_angeles"

    def test_empty_string(self):
        """Test empty and whitespace-only string handling."""
        assert normalize_string("") == ""
        assert normalize_string("   ") == ""

    def test_unicode_normalization(self):
        """Test that accented characters are reduced to plain ASCII letters."""
        assert normalize_string("Café") == "cafe"
        assert normalize_string("José") == "jose"
|
||||
|
||||
|
||||
class TestGenerateGameId:
    """Exercise generate_game_id with dates, datetimes and game numbers."""

    def test_basic_game_id(self):
        """A plain date produces the sport/season/date/away/home ID."""
        expected = "game_nba_2025_20251225_bos_lal"
        actual = generate_game_id(
            sport="nba",
            season=2025,
            away_abbrev="bos",
            home_abbrev="lal",
            game_date=date(2025, 12, 25),
        )
        assert actual == expected

    def test_game_id_with_datetime(self):
        """A datetime is accepted; only its date part appears in the ID."""
        first_pitch = datetime(2026, 4, 1, 19, 0)
        actual = generate_game_id(
            sport="mlb",
            season=2026,
            away_abbrev="nyy",
            home_abbrev="bos",
            game_date=first_pitch,
        )
        assert actual == "game_mlb_2026_20260401_nyy_bos"

    def test_game_id_with_game_number(self):
        """Doubleheader games get the game number appended as a suffix."""
        # Both games share every field except game_number.
        matchup = {
            "sport": "mlb",
            "season": 2026,
            "away_abbrev": "nyy",
            "home_abbrev": "bos",
            "game_date": date(2026, 7, 4),
        }
        opener = generate_game_id(game_number=1, **matchup)
        nightcap = generate_game_id(game_number=2, **matchup)
        assert opener == "game_mlb_2026_20260704_nyy_bos_1"
        assert nightcap == "game_mlb_2026_20260704_nyy_bos_2"

    def test_sport_lowercased(self):
        """Upper-case sport and abbreviations come out lower-cased."""
        shouting = {"sport": "NBA", "away_abbrev": "BOS", "home_abbrev": "LAL"}
        actual = generate_game_id(
            season=2025,
            game_date=date(2025, 12, 25),
            **shouting,
        )
        assert actual == "game_nba_2025_20251225_bos_lal"
|
||||
|
||||
|
||||
class TestParseGameId:
    """Exercise parse_game_id on well-formed and malformed IDs."""

    def test_parse_basic_game_id(self):
        """An ID without a suffix parses to its fields, game_number None."""
        fields = parse_game_id("game_nba_2025_20251225_bos_lal")
        expected = {
            "sport": "nba",
            "season": 2025,
            "away_abbrev": "bos",
            "home_abbrev": "lal",
            "year": 2025,
            "month": 12,
            "day": 25,
        }
        for key, value in expected.items():
            assert fields[key] == value
        assert fields["game_number"] is None

    def test_parse_game_id_with_game_number(self):
        """A trailing numeric suffix is returned as game_number."""
        fields = parse_game_id("game_mlb_2026_20260704_nyy_bos_2")
        expected = {
            "sport": "mlb",
            "season": 2026,
            "away_abbrev": "nyy",
            "home_abbrev": "bos",
            "year": 2026,
            "month": 7,
            "day": 4,
            "game_number": 2,
        }
        for key, value in expected.items():
            assert fields[key] == value

    def test_parse_invalid_game_id(self):
        """Malformed IDs are rejected with ValueError."""
        malformed_ids = (
            "invalid",
            "nba_2025_bos",  # Missing game_ prefix
            "",
            "game_nba_2025_bos_lal",  # Missing date
        )
        for candidate in malformed_ids:
            with pytest.raises(ValueError):
                parse_game_id(candidate)
|
||||
|
||||
|
||||
class TestGenerateTeamId:
    """Exercise generate_team_id, which builds an ID from city and name."""

    def test_basic_team_id(self):
        """City and name are joined behind the sport prefix."""
        expected = "team_nba_los_angeles_lakers"
        assert generate_team_id(sport="nba", city="Los Angeles", name="Lakers") == expected

    def test_team_id_normalizes_input(self):
        """Mixed-case sport, city and name are all normalized."""
        raw = {"sport": "NBA", "city": "New York", "name": "Yankees"}
        assert generate_team_id(**raw) == "team_nba_new_york_yankees"
|
||||
|
||||
|
||||
class TestGenerateTeamIdFromAbbrev:
    """Exercise generate_team_id_from_abbrev."""

    def test_basic_team_id_from_abbrev(self):
        """The abbreviation is appended to the sport prefix."""
        produced = generate_team_id_from_abbrev(sport="nba", abbreviation="LAL")
        expected = "team_nba_lal"
        assert produced == expected

    def test_lowercases_abbreviation(self):
        """Upper-case sport and abbreviation are both lower-cased."""
        produced = generate_team_id_from_abbrev(sport="MLB", abbreviation="NYY")
        expected = "team_mlb_nyy"
        assert produced == expected
|
||||
|
||||
|
||||
class TestGenerateStadiumId:
    """Checks for stadium IDs built by generate_stadium_id."""

    def test_basic_stadium_id(self):
        """A plain two-word venue name becomes an underscore-joined ID."""
        result = generate_stadium_id(sport="mlb", name="Fenway Park")
        assert result == "stadium_mlb_fenway_park"

    def test_stadium_id_special_characters(self):
        """The ampersand in the venue name is dropped from the ID."""
        result = generate_stadium_id(sport="nfl", name="AT&T Stadium")
        assert result == "stadium_nfl_att_stadium"

    def test_stadium_id_with_sponsor(self):
        """The dot in a sponsor name is dropped from the ID."""
        result = generate_stadium_id(sport="nba", name="Crypto.com Arena")
        assert result == "stadium_nba_cryptocom_arena"
|
||||
194
sportstime_parser/tests/test_fuzzy.py
Normal file
194
sportstime_parser/tests/test_fuzzy.py
Normal file
@@ -0,0 +1,194 @@
|
||||
"""Tests for fuzzy string matching utilities."""
|
||||
|
||||
import pytest
|
||||
|
||||
from sportstime_parser.normalizers.fuzzy import (
|
||||
normalize_for_matching,
|
||||
fuzzy_match_team,
|
||||
fuzzy_match_stadium,
|
||||
exact_match,
|
||||
best_match,
|
||||
calculate_similarity,
|
||||
MatchCandidate,
|
||||
)
|
||||
|
||||
|
||||
class TestNormalizeForMatching:
    """Checks for the normalize_for_matching string normalizer."""

    def test_basic_normalization(self):
        """Names are lower-cased and surrounding whitespace is stripped."""
        cases = (
            ("Los Angeles Lakers", "los angeles lakers"),
            ("  Boston Celtics  ", "boston celtics"),
        )
        for raw, expected in cases:
            assert normalize_for_matching(raw) == expected

    def test_removes_common_prefixes(self):
        """Leading "The" and "Team" are removed."""
        cases = (
            ("The Boston Celtics", "boston celtics"),
            ("Team Lakers", "lakers"),
        )
        for raw, expected in cases:
            assert normalize_for_matching(raw) == expected

    def test_removes_stadium_suffixes(self):
        """Trailing venue words (Park, Arena, Field, Center) are removed."""
        cases = (
            ("Fenway Park", "fenway"),
            ("Madison Square Garden Arena", "madison square garden"),
            ("Wrigley Field", "wrigley"),
            ("TD Garden Center", "td garden"),
        )
        for raw, expected in cases:
            assert normalize_for_matching(raw) == expected
|
||||
|
||||
|
||||
class TestExactMatch:
    """Checks for exact_match lookups against a candidate pool."""

    def test_exact_match_primary_name(self):
        """A candidate's primary name resolves to its canonical ID."""
        lakers = MatchCandidate("nba_lal", "Los Angeles Lakers", ["Lakers", "LAL"])
        celtics = MatchCandidate("nba_bos", "Boston Celtics", ["Celtics", "BOS"])
        pool = [lakers, celtics]

        assert exact_match("Los Angeles Lakers", pool) == "nba_lal"
        assert exact_match("Boston Celtics", pool) == "nba_bos"

    def test_exact_match_alias(self):
        """Any listed alias resolves to the owning candidate's ID."""
        pool = [MatchCandidate("nba_lal", "Los Angeles Lakers", ["Lakers", "LAL"])]

        for alias in ("Lakers", "LAL"):
            assert exact_match(alias, pool) == "nba_lal"

    def test_case_insensitive(self):
        """Letter case of the query does not affect the match."""
        pool = [MatchCandidate("nba_lal", "Los Angeles Lakers", ["Lakers"])]

        for query in ("los angeles lakers", "LAKERS"):
            assert exact_match(query, pool) == "nba_lal"

    def test_no_match(self):
        """An unknown name yields None."""
        pool = [MatchCandidate("nba_lal", "Los Angeles Lakers", ["Lakers"])]

        assert exact_match("New York Knicks", pool) is None
|
||||
|
||||
|
||||
class TestFuzzyMatchTeam:
    """Checks for fuzzy_match_team ranking, thresholding and result limits."""

    def test_close_match(self):
        """A near-identical query ranks the intended team first."""
        pool = [
            MatchCandidate("nba_lal", "Los Angeles Lakers", ["Lakers", "LA Lakers"]),
            MatchCandidate("nba_lac", "Los Angeles Clippers", ["Clippers", "LA Clippers"]),
        ]

        hits = fuzzy_match_team("LA Lakers", pool, threshold=70)

        assert len(hits) > 0
        assert hits[0].canonical_id == "nba_lal"

    def test_partial_name_match(self):
        """A nickname-only query still finds the team."""
        pool = [MatchCandidate("nba_bos", "Boston Celtics", ["Celtics", "BOS"])]

        hits = fuzzy_match_team("Celtics", pool, threshold=80)

        assert len(hits) > 0
        assert hits[0].canonical_id == "nba_bos"

    def test_threshold_filtering(self):
        """An unrelated query produces no hits at a high threshold."""
        pool = [MatchCandidate("nba_bos", "Boston Celtics", [])]

        # Very different string should not match at high threshold
        hits = fuzzy_match_team("xyz123", pool, threshold=90)

        assert len(hits) == 0

    def test_returns_top_n(self):
        """top_n caps the number of returned matches."""
        pool = [
            MatchCandidate("nba_lal", "Los Angeles Lakers", []),
            MatchCandidate("nba_lac", "Los Angeles Clippers", []),
            MatchCandidate("mlb_lad", "Los Angeles Dodgers", []),
        ]

        hits = fuzzy_match_team("Los Angeles", pool, threshold=50, top_n=2)

        assert len(hits) <= 2
|
||||
|
||||
|
||||
class TestFuzzyMatchStadium:
    """Tests for fuzzy_match_stadium function."""

    def test_stadium_match(self):
        """A venue name with extra trailing words still ranks the venue first."""
        candidates = [
            MatchCandidate("fenway", "Fenway Park", ["Fenway"]),
            MatchCandidate("td_garden", "TD Garden", ["Boston Garden"]),
        ]

        matches = fuzzy_match_stadium("Fenway Park Boston", candidates, threshold=70)

        assert len(matches) > 0
        assert matches[0].canonical_id == "fenway"

    def test_naming_rights_change(self):
        """A former venue name listed as an alias resolves to the current venue."""
        candidates = [
            MatchCandidate(
                "chase_center",
                "Chase Center",
                ["Oracle Arena", "Oakland Coliseum Arena"],
            ),
        ]

        # Should match on alias
        matches = fuzzy_match_stadium("Oracle Arena", candidates, threshold=70)

        assert len(matches) > 0
        # A non-empty result alone does not show the alias mapped to the right
        # venue; pin the resolved ID the same way test_stadium_match does.
        assert matches[0].canonical_id == "chase_center"
|
||||
|
||||
|
||||
class TestBestMatch:
    """Tests for best_match function."""

    def test_prefers_exact_match(self):
        """An exact alias hit is returned with confidence 100."""
        candidates = [
            MatchCandidate("nba_lal", "Los Angeles Lakers", ["Lakers"]),
            MatchCandidate("nba_bos", "Boston Celtics", ["Celtics"]),
        ]

        result = best_match("Lakers", candidates)

        assert result is not None
        assert result.canonical_id == "nba_lal"
        assert result.confidence == 100  # Exact match

    def test_falls_back_to_fuzzy(self):
        """A near-miss query resolves via fuzzy matching with confidence below 100."""
        candidates = [
            MatchCandidate("nba_lal", "Los Angeles Lakers", ["Lakers"]),
        ]

        result = best_match("LA Laker", candidates, threshold=70)

        assert result is not None
        # Check which candidate the fuzzy path resolved to, not only its score,
        # mirroring the ID check in test_prefers_exact_match.
        assert result.canonical_id == "nba_lal"
        assert result.confidence < 100  # Fuzzy match

    def test_no_match_below_threshold(self):
        """None is returned when nothing reaches the threshold."""
        candidates = [
            MatchCandidate("nba_lal", "Los Angeles Lakers", []),
        ]

        result = best_match("xyz123", candidates, threshold=90)

        assert result is None
|
||||
|
||||
|
||||
class TestCalculateSimilarity:
    """Checks for calculate_similarity scores."""

    def test_identical_strings(self):
        """Identical inputs score exactly 100."""
        name = "Boston Celtics"
        assert calculate_similarity(name, "Boston Celtics") == 100

    def test_similar_strings(self):
        """The same words in a different order score at least 90."""
        similarity = calculate_similarity("Boston Celtics", "Celtics Boston")
        assert similarity >= 90

    def test_different_strings(self):
        """Unrelated team names score below 50."""
        similarity = calculate_similarity("Boston Celtics", "Los Angeles Lakers")
        assert similarity < 50

    def test_empty_string(self):
        """An empty first argument scores 0."""
        similarity = calculate_similarity("", "Boston Celtics")
        assert similarity == 0
|
||||
1
sportstime_parser/tests/test_scrapers/__init__.py
Normal file
1
sportstime_parser/tests/test_scrapers/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Tests for scrapers module."""
|
||||
257
sportstime_parser/tests/test_scrapers/test_mlb.py
Normal file
257
sportstime_parser/tests/test_scrapers/test_mlb.py
Normal file
@@ -0,0 +1,257 @@
|
||||
"""Tests for MLB scraper."""
|
||||
|
||||
from datetime import datetime
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from sportstime_parser.scrapers.mlb import MLBScraper, create_mlb_scraper
|
||||
from sportstime_parser.scrapers.base import RawGameData
|
||||
from sportstime_parser.tests.fixtures import (
|
||||
load_json_fixture,
|
||||
MLB_ESPN_SCOREBOARD_JSON,
|
||||
)
|
||||
|
||||
|
||||
class TestMLBScraperInit:
    """Construction and static configuration of MLBScraper."""

    def test_creates_scraper_with_season(self):
        """The constructor records the sport and the requested season."""
        mlb = MLBScraper(season=2026)
        assert mlb.sport == "mlb"
        assert mlb.season == 2026

    def test_factory_function_creates_scraper(self):
        """create_mlb_scraper returns an MLBScraper for the given season."""
        mlb = create_mlb_scraper(season=2026)
        assert isinstance(mlb, MLBScraper)
        assert mlb.season == 2026

    def test_expected_game_count(self):
        """The expected game count for the season is 2430."""
        mlb = MLBScraper(season=2026)
        assert mlb.expected_game_count == 2430

    def test_sources_in_priority_order(self):
        """Sources are listed Baseball-Reference first, then MLB API, then ESPN."""
        mlb = MLBScraper(season=2026)
        priority = mlb._get_sources()
        assert priority == ["baseball_reference", "mlb_api", "espn"]
|
||||
|
||||
|
||||
class TestESPNParsing:
    """Parsing of the MLB ESPN scoreboard fixture into raw games."""

    def test_parses_completed_games(self):
        """Final games carry teams, scores and venue from the fixture."""
        mlb = MLBScraper(season=2026)
        payload = load_json_fixture(MLB_ESPN_SCOREBOARD_JSON)
        parsed = mlb._parse_espn_response(payload, "http://espn.com/api")

        finals = [game for game in parsed if game.status == "final"]
        assert len(finals) == 2

        # Yankees @ Red Sox
        yankees_at_red_sox = next(
            game for game in finals if game.away_team_raw == "New York Yankees"
        )
        assert yankees_at_red_sox.home_team_raw == "Boston Red Sox"
        assert yankees_at_red_sox.away_score == 3
        assert yankees_at_red_sox.home_score == 5
        assert yankees_at_red_sox.stadium_raw == "Fenway Park"

    def test_parses_scheduled_games(self):
        """The single scheduled game carries its teams and venue."""
        mlb = MLBScraper(season=2026)
        payload = load_json_fixture(MLB_ESPN_SCOREBOARD_JSON)
        parsed = mlb._parse_espn_response(payload, "http://espn.com/api")

        upcoming = [game for game in parsed if game.status == "scheduled"]
        assert len(upcoming) == 1

        dodgers_at_giants = upcoming[0]
        assert dodgers_at_giants.away_team_raw == "Los Angeles Dodgers"
        assert dodgers_at_giants.home_team_raw == "San Francisco Giants"
        assert dodgers_at_giants.stadium_raw == "Oracle Park"

    def test_parses_venue_info(self):
        """Every parsed game has a venue name."""
        mlb = MLBScraper(season=2026)
        payload = load_json_fixture(MLB_ESPN_SCOREBOARD_JSON)
        parsed = mlb._parse_espn_response(payload, "http://espn.com/api")

        for entry in parsed:
            assert entry.stadium_raw is not None
|
||||
|
||||
|
||||
class TestGameNormalization:
    """Normalization of raw MLB games into canonical game records."""

    def test_normalizes_games_with_canonical_ids(self):
        """A resolvable raw game yields one game with IDs and scores set."""
        mlb = MLBScraper(season=2026)
        raw_game = RawGameData(
            game_date=datetime(2026, 4, 15),
            home_team_raw="Boston Red Sox",
            away_team_raw="New York Yankees",
            stadium_raw="Fenway Park",
            home_score=5,
            away_score=3,
            status="final",
            source_url="http://example.com",
        )

        normalized, _review_items = mlb._normalize_games([raw_game])

        assert len(normalized) == 1
        record = normalized[0]

        # Check canonical ID format
        # NOTE(review): the parse_game_id tests added in this same commit use
        # IDs shaped like "game_mlb_2026_20260704_nyy_bos_2" and treat IDs
        # without the "game_" prefix as invalid. Confirm which format
        # _normalize_games actually emits before relying on this value.
        assert record.id == "mlb_2026_nyy_bos_0415"
        assert record.sport == "mlb"
        assert record.season == 2026

        # Check team IDs
        assert record.home_team_id == "team_mlb_bos"
        assert record.away_team_id == "team_mlb_nyy"

        # Check scores preserved
        assert record.home_score == 5
        assert record.away_score == 3

    def test_creates_review_items_for_unresolved_teams(self):
        """An unresolvable home team yields no game but at least one review item."""
        mlb = MLBScraper(season=2026)
        raw_game = RawGameData(
            game_date=datetime(2026, 4, 15),
            home_team_raw="Unknown Team XYZ",
            away_team_raw="Boston Red Sox",
            stadium_raw="Fenway Park",
            status="scheduled",
        )

        normalized, flagged = mlb._normalize_games([raw_game])

        # Game should not be created due to unresolved team
        assert len(normalized) == 0

        # But there should be a review item
        assert len(flagged) >= 1
|
||||
|
||||
|
||||
class TestTeamAndStadiumScraping:
    """Completeness checks for MLB team and stadium reference data."""

    def test_scrapes_all_mlb_teams(self):
        """Thirty teams are returned, each with a unique ID and core fields set."""
        mlb = MLBScraper(season=2026)
        clubs = mlb.scrape_teams()

        # 30 MLB teams
        assert len(clubs) == 30

        # Check team IDs are unique
        unique_ids = {club.id for club in clubs}
        assert len(unique_ids) == 30

        # Check all teams have required fields
        for club in clubs:
            assert club.id.startswith("team_mlb_")
            assert club.sport == "mlb"
            assert club.city
            assert club.name
            assert club.full_name
            assert club.abbreviation

    def test_teams_have_leagues_and_divisions(self):
        """Teams split evenly, 15 each, between "American" and "National"."""
        mlb = MLBScraper(season=2026)
        clubs = mlb.scrape_teams()

        # Count teams by league
        american = [club for club in clubs if club.conference == "American"]
        national = [club for club in clubs if club.conference == "National"]

        assert len(american) == 15
        assert len(national) == 15

    def test_scrapes_all_mlb_stadiums(self):
        """Thirty stadiums are returned, each with a unique ID and location data."""
        mlb = MLBScraper(season=2026)
        venues = mlb.scrape_stadiums()

        # Should have stadiums for all teams
        assert len(venues) == 30

        # Check stadium IDs are unique
        unique_ids = {venue.id for venue in venues}
        assert len(unique_ids) == 30

        # Check all stadiums have required fields
        for venue in venues:
            assert venue.id.startswith("stadium_mlb_")
            assert venue.sport == "mlb"
            assert venue.name
            assert venue.city
            assert venue.state
            assert venue.country in ["USA", "Canada"]
            assert venue.latitude != 0
            assert venue.longitude != 0
|
||||
|
||||
|
||||
class TestScrapeFallback:
    """Fallback across MLB schedule sources when earlier ones fail."""

    def test_falls_back_to_next_source_on_failure(self):
        """ESPN supplies the result after Baseball-Reference and MLB API raise."""
        mlb = MLBScraper(season=2026)

        # Payload the ESPN stub hands back once the earlier sources have failed.
        espn_games = [
            RawGameData(
                game_date=datetime(2026, 4, 15),
                home_team_raw="Boston Red Sox",
                away_team_raw="New York Yankees",
                stadium_raw="Fenway Park",
                status="scheduled",
            )
        ]

        with patch.object(mlb, "_scrape_baseball_reference") as mock_br, \
                patch.object(mlb, "_scrape_mlb_api") as mock_mlb, \
                patch.object(mlb, "_scrape_espn") as mock_espn:
            # Make BR and MLB API fail
            mock_br.side_effect = Exception("Connection failed")
            mock_mlb.side_effect = Exception("API error")

            # Make ESPN return data
            mock_espn.return_value = espn_games

            outcome = mlb.scrape_games()

            assert outcome.success
            assert outcome.source == "espn"
            assert mock_br.called
            assert mock_mlb.called
            assert mock_espn.called
|
||||
|
||||
|
||||
class TestSeasonMonths:
    """Calendar months covered by an MLB season."""

    def test_gets_correct_season_months(self):
        """The 2026 season spans nine months, March through November."""
        mlb = MLBScraper(season=2026)
        season_months = mlb._get_season_months()

        # MLB season is March-November
        assert len(season_months) == 9  # Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov

        # Check first month is March of season year
        opening = season_months[0]
        assert opening == (2026, 3)

        # Check last month is November
        closing = season_months[-1]
        assert closing == (2026, 11)
|
||||
251
sportstime_parser/tests/test_scrapers/test_mls.py
Normal file
251
sportstime_parser/tests/test_scrapers/test_mls.py
Normal file
@@ -0,0 +1,251 @@
|
||||
"""Tests for MLS scraper."""
|
||||
|
||||
from datetime import datetime
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from sportstime_parser.scrapers.mls import MLSScraper, create_mls_scraper
|
||||
from sportstime_parser.scrapers.base import RawGameData
|
||||
from sportstime_parser.tests.fixtures import (
|
||||
load_json_fixture,
|
||||
MLS_ESPN_SCOREBOARD_JSON,
|
||||
)
|
||||
|
||||
|
||||
class TestMLSScraperInit:
    """Construction and static configuration of MLSScraper."""

    def test_creates_scraper_with_season(self):
        """The constructor records the sport and the requested season."""
        mls = MLSScraper(season=2026)
        assert mls.sport == "mls"
        assert mls.season == 2026

    def test_factory_function_creates_scraper(self):
        """create_mls_scraper returns an MLSScraper for the given season."""
        mls = create_mls_scraper(season=2026)
        assert isinstance(mls, MLSScraper)
        assert mls.season == 2026

    def test_expected_game_count(self):
        """The expected game count for the season is 493."""
        mls = MLSScraper(season=2026)
        assert mls.expected_game_count == 493

    def test_sources_in_priority_order(self):
        """Sources are listed ESPN first, then FBref."""
        mls = MLSScraper(season=2026)
        priority = mls._get_sources()
        assert priority == ["espn", "fbref"]
|
||||
|
||||
|
||||
class TestESPNParsing:
    """Parsing of the MLS ESPN scoreboard fixture into raw games."""

    def test_parses_completed_games(self):
        """Final games carry teams, scores and venue from the fixture."""
        mls = MLSScraper(season=2026)
        payload = load_json_fixture(MLS_ESPN_SCOREBOARD_JSON)
        parsed = mls._parse_espn_response(payload, "http://espn.com/api")

        finals = [game for game in parsed if game.status == "final"]
        assert len(finals) == 2

        # Galaxy @ LAFC
        galaxy_at_lafc = next(
            game for game in finals if game.away_team_raw == "LA Galaxy"
        )
        assert galaxy_at_lafc.home_team_raw == "Los Angeles FC"
        assert galaxy_at_lafc.away_score == 2
        assert galaxy_at_lafc.home_score == 3
        assert galaxy_at_lafc.stadium_raw == "BMO Stadium"

    def test_parses_scheduled_games(self):
        """The single scheduled game carries its teams and venue."""
        mls = MLSScraper(season=2026)
        payload = load_json_fixture(MLS_ESPN_SCOREBOARD_JSON)
        parsed = mls._parse_espn_response(payload, "http://espn.com/api")

        upcoming = [game for game in parsed if game.status == "scheduled"]
        assert len(upcoming) == 1

        red_bulls_at_atlanta = upcoming[0]
        assert red_bulls_at_atlanta.away_team_raw == "New York Red Bulls"
        assert red_bulls_at_atlanta.home_team_raw == "Atlanta United FC"
        assert red_bulls_at_atlanta.stadium_raw == "Mercedes-Benz Stadium"

    def test_parses_venue_info(self):
        """Every parsed game has a venue name."""
        mls = MLSScraper(season=2026)
        payload = load_json_fixture(MLS_ESPN_SCOREBOARD_JSON)
        parsed = mls._parse_espn_response(payload, "http://espn.com/api")

        for entry in parsed:
            assert entry.stadium_raw is not None
|
||||
|
||||
|
||||
class TestGameNormalization:
    """Normalization of raw MLS games into canonical game records."""

    def test_normalizes_games_with_canonical_ids(self):
        """A resolvable raw game yields one game with IDs and scores set."""
        mls = MLSScraper(season=2026)
        raw_game = RawGameData(
            game_date=datetime(2026, 3, 15),
            home_team_raw="Los Angeles FC",
            away_team_raw="LA Galaxy",
            stadium_raw="BMO Stadium",
            home_score=3,
            away_score=2,
            status="final",
            source_url="http://example.com",
        )

        normalized, _review_items = mls._normalize_games([raw_game])

        assert len(normalized) == 1
        record = normalized[0]

        # Check canonical ID format
        # NOTE(review): the parse_game_id tests added in this same commit use
        # IDs shaped like "game_mlb_2026_20260704_nyy_bos_2" and treat IDs
        # without the "game_" prefix as invalid. Confirm which format
        # _normalize_games actually emits before relying on this value.
        assert record.id == "mls_2026_lag_lafc_0315"
        assert record.sport == "mls"
        assert record.season == 2026

        # Check team IDs
        assert record.home_team_id == "team_mls_lafc"
        assert record.away_team_id == "team_mls_lag"

        # Check scores preserved
        assert record.home_score == 3
        assert record.away_score == 2

    def test_creates_review_items_for_unresolved_teams(self):
        """An unresolvable home team yields no game but at least one review item."""
        mls = MLSScraper(season=2026)
        raw_game = RawGameData(
            game_date=datetime(2026, 3, 15),
            home_team_raw="Unknown Team XYZ",
            away_team_raw="LA Galaxy",
            stadium_raw="BMO Stadium",
            status="scheduled",
        )

        normalized, flagged = mls._normalize_games([raw_game])

        # Game should not be created due to unresolved team
        assert len(normalized) == 0

        # But there should be a review item
        assert len(flagged) >= 1
|
||||
|
||||
|
||||
class TestTeamAndStadiumScraping:
    """Completeness checks for MLS team and stadium reference data."""

    def test_scrapes_all_mls_teams(self):
        """At least 29 teams are returned, each with a unique ID and core fields."""
        mls = MLSScraper(season=2026)
        clubs = mls.scrape_teams()

        # MLS has 29+ teams
        assert len(clubs) >= 29

        # Check team IDs are unique
        unique_ids = {club.id for club in clubs}
        assert len(unique_ids) == len(clubs)

        # Check all teams have required fields
        for club in clubs:
            assert club.id.startswith("team_mls_")
            assert club.sport == "mls"
            assert club.city
            assert club.name
            assert club.full_name
            assert club.abbreviation

    def test_teams_have_conferences(self):
        """Each of the Eastern and Western conferences has at least 14 teams."""
        mls = MLSScraper(season=2026)
        clubs = mls.scrape_teams()

        # Count teams by conference
        east = [club for club in clubs if club.conference == "Eastern"]
        west = [club for club in clubs if club.conference == "Western"]

        # MLS has two conferences
        assert len(east) >= 14
        assert len(west) >= 14

    def test_scrapes_all_mls_stadiums(self):
        """At least 29 stadiums are returned, each with location data."""
        mls = MLSScraper(season=2026)
        venues = mls.scrape_stadiums()

        # Should have stadiums for all teams
        assert len(venues) >= 29

        # Check all stadiums have required fields
        for venue in venues:
            assert venue.id.startswith("stadium_mls_")
            assert venue.sport == "mls"
            assert venue.name
            assert venue.city
            assert venue.state
            assert venue.country in ["USA", "Canada"]
            assert venue.latitude != 0
            assert venue.longitude != 0
|
||||
|
||||
|
||||
class TestScrapeFallback:
    """Fallback across MLS schedule sources when earlier ones fail."""

    def test_falls_back_to_next_source_on_failure(self):
        """FBref supplies the result after the ESPN source raises."""
        mls = MLSScraper(season=2026)

        # Payload the FBref stub hands back once ESPN has failed.
        fbref_games = [
            RawGameData(
                game_date=datetime(2026, 3, 15),
                home_team_raw="Los Angeles FC",
                away_team_raw="LA Galaxy",
                stadium_raw="BMO Stadium",
                status="scheduled",
            )
        ]

        with patch.object(mls, "_scrape_espn") as mock_espn, \
                patch.object(mls, "_scrape_fbref") as mock_fbref:
            # Make ESPN fail
            mock_espn.side_effect = Exception("Connection failed")

            # Make FBref return data
            mock_fbref.return_value = fbref_games

            outcome = mls.scrape_games()

            assert outcome.success
            assert outcome.source == "fbref"
            assert mock_espn.called
            assert mock_fbref.called
|
||||
|
||||
|
||||
class TestSeasonMonths:
    """Calendar months covered by an MLS season."""

    def test_gets_correct_season_months(self):
        """The 2026 season spans ten months, February through November."""
        mls = MLSScraper(season=2026)
        season_months = mls._get_season_months()

        # MLS season is February-November
        assert len(season_months) == 10  # Feb, Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov

        # Check first month is February of season year
        opening = season_months[0]
        assert opening == (2026, 2)

        # Check last month is November
        closing = season_months[-1]
        assert closing == (2026, 11)
|
||||
428
sportstime_parser/tests/test_scrapers/test_nba.py
Normal file
428
sportstime_parser/tests/test_scrapers/test_nba.py
Normal file
@@ -0,0 +1,428 @@
|
||||
"""Tests for NBA scraper."""
|
||||
|
||||
import json
|
||||
from datetime import datetime
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from sportstime_parser.scrapers.nba import NBAScraper, create_nba_scraper
|
||||
from sportstime_parser.scrapers.base import RawGameData
|
||||
from sportstime_parser.tests.fixtures import (
|
||||
load_fixture,
|
||||
load_json_fixture,
|
||||
NBA_BR_OCTOBER_HTML,
|
||||
NBA_BR_EDGE_CASES_HTML,
|
||||
NBA_ESPN_SCOREBOARD_JSON,
|
||||
)
|
||||
|
||||
|
||||
class TestNBAScraperInit:
    """Construction and static configuration of NBAScraper."""

    def test_creates_scraper_with_season(self):
        """The constructor records the sport and the requested season."""
        nba = NBAScraper(season=2025)
        assert nba.sport == "nba"
        assert nba.season == 2025

    def test_factory_function_creates_scraper(self):
        """create_nba_scraper returns an NBAScraper for the given season."""
        nba = create_nba_scraper(season=2025)
        assert isinstance(nba, NBAScraper)
        assert nba.season == 2025

    def test_expected_game_count(self):
        """The expected game count for the season is 1230."""
        nba = NBAScraper(season=2025)
        assert nba.expected_game_count == 1230

    def test_sources_in_priority_order(self):
        """Sources are listed Basketball-Reference first, then ESPN, then CBS."""
        nba = NBAScraper(season=2025)
        priority = nba._get_sources()
        assert priority == ["basketball_reference", "espn", "cbs"]
|
||||
|
||||
|
||||
class TestBasketballReferenceParsing:
    """Parsing of the Basketball-Reference October fixture into raw games."""

    def test_parses_completed_games(self):
        """Final games in the fixture carry teams, scores, venue and status."""
        nba = NBAScraper(season=2025)
        page = load_fixture(NBA_BR_OCTOBER_HTML)
        parsed = nba._parse_basketball_reference(page, "http://example.com")

        # Should find all games in fixture
        assert len(parsed) == 7

        # Check first completed game
        finals = [game for game in parsed if game.status == "final"]
        assert len(finals) == 2

        # Boston @ Cleveland
        celtics_at_cavs = next(
            game for game in parsed if game.away_team_raw == "Boston Celtics"
        )
        assert celtics_at_cavs.home_team_raw == "Cleveland Cavaliers"
        assert celtics_at_cavs.away_score == 112
        assert celtics_at_cavs.home_score == 108
        assert celtics_at_cavs.stadium_raw == "Rocket Mortgage FieldHouse"
        assert celtics_at_cavs.status == "final"

    def test_parses_scheduled_games(self):
        """Scheduled games in the fixture have teams and venue but no scores."""
        nba = NBAScraper(season=2025)
        page = load_fixture(NBA_BR_OCTOBER_HTML)
        parsed = nba._parse_basketball_reference(page, "http://example.com")

        upcoming = [game for game in parsed if game.status == "scheduled"]
        assert len(upcoming) == 5

        # Houston @ OKC
        rockets_at_thunder = next(
            game for game in upcoming if game.away_team_raw == "Houston Rockets"
        )
        assert rockets_at_thunder.home_team_raw == "Oklahoma City Thunder"
        assert rockets_at_thunder.away_score is None
        assert rockets_at_thunder.home_score is None
        assert rockets_at_thunder.stadium_raw == "Paycom Center"

    def test_parses_game_dates_correctly(self):
        """The first parsed game is dated 2025-10-22."""
        nba = NBAScraper(season=2025)
        page = load_fixture(NBA_BR_OCTOBER_HTML)
        parsed = nba._parse_basketball_reference(page, "http://example.com")

        # Check first game date
        opening_date = parsed[0].game_date
        assert opening_date.year == 2025
        assert opening_date.month == 10
        assert opening_date.day == 22

    def test_tracks_source_url(self):
        """Every parsed game records the URL it was parsed from."""
        nba = NBAScraper(season=2025)
        page = load_fixture(NBA_BR_OCTOBER_HTML)
        origin = "http://basketball-reference.com/test"
        parsed = nba._parse_basketball_reference(page, origin)

        for entry in parsed:
            assert entry.source_url == origin
|
||||
|
||||
|
||||
class TestBasketballReferenceEdgeCases:
    """Parsing of the Basketball-Reference edge-case fixture."""

    def test_parses_postponed_games(self):
        """Exactly one game is postponed: Lakers at Suns."""
        nba = NBAScraper(season=2025)
        page = load_fixture(NBA_BR_EDGE_CASES_HTML)
        parsed = nba._parse_basketball_reference(page, "http://example.com")

        delayed = [game for game in parsed if game.status == "postponed"]
        assert len(delayed) == 1

        only_delayed = delayed[0]
        assert only_delayed.away_team_raw == "Los Angeles Lakers"
        assert only_delayed.home_team_raw == "Phoenix Suns"

    def test_parses_cancelled_games(self):
        """Exactly one game is cancelled, with Portland as the away team."""
        nba = NBAScraper(season=2025)
        page = load_fixture(NBA_BR_EDGE_CASES_HTML)
        parsed = nba._parse_basketball_reference(page, "http://example.com")

        scrapped = [game for game in parsed if game.status == "cancelled"]
        assert len(scrapped) == 1
        assert scrapped[0].away_team_raw == "Portland Trail Blazers"

    def test_parses_neutral_site_games(self):
        """The Arena CDMX game is parsed as a final between Miami and Washington."""
        nba = NBAScraper(season=2025)
        page = load_fixture(NBA_BR_EDGE_CASES_HTML)
        parsed = nba._parse_basketball_reference(page, "http://example.com")

        # Mexico City game
        mexico_city = next(game for game in parsed if game.stadium_raw == "Arena CDMX")
        assert mexico_city.away_team_raw == "Miami Heat"
        assert mexico_city.home_team_raw == "Washington Wizards"
        assert mexico_city.status == "final"

    def test_parses_overtime_games(self):
        """The 147-150 game is parsed with both scores and final status."""
        nba = NBAScraper(season=2025)
        page = load_fixture(NBA_BR_EDGE_CASES_HTML)
        parsed = nba._parse_basketball_reference(page, "http://example.com")

        # High scoring OT game
        high_scoring = next(game for game in parsed if game.away_score == 147)
        assert high_scoring.home_score == 150
        assert high_scoring.status == "final"
|
||||
|
||||
|
||||
class TestESPNParsing:
    """Test ESPN API response parsing for the NBA scraper.

    Each test parses the shared ESPN scoreboard JSON fixture through
    ``NBAScraper._parse_espn_response`` and inspects the raw games returned.
    """

    @staticmethod
    def _parse_scoreboard():
        """Return the raw games parsed from the NBA ESPN scoreboard fixture."""
        scraper = NBAScraper(season=2025)
        data = load_json_fixture(NBA_ESPN_SCOREBOARD_JSON)
        return scraper._parse_espn_response(data, "http://espn.com/api")

    def test_parses_completed_games(self):
        """Test parsing completed games from ESPN."""
        games = self._parse_scoreboard()

        completed = [g for g in games if g.status == "final"]
        assert len(completed) == 2

        # Boston @ Cleveland
        bos_cle = next(g for g in completed if g.away_team_raw == "Boston Celtics")
        assert bos_cle.home_team_raw == "Cleveland Cavaliers"
        assert bos_cle.away_score == 112
        assert bos_cle.home_score == 108
        assert bos_cle.stadium_raw == "Rocket Mortgage FieldHouse"

    def test_parses_scheduled_games(self):
        """Test parsing scheduled games from ESPN."""
        games = self._parse_scoreboard()

        scheduled = [g for g in games if g.status == "scheduled"]
        assert len(scheduled) == 1

        hou_okc = scheduled[0]
        assert hou_okc.away_team_raw == "Houston Rockets"
        assert hou_okc.home_team_raw == "Oklahoma City Thunder"
        assert hou_okc.stadium_raw == "Paycom Center"

    def test_parses_venue_info(self):
        """Test venue information is extracted for every parsed game."""
        games = self._parse_scoreboard()

        # Without this guard an empty parse result would skip the loop and
        # let the test pass without checking a single venue.
        assert games, "expected at least one game parsed from the fixture"

        # Check all games have venue info
        for game in games:
            assert game.stadium_raw is not None
|
||||
|
||||
|
||||
class TestGameNormalization:
    """Normalization of raw NBA games into canonical game records."""

    def test_normalizes_games_with_canonical_ids(self):
        """A resolvable raw game yields one game with the expected IDs."""
        celtics_at_cavs = RawGameData(
            game_date=datetime(2025, 10, 22),
            home_team_raw="Cleveland Cavaliers",
            away_team_raw="Boston Celtics",
            stadium_raw="Rocket Mortgage FieldHouse",
            home_score=108,
            away_score=112,
            status="final",
            source_url="http://example.com",
        )

        normalized, _review = NBAScraper(season=2025)._normalize_games(
            [celtics_at_cavs]
        )

        assert len(normalized) == 1
        result = normalized[0]

        # Canonical ID, sport and season
        assert result.id == "nba_2025_bos_cle_1022"
        assert result.sport == "nba"
        assert result.season == 2025

        # Resolved team IDs
        assert result.home_team_id == "team_nba_cle"
        assert result.away_team_id == "team_nba_bos"

        # Scores carried through unchanged
        assert result.home_score == 108
        assert result.away_score == 112

    def test_detects_doubleheaders(self):
        """Two same-day games between the same teams are numbered 1 and 2."""
        # (tip-off hour, home score, away score) for each game of the day
        same_day_games = [(13, 105, 98), (19, 110, 102)]
        raw_games = [
            RawGameData(
                game_date=datetime(2025, 4, 1, hour, 0),
                home_team_raw="Boston Celtics",
                away_team_raw="New York Knicks",
                stadium_raw="TD Garden",
                status="final",
                home_score=home_pts,
                away_score=away_pts,
            )
            for hour, home_pts, away_pts in same_day_games
        ]

        normalized, _ = NBAScraper(season=2025)._normalize_games(raw_games)

        assert len(normalized) == 2
        numbers = sorted(entry.game_number for entry in normalized)
        assert numbers == [1, 2]

        # The game number is appended to each canonical ID.
        ids = sorted(entry.id for entry in normalized)
        assert ids == ["nba_2025_nyk_bos_0401_1", "nba_2025_nyk_bos_0401_2"]

    def test_creates_review_items_for_unresolved_teams(self):
        """An unknown home team yields no game but at least one review item."""
        unknown_home = RawGameData(
            game_date=datetime(2025, 10, 22),
            home_team_raw="Unknown Team XYZ",
            away_team_raw="Boston Celtics",
            stadium_raw="TD Garden",
            status="scheduled",
        )

        normalized, pending_review = NBAScraper(season=2025)._normalize_games(
            [unknown_home]
        )

        # The unresolved team keeps the game out of the normalized output...
        assert len(normalized) == 0

        # ...but the problem is surfaced for manual review.
        assert len(pending_review) >= 1
|
||||
|
||||
|
||||
class TestTeamAndStadiumScraping:
    """Checks on the NBA team and stadium data returned by the scraper."""

    def test_scrapes_all_nba_teams(self):
        """All 30 teams come back with unique IDs and populated fields."""
        roster = NBAScraper(season=2025).scrape_teams()

        # 30 NBA teams
        assert len(roster) == 30

        # IDs must not repeat
        assert len({club.id for club in roster}) == 30

        # Every team carries the required fields
        for club in roster:
            assert club.id.startswith("team_nba_")
            assert club.sport == "nba"
            assert club.city
            assert club.name
            assert club.full_name
            assert club.abbreviation

    def test_teams_have_conferences_and_divisions(self):
        """Teams split evenly between the two conferences."""
        roster = NBAScraper(season=2025).scrape_teams()

        # Tally teams per conference
        east_total = sum(1 for club in roster if club.conference == "Eastern")
        west_total = sum(1 for club in roster if club.conference == "Western")

        assert east_total == 15
        assert west_total == 15

    def test_scrapes_all_nba_stadiums(self):
        """All 30 stadiums come back with unique IDs and populated fields."""
        venues = NBAScraper(season=2025).scrape_stadiums()

        # One stadium per team
        assert len(venues) == 30

        # IDs must not repeat
        assert len({venue.id for venue in venues}) == 30

        # Every stadium carries the required fields
        for venue in venues:
            assert venue.id.startswith("stadium_nba_")
            assert venue.sport == "nba"
            assert venue.name
            assert venue.city
            assert venue.state
            assert venue.country in ("USA", "Canada")
            assert venue.latitude != 0
            assert venue.longitude != 0
|
||||
|
||||
|
||||
class TestScrapeFallback:
    """Behavior of ``scrape_games`` when individual sources fail."""

    def test_falls_back_to_next_source_on_failure(self):
        """A failing Basketball-Reference source hands over to ESPN."""
        scraper = NBAScraper(season=2025)

        with patch.object(scraper, '_scrape_basketball_reference') as br_mock:
            with patch.object(scraper, '_scrape_espn') as espn_mock:
                # Basketball-Reference raises when called
                br_mock.side_effect = Exception("Connection failed")

                # ESPN supplies a single scheduled game
                espn_mock.return_value = [
                    RawGameData(
                        game_date=datetime(2025, 10, 22),
                        home_team_raw="Cleveland Cavaliers",
                        away_team_raw="Boston Celtics",
                        stadium_raw="Rocket Mortgage FieldHouse",
                        status="scheduled",
                    )
                ]

                outcome = scraper.scrape_games()

                # The scrape succeeds and reports ESPN as its source
                assert outcome.success
                assert outcome.source == "espn"
                assert br_mock.called
                assert espn_mock.called

    def test_returns_failure_when_all_sources_fail(self):
        """With every source raising, the result reports the failure."""
        scraper = NBAScraper(season=2025)

        with patch.object(scraper, '_scrape_basketball_reference') as br_mock:
            with patch.object(scraper, '_scrape_espn') as espn_mock:
                with patch.object(scraper, '_scrape_cbs') as cbs_mock:
                    br_mock.side_effect = Exception("BR failed")
                    espn_mock.side_effect = Exception("ESPN failed")
                    cbs_mock.side_effect = Exception("CBS failed")

                    outcome = scraper.scrape_games()

                    assert not outcome.success
                    assert "All sources failed" in outcome.error_message
                    assert "CBS failed" in outcome.error_message
|
||||
|
||||
|
||||
class TestSeasonMonths:
    """Checks on the (year, month) pairs computed for an NBA season."""

    def test_gets_correct_season_months(self):
        """The 2025 season spans October 2025 through June 2026."""
        season_months = NBAScraper(season=2025)._get_season_months()

        # Oct, Nov, Dec, Jan, Feb, Mar, Apr, May, Jun
        assert len(season_months) == 9

        # Starts in October of the season year
        opening = season_months[0]
        assert opening == (2025, 10)

        # Ends in June of the following year
        closing = season_months[-1]
        assert closing == (2026, 6)

        # The year rolls over between the third and fourth entries
        december = season_months[2]
        january = season_months[3]
        assert december == (2025, 12)
        assert january == (2026, 1)
|
||||
310
sportstime_parser/tests/test_scrapers/test_nfl.py
Normal file
310
sportstime_parser/tests/test_scrapers/test_nfl.py
Normal file
@@ -0,0 +1,310 @@
|
||||
"""Tests for NFL scraper."""
|
||||
|
||||
from datetime import datetime
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from sportstime_parser.scrapers.nfl import NFLScraper, create_nfl_scraper
|
||||
from sportstime_parser.scrapers.base import RawGameData
|
||||
from sportstime_parser.tests.fixtures import (
|
||||
load_json_fixture,
|
||||
NFL_ESPN_SCOREBOARD_JSON,
|
||||
)
|
||||
|
||||
|
||||
class TestNFLScraperInit:
    """Construction and static configuration of ``NFLScraper``."""

    def test_creates_scraper_with_season(self):
        """The constructor records the sport and the requested season."""
        nfl = NFLScraper(season=2025)
        assert nfl.sport == "nfl"
        assert nfl.season == 2025

    def test_factory_function_creates_scraper(self):
        """``create_nfl_scraper`` returns an ``NFLScraper`` for the season."""
        built = create_nfl_scraper(season=2025)
        assert isinstance(built, NFLScraper)
        assert built.season == 2025

    def test_expected_game_count(self):
        """The scraper reports 272 expected games."""
        expected_total = NFLScraper(season=2025).expected_game_count
        assert expected_total == 272

    def test_sources_in_priority_order(self):
        """Sources are listed ESPN first, then PFR, then CBS."""
        ordered_sources = NFLScraper(season=2025)._get_sources()
        assert ordered_sources == ["espn", "pro_football_reference", "cbs"]
|
||||
|
||||
|
||||
class TestESPNParsing:
    """Test ESPN API response parsing for the NFL scraper.

    Each test parses the shared ESPN scoreboard JSON fixture through
    ``NFLScraper._parse_espn_response`` and inspects the raw games returned.
    """

    @staticmethod
    def _parse_scoreboard():
        """Return the raw games parsed from the NFL ESPN scoreboard fixture."""
        scraper = NFLScraper(season=2025)
        data = load_json_fixture(NFL_ESPN_SCOREBOARD_JSON)
        return scraper._parse_espn_response(data, "http://espn.com/api")

    def test_parses_completed_games(self):
        """Test parsing completed games from ESPN."""
        games = self._parse_scoreboard()

        completed = [g for g in games if g.status == "final"]
        assert len(completed) == 2

        # Chiefs @ Ravens
        kc_bal = next(g for g in completed if g.away_team_raw == "Kansas City Chiefs")
        assert kc_bal.home_team_raw == "Baltimore Ravens"
        assert kc_bal.away_score == 27
        assert kc_bal.home_score == 20
        assert kc_bal.stadium_raw == "M&T Bank Stadium"

    def test_parses_scheduled_games(self):
        """Test parsing scheduled games from ESPN."""
        games = self._parse_scoreboard()

        scheduled = [g for g in games if g.status == "scheduled"]
        assert len(scheduled) == 1

        dal_cle = scheduled[0]
        assert dal_cle.away_team_raw == "Dallas Cowboys"
        assert dal_cle.home_team_raw == "Cleveland Browns"
        assert dal_cle.stadium_raw == "Cleveland Browns Stadium"

    def test_parses_venue_info(self):
        """Test venue information is extracted for every parsed game."""
        games = self._parse_scoreboard()

        # Without this guard an empty parse result would skip the loop and
        # let the test pass without checking a single venue.
        assert games, "expected at least one game parsed from the fixture"

        for game in games:
            assert game.stadium_raw is not None
|
||||
|
||||
|
||||
class TestGameNormalization:
    """Normalization of raw NFL games into canonical game records."""

    def test_normalizes_games_with_canonical_ids(self):
        """A resolvable raw game yields one game with the expected IDs."""
        chiefs_at_ravens = RawGameData(
            game_date=datetime(2025, 9, 7),
            home_team_raw="Baltimore Ravens",
            away_team_raw="Kansas City Chiefs",
            stadium_raw="M&T Bank Stadium",
            home_score=20,
            away_score=27,
            status="final",
            source_url="http://example.com",
        )

        normalized, _review = NFLScraper(season=2025)._normalize_games(
            [chiefs_at_ravens]
        )

        assert len(normalized) == 1
        result = normalized[0]

        # Canonical ID, sport and season
        assert result.id == "nfl_2025_kc_bal_0907"
        assert result.sport == "nfl"
        assert result.season == 2025

        # Resolved team IDs
        assert result.home_team_id == "team_nfl_bal"
        assert result.away_team_id == "team_nfl_kc"

        # Scores carried through unchanged
        assert result.home_score == 20
        assert result.away_score == 27

    def test_creates_review_items_for_unresolved_teams(self):
        """An unknown home team yields no game but at least one review item."""
        unknown_home = RawGameData(
            game_date=datetime(2025, 9, 7),
            home_team_raw="Unknown Team XYZ",
            away_team_raw="Kansas City Chiefs",
            stadium_raw="Arrowhead Stadium",
            status="scheduled",
        )

        normalized, pending_review = NFLScraper(season=2025)._normalize_games(
            [unknown_home]
        )

        # The unresolved team keeps the game out of the normalized output...
        assert len(normalized) == 0

        # ...but the problem is surfaced for manual review.
        assert len(pending_review) >= 1
|
||||
|
||||
|
||||
class TestTeamAndStadiumScraping:
    """Checks on the NFL team and stadium data returned by the scraper."""

    def test_scrapes_all_nfl_teams(self):
        """All 32 teams come back with unique IDs and populated fields."""
        roster = NFLScraper(season=2025).scrape_teams()

        # 32 NFL teams
        assert len(roster) == 32

        # IDs must not repeat
        assert len({club.id for club in roster}) == 32

        # Every team carries the required fields
        for club in roster:
            assert club.id.startswith("team_nfl_")
            assert club.sport == "nfl"
            assert club.city
            assert club.name
            assert club.full_name
            assert club.abbreviation

    def test_teams_have_conferences_and_divisions(self):
        """Teams split evenly between the AFC and the NFC."""
        roster = NFLScraper(season=2025).scrape_teams()

        # Tally teams per conference
        afc_total = sum(1 for club in roster if club.conference == "AFC")
        nfc_total = sum(1 for club in roster if club.conference == "NFC")

        assert afc_total == 16
        assert nfc_total == 16

    def test_scrapes_all_nfl_stadiums(self):
        """At least 30 stadiums come back, each with populated fields."""
        venues = NFLScraper(season=2025).scrape_stadiums()

        # Some teams share a stadium, so only a lower bound is checked
        assert len(venues) >= 30

        # Every stadium carries the required fields
        for venue in venues:
            assert venue.id.startswith("stadium_nfl_")
            assert venue.sport == "nfl"
            assert venue.name
            assert venue.city
            assert venue.state
            assert venue.country == "USA"
            assert venue.latitude != 0
            assert venue.longitude != 0
|
||||
|
||||
|
||||
class TestScrapeFallback:
    """Behavior of ``scrape_games`` when the first NFL source fails."""

    def test_falls_back_to_next_source_on_failure(self):
        """A failing ESPN source hands over to Pro-Football-Reference."""
        scraper = NFLScraper(season=2025)

        with patch.object(scraper, '_scrape_espn') as espn_mock:
            with patch.object(scraper, '_scrape_pro_football_reference') as pfr_mock:
                # ESPN raises when called
                espn_mock.side_effect = Exception("Connection failed")

                # Pro-Football-Reference supplies a single scheduled game
                pfr_mock.return_value = [
                    RawGameData(
                        game_date=datetime(2025, 9, 7),
                        home_team_raw="Baltimore Ravens",
                        away_team_raw="Kansas City Chiefs",
                        stadium_raw="M&T Bank Stadium",
                        status="scheduled",
                    )
                ]

                outcome = scraper.scrape_games()

                assert outcome.success
                assert outcome.source == "pro_football_reference"
                assert espn_mock.called
                assert pfr_mock.called
|
||||
|
||||
|
||||
class TestSeasonMonths:
    """Checks on the (year, month) pairs computed for an NFL season."""

    def test_gets_correct_season_months(self):
        """The 2025 season spans September 2025 through February 2026."""
        season_months = NFLScraper(season=2025)._get_season_months()

        # Sep, Oct, Nov, Dec, Jan, Feb
        assert len(season_months) == 6

        # Starts in September of the season year
        opening = season_months[0]
        assert opening == (2025, 9)

        # Ends in February of the following year
        closing = season_months[-1]
        assert closing == (2026, 2)

        # The year rolls over between the fourth and fifth entries
        december = season_months[3]
        january = season_months[4]
        assert december == (2025, 12)
        assert january == (2026, 1)
|
||||
|
||||
|
||||
class TestInternationalFiltering:
    """Handling of NFL games played outside the United States.

    Note: Filtering happens in _parse_espn_response, not _normalize_games.
    """

    def test_filters_london_games_during_parsing(self):
        """An ESPN event hosted at a London venue produces no games."""
        # Hand-built ESPN-style payload: one neutral-site event in London.
        # Only the date, venue and competitors are supplied.
        london_venue = {
            "fullName": "London Stadium",
            "address": {"city": "London", "country": "UK"},
        }
        teams = [
            {"homeAway": "home", "team": {"displayName": "Jacksonville Jaguars"}},
            {"homeAway": "away", "team": {"displayName": "Buffalo Bills"}},
        ]
        competition = {
            "neutralSite": True,
            "venue": london_venue,
            "competitors": teams,
        }
        payload = {
            "events": [
                {
                    "date": "2025-10-15T09:30:00Z",
                    "competitions": [competition],
                }
            ]
        }

        parsed = NFLScraper(season=2025)._parse_espn_response(
            payload, "http://espn.com/api"
        )

        # The London event must not survive parsing
        assert len(parsed) == 0

    def test_keeps_us_games(self):
        """A game at a US stadium is kept by normalization.

        NOTE(review): this goes through ``_normalize_games`` rather than
        ``_parse_espn_response``, so it does not exercise the parse-time
        filter itself; confirm whether a parse-level case is also wanted.
        """
        domestic = RawGameData(
            game_date=datetime(2025, 9, 7),
            home_team_raw="Baltimore Ravens",
            away_team_raw="Kansas City Chiefs",
            stadium_raw="M&T Bank Stadium",
            status="scheduled",
        )

        normalized, _ = NFLScraper(season=2025)._normalize_games([domestic])

        assert len(normalized) == 1
|
||||
317
sportstime_parser/tests/test_scrapers/test_nhl.py
Normal file
317
sportstime_parser/tests/test_scrapers/test_nhl.py
Normal file
@@ -0,0 +1,317 @@
|
||||
"""Tests for NHL scraper."""
|
||||
|
||||
from datetime import datetime
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from sportstime_parser.scrapers.nhl import NHLScraper, create_nhl_scraper
|
||||
from sportstime_parser.scrapers.base import RawGameData
|
||||
from sportstime_parser.tests.fixtures import (
|
||||
load_json_fixture,
|
||||
NHL_ESPN_SCOREBOARD_JSON,
|
||||
)
|
||||
|
||||
|
||||
class TestNHLScraperInit:
    """Construction and static configuration of ``NHLScraper``."""

    def test_creates_scraper_with_season(self):
        """The constructor records the sport and the requested season."""
        nhl = NHLScraper(season=2025)
        assert nhl.sport == "nhl"
        assert nhl.season == 2025

    def test_factory_function_creates_scraper(self):
        """``create_nhl_scraper`` returns an ``NHLScraper`` for the season."""
        built = create_nhl_scraper(season=2025)
        assert isinstance(built, NHLScraper)
        assert built.season == 2025

    def test_expected_game_count(self):
        """The scraper reports 1312 expected games."""
        expected_total = NHLScraper(season=2025).expected_game_count
        assert expected_total == 1312

    def test_sources_in_priority_order(self):
        """Sources are listed Hockey-Reference, then NHL API, then ESPN."""
        ordered_sources = NHLScraper(season=2025)._get_sources()
        assert ordered_sources == ["hockey_reference", "nhl_api", "espn"]
|
||||
|
||||
|
||||
class TestESPNParsing:
    """Test ESPN API response parsing for the NHL scraper.

    Each test parses the shared ESPN scoreboard JSON fixture through
    ``NHLScraper._parse_espn_response`` and inspects the raw games returned.
    """

    @staticmethod
    def _parse_scoreboard():
        """Return the raw games parsed from the NHL ESPN scoreboard fixture."""
        scraper = NHLScraper(season=2025)
        data = load_json_fixture(NHL_ESPN_SCOREBOARD_JSON)
        return scraper._parse_espn_response(data, "http://espn.com/api")

    def test_parses_completed_games(self):
        """Test parsing completed games from ESPN."""
        games = self._parse_scoreboard()

        completed = [g for g in games if g.status == "final"]
        assert len(completed) == 2

        # Penguins @ Bruins
        pit_bos = next(g for g in completed if g.away_team_raw == "Pittsburgh Penguins")
        assert pit_bos.home_team_raw == "Boston Bruins"
        assert pit_bos.away_score == 2
        assert pit_bos.home_score == 4
        assert pit_bos.stadium_raw == "TD Garden"

    def test_parses_scheduled_games(self):
        """Test parsing scheduled games from ESPN."""
        games = self._parse_scoreboard()

        scheduled = [g for g in games if g.status == "scheduled"]
        assert len(scheduled) == 1

        vgk_lak = scheduled[0]
        assert vgk_lak.away_team_raw == "Vegas Golden Knights"
        assert vgk_lak.home_team_raw == "Los Angeles Kings"
        assert vgk_lak.stadium_raw == "Crypto.com Arena"

    def test_parses_venue_info(self):
        """Test venue information is extracted for every parsed game."""
        games = self._parse_scoreboard()

        # Without this guard an empty parse result would skip the loop and
        # let the test pass without checking a single venue.
        assert games, "expected at least one game parsed from the fixture"

        for game in games:
            assert game.stadium_raw is not None
|
||||
|
||||
|
||||
class TestGameNormalization:
    """Normalization of raw NHL games into canonical game records."""

    def test_normalizes_games_with_canonical_ids(self):
        """A resolvable raw game yields one game with the expected IDs."""
        penguins_at_bruins = RawGameData(
            game_date=datetime(2025, 10, 8),
            home_team_raw="Boston Bruins",
            away_team_raw="Pittsburgh Penguins",
            stadium_raw="TD Garden",
            home_score=4,
            away_score=2,
            status="final",
            source_url="http://example.com",
        )

        normalized, _review = NHLScraper(season=2025)._normalize_games(
            [penguins_at_bruins]
        )

        assert len(normalized) == 1
        result = normalized[0]

        # Canonical ID, sport and season
        assert result.id == "nhl_2025_pit_bos_1008"
        assert result.sport == "nhl"
        assert result.season == 2025

        # Resolved team IDs
        assert result.home_team_id == "team_nhl_bos"
        assert result.away_team_id == "team_nhl_pit"

        # Scores carried through unchanged
        assert result.home_score == 4
        assert result.away_score == 2

    def test_creates_review_items_for_unresolved_teams(self):
        """An unknown home team yields no game but at least one review item."""
        unknown_home = RawGameData(
            game_date=datetime(2025, 10, 8),
            home_team_raw="Unknown Team XYZ",
            away_team_raw="Boston Bruins",
            stadium_raw="TD Garden",
            status="scheduled",
        )

        normalized, pending_review = NHLScraper(season=2025)._normalize_games(
            [unknown_home]
        )

        # The unresolved team keeps the game out of the normalized output...
        assert len(normalized) == 0

        # ...but the problem is surfaced for manual review.
        assert len(pending_review) >= 1
|
||||
|
||||
|
||||
class TestTeamAndStadiumScraping:
    """Checks on the NHL team and stadium data returned by the scraper."""

    def test_scrapes_all_nhl_teams(self):
        """All 32 teams come back with unique IDs and populated fields."""
        roster = NHLScraper(season=2025).scrape_teams()

        # 32 NHL teams
        assert len(roster) == 32

        # IDs must not repeat
        assert len({club.id for club in roster}) == 32

        # Every team carries the required fields
        for club in roster:
            assert club.id.startswith("team_nhl_")
            assert club.sport == "nhl"
            assert club.city
            assert club.name
            assert club.full_name
            assert club.abbreviation

    def test_teams_have_conferences_and_divisions(self):
        """Teams split evenly between the two conferences."""
        roster = NHLScraper(season=2025).scrape_teams()

        # Tally teams per conference
        east_total = sum(1 for club in roster if club.conference == "Eastern")
        west_total = sum(1 for club in roster if club.conference == "Western")

        assert east_total == 16
        assert west_total == 16

    def test_scrapes_all_nhl_stadiums(self):
        """All 32 stadiums come back with unique IDs and populated fields."""
        venues = NHLScraper(season=2025).scrape_stadiums()

        # One stadium per team
        assert len(venues) == 32

        # IDs must not repeat
        assert len({venue.id for venue in venues}) == 32

        # Every stadium carries the required fields
        for venue in venues:
            assert venue.id.startswith("stadium_nhl_")
            assert venue.sport == "nhl"
            assert venue.name
            assert venue.city
            assert venue.state
            assert venue.country in ("USA", "Canada")
            assert venue.latitude != 0
            assert venue.longitude != 0
|
||||
|
||||
|
||||
class TestScrapeFallback:
    """Behavior of ``scrape_games`` when earlier NHL sources fail."""

    def test_falls_back_to_next_source_on_failure(self):
        """With Hockey-Reference and the NHL API failing, ESPN is used."""
        scraper = NHLScraper(season=2025)

        with patch.object(scraper, '_scrape_hockey_reference') as hr_mock:
            with patch.object(scraper, '_scrape_nhl_api') as api_mock:
                with patch.object(scraper, '_scrape_espn') as espn_mock:
                    # The first two sources raise when called
                    hr_mock.side_effect = Exception("Connection failed")
                    api_mock.side_effect = Exception("API error")

                    # ESPN supplies a single scheduled game
                    espn_mock.return_value = [
                        RawGameData(
                            game_date=datetime(2025, 10, 8),
                            home_team_raw="Boston Bruins",
                            away_team_raw="Pittsburgh Penguins",
                            stadium_raw="TD Garden",
                            status="scheduled",
                        )
                    ]

                    outcome = scraper.scrape_games()

                    assert outcome.success
                    assert outcome.source == "espn"
                    assert hr_mock.called
                    assert api_mock.called
                    assert espn_mock.called
|
||||
|
||||
|
||||
class TestSeasonMonths:
    """Checks on the (year, month) pairs computed for an NHL season."""

    def test_gets_correct_season_months(self):
        """The 2025 season spans October 2025 through June 2026."""
        season_months = NHLScraper(season=2025)._get_season_months()

        # Oct, Nov, Dec, Jan, Feb, Mar, Apr, May, Jun
        assert len(season_months) == 9

        # Starts in October of the season year
        opening = season_months[0]
        assert opening == (2025, 10)

        # Ends in June of the following year
        closing = season_months[-1]
        assert closing == (2026, 6)

        # The year rolls over between the third and fourth entries
        december = season_months[2]
        january = season_months[3]
        assert december == (2025, 12)
        assert january == (2026, 1)
|
||||
|
||||
|
||||
class TestInternationalFiltering:
    """Handling of NHL games played outside North America.

    Note: Filtering happens in _parse_espn_response, not _normalize_games.
    """

    def test_filters_european_games_during_parsing(self):
        """An ESPN event hosted at a Prague venue produces no games."""
        # Hand-built ESPN-style payload: one neutral-site event in Prague
        # (Global Series). Only the date, venue and competitors are supplied.
        prague_venue = {
            "fullName": "O2 Arena, Prague",
            "address": {"city": "Prague", "country": "Czech Republic"},
        }
        teams = [
            {"homeAway": "home", "team": {"displayName": "Florida Panthers"}},
            {"homeAway": "away", "team": {"displayName": "Dallas Stars"}},
        ]
        competition = {
            "neutralSite": True,
            "venue": prague_venue,
            "competitors": teams,
        }
        payload = {
            "events": [
                {
                    "date": "2025-10-10T18:00:00Z",
                    "competitions": [competition],
                }
            ]
        }

        parsed = NHLScraper(season=2025)._parse_espn_response(
            payload, "http://espn.com/api"
        )

        # The Prague event must not survive parsing
        assert len(parsed) == 0

    def test_keeps_north_american_games(self):
        """A game at a North American arena is kept by normalization.

        NOTE(review): this goes through ``_normalize_games`` rather than
        ``_parse_espn_response``, so it does not exercise the parse-time
        filter itself; confirm whether a parse-level case is also wanted.
        """
        domestic = RawGameData(
            game_date=datetime(2025, 10, 8),
            home_team_raw="Boston Bruins",
            away_team_raw="Pittsburgh Penguins",
            stadium_raw="TD Garden",
            status="scheduled",
        )

        normalized, _ = NHLScraper(season=2025)._normalize_games([domestic])

        assert len(normalized) == 1
|
||||
226
sportstime_parser/tests/test_scrapers/test_nwsl.py
Normal file
226
sportstime_parser/tests/test_scrapers/test_nwsl.py
Normal file
@@ -0,0 +1,226 @@
|
||||
"""Tests for NWSL scraper."""
|
||||
|
||||
from datetime import datetime
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from sportstime_parser.scrapers.nwsl import NWSLScraper, create_nwsl_scraper
|
||||
from sportstime_parser.scrapers.base import RawGameData
|
||||
from sportstime_parser.tests.fixtures import (
|
||||
load_json_fixture,
|
||||
NWSL_ESPN_SCOREBOARD_JSON,
|
||||
)
|
||||
|
||||
|
||||
class TestNWSLScraperInit:
    """Construction and static configuration of ``NWSLScraper``."""

    def test_creates_scraper_with_season(self):
        """The constructor records the sport and the requested season."""
        nwsl = NWSLScraper(season=2026)
        assert nwsl.sport == "nwsl"
        assert nwsl.season == 2026

    def test_factory_function_creates_scraper(self):
        """``create_nwsl_scraper`` returns an ``NWSLScraper`` for the season."""
        built = create_nwsl_scraper(season=2026)
        assert isinstance(built, NWSLScraper)
        assert built.season == 2026

    def test_expected_game_count(self):
        """The scraper reports 182 expected games."""
        expected_total = NWSLScraper(season=2026).expected_game_count
        assert expected_total == 182

    def test_sources_in_priority_order(self):
        """ESPN is the only configured source."""
        ordered_sources = NWSLScraper(season=2026)._get_sources()
        assert ordered_sources == ["espn"]
|
||||
|
||||
|
||||
class TestESPNParsing:
    """Test ESPN API response parsing against the NWSL scoreboard fixture."""

    def test_parses_completed_games(self):
        """Test parsing completed games from ESPN.

        The fixture is expected to contain two games with status "final";
        the Angel City @ Portland game is checked field by field.
        """
        scraper = NWSLScraper(season=2026)
        data = load_json_fixture(NWSL_ESPN_SCOREBOARD_JSON)
        games = scraper._parse_espn_response(data, "http://espn.com/api")

        completed = [g for g in games if g.status == "final"]
        assert len(completed) == 2

        # Angel City @ Thorns
        la_por = next(g for g in completed if g.away_team_raw == "Angel City FC")
        assert la_por.home_team_raw == "Portland Thorns FC"
        assert la_por.away_score == 1
        assert la_por.home_score == 2
        assert la_por.stadium_raw == "Providence Park"

    def test_parses_scheduled_games(self):
        """Test parsing scheduled games from ESPN.

        The fixture is expected to contain exactly one game with status
        "scheduled": San Diego Wave FC at Bay FC.
        """
        scraper = NWSLScraper(season=2026)
        data = load_json_fixture(NWSL_ESPN_SCOREBOARD_JSON)
        games = scraper._parse_espn_response(data, "http://espn.com/api")

        scheduled = [g for g in games if g.status == "scheduled"]
        assert len(scheduled) == 1

        sd_bay = scheduled[0]
        assert sd_bay.away_team_raw == "San Diego Wave FC"
        assert sd_bay.home_team_raw == "Bay FC"
        assert sd_bay.stadium_raw == "PayPal Park"

    def test_parses_venue_info(self):
        """Test venue information is extracted for every parsed game."""
        scraper = NWSLScraper(season=2026)
        data = load_json_fixture(NWSL_ESPN_SCOREBOARD_JSON)
        games = scraper._parse_espn_response(data, "http://espn.com/api")

        # Guard against a vacuous pass: if parsing returned nothing, the loop
        # below would never run and the test would succeed without checking
        # a single venue.
        assert games, "expected the fixture to yield at least one game"

        for game in games:
            assert game.stadium_raw is not None
|
||||
|
||||
|
||||
class TestGameNormalization:
    """Normalization of raw NWSL games into canonical game records."""

    def test_normalizes_games_with_canonical_ids(self):
        """A final game between two known teams yields one record with canonical IDs."""
        scraper = NWSLScraper(season=2026)

        thorns_vs_angel_city = RawGameData(
            game_date=datetime(2026, 4, 10),
            home_team_raw="Portland Thorns FC",
            away_team_raw="Angel City FC",
            stadium_raw="Providence Park",
            home_score=2,
            away_score=1,
            status="final",
            source_url="http://example.com",
        )

        normalized, _review_items = scraper._normalize_games([thorns_vs_angel_city])

        assert len(normalized) == 1
        record = normalized[0]

        # Canonical game ID, sport and season.
        assert record.id == "nwsl_2026_anf_por_0410"
        assert record.sport == "nwsl"
        assert record.season == 2026

        # Canonical team IDs for home and away sides.
        assert record.home_team_id == "team_nwsl_por"
        assert record.away_team_id == "team_nwsl_anf"

        # Scores carried over from the raw game.
        assert record.home_score == 2
        assert record.away_score == 1

    def test_creates_review_items_for_unresolved_teams(self):
        """An unrecognized home team produces no game and at least one review item."""
        scraper = NWSLScraper(season=2026)

        unknown_home_game = RawGameData(
            game_date=datetime(2026, 4, 10),
            home_team_raw="Unknown Team XYZ",
            away_team_raw="Portland Thorns FC",
            stadium_raw="Providence Park",
            status="scheduled",
        )

        normalized, flagged = scraper._normalize_games([unknown_home_game])

        # No game is emitted for the raw game with the unknown home team.
        assert len(normalized) == 0

        # The failure is surfaced through the review items instead.
        assert len(flagged) >= 1
|
||||
|
||||
|
||||
class TestTeamAndStadiumScraping:
    """Checks on the team and stadium lists returned by the NWSL scraper."""

    def test_scrapes_all_nwsl_teams(self):
        """scrape_teams returns 14 uniquely identified, fully populated teams."""
        all_teams = NWSLScraper(season=2026).scrape_teams()

        # NWSL has 14 teams
        assert len(all_teams) == 14

        # No two teams may share an ID.
        distinct_ids = {entry.id for entry in all_teams}
        assert len(distinct_ids) == 14

        # Every team carries the NWSL ID prefix, the sport key and non-empty names.
        for entry in all_teams:
            assert entry.id.startswith("team_nwsl_")
            assert entry.sport == "nwsl"
            assert entry.city
            assert entry.name
            assert entry.full_name
            assert entry.abbreviation

    def test_scrapes_all_nwsl_stadiums(self):
        """scrape_stadiums returns 14 uniquely identified, fully populated stadiums."""
        all_stadiums = NWSLScraper(season=2026).scrape_stadiums()

        # One stadium per team is expected.
        assert len(all_stadiums) == 14

        # No two stadiums may share an ID.
        distinct_ids = {venue.id for venue in all_stadiums}
        assert len(distinct_ids) == 14

        # Every stadium has identifying fields, a USA country and non-zero coordinates.
        for venue in all_stadiums:
            assert venue.id.startswith("stadium_nwsl_")
            assert venue.sport == "nwsl"
            assert venue.name
            assert venue.city
            assert venue.state
            assert venue.country == "USA"
            assert venue.latitude != 0
            assert venue.longitude != 0
|
||||
|
||||
|
||||
class TestScrapeFallback:
    """Behavior when the only NWSL source (ESPN) is unavailable."""

    def test_returns_failure_when_espn_fails(self):
        """scrape_games reports failure when the ESPN scrape raises."""
        scraper = NWSLScraper(season=2026)

        # Replace the ESPN scrape with a mock that raises on every call.
        espn_down = Exception("ESPN failed")
        with patch.object(scraper, "_scrape_espn", side_effect=espn_down):
            outcome = scraper.scrape_games()

            assert not outcome.success
            assert "All sources failed" in outcome.error_message
|
||||
|
||||
|
||||
class TestSeasonMonths:
    """Checks on the (year, month) pairs the NWSL scraper iterates over."""

    def test_gets_correct_season_months(self):
        """The 2026 season spans nine months, March through November."""
        season_months = NWSLScraper(season=2026)._get_season_months()

        # Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov
        assert len(season_months) == 9

        # Opening month: March of the season year.
        opening = season_months[0]
        assert opening == (2026, 3)

        # Closing month: November of the season year.
        closing = season_months[-1]
        assert closing == (2026, 11)
|
||||
226
sportstime_parser/tests/test_scrapers/test_wnba.py
Normal file
226
sportstime_parser/tests/test_scrapers/test_wnba.py
Normal file
@@ -0,0 +1,226 @@
|
||||
"""Tests for WNBA scraper."""
|
||||
|
||||
from datetime import datetime
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from sportstime_parser.scrapers.wnba import WNBAScraper, create_wnba_scraper
|
||||
from sportstime_parser.scrapers.base import RawGameData
|
||||
from sportstime_parser.tests.fixtures import (
|
||||
load_json_fixture,
|
||||
WNBA_ESPN_SCOREBOARD_JSON,
|
||||
)
|
||||
|
||||
|
||||
class TestWNBAScraperInit:
    """Checks on a freshly constructed WNBAScraper."""

    def test_creates_scraper_with_season(self):
        """Direct construction exposes the sport key and the season passed in."""
        wnba = WNBAScraper(season=2026)

        sport_key = wnba.sport
        assert sport_key == "wnba"

        season_year = wnba.season
        assert season_year == 2026

    def test_factory_function_creates_scraper(self):
        """create_wnba_scraper returns a WNBAScraper carrying the given season."""
        built = create_wnba_scraper(season=2026)

        assert isinstance(built, WNBAScraper)
        assert built.season == 2026

    def test_expected_game_count(self):
        """The scraper reports 220 as its expected game count."""
        expected_total = WNBAScraper(season=2026).expected_game_count
        assert expected_total == 220

    def test_sources_in_priority_order(self):
        """The source list contains exactly one entry, "espn"."""
        source_order = WNBAScraper(season=2026)._get_sources()
        assert source_order == ["espn"]
|
||||
|
||||
|
||||
class TestESPNParsing:
    """Test ESPN API response parsing against the WNBA scoreboard fixture."""

    def test_parses_completed_games(self):
        """Test parsing completed games from ESPN.

        The fixture is expected to contain two games with status "final";
        the Las Vegas @ New York game is checked field by field.
        """
        scraper = WNBAScraper(season=2026)
        data = load_json_fixture(WNBA_ESPN_SCOREBOARD_JSON)
        games = scraper._parse_espn_response(data, "http://espn.com/api")

        completed = [g for g in games if g.status == "final"]
        assert len(completed) == 2

        # Aces @ Liberty
        lv_ny = next(g for g in completed if g.away_team_raw == "Las Vegas Aces")
        assert lv_ny.home_team_raw == "New York Liberty"
        assert lv_ny.away_score == 88
        assert lv_ny.home_score == 92
        assert lv_ny.stadium_raw == "Barclays Center"

    def test_parses_scheduled_games(self):
        """Test parsing scheduled games from ESPN.

        The fixture is expected to contain exactly one game with status
        "scheduled": Phoenix Mercury at Seattle Storm.
        """
        scraper = WNBAScraper(season=2026)
        data = load_json_fixture(WNBA_ESPN_SCOREBOARD_JSON)
        games = scraper._parse_espn_response(data, "http://espn.com/api")

        scheduled = [g for g in games if g.status == "scheduled"]
        assert len(scheduled) == 1

        phx_sea = scheduled[0]
        assert phx_sea.away_team_raw == "Phoenix Mercury"
        assert phx_sea.home_team_raw == "Seattle Storm"
        assert phx_sea.stadium_raw == "Climate Pledge Arena"

    def test_parses_venue_info(self):
        """Test venue information is extracted for every parsed game."""
        scraper = WNBAScraper(season=2026)
        data = load_json_fixture(WNBA_ESPN_SCOREBOARD_JSON)
        games = scraper._parse_espn_response(data, "http://espn.com/api")

        # Guard against a vacuous pass: if parsing returned nothing, the loop
        # below would never run and the test would succeed without checking
        # a single venue.
        assert games, "expected the fixture to yield at least one game"

        for game in games:
            assert game.stadium_raw is not None
|
||||
|
||||
|
||||
class TestGameNormalization:
    """Normalization of raw WNBA games into canonical game records."""

    def test_normalizes_games_with_canonical_ids(self):
        """A final game between two known teams yields one record with canonical IDs."""
        scraper = WNBAScraper(season=2026)

        liberty_vs_aces = RawGameData(
            game_date=datetime(2026, 5, 20),
            home_team_raw="New York Liberty",
            away_team_raw="Las Vegas Aces",
            stadium_raw="Barclays Center",
            home_score=92,
            away_score=88,
            status="final",
            source_url="http://example.com",
        )

        normalized, _review_items = scraper._normalize_games([liberty_vs_aces])

        assert len(normalized) == 1
        record = normalized[0]

        # Canonical game ID, sport and season.
        assert record.id == "wnba_2026_lv_ny_0520"
        assert record.sport == "wnba"
        assert record.season == 2026

        # Canonical team IDs for home and away sides.
        assert record.home_team_id == "team_wnba_ny"
        assert record.away_team_id == "team_wnba_lv"

        # Scores carried over from the raw game.
        assert record.home_score == 92
        assert record.away_score == 88

    def test_creates_review_items_for_unresolved_teams(self):
        """An unrecognized home team produces no game and at least one review item."""
        scraper = WNBAScraper(season=2026)

        unknown_home_game = RawGameData(
            game_date=datetime(2026, 5, 20),
            home_team_raw="Unknown Team XYZ",
            away_team_raw="Las Vegas Aces",
            stadium_raw="Barclays Center",
            status="scheduled",
        )

        normalized, flagged = scraper._normalize_games([unknown_home_game])

        # No game is emitted for the raw game with the unknown home team.
        assert len(normalized) == 0

        # The failure is surfaced through the review items instead.
        assert len(flagged) >= 1
|
||||
|
||||
|
||||
class TestTeamAndStadiumScraping:
    """Checks on the team and stadium lists returned by the WNBA scraper."""

    def test_scrapes_all_wnba_teams(self):
        """scrape_teams returns 13 uniquely identified, fully populated teams."""
        all_teams = WNBAScraper(season=2026).scrape_teams()

        # WNBA has 13 teams (including Golden State Valkyries)
        assert len(all_teams) == 13

        # No two teams may share an ID.
        distinct_ids = {entry.id for entry in all_teams}
        assert len(distinct_ids) == 13

        # Every team carries the WNBA ID prefix, the sport key and non-empty names.
        for entry in all_teams:
            assert entry.id.startswith("team_wnba_")
            assert entry.sport == "wnba"
            assert entry.city
            assert entry.name
            assert entry.full_name
            assert entry.abbreviation

    def test_scrapes_all_wnba_stadiums(self):
        """scrape_stadiums returns 13 uniquely identified, fully populated stadiums."""
        all_stadiums = WNBAScraper(season=2026).scrape_stadiums()

        # One stadium per team is expected.
        assert len(all_stadiums) == 13

        # No two stadiums may share an ID.
        distinct_ids = {venue.id for venue in all_stadiums}
        assert len(distinct_ids) == 13

        # Every stadium has identifying fields, a USA country and non-zero coordinates.
        for venue in all_stadiums:
            assert venue.id.startswith("stadium_wnba_")
            assert venue.sport == "wnba"
            assert venue.name
            assert venue.city
            assert venue.state
            assert venue.country == "USA"
            assert venue.latitude != 0
            assert venue.longitude != 0
|
||||
|
||||
|
||||
class TestScrapeFallback:
    """Behavior when the only WNBA source (ESPN) is unavailable."""

    def test_returns_failure_when_espn_fails(self):
        """scrape_games reports failure when the ESPN scrape raises."""
        scraper = WNBAScraper(season=2026)

        # Replace the ESPN scrape with a mock that raises on every call.
        espn_down = Exception("ESPN failed")
        with patch.object(scraper, "_scrape_espn", side_effect=espn_down):
            outcome = scraper.scrape_games()

            assert not outcome.success
            assert "All sources failed" in outcome.error_message
|
||||
|
||||
|
||||
class TestSeasonMonths:
    """Checks on the (year, month) pairs the WNBA scraper iterates over."""

    def test_gets_correct_season_months(self):
        """The 2026 season spans six months, May through October."""
        season_months = WNBAScraper(season=2026)._get_season_months()

        # May, Jun, Jul, Aug, Sep, Oct
        assert len(season_months) == 6

        # Opening month: May of the season year.
        opening = season_months[0]
        assert opening == (2026, 5)

        # Closing month: October of the season year.
        closing = season_months[-1]
        assert closing == (2026, 10)
|
||||
187
sportstime_parser/tests/test_timezone.py
Normal file
187
sportstime_parser/tests/test_timezone.py
Normal file
@@ -0,0 +1,187 @@
|
||||
"""Tests for timezone conversion utilities."""
|
||||
|
||||
import pytest
|
||||
from datetime import datetime, date
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
from sportstime_parser.normalizers.timezone import (
|
||||
detect_timezone_from_string,
|
||||
detect_timezone_from_location,
|
||||
parse_datetime,
|
||||
convert_to_utc,
|
||||
get_stadium_timezone,
|
||||
TimezoneResult,
|
||||
)
|
||||
|
||||
|
||||
class TestDetectTimezoneFromString:
    """detect_timezone_from_string: timezone abbreviation in a time string -> IANA zone."""

    def test_eastern_time(self):
        """ET, EST and EDT all resolve to America/New_York."""
        for time_text in ("7:00 PM ET", "7:00 PM EST", "7:00 PM EDT"):
            assert detect_timezone_from_string(time_text) == "America/New_York"

    def test_central_time(self):
        """CT, CST and CDT all resolve to America/Chicago."""
        for time_text in ("8:00 PM CT", "8:00 PM CST", "8:00 PM CDT"):
            assert detect_timezone_from_string(time_text) == "America/Chicago"

    def test_mountain_time(self):
        """MT and MST resolve to America/Denver."""
        for time_text in ("7:00 PM MT", "7:00 PM MST"):
            assert detect_timezone_from_string(time_text) == "America/Denver"

    def test_pacific_time(self):
        """PT, PST and PDT all resolve to America/Los_Angeles."""
        for time_text in ("7:00 PM PT", "7:00 PM PST", "7:00 PM PDT"):
            assert detect_timezone_from_string(time_text) == "America/Los_Angeles"

    def test_no_timezone(self):
        """A time string without an abbreviation yields None."""
        for time_text in ("7:00 PM", "19:00"):
            assert detect_timezone_from_string(time_text) is None

    def test_case_insensitive(self):
        """Lower-case and mixed-case abbreviations are still recognized."""
        for time_text in ("7:00 PM et", "7:00 PM Et"):
            assert detect_timezone_from_string(time_text) == "America/New_York"
|
||||
|
||||
|
||||
class TestDetectTimezoneFromLocation:
    """detect_timezone_from_location: state/province code -> IANA zone."""

    def test_eastern_states(self):
        """NY, MA and FL map to America/New_York."""
        for code in ("NY", "MA", "FL"):
            assert detect_timezone_from_location(state=code) == "America/New_York"

    def test_central_states(self):
        """TX and IL map to America/Chicago."""
        for code in ("TX", "IL"):
            assert detect_timezone_from_location(state=code) == "America/Chicago"

    def test_mountain_states(self):
        """CO maps to America/Denver; AZ maps to its own zone, America/Phoenix."""
        colorado_zone = detect_timezone_from_location(state="CO")
        assert colorado_zone == "America/Denver"

        arizona_zone = detect_timezone_from_location(state="AZ")
        assert arizona_zone == "America/Phoenix"

    def test_pacific_states(self):
        """CA and WA map to America/Los_Angeles."""
        for code in ("CA", "WA"):
            assert detect_timezone_from_location(state=code) == "America/Los_Angeles"

    def test_canadian_provinces(self):
        """ON, BC and AB map to their Canadian zones."""
        expected_by_province = (
            ("ON", "America/Toronto"),
            ("BC", "America/Vancouver"),
            ("AB", "America/Edmonton"),
        )
        for code, zone in expected_by_province:
            assert detect_timezone_from_location(state=code) == zone

    def test_case_insensitive(self):
        """Lower-case and mixed-case state codes are still recognized."""
        for code in ("ny", "Ny"):
            assert detect_timezone_from_location(state=code) == "America/New_York"

    def test_unknown_state(self):
        """An unrecognized code, or None, yields None."""
        for code in ("XX", None):
            assert detect_timezone_from_location(state=code) is None
|
||||
|
||||
|
||||
class TestParseDatetime:
    """parse_datetime: date/time strings -> UTC datetime plus timezone metadata."""

    def test_basic_date_time(self):
        """An explicit ET time is converted to UTC with high confidence."""
        parsed = parse_datetime("2025-12-25", "7:00 PM ET")
        utc_value = parsed.datetime_utc

        assert utc_value.year == 2025
        assert utc_value.month == 12
        # The expected UTC calendar day is the 26th, one day after the input date.
        assert utc_value.day == 26
        assert parsed.source_timezone == "America/New_York"
        assert parsed.confidence == "high"

    def test_date_only(self):
        """A date with no time component keeps its calendar date."""
        utc_value = parse_datetime("2025-10-21").datetime_utc

        assert utc_value.year == 2025
        assert utc_value.month == 10
        assert utc_value.day == 21

    def test_timezone_hint(self):
        """With no zone in the time string, timezone_hint is used at medium confidence."""
        parsed = parse_datetime("2025-10-21", "7:00 PM", timezone_hint="America/Chicago")

        assert parsed.source_timezone == "America/Chicago"
        assert parsed.confidence == "medium"

    def test_location_inference(self):
        """With no zone in the time string, location_state drives the zone at medium confidence."""
        parsed = parse_datetime("2025-10-21", "7:00 PM", location_state="CA")

        assert parsed.source_timezone == "America/Los_Angeles"
        assert parsed.confidence == "medium"

    def test_default_to_eastern(self):
        """With no timezone information at all, Eastern is assumed and a warning is set."""
        parsed = parse_datetime("2025-10-21", "7:00 PM")

        assert parsed.source_timezone == "America/New_York"
        assert parsed.confidence == "low"
        assert parsed.warning is not None

    def test_invalid_date(self):
        """An unparseable date string gives low confidence and a warning."""
        parsed = parse_datetime("not a date")

        assert parsed.confidence == "low"
        assert parsed.warning is not None
|
||||
|
||||
|
||||
class TestConvertToUtc:
    """convert_to_utc: local (naive or aware) datetime -> UTC datetime."""

    def test_convert_naive_datetime(self):
        """A naive 7:00 PM on Dec 25 in New York becomes 00:00 UTC on Dec 26."""
        local_evening = datetime(2025, 12, 25, 19, 0)  # 7:00 PM
        as_utc = convert_to_utc(local_evening, "America/New_York")

        # In December, Eastern Time is UTC-5, so 19:00 rolls over to the next day.
        assert as_utc.hour == 0
        assert as_utc.day == 26

    def test_convert_aware_datetime(self):
        """An aware 7:00 PM Pacific on Jul 4 becomes 02:00 UTC on Jul 5."""
        pacific = ZoneInfo("America/Los_Angeles")
        local_evening = datetime(2025, 7, 4, 19, 0, tzinfo=pacific)  # 7:00 PM PT
        as_utc = convert_to_utc(local_evening, "America/Los_Angeles")

        # In July, Pacific Time is UTC-7, so 19:00 rolls over to the next day.
        assert as_utc.hour == 2
        assert as_utc.day == 5
|
||||
|
||||
|
||||
class TestGetStadiumTimezone:
    """get_stadium_timezone: state code (plus optional explicit zone) -> IANA zone."""

    def test_explicit_timezone(self):
        """An explicitly supplied stadium_timezone is returned as-is."""
        resolved = get_stadium_timezone("AZ", stadium_timezone="America/Phoenix")
        assert resolved == "America/Phoenix"

    def test_state_inference(self):
        """Without an explicit zone, the state code determines the result."""
        resolved = get_stadium_timezone("NY")
        assert resolved == "America/New_York"

    def test_default_eastern(self):
        """An unrecognized state code falls back to Eastern."""
        resolved = get_stadium_timezone("XX")
        assert resolved == "America/New_York"
|
||||
1
sportstime_parser/tests/test_uploaders/__init__.py
Normal file
1
sportstime_parser/tests/test_uploaders/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Tests for the uploaders module."""
|
||||
461
sportstime_parser/tests/test_uploaders/test_cloudkit.py
Normal file
461
sportstime_parser/tests/test_uploaders/test_cloudkit.py
Normal file
@@ -0,0 +1,461 @@
|
||||
"""Tests for the CloudKit client."""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
from datetime import datetime
|
||||
from unittest.mock import Mock, patch, MagicMock
|
||||
|
||||
from sportstime_parser.uploaders.cloudkit import (
|
||||
CloudKitClient,
|
||||
CloudKitRecord,
|
||||
CloudKitError,
|
||||
CloudKitAuthError,
|
||||
CloudKitRateLimitError,
|
||||
CloudKitServerError,
|
||||
RecordType,
|
||||
OperationResult,
|
||||
BatchResult,
|
||||
)
|
||||
|
||||
|
||||
class TestCloudKitRecord:
    """CloudKitRecord construction and its to_cloudkit_dict serialization."""

    def test_create_record(self):
        """Constructor arguments are stored; the change tag defaults to None."""
        game_fields = {
            "sport": "nba",
            "season": 2025,
        }
        rec = CloudKitRecord(
            record_name="nba_2025_hou_okc_1021",
            record_type=RecordType.GAME,
            fields=game_fields,
        )

        assert rec.record_name == "nba_2025_hou_okc_1021"
        assert rec.record_type == RecordType.GAME
        assert rec.fields["sport"] == "nba"
        assert rec.record_change_tag is None

    def test_to_cloudkit_dict(self):
        """Serialization emits recordName, recordType and fields, but no change tag."""
        game_fields = {
            "sport": "nba",
            "season": 2025,
        }
        rec = CloudKitRecord(
            record_name="nba_2025_hou_okc_1021",
            record_type=RecordType.GAME,
            fields=game_fields,
        )

        payload = rec.to_cloudkit_dict()

        assert payload["recordName"] == "nba_2025_hou_okc_1021"
        assert payload["recordType"] == "Game"
        assert "fields" in payload
        assert "recordChangeTag" not in payload

    def test_to_cloudkit_dict_with_change_tag(self):
        """A record built with a change tag serializes it as recordChangeTag."""
        rec = CloudKitRecord(
            record_name="nba_2025_hou_okc_1021",
            record_type=RecordType.GAME,
            fields={"sport": "nba"},
            record_change_tag="abc123",
        )

        payload = rec.to_cloudkit_dict()

        assert payload["recordChangeTag"] == "abc123"

    def test_format_string_field(self):
        """A str value is serialized with type STRING."""
        rec = CloudKitRecord(
            record_name="test",
            record_type=RecordType.GAME,
            fields={"name": "Test Name"},
        )

        payload = rec.to_cloudkit_dict()

        assert payload["fields"]["name"]["value"] == "Test Name"
        assert payload["fields"]["name"]["type"] == "STRING"

    def test_format_int_field(self):
        """An int value is serialized with type INT64."""
        rec = CloudKitRecord(
            record_name="test",
            record_type=RecordType.GAME,
            fields={"count": 42},
        )

        payload = rec.to_cloudkit_dict()

        assert payload["fields"]["count"]["value"] == 42
        assert payload["fields"]["count"]["type"] == "INT64"

    def test_format_float_field(self):
        """A float value is serialized with type DOUBLE."""
        rec = CloudKitRecord(
            record_name="test",
            record_type=RecordType.STADIUM,
            fields={"latitude": 35.4634},
        )

        payload = rec.to_cloudkit_dict()

        assert payload["fields"]["latitude"]["value"] == 35.4634
        assert payload["fields"]["latitude"]["type"] == "DOUBLE"

    def test_format_datetime_field(self):
        """A datetime value is serialized as integer milliseconds with type TIMESTAMP."""
        moment = datetime(2025, 10, 21, 19, 0, 0)
        rec = CloudKitRecord(
            record_name="test",
            record_type=RecordType.GAME,
            fields={"game_date": moment},
        )

        payload = rec.to_cloudkit_dict()

        # Expected value is derived from the same datetime via timestamp().
        millis = int(moment.timestamp() * 1000)
        assert payload["fields"]["game_date"]["value"] == millis
        assert payload["fields"]["game_date"]["type"] == "TIMESTAMP"

    def test_format_location_field(self):
        """A latitude/longitude dict is serialized with type LOCATION."""
        coordinates = {"latitude": 35.4634, "longitude": -97.5151}
        rec = CloudKitRecord(
            record_name="test",
            record_type=RecordType.STADIUM,
            fields={
                "location": coordinates,
            },
        )

        payload = rec.to_cloudkit_dict()

        assert payload["fields"]["location"]["type"] == "LOCATION"
        assert payload["fields"]["location"]["value"]["latitude"] == 35.4634
        assert payload["fields"]["location"]["value"]["longitude"] == -97.5151

    def test_skip_none_fields(self):
        """Fields whose value is None are left out of the serialized output."""
        rec = CloudKitRecord(
            record_name="test",
            record_type=RecordType.GAME,
            fields={
                "sport": "nba",
                "score": None,  # expected to be omitted
            },
        )

        payload = rec.to_cloudkit_dict()

        assert "sport" in payload["fields"]
        assert "score" not in payload["fields"]
|
||||
|
||||
|
||||
class TestOperationResult:
    """Field behavior of the OperationResult dataclass."""

    def test_successful_result(self):
        """A success result keeps its name and change tag and has no error code."""
        outcome = OperationResult(
            record_name="test_record",
            success=True,
            record_change_tag="new_tag",
        )

        assert outcome.record_name == "test_record"
        assert outcome.success is True
        assert outcome.record_change_tag == "new_tag"
        assert outcome.error_code is None

    def test_failed_result(self):
        """A failure result exposes the error code and message it was given."""
        outcome = OperationResult(
            record_name="test_record",
            success=False,
            error_code="SERVER_ERROR",
            error_message="Internal server error",
        )

        assert outcome.success is False
        assert outcome.error_code == "SERVER_ERROR"
        assert outcome.error_message == "Internal server error"
|
||||
|
||||
|
||||
class TestBatchResult:
    """Aggregate counters and the all_succeeded flag on BatchResult."""

    def test_empty_batch_result(self):
        """A new batch has zero counts and counts as fully succeeded."""
        batch = BatchResult()

        assert batch.all_succeeded is True
        assert batch.success_count == 0
        assert batch.failure_count == 0

    def test_batch_with_successes(self):
        """Two successes and no failures: all_succeeded stays True."""
        batch = BatchResult()
        for record_name in ("rec1", "rec2"):
            batch.successful.append(OperationResult(record_name, True))

        assert batch.all_succeeded is True
        assert batch.success_count == 2
        assert batch.failure_count == 0

    def test_batch_with_failures(self):
        """One success plus one failure: all_succeeded becomes False."""
        batch = BatchResult()
        succeeded = OperationResult("rec1", True)
        batch.successful.append(succeeded)
        errored = OperationResult("rec2", False, error_message="Error")
        batch.failed.append(errored)

        assert batch.all_succeeded is False
        assert batch.success_count == 1
        assert batch.failure_count == 1
|
||||
|
||||
|
||||
class TestCloudKitClient:
|
||||
"""Tests for CloudKitClient."""
|
||||
|
||||
def test_not_configured_without_credentials(self):
|
||||
"""Test that client reports not configured without credentials."""
|
||||
with patch.dict("os.environ", {}, clear=True):
|
||||
client = CloudKitClient()
|
||||
assert client.is_configured is False
|
||||
|
||||
def test_configured_with_credentials(self):
|
||||
"""Test that client reports configured with credentials."""
|
||||
# Create a minimal mock for the private key
|
||||
mock_key = MagicMock()
|
||||
|
||||
with patch.dict("os.environ", {
|
||||
"CLOUDKIT_KEY_ID": "test_key_id",
|
||||
"CLOUDKIT_PRIVATE_KEY": "-----BEGIN EC PRIVATE KEY-----\ntest\n-----END EC PRIVATE KEY-----",
|
||||
}):
|
||||
with patch("sportstime_parser.uploaders.cloudkit.serialization.load_pem_private_key") as mock_load:
|
||||
mock_load.return_value = mock_key
|
||||
client = CloudKitClient()
|
||||
assert client.is_configured is True
|
||||
|
||||
def test_get_api_path(self):
|
||||
"""Test API path construction."""
|
||||
client = CloudKitClient(
|
||||
container_id="iCloud.com.test.app",
|
||||
environment="development",
|
||||
)
|
||||
|
||||
path = client._get_api_path("records/query")
|
||||
|
||||
assert path == "/database/1/iCloud.com.test.app/development/public/records/query"
|
||||
|
||||
@patch("sportstime_parser.uploaders.cloudkit.requests.Session")
|
||||
def test_fetch_records_query(self, mock_session_class):
|
||||
"""Test fetching records with query."""
|
||||
mock_session = MagicMock()
|
||||
mock_session_class.return_value = mock_session
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.json.return_value = {
|
||||
"records": [
|
||||
{"recordName": "rec1", "recordType": "Game"},
|
||||
{"recordName": "rec2", "recordType": "Game"},
|
||||
]
|
||||
}
|
||||
mock_session.request.return_value = mock_response
|
||||
|
||||
# Setup client with mocked auth
|
||||
mock_key = MagicMock()
|
||||
mock_key.sign.return_value = b"signature"
|
||||
|
||||
with patch.dict("os.environ", {
|
||||
"CLOUDKIT_KEY_ID": "test_key",
|
||||
"CLOUDKIT_PRIVATE_KEY": "-----BEGIN EC PRIVATE KEY-----\ntest\n-----END EC PRIVATE KEY-----",
|
||||
}):
|
||||
with patch("sportstime_parser.uploaders.cloudkit.serialization.load_pem_private_key") as mock_load:
|
||||
with patch("sportstime_parser.uploaders.cloudkit.jwt.encode") as mock_jwt:
|
||||
mock_load.return_value = mock_key
|
||||
mock_jwt.return_value = "test_token"
|
||||
|
||||
client = CloudKitClient()
|
||||
records = client.fetch_records(RecordType.GAME)
|
||||
|
||||
assert len(records) == 2
|
||||
assert records[0]["recordName"] == "rec1"
|
||||
|
||||
@patch("sportstime_parser.uploaders.cloudkit.requests.Session")
def test_save_records_success(self, mock_session_class):
    """Save two records against a mocked response that returns both with change tags."""
    response = MagicMock()
    response.status_code = 200
    response.json.return_value = {
        "records": [
            {"recordName": "rec1", "recordChangeTag": "tag1"},
            {"recordName": "rec2", "recordChangeTag": "tag2"},
        ]
    }
    http_session = MagicMock()
    http_session.request.return_value = response
    mock_session_class.return_value = http_session

    # Stand-in private key so no real PEM is parsed or used for signing.
    signing_key = MagicMock()
    signing_key.sign.return_value = b"signature"

    credentials = {
        "CLOUDKIT_KEY_ID": "test_key",
        "CLOUDKIT_PRIVATE_KEY": "-----BEGIN EC PRIVATE KEY-----\ntest\n-----END EC PRIVATE KEY-----",
    }
    key_loader = patch(
        "sportstime_parser.uploaders.cloudkit.serialization.load_pem_private_key",
        return_value=signing_key,
    )
    jwt_encoder = patch(
        "sportstime_parser.uploaders.cloudkit.jwt.encode",
        return_value="test_token",
    )

    with patch.dict("os.environ", credentials), key_loader, jwt_encoder:
        client = CloudKitClient()
        outgoing = [
            CloudKitRecord(name, RecordType.GAME, {"sport": "nba"})
            for name in ("rec1", "rec2")
        ]
        result = client.save_records(outgoing)

    assert result.success_count == 2
    assert result.failure_count == 0
|
||||
|
||||
@patch("sportstime_parser.uploaders.cloudkit.requests.Session")
def test_save_records_partial_failure(self, mock_session_class):
    """Save two records when the mocked response reports a server error for one."""
    response = MagicMock()
    response.status_code = 200
    response.json.return_value = {
        "records": [
            {"recordName": "rec1", "recordChangeTag": "tag1"},
            {"recordName": "rec2", "serverErrorCode": "QUOTA_EXCEEDED", "reason": "Quota exceeded"},
        ]
    }
    http_session = MagicMock()
    http_session.request.return_value = response
    mock_session_class.return_value = http_session

    # Stand-in private key so no real PEM is parsed or used for signing.
    signing_key = MagicMock()
    signing_key.sign.return_value = b"signature"

    credentials = {
        "CLOUDKIT_KEY_ID": "test_key",
        "CLOUDKIT_PRIVATE_KEY": "-----BEGIN EC PRIVATE KEY-----\ntest\n-----END EC PRIVATE KEY-----",
    }
    key_loader = patch(
        "sportstime_parser.uploaders.cloudkit.serialization.load_pem_private_key",
        return_value=signing_key,
    )
    jwt_encoder = patch(
        "sportstime_parser.uploaders.cloudkit.jwt.encode",
        return_value="test_token",
    )

    with patch.dict("os.environ", credentials), key_loader, jwt_encoder:
        client = CloudKitClient()
        outgoing = [
            CloudKitRecord(name, RecordType.GAME, {"sport": "nba"})
            for name in ("rec1", "rec2")
        ]
        result = client.save_records(outgoing)

    assert result.success_count == 1
    assert result.failure_count == 1
    assert result.failed[0].error_code == "QUOTA_EXCEEDED"
|
||||
|
||||
@patch("sportstime_parser.uploaders.cloudkit.requests.Session")
def test_auth_error(self, mock_session_class):
    """Expect CloudKitAuthError when the mocked response has status 421."""
    response = MagicMock()
    response.status_code = 421
    http_session = MagicMock()
    http_session.request.return_value = response
    mock_session_class.return_value = http_session

    # Stand-in private key so no real PEM is parsed or used for signing.
    signing_key = MagicMock()
    signing_key.sign.return_value = b"signature"

    credentials = {
        "CLOUDKIT_KEY_ID": "test_key",
        "CLOUDKIT_PRIVATE_KEY": "-----BEGIN EC PRIVATE KEY-----\ntest\n-----END EC PRIVATE KEY-----",
    }
    key_loader = patch(
        "sportstime_parser.uploaders.cloudkit.serialization.load_pem_private_key",
        return_value=signing_key,
    )
    jwt_encoder = patch(
        "sportstime_parser.uploaders.cloudkit.jwt.encode",
        return_value="test_token",
    )

    with patch.dict("os.environ", credentials), key_loader, jwt_encoder:
        client = CloudKitClient()
        with pytest.raises(CloudKitAuthError):
            client.fetch_records(RecordType.GAME)
|
||||
|
||||
@patch("sportstime_parser.uploaders.cloudkit.requests.Session")
def test_rate_limit_error(self, mock_session_class):
    """Expect CloudKitRateLimitError when the mocked response has status 429."""
    response = MagicMock()
    response.status_code = 429
    http_session = MagicMock()
    http_session.request.return_value = response
    mock_session_class.return_value = http_session

    # Stand-in private key so no real PEM is parsed or used for signing.
    signing_key = MagicMock()
    signing_key.sign.return_value = b"signature"

    credentials = {
        "CLOUDKIT_KEY_ID": "test_key",
        "CLOUDKIT_PRIVATE_KEY": "-----BEGIN EC PRIVATE KEY-----\ntest\n-----END EC PRIVATE KEY-----",
    }
    key_loader = patch(
        "sportstime_parser.uploaders.cloudkit.serialization.load_pem_private_key",
        return_value=signing_key,
    )
    jwt_encoder = patch(
        "sportstime_parser.uploaders.cloudkit.jwt.encode",
        return_value="test_token",
    )

    with patch.dict("os.environ", credentials), key_loader, jwt_encoder:
        client = CloudKitClient()
        with pytest.raises(CloudKitRateLimitError):
            client.fetch_records(RecordType.GAME)
|
||||
|
||||
@patch("sportstime_parser.uploaders.cloudkit.requests.Session")
def test_server_error(self, mock_session_class):
    """Expect CloudKitServerError when the mocked response has status 503."""
    response = MagicMock()
    response.status_code = 503
    http_session = MagicMock()
    http_session.request.return_value = response
    mock_session_class.return_value = http_session

    # Stand-in private key so no real PEM is parsed or used for signing.
    signing_key = MagicMock()
    signing_key.sign.return_value = b"signature"

    credentials = {
        "CLOUDKIT_KEY_ID": "test_key",
        "CLOUDKIT_PRIVATE_KEY": "-----BEGIN EC PRIVATE KEY-----\ntest\n-----END EC PRIVATE KEY-----",
    }
    key_loader = patch(
        "sportstime_parser.uploaders.cloudkit.serialization.load_pem_private_key",
        return_value=signing_key,
    )
    jwt_encoder = patch(
        "sportstime_parser.uploaders.cloudkit.jwt.encode",
        return_value="test_token",
    )

    with patch.dict("os.environ", credentials), key_loader, jwt_encoder:
        client = CloudKitClient()
        with pytest.raises(CloudKitServerError):
            client.fetch_records(RecordType.GAME)
|
||||
|
||||
|
||||
class TestRecordType:
    """Tests for the RecordType enum."""

    def test_record_type_values(self):
        """Check the string value carried by each RecordType member."""
        expected_values = [
            (RecordType.GAME, "Game"),
            (RecordType.TEAM, "Team"),
            (RecordType.STADIUM, "Stadium"),
            (RecordType.TEAM_ALIAS, "TeamAlias"),
            (RecordType.STADIUM_ALIAS, "StadiumAlias"),
        ]

        for member, value in expected_values:
            assert member.value == value
|
||||
350
sportstime_parser/tests/test_uploaders/test_diff.py
Normal file
350
sportstime_parser/tests/test_uploaders/test_diff.py
Normal file
@@ -0,0 +1,350 @@
|
||||
"""Tests for the record differ."""
|
||||
|
||||
import pytest
|
||||
from datetime import datetime
|
||||
|
||||
from sportstime_parser.models.game import Game
|
||||
from sportstime_parser.models.team import Team
|
||||
from sportstime_parser.models.stadium import Stadium
|
||||
from sportstime_parser.uploaders.diff import (
|
||||
DiffAction,
|
||||
RecordDiff,
|
||||
DiffResult,
|
||||
RecordDiffer,
|
||||
game_to_cloudkit_record,
|
||||
team_to_cloudkit_record,
|
||||
stadium_to_cloudkit_record,
|
||||
)
|
||||
from sportstime_parser.uploaders.cloudkit import RecordType
|
||||
|
||||
|
||||
class TestRecordDiff:
    """Tests for the RecordDiff dataclass."""

    def test_create_record_diff(self):
        """Construct a RecordDiff and read back the three fields it was given."""
        name = "nba_2025_hou_okc_1021"

        diff = RecordDiff(
            record_name=name,
            record_type=RecordType.GAME,
            action=DiffAction.CREATE,
        )

        assert diff.record_name == name
        assert diff.record_type == RecordType.GAME
        assert diff.action == DiffAction.CREATE
|
||||
|
||||
|
||||
class TestDiffResult:
    """Tests for the DiffResult dataclass."""

    def test_empty_result(self):
        """A freshly constructed DiffResult reports zero for every counter."""
        result = DiffResult()

        for counter in (
            "create_count",
            "update_count",
            "delete_count",
            "unchanged_count",
            "total_changes",
        ):
            assert getattr(result, counter) == 0

    def test_counts(self):
        """Populate each bucket and check the per-bucket and total counters."""
        result = DiffResult()

        # (target bucket, record name, action), appended in this order.
        planned = [
            (result.creates, "game_1", DiffAction.CREATE),
            (result.creates, "game_2", DiffAction.CREATE),
            (result.updates, "game_3", DiffAction.UPDATE),
            (result.deletes, "game_4", DiffAction.DELETE),
            (result.unchanged, "game_5", DiffAction.UNCHANGED),
        ]
        for bucket, name, action in planned:
            bucket.append(
                RecordDiff(
                    record_name=name,
                    record_type=RecordType.GAME,
                    action=action,
                )
            )

        assert result.create_count == 2
        assert result.update_count == 1
        assert result.delete_count == 1
        assert result.unchanged_count == 1
        assert result.total_changes == 4  # excludes unchanged
|
||||
|
||||
|
||||
class TestRecordDiffer:
    """Tests for RecordDiffer."""

    @pytest.fixture
    def differ(self):
        """Provide a new RecordDiffer."""
        return RecordDiffer()

    @pytest.fixture
    def sample_game(self):
        """Provide a scheduled NBA game used as the local record."""
        return Game(
            id="nba_2025_hou_okc_1021",
            sport="nba",
            season=2025,
            home_team_id="team_nba_okc",
            away_team_id="team_nba_hou",
            stadium_id="stadium_nba_paycom_center",
            game_date=datetime(2025, 10, 21, 19, 0, 0),
            status="scheduled",
        )

    @pytest.fixture
    def sample_team(self):
        """Provide an NBA team used as the local record."""
        return Team(
            id="team_nba_okc",
            sport="nba",
            city="Oklahoma City",
            name="Thunder",
            full_name="Oklahoma City Thunder",
            abbreviation="OKC",
            conference="Western",
            division="Northwest",
        )

    @pytest.fixture
    def sample_stadium(self):
        """Provide an NBA stadium used as the local record."""
        return Stadium(
            id="stadium_nba_paycom_center",
            sport="nba",
            name="Paycom Center",
            city="Oklahoma City",
            state="OK",
            country="USA",
            latitude=35.4634,
            longitude=-97.5151,
            capacity=18203,
        )

    @staticmethod
    def _remote_game_record(game, status):
        """Build the full remote Game record dict for ``game`` with the given status.

        The field values are the literals matching the ``sample_game`` fixture;
        only the record name and the timestamp are taken from ``game``.
        """
        millis = int(game.game_date.timestamp() * 1000)
        return {
            "recordName": game.id,
            "recordType": "Game",
            "fields": {
                "sport": {"value": "nba", "type": "STRING"},
                "season": {"value": 2025, "type": "INT64"},
                "home_team_id": {"value": "team_nba_okc", "type": "STRING"},
                "away_team_id": {"value": "team_nba_hou", "type": "STRING"},
                "stadium_id": {"value": "stadium_nba_paycom_center", "type": "STRING"},
                "game_date": {"value": millis, "type": "TIMESTAMP"},
                "game_number": {"value": None, "type": "INT64"},
                "home_score": {"value": None, "type": "INT64"},
                "away_score": {"value": None, "type": "INT64"},
                "status": {"value": status, "type": "STRING"},
            },
            "recordChangeTag": "abc123",
        }

    def test_diff_games_create(self, differ, sample_game):
        """A local game with no remote counterpart is reported as a create."""
        result = differ.diff_games([sample_game], [])

        assert result.create_count == 1
        assert result.update_count == 0
        assert result.delete_count == 0
        assert result.creates[0].record_name == sample_game.id

    def test_diff_games_delete(self, differ, sample_game):
        """A remote game with no local counterpart is reported as a delete."""
        remote_only = {
            "recordName": sample_game.id,
            "recordType": "Game",
            "fields": {
                "sport": {"value": "nba", "type": "STRING"},
                "season": {"value": 2025, "type": "INT64"},
            },
            "recordChangeTag": "abc123",
        }

        result = differ.diff_games([], [remote_only])

        assert result.create_count == 0
        assert result.delete_count == 1
        assert result.deletes[0].record_name == sample_game.id

    def test_diff_games_unchanged(self, differ, sample_game):
        """A remote record matching the local game is reported as unchanged."""
        remote = self._remote_game_record(sample_game, "scheduled")

        result = differ.diff_games([sample_game], [remote])

        assert result.create_count == 0
        assert result.update_count == 0
        assert result.unchanged_count == 1

    def test_diff_games_update(self, differ, sample_game):
        """A remote record differing only in status is reported as an update."""
        # Remote says "postponed" while the local game is "scheduled".
        remote = self._remote_game_record(sample_game, "postponed")

        result = differ.diff_games([sample_game], [remote])

        assert result.update_count == 1
        update = result.updates[0]
        assert "status" in update.changed_fields
        assert update.record_change_tag == "abc123"

    def test_diff_teams_create(self, differ, sample_team):
        """A local team with no remote counterpart is reported as a create."""
        result = differ.diff_teams([sample_team], [])

        assert result.create_count == 1
        assert result.creates[0].record_name == sample_team.id

    def test_diff_stadiums_create(self, differ, sample_stadium):
        """A local stadium with no remote counterpart is reported as a create."""
        result = differ.diff_stadiums([sample_stadium], [])

        assert result.create_count == 1
        assert result.creates[0].record_name == sample_stadium.id

    def test_get_records_to_upload(self, differ, sample_game):
        """Records to upload cover both the created and the updated game."""
        second_game = Game(
            id="nba_2025_lal_lac_1022",
            sport="nba",
            season=2025,
            home_team_id="team_nba_lac",
            away_team_id="team_nba_lal",
            stadium_id="stadium_nba_crypto_com",
            game_date=datetime(2025, 10, 22, 19, 0, 0),
            status="scheduled",
        )
        second_millis = int(second_game.game_date.timestamp() * 1000)

        # Only the second game exists remotely, and with a different status.
        remote_records = [
            {
                "recordName": second_game.id,
                "recordType": "Game",
                "fields": {
                    "sport": {"value": "nba", "type": "STRING"},
                    "season": {"value": 2025, "type": "INT64"},
                    "home_team_id": {"value": "team_nba_lac", "type": "STRING"},
                    "away_team_id": {"value": "team_nba_lal", "type": "STRING"},
                    "stadium_id": {"value": "stadium_nba_crypto_com", "type": "STRING"},
                    "game_date": {"value": second_millis, "type": "TIMESTAMP"},
                    "status": {"value": "postponed", "type": "STRING"},  # Different!
                },
                "recordChangeTag": "xyz789",
            }
        ]

        result = differ.diff_games([sample_game, second_game], remote_records)
        uploads = result.get_records_to_upload()

        assert len(uploads) == 2  # 1 create + 1 update
        uploaded_names = [record.record_name for record in uploads]
        assert sample_game.id in uploaded_names
        assert second_game.id in uploaded_names
|
||||
|
||||
|
||||
class TestConvenienceFunctions:
    """Tests for the module-level model-to-record conversion functions."""

    def test_game_to_cloudkit_record(self):
        """Convert a Game and check the record name, type and two fields."""
        game = Game(
            id="nba_2025_hou_okc_1021",
            sport="nba",
            season=2025,
            home_team_id="team_nba_okc",
            away_team_id="team_nba_hou",
            stadium_id="stadium_nba_paycom_center",
            game_date=datetime(2025, 10, 21, 19, 0, 0),
            status="scheduled",
        )

        converted = game_to_cloudkit_record(game)

        assert converted.record_name == game.id
        assert converted.record_type == RecordType.GAME
        fields = converted.fields
        assert fields["sport"] == "nba"
        assert fields["season"] == 2025

    def test_team_to_cloudkit_record(self):
        """Convert a Team and check the record name, type and two fields."""
        team = Team(
            id="team_nba_okc",
            sport="nba",
            city="Oklahoma City",
            name="Thunder",
            full_name="Oklahoma City Thunder",
            abbreviation="OKC",
        )

        converted = team_to_cloudkit_record(team)

        assert converted.record_name == team.id
        assert converted.record_type == RecordType.TEAM
        fields = converted.fields
        assert fields["city"] == "Oklahoma City"
        assert fields["name"] == "Thunder"

    def test_stadium_to_cloudkit_record(self):
        """Convert a Stadium and check the record name, type and two fields."""
        stadium = Stadium(
            id="stadium_nba_paycom_center",
            sport="nba",
            name="Paycom Center",
            city="Oklahoma City",
            state="OK",
            country="USA",
            latitude=35.4634,
            longitude=-97.5151,
        )

        converted = stadium_to_cloudkit_record(stadium)

        assert converted.record_name == stadium.id
        assert converted.record_type == RecordType.STADIUM
        fields = converted.fields
        assert fields["name"] == "Paycom Center"
        assert fields["latitude"] == 35.4634
|
||||
472
sportstime_parser/tests/test_uploaders/test_state.py
Normal file
472
sportstime_parser/tests/test_uploaders/test_state.py
Normal file
@@ -0,0 +1,472 @@
|
||||
"""Tests for the upload state manager."""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from tempfile import TemporaryDirectory
|
||||
|
||||
from sportstime_parser.uploaders.state import (
|
||||
RecordState,
|
||||
UploadSession,
|
||||
StateManager,
|
||||
)
|
||||
|
||||
|
||||
class TestRecordState:
    """Tests for the RecordState dataclass."""

    def test_create_record_state(self):
        """A RecordState built from name and type only carries the expected defaults."""
        state = RecordState(
            record_name="nba_2025_hou_okc_1021",
            record_type="Game",
        )

        assert state.record_name == "nba_2025_hou_okc_1021"
        assert state.record_type == "Game"
        assert state.status == "pending"
        assert state.uploaded_at is None
        assert state.record_change_tag is None
        assert state.error_message is None
        assert state.retry_count == 0

    def test_record_state_to_dict(self):
        """Serialize a RecordState and check the resulting dictionary entries."""
        # A fixed naive timestamp keeps the test deterministic and avoids
        # datetime.utcnow(), which is deprecated since Python 3.12.
        uploaded_at = datetime(2026, 1, 10, 12, 0, 0)
        state = RecordState(
            record_name="nba_2025_hou_okc_1021",
            record_type="Game",
            uploaded_at=uploaded_at,
            record_change_tag="abc123",
            status="uploaded",
        )

        data = state.to_dict()

        assert data["record_name"] == "nba_2025_hou_okc_1021"
        assert data["record_type"] == "Game"
        assert data["status"] == "uploaded"
        assert data["uploaded_at"] == uploaded_at.isoformat()
        assert data["record_change_tag"] == "abc123"

    def test_record_state_from_dict(self):
        """Deserialize a RecordState and check the restored attributes."""
        data = {
            "record_name": "nba_2025_hou_okc_1021",
            "record_type": "Game",
            "uploaded_at": "2026-01-10T12:00:00",
            "record_change_tag": "abc123",
            "status": "uploaded",
            "error_message": None,
            "retry_count": 0,
        }

        state = RecordState.from_dict(data)

        assert state.record_name == "nba_2025_hou_okc_1021"
        assert state.record_type == "Game"
        assert state.status == "uploaded"
        # The ISO string is expected to come back as a datetime object.
        assert state.uploaded_at == datetime.fromisoformat("2026-01-10T12:00:00")
        assert state.record_change_tag == "abc123"
|
||||
|
||||
|
||||
class TestUploadSession:
    """Tests for the UploadSession dataclass."""

    @staticmethod
    def _new_session(*game_names):
        """Build the NBA 2025 development session, adding each name as a Game record."""
        session = UploadSession(
            sport="nba",
            season=2025,
            environment="development",
        )
        for name in game_names:
            session.add_record(name, "Game")
        return session

    def test_create_upload_session(self):
        """A new session keeps its identifying fields and starts with no records."""
        session = self._new_session()

        assert session.sport == "nba"
        assert session.season == 2025
        assert session.environment == "development"
        assert session.total_count == 0
        assert len(session.records) == 0

    def test_add_record(self):
        """Added records are counted and stored under their record name."""
        session = self._new_session("game_1", "game_2")
        session.add_record("team_1", "Team")

        assert session.total_count == 3
        assert len(session.records) == 3
        assert "game_1" in session.records
        assert session.records["game_1"].record_type == "Game"

    def test_mark_uploaded(self):
        """Marking a record uploaded sets its status, change tag and timestamp."""
        session = self._new_session("game_1")

        session.mark_uploaded("game_1", "change_tag_123")

        state = session.records["game_1"]
        assert state.status == "uploaded"
        assert state.record_change_tag == "change_tag_123"
        assert state.uploaded_at is not None

    def test_mark_failed(self):
        """Marking a record failed sets its status, message and retry count."""
        session = self._new_session("game_1")

        session.mark_failed("game_1", "Server error")

        state = session.records["game_1"]
        assert state.status == "failed"
        assert state.error_message == "Server error"
        assert state.retry_count == 1

    def test_mark_failed_increments_retry_count(self):
        """Three consecutive failures leave the retry count at three."""
        session = self._new_session("game_1")

        for attempt in (1, 2, 3):
            session.mark_failed("game_1", f"Error {attempt}")

        assert session.records["game_1"].retry_count == 3

    def test_counts(self):
        """Uploaded, failed and pending counters each reflect one record."""
        session = self._new_session("game_1", "game_2", "game_3")

        session.mark_uploaded("game_1")
        session.mark_failed("game_2", "Error")

        assert session.uploaded_count == 1
        assert session.failed_count == 1
        assert session.pending_count == 1

    def test_is_complete(self):
        """The session is complete only once both records are uploaded."""
        session = self._new_session("game_1", "game_2")
        assert not session.is_complete

        session.mark_uploaded("game_1")
        assert not session.is_complete

        session.mark_uploaded("game_2")
        assert session.is_complete

    def test_progress_percent(self):
        """One uploaded record out of four gives 25 percent progress."""
        session = self._new_session("game_1", "game_2", "game_3", "game_4")

        session.mark_uploaded("game_1")

        assert session.progress_percent == 25.0

    def test_get_pending_records(self):
        """Only the record that was neither uploaded nor failed is pending."""
        session = self._new_session("game_1", "game_2", "game_3")
        session.mark_uploaded("game_1")
        session.mark_failed("game_2", "Error")

        assert session.get_pending_records() == ["game_3"]

    def test_get_failed_records(self):
        """Both failed record names are returned, in any order."""
        session = self._new_session("game_1", "game_2", "game_3")
        session.mark_failed("game_1", "Error 1")
        session.mark_failed("game_3", "Error 3")

        assert set(session.get_failed_records()) == {"game_1", "game_3"}

    def test_get_retryable_records(self):
        """A record that already failed ``max_retries`` times is not retryable."""
        session = self._new_session("game_1", "game_2", "game_3")

        # game_1 fails once.
        session.mark_failed("game_1", "Error")

        # game_2 fails three times (max retries).
        for _ in range(3):
            session.mark_failed("game_2", "Error")

        assert session.get_retryable_records(max_retries=3) == ["game_1"]

    def test_to_dict_and_from_dict(self):
        """A session survives a to_dict / from_dict round trip."""
        session = self._new_session("game_1", "game_2")
        session.mark_uploaded("game_1", "tag_123")

        restored = UploadSession.from_dict(session.to_dict())

        for attr in ("sport", "season", "environment", "total_count", "uploaded_count"):
            assert getattr(restored, attr) == getattr(session, attr)
        assert restored.records["game_1"].status == "uploaded"
|
||||
|
||||
|
||||
class TestStateManager:
    """Tests for StateManager."""

    def test_create_session(self):
        """Creating a session returns it populated and writes a state file."""
        with TemporaryDirectory() as tmpdir:
            state_dir = Path(tmpdir)
            manager = StateManager(state_dir=state_dir)

            created = manager.create_session(
                sport="nba",
                season=2025,
                environment="development",
                record_names=[
                    ("game_1", "Game"),
                    ("game_2", "Game"),
                    ("team_1", "Team"),
                ],
            )

            assert created.sport == "nba"
            assert created.season == 2025
            assert created.total_count == 3

            # The state file is expected under this name in the state directory.
            expected_file = state_dir / "upload_state_nba_2025_development.json"
            assert expected_file.exists()

    def test_load_session(self):
        """A saved session can be loaded back with its record status intact."""
        with TemporaryDirectory() as tmpdir:
            manager = StateManager(state_dir=Path(tmpdir))

            # Create a session, record progress on it, and save it.
            saved = manager.create_session(
                sport="nba",
                season=2025,
                environment="development",
                record_names=[("game_1", "Game")],
            )
            saved.mark_uploaded("game_1", "tag_123")
            manager.save_session(saved)

            # Read it back from the state directory.
            loaded = manager.load_session("nba", 2025, "development")

            assert loaded is not None
            assert loaded.sport == "nba"
            assert loaded.records["game_1"].status == "uploaded"

    def test_load_nonexistent_session(self):
        """Loading a session that was never created returns None."""
        with TemporaryDirectory() as tmpdir:
            manager = StateManager(state_dir=Path(tmpdir))

            assert manager.load_session("nba", 2025, "development") is None

    def test_delete_session(self):
        """Deleting an existing session returns True and removes it."""
        with TemporaryDirectory() as tmpdir:
            manager = StateManager(state_dir=Path(tmpdir))
            manager.create_session(
                sport="nba",
                season=2025,
                environment="development",
                record_names=[("game_1", "Game")],
            )

            deleted = manager.delete_session("nba", 2025, "development")
            assert deleted is True

            # A subsequent load must find nothing.
            assert manager.load_session("nba", 2025, "development") is None

    def test_delete_nonexistent_session(self):
        """Deleting a session that was never created returns False."""
        with TemporaryDirectory() as tmpdir:
            manager = StateManager(state_dir=Path(tmpdir))

            deleted = manager.delete_session("nba", 2025, "development")

            assert deleted is False

    def test_list_sessions(self):
        """Listing returns one entry per created session."""
        with TemporaryDirectory() as tmpdir:
            manager = StateManager(state_dir=Path(tmpdir))

            # Two sessions for different sports, seasons and environments.
            manager.create_session(
                sport="nba",
                season=2025,
                environment="development",
                record_names=[("game_1", "Game")],
            )
            manager.create_session(
                sport="mlb",
                season=2026,
                environment="production",
                record_names=[("game_2", "Game"), ("game_3", "Game")],
            )

            listed = manager.list_sessions()

            assert len(listed) == 2
            listed_sports = {entry["sport"] for entry in listed}
            assert listed_sports == {"nba", "mlb"}

    def test_get_session_or_create_new(self):
        """With no stored session, get_session_or_create builds a new one."""
        with TemporaryDirectory() as tmpdir:
            manager = StateManager(state_dir=Path(tmpdir))

            session = manager.get_session_or_create(
                sport="nba",
                season=2025,
                environment="development",
                record_names=[("game_1", "Game")],
                resume=False,
            )

            assert session.sport == "nba"
            assert session.total_count == 1

    def test_get_session_or_create_resume(self):
        """Resuming keeps earlier progress and picks up newly listed records."""
        with TemporaryDirectory() as tmpdir:
            manager = StateManager(state_dir=Path(tmpdir))

            # Initial session with one of its two records already uploaded.
            earlier = manager.create_session(
                sport="nba",
                season=2025,
                environment="development",
                record_names=[("game_1", "Game"), ("game_2", "Game")],
            )
            earlier.mark_uploaded("game_1", "tag_123")
            manager.save_session(earlier)

            # Resume, this time listing one additional record.
            resumed = manager.get_session_or_create(
                sport="nba",
                season=2025,
                environment="development",
                record_names=[("game_1", "Game"), ("game_2", "Game"), ("game_3", "Game")],
                resume=True,
            )

            # Original progress plus the new record.
            assert resumed.records["game_1"].status == "uploaded"
            assert "game_3" in resumed.records
            assert resumed.total_count == 3

    def test_get_session_or_create_overwrite(self):
        """Without resume, an existing session is replaced by a fresh one."""
        with TemporaryDirectory() as tmpdir:
            manager = StateManager(state_dir=Path(tmpdir))

            # Initial session with one of its two records already uploaded.
            earlier = manager.create_session(
                sport="nba",
                season=2025,
                environment="development",
                record_names=[("game_1", "Game"), ("game_2", "Game")],
            )
            earlier.mark_uploaded("game_1", "tag_123")
            manager.save_session(earlier)

            # Ask for a session again, explicitly not resuming.
            fresh = manager.get_session_or_create(
                sport="nba",
                season=2025,
                environment="development",
                record_names=[("game_3", "Game")],
                resume=False,
            )

            # Only the newly listed record is present.
            assert fresh.total_count == 1
            assert "game_1" not in fresh.records
            assert "game_3" in fresh.records
|
||||
Reference in New Issue
Block a user