feat(scripts): add sportstime-parser data pipeline
Complete Python package for scraping, normalizing, and uploading sports schedule data to CloudKit.

Includes:
- Multi-source scrapers for NBA, MLB, NFL, NHL, MLS, WNBA, NWSL
- Canonical ID system for teams, stadiums, and games
- Fuzzy matching with manual alias support
- CloudKit uploader with batch operations and deduplication
- Comprehensive test suite with fixtures
- WNBA abbreviation aliases for improved team resolution
- Alias validation script to detect orphan references

All 5 phases of the data remediation plan are completed:
- Phase 1: Alias fixes (team/stadium alias additions)
- Phase 2: NHL stadium coordinate fixes
- Phase 3: Re-scrape validation
- Phase 4: iOS bundle update
- Phase 5: Code quality improvements (WNBA aliases)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
48
sportstime_parser/tests/fixtures/__init__.py
vendored
Normal file
48
sportstime_parser/tests/fixtures/__init__.py
vendored
Normal file
@@ -0,0 +1,48 @@
|
||||
"""Test fixtures for sportstime-parser tests."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
# Directory containing this module; every fixture path below hangs off it.
FIXTURES_DIR = Path(__file__).parent

# --- NBA ---
NBA_FIXTURES_DIR = FIXTURES_DIR.joinpath("nba")
NBA_BR_OCTOBER_HTML = NBA_FIXTURES_DIR.joinpath("basketball_reference_october.html")
NBA_BR_EDGE_CASES_HTML = NBA_FIXTURES_DIR.joinpath("basketball_reference_edge_cases.html")
NBA_ESPN_SCOREBOARD_JSON = NBA_FIXTURES_DIR.joinpath("espn_scoreboard.json")

# --- MLB ---
MLB_FIXTURES_DIR = FIXTURES_DIR.joinpath("mlb")
MLB_ESPN_SCOREBOARD_JSON = MLB_FIXTURES_DIR.joinpath("espn_scoreboard.json")

# --- NFL ---
NFL_FIXTURES_DIR = FIXTURES_DIR.joinpath("nfl")
NFL_ESPN_SCOREBOARD_JSON = NFL_FIXTURES_DIR.joinpath("espn_scoreboard.json")

# --- NHL ---
NHL_FIXTURES_DIR = FIXTURES_DIR.joinpath("nhl")
NHL_ESPN_SCOREBOARD_JSON = NHL_FIXTURES_DIR.joinpath("espn_scoreboard.json")

# --- MLS ---
MLS_FIXTURES_DIR = FIXTURES_DIR.joinpath("mls")
MLS_ESPN_SCOREBOARD_JSON = MLS_FIXTURES_DIR.joinpath("espn_scoreboard.json")

# --- WNBA ---
WNBA_FIXTURES_DIR = FIXTURES_DIR.joinpath("wnba")
WNBA_ESPN_SCOREBOARD_JSON = WNBA_FIXTURES_DIR.joinpath("espn_scoreboard.json")

# --- NWSL ---
NWSL_FIXTURES_DIR = FIXTURES_DIR.joinpath("nwsl")
NWSL_ESPN_SCOREBOARD_JSON = NWSL_FIXTURES_DIR.joinpath("espn_scoreboard.json")
|
||||
|
||||
|
||||
def load_fixture(path: Path) -> str:
    """Return the full contents of a fixture file, decoded as UTF-8 text.

    Args:
        path: Location of the fixture file to read.

    Returns:
        Everything in the file as a single string.
    """
    # Text read mode is the default for open(); only the encoding is pinned.
    with open(path, encoding="utf-8") as handle:
        contents = handle.read()
    return contents
|
||||
|
||||
|
||||
def load_json_fixture(path: Path) -> dict:
    """Read a UTF-8 encoded JSON fixture file and return the parsed value.

    Args:
        path: Location of the JSON fixture file.

    Returns:
        The decoded JSON document. The annotation says ``dict``, but the
        parser hands back whatever top-level value the file contains.
    """
    import json

    with open(path, encoding="utf-8") as stream:
        raw_text = stream.read()
    return json.loads(raw_text)
|
||||
Reference in New Issue
Block a user