feat(scripts): rewrite parser as modular Python CLI

Replace monolithic scraping scripts with sportstime_parser package:

- Multi-source scrapers with automatic fallback for 7 sports
- Canonical ID generation for games, teams, and stadiums
- Fuzzy matching with configurable thresholds for name resolution
- CloudKit Web Services uploader with JWT auth, diff-based updates
- Resumable uploads with checkpoint state persistence
- Validation reports with manual review items and suggested matches
- Comprehensive test suite (249 tests)

CLI: sportstime-parser scrape|validate|upload|status|retry|clear

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-01-10 21:06:12 -06:00
parent 284a10d9e1
commit eeaf900e5a
109 changed files with 18415 additions and 266211 deletions

View File

@@ -0,0 +1,187 @@
"""Tests for timezone conversion utilities."""
import pytest
from datetime import datetime, date
from zoneinfo import ZoneInfo
from sportstime_parser.normalizers.timezone import (
detect_timezone_from_string,
detect_timezone_from_location,
parse_datetime,
convert_to_utc,
get_stadium_timezone,
TimezoneResult,
)
class TestDetectTimezoneFromString:
    """Exercise detect_timezone_from_string against abbreviations embedded in time text."""

    def test_eastern_time(self):
        """ET, EST and EDT are all expected to map to America/New_York."""
        for text in ("7:00 PM ET", "7:00 PM EST", "7:00 PM EDT"):
            assert detect_timezone_from_string(text) == "America/New_York"

    def test_central_time(self):
        """CT, CST and CDT are all expected to map to America/Chicago."""
        for text in ("8:00 PM CT", "8:00 PM CST", "8:00 PM CDT"):
            assert detect_timezone_from_string(text) == "America/Chicago"

    def test_mountain_time(self):
        """MT and MST are expected to map to America/Denver."""
        for text in ("7:00 PM MT", "7:00 PM MST"):
            assert detect_timezone_from_string(text) == "America/Denver"

    def test_pacific_time(self):
        """PT, PST and PDT are all expected to map to America/Los_Angeles."""
        for text in ("7:00 PM PT", "7:00 PM PST", "7:00 PM PDT"):
            assert detect_timezone_from_string(text) == "America/Los_Angeles"

    def test_no_timezone(self):
        """A time string without any abbreviation should yield None."""
        for text in ("7:00 PM", "19:00"):
            assert detect_timezone_from_string(text) is None

    def test_case_insensitive(self):
        """Lower-case and mixed-case abbreviations should still be recognised."""
        for text in ("7:00 PM et", "7:00 PM Et"):
            assert detect_timezone_from_string(text) == "America/New_York"
class TestDetectTimezoneFromLocation:
    """Exercise detect_timezone_from_location with state / province codes."""

    def test_eastern_states(self):
        """NY, MA and FL are expected to resolve to the New York zone."""
        for code in ("NY", "MA", "FL"):
            assert detect_timezone_from_location(state=code) == "America/New_York"

    def test_central_states(self):
        """TX and IL are expected to resolve to the Chicago zone."""
        for code in ("TX", "IL"):
            assert detect_timezone_from_location(state=code) == "America/Chicago"

    def test_mountain_states(self):
        """CO maps to Denver; AZ is expected to get its own Phoenix zone."""
        expected_zones = {
            "CO": "America/Denver",
            "AZ": "America/Phoenix",
        }
        for code, zone in expected_zones.items():
            assert detect_timezone_from_location(state=code) == zone

    def test_pacific_states(self):
        """CA and WA are expected to resolve to the Los Angeles zone."""
        for code in ("CA", "WA"):
            assert detect_timezone_from_location(state=code) == "America/Los_Angeles"

    def test_canadian_provinces(self):
        """Canadian province codes are expected to map to Canadian zone names."""
        expected_zones = {
            "ON": "America/Toronto",
            "BC": "America/Vancouver",
            "AB": "America/Edmonton",
        }
        for code, zone in expected_zones.items():
            assert detect_timezone_from_location(state=code) == zone

    def test_case_insensitive(self):
        """State codes should match regardless of letter case."""
        for code in ("ny", "Ny"):
            assert detect_timezone_from_location(state=code) == "America/New_York"

    def test_unknown_state(self):
        """An unrecognised code, or no code at all, should yield None."""
        for code in ("XX", None):
            assert detect_timezone_from_location(state=code) is None
class TestParseDatetime:
    """Exercise parse_datetime: UTC conversion, timezone source and confidence level."""

    def test_basic_date_time(self):
        """An explicit abbreviation in the time string gives a high-confidence result."""
        parsed = parse_datetime("2025-12-25", "7:00 PM ET")
        utc = parsed.datetime_utc
        # 7:00 PM Eastern on Dec 25 falls on Dec 26 once converted to UTC.
        assert (utc.year, utc.month, utc.day) == (2025, 12, 26)
        assert parsed.source_timezone == "America/New_York"
        assert parsed.confidence == "high"

    def test_date_only(self):
        """A date with no time component keeps its calendar date in the UTC result."""
        utc = parse_datetime("2025-10-21").datetime_utc
        assert (utc.year, utc.month, utc.day) == (2025, 10, 21)

    def test_timezone_hint(self):
        """With no abbreviation in the string, timezone_hint is used (medium confidence)."""
        parsed = parse_datetime("2025-10-21", "7:00 PM", timezone_hint="America/Chicago")
        assert parsed.source_timezone == "America/Chicago"
        assert parsed.confidence == "medium"

    def test_location_inference(self):
        """With no abbreviation in the string, location_state is used (medium confidence)."""
        parsed = parse_datetime("2025-10-21", "7:00 PM", location_state="CA")
        assert parsed.source_timezone == "America/Los_Angeles"
        assert parsed.confidence == "medium"

    def test_default_to_eastern(self):
        """With no timezone information at all, Eastern is assumed and a warning is set."""
        parsed = parse_datetime("2025-10-21", "7:00 PM")
        assert parsed.source_timezone == "America/New_York"
        assert parsed.confidence == "low"
        assert parsed.warning is not None

    def test_invalid_date(self):
        """An unparseable date is reported as low confidence with a warning."""
        parsed = parse_datetime("not a date")
        assert parsed.confidence == "low"
        assert parsed.warning is not None
class TestConvertToUtc:
    """Exercise convert_to_utc with naive and timezone-aware datetimes."""

    def test_convert_naive_datetime(self):
        """A naive 7:00 PM is interpreted in the given zone and shifted to UTC."""
        local_evening = datetime(2025, 12, 25, 19, 0)
        converted = convert_to_utc(local_evening, "America/New_York")
        # Eastern is UTC-5 in December, so 19:00 local becomes 00:00 UTC the next day.
        assert (converted.day, converted.hour) == (26, 0)

    def test_convert_aware_datetime(self):
        """An already-aware 7:00 PM Pacific datetime is shifted to UTC."""
        pacific = ZoneInfo("America/Los_Angeles")
        local_evening = datetime(2025, 7, 4, 19, 0, tzinfo=pacific)
        converted = convert_to_utc(local_evening, "America/Los_Angeles")
        # Pacific is UTC-7 in July, so 19:00 local becomes 02:00 UTC the next day.
        assert (converted.day, converted.hour) == (5, 2)
class TestGetStadiumTimezone:
    """Tests for get_stadium_timezone function."""

    def test_explicit_timezone(self):
        """Test explicit timezone override."""
        tz = get_stadium_timezone("AZ", stadium_timezone="America/Phoenix")
        assert tz == "America/Phoenix"

        # The AZ case alone cannot prove the override wins, because the state
        # may well infer to America/Phoenix anyway. Pair the override with a
        # state that test_state_inference expects to resolve to a different
        # zone, so an ignored override would be caught.
        tz = get_stadium_timezone("NY", stadium_timezone="America/Phoenix")
        assert tz == "America/Phoenix"

    def test_state_inference(self):
        """Test timezone from state."""
        tz = get_stadium_timezone("NY")
        assert tz == "America/New_York"

    def test_default_eastern(self):
        """Test default to Eastern for unknown state."""
        tz = get_stadium_timezone("XX")
        assert tz == "America/New_York"