# Listing metadata from the code viewer: 188 lines, 6.4 KiB, Python.
"""Tests for canonical ID generation."""
|
|
|
|
import pytest
|
|
from datetime import datetime, date
|
|
|
|
from sportstime_parser.normalizers.canonical_id import (
|
|
generate_game_id,
|
|
generate_team_id,
|
|
generate_team_id_from_abbrev,
|
|
generate_stadium_id,
|
|
parse_game_id,
|
|
normalize_string,
|
|
)
|
|
|
|
|
|
class TestNormalizeString:
    """Tests for normalize_string function.

    Every case feeds a raw display string to ``normalize_string`` and checks
    the lowercase, underscore-separated result the assertions expect.
    """

    def test_basic_normalization(self):
        """Test basic string normalization."""
        assert normalize_string("New York") == "new_york"
        assert normalize_string("Los Angeles") == "los_angeles"

    def test_removes_special_characters(self):
        """Test that special characters are removed."""
        assert normalize_string("AT&T Stadium") == "att_stadium"
        assert normalize_string("St. Louis") == "st_louis"
        assert normalize_string("O'Brien Field") == "obrien_field"

    def test_collapses_whitespace(self):
        """Test that multiple spaces are collapsed."""
        # Runs of spaces are the point of this test: single-spaced input
        # would only repeat test_basic_normalization.
        assert normalize_string("New   York") == "new_york"
        # Leading/trailing padding, alone and combined with inner runs.
        assert normalize_string(" Los Angeles ") == "los_angeles"
        assert normalize_string("  Los   Angeles  ") == "los_angeles"

    def test_empty_string(self):
        """Test empty string handling."""
        assert normalize_string("") == ""
        # Whitespace-only input of any length is expected to normalize to
        # the empty string as well.
        assert normalize_string(" ") == ""
        assert normalize_string("   ") == ""

    def test_unicode_normalization(self):
        """Test unicode characters are handled."""
        # Accented letters are expected to map to their plain ASCII base.
        assert normalize_string("Café") == "cafe"
        assert normalize_string("José") == "jose"
|
|
|
|
|
|
class TestGenerateGameId:
    """Checks the canonical game ID strings built by generate_game_id."""

    def test_basic_game_id(self):
        """A plain ``date`` produces the standard game ID layout."""
        fields = {
            "sport": "nba",
            "season": 2025,
            "away_abbrev": "bos",
            "home_abbrev": "lal",
            "game_date": date(2025, 12, 25),
        }
        assert generate_game_id(**fields) == "game_nba_2025_20251225_bos_lal"

    def test_game_id_with_datetime(self):
        """A ``datetime`` is accepted; the expected ID carries no time part."""
        fields = {
            "sport": "mlb",
            "season": 2026,
            "away_abbrev": "nyy",
            "home_abbrev": "bos",
            "game_date": datetime(2026, 4, 1, 19, 0),
        }
        assert generate_game_id(**fields) == "game_mlb_2026_20260401_nyy_bos"

    def test_game_id_with_game_number(self):
        """Doubleheader games get the game number appended as a suffix."""
        # Both games share every field except the game number.
        shared = {
            "sport": "mlb",
            "season": 2026,
            "away_abbrev": "nyy",
            "home_abbrev": "bos",
            "game_date": date(2026, 7, 4),
        }
        first = generate_game_id(**shared, game_number=1)
        second = generate_game_id(**shared, game_number=2)

        assert first == "game_mlb_2026_20260704_nyy_bos_1"
        assert second == "game_mlb_2026_20260704_nyy_bos_2"

    def test_sport_lowercased(self):
        """Upper-case sport and abbreviations come out lower-cased."""
        fields = {
            "sport": "NBA",
            "season": 2025,
            "away_abbrev": "BOS",
            "home_abbrev": "LAL",
            "game_date": date(2025, 12, 25),
        }
        assert generate_game_id(**fields) == "game_nba_2025_20251225_bos_lal"
|
|
|
|
|
|
class TestParseGameId:
    """Checks the fields parse_game_id extracts from a game ID string."""

    def test_parse_basic_game_id(self):
        """An ID without a suffix parses with ``game_number`` set to None."""
        result = parse_game_id("game_nba_2025_20251225_bos_lal")
        expected = {
            "sport": "nba",
            "season": 2025,
            "away_abbrev": "bos",
            "home_abbrev": "lal",
            "year": 2025,
            "month": 12,
            "day": 25,
        }
        for field, value in expected.items():
            assert result[field] == value
        assert result["game_number"] is None

    def test_parse_game_id_with_game_number(self):
        """A trailing game number is parsed alongside the other fields."""
        result = parse_game_id("game_mlb_2026_20260704_nyy_bos_2")
        expected = {
            "sport": "mlb",
            "season": 2026,
            "away_abbrev": "nyy",
            "home_abbrev": "bos",
            "year": 2026,
            "month": 7,
            "day": 4,
            "game_number": 2,
        }
        for field, value in expected.items():
            assert result[field] == value

    def test_parse_invalid_game_id(self):
        """Malformed IDs must raise ValueError."""
        malformed = (
            "invalid",
            "nba_2025_bos",  # Missing game_ prefix
            "",
            "game_nba_2025_bos_lal",  # Missing date
        )
        for candidate in malformed:
            with pytest.raises(ValueError):
                parse_game_id(candidate)
|
|
|
|
|
|
class TestGenerateTeamId:
    """Checks team IDs built from a sport, city and team name."""

    def test_basic_team_id(self):
        """City and name are joined after the sport in the team ID."""
        expected = "team_nba_los_angeles_lakers"
        result = generate_team_id(sport="nba", city="Los Angeles", name="Lakers")
        assert result == expected

    def test_team_id_normalizes_input(self):
        """Mixed-case inputs with spaces are normalized in the team ID."""
        expected = "team_nba_new_york_yankees"
        result = generate_team_id(sport="NBA", city="New York", name="Yankees")
        assert result == expected
|
|
|
|
|
|
class TestGenerateTeamIdFromAbbrev:
    """Checks team IDs built from a sport and a team abbreviation."""

    def test_basic_team_id_from_abbrev(self):
        """The abbreviation follows the sport in the team ID."""
        expected = "team_nba_lal"
        result = generate_team_id_from_abbrev(sport="nba", abbreviation="LAL")
        assert result == expected

    def test_lowercases_abbreviation(self):
        """Upper-case sport and abbreviation come out lower-cased."""
        expected = "team_mlb_nyy"
        result = generate_team_id_from_abbrev(sport="MLB", abbreviation="NYY")
        assert result == expected
|
|
|
|
|
|
class TestGenerateStadiumId:
    """Checks stadium IDs built from a sport and a venue name."""

    def test_basic_stadium_id(self):
        """A simple two-word venue name becomes an underscore-joined ID."""
        expected = "stadium_mlb_fenway_park"
        result = generate_stadium_id(sport="mlb", name="Fenway Park")
        assert result == expected

    def test_stadium_id_special_characters(self):
        """Punctuation such as ``&`` is dropped from the venue name."""
        expected = "stadium_nfl_att_stadium"
        result = generate_stadium_id(sport="nfl", name="AT&T Stadium")
        assert result == expected

    def test_stadium_id_with_sponsor(self):
        """A dotted sponsor name loses its dot in the stadium ID."""
        expected = "stadium_nba_cryptocom_arena"
        result = generate_stadium_id(sport="nba", name="Crypto.com Arena")
        assert result == expected
|