"""Tests for fuzzy string matching utilities.""" import pytest from sportstime_parser.normalizers.fuzzy import ( normalize_for_matching, fuzzy_match_team, fuzzy_match_stadium, exact_match, best_match, calculate_similarity, MatchCandidate, ) class TestNormalizeForMatching: """Tests for normalize_for_matching function.""" def test_basic_normalization(self): """Test basic string normalization.""" assert normalize_for_matching("Los Angeles Lakers") == "los angeles lakers" assert normalize_for_matching(" Boston Celtics ") == "boston celtics" def test_removes_common_prefixes(self): """Test removal of common prefixes.""" assert normalize_for_matching("The Boston Celtics") == "boston celtics" assert normalize_for_matching("Team Lakers") == "lakers" def test_removes_stadium_suffixes(self): """Test removal of stadium-related suffixes.""" assert normalize_for_matching("Fenway Park") == "fenway" assert normalize_for_matching("Madison Square Garden Arena") == "madison square garden" assert normalize_for_matching("Wrigley Field") == "wrigley" assert normalize_for_matching("TD Garden Center") == "td garden" class TestExactMatch: """Tests for exact_match function.""" def test_exact_match_primary_name(self): """Test exact match on primary name.""" candidates = [ MatchCandidate("nba_lal", "Los Angeles Lakers", ["Lakers", "LAL"]), MatchCandidate("nba_bos", "Boston Celtics", ["Celtics", "BOS"]), ] assert exact_match("Los Angeles Lakers", candidates) == "nba_lal" assert exact_match("Boston Celtics", candidates) == "nba_bos" def test_exact_match_alias(self): """Test exact match on alias.""" candidates = [ MatchCandidate("nba_lal", "Los Angeles Lakers", ["Lakers", "LAL"]), ] assert exact_match("Lakers", candidates) == "nba_lal" assert exact_match("LAL", candidates) == "nba_lal" def test_case_insensitive(self): """Test case insensitive matching.""" candidates = [ MatchCandidate("nba_lal", "Los Angeles Lakers", ["Lakers"]), ] assert exact_match("los angeles lakers", candidates) == "nba_lal" assert exact_match("LAKERS", candidates) == "nba_lal" def test_no_match(self): """Test no match returns None.""" candidates = [ MatchCandidate("nba_lal", "Los Angeles Lakers", ["Lakers"]), ] assert exact_match("New York Knicks", candidates) is None class TestFuzzyMatchTeam: """Tests for fuzzy_match_team function.""" def test_close_match(self): """Test fuzzy matching finds close matches.""" candidates = [ MatchCandidate("nba_lal", "Los Angeles Lakers", ["Lakers", "LA Lakers"]), MatchCandidate("nba_lac", "Los Angeles Clippers", ["Clippers", "LA Clippers"]), ] matches = fuzzy_match_team("LA Lakers", candidates, threshold=70) assert len(matches) > 0 assert matches[0].canonical_id == "nba_lal" def test_partial_name_match(self): """Test matching on partial team name.""" candidates = [ MatchCandidate("nba_bos", "Boston Celtics", ["Celtics", "BOS"]), ] matches = fuzzy_match_team("Celtics", candidates, threshold=80) assert len(matches) > 0 assert matches[0].canonical_id == "nba_bos" def test_threshold_filtering(self): """Test that threshold filters low-confidence matches.""" candidates = [ MatchCandidate("nba_bos", "Boston Celtics", []), ] # Very different string should not match at high threshold matches = fuzzy_match_team("xyz123", candidates, threshold=90) assert len(matches) == 0 def test_returns_top_n(self): """Test that top_n parameter limits results.""" candidates = [ MatchCandidate("nba_lal", "Los Angeles Lakers", []), MatchCandidate("nba_lac", "Los Angeles Clippers", []), MatchCandidate("mlb_lad", "Los Angeles Dodgers", []), ] matches = fuzzy_match_team("Los Angeles", candidates, threshold=50, top_n=2) assert len(matches) <= 2 class TestFuzzyMatchStadium: """Tests for fuzzy_match_stadium function.""" def test_stadium_match(self): """Test fuzzy matching stadium names.""" candidates = [ MatchCandidate("fenway", "Fenway Park", ["Fenway"]), MatchCandidate("td_garden", "TD Garden", ["Boston Garden"]), ] matches = fuzzy_match_stadium("Fenway Park Boston", candidates, threshold=70) assert len(matches) > 0 assert matches[0].canonical_id == "fenway" def test_naming_rights_change(self): """Test matching old stadium names.""" candidates = [ MatchCandidate( "chase_center", "Chase Center", ["Oracle Arena", "Oakland Coliseum Arena"], ), ] # Should match on alias matches = fuzzy_match_stadium("Oracle Arena", candidates, threshold=70) assert len(matches) > 0 class TestBestMatch: """Tests for best_match function.""" def test_prefers_exact_match(self): """Test that exact match is preferred over fuzzy.""" candidates = [ MatchCandidate("nba_lal", "Los Angeles Lakers", ["Lakers"]), MatchCandidate("nba_bos", "Boston Celtics", ["Celtics"]), ] result = best_match("Lakers", candidates) assert result is not None assert result.canonical_id == "nba_lal" assert result.confidence == 100 # Exact match def test_falls_back_to_fuzzy(self): """Test fallback to fuzzy when no exact match.""" candidates = [ MatchCandidate("nba_lal", "Los Angeles Lakers", ["Lakers"]), ] result = best_match("LA Laker", candidates, threshold=70) assert result is not None assert result.confidence < 100 # Fuzzy match def test_no_match_below_threshold(self): """Test returns None when no match above threshold.""" candidates = [ MatchCandidate("nba_lal", "Los Angeles Lakers", []), ] result = best_match("xyz123", candidates, threshold=90) assert result is None class TestCalculateSimilarity: """Tests for calculate_similarity function.""" def test_identical_strings(self): """Test identical strings have 100% similarity.""" assert calculate_similarity("Boston Celtics", "Boston Celtics") == 100 def test_similar_strings(self): """Test similar strings have high similarity.""" score = calculate_similarity("Boston Celtics", "Celtics Boston") assert score >= 90 def test_different_strings(self): """Test different strings have low similarity.""" score = calculate_similarity("Boston Celtics", "Los Angeles Lakers") assert score < 50 def test_empty_string(self): """Test empty string handling.""" score = calculate_similarity("", "Boston Celtics") assert score == 0