Complete Python package for scraping, normalizing, and uploading sports schedule data to CloudKit.

Includes:
- Multi-source scrapers for NBA, MLB, NFL, NHL, MLS, WNBA, and NWSL
- Canonical ID system for teams, stadiums, and games
- Fuzzy matching with manual alias support
- CloudKit uploader with batch operations and deduplication
- Comprehensive test suite with fixtures
- WNBA abbreviation aliases for improved team resolution
- Alias validation script to detect orphan references

All 5 phases of the data remediation plan are complete:
- Phase 1: Alias fixes (team/stadium alias additions)
- Phase 2: NHL stadium coordinate fixes
- Phase 3: Re-scrape validation
- Phase 4: iOS bundle update
- Phase 5: Code quality improvements (WNBA aliases)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
92 lines
2.0 KiB
Python
92 lines
2.0 KiB
Python
"""Normalizers for team, stadium, and game data."""
|
|
|
|
from .canonical_id import (
|
|
generate_game_id,
|
|
generate_team_id,
|
|
generate_team_id_from_abbrev,
|
|
generate_stadium_id,
|
|
parse_game_id,
|
|
normalize_string,
|
|
)
|
|
from .timezone import (
|
|
TimezoneResult,
|
|
parse_datetime,
|
|
convert_to_utc,
|
|
detect_timezone_from_string,
|
|
detect_timezone_from_location,
|
|
get_stadium_timezone,
|
|
create_timezone_warning,
|
|
)
|
|
from .fuzzy import (
|
|
MatchCandidate,
|
|
fuzzy_match_team,
|
|
fuzzy_match_stadium,
|
|
exact_match,
|
|
best_match,
|
|
calculate_similarity,
|
|
normalize_for_matching,
|
|
)
|
|
from .alias_loader import (
|
|
TeamAliasLoader,
|
|
StadiumAliasLoader,
|
|
get_team_alias_loader,
|
|
get_stadium_alias_loader,
|
|
resolve_team_alias,
|
|
resolve_stadium_alias,
|
|
)
|
|
from .team_resolver import (
|
|
TeamResolver,
|
|
TeamResolveResult,
|
|
get_team_resolver,
|
|
resolve_team,
|
|
)
|
|
from .stadium_resolver import (
|
|
StadiumResolver,
|
|
StadiumResolveResult,
|
|
get_stadium_resolver,
|
|
resolve_stadium,
|
|
)
|
|
|
|
# Public surface of this package. Every entry is a name brought in by one of
# the relative imports at the top of this file; entries are grouped by the
# module they are imported from and kept in the same order as those imports.
__all__ = [
    # from .canonical_id
    "generate_game_id",
    "generate_team_id",
    "generate_team_id_from_abbrev",
    "generate_stadium_id",
    "parse_game_id",
    "normalize_string",

    # from .timezone
    "TimezoneResult",
    "parse_datetime",
    "convert_to_utc",
    "detect_timezone_from_string",
    "detect_timezone_from_location",
    "get_stadium_timezone",
    "create_timezone_warning",

    # from .fuzzy
    "MatchCandidate",
    "fuzzy_match_team",
    "fuzzy_match_stadium",
    "exact_match",
    "best_match",
    "calculate_similarity",
    "normalize_for_matching",

    # from .alias_loader
    "TeamAliasLoader",
    "StadiumAliasLoader",
    "get_team_alias_loader",
    "get_stadium_alias_loader",
    "resolve_team_alias",
    "resolve_stadium_alias",

    # from .team_resolver
    "TeamResolver",
    "TeamResolveResult",
    "get_team_resolver",
    "resolve_team",

    # from .stadium_resolver
    "StadiumResolver",
    "StadiumResolveResult",
    "get_stadium_resolver",
    "resolve_stadium",
]
|