feat(scripts): add sportstime-parser data pipeline
Complete Python package for scraping, normalizing, and uploading sports schedule data to CloudKit.

Includes:
- Multi-source scrapers for NBA, MLB, NFL, NHL, MLS, WNBA, NWSL
- Canonical ID system for teams, stadiums, and games
- Fuzzy matching with manual alias support
- CloudKit uploader with batch operations and deduplication
- Comprehensive test suite with fixtures
- WNBA abbreviation aliases for improved team resolution
- Alias validation script to detect orphan references

All 5 phases of the data remediation plan completed:
- Phase 1: Alias fixes (team/stadium alias additions)
- Phase 2: NHL stadium coordinate fixes
- Phase 3: Re-scrape validation
- Phase 4: iOS bundle update
- Phase 5: Code quality improvements (WNBA aliases)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
91
sportstime_parser/normalizers/__init__.py
Normal file
91
sportstime_parser/normalizers/__init__.py
Normal file
@@ -0,0 +1,91 @@
|
||||
"""Normalizers for team, stadium, and game data.

This package initializer re-exports names from its submodules
(``canonical_id``, ``timezone``, ``fuzzy``, ``alias_loader``,
``team_resolver`` and ``stadium_resolver``) so that callers can import
them directly from ``sportstime_parser.normalizers``.  Every re-exported
name is also listed in ``__all__``.
"""

from .canonical_id import (
    generate_game_id,
    generate_team_id,
    generate_team_id_from_abbrev,
    generate_stadium_id,
    parse_game_id,
    normalize_string,
)
from .timezone import (
    TimezoneResult,
    parse_datetime,
    convert_to_utc,
    detect_timezone_from_string,
    detect_timezone_from_location,
    get_stadium_timezone,
    create_timezone_warning,
)
from .fuzzy import (
    MatchCandidate,
    fuzzy_match_team,
    fuzzy_match_stadium,
    exact_match,
    best_match,
    calculate_similarity,
    normalize_for_matching,
)
from .alias_loader import (
    TeamAliasLoader,
    StadiumAliasLoader,
    get_team_alias_loader,
    get_stadium_alias_loader,
    resolve_team_alias,
    resolve_stadium_alias,
)
from .team_resolver import (
    TeamResolver,
    TeamResolveResult,
    get_team_resolver,
    resolve_team,
)
from .stadium_resolver import (
    StadiumResolver,
    StadiumResolveResult,
    get_stadium_resolver,
    resolve_stadium,
)

# Explicit public API: one entry per name imported above, grouped by the
# submodule it comes from and kept in the same order as the imports.
__all__ = [
    # Canonical ID
    "generate_game_id",
    "generate_team_id",
    "generate_team_id_from_abbrev",
    "generate_stadium_id",
    "parse_game_id",
    "normalize_string",
    # Timezone
    "TimezoneResult",
    "parse_datetime",
    "convert_to_utc",
    "detect_timezone_from_string",
    "detect_timezone_from_location",
    "get_stadium_timezone",
    "create_timezone_warning",
    # Fuzzy matching
    "MatchCandidate",
    "fuzzy_match_team",
    "fuzzy_match_stadium",
    "exact_match",
    "best_match",
    "calculate_similarity",
    "normalize_for_matching",
    # Alias loaders
    "TeamAliasLoader",
    "StadiumAliasLoader",
    "get_team_alias_loader",
    "get_stadium_alias_loader",
    "resolve_team_alias",
    "resolve_stadium_alias",
    # Team resolver
    "TeamResolver",
    "TeamResolveResult",
    "get_team_resolver",
    "resolve_team",
    # Stadium resolver
    "StadiumResolver",
    "StadiumResolveResult",
    "get_stadium_resolver",
    "resolve_stadium",
]
Reference in New Issue
Block a user