feat(scripts): complete data pipeline remediation
Scripts changes:
- Add WNBA abbreviation aliases to team_resolver.py
- Fix NHL stadium coordinates in stadium_resolver.py
- Add validate_aliases.py script for orphan detection
- Update scrapers with improved error handling
- Add DATA_AUDIT.md and REMEDIATION_PLAN.md documentation
- Update alias JSON files with new mappings

iOS bundle updates:
- Update games_canonical.json with latest scraped data
- Update teams_canonical.json and stadiums_canonical.json
- Sync alias files with Scripts versions

All 5 remediation phases complete.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -41,14 +41,15 @@ BACKOFF_FACTOR: float = 2.0 # exponential backoff multiplier
|
||||
INITIAL_BACKOFF: float = 1.0 # initial backoff in seconds
|
||||
|
||||
# Expected game counts per sport (approximate, for validation)
|
||||
# Updated 2026-01-20 based on 2025-26 season data
|
||||
EXPECTED_GAME_COUNTS: dict[str, int] = {
|
||||
"nba": 1230, # 30 teams × 82 games / 2
|
||||
"mlb": 2430, # 30 teams × 162 games / 2
|
||||
"nfl": 272, # 32 teams × 17 games / 2
|
||||
"mlb": 2430, # 30 teams × 162 games / 2 (regular season only)
|
||||
"nfl": 272, # 32 teams × 17 games / 2 (regular season only)
|
||||
"nhl": 1312, # 32 teams × 82 games / 2
|
||||
"mls": 493, # 30 teams × varies
|
||||
"wnba": 220, # 13 teams × 40 games / 2 (approx)
|
||||
"nwsl": 182, # 14 teams × 26 games / 2
|
||||
"mls": 540, # 30 teams × varies (updated for 2025 expansion)
|
||||
"wnba": 286, # 13 teams × 44 games / 2 (updated for 2025 expansion)
|
||||
"nwsl": 188, # 14→16 teams × varies (updated for 2025 expansion)
|
||||
}
|
||||
|
||||
# Minimum match score for fuzzy matching (0-100)
|
||||
|
||||
Reference in New Issue
Block a user