feat(scripts): add sportstime-parser data pipeline

Complete Python package for scraping, normalizing, and uploading
sports schedule data to CloudKit. Includes:

- Multi-source scrapers for NBA, MLB, NFL, NHL, MLS, WNBA, NWSL
- Canonical ID system for teams, stadiums, and games
- Fuzzy matching with manual alias support
- CloudKit uploader with batch operations and deduplication
- Comprehensive test suite with fixtures
- WNBA abbreviation aliases for improved team resolution
- Alias validation script to detect orphan references

All 5 phases of the data remediation plan are complete:
- Phase 1: Alias fixes (team/stadium alias additions)
- Phase 2: NHL stadium coordinate fixes
- Phase 3: Re-scrape validation
- Phase 4: iOS bundle update
- Phase 5: Code quality improvements (WNBA aliases)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-01-20 18:56:25 -06:00
parent ac78042a7e
commit 52d445bca4
76 changed files with 25065 additions and 0 deletions

View File

@@ -0,0 +1,149 @@
"""Logging infrastructure for sportstime-parser."""
import logging
import sys
from datetime import datetime
from pathlib import Path
from typing import Optional
from rich.console import Console
from rich.logging import RichHandler
from ..config import SCRIPTS_DIR
# Module-level state shared by the helpers in this module.
# Cached application logger; populated on the first get_logger() call.
_logger: Optional[logging.Logger] = None
# Verbose flag; written by set_verbose() and read by is_verbose().
_verbose: bool = False
# Shared Rich console; created lazily by get_console().
_console: Optional[Console] = None
def get_console() -> Console:
    """Return the process-wide Rich console, creating it on first use.

    Returns:
        The single ``Console`` instance cached in the module-level
        ``_console`` variable.
    """
    global _console
    console = _console
    if console is None:
        # First caller: build the console and cache it for everyone else.
        console = _console = Console()
    return console
def set_verbose(verbose: bool) -> None:
    """Record the verbose flag and adjust the application logger to match.

    Args:
        verbose: When truthy the logger level becomes ``DEBUG``; otherwise
            it becomes ``INFO``.
    """
    global _verbose
    _verbose = verbose
    level = logging.DEBUG if verbose else logging.INFO
    get_logger().setLevel(level)
def is_verbose() -> bool:
    """Return the current value of the module-level verbose flag.

    Returns:
        Whatever was last passed to ``set_verbose`` (``False`` initially).
    """
    return _verbose
def get_logger() -> logging.Logger:
    """Get or create the application logger.

    On the first successful call this configures the ``sportstime_parser``
    logger with two handlers and caches it in the module-level ``_logger``:

    * a Rich console handler (markup enabled, rich tracebacks with locals),
    * a UTF-8 file handler writing to
      ``SCRIPTS_DIR/logs/parser_<YYYYmmdd_HHMMSS>.log``.

    Both handlers accept ``DEBUG`` and above; the logger's own level
    (``INFO`` initially) decides what actually gets through.

    Returns:
        The configured, cached logger.

    Raises:
        OSError: If the log directory or log file cannot be created. In
            that case nothing is cached, so a later call retries the full
            setup instead of returning a half-configured logger.
    """
    global _logger
    if _logger is not None:
        return _logger

    # Build every handler *before* touching the logger or the global cache.
    # If file setup raises, no partially configured logger is left behind.

    # Console handler with Rich formatting.
    console_handler = RichHandler(
        console=get_console(),
        show_time=True,
        show_path=False,
        rich_tracebacks=True,
        tracebacks_show_locals=True,
        markup=True,
    )
    console_handler.setLevel(logging.DEBUG)
    console_handler.setFormatter(logging.Formatter("%(message)s"))

    # File handler for persistent logs; one file per process start time.
    log_dir = SCRIPTS_DIR / "logs"
    # parents=True so a missing intermediate directory is not an error.
    log_dir.mkdir(parents=True, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_file = log_dir / f"parser_{timestamp}.log"
    file_handler = logging.FileHandler(log_file, encoding="utf-8")
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(
        logging.Formatter(
            "%(asctime)s | %(levelname)-8s | %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
    )

    logger = logging.getLogger("sportstime_parser")
    logger.setLevel(logging.INFO)
    # Prevent propagation to the root logger.
    logger.propagate = False
    # Clear any existing handlers before attaching ours.
    logger.handlers.clear()
    logger.addHandler(console_handler)
    logger.addHandler(file_handler)

    # Publish only once the logger is fully configured.
    _logger = logger
    return logger
def log_game(
    sport: str,
    game_id: str,
    home: str,
    away: str,
    date: str,
    status: str = "parsed",
) -> None:
    """Emit a debug line for a processed game; a no-op unless verbose.

    Args:
        sport: Sport code; upper-cased in the log line.
        game_id: Identifier of the game.
        home: Home team label.
        away: Away team label.
        date: Game date, logged as given.
        status: Processing status appended to the line.
    """
    if is_verbose():
        get_logger().debug(
            f"[{sport.upper()}] {game_id}: {away} @ {home} ({date}) - {status}"
        )
def log_team(sport: str, team_id: str, name: str, status: str = "resolved") -> None:
    """Emit a debug line for a processed team; a no-op unless verbose.

    Args:
        sport: Sport code; upper-cased in the log line.
        team_id: Identifier the team name maps to.
        name: Team name as encountered.
        status: Processing status appended to the line.
    """
    if is_verbose():
        get_logger().debug(f"[{sport.upper()}] Team: {name} -> {team_id} ({status})")
def log_stadium(sport: str, stadium_id: str, name: str, status: str = "resolved") -> None:
    """Emit a debug line for a processed stadium; a no-op unless verbose.

    Args:
        sport: Sport code; upper-cased in the log line.
        stadium_id: Identifier the stadium name maps to.
        name: Stadium name as encountered.
        status: Processing status appended to the line.
    """
    if is_verbose():
        get_logger().debug(
            f"[{sport.upper()}] Stadium: {name} -> {stadium_id} ({status})"
        )
def log_error(message: str, exc_info: bool = False) -> None:
    """Send *message* to the application logger at ERROR level.

    Args:
        message: Text to log.
        exc_info: Forwarded to ``Logger.error`` as its ``exc_info`` argument.
    """
    get_logger().error(message, exc_info=exc_info)
def log_warning(message: str) -> None:
    """Send *message* to the application logger at WARNING level.

    Args:
        message: Text to log.
    """
    get_logger().warning(message)
def log_success(message: str) -> None:
    """Log *message* at INFO level, prefixed with a green check mark.

    The prefix is written as Rich markup (``[green]...[/green]``).

    Args:
        message: Text to log after the check mark.
    """
    get_logger().info(f"[green]✓[/green] {message}")
def log_failure(message: str) -> None:
    """Log *message* at INFO level, prefixed with a red cross.

    The prefix is written as Rich markup (``[red]...[/red]``). Note that
    the record is emitted at INFO, not ERROR.

    Args:
        message: Text to log after the cross.
    """
    get_logger().info(f"[red]✗[/red] {message}")