feat(scripts): rewrite parser as modular Python CLI

Replace monolithic scraping scripts with sportstime_parser package:

- Multi-source scrapers with automatic fallback for 7 sports
- Canonical ID generation for games, teams, and stadiums
- Fuzzy matching with configurable thresholds for name resolution
- CloudKit Web Services uploader with JWT auth, diff-based updates
- Resumable uploads with checkpoint state persistence
- Validation reports with manual review items and suggested matches
- Comprehensive test suite (249 tests)

CLI: sportstime-parser scrape|validate|upload|status|retry|clear

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-01-10 21:06:12 -06:00
parent 284a10d9e1
commit eeaf900e5a
109 changed files with 18415 additions and 266211 deletions

View File

@@ -0,0 +1,360 @@
"""Progress utilities using Rich for visual feedback."""
from contextlib import contextmanager
from typing import Callable, Generator, Iterable, Optional, TypeVar

from rich.progress import (
    Progress,
    SpinnerColumn,
    TextColumn,
    BarColumn,
    TaskProgressColumn,
    TimeElapsedColumn,
    TimeRemainingColumn,
    MofNCompleteColumn,
)
from rich.console import Console

from .logging import get_console
# Type variable for the items that track_progress() yields back unchanged.
T = TypeVar("T")
def create_progress() -> Progress:
    """Build the standard progress display used for determinate tasks.

    The display is assembled from a spinner, a bold-blue task description,
    a 40-character bar, and the task-progress, M-of-N, elapsed-time and
    remaining-time columns. It renders on the console returned by
    ``get_console()`` and is created with ``transient=False``.

    Returns:
        A new ``Progress`` instance; the caller is responsible for starting
        it (typically by using it as a context manager).
    """
    columns = (
        SpinnerColumn(),
        TextColumn("[bold blue]{task.description}"),
        BarColumn(bar_width=40),
        TaskProgressColumn(),
        MofNCompleteColumn(),
        TimeElapsedColumn(),
        TimeRemainingColumn(),
    )
    return Progress(*columns, console=get_console(), transient=False)
def create_spinner_progress() -> Progress:
    """Build a spinner-only progress display for indeterminate tasks.

    Only a spinner, the bold-blue task description and the elapsed time are
    shown (no bar or counts). The display renders on the console returned by
    ``get_console()`` and is created with ``transient=True``.

    Returns:
        A new ``Progress`` instance; the caller is responsible for starting
        it (typically by using it as a context manager).
    """
    columns = (
        SpinnerColumn(),
        TextColumn("[bold blue]{task.description}"),
        TimeElapsedColumn(),
    )
    return Progress(*columns, console=get_console(), transient=True)
@contextmanager
def progress_bar(
    description: str,
    total: Optional[int] = None,
) -> Generator[tuple[Progress, int], None, None]:
    """Run a single progress task for the duration of a ``with`` block.

    A spinner-only display is used when ``total`` is ``None``; otherwise the
    full bar from ``create_progress()`` is used.

    Args:
        description: Task description to display
        total: Total number of items (None for indeterminate)

    Yields:
        Tuple of (Progress instance, task_id)

    Example:
        with progress_bar("Scraping games", total=100) as (progress, task):
            for item in items:
                process(item)
                progress.advance(task)
    """
    # Pick the display flavour up front; both factories share one interface.
    make_display = create_spinner_progress if total is None else create_progress
    display = make_display()
    with display:
        yield display, display.add_task(description, total=total)
def track_progress(
    iterable: Iterable[T],
    description: str,
    total: Optional[int] = None,
) -> Generator[T, None, None]:
    """Yield every item of ``iterable`` while driving a progress display.

    Args:
        iterable: Items to iterate over
        description: Task description to display
        total: Total number of items (auto-detected if iterable has len)

    Yields:
        Items from the iterable

    Example:
        for game in track_progress(games, "Processing games"):
            process(game)
    """
    if total is None:
        # Sized containers tell us the total; anything else stays unknown.
        try:
            total = len(iterable)  # type: ignore
        except TypeError:
            pass

    indeterminate = total is None
    display = create_spinner_progress() if indeterminate else create_progress()

    with display:
        task_id = display.add_task(description, total=total)
        for element in iterable:
            yield element
            # The task is advanced only after the consumer asks for the
            # next item, i.e. once it has finished with this one.
            if indeterminate:
                display.update(task_id, advance=1)
            else:
                display.advance(task_id)
class ProgressTracker:
    """Track progress across multiple phases, one task at a time.

    Example:
        tracker = ProgressTracker()
        tracker.start("Scraping NBA")
        with tracker.task("Fetching schedule", total=12) as advance:
            for month in months:
                fetch(month)
                advance()
        with tracker.task("Parsing games", total=1230) as advance:
            for game in games:
                parse(game)
                advance()
        tracker.finish("Completed NBA scrape")
    """

    def __init__(self) -> None:
        """Initialize the progress tracker."""
        self._console = get_console()
        # Display and task of the currently running task(), if any.
        self._current_progress: Optional[Progress] = None
        self._current_task: Optional[int] = None

    def start(self, message: str) -> None:
        """Start a new tracking session with a message."""
        self._console.print(f"\n[bold cyan]>>> {message}[/bold cyan]")

    def finish(self, message: str) -> None:
        """Finish the tracking session with a message."""
        self._console.print(f"[bold green]<<< {message}[/bold green]\n")

    @contextmanager
    def task(
        self,
        description: str,
        total: Optional[int] = None,
    ) -> Generator[Callable[..., None], None, None]:
        """Context manager for a tracked task.

        While the ``with`` block runs, the tracker remembers the active
        display so that ``log()`` can print through it. The previously
        remembered display (normally none) is restored on exit, including
        when the block raises.

        Args:
            description: Task description
            total: Total items (None for indeterminate)

        Yields:
            Callable to advance the progress; takes an optional ``amount``
            (default 1).

        Example:
            with tracker.task("Processing", total=100) as advance:
                for item in items:
                    process(item)
                    advance()
        """
        previous = (self._current_progress, self._current_task)
        with progress_bar(description, total) as (progress, task_id):
            self._current_progress = progress
            self._current_task = task_id

            def advance(amount: int = 1) -> None:
                progress.advance(task_id, advance=amount)

            try:
                yield advance
            finally:
                # An exception raised in the caller's block is re-raised at
                # the yield above; without the finally the tracker would keep
                # pointing at a display that is about to be stopped.
                self._current_progress, self._current_task = previous

    def log(self, message: str) -> None:
        """Log a message (will be displayed above progress bar if active)."""
        if self._current_progress is not None:
            self._current_progress.console.print(f"  {message}")
        else:
            self._console.print(f"  {message}")
class ScrapeProgress:
    """Specialized progress tracker for scraping operations.

    Tracks counts of games, teams, stadiums scraped and provides
    formatted status updates.
    """

    def __init__(self, sport: str, season: int):
        """Initialize scrape progress for a sport.

        Args:
            sport: Sport code (e.g., 'nba')
            season: Season start year
        """
        self.sport = sport
        self.season = season
        self.games_count = 0
        self.teams_count = 0
        self.stadiums_count = 0
        self.errors_count = 0
        self._tracker = ProgressTracker()

    def start(self) -> None:
        """Start the scraping session."""
        self._tracker.start(
            f"Scraping {self.sport.upper()} {self.season}-{self.season + 1}"
        )

    def finish(self) -> None:
        """Finish the scraping session with summary."""
        summary = (
            f"Scraped {self.games_count} games, "
            f"{self.teams_count} teams, "
            f"{self.stadiums_count} stadiums"
        )
        if self.errors_count > 0:
            summary += f" ({self.errors_count} errors)"
        self._tracker.finish(summary)

    @contextmanager
    def _counted_task(
        self,
        description: str,
        total: Optional[int],
        counter: str,
    ) -> Generator[Callable[..., None], None, None]:
        """Run a tracker task whose advance callable also bumps a counter.

        Args:
            description: Task description to display
            total: Total items (None for indeterminate)
            counter: Name of the integer attribute on ``self`` to increment

        Yields:
            Callable taking an optional ``amount`` (default 1) that adds
            ``amount`` to the named counter and then advances the task.
        """
        with self._tracker.task(description, total=total) as advance:

            def advance_and_count(amount: int = 1) -> None:
                setattr(self, counter, getattr(self, counter) + amount)
                advance(amount)

            yield advance_and_count

    @contextmanager
    def scraping_schedule(
        self,
        total_months: Optional[int] = None,
    ) -> Generator[Callable[..., None], None, None]:
        """Track schedule scraping progress.

        Args:
            total_months: Number of schedule pages to fetch (None for
                indeterminate)

        Yields:
            Callable to advance the progress; no counter is updated.
        """
        with self._tracker.task(
            f"Fetching {self.sport.upper()} schedule",
            total=total_months,
        ) as advance:
            yield advance

    @contextmanager
    def parsing_games(
        self,
        total_games: Optional[int] = None,
    ) -> Generator[Callable[..., None], None, None]:
        """Track game parsing progress.

        Yields:
            Callable that advances the task and adds to ``games_count``.
        """
        with self._counted_task(
            "Parsing games", total_games, "games_count"
        ) as advance:
            yield advance

    @contextmanager
    def resolving_teams(
        self,
        total_teams: Optional[int] = None,
    ) -> Generator[Callable[..., None], None, None]:
        """Track team resolution progress.

        Yields:
            Callable that advances the task and adds to ``teams_count``.
        """
        with self._counted_task(
            "Resolving teams", total_teams, "teams_count"
        ) as advance:
            yield advance

    @contextmanager
    def resolving_stadiums(
        self,
        total_stadiums: Optional[int] = None,
    ) -> Generator[Callable[..., None], None, None]:
        """Track stadium resolution progress.

        Yields:
            Callable that advances the task and adds to ``stadiums_count``.
        """
        with self._counted_task(
            "Resolving stadiums", total_stadiums, "stadiums_count"
        ) as advance:
            yield advance

    def log_error(self, message: str) -> None:
        """Log an error during scraping and count it in ``errors_count``."""
        self.errors_count += 1
        self._tracker.log(f"[red]Error: {message}[/red]")

    def log_warning(self, message: str) -> None:
        """Log a warning during scraping."""
        self._tracker.log(f"[yellow]Warning: {message}[/yellow]")

    def log_info(self, message: str) -> None:
        """Log an info message during scraping."""
        self._tracker.log(message)
class SimpleProgressBar:
    """Thin handle over one task of a ``Progress`` display.

    Lets batch code move a single task forward without having to carry the
    display and its task id around separately.

    Example:
        with create_progress_bar(total=100, description="Uploading") as progress:
            for item in items:
                upload(item)
                progress.advance()
    """

    def __init__(self, progress: Progress, task_id: int):
        """Bind the handle to ``task_id`` on ``progress``."""
        self._task_id = task_id
        self._progress = progress

    def advance(self, amount: int = 1) -> None:
        """Move the task forward by ``amount`` steps (default 1)."""
        display, task = self._progress, self._task_id
        display.advance(task, advance=amount)

    def update(self, completed: int) -> None:
        """Set the task's completed count to the absolute value ``completed``."""
        display, task = self._progress, self._task_id
        display.update(task, completed=completed)
@contextmanager
def create_progress_bar(
    total: int,
    description: str = "Progress",
) -> Generator[SimpleProgressBar, None, None]:
    """Open a determinate progress bar and hand back a simple handle to it.

    The display from ``create_progress()`` is active for the duration of the
    ``with`` block and holds a single task.

    Args:
        total: Total number of items
        description: Task description

    Yields:
        SimpleProgressBar with advance() and update() methods

    Example:
        with create_progress_bar(total=100, description="Uploading") as progress:
            for item in items:
                upload(item)
                progress.advance()
    """
    display = create_progress()
    with display:
        handle = SimpleProgressBar(
            display,
            display.add_task(description, total=total),
        )
        yield handle