feat(scripts): rewrite parser as modular Python CLI

Replace monolithic scraping scripts with sportstime_parser package:

- Multi-source scrapers with automatic fallback for 7 sports
- Canonical ID generation for games, teams, and stadiums
- Fuzzy matching with configurable thresholds for name resolution
- CloudKit Web Services uploader with JWT auth, diff-based updates
- Resumable uploads with checkpoint state persistence
- Validation reports with manual review items and suggested matches
- Comprehensive test suite (249 tests)

CLI: sportstime-parser scrape|validate|upload|status|retry|clear

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-01-10 21:06:12 -06:00
parent 284a10d9e1
commit eeaf900e5a
109 changed files with 18415 additions and 266211 deletions

View File

@@ -0,0 +1,914 @@
"""CLI subcommand definitions for sportstime-parser."""
import argparse
import sys
from typing import Optional
from .config import (
DEFAULT_SEASON,
CLOUDKIT_ENVIRONMENT,
SUPPORTED_SPORTS,
OUTPUT_DIR,
)
from .utils.logging import get_logger, set_verbose, log_success, log_failure
def create_parser() -> argparse.ArgumentParser:
    """Build the top-level argument parser and every subcommand parser.

    Options shared by several subcommands (the ``sport`` positional,
    ``--season``, ``--environment``) are attached through small local
    helpers so their definitions stay consistent across commands.

    Returns:
        Fully configured ``argparse.ArgumentParser``.
    """

    def _add_sport_arg(sub: argparse.ArgumentParser, verb: str) -> None:
        # Positional sport code; "all" fans out to every supported sport.
        sub.add_argument(
            "sport",
            choices=SUPPORTED_SPORTS + ["all"],
            help=f"Sport to {verb} (or 'all' for all sports)",
        )

    def _add_season_arg(sub: argparse.ArgumentParser) -> None:
        sub.add_argument(
            "--season", "-s",
            type=int,
            default=DEFAULT_SEASON,
            help=f"Season start year (default: {DEFAULT_SEASON})",
        )

    def _add_environment_arg(sub: argparse.ArgumentParser) -> None:
        sub.add_argument(
            "--environment", "-e",
            choices=["development", "production"],
            default=CLOUDKIT_ENVIRONMENT,
            help=f"CloudKit environment (default: {CLOUDKIT_ENVIRONMENT})",
        )

    parser = argparse.ArgumentParser(
        prog="sportstime-parser",
        description="Sports data scraper and CloudKit uploader for SportsTime app",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
sportstime-parser scrape nba --season 2025
sportstime-parser scrape all --season 2025
sportstime-parser validate nba --season 2025
sportstime-parser upload nba --season 2025
sportstime-parser status
""",
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Enable verbose output",
    )
    subparsers = parser.add_subparsers(
        dest="command",
        title="commands",
        description="Available commands",
        metavar="COMMAND",
    )

    # scrape: pull schedules/teams/stadiums from the source sites.
    scrape_parser = subparsers.add_parser(
        "scrape",
        help="Scrape game schedules, teams, and stadiums",
        description="Scrape sports data from multiple sources",
    )
    _add_sport_arg(scrape_parser, "scrape")
    _add_season_arg(scrape_parser)
    scrape_parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Parse and validate only, don't write output files",
    )
    scrape_parser.set_defaults(func=cmd_scrape)

    # validate: re-run validation over already-scraped JSON files.
    validate_parser = subparsers.add_parser(
        "validate",
        help="Run validation on existing scraped data",
        description="Validate scraped data and regenerate reports",
    )
    _add_sport_arg(validate_parser, "validate")
    _add_season_arg(validate_parser)
    validate_parser.set_defaults(func=cmd_validate)

    # upload: diff against CloudKit and push only changed records.
    upload_parser = subparsers.add_parser(
        "upload",
        help="Upload scraped data to CloudKit",
        description="Upload data to CloudKit with resumable, diff-based updates",
    )
    _add_sport_arg(upload_parser, "upload")
    _add_season_arg(upload_parser)
    _add_environment_arg(upload_parser)
    upload_parser.add_argument(
        "--resume",
        action="store_true",
        help="Resume interrupted upload from last checkpoint",
    )
    upload_parser.set_defaults(func=cmd_upload)

    # status: summarize scraped data and in-flight upload sessions.
    status_parser = subparsers.add_parser(
        "status",
        help="Show current scrape and upload status",
        description="Display summary of scraped data and upload progress",
    )
    status_parser.set_defaults(func=cmd_status)

    # retry: re-attempt records that failed during a previous upload.
    retry_parser = subparsers.add_parser(
        "retry",
        help="Retry failed uploads",
        description="Retry records that failed during previous upload attempts",
    )
    _add_sport_arg(retry_parser, "retry")
    _add_season_arg(retry_parser)
    _add_environment_arg(retry_parser)
    retry_parser.add_argument(
        "--max-retries",
        type=int,
        default=3,
        help="Maximum retry attempts per record (default: 3)",
    )
    retry_parser.set_defaults(func=cmd_retry)

    # clear: drop persisted upload-session state.
    clear_parser = subparsers.add_parser(
        "clear",
        help="Clear upload session state",
        description="Delete upload session state files to start fresh",
    )
    _add_sport_arg(clear_parser, "clear")
    _add_season_arg(clear_parser)
    _add_environment_arg(clear_parser)
    clear_parser.set_defaults(func=cmd_clear)
    return parser
def get_scraper(sport: str, season: int):
    """Return a scraper instance for *sport* covering *season*.

    Scraper modules are imported lazily so one sport's dependencies do
    not have to be installed to use another sport.

    Args:
        sport: Sport code (e.g. "nba").
        season: Season start year.

    Returns:
        Scraper instance.

    Raises:
        NotImplementedError: If no scraper exists for the sport.
    """
    # Bind the matching factory, then construct once at the end.
    if sport == "nba":
        from .scrapers.nba import create_nba_scraper as factory
    elif sport == "mlb":
        from .scrapers.mlb import create_mlb_scraper as factory
    elif sport == "nfl":
        from .scrapers.nfl import create_nfl_scraper as factory
    elif sport == "nhl":
        from .scrapers.nhl import create_nhl_scraper as factory
    elif sport == "mls":
        from .scrapers.mls import create_mls_scraper as factory
    elif sport == "wnba":
        from .scrapers.wnba import create_wnba_scraper as factory
    elif sport == "nwsl":
        from .scrapers.nwsl import create_nwsl_scraper as factory
    else:
        raise NotImplementedError(f"Scraper for {sport} not yet implemented")
    return factory(season)
def cmd_scrape(args: argparse.Namespace) -> int:
    """Execute the scrape command.

    For each requested sport: scrape games/teams/stadiums, validate the
    result, write JSON output files and a validation report (skipped under
    --dry-run), and log a per-sport summary.

    Returns:
        0 if every sport scraped successfully, 1 otherwise.
    """
    # Deferred imports keep CLI startup (e.g. --help) fast.
    from .models.game import save_games
    from .models.team import save_teams
    from .models.stadium import save_stadiums
    from .validators.report import generate_report, validate_games
    logger = get_logger()
    # "all" expands to every supported sport code.
    sports = SUPPORTED_SPORTS if args.sport == "all" else [args.sport]
    logger.info(f"Scraping {', '.join(sports)} for {args.season}-{args.season + 1} season")
    if args.dry_run:
        logger.info("Dry run mode - no files will be written")
    # Ensure output directory exists
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    success_count = 0
    failure_count = 0
    for sport in sports:
        logger.info(f"\n{'='*50}")
        logger.info(f"Scraping {sport.upper()}...")
        logger.info(f"{'='*50}")
        try:
            # Get scraper for this sport
            scraper = get_scraper(sport, args.season)
            # Scrape all data
            result = scraper.scrape_all()
            if not result.success:
                log_failure(f"{sport.upper()}: {result.error_message}")
                failure_count += 1
                continue
            # Validate games; scraper-reported items and validator findings
            # are merged into a single review list for the report.
            validation_issues = validate_games(result.games)
            all_review_items = result.review_items + validation_issues
            # Generate validation report
            report = generate_report(
                sport=sport,
                season=args.season,
                source=result.source,
                games=result.games,
                teams=result.teams,
                stadiums=result.stadiums,
                review_items=all_review_items,
            )
            # Log summary
            logger.info(f"Games: {report.summary.total_games}")
            logger.info(f"Teams: {len(result.teams)}")
            logger.info(f"Stadiums: {len(result.stadiums)}")
            logger.info(f"Coverage: {report.summary.game_coverage:.1f}%")
            logger.info(f"Review items: {report.summary.review_count}")
            if not args.dry_run:
                # Save output files; games are per-season, teams/stadiums
                # are per-sport only.
                games_file = OUTPUT_DIR / f"games_{sport}_{args.season}.json"
                teams_file = OUTPUT_DIR / f"teams_{sport}.json"
                stadiums_file = OUTPUT_DIR / f"stadiums_{sport}.json"
                save_games(result.games, str(games_file))
                save_teams(result.teams, str(teams_file))
                save_stadiums(result.stadiums, str(stadiums_file))
                # Save validation report
                report_path = report.save()
                logger.info(f"Saved games to: {games_file}")
                logger.info(f"Saved teams to: {teams_file}")
                logger.info(f"Saved stadiums to: {stadiums_file}")
                logger.info(f"Saved report to: {report_path}")
            log_success(f"{sport.upper()}: Scraped {result.game_count} games")
            success_count += 1
        except NotImplementedError as e:
            # Sport without a scraper yet: warn and keep going.
            logger.warning(str(e))
            failure_count += 1
            continue
        except Exception as e:
            # Any other failure is logged with traceback but must not
            # abort the remaining sports.
            log_failure(f"{sport.upper()}: {e}")
            logger.exception("Scraping failed")
            failure_count += 1
            continue
    # Final summary
    logger.info(f"\n{'='*50}")
    logger.info("SUMMARY")
    logger.info(f"{'='*50}")
    logger.info(f"Successful: {success_count}")
    logger.info(f"Failed: {failure_count}")
    return 0 if failure_count == 0 else 1
def cmd_validate(args: argparse.Namespace) -> int:
    """Execute the validate command.

    Re-runs validation over already-scraped JSON data and regenerates
    each sport's validation report. Sports without scraped data are
    skipped with a warning. Always returns 0.
    """
    # Deferred imports keep CLI startup fast.
    from .models.game import load_games
    from .models.team import load_teams
    from .models.stadium import load_stadiums
    from .validators.report import generate_report, validate_games

    logger = get_logger()
    selected = SUPPORTED_SPORTS if args.sport == "all" else [args.sport]
    logger.info(f"Validating {', '.join(selected)} for {args.season}-{args.season + 1} season")
    for code in selected:
        logger.info(f"\nValidating {code.upper()}...")
        # Paths of previously scraped data for this sport/season.
        games_path = OUTPUT_DIR / f"games_{code}_{args.season}.json"
        teams_path = OUTPUT_DIR / f"teams_{code}.json"
        stadiums_path = OUTPUT_DIR / f"stadiums_{code}.json"
        if not games_path.exists():
            logger.warning(f"No games file found: {games_path}")
            continue
        try:
            loaded_games = load_games(str(games_path))
            loaded_teams = load_teams(str(teams_path)) if teams_path.exists() else []
            loaded_stadiums = (
                load_stadiums(str(stadiums_path)) if stadiums_path.exists() else []
            )
            # Run validation and fold findings into a fresh report.
            issues = validate_games(loaded_games)
            report = generate_report(
                sport=code,
                season=args.season,
                source="existing",
                games=loaded_games,
                teams=loaded_teams,
                stadiums=loaded_stadiums,
                review_items=issues,
            )
            saved_to = report.save()
            logger.info(f"Games: {report.summary.total_games}")
            logger.info(f"Valid: {report.summary.valid_games}")
            logger.info(f"Review items: {report.summary.review_count}")
            logger.info(f"Saved report to: {saved_to}")
            log_success(f"{code.upper()}: Validation complete")
        except Exception as e:
            # Log with traceback and continue with the next sport.
            log_failure(f"{code.upper()}: {e}")
            logger.exception("Validation failed")
            continue
    return 0
def cmd_upload(args: argparse.Namespace) -> int:
    """Execute the upload command.

    For each requested sport: load local JSON data, fetch the matching
    remote CloudKit records, diff local vs. remote, and upload only new
    or changed records. Progress is checkpointed through StateManager so
    an interrupted run can be continued with --resume.

    Returns:
        0 if every sport uploaded cleanly, 1 otherwise (including
        missing credentials, auth failure, or rate limiting).
    """
    # Deferred imports keep CLI startup fast. Note: the *_to_cloudkit_record
    # converters are not needed here -- upload records come pre-built from
    # the diff objects (get_records_to_upload).
    from .models.game import load_games
    from .models.team import load_teams
    from .models.stadium import load_stadiums
    from .uploaders import (
        CloudKitClient,
        CloudKitError,
        CloudKitAuthError,
        CloudKitRateLimitError,
        RecordType,
        RecordDiffer,
        StateManager,
    )
    from .utils.progress import create_progress_bar
    logger = get_logger()
    sports = SUPPORTED_SPORTS if args.sport == "all" else [args.sport]
    logger.info(f"Uploading {', '.join(sports)} for {args.season}-{args.season + 1} season")
    logger.info(f"Environment: {args.environment}")
    # Initialize CloudKit client; bail out early if credentials are missing.
    client = CloudKitClient(environment=args.environment)
    if not client.is_configured:
        log_failure("CloudKit not configured")
        logger.error(
            "Set CLOUDKIT_KEY_ID and CLOUDKIT_PRIVATE_KEY_PATH environment variables.\n"
            "Get credentials from Apple Developer Portal > Certificates, Identifiers & Profiles > Keys"
        )
        return 1
    # Initialize state manager and differ (shared across sports).
    state_manager = StateManager()
    differ = RecordDiffer()
    success_count = 0
    failure_count = 0
    for sport in sports:
        logger.info(f"\n{'='*50}")
        logger.info(f"Uploading {sport.upper()}...")
        logger.info(f"{'='*50}")
        try:
            # Load local data produced by the scrape command.
            games_file = OUTPUT_DIR / f"games_{sport}_{args.season}.json"
            teams_file = OUTPUT_DIR / f"teams_{sport}.json"
            stadiums_file = OUTPUT_DIR / f"stadiums_{sport}.json"
            if not games_file.exists():
                logger.warning(f"No games file found: {games_file}")
                logger.warning("Run 'scrape' command first")
                failure_count += 1
                continue
            games = load_games(str(games_file))
            teams = load_teams(str(teams_file)) if teams_file.exists() else []
            stadiums = load_stadiums(str(stadiums_file)) if stadiums_file.exists() else []
            logger.info(f"Loaded {len(games)} games, {len(teams)} teams, {len(stadiums)} stadiums")
            # Fetch existing CloudKit records so we can diff against them.
            logger.info("Fetching existing CloudKit records...")
            try:
                remote_games = client.fetch_all_records(RecordType.GAME)
                remote_teams = client.fetch_all_records(RecordType.TEAM)
                remote_stadiums = client.fetch_all_records(RecordType.STADIUM)
            except CloudKitAuthError as e:
                # Credentials are global: no point trying other sports.
                log_failure(f"Authentication failed: {e}")
                return 1
            except CloudKitRateLimitError:
                log_failure("Rate limit exceeded - try again later")
                return 1
            except CloudKitError as e:
                log_failure(f"Failed to fetch records: {e}")
                failure_count += 1
                continue
            # Filter remote records to this sport (and season for games),
            # since fetch_all_records returns every sport's records.
            remote_games = [
                r for r in remote_games
                if r.get("fields", {}).get("sport", {}).get("value") == sport
                and r.get("fields", {}).get("season", {}).get("value") == args.season
            ]
            remote_teams = [
                r for r in remote_teams
                if r.get("fields", {}).get("sport", {}).get("value") == sport
            ]
            remote_stadiums = [
                r for r in remote_stadiums
                if r.get("fields", {}).get("sport", {}).get("value") == sport
            ]
            logger.info(f"Found {len(remote_games)} games, {len(remote_teams)} teams, {len(remote_stadiums)} stadiums in CloudKit")
            # Calculate diffs (creates / updates / unchanged) per record type.
            logger.info("Calculating changes...")
            game_diff = differ.diff_games(games, remote_games)
            team_diff = differ.diff_teams(teams, remote_teams)
            stadium_diff = differ.diff_stadiums(stadiums, remote_stadiums)
            total_creates = game_diff.create_count + team_diff.create_count + stadium_diff.create_count
            total_updates = game_diff.update_count + team_diff.update_count + stadium_diff.update_count
            total_unchanged = game_diff.unchanged_count + team_diff.unchanged_count + stadium_diff.unchanged_count
            logger.info(f"Creates: {total_creates}, Updates: {total_updates}, Unchanged: {total_unchanged}")
            if total_creates == 0 and total_updates == 0:
                log_success(f"{sport.upper()}: Already up to date")
                success_count += 1
                continue
            # Gather every record that needs creating or updating.
            all_records = []
            all_records.extend(game_diff.get_records_to_upload())
            all_records.extend(team_diff.get_records_to_upload())
            all_records.extend(stadium_diff.get_records_to_upload())
            # Create or resume the checkpointed upload session.
            record_info = [(r.record_name, r.record_type.value) for r in all_records]
            session = state_manager.get_session_or_create(
                sport=sport,
                season=args.season,
                environment=args.environment,
                record_names=record_info,
                resume=args.resume,
            )
            if args.resume:
                pending = session.get_pending_records()
                logger.info(f"Resuming: {len(pending)} records pending")
                # Only re-upload records the previous run did not finish.
                pending_set = set(pending)
                all_records = [r for r in all_records if r.record_name in pending_set]
            # Upload records, tracking per-record outcome in the session.
            logger.info(f"Uploading {len(all_records)} records...")
            with create_progress_bar(total=len(all_records), description="Uploading") as progress:
                batch_result = client.save_records(all_records)
                # Update session state
                for op_result in batch_result.successful:
                    session.mark_uploaded(op_result.record_name, op_result.record_change_tag)
                    progress.advance()
                for op_result in batch_result.failed:
                    session.mark_failed(op_result.record_name, op_result.error_message or "Unknown error")
                    progress.advance()
            # Persist session state so a crash here is still resumable.
            state_manager.save_session(session)
            # Report results
            logger.info(f"Uploaded: {batch_result.success_count}")
            logger.info(f"Failed: {batch_result.failure_count}")
            if batch_result.failure_count > 0:
                log_failure(f"{sport.upper()}: {batch_result.failure_count} records failed")
                for op_result in batch_result.failed[:5]:  # Show first 5 failures
                    logger.error(f" {op_result.record_name}: {op_result.error_message}")
                if batch_result.failure_count > 5:
                    logger.error(f" ... and {batch_result.failure_count - 5} more")
                failure_count += 1
            else:
                log_success(f"{sport.upper()}: Uploaded {batch_result.success_count} records")
                # Clear session on complete success
                state_manager.delete_session(sport, args.season, args.environment)
                success_count += 1
        except Exception as e:
            # Unexpected failure: log with traceback, move to next sport.
            log_failure(f"{sport.upper()}: {e}")
            logger.exception("Upload failed")
            failure_count += 1
            continue
    # Final summary
    logger.info(f"\n{'='*50}")
    logger.info("SUMMARY")
    logger.info(f"{'='*50}")
    logger.info(f"Successful: {success_count}")
    logger.info(f"Failed: {failure_count}")
    return 0 if failure_count == 0 else 1
def cmd_status(args: argparse.Namespace) -> int:
    """Execute the status command.

    Prints three sections: scraped-data coverage per sport, upload session
    progress, and CloudKit credential configuration. Always returns 0.
    """
    from datetime import datetime, timezone
    from .config import EXPECTED_GAME_COUNTS
    # Model loaders hoisted out of the per-sport loop (imports are cached
    # anyway, but this keeps the loop body about reporting only).
    from .models.game import load_games
    from .models.team import load_teams
    from .models.stadium import load_stadiums
    from .uploaders import StateManager
    logger = get_logger()
    logger.info("SportsTime Parser Status")
    logger.info("=" * 50)
    logger.info("")
    # Section 1: scraped data on disk (for the default season only).
    logger.info("[bold]Scraped Data[/bold]")
    logger.info("-" * 40)
    total_games = 0
    scraped_sports = 0
    for sport in SUPPORTED_SPORTS:
        games_file = OUTPUT_DIR / f"games_{sport}_{DEFAULT_SEASON}.json"
        teams_file = OUTPUT_DIR / f"teams_{sport}.json"
        stadiums_file = OUTPUT_DIR / f"stadiums_{sport}.json"
        if games_file.exists():
            try:
                games = load_games(str(games_file))
                teams = load_teams(str(teams_file)) if teams_file.exists() else []
                stadiums = load_stadiums(str(stadiums_file)) if stadiums_file.exists() else []
                game_count = len(games)
                expected = EXPECTED_GAME_COUNTS.get(sport, 0)
                coverage = (game_count / expected * 100) if expected > 0 else 0
                # Coverage indicator: >=95% good, >=80% partial, else poor.
                if coverage >= 95:
                    status = "[green]✓[/green]"
                elif coverage >= 80:
                    status = "[yellow]~[/yellow]"
                else:
                    status = "[red]![/red]"
                logger.info(
                    f" {status} {sport.upper():6} {game_count:5} games, "
                    f"{len(teams):2} teams, {len(stadiums):2} stadiums "
                    f"({coverage:.0f}% coverage)"
                )
                total_games += game_count
                scraped_sports += 1
            except Exception as e:
                logger.info(f" [red]✗[/red] {sport.upper():6} Error loading: {e}")
        else:
            logger.info(f" [dim]-[/dim] {sport.upper():6} Not scraped")
    logger.info("-" * 40)
    logger.info(f" Total: {total_games} games across {scraped_sports} sports")
    logger.info("")
    # Section 2: persisted upload sessions and their progress.
    logger.info("[bold]Upload Sessions[/bold]")
    logger.info("-" * 40)
    state_manager = StateManager()
    sessions = state_manager.list_sessions()
    if sessions:
        for session in sessions:
            sport = session["sport"].upper()
            season = session["season"]
            env = session["environment"]
            progress = session["progress"]
            percent = session["progress_percent"]
            status = session["status"]
            failed = session["failed_count"]
            if status == "complete":
                status_icon = "[green]✓[/green]"
            elif failed > 0:
                status_icon = "[yellow]![/yellow]"
            else:
                status_icon = "[blue]→[/blue]"
            logger.info(
                f" {status_icon} {sport} {season} ({env}): "
                f"{progress} ({percent})"
            )
            if failed > 0:
                logger.info(f" [yellow]⚠ {failed} failed records[/yellow]")
            # Show last updated time. Naive timestamps are assumed UTC
            # (presumably written by the state manager -- TODO confirm);
            # previously this used deprecated utcnow() and a mixed
            # aware/naive subtraction could raise an uncaught TypeError.
            try:
                last_updated = datetime.fromisoformat(session["last_updated"])
                if last_updated.tzinfo is None:
                    last_updated = last_updated.replace(tzinfo=timezone.utc)
                age = datetime.now(timezone.utc) - last_updated
                if age.days > 0:
                    age_str = f"{age.days} days ago"
                elif age.seconds > 3600:
                    age_str = f"{age.seconds // 3600} hours ago"
                elif age.seconds > 60:
                    age_str = f"{age.seconds // 60} minutes ago"
                else:
                    age_str = "just now"
                logger.info(f" Last updated: {age_str}")
            except (ValueError, KeyError, TypeError):
                # Missing or malformed timestamp: omit the age line.
                pass
    else:
        logger.info(" No upload sessions found")
    logger.info("")
    # Section 3: CloudKit credential configuration from the environment.
    logger.info("[bold]CloudKit Configuration[/bold]")
    logger.info("-" * 40)
    import os
    key_id = os.environ.get("CLOUDKIT_KEY_ID")
    key_path = os.environ.get("CLOUDKIT_PRIVATE_KEY_PATH")
    key_content = os.environ.get("CLOUDKIT_PRIVATE_KEY")
    if key_id:
        # Only show a prefix of the key ID to avoid leaking it into logs.
        logger.info(f" [green]✓[/green] CLOUDKIT_KEY_ID: {key_id[:8]}...")
    else:
        logger.info(" [red]✗[/red] CLOUDKIT_KEY_ID: Not set")
    if key_path:
        from pathlib import Path
        if Path(key_path).exists():
            logger.info(f" [green]✓[/green] CLOUDKIT_PRIVATE_KEY_PATH: {key_path}")
        else:
            logger.info(f" [red]✗[/red] CLOUDKIT_PRIVATE_KEY_PATH: File not found: {key_path}")
    elif key_content:
        logger.info(" [green]✓[/green] CLOUDKIT_PRIVATE_KEY: Set (inline)")
    else:
        logger.info(" [red]✗[/red] CLOUDKIT_PRIVATE_KEY: Not set")
    logger.info("")
    return 0
def cmd_retry(args: argparse.Namespace) -> int:
    """Execute the retry command for failed uploads.

    For each sport with a persisted upload session, re-uploads records that
    previously failed (skipping any that already exceeded --max-retries).

    Returns:
        0 if no record is still failing afterwards, 1 otherwise.
    """
    # Deferred imports keep CLI startup fast.
    from .models.game import load_games
    from .models.team import load_teams
    from .models.stadium import load_stadiums
    from .uploaders import (
        CloudKitClient,
        CloudKitError,
        CloudKitAuthError,
        CloudKitRateLimitError,
        StateManager,
        game_to_cloudkit_record,
        team_to_cloudkit_record,
        stadium_to_cloudkit_record,
    )
    from .utils.progress import create_progress_bar
    logger = get_logger()
    sports = SUPPORTED_SPORTS if args.sport == "all" else [args.sport]
    logger.info(f"Retrying failed uploads for {', '.join(sports)}")
    logger.info(f"Environment: {args.environment}")
    logger.info(f"Max retries per record: {args.max_retries}")
    # Initialize CloudKit client; credentials are required up front.
    client = CloudKitClient(environment=args.environment)
    if not client.is_configured:
        log_failure("CloudKit not configured")
        return 1
    # Initialize state manager
    state_manager = StateManager()
    total_retried = 0
    total_succeeded = 0
    total_failed = 0
    for sport in sports:
        # Load the existing session; nothing to retry without one.
        session = state_manager.load_session(sport, args.season, args.environment)
        if session is None:
            logger.info(f"{sport.upper()}: No upload session found")
            continue
        # Records still failing that have not exhausted their retry budget.
        retryable = session.get_retryable_records(max_retries=args.max_retries)
        if not retryable:
            failed_count = session.failed_count
            if failed_count > 0:
                logger.info(f"{sport.upper()}: {failed_count} failed records exceeded max retries")
            else:
                logger.info(f"{sport.upper()}: No failed records to retry")
            continue
        logger.info(f"{sport.upper()}: Retrying {len(retryable)} failed records...")
        # Reload the local JSON so the retryable IDs can be rebuilt into
        # CloudKit record payloads.
        games_file = OUTPUT_DIR / f"games_{sport}_{args.season}.json"
        teams_file = OUTPUT_DIR / f"teams_{sport}.json"
        stadiums_file = OUTPUT_DIR / f"stadiums_{sport}.json"
        if not games_file.exists():
            logger.warning(f"No games file found: {games_file}")
            continue
        games = load_games(str(games_file))
        teams = load_teams(str(teams_file)) if teams_file.exists() else []
        stadiums = load_stadiums(str(stadiums_file)) if stadiums_file.exists() else []
        # Build record payloads for every retryable ID found locally.
        records_to_retry = []
        retryable_set = set(retryable)
        for game in games:
            if game.id in retryable_set:
                records_to_retry.append(game_to_cloudkit_record(game))
        for team in teams:
            if team.id in retryable_set:
                records_to_retry.append(team_to_cloudkit_record(team))
        for stadium in stadiums:
            if stadium.id in retryable_set:
                records_to_retry.append(stadium_to_cloudkit_record(stadium))
        if not records_to_retry:
            # Session references IDs no longer present in local data.
            logger.warning(f"{sport.upper()}: Could not find records for retry")
            continue
        # Reset these records to pending so the session tracks this attempt.
        for record_name in retryable:
            session.mark_pending(record_name)
        # Retry upload, recording per-record outcome back into the session.
        try:
            with create_progress_bar(total=len(records_to_retry), description="Retrying") as progress:
                batch_result = client.save_records(records_to_retry)
                for op_result in batch_result.successful:
                    session.mark_uploaded(op_result.record_name, op_result.record_change_tag)
                    progress.advance()
                    total_succeeded += 1
                for op_result in batch_result.failed:
                    session.mark_failed(op_result.record_name, op_result.error_message or "Unknown error")
                    progress.advance()
                    total_failed += 1
            state_manager.save_session(session)
            total_retried += len(records_to_retry)
            if batch_result.failure_count > 0:
                log_failure(f"{sport.upper()}: {batch_result.failure_count} still failing")
            else:
                log_success(f"{sport.upper()}: All {batch_result.success_count} retries succeeded")
                # Clear session if all complete
                if session.is_complete:
                    state_manager.delete_session(sport, args.season, args.environment)
        except CloudKitAuthError as e:
            # Credentials are global: abort the whole retry run.
            log_failure(f"Authentication failed: {e}")
            return 1
        except CloudKitRateLimitError:
            # Persist state before bailing so progress is not lost.
            log_failure("Rate limit exceeded - try again later")
            state_manager.save_session(session)
            return 1
        except CloudKitError as e:
            # Other CloudKit errors: save state, continue with next sport.
            log_failure(f"Upload error: {e}")
            state_manager.save_session(session)
            continue
    # Summary
    logger.info(f"\n{'='*50}")
    logger.info("RETRY SUMMARY")
    logger.info(f"{'='*50}")
    logger.info(f"Retried: {total_retried}")
    logger.info(f"Succeeded: {total_succeeded}")
    logger.info(f"Failed: {total_failed}")
    return 0 if total_failed == 0 else 1
def cmd_clear(args: argparse.Namespace) -> int:
    """Execute the clear command to delete upload state.

    Removes any persisted upload session for each requested sport so the
    next upload starts from scratch. Always returns 0.
    """
    from .uploaders import StateManager

    logger = get_logger()
    targets = SUPPORTED_SPORTS if args.sport == "all" else [args.sport]
    logger.info(f"Clearing upload state for {', '.join(targets)}")
    manager = StateManager()
    removed = 0
    for code in targets:
        # delete_session reports whether a session file actually existed.
        if manager.delete_session(code, args.season, args.environment):
            logger.info(f" [green]✓[/green] Cleared {code.upper()} {args.season} ({args.environment})")
            removed += 1
        else:
            logger.info(f" [dim]-[/dim] No session for {code.upper()} {args.season} ({args.environment})")
    logger.info(f"\nCleared {removed} session(s)")
    return 0
def run_cli(argv: Optional[list[str]] = None) -> int:
"""Parse arguments and run the appropriate command."""
parser = create_parser()
args = parser.parse_args(argv)
if args.verbose:
set_verbose(True)
if args.command is None:
parser.print_help()
return 1
return args.func(args)