"""CLI subcommand definitions for sportstime-parser.""" import argparse import sys from typing import Optional from .config import ( DEFAULT_SEASON, CLOUDKIT_ENVIRONMENT, SUPPORTED_SPORTS, OUTPUT_DIR, ) from .utils.logging import get_logger, set_verbose, log_success, log_failure def create_parser() -> argparse.ArgumentParser: """Create the main argument parser with all subcommands.""" parser = argparse.ArgumentParser( prog="sportstime-parser", description="Sports data scraper and CloudKit uploader for SportsTime app", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: sportstime-parser scrape nba --season 2025 sportstime-parser scrape all --season 2025 sportstime-parser validate nba --season 2025 sportstime-parser upload nba --season 2025 sportstime-parser status sportstime-parser purge --environment development sportstime-parser count --environment development sportstime-parser upload-static --environment development """, ) parser.add_argument( "--verbose", "-v", action="store_true", help="Enable verbose output", ) subparsers = parser.add_subparsers( dest="command", title="commands", description="Available commands", metavar="COMMAND", ) # Scrape subcommand scrape_parser = subparsers.add_parser( "scrape", help="Scrape game schedules, teams, and stadiums", description="Scrape sports data from multiple sources", ) scrape_parser.add_argument( "sport", choices=SUPPORTED_SPORTS + ["all"], help="Sport to scrape (or 'all' for all sports)", ) scrape_parser.add_argument( "--season", "-s", type=int, default=DEFAULT_SEASON, help=f"Season start year (default: {DEFAULT_SEASON})", ) scrape_parser.add_argument( "--dry-run", action="store_true", help="Parse and validate only, don't write output files", ) scrape_parser.set_defaults(func=cmd_scrape) # Validate subcommand validate_parser = subparsers.add_parser( "validate", help="Run validation on existing scraped data", description="Validate scraped data and regenerate reports", ) validate_parser.add_argument( "sport", choices=SUPPORTED_SPORTS + ["all"], help="Sport to validate (or 'all' for all sports)", ) validate_parser.add_argument( "--season", "-s", type=int, default=DEFAULT_SEASON, help=f"Season start year (default: {DEFAULT_SEASON})", ) validate_parser.set_defaults(func=cmd_validate) # Upload subcommand upload_parser = subparsers.add_parser( "upload", help="Upload scraped data to CloudKit", description="Upload data to CloudKit with resumable, diff-based updates", ) upload_parser.add_argument( "sport", choices=SUPPORTED_SPORTS + ["all"], help="Sport to upload (or 'all' for all sports)", ) upload_parser.add_argument( "--season", "-s", type=int, default=DEFAULT_SEASON, help=f"Season start year (default: {DEFAULT_SEASON})", ) upload_parser.add_argument( "--environment", "-e", choices=["development", "production"], default=CLOUDKIT_ENVIRONMENT, help=f"CloudKit environment (default: {CLOUDKIT_ENVIRONMENT})", ) upload_parser.add_argument( "--resume", action="store_true", help="Resume interrupted upload from last checkpoint", ) upload_parser.set_defaults(func=cmd_upload) # Status subcommand status_parser = subparsers.add_parser( "status", help="Show current scrape and upload status", description="Display summary of scraped data and upload progress", ) status_parser.set_defaults(func=cmd_status) # Retry subcommand retry_parser = subparsers.add_parser( "retry", help="Retry failed uploads", description="Retry records that failed during previous upload attempts", ) retry_parser.add_argument( "sport", choices=SUPPORTED_SPORTS + ["all"], 
help="Sport to retry (or 'all' for all sports)", ) retry_parser.add_argument( "--season", "-s", type=int, default=DEFAULT_SEASON, help=f"Season start year (default: {DEFAULT_SEASON})", ) retry_parser.add_argument( "--environment", "-e", choices=["development", "production"], default=CLOUDKIT_ENVIRONMENT, help=f"CloudKit environment (default: {CLOUDKIT_ENVIRONMENT})", ) retry_parser.add_argument( "--max-retries", type=int, default=3, help="Maximum retry attempts per record (default: 3)", ) retry_parser.set_defaults(func=cmd_retry) # Clear subcommand clear_parser = subparsers.add_parser( "clear", help="Clear upload session state", description="Delete upload session state files to start fresh", ) clear_parser.add_argument( "sport", choices=SUPPORTED_SPORTS + ["all"], help="Sport to clear (or 'all' for all sports)", ) clear_parser.add_argument( "--season", "-s", type=int, default=DEFAULT_SEASON, help=f"Season start year (default: {DEFAULT_SEASON})", ) clear_parser.add_argument( "--environment", "-e", choices=["development", "production"], default=CLOUDKIT_ENVIRONMENT, help=f"CloudKit environment (default: {CLOUDKIT_ENVIRONMENT})", ) clear_parser.set_defaults(func=cmd_clear) # Purge subcommand purge_parser = subparsers.add_parser( "purge", help="Delete all records from CloudKit (DESTRUCTIVE)", description="Delete ALL records from CloudKit. This is destructive and cannot be undone.", ) purge_parser.add_argument( "--environment", "-e", choices=["development", "production"], default=CLOUDKIT_ENVIRONMENT, help=f"CloudKit environment (default: {CLOUDKIT_ENVIRONMENT})", ) purge_parser.add_argument( "--yes", "-y", action="store_true", help="Skip confirmation prompt", ) purge_parser.set_defaults(func=cmd_purge) # Count subcommand count_parser = subparsers.add_parser( "count", help="Count records in CloudKit by type", description="Display count of all record types in CloudKit", ) count_parser.add_argument( "--environment", "-e", choices=["development", "production"], default=CLOUDKIT_ENVIRONMENT, help=f"CloudKit environment (default: {CLOUDKIT_ENVIRONMENT})", ) count_parser.set_defaults(func=cmd_count) # Upload-static subcommand upload_static_parser = subparsers.add_parser( "upload-static", help="Upload static reference data to CloudKit", description="Upload league structure, team aliases, stadium aliases, and sports to CloudKit", ) upload_static_parser.add_argument( "--environment", "-e", choices=["development", "production"], default=CLOUDKIT_ENVIRONMENT, help=f"CloudKit environment (default: {CLOUDKIT_ENVIRONMENT})", ) upload_static_parser.set_defaults(func=cmd_upload_static) return parser def get_scraper(sport: str, season: int): """Get the appropriate scraper for a sport. 

    Args:
        sport: Sport code
        season: Season start year

    Returns:
        Scraper instance

    Raises:
        NotImplementedError: If sport scraper is not yet implemented
    """
    if sport == "nba":
        from .scrapers.nba import create_nba_scraper
        return create_nba_scraper(season)
    elif sport == "mlb":
        from .scrapers.mlb import create_mlb_scraper
        return create_mlb_scraper(season)
    elif sport == "nfl":
        from .scrapers.nfl import create_nfl_scraper
        return create_nfl_scraper(season)
    elif sport == "nhl":
        from .scrapers.nhl import create_nhl_scraper
        return create_nhl_scraper(season)
    elif sport == "mls":
        from .scrapers.mls import create_mls_scraper
        return create_mls_scraper(season)
    elif sport == "wnba":
        from .scrapers.wnba import create_wnba_scraper
        return create_wnba_scraper(season)
    elif sport == "nwsl":
        from .scrapers.nwsl import create_nwsl_scraper
        return create_nwsl_scraper(season)
    else:
        raise NotImplementedError(f"Scraper for {sport} not yet implemented")


def cmd_scrape(args: argparse.Namespace) -> int:
    """Execute the scrape command with canonical output format."""
    import json

    from .validators.report import generate_report, validate_games
    from .normalizers.timezone import get_stadium_timezone
    from .validators.schema import SchemaValidationError, validate_batch

    logger = get_logger()
    sports = SUPPORTED_SPORTS if args.sport == "all" else [args.sport]

    logger.info(f"Scraping {', '.join(sports)} for {args.season}-{args.season + 1} season")
    if args.dry_run:
        logger.info("Dry run mode - no files will be written")

    # Ensure output directory exists
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    success_count = 0
    failure_count = 0

    for sport in sports:
        logger.info(f"\n{'='*50}")
        logger.info(f"Scraping {sport.upper()}...")
        logger.info(f"{'='*50}")

        try:
            # Get scraper for this sport
            scraper = get_scraper(sport, args.season)

            # Scrape all data
            result = scraper.scrape_all()

            if not result.success:
                log_failure(f"{sport.upper()}: {result.error_message}")
                failure_count += 1
                continue

            # Validate games
            validation_issues = validate_games(result.games)
            all_review_items = result.review_items + validation_issues

            # Generate validation report
            report = generate_report(
                sport=sport,
                season=args.season,
                source=result.source,
                games=result.games,
                teams=result.teams,
                stadiums=result.stadiums,
                review_items=all_review_items,
            )

            # Log summary
            logger.info(f"Games: {report.summary.total_games}")
            logger.info(f"Teams: {len(result.teams)}")
            logger.info(f"Stadiums: {len(result.stadiums)}")
            logger.info(f"Coverage: {report.summary.game_coverage:.1f}%")
            logger.info(f"Review items: {report.summary.review_count}")

            if not args.dry_run:
                # Build mappings for canonical conversion
                stadium_timezone_map: dict[str, str] = {}
                for stadium in result.stadiums:
                    tz = get_stadium_timezone(stadium.state, stadium.timezone)
                    stadium_timezone_map[stadium.id] = tz

                stadium_team_abbrevs: dict[str, list[str]] = {}
                for team in result.teams:
                    if team.stadium_id:
                        if team.stadium_id not in stadium_team_abbrevs:
                            stadium_team_abbrevs[team.stadium_id] = []
                        stadium_team_abbrevs[team.stadium_id].append(team.abbreviation)

                # Convert to canonical format
                canonical_stadiums = [
                    s.to_canonical_dict(primary_team_abbrevs=stadium_team_abbrevs.get(s.id, []))
                    for s in result.stadiums
                ]
                canonical_teams = [t.to_canonical_dict() for t in result.teams]
                canonical_games = [
                    g.to_canonical_dict(stadium_timezone=stadium_timezone_map.get(g.stadium_id, "America/New_York"))
                    for g in result.games
                ]

                # Validate canonical output
                stadium_errors = validate_batch(canonical_stadiums, "stadium", fail_fast=False)
                team_errors = validate_batch(canonical_teams, "team", fail_fast=False)
                game_errors = validate_batch(canonical_games, "game", fail_fast=False)

                if stadium_errors or team_errors or game_errors:
                    for idx, errors in stadium_errors:
                        for e in errors:
                            logger.error(f"Stadium {result.stadiums[idx].id}: {e}")
                    for idx, errors in team_errors:
                        for e in errors:
                            logger.error(f"Team {result.teams[idx].id}: {e}")
                    for idx, errors in game_errors[:10]:
                        for e in errors:
                            logger.error(f"Game {result.games[idx].id}: {e}")
                    if len(game_errors) > 10:
                        logger.error(f"... and {len(game_errors) - 10} more game errors")
                    raise SchemaValidationError("canonical", ["Schema validation failed"])

                # Save canonical output files
                games_file = OUTPUT_DIR / f"games_{sport}_{args.season}.json"
                teams_file = OUTPUT_DIR / f"teams_{sport}.json"
                stadiums_file = OUTPUT_DIR / f"stadiums_{sport}.json"

                with open(games_file, "w", encoding="utf-8") as f:
                    json.dump(canonical_games, f, indent=2)
                with open(teams_file, "w", encoding="utf-8") as f:
                    json.dump(canonical_teams, f, indent=2)
                with open(stadiums_file, "w", encoding="utf-8") as f:
                    json.dump(canonical_stadiums, f, indent=2)

                # Save validation report
                report_path = report.save()

                logger.info(f"Saved games to: {games_file}")
                logger.info(f"Saved teams to: {teams_file}")
                logger.info(f"Saved stadiums to: {stadiums_file}")
                logger.info(f"Saved report to: {report_path}")

            log_success(f"{sport.upper()}: Scraped {result.game_count} games")
            success_count += 1

        except NotImplementedError as e:
            logger.warning(str(e))
            failure_count += 1
            continue
        except SchemaValidationError as e:
            log_failure(f"{sport.upper()}: {e}")
            failure_count += 1
            continue
        except Exception as e:
            log_failure(f"{sport.upper()}: {e}")
            logger.exception("Scraping failed")
            failure_count += 1
            continue

    # Final summary
    logger.info(f"\n{'='*50}")
    logger.info("SUMMARY")
    logger.info(f"{'='*50}")
    logger.info(f"Successful: {success_count}")
    logger.info(f"Failed: {failure_count}")

    return 0 if failure_count == 0 else 1


def cmd_validate(args: argparse.Namespace) -> int:
    """Execute the validate command."""
    from .models.game import load_games
    from .models.team import load_teams
    from .models.stadium import load_stadiums
    from .validators.report import generate_report, validate_games

    logger = get_logger()
    sports = SUPPORTED_SPORTS if args.sport == "all" else [args.sport]

    logger.info(f"Validating {', '.join(sports)} for {args.season}-{args.season + 1} season")

    for sport in sports:
        logger.info(f"\nValidating {sport.upper()}...")

        # Load existing data
        games_file = OUTPUT_DIR / f"games_{sport}_{args.season}.json"
        teams_file = OUTPUT_DIR / f"teams_{sport}.json"
        stadiums_file = OUTPUT_DIR / f"stadiums_{sport}.json"

        if not games_file.exists():
            logger.warning(f"No games file found: {games_file}")
            continue

        try:
            games = load_games(str(games_file))
            teams = load_teams(str(teams_file)) if teams_file.exists() else []
            stadiums = load_stadiums(str(stadiums_file)) if stadiums_file.exists() else []

            # Run validation
            review_items = validate_games(games)

            # Generate report
            report = generate_report(
                sport=sport,
                season=args.season,
                source="existing",
                games=games,
                teams=teams,
                stadiums=stadiums,
                review_items=review_items,
            )

            # Save report
            report_path = report.save()

            logger.info(f"Games: {report.summary.total_games}")
            logger.info(f"Valid: {report.summary.valid_games}")
            logger.info(f"Review items: {report.summary.review_count}")
            logger.info(f"Saved report to: {report_path}")

            log_success(f"{sport.upper()}: Validation complete")
        except Exception as e:
            log_failure(f"{sport.upper()}: {e}")
logger.exception("Validation failed") continue return 0 def cmd_upload(args: argparse.Namespace) -> int: """Execute the upload command.""" from .models.game import load_games from .models.team import load_teams from .models.stadium import load_stadiums from .uploaders import ( CloudKitClient, CloudKitError, CloudKitAuthError, CloudKitRateLimitError, RecordType, RecordDiffer, StateManager, game_to_cloudkit_record, team_to_cloudkit_record, stadium_to_cloudkit_record, ) from .utils.progress import create_progress_bar logger = get_logger() sports = SUPPORTED_SPORTS if args.sport == "all" else [args.sport] logger.info(f"Uploading {', '.join(sports)} for {args.season}-{args.season + 1} season") logger.info(f"Environment: {args.environment}") # Initialize CloudKit client client = CloudKitClient(environment=args.environment) if not client.is_configured: log_failure("CloudKit not configured") logger.error( "Set CLOUDKIT_KEY_ID and CLOUDKIT_PRIVATE_KEY_PATH environment variables.\n" "Get credentials from Apple Developer Portal > Certificates, Identifiers & Profiles > Keys" ) return 1 # Initialize state manager state_manager = StateManager() differ = RecordDiffer() success_count = 0 failure_count = 0 for sport in sports: logger.info(f"\n{'='*50}") logger.info(f"Uploading {sport.upper()}...") logger.info(f"{'='*50}") try: # Load local data games_file = OUTPUT_DIR / f"games_{sport}_{args.season}.json" teams_file = OUTPUT_DIR / f"teams_{sport}.json" stadiums_file = OUTPUT_DIR / f"stadiums_{sport}.json" if not games_file.exists(): logger.warning(f"No games file found: {games_file}") logger.warning("Run 'scrape' command first") failure_count += 1 continue games = load_games(str(games_file)) teams = load_teams(str(teams_file)) if teams_file.exists() else [] stadiums = load_stadiums(str(stadiums_file)) if stadiums_file.exists() else [] logger.info(f"Loaded {len(games)} games, {len(teams)} teams, {len(stadiums)} stadiums") # Fetch existing CloudKit records for diff logger.info("Fetching existing CloudKit records...") try: remote_games = client.fetch_all_records(RecordType.GAME) remote_teams = client.fetch_all_records(RecordType.TEAM) remote_stadiums = client.fetch_all_records(RecordType.STADIUM) except CloudKitAuthError as e: log_failure(f"Authentication failed: {e}") return 1 except CloudKitRateLimitError: log_failure("Rate limit exceeded - try again later") return 1 except CloudKitError as e: log_failure(f"Failed to fetch records: {e}") failure_count += 1 continue # Filter remote records to this sport/season remote_games = [ r for r in remote_games if r.get("fields", {}).get("sport", {}).get("value") == sport and r.get("fields", {}).get("season", {}).get("value") == args.season ] remote_teams = [ r for r in remote_teams if r.get("fields", {}).get("sport", {}).get("value") == sport ] remote_stadiums = [ r for r in remote_stadiums if r.get("fields", {}).get("sport", {}).get("value") == sport ] logger.info(f"Found {len(remote_games)} games, {len(remote_teams)} teams, {len(remote_stadiums)} stadiums in CloudKit") # Calculate diffs logger.info("Calculating changes...") game_diff = differ.diff_games(games, remote_games) team_diff = differ.diff_teams(teams, remote_teams) stadium_diff = differ.diff_stadiums(stadiums, remote_stadiums) total_creates = game_diff.create_count + team_diff.create_count + stadium_diff.create_count total_updates = game_diff.update_count + team_diff.update_count + stadium_diff.update_count total_unchanged = game_diff.unchanged_count + team_diff.unchanged_count + 
            total_unchanged = game_diff.unchanged_count + team_diff.unchanged_count + stadium_diff.unchanged_count

            logger.info(f"Creates: {total_creates}, Updates: {total_updates}, Unchanged: {total_unchanged}")

            if total_creates == 0 and total_updates == 0:
                log_success(f"{sport.upper()}: Already up to date")
                success_count += 1
                continue

            # Prepare records for upload
            all_records = []
            all_records.extend(game_diff.get_records_to_upload())
            all_records.extend(team_diff.get_records_to_upload())
            all_records.extend(stadium_diff.get_records_to_upload())

            # Create or resume upload session
            record_info = [(r.record_name, r.record_type.value) for r in all_records]
            session = state_manager.get_session_or_create(
                sport=sport,
                season=args.season,
                environment=args.environment,
                record_names=record_info,
                resume=args.resume,
            )

            if args.resume:
                pending = session.get_pending_records()
                logger.info(f"Resuming: {len(pending)} records pending")
                # Filter to only pending records
                pending_set = set(pending)
                all_records = [r for r in all_records if r.record_name in pending_set]

            # Upload records with progress
            logger.info(f"Uploading {len(all_records)} records...")

            with create_progress_bar(total=len(all_records), description="Uploading") as progress:
                batch_result = client.save_records(all_records)

                # Update session state
                for op_result in batch_result.successful:
                    session.mark_uploaded(op_result.record_name, op_result.record_change_tag)
                    progress.advance()
                for op_result in batch_result.failed:
                    session.mark_failed(op_result.record_name, op_result.error_message or "Unknown error")
                    progress.advance()

            # Save session state
            state_manager.save_session(session)

            # Report results
            logger.info(f"Uploaded: {batch_result.success_count}")
            logger.info(f"Failed: {batch_result.failure_count}")

            if batch_result.failure_count > 0:
                log_failure(f"{sport.upper()}: {batch_result.failure_count} records failed")
                for op_result in batch_result.failed[:5]:  # Show first 5 failures
                    logger.error(f" {op_result.record_name}: {op_result.error_message}")
                if batch_result.failure_count > 5:
                    logger.error(f" ... and {batch_result.failure_count - 5} more")
                failure_count += 1
            else:
                log_success(f"{sport.upper()}: Uploaded {batch_result.success_count} records")
                # Clear session on complete success
                state_manager.delete_session(sport, args.season, args.environment)
                success_count += 1

        except Exception as e:
            log_failure(f"{sport.upper()}: {e}")
            logger.exception("Upload failed")
            failure_count += 1
            continue

    # Final summary
    logger.info(f"\n{'='*50}")
    logger.info("SUMMARY")
    logger.info(f"{'='*50}")
    logger.info(f"Successful: {success_count}")
    logger.info(f"Failed: {failure_count}")

    return 0 if failure_count == 0 else 1


def cmd_status(args: argparse.Namespace) -> int:
    """Execute the status command."""
    from datetime import datetime

    from .config import STATE_DIR, EXPECTED_GAME_COUNTS
    from .uploaders import StateManager

    logger = get_logger()

    logger.info("SportsTime Parser Status")
    logger.info("=" * 50)
    logger.info("")

    # Check for scraped data
    logger.info("[bold]Scraped Data[/bold]")
    logger.info("-" * 40)

    total_games = 0
    scraped_sports = 0

    for sport in SUPPORTED_SPORTS:
        games_file = OUTPUT_DIR / f"games_{sport}_{DEFAULT_SEASON}.json"
        teams_file = OUTPUT_DIR / f"teams_{sport}.json"
        stadiums_file = OUTPUT_DIR / f"stadiums_{sport}.json"

        if games_file.exists():
            from .models.game import load_games
            from .models.team import load_teams
            from .models.stadium import load_stadiums

            try:
                games = load_games(str(games_file))
                teams = load_teams(str(teams_file)) if teams_file.exists() else []
                stadiums = load_stadiums(str(stadiums_file)) if stadiums_file.exists() else []

                game_count = len(games)
                expected = EXPECTED_GAME_COUNTS.get(sport, 0)
                coverage = (game_count / expected * 100) if expected > 0 else 0

                # Format with coverage indicator
                if coverage >= 95:
                    status = "[green]✓[/green]"
                elif coverage >= 80:
                    status = "[yellow]~[/yellow]"
                else:
                    status = "[red]![/red]"

                logger.info(
                    f" {status} {sport.upper():6} {game_count:5} games, "
                    f"{len(teams):2} teams, {len(stadiums):2} stadiums "
                    f"({coverage:.0f}% coverage)"
                )
                total_games += game_count
                scraped_sports += 1
            except Exception as e:
                logger.info(f" [red]✗[/red] {sport.upper():6} Error loading: {e}")
        else:
            logger.info(f" [dim]-[/dim] {sport.upper():6} Not scraped")

    logger.info("-" * 40)
    logger.info(f" Total: {total_games} games across {scraped_sports} sports")
    logger.info("")

    # Check for upload sessions
    logger.info("[bold]Upload Sessions[/bold]")
    logger.info("-" * 40)

    state_manager = StateManager()
    sessions = state_manager.list_sessions()

    if sessions:
        for session in sessions:
            sport = session["sport"].upper()
            season = session["season"]
            env = session["environment"]
            progress = session["progress"]
            percent = session["progress_percent"]
            status = session["status"]
            failed = session["failed_count"]

            if status == "complete":
                status_icon = "[green]✓[/green]"
            elif failed > 0:
                status_icon = "[yellow]![/yellow]"
            else:
                status_icon = "[blue]→[/blue]"

            logger.info(
                f" {status_icon} {sport} {season} ({env}): "
                f"{progress} ({percent})"
            )
            if failed > 0:
                logger.info(f" [yellow]⚠ {failed} failed records[/yellow]")

            # Show last updated time
            try:
                last_updated = datetime.fromisoformat(session["last_updated"])
                age = datetime.utcnow() - last_updated
                if age.days > 0:
                    age_str = f"{age.days} days ago"
                elif age.seconds > 3600:
                    age_str = f"{age.seconds // 3600} hours ago"
                elif age.seconds > 60:
                    age_str = f"{age.seconds // 60} minutes ago"
                else:
                    age_str = "just now"
                logger.info(f" Last updated: {age_str}")
            except (ValueError, KeyError):
                pass
    else:
        logger.info(" No upload sessions found")

    logger.info("")
    # CloudKit configuration status
    logger.info("[bold]CloudKit Configuration[/bold]")
    logger.info("-" * 40)

    import os

    key_id = os.environ.get("CLOUDKIT_KEY_ID")
    key_path = os.environ.get("CLOUDKIT_PRIVATE_KEY_PATH")
    key_content = os.environ.get("CLOUDKIT_PRIVATE_KEY")

    if key_id:
        logger.info(f" [green]✓[/green] CLOUDKIT_KEY_ID: {key_id[:8]}...")
    else:
        logger.info(" [red]✗[/red] CLOUDKIT_KEY_ID: Not set")

    if key_path:
        from pathlib import Path

        if Path(key_path).exists():
            logger.info(f" [green]✓[/green] CLOUDKIT_PRIVATE_KEY_PATH: {key_path}")
        else:
            logger.info(f" [red]✗[/red] CLOUDKIT_PRIVATE_KEY_PATH: File not found: {key_path}")
    elif key_content:
        logger.info(" [green]✓[/green] CLOUDKIT_PRIVATE_KEY: Set (inline)")
    else:
        logger.info(" [red]✗[/red] CLOUDKIT_PRIVATE_KEY: Not set")

    logger.info("")
    return 0


def cmd_retry(args: argparse.Namespace) -> int:
    """Execute the retry command for failed uploads."""
    from .models.game import load_games
    from .models.team import load_teams
    from .models.stadium import load_stadiums
    from .uploaders import (
        CloudKitClient,
        CloudKitError,
        CloudKitAuthError,
        CloudKitRateLimitError,
        StateManager,
        game_to_cloudkit_record,
        team_to_cloudkit_record,
        stadium_to_cloudkit_record,
    )
    from .utils.progress import create_progress_bar

    logger = get_logger()
    sports = SUPPORTED_SPORTS if args.sport == "all" else [args.sport]

    logger.info(f"Retrying failed uploads for {', '.join(sports)}")
    logger.info(f"Environment: {args.environment}")
    logger.info(f"Max retries per record: {args.max_retries}")

    # Initialize CloudKit client
    client = CloudKitClient(environment=args.environment)
    if not client.is_configured:
        log_failure("CloudKit not configured")
        return 1

    # Initialize state manager
    state_manager = StateManager()

    total_retried = 0
    total_succeeded = 0
    total_failed = 0

    for sport in sports:
        # Load existing session
        session = state_manager.load_session(sport, args.season, args.environment)
        if session is None:
            logger.info(f"{sport.upper()}: No upload session found")
            continue

        # Get records eligible for retry
        retryable = session.get_retryable_records(max_retries=args.max_retries)
        if not retryable:
            failed_count = session.failed_count
            if failed_count > 0:
                logger.info(f"{sport.upper()}: {failed_count} failed records exceeded max retries")
            else:
                logger.info(f"{sport.upper()}: No failed records to retry")
            continue

        logger.info(f"{sport.upper()}: Retrying {len(retryable)} failed records...")

        # Load local data to get the records
        games_file = OUTPUT_DIR / f"games_{sport}_{args.season}.json"
        teams_file = OUTPUT_DIR / f"teams_{sport}.json"
        stadiums_file = OUTPUT_DIR / f"stadiums_{sport}.json"

        if not games_file.exists():
            logger.warning(f"No games file found: {games_file}")
            continue

        games = load_games(str(games_file))
        teams = load_teams(str(teams_file)) if teams_file.exists() else []
        stadiums = load_stadiums(str(stadiums_file)) if stadiums_file.exists() else []

        # Build record lookup
        records_to_retry = []
        retryable_set = set(retryable)

        for game in games:
            if game.id in retryable_set:
                records_to_retry.append(game_to_cloudkit_record(game))
        for team in teams:
            if team.id in retryable_set:
                records_to_retry.append(team_to_cloudkit_record(team))
        for stadium in stadiums:
            if stadium.id in retryable_set:
                records_to_retry.append(stadium_to_cloudkit_record(stadium))

        if not records_to_retry:
            logger.warning(f"{sport.upper()}: Could not find records for retry")
            continue

        # Mark as pending for retry
        for record_name in retryable:
            session.mark_pending(record_name)

        # Retry upload
        try:
description="Retrying") as progress: batch_result = client.save_records(records_to_retry) for op_result in batch_result.successful: session.mark_uploaded(op_result.record_name, op_result.record_change_tag) progress.advance() total_succeeded += 1 for op_result in batch_result.failed: session.mark_failed(op_result.record_name, op_result.error_message or "Unknown error") progress.advance() total_failed += 1 state_manager.save_session(session) total_retried += len(records_to_retry) if batch_result.failure_count > 0: log_failure(f"{sport.upper()}: {batch_result.failure_count} still failing") else: log_success(f"{sport.upper()}: All {batch_result.success_count} retries succeeded") # Clear session if all complete if session.is_complete: state_manager.delete_session(sport, args.season, args.environment) except CloudKitAuthError as e: log_failure(f"Authentication failed: {e}") return 1 except CloudKitRateLimitError: log_failure("Rate limit exceeded - try again later") state_manager.save_session(session) return 1 except CloudKitError as e: log_failure(f"Upload error: {e}") state_manager.save_session(session) continue # Summary logger.info(f"\n{'='*50}") logger.info("RETRY SUMMARY") logger.info(f"{'='*50}") logger.info(f"Retried: {total_retried}") logger.info(f"Succeeded: {total_succeeded}") logger.info(f"Failed: {total_failed}") return 0 if total_failed == 0 else 1 def cmd_clear(args: argparse.Namespace) -> int: """Execute the clear command to delete upload state.""" from .uploaders import StateManager logger = get_logger() sports = SUPPORTED_SPORTS if args.sport == "all" else [args.sport] logger.info(f"Clearing upload state for {', '.join(sports)}") state_manager = StateManager() cleared_count = 0 for sport in sports: if state_manager.delete_session(sport, args.season, args.environment): logger.info(f" [green]✓[/green] Cleared {sport.upper()} {args.season} ({args.environment})") cleared_count += 1 else: logger.info(f" [dim]-[/dim] No session for {sport.upper()} {args.season} ({args.environment})") logger.info(f"\nCleared {cleared_count} session(s)") return 0 def cmd_purge(args: argparse.Namespace) -> int: """Execute the purge command to delete all CloudKit records.""" from .uploaders.cloudkit import CloudKitClient, RecordType logger = get_logger() # Check CloudKit configuration client = CloudKitClient(environment=args.environment) if not client.is_configured: logger.error("CloudKit not configured. 
        logger.error("CloudKit not configured. Check CLOUDKIT_KEY_ID and private key.")
        return 1

    # Confirmation prompt
    if not args.yes:
        logger.warning(f"[bold red]WARNING: This will delete ALL records from CloudKit ({args.environment})![/bold red]")
        logger.warning("This action cannot be undone.")
        logger.info("")
        response = input(f"Type 'DELETE {args.environment.upper()}' to confirm: ")
        if response != f"DELETE {args.environment.upper()}":
            logger.info("Aborted.")
            return 1

    logger.info(f"Purging all records from CloudKit ({args.environment})...")
    logger.info("")

    record_types = [
        RecordType.GAME,
        RecordType.TEAM,
        RecordType.STADIUM,
        RecordType.TEAM_ALIAS,
        RecordType.STADIUM_ALIAS,
        RecordType.SPORT,
        RecordType.LEAGUE_STRUCTURE,
    ]

    total_deleted = 0
    total_failed = 0

    for record_type in record_types:
        logger.info(f"Fetching {record_type.value} records...")
        try:
            records = client.fetch_all_records(record_type)
        except Exception as e:
            logger.error(f" Failed to fetch: {e}")
            continue

        if not records:
            logger.info(f" No {record_type.value} records found")
            continue

        logger.info(f" Deleting {len(records)} {record_type.value} records...")
        try:
            result = client.delete_records(record_type, records)
            total_deleted += result.success_count
            total_failed += result.failure_count
            logger.info(f" [green]✓[/green] Deleted: {result.success_count}, Failed: {result.failure_count}")
        except Exception as e:
            logger.error(f" Failed to delete: {e}")
            total_failed += len(records)

    logger.info("")
    logger.info(f"{'='*50}")
    logger.info(f"Total deleted: {total_deleted}")
    logger.info(f"Total failed: {total_failed}")

    return 0 if total_failed == 0 else 1


def cmd_upload_static(args: argparse.Namespace) -> int:
    """Execute the upload-static command to upload reference data to CloudKit."""
    import json

    from rich.progress import Progress, SpinnerColumn, TextColumn

    from .uploaders.cloudkit import CloudKitClient, RecordType
    from .uploaders.diff import RecordDiffer
    from .models.aliases import TeamAlias, StadiumAlias
    from .models.sport import Sport, LeagueStructure, LeagueStructureType
    from .config import SCRIPTS_DIR

    logger = get_logger()

    # Check CloudKit configuration
    client = CloudKitClient(environment=args.environment)
    if not client.is_configured:
        logger.error("CloudKit not configured. Check CLOUDKIT_KEY_ID and private key.")
        return 1

    logger.info(f"Uploading static reference data to CloudKit ({args.environment})")
    logger.info(f"{'='*50}")

    differ = RecordDiffer()
    total_uploaded = 0
    total_failed = 0

    # Define sports (hardcoded since there's no sports.json)
    sports = [
        Sport(id="MLB", abbreviation="MLB", display_name="Major League Baseball",
              icon_name="baseball.fill", color_hex="#002D72", season_start_month=3, season_end_month=11),
        Sport(id="NBA", abbreviation="NBA", display_name="National Basketball Association",
              icon_name="basketball.fill", color_hex="#1D428A", season_start_month=10, season_end_month=6),
        Sport(id="NFL", abbreviation="NFL", display_name="National Football League",
              icon_name="football.fill", color_hex="#013369", season_start_month=9, season_end_month=2),
        Sport(id="NHL", abbreviation="NHL", display_name="National Hockey League",
              icon_name="hockey.puck.fill", color_hex="#000000", season_start_month=10, season_end_month=6),
        Sport(id="MLS", abbreviation="MLS", display_name="Major League Soccer",
              icon_name="soccerball", color_hex="#80A63A", season_start_month=2, season_end_month=11),
        Sport(id="WNBA", abbreviation="WNBA", display_name="Women's National Basketball Association",
              icon_name="basketball.fill", color_hex="#FF6600", season_start_month=5, season_end_month=10),
        Sport(id="NWSL", abbreviation="NWSL", display_name="National Women's Soccer League",
              icon_name="soccerball", color_hex="#003087", season_start_month=3, season_end_month=11),
    ]

    # Upload Sports
    logger.info("Uploading Sports...")
    try:
        remote_sports = client.fetch_all_records(RecordType.SPORT)
    except Exception:
        remote_sports = []

    diff_result = differ.diff_sports(sports, remote_sports)
    records_to_upload = diff_result.get_records_to_upload()
    if records_to_upload:
        result = client.save_records(records_to_upload)
        total_uploaded += result.success_count
        total_failed += result.failure_count
        logger.info(f" [green]✓[/green] Sports: {result.success_count} uploaded, {result.failure_count} failed")
    else:
        logger.info(" [dim]-[/dim] Sports: No changes")

    # Load and upload League Structures
    logger.info("Uploading League Structures...")
    league_structure_file = SCRIPTS_DIR / "league_structure.json"
    if league_structure_file.exists():
        with open(league_structure_file, "r") as f:
            data = json.load(f)

        structures = []
        for d in data:
            # Handle "type" vs "structure_type" field name
            structure_type = d.get("structure_type") or d.get("type")
            structures.append(LeagueStructure(
                id=d["id"],
                sport=d["sport"],
                structure_type=LeagueStructureType(structure_type),
                name=d["name"],
                abbreviation=d.get("abbreviation"),
                parent_id=d.get("parent_id"),
                display_order=d.get("display_order", 0),
            ))

        try:
            remote_structures = client.fetch_all_records(RecordType.LEAGUE_STRUCTURE)
        except Exception:
            remote_structures = []

        diff_result = differ.diff_league_structures(structures, remote_structures)
        records_to_upload = diff_result.get_records_to_upload()
        if records_to_upload:
            result = client.save_records(records_to_upload)
            total_uploaded += result.success_count
            total_failed += result.failure_count
            logger.info(f" [green]✓[/green] League Structures: {result.success_count} uploaded, {result.failure_count} failed")
        else:
            logger.info(f" [dim]-[/dim] League Structures: No changes ({len(structures)} unchanged)")
    else:
        logger.warning(" [yellow]![/yellow] league_structure.json not found")

    # Load and upload Team Aliases
    logger.info("Uploading Team Aliases...")
    team_aliases_file = SCRIPTS_DIR / "team_aliases.json"
    if team_aliases_file.exists():
"r") as f: data = json.load(f) aliases = [TeamAlias.from_dict(d) for d in data] try: remote_aliases = client.fetch_all_records(RecordType.TEAM_ALIAS) except Exception: remote_aliases = [] diff_result = differ.diff_team_aliases(aliases, remote_aliases) records_to_upload = diff_result.get_records_to_upload() if records_to_upload: result = client.save_records(records_to_upload) total_uploaded += result.success_count total_failed += result.failure_count logger.info(f" [green]✓[/green] Team Aliases: {result.success_count} uploaded, {result.failure_count} failed") else: logger.info(f" [dim]-[/dim] Team Aliases: No changes ({len(aliases)} unchanged)") else: logger.warning(f" [yellow]![/yellow] team_aliases.json not found") # Load and upload Stadium Aliases logger.info("Uploading Stadium Aliases...") stadium_aliases_file = SCRIPTS_DIR / "stadium_aliases.json" if stadium_aliases_file.exists(): with open(stadium_aliases_file, "r") as f: data = json.load(f) aliases = [StadiumAlias.from_dict(d) for d in data] try: remote_aliases = client.fetch_all_records(RecordType.STADIUM_ALIAS) except Exception: remote_aliases = [] diff_result = differ.diff_stadium_aliases(aliases, remote_aliases) records_to_upload = diff_result.get_records_to_upload() if records_to_upload: result = client.save_records(records_to_upload) total_uploaded += result.success_count total_failed += result.failure_count logger.info(f" [green]✓[/green] Stadium Aliases: {result.success_count} uploaded, {result.failure_count} failed") else: logger.info(f" [dim]-[/dim] Stadium Aliases: No changes ({len(aliases)} unchanged)") else: logger.warning(f" [yellow]![/yellow] stadium_aliases.json not found") logger.info(f"{'='*50}") logger.info(f"Total uploaded: {total_uploaded}") logger.info(f"Total failed: {total_failed}") return 0 if total_failed == 0 else 1 def cmd_count(args: argparse.Namespace) -> int: """Execute the count command to show CloudKit record counts.""" from .uploaders.cloudkit import CloudKitClient, RecordType logger = get_logger() # Check CloudKit configuration client = CloudKitClient(environment=args.environment) if not client.is_configured: logger.error("CloudKit not configured. Check CLOUDKIT_KEY_ID and private key.") return 1 logger.info(f"CloudKit record counts ({args.environment})") logger.info(f"{'='*50}") record_types = [ RecordType.GAME, RecordType.TEAM, RecordType.STADIUM, RecordType.TEAM_ALIAS, RecordType.STADIUM_ALIAS, RecordType.SPORT, RecordType.LEAGUE_STRUCTURE, ] total = 0 errors = [] for record_type in record_types: try: records = client.fetch_all_records(record_type) count = len(records) total += count logger.info(f" {record_type.value:<20} {count:>6}") except Exception as e: logger.error(f" {record_type.value:<20} [red]Not queryable[/red]") errors.append(record_type.value) logger.info(f"{'='*50}") logger.info(f" {'Total':<20} {total:>6}") if errors: logger.info("") logger.warning(f"[yellow]Records not queryable: {', '.join(errors)}[/yellow]") logger.warning("[yellow]Enable QUERYABLE index in CloudKit Dashboard[/yellow]") return 0 def run_cli(argv: Optional[list[str]] = None) -> int: """Parse arguments and run the appropriate command.""" parser = create_parser() args = parser.parse_args(argv) if args.verbose: set_verbose(True) if args.command is None: parser.print_help() return 1 return args.func(args)