Files
Sportstime/Scripts/sportstime.py
2026-01-10 11:16:15 -06:00

1091 lines
35 KiB
Python
Executable File

#!/usr/bin/env python3
"""
SportsTime CLI - Unified entry point for all data operations.
Usage:
python sportstime.py <command> [options]
Commands:
scrape Scrape game schedules from sports reference sites
stadiums-update Scrape ALL stadium data for all 11 sports
canonicalize Run the canonicalization pipeline
validate Validate canonical data
cloudkit Import/export data to CloudKit
generate Generate canonical data files
pipeline Run the full data pipeline
Examples:
python sportstime.py scrape --sport mlb --season 2026
python sportstime.py stadiums-update
python sportstime.py canonicalize --season 2026
python sportstime.py validate --strict
python sportstime.py cloudkit --interactive
python sportstime.py pipeline --sport all --season 2026
"""
import argparse
import sys
import os
from datetime import datetime
# Add Scripts directory to path for imports.
# SCRIPT_DIR is the absolute directory containing this file; it is placed
# first on sys.path so the sibling script modules imported lazily by the
# cmd_* functions below can be resolved.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, SCRIPT_DIR)
# ANSI colors for terminal output
class Colors:
    """Namespace of ANSI escape sequences used to style terminal output.

    The attributes are plain strings meant to be interpolated into printed
    text; ``RESET`` is emitted after styled text to clear the styling.
    """

    # Foreground colors
    HEADER = '\033[95m'
    BLUE = '\033[94m'
    CYAN = '\033[96m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'

    # Text attributes
    BOLD = '\033[1m'
    DIM = '\033[2m'

    # Clears every color/attribute set above
    RESET = '\033[0m'
def print_banner():
    """Write the colorized, boxed SportsTime title banner to stdout."""
    # The banner is a single f-string; its lines start at column 0 because
    # every character inside the literal (including leading whitespace) is
    # printed verbatim.
    print(f"""
{Colors.CYAN}{Colors.BOLD}╔═══════════════════════════════════════════════════════════╗
║ ║
║ ⚾ 🏀 🏒 ⚽ 🏈 {Colors.YELLOW}SportsTime CLI{Colors.CYAN} ⚾ 🏀 🏒 ⚽ 🏈 ║
║ ║
{Colors.DIM}Unified data pipeline for sports scheduling{Colors.CYAN}{Colors.BOLD}
║ ║
╚═══════════════════════════════════════════════════════════╝{Colors.RESET}
""")
def interactive_menu():
    """Display the interactive main menu and return the selected command args.

    Prints the banner and the numbered options, then reads one choice from
    stdin. Entering ``q`` prints a farewell and exits the process with
    status 0. An unrecognized choice prints an error and the menu is shown
    again.

    Returns:
        A list of CLI arguments for the chosen command (for example
        ``['validate', '--verbose']``). For the choices that need a season
        and sport, the value returned by ``get_season_and_sport``.
    """
    # Choices that first ask for a season/sport, keyed to the mode name
    # understood by get_season_and_sport().
    prompted_modes = {
        '1': 'scrape',
        '2': 'canonicalize-only',
        '5': 'full-pipeline',
    }
    # Choices that map straight to a fixed argument list.
    fixed_commands = {
        '3': ['validate', '--verbose'],
        '4': ['cloudkit', '--interactive'],
        '6': ['stadiums-update'],
    }

    # Loop rather than recurse so repeated invalid input cannot grow the
    # call stack.
    while True:
        print_banner()
        print(f"{Colors.BOLD}What would you like to do?{Colors.RESET}\n")
        print(f" {Colors.GREEN}[1]{Colors.RESET} {Colors.BOLD}Scrape{Colors.RESET}")
        print(f" {Colors.DIM}Download schedules from web sources{Colors.RESET}")
        print(f" {Colors.GREEN}[2]{Colors.RESET} {Colors.BOLD}Canonicalize{Colors.RESET}")
        print(f" {Colors.DIM}Generate canonical IDs (requires scraped data){Colors.RESET}")
        print(f" {Colors.GREEN}[3]{Colors.RESET} {Colors.BOLD}Validate{Colors.RESET}")
        print(f" {Colors.DIM}Check data integrity (requires canonical data){Colors.RESET}")
        print(f" {Colors.GREEN}[4]{Colors.RESET} {Colors.BOLD}Upload to CloudKit{Colors.RESET}")
        print(f" {Colors.DIM}Upload to CloudKit (requires canonical data){Colors.RESET}")
        print(f" {Colors.CYAN}[5]{Colors.RESET} {Colors.BOLD}Full Pipeline{Colors.RESET}")
        print(f" {Colors.DIM}Scrape → Canonicalize → Validate → Upload (stops on error){Colors.RESET}")
        print(f" {Colors.YELLOW}[6]{Colors.RESET} {Colors.BOLD}Update Stadiums{Colors.RESET}")
        print(f" {Colors.DIM}Scrape ALL stadium data for all 11 sports (comprehensive){Colors.RESET}")
        print(f" {Colors.DIM}[q]{Colors.RESET} Quit")
        print()

        choice = input(f"{Colors.CYAN}Enter choice:{Colors.RESET} ").strip().lower()

        if choice == 'q':
            print(f"\n{Colors.DIM}Goodbye!{Colors.RESET}")
            sys.exit(0)
        if choice in prompted_modes:
            return get_season_and_sport(prompted_modes[choice])
        if choice in fixed_commands:
            # Hand back a copy so callers cannot mutate the lookup table.
            return list(fixed_commands[choice])

        print(f"\n{Colors.RED}Invalid choice. Please try again.{Colors.RESET}")
def get_season_and_sport(mode: str):
    """Prompt for a season (and, where applicable, a sport) and build CLI args.

    Args:
        mode: One of ``'scrape'``, ``'canonicalize-only'`` or
            ``'full-pipeline'``.

    Returns:
        The argument list for the matching subcommand, or ``None`` when
        *mode* is not one of the recognized values.
    """
    current_year = datetime.now().year

    # Get season. Re-prompt on non-numeric input: this function runs outside
    # any exception handler in the interactive loop, so a ValueError here
    # would terminate the whole CLI.
    while True:
        season_input = input(f"{Colors.CYAN}Enter season year [{current_year}]:{Colors.RESET} ").strip()
        if not season_input:
            season = current_year
            break
        try:
            season = int(season_input)
        except ValueError:
            print(f"{Colors.RED}Invalid season year. Please enter a whole number.{Colors.RESET}")
            continue
        break

    if mode == 'canonicalize-only':
        # The 'canonicalize' subcommand defines no --sport option, so passing
        # one makes argparse reject the command line; skip the sport prompt.
        # NOTE(review): the menu text says this step "requires scraped data",
        # which may mean '--skip-scrape' was intended here — confirm.
        return ['canonicalize', '--season', str(season)]

    # Get sport
    print(f"\n{Colors.BOLD}Select sport:{Colors.RESET}")
    print(f" {Colors.GREEN}[1]{Colors.RESET} All Sports")
    print(f" {Colors.GREEN}[2]{Colors.RESET} MLB {Colors.GREEN}[3]{Colors.RESET} NBA {Colors.GREEN}[4]{Colors.RESET} NHL {Colors.GREEN}[5]{Colors.RESET} NFL")
    print(f" {Colors.GREEN}[6]{Colors.RESET} MLS {Colors.GREEN}[7]{Colors.RESET} WNBA {Colors.GREEN}[8]{Colors.RESET} NWSL")
    sport_map = {
        '1': 'all', '2': 'mlb', '3': 'nba', '4': 'nhl', '5': 'nfl',
        '6': 'mls', '7': 'wnba', '8': 'nwsl',
    }
    sport_choice = input(f"{Colors.CYAN}Enter choice [1]:{Colors.RESET} ").strip()
    # Empty or unrecognized input falls back to the advertised default.
    sport = sport_map.get(sport_choice, 'all')

    if mode == 'scrape':
        return ['scrape', '--sport', sport, '--season', str(season)]
    if mode == 'full-pipeline':
        return ['full-pipeline', '--sport', sport, '--season', str(season)]
    return None
def scrape_submenu():
    """Prompt for a sport and season and return ``scrape`` command args.

    Entering ``b`` returns whatever ``interactive_menu()`` returns instead.
    Invalid sport choices and non-numeric season years are re-prompted.

    Returns:
        ``['scrape', '--sport', <code>, '--season', <year>]``.
    """
    sports = [
        ('1', 'all', 'All Sports'),
        ('2', 'mlb', 'MLB - Major League Baseball'),
        ('3', 'nba', 'NBA - National Basketball Association'),
        ('4', 'nhl', 'NHL - National Hockey League'),
        ('5', 'nfl', 'NFL - National Football League'),
        ('6', 'mls', 'MLS - Major League Soccer'),
        ('7', 'wnba', 'WNBA - Women\'s National Basketball Association'),
        ('8', 'nwsl', 'NWSL - National Women\'s Soccer League'),
        ('b', 'back', 'Back to main menu'),
    ]
    code_by_key = {key: sport_code for key, sport_code, _ in sports}

    # Loop (instead of recursing) until a valid sport is chosen.
    while True:
        print(f"\n{Colors.BOLD}Select sport to scrape:{Colors.RESET}\n")
        for key, _, desc in sports:
            key_color = Colors.DIM if key == 'b' else Colors.GREEN
            print(f" {key_color}[{key}]{Colors.RESET} {desc}")
        print()
        choice = input(f"{Colors.CYAN}Enter choice:{Colors.RESET} ").strip().lower()
        if choice == 'b':
            return interactive_menu()
        sport = code_by_key.get(choice)
        if sport:
            break
        print(f"\n{Colors.RED}Invalid choice.{Colors.RESET}")

    # Get season; re-prompt rather than raising ValueError on bad input.
    current_year = datetime.now().year
    while True:
        season_input = input(f"{Colors.CYAN}Enter season year [{current_year}]:{Colors.RESET} ").strip()
        if not season_input:
            season = current_year
            break
        try:
            season = int(season_input)
        except ValueError:
            print(f"{Colors.RED}Invalid season year. Please enter a whole number.{Colors.RESET}")
            continue
        break

    return ['scrape', '--sport', sport, '--season', str(season)]
def validate_submenu():
    """Prompt for a validation mode and return ``validate`` command args.

    Entering ``b`` returns whatever ``interactive_menu()`` returns instead.
    Any other unrecognized choice prints an error and shows the options
    again.

    Returns:
        ``['validate']`` followed by the flags of the selected option.
    """
    options = [
        ('1', 'Basic validation', []),
        ('2', 'Strict validation (warnings become errors)', ['--strict']),
        ('3', 'Verbose validation', ['--verbose']),
        ('4', 'Strict + Verbose', ['--strict', '--verbose']),
        ('b', 'Back to main menu', None),
    ]
    # Only entries that carry a flag list are selectable validation modes.
    flags_by_key = {
        key: flags for key, _, flags in options if flags is not None
    }

    while True:
        print(f"\n{Colors.BOLD}Validation options:{Colors.RESET}\n")
        for key, desc, _ in options:
            if key != 'b':
                print(f" {Colors.GREEN}[{key}]{Colors.RESET} {desc}")
            else:
                print(f" {Colors.DIM}[{key}]{Colors.RESET} {desc}")
        print()

        choice = input(f"{Colors.CYAN}Enter choice:{Colors.RESET} ").strip().lower()
        if choice == 'b':
            return interactive_menu()
        if choice in flags_by_key:
            return ['validate'] + flags_by_key[choice]

        print(f"\n{Colors.RED}Invalid choice.{Colors.RESET}")
def pipeline_submenu():
    """Prompt for sport, season and verbosity; return ``pipeline`` args.

    Entering ``b`` at the sport prompt returns whatever
    ``interactive_menu()`` returns instead. Invalid sport choices and
    non-numeric season years are re-prompted.

    Returns:
        ``['pipeline', '--sport', <code>, '--season', <year>]`` with
        ``'--verbose'`` appended when the user answers ``y``.
    """
    sports = [
        ('1', 'all', 'All Sports'),
        ('2', 'mlb', 'MLB only'),
        ('3', 'nba', 'NBA only'),
        ('4', 'nhl', 'NHL only'),
        ('5', 'nfl', 'NFL only'),
        ('6', 'mls', 'MLS only'),
        ('7', 'wnba', 'WNBA only'),
        ('8', 'nwsl', 'NWSL only'),
        ('b', 'back', 'Back to main menu'),
    ]
    code_by_key = {key: sport_code for key, sport_code, _ in sports}

    # Loop (instead of recursing) until a valid sport is chosen.
    while True:
        print(f"\n{Colors.BOLD}Pipeline options:{Colors.RESET}\n")
        for key, _, desc in sports:
            key_color = Colors.DIM if key == 'b' else Colors.GREEN
            print(f" {key_color}[{key}]{Colors.RESET} {desc}")
        print()
        choice = input(f"{Colors.CYAN}Enter choice:{Colors.RESET} ").strip().lower()
        if choice == 'b':
            return interactive_menu()
        sport = code_by_key.get(choice)
        if sport:
            break
        print(f"\n{Colors.RED}Invalid choice.{Colors.RESET}")

    # Get season; re-prompt rather than raising ValueError on bad input.
    current_year = datetime.now().year
    while True:
        season_input = input(f"{Colors.CYAN}Enter season year [{current_year}]:{Colors.RESET} ").strip()
        if not season_input:
            season = current_year
            break
        try:
            season = int(season_input)
        except ValueError:
            print(f"{Colors.RED}Invalid season year. Please enter a whole number.{Colors.RESET}")
            continue
        break

    # Verbose?
    verbose = input(f"{Colors.CYAN}Verbose output? [y/N]:{Colors.RESET} ").strip().lower()
    cmd = ['pipeline', '--sport', sport, '--season', str(season)]
    if verbose == 'y':
        cmd.append('--verbose')
    return cmd
def canonicalize_submenu():
    """Prompt for a canonicalization mode and season; return command args.

    Entering ``b`` returns whatever ``interactive_menu()`` returns instead.
    Invalid choices and non-numeric season years are re-prompted.

    Returns:
        ``['canonicalize', '--season', <year>]`` followed by the flags of the
        selected option.
    """
    options = [
        ('1', 'Full pipeline (scrape + canonicalize + validate)', []),
        ('2', 'Skip scraping (use existing data)', ['--skip-scrape']),
        ('3', 'Skip validation', ['--no-validate']),
        ('4', 'Verbose output', ['--verbose']),
        ('b', 'Back to main menu', None),
    ]
    flags_by_key = {key: option_flags for key, _, option_flags in options}

    # Loop (instead of recursing) until a valid option is chosen.
    while True:
        print(f"\n{Colors.BOLD}Canonicalization options:{Colors.RESET}\n")
        for key, desc, _ in options:
            key_color = Colors.DIM if key == 'b' else Colors.GREEN
            print(f" {key_color}[{key}]{Colors.RESET} {desc}")
        print()
        choice = input(f"{Colors.CYAN}Enter choice:{Colors.RESET} ").strip().lower()
        if choice == 'b':
            return interactive_menu()
        flags = flags_by_key.get(choice)
        # Compare against None explicitly: option 1 has an empty (falsy)
        # flag list but is still a valid selection.
        if flags is not None:
            break
        print(f"\n{Colors.RED}Invalid choice.{Colors.RESET}")

    # Get season; re-prompt rather than raising ValueError on bad input.
    current_year = datetime.now().year
    while True:
        season_input = input(f"{Colors.CYAN}Enter season year [{current_year}]:{Colors.RESET} ").strip()
        if not season_input:
            season = current_year
            break
        try:
            season = int(season_input)
        except ValueError:
            print(f"{Colors.RED}Invalid season year. Please enter a whole number.{Colors.RESET}")
            continue
        break

    return ['canonicalize', '--season', str(season)] + flags
def cmd_scrape(args):
    """Run ``scrape_schedules.main()`` with options taken from *args*.

    The target script reads its options from ``sys.argv``, so this function
    replaces ``sys.argv`` with an equivalent command line before calling it.

    Args:
        args: Parsed namespace providing ``sport``, ``season``, ``output``
            and ``stadiums_only``.
    """
    from scrape_schedules import main as scrape_main

    forwarded = [
        'scrape_schedules.py',
        '--sport', args.sport,
        '--season', str(args.season),
        '--output', args.output,
    ]
    if args.stadiums_only:
        forwarded += ['--stadiums-only']

    sys.argv = forwarded
    scrape_main()
def cmd_stadiums_update(args):
    """Run ``scrape_schedules.main()`` in its stadium-update mode.

    Replaces ``sys.argv`` with a command line that always carries both
    ``--stadiums-update`` and ``--stadiums-only``.

    Args:
        args: Parsed namespace providing ``output``.
    """
    from scrape_schedules import main as scrape_main

    sys.argv = [
        'scrape_schedules.py',
        '--output', args.output,
        '--stadiums-update',
        '--stadiums-only',
    ]
    scrape_main()
def cmd_canonicalize(args):
    """Run ``run_canonicalization_pipeline.main()`` with options from *args*.

    Replaces ``sys.argv`` with the equivalent command line before the call.

    Args:
        args: Parsed namespace providing ``season``, ``output`` and the
            boolean switches ``skip_scrape``, ``no_validate``, ``verbose``
            and ``strict``.
    """
    from run_canonicalization_pipeline import main as canon_main

    forwarded = [
        'run_canonicalization_pipeline.py',
        '--season', str(args.season),
        '--output', args.output,
    ]
    # Boolean switches are forwarded only when set, in this fixed order.
    switches = (
        ('skip_scrape', '--skip-scrape'),
        ('no_validate', '--no-validate'),
        ('verbose', '--verbose'),
        ('strict', '--strict'),
    )
    for attr, flag in switches:
        if getattr(args, attr):
            forwarded.append(flag)

    sys.argv = forwarded
    canon_main()
def cmd_validate(args):
    """Run ``validate_canonical.main()`` with options taken from *args*.

    Replaces ``sys.argv`` with the equivalent command line before the call.
    Options whose value is empty/``None`` and switches that are off are not
    forwarded.

    Args:
        args: Parsed namespace providing ``data_dir``, ``stadiums``,
            ``teams``, ``games``, ``aliases``, ``output``, ``verbose`` and
            ``strict``.
    """
    from validate_canonical import main as validate_main

    forwarded = ['validate_canonical.py']

    # Options that carry a value, forwarded in this fixed order.
    valued_options = (
        ('data_dir', '--data-dir'),
        ('stadiums', '--stadiums'),
        ('teams', '--teams'),
        ('games', '--games'),
        ('aliases', '--aliases'),
        ('output', '--output'),
    )
    for attr, option in valued_options:
        value = getattr(args, attr)
        if value:
            forwarded += [option, value]

    for attr, flag in (('verbose', '--verbose'), ('strict', '--strict')):
        if getattr(args, attr):
            forwarded.append(flag)

    sys.argv = forwarded
    validate_main()
def cmd_cloudkit(args):
    """Run ``cloudkit_import.main()`` with options taken from *args*.

    Replaces ``sys.argv`` with the equivalent command line before the call.
    ``--env`` and ``--data-dir`` are always forwarded; every other option is
    forwarded only when its value is truthy.

    Args:
        args: Parsed namespace produced by the ``cloudkit`` subparser.
    """
    from cloudkit_import import main as cloudkit_main

    forwarded = ['cloudkit_import.py']

    def add_value(option, value):
        # Forward "option value" only when a value was supplied.
        if value:
            forwarded.extend([option, value])

    def add_switch(flag, enabled):
        # Forward a bare flag only when it is switched on.
        if enabled:
            forwarded.append(flag)

    add_value('--key-id', args.key_id)
    add_value('--key-file', args.key_file)
    add_value('--container', args.container)
    forwarded += ['--env', args.env]
    forwarded += ['--data-dir', args.data_dir]
    add_switch('--stadiums-only', args.stadiums_only)
    add_switch('--games-only', args.games_only)
    add_value('--games-files', args.games_files)
    add_switch('--league-structure-only', args.league_structure_only)
    add_switch('--team-aliases-only', args.team_aliases_only)
    add_switch('--stadium-aliases-only', args.stadium_aliases_only)
    add_switch('--canonical-only', args.canonical_only)
    add_switch('--delete-all', args.delete_all)
    add_switch('--delete-only', args.delete_only)
    add_switch('--dry-run', args.dry_run)
    add_switch('--verbose', args.verbose)
    add_switch('--interactive', args.interactive)

    sys.argv = forwarded
    cloudkit_main()
def cmd_generate(args):
    """Run ``generate_canonical_data.main()`` with the chosen output path.

    Replaces ``sys.argv`` with the equivalent command line before the call.

    Args:
        args: Parsed namespace providing ``output``.
    """
    from generate_canonical_data import main as generate_main

    sys.argv = ['generate_canonical_data.py', '--output', args.output]
    generate_main()
def cmd_pipeline(args):
    """Run ``run_pipeline.main()`` with options taken from *args*.

    Replaces ``sys.argv`` with the equivalent command line before the call.

    Args:
        args: Parsed namespace providing ``season``, ``sport``, ``output``
            and the boolean switches ``skip_scrape``, ``no_validate`` and
            ``verbose``.
    """
    from run_pipeline import main as pipeline_main

    forwarded = [
        'run_pipeline.py',
        '--season', str(args.season),
        '--sport', args.sport,
        '--output', args.output,
    ]
    # Boolean switches are forwarded only when set, in this fixed order.
    switches = (
        ('skip_scrape', '--skip-scrape'),
        ('no_validate', '--no-validate'),
        ('verbose', '--verbose'),
    )
    for attr, flag in switches:
        if getattr(args, attr):
            forwarded.append(flag)

    sys.argv = forwarded
    pipeline_main()
def cmd_full_pipeline(args):
    """Run complete pipeline: Scrape → Canonicalize → Validate → Upload (stops on error).

    Each step imports the corresponding script module, replaces ``sys.argv``
    with that script's command line and calls its ``main()``. The upload step
    is omitted when ``args.skip_upload`` is set.

    Args:
        args: Parsed namespace providing ``sport``, ``season``, ``output``
            and ``skip_upload``.

    Raises:
        SystemExit: With status 1 when a step fails, so that shell callers
            see a non-zero exit status for a pipeline that did not finish.
    """
    print(f"\n{Colors.CYAN}{Colors.BOLD}╔═══════════════════════════════════════════════════════════╗")
    print(f"║ FULL PIPELINE - {args.sport.upper()} {args.season}")
    print(f"╚═══════════════════════════════════════════════════════════╝{Colors.RESET}\n")

    season = str(args.season)
    # (name, heading, description, module to import, argv handed to it)
    steps = [
        ("Scrape", "SCRAPE", "Downloading schedules from web sources",
         'scrape_schedules',
         ['scrape_schedules.py', '--sport', args.sport, '--season', season, '--output', args.output]),
        ("Canonicalize", "CANONICALIZE", "Generating canonical IDs",
         'run_canonicalization_pipeline',
         ['run_canonicalization_pipeline.py', '--season', season, '--output', args.output, '--skip-scrape']),
        ("Validate", "VALIDATE", "Checking data integrity",
         'validate_canonical',
         ['validate_canonical.py', '--data-dir', args.output, '--verbose']),
    ]
    if not args.skip_upload:
        steps.append(
            ("Upload", "UPLOAD TO CLOUDKIT", "Uploading to CloudKit",
             'cloudkit_import',
             ['cloudkit_import.py', '--data-dir', args.output, '--interactive'])
        )

    total = len(steps)
    for number, (name, heading, description, module_name, step_argv) in enumerate(steps, start=1):
        print(f"{Colors.CYAN}[{number}/{total}] {heading}{Colors.RESET}")
        print(f" {Colors.DIM}{description}...{Colors.RESET}\n")
        if not _run_pipeline_step(name, module_name, step_argv):
            # Report the real step count; it is 3, not 4, with --skip-upload.
            print(f"{Colors.YELLOW}Pipeline stopped at step {number}/{total}.{Colors.RESET}")
            sys.exit(1)

    # Success!
    print(f"\n{Colors.GREEN}{Colors.BOLD}╔═══════════════════════════════════════════════════════════╗")
    print(f"║ PIPELINE COMPLETE ✓ ║")
    print(f"╚═══════════════════════════════════════════════════════════╝{Colors.RESET}\n")


def _run_pipeline_step(name, module_name, step_argv):
    """Import *module_name* and run its ``main()``; return True on success.

    Args:
        name: Step name used in the status messages (e.g. ``"Scrape"``).
        module_name: Name of the script module exposing ``main()``.
        step_argv: Value assigned to ``sys.argv`` before ``main()`` is called.

    Returns:
        ``True`` when ``main()`` returned normally or exited with status
        0/``None``; ``False`` after printing a failure message otherwise.
    """
    import importlib

    try:
        # Import inside the try so a missing module is reported as a failed
        # step rather than an unhandled ImportError.
        step_main = importlib.import_module(module_name).main
        sys.argv = step_argv
        step_main()
    except SystemExit as exc:
        # A script may finish through sys.exit(); a code of 0 or None is a
        # successful exit, anything else is a failure.
        if exc.code not in (0, None):
            print(f"\n{Colors.RED}✗ {name} FAILED (exit code {exc.code}){Colors.RESET}")
            return False
    except Exception as exc:
        print(f"\n{Colors.RED}✗ {name} FAILED: {exc}{Colors.RESET}")
        return False

    print(f"\n{Colors.GREEN}✓ {name} completed{Colors.RESET}\n")
    return True
def cmd_canonicalize_stadiums(args):
    """Run ``canonicalize_stadiums.main()`` with options taken from *args*.

    Replaces ``sys.argv`` with the equivalent command line before the call.

    Args:
        args: Parsed namespace providing ``input`` (optional), ``output``
            and ``verbose``.
    """
    from canonicalize_stadiums import main as stadium_main

    forwarded = ['canonicalize_stadiums.py']
    if args.input:
        forwarded += ['--input', args.input]
    forwarded += ['--output', args.output]
    if args.verbose:
        forwarded += ['--verbose']

    sys.argv = forwarded
    stadium_main()
def cmd_canonicalize_teams(args):
    """Run ``canonicalize_teams.main()`` with options taken from *args*.

    Replaces ``sys.argv`` with the equivalent command line before the call.

    Args:
        args: Parsed namespace providing ``stadiums`` (optional), ``output``
            and ``verbose``.
    """
    from canonicalize_teams import main as team_main

    forwarded = ['canonicalize_teams.py']
    if args.stadiums:
        forwarded += ['--stadiums', args.stadiums]
    forwarded += ['--output', args.output]
    if args.verbose:
        forwarded += ['--verbose']

    sys.argv = forwarded
    team_main()
def cmd_canonicalize_games(args):
    """Run ``canonicalize_games.main()`` with options taken from *args*.

    Replaces ``sys.argv`` with the equivalent command line before the call.

    Args:
        args: Parsed namespace providing the optional paths ``games``,
            ``teams`` and ``aliases``, plus ``output`` and ``verbose``.
    """
    from canonicalize_games import main as game_main

    forwarded = ['canonicalize_games.py']
    # Optional input paths are forwarded only when supplied.
    for attr, option in (
        ('games', '--games'),
        ('teams', '--teams'),
        ('aliases', '--aliases'),
    ):
        value = getattr(args, attr)
        if value:
            forwarded += [option, value]
    forwarded += ['--output', args.output]
    if args.verbose:
        forwarded += ['--verbose']

    sys.argv = forwarded
    game_main()
def main():
    """Entry point: dispatch a subcommand, or run the interactive menu.

    With command-line arguments, parses them and calls the selected
    subcommand's handler; with no subcommand, prints help and exits with
    status 1. With no arguments at all, shows the interactive menu in a loop
    and runs each selected command until the menu stops returning one, then
    exits with status 0.
    """
    parser = _build_parser()

    # Change to Scripts directory for consistent relative paths
    os.chdir(SCRIPT_DIR)

    # If no arguments, show interactive menu in a loop
    if len(sys.argv) == 1:
        while True:
            cmd_args = interactive_menu()
            if not cmd_args:
                break
            sys.argv = ['sportstime'] + cmd_args
            print(f"\n{Colors.DIM}Running: sportstime {' '.join(cmd_args)}{Colors.RESET}\n")
            try:
                # Parse inside the try: argparse reports a bad command line
                # through SystemExit, which must not end the menu loop.
                args = parser.parse_args()
                if args.command is None:
                    continue
                args.func(args)
            except SystemExit:
                pass  # Catch sys.exit() from subcommands and argparse
            except Exception as e:
                print(f"\n{Colors.RED}Error: {e}{Colors.RESET}")
            # Prompt to continue
            print(f"\n{Colors.GREEN}{'─' * 60}{Colors.RESET}")
            input(f"{Colors.CYAN}Press Enter to continue...{Colors.RESET}")
            # Reset sys.argv for next iteration
            sys.argv = ['sportstime']
        sys.exit(0)

    # Parse and execute (command-line mode)
    args = parser.parse_args()
    if args.command is None:
        parser.print_help()
        sys.exit(1)
    # Execute the command
    args.func(args)


def _add_season_option(parser, help_text='Season year (default: 2026)'):
    """Add the integer ``--season`` option (default 2026) to *parser*."""
    parser.add_argument('--season', type=int, default=2026, help=help_text)


def _add_sport_option(parser, help_text):
    """Add the ``--sport`` choice option (default ``all``) to *parser*."""
    parser.add_argument(
        '--sport',
        choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'all'],
        default='all',
        help=help_text,
    )


def _add_output_dir_option(parser):
    """Add the ``--output`` directory option (default ``./data``) to *parser*."""
    parser.add_argument(
        '--output',
        type=str,
        default='./data',
        help='Output directory (default: ./data)',
    )


def _add_verbose_option(parser):
    """Add the ``--verbose``/``-v`` switch to *parser*."""
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')


def _add_optional_value(parser, flag, help_text):
    """Add a string option *flag* that defaults to ``None`` to *parser*."""
    parser.add_argument(flag, type=str, default=None, help=help_text)


def _add_switch(parser, flag, help_text):
    """Add a boolean ``store_true`` switch *flag* to *parser*."""
    parser.add_argument(flag, action='store_true', help=help_text)


def _build_parser():
    """Build the ``sportstime`` argument parser with all of its subcommands.

    Every subparser stores its handler in the ``func`` default, so the caller
    can dispatch with ``args.func(args)``.
    """
    parser = argparse.ArgumentParser(
        prog='sportstime',
        description='SportsTime CLI - Unified entry point for all data operations',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
%(prog)s scrape --sport mlb --season 2026
%(prog)s scrape --sport all --season 2026
%(prog)s canonicalize --season 2026 --verbose
%(prog)s validate --strict
%(prog)s cloudkit --interactive
%(prog)s cloudkit --env production --stadiums-only
%(prog)s pipeline --sport all --season 2026
"""
    )
    subparsers = parser.add_subparsers(dest='command', help='Available commands')

    # ============================================================
    # SCRAPE command
    # ============================================================
    scrape_parser = subparsers.add_parser(
        'scrape',
        help='Scrape game schedules from sports reference sites',
        description='Scrapes NBA, MLB, NHL, WNBA, MLS, and NWSL schedules'
    )
    _add_sport_option(scrape_parser, 'Sport to scrape (default: all)')
    _add_season_option(
        scrape_parser,
        'Season year - ending year for sports that span years (default: 2026)'
    )
    _add_output_dir_option(scrape_parser)
    _add_switch(scrape_parser, '--stadiums-only', 'Only scrape stadium data, not game schedules')
    scrape_parser.set_defaults(func=cmd_scrape)

    # ============================================================
    # STADIUMS-UPDATE command (comprehensive stadium scraping)
    # ============================================================
    stadiums_update_parser = subparsers.add_parser(
        'stadiums-update',
        help='Scrape ALL stadium data for all 8 sports',
        description='Comprehensive stadium scraping for NBA, MLB, NHL, NFL, WNBA, MLS, and NWSL'
    )
    _add_output_dir_option(stadiums_update_parser)
    stadiums_update_parser.set_defaults(func=cmd_stadiums_update)

    # ============================================================
    # CANONICALIZE command (full pipeline)
    # ============================================================
    canon_parser = subparsers.add_parser(
        'canonicalize',
        help='Run the canonicalization pipeline',
        description='Runs the full canonicalization pipeline: scrape -> canonicalize -> validate'
    )
    _add_season_option(canon_parser)
    _add_output_dir_option(canon_parser)
    _add_switch(canon_parser, '--skip-scrape', 'Skip scraping, use existing data')
    _add_switch(canon_parser, '--no-validate', 'Skip validation step')
    _add_verbose_option(canon_parser)
    _add_switch(canon_parser, '--strict', 'Strict validation mode')
    canon_parser.set_defaults(func=cmd_canonicalize)

    # ============================================================
    # VALIDATE command
    # ============================================================
    validate_parser = subparsers.add_parser(
        'validate',
        help='Validate canonical data',
        description='Validates canonical data files for consistency and completeness'
    )
    _add_optional_value(validate_parser, '--data-dir', 'Data directory (auto-detects if not specified)')
    _add_optional_value(validate_parser, '--stadiums', 'Path to stadiums_canonical.json')
    _add_optional_value(validate_parser, '--teams', 'Path to teams_canonical.json')
    _add_optional_value(validate_parser, '--games', 'Path to games_canonical.json')
    _add_optional_value(validate_parser, '--aliases', 'Path to aliases file')
    # Unlike the other commands, --output here names a report file and has
    # no default.
    _add_optional_value(validate_parser, '--output', 'Output file for validation report')
    _add_verbose_option(validate_parser)
    _add_switch(validate_parser, '--strict', 'Strict validation mode (warnings become errors)')
    validate_parser.set_defaults(func=cmd_validate)

    # ============================================================
    # CLOUDKIT command
    # ============================================================
    cloudkit_parser = subparsers.add_parser(
        'cloudkit',
        help='Import/export data to CloudKit',
        description='Import or export data to/from CloudKit'
    )
    _add_optional_value(cloudkit_parser, '--key-id', 'CloudKit key ID')
    _add_optional_value(cloudkit_parser, '--key-file', 'Path to CloudKit private key file')
    _add_optional_value(cloudkit_parser, '--container', 'CloudKit container ID')
    cloudkit_parser.add_argument(
        '--env',
        choices=['development', 'production'],
        default='development',
        help='CloudKit environment (default: development)'
    )
    cloudkit_parser.add_argument(
        '--data-dir',
        type=str,
        default='./data',
        help='Data directory (default: ./data)'
    )
    _add_switch(cloudkit_parser, '--stadiums-only', 'Import only stadiums')
    _add_switch(cloudkit_parser, '--games-only', 'Import only games (all files)')
    _add_optional_value(
        cloudkit_parser,
        '--games-files',
        'Import specific game files (e.g., mlb_2025.json,nba_2025.json)'
    )
    _add_switch(cloudkit_parser, '--league-structure-only', 'Import only league structure')
    _add_switch(cloudkit_parser, '--team-aliases-only', 'Import only team aliases')
    _add_switch(cloudkit_parser, '--stadium-aliases-only', 'Import only stadium aliases')
    _add_switch(
        cloudkit_parser,
        '--canonical-only',
        'Import only canonical data (league structure + aliases)'
    )
    _add_switch(cloudkit_parser, '--delete-all', 'Delete all records before importing')
    _add_switch(cloudkit_parser, '--delete-only', 'Only delete records, do not import')
    _add_switch(cloudkit_parser, '--dry-run', 'Show what would be done without making changes')
    _add_verbose_option(cloudkit_parser)
    cloudkit_parser.add_argument(
        '--interactive', '-i',
        action='store_true',
        help='Show interactive menu'
    )
    cloudkit_parser.set_defaults(func=cmd_cloudkit)

    # ============================================================
    # GENERATE command
    # ============================================================
    generate_parser = subparsers.add_parser(
        'generate',
        help='Generate canonical data files',
        description='Generate canonical data JSON files from hardcoded data'
    )
    _add_output_dir_option(generate_parser)
    generate_parser.set_defaults(func=cmd_generate)

    # ============================================================
    # PIPELINE command
    # ============================================================
    pipeline_parser = subparsers.add_parser(
        'pipeline',
        help='Run the full data pipeline',
        description='Run the full data pipeline: fetch, validate, and report'
    )
    _add_season_option(pipeline_parser)
    _add_sport_option(pipeline_parser, 'Sport to process (default: all)')
    _add_output_dir_option(pipeline_parser)
    _add_switch(pipeline_parser, '--skip-scrape', 'Skip scraping, use existing data')
    _add_switch(pipeline_parser, '--no-validate', 'Skip validation step')
    _add_verbose_option(pipeline_parser)
    pipeline_parser.set_defaults(func=cmd_pipeline)

    # full-pipeline: scrape → canonicalize → validate → upload (stops on error)
    full_pipeline_parser = subparsers.add_parser(
        'full-pipeline',
        help='Run complete pipeline: Scrape → Canonicalize → Validate → Upload',
        description='Run all steps in sequence, stopping on any error'
    )
    _add_season_option(full_pipeline_parser)
    _add_sport_option(full_pipeline_parser, 'Sport to process (default: all)')
    _add_output_dir_option(full_pipeline_parser)
    _add_switch(full_pipeline_parser, '--skip-upload', 'Skip CloudKit upload step')
    full_pipeline_parser.set_defaults(func=cmd_full_pipeline)

    # ============================================================
    # Subcommands for individual canonicalization steps
    # ============================================================
    # canonicalize-stadiums
    canon_stadiums_parser = subparsers.add_parser(
        'canonicalize-stadiums',
        help='Canonicalize stadium data only',
        description='Canonicalize raw stadium data into canonical format'
    )
    _add_optional_value(
        canon_stadiums_parser,
        '--input',
        'Input stadiums file (default: ./data/stadiums.json)'
    )
    _add_output_dir_option(canon_stadiums_parser)
    _add_verbose_option(canon_stadiums_parser)
    canon_stadiums_parser.set_defaults(func=cmd_canonicalize_stadiums)

    # canonicalize-teams
    canon_teams_parser = subparsers.add_parser(
        'canonicalize-teams',
        help='Canonicalize team data only',
        description='Canonicalize raw team data into canonical format'
    )
    _add_optional_value(canon_teams_parser, '--stadiums', 'Path to canonical stadiums file')
    _add_output_dir_option(canon_teams_parser)
    _add_verbose_option(canon_teams_parser)
    canon_teams_parser.set_defaults(func=cmd_canonicalize_teams)

    # canonicalize-games
    canon_games_parser = subparsers.add_parser(
        'canonicalize-games',
        help='Canonicalize game data only',
        description='Canonicalize raw game data into canonical format'
    )
    _add_optional_value(canon_games_parser, '--games', 'Path to raw games file')
    _add_optional_value(canon_games_parser, '--teams', 'Path to canonical teams file')
    _add_optional_value(canon_games_parser, '--aliases', 'Path to stadium aliases file')
    _add_output_dir_option(canon_games_parser)
    _add_verbose_option(canon_games_parser)
    canon_games_parser.set_defaults(func=cmd_canonicalize_games)

    return parser
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()