- Remove College Football, NASCAR, and PGA from scraper and app - Clean all data files (stadiums, games, pipeline reports) - Update Sport.swift enum and all UI components - Add sportstime.py CLI tool for pipeline management - Add DATA_SCRAPING.md documentation - Add WNBA/MLS/NWSL implementation documentation - Scraper now supports: NBA, MLB, NHL, NFL, WNBA, MLS, NWSL, CBB Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1093 lines
35 KiB
Python
Executable File
1093 lines
35 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
SportsTime CLI - Unified entry point for all data operations.
|
|
|
|
Usage:
|
|
python sportstime.py <command> [options]
|
|
|
|
Commands:
|
|
scrape Scrape game schedules from sports reference sites
|
|
stadiums-update Scrape ALL stadium data for all 8 sports
|
|
canonicalize Run the canonicalization pipeline
|
|
validate Validate canonical data
|
|
cloudkit Import/export data to CloudKit
|
|
generate Generate canonical data files
|
|
pipeline Run the full data pipeline
|
|
|
|
Examples:
|
|
python sportstime.py scrape --sport mlb --season 2026
|
|
python sportstime.py stadiums-update
|
|
python sportstime.py canonicalize --season 2026
|
|
python sportstime.py validate --strict
|
|
python sportstime.py cloudkit --interactive
|
|
python sportstime.py pipeline --sport all --season 2026
|
|
"""
|
|
|
|
import argparse
|
|
import sys
|
|
import os
|
|
from datetime import datetime
|
|
|
|
# Put the directory that holds this script at the front of the import path so
# the modules imported by the command handlers resolve from any launch directory.
_script_path = os.path.abspath(__file__)
SCRIPT_DIR = os.path.dirname(_script_path)
sys.path.insert(0, SCRIPT_DIR)
|
|
|
|
# ANSI colors for terminal output
|
|
class Colors:
    """ANSI escape sequences used to colour and style terminal output."""

    # Foreground colour codes
    HEADER = '\x1b[95m'
    BLUE = '\x1b[94m'
    CYAN = '\x1b[96m'
    GREEN = '\x1b[92m'
    YELLOW = '\x1b[93m'
    RED = '\x1b[91m'

    # Text attribute codes
    BOLD = '\x1b[1m'
    DIM = '\x1b[2m'

    # Emitted after styled text throughout this file to end the styling
    RESET = '\x1b[0m'
|
|
|
|
|
|
def print_banner():
    """Print the SportsTime banner.

    The banner is a box whose interior rows are centred to the width of the
    top border. Padding is computed from the *visible* text (colour escape
    codes are excluded, wide characters such as the emoji count as two
    columns) so the right-hand border lines up with the corners.
    """
    import unicodedata

    top_border = "╔═══════════════════════════════════════════════════════════╗"
    bottom_border = "╚═══════════════════════════════════════════════════════════╝"
    inner_width = len(top_border) - 2

    def display_width(text):
        """Return the number of terminal columns *text* is expected to occupy."""
        columns = 0
        for char in text:
            # Variation selectors / combining marks occupy no column.
            if unicodedata.category(char) in ('Mn', 'Me', 'Cf'):
                continue
            columns += 2 if unicodedata.east_asian_width(char) in ('W', 'F') else 1
        return columns

    def boxed(plain='', styled=None):
        """Return one box row with *styled* centred using *plain* for its width."""
        content = plain if styled is None else styled
        gap = max(inner_width - display_width(plain), 0)
        left = gap // 2
        return f"║{' ' * left}{content}{' ' * (gap - left)}║"

    icons = "⚾ 🏀 🏒 ⚽ 🏈"
    title = "SportsTime CLI"
    tagline = "Unified data pipeline for sports scheduling"

    rows = [
        f"{Colors.CYAN}{Colors.BOLD}{top_border}",
        boxed(),
        boxed(
            f"{icons} {title} {icons}",
            f"{icons} {Colors.YELLOW}{title}{Colors.CYAN} {icons}",
        ),
        boxed(),
        boxed(tagline, f"{Colors.DIM}{tagline}{Colors.CYAN}{Colors.BOLD}"),
        boxed(),
        f"{bottom_border}{Colors.RESET}",
    ]

    # Leading and trailing blank lines match the original triple-quoted banner.
    print("\n" + "\n".join(rows) + "\n")
|
|
|
|
|
|
def interactive_menu():
    """Display the interactive main menu and return the selected command args.

    The menu is re-displayed until a valid choice is entered. Choosing ``q``
    prints a farewell and exits the process with status 0.

    Returns:
        A list of command-line tokens (sub-command first). Choices that need
        a season/sport return whatever ``get_season_and_sport`` returns.
    """
    # (key colour, key, title, description) in display order.
    entries = [
        (Colors.GREEN, '1', 'Scrape',
         'Download schedules from web sources'),
        (Colors.GREEN, '2', 'Canonicalize',
         'Generate canonical IDs (requires scraped data)'),
        (Colors.GREEN, '3', 'Validate',
         'Check data integrity (requires canonical data)'),
        (Colors.GREEN, '4', 'Upload to CloudKit',
         'Upload to CloudKit (requires canonical data)'),
        (Colors.CYAN, '5', 'Full Pipeline',
         'Scrape → Canonicalize → Validate → Upload (stops on error)'),
        # The stadiums-update sub-command covers the 8 supported sports.
        (Colors.YELLOW, '6', 'Update Stadiums',
         'Scrape ALL stadium data for all 8 sports (comprehensive)'),
    ]

    # Choices that need a follow-up season/sport prompt.
    prompted_modes = {'1': 'scrape', '2': 'canonicalize-only', '5': 'full-pipeline'}
    # Choices that map straight to a fixed command line.
    fixed_commands = {
        '3': ['validate', '--verbose'],
        '4': ['cloudkit', '--interactive'],
        '6': ['stadiums-update'],
    }

    # Loop rather than recurse so repeated bad input cannot exhaust the stack.
    while True:
        print_banner()

        print(f"{Colors.BOLD}What would you like to do?{Colors.RESET}\n")
        for colour, key, title, blurb in entries:
            print(f" {colour}[{key}]{Colors.RESET} {Colors.BOLD}{title}{Colors.RESET}")
            print(f" {Colors.DIM}{blurb}{Colors.RESET}")
        print(f" {Colors.DIM}[q]{Colors.RESET} Quit")

        print()
        choice = input(f"{Colors.CYAN}Enter choice:{Colors.RESET} ").strip().lower()

        if choice == 'q':
            print(f"\n{Colors.DIM}Goodbye!{Colors.RESET}")
            sys.exit(0)

        if choice in prompted_modes:
            return get_season_and_sport(prompted_modes[choice])
        if choice in fixed_commands:
            # Hand back a copy so callers can never mutate the lookup table.
            return list(fixed_commands[choice])

        print(f"\n{Colors.RED}Invalid choice. Please try again.{Colors.RESET}")
|
|
|
|
|
|
def get_season_and_sport(mode: str):
    """Prompt for the season (and sport) and build the args for *mode*.

    Args:
        mode: One of ``'scrape'``, ``'canonicalize-only'`` or
            ``'full-pipeline'``.

    Returns:
        The command-line token list for the matching sub-command, or ``None``
        when *mode* is not recognised.

    Notes:
        * A non-numeric season is rejected and the prompt repeated instead of
          raising ``ValueError``.
        * The ``canonicalize`` sub-command defined in this file has no
          ``--sport`` option, so for ``'canonicalize-only'`` no sport is asked
          for and none is emitted (passing it made argparse abort).
    """
    current_year = datetime.now().year

    # Get season; an empty answer selects the current year.
    while True:
        season_input = input(
            f"{Colors.CYAN}Enter season year [{current_year}]:{Colors.RESET} "
        ).strip()
        try:
            season = int(season_input) if season_input else current_year
            break
        except ValueError:
            print(f"{Colors.RED}Invalid season year. Please enter a number.{Colors.RESET}")

    if mode == 'canonicalize-only':
        return ['canonicalize', '--season', str(season)]

    # Only the sport-aware sub-commands get the sport prompt.
    commands = {'scrape': 'scrape', 'full-pipeline': 'full-pipeline'}
    if mode not in commands:
        return None

    print(f"\n{Colors.BOLD}Select sport:{Colors.RESET}")
    print(f" {Colors.GREEN}[1]{Colors.RESET} All Sports")
    print(f" {Colors.GREEN}[2]{Colors.RESET} MLB {Colors.GREEN}[3]{Colors.RESET} NBA {Colors.GREEN}[4]{Colors.RESET} NHL {Colors.GREEN}[5]{Colors.RESET} NFL")
    print(f" {Colors.GREEN}[6]{Colors.RESET} MLS {Colors.GREEN}[7]{Colors.RESET} WNBA {Colors.GREEN}[8]{Colors.RESET} NWSL {Colors.GREEN}[9]{Colors.RESET} CBB")

    sport_map = {
        '1': 'all', '2': 'mlb', '3': 'nba', '4': 'nhl', '5': 'nfl',
        '6': 'mls', '7': 'wnba', '8': 'nwsl', '9': 'cbb',
    }

    sport_choice = input(f"{Colors.CYAN}Enter choice [1]:{Colors.RESET} ").strip()
    # Anything unrecognised (including an empty answer) means "all sports".
    sport = sport_map.get(sport_choice, 'all')

    return [commands[mode], '--sport', sport, '--season', str(season)]
|
|
|
|
|
|
def scrape_submenu():
    """Prompt for a sport and a season and build the ``scrape`` command args.

    Invalid menu choices and non-numeric seasons are rejected and the
    respective prompt is shown again (a bad season used to raise
    ``ValueError``).

    Returns:
        ``['scrape', '--sport', <code>, '--season', <year>]``, or the result
        of ``interactive_menu()`` when the user chooses ``b``.
    """
    # (menu key, sport code, description) in display order.
    sports = [
        ('1', 'all', 'All Sports'),
        ('2', 'mlb', 'MLB - Major League Baseball'),
        ('3', 'nba', 'NBA - National Basketball Association'),
        ('4', 'nhl', 'NHL - National Hockey League'),
        ('5', 'nfl', 'NFL - National Football League'),
        ('6', 'mls', 'MLS - Major League Soccer'),
        ('7', 'wnba', 'WNBA - Women\'s National Basketball Association'),
        ('8', 'nwsl', 'NWSL - National Women\'s Soccer League'),
        ('9', 'cbb', 'CBB - College Basketball'),
        ('b', 'back', 'Back to main menu'),
    ]
    code_for_key = {key: code for key, code, _ in sports}

    # Loop instead of recursing so repeated bad input cannot grow the stack.
    while True:
        print(f"\n{Colors.BOLD}Select sport to scrape:{Colors.RESET}\n")
        for key, _, desc in sports:
            tint = Colors.DIM if key == 'b' else Colors.GREEN
            print(f" {tint}[{key}]{Colors.RESET} {desc}")

        print()
        choice = input(f"{Colors.CYAN}Enter choice:{Colors.RESET} ").strip().lower()

        if choice == 'b':
            return interactive_menu()

        sport = code_for_key.get(choice)
        if sport:
            break
        print(f"\n{Colors.RED}Invalid choice.{Colors.RESET}")

    # Get season; an empty answer selects the current year.
    current_year = datetime.now().year
    while True:
        season_input = input(
            f"{Colors.CYAN}Enter season year [{current_year}]:{Colors.RESET} "
        ).strip()
        try:
            season = int(season_input) if season_input else current_year
            break
        except ValueError:
            print(f"{Colors.RED}Invalid season year. Please enter a number.{Colors.RESET}")

    return ['scrape', '--sport', sport, '--season', str(season)]
|
|
|
|
|
|
def validate_submenu():
    """Show the validation menu and return the chosen ``validate`` args.

    Returns:
        ``['validate']`` followed by the flags of the selected option, or the
        result of ``interactive_menu()`` when the user chooses ``b``. An
        unknown choice re-displays this menu.
    """
    print(f"\n{Colors.BOLD}Validation options:{Colors.RESET}\n")

    # (menu key, description, extra flags); None marks the "back" entry.
    options = [
        ('1', 'Basic validation', []),
        ('2', 'Strict validation (warnings become errors)', ['--strict']),
        ('3', 'Verbose validation', ['--verbose']),
        ('4', 'Strict + Verbose', ['--strict', '--verbose']),
        ('b', 'Back to main menu', None),
    ]

    for key, desc, _ in options:
        tint = Colors.DIM if key == 'b' else Colors.GREEN
        print(f" {tint}[{key}]{Colors.RESET} {desc}")

    print()
    choice = input(f"{Colors.CYAN}Enter choice:{Colors.RESET} ").strip().lower()

    if choice == 'b':
        return interactive_menu()

    selectable = {key: extra for key, _, extra in options if extra is not None}
    chosen_flags = selectable.get(choice)
    if chosen_flags is None:
        print(f"\n{Colors.RED}Invalid choice.{Colors.RESET}")
        return validate_submenu()

    return ['validate'] + chosen_flags
|
|
|
|
|
|
def pipeline_submenu():
    """Prompt for sport, season and verbosity and build ``pipeline`` args.

    Invalid menu choices and non-numeric seasons are rejected and the
    respective prompt is shown again (a bad season used to raise
    ``ValueError``).

    Returns:
        ``['pipeline', '--sport', <code>, '--season', <year>]`` with
        ``'--verbose'`` appended when requested, or the result of
        ``interactive_menu()`` when the user chooses ``b``.
    """
    # (menu key, sport code, description) in display order.
    sports = [
        ('1', 'all', 'All Sports'),
        ('2', 'mlb', 'MLB only'),
        ('3', 'nba', 'NBA only'),
        ('4', 'nhl', 'NHL only'),
        ('5', 'nfl', 'NFL only'),
        ('6', 'mls', 'MLS only'),
        ('7', 'wnba', 'WNBA only'),
        ('8', 'nwsl', 'NWSL only'),
        ('9', 'cbb', 'CBB only'),
        ('b', 'back', 'Back to main menu'),
    ]
    code_for_key = {key: code for key, code, _ in sports}

    # Loop instead of recursing so repeated bad input cannot grow the stack.
    while True:
        print(f"\n{Colors.BOLD}Pipeline options:{Colors.RESET}\n")
        for key, _, desc in sports:
            tint = Colors.DIM if key == 'b' else Colors.GREEN
            print(f" {tint}[{key}]{Colors.RESET} {desc}")

        print()
        choice = input(f"{Colors.CYAN}Enter choice:{Colors.RESET} ").strip().lower()

        if choice == 'b':
            return interactive_menu()

        sport = code_for_key.get(choice)
        if sport:
            break
        print(f"\n{Colors.RED}Invalid choice.{Colors.RESET}")

    # Get season; an empty answer selects the current year.
    current_year = datetime.now().year
    while True:
        season_input = input(
            f"{Colors.CYAN}Enter season year [{current_year}]:{Colors.RESET} "
        ).strip()
        try:
            season = int(season_input) if season_input else current_year
            break
        except ValueError:
            print(f"{Colors.RED}Invalid season year. Please enter a number.{Colors.RESET}")

    # Verbose? Only an explicit "y" enables it.
    verbose = input(f"{Colors.CYAN}Verbose output? [y/N]:{Colors.RESET} ").strip().lower()

    cmd = ['pipeline', '--sport', sport, '--season', str(season)]
    if verbose == 'y':
        cmd.append('--verbose')

    return cmd
|
|
|
|
|
|
def canonicalize_submenu():
    """Prompt for canonicalization options and build ``canonicalize`` args.

    Invalid menu choices and non-numeric seasons are rejected and the
    respective prompt is shown again (a bad season used to raise
    ``ValueError``).

    Returns:
        ``['canonicalize', '--season', <year>]`` followed by the flags of the
        selected option, or the result of ``interactive_menu()`` when the
        user chooses ``b``.
    """
    # (menu key, description, extra flags); None marks the "back" entry.
    options = [
        ('1', 'Full pipeline (scrape + canonicalize + validate)', []),
        ('2', 'Skip scraping (use existing data)', ['--skip-scrape']),
        ('3', 'Skip validation', ['--no-validate']),
        ('4', 'Verbose output', ['--verbose']),
        ('b', 'Back to main menu', None),
    ]
    flags_for_key = {key: extra for key, _, extra in options}

    # Loop instead of recursing so repeated bad input cannot grow the stack.
    while True:
        print(f"\n{Colors.BOLD}Canonicalization options:{Colors.RESET}\n")
        for key, desc, _ in options:
            tint = Colors.DIM if key == 'b' else Colors.GREEN
            print(f" {tint}[{key}]{Colors.RESET} {desc}")

        print()
        choice = input(f"{Colors.CYAN}Enter choice:{Colors.RESET} ").strip().lower()

        if choice == 'b':
            return interactive_menu()

        # An empty flag list is a valid selection, so compare against None.
        flags = flags_for_key.get(choice)
        if flags is not None:
            break
        print(f"\n{Colors.RED}Invalid choice.{Colors.RESET}")

    # Get season; an empty answer selects the current year.
    current_year = datetime.now().year
    while True:
        season_input = input(
            f"{Colors.CYAN}Enter season year [{current_year}]:{Colors.RESET} "
        ).strip()
        try:
            season = int(season_input) if season_input else current_year
            break
        except ValueError:
            print(f"{Colors.RED}Invalid season year. Please enter a number.{Colors.RESET}")

    return ['canonicalize', '--season', str(season)] + flags
|
|
|
|
|
|
def cmd_scrape(args):
    """Run ``scrape_schedules.main`` with options taken from *args*.

    Reads ``args.sport``, ``args.season``, ``args.output`` and
    ``args.stadiums_only``, rewrites ``sys.argv`` accordingly and then calls
    the scraper's ``main()``.
    """
    from scrape_schedules import main as scrape_main

    # The scraper parses sys.argv itself, so hand it a synthetic command line.
    forwarded = [
        'scrape_schedules.py',
        '--sport', args.sport,
        '--season', str(args.season),
        '--output', args.output,
    ]
    if args.stadiums_only:
        forwarded += ['--stadiums-only']

    sys.argv = forwarded
    scrape_main()
|
|
|
|
|
|
def cmd_stadiums_update(args):
    """Run the scraper in its comprehensive stadium-update mode.

    Reads ``args.output``, rewrites ``sys.argv`` with ``--stadiums-update``
    and ``--stadiums-only`` and then calls ``scrape_schedules.main()``.
    """
    from scrape_schedules import main as scrape_main

    # The scraper parses sys.argv itself, so hand it a synthetic command line.
    sys.argv = [
        'scrape_schedules.py',
        '--output', args.output,
        '--stadiums-update',
        '--stadiums-only',
    ]
    scrape_main()
|
|
|
|
|
|
def cmd_canonicalize(args):
    """Run ``run_canonicalization_pipeline.main`` with options from *args*.

    Always forwards ``--season`` and ``--output``; the boolean switches
    (skip-scrape, no-validate, verbose, strict) are forwarded when set.
    ``sys.argv`` is rewritten before the pipeline's ``main()`` is called.
    """
    from run_canonicalization_pipeline import main as canon_main

    switches = (
        ('--skip-scrape', args.skip_scrape),
        ('--no-validate', args.no_validate),
        ('--verbose', args.verbose),
        ('--strict', args.strict),
    )

    forwarded = [
        'run_canonicalization_pipeline.py',
        '--season', str(args.season),
        '--output', args.output,
    ]
    forwarded.extend(flag for flag, enabled in switches if enabled)

    sys.argv = forwarded
    canon_main()
|
|
|
|
|
|
def cmd_validate(args):
    """Run ``validate_canonical.main`` with options taken from *args*.

    Value options are forwarded only when they are truthy; ``--verbose`` and
    ``--strict`` are forwarded when set. ``sys.argv`` is rewritten before the
    validator's ``main()`` is called.
    """
    from validate_canonical import main as validate_main

    valued_options = (
        ('--data-dir', args.data_dir),
        ('--stadiums', args.stadiums),
        ('--teams', args.teams),
        ('--games', args.games),
        ('--aliases', args.aliases),
        ('--output', args.output),
    )
    switches = (
        ('--verbose', args.verbose),
        ('--strict', args.strict),
    )

    forwarded = ['validate_canonical.py']
    for flag, value in valued_options:
        if value:
            forwarded += [flag, value]
    forwarded.extend(flag for flag, enabled in switches if enabled)

    sys.argv = forwarded
    validate_main()
|
|
|
|
|
|
def cmd_cloudkit(args):
    """Run ``cloudkit_import.main`` with options taken from *args*.

    Credential options are forwarded when truthy, ``--env`` and
    ``--data-dir`` are always forwarded, and every remaining option is
    forwarded when set. ``sys.argv`` is rewritten before ``main()`` is called.
    """
    from cloudkit_import import main as cloudkit_main

    forwarded = ['cloudkit_import.py']

    credentials = (
        ('--key-id', args.key_id),
        ('--key-file', args.key_file),
        ('--container', args.container),
    )
    for flag, value in credentials:
        if value:
            forwarded += [flag, value]

    forwarded += ['--env', args.env, '--data-dir', args.data_dir]

    # (flag, parsed value, carries a value?) in the order they are forwarded.
    remaining = (
        ('--stadiums-only', args.stadiums_only, False),
        ('--games-only', args.games_only, False),
        ('--games-files', args.games_files, True),
        ('--league-structure-only', args.league_structure_only, False),
        ('--team-aliases-only', args.team_aliases_only, False),
        ('--stadium-aliases-only', args.stadium_aliases_only, False),
        ('--canonical-only', args.canonical_only, False),
        ('--delete-all', args.delete_all, False),
        ('--delete-only', args.delete_only, False),
        ('--dry-run', args.dry_run, False),
        ('--verbose', args.verbose, False),
        ('--interactive', args.interactive, False),
    )
    for flag, value, carries_value in remaining:
        if not value:
            continue
        forwarded.append(flag)
        if carries_value:
            forwarded.append(value)

    sys.argv = forwarded
    cloudkit_main()
|
|
|
|
|
|
def cmd_generate(args):
    """Run ``generate_canonical_data.main`` with ``--output`` from *args*.

    ``sys.argv`` is rewritten before the generator's ``main()`` is called.
    """
    from generate_canonical_data import main as generate_main

    # The generator parses sys.argv itself, so hand it a synthetic command line.
    sys.argv = ['generate_canonical_data.py', '--output', args.output]
    generate_main()
|
|
|
|
|
|
def cmd_pipeline(args):
    """Run ``run_pipeline.main`` with options taken from *args*.

    Always forwards ``--season``, ``--sport`` and ``--output``; the boolean
    switches (skip-scrape, no-validate, verbose) are forwarded when set.
    ``sys.argv`` is rewritten before the pipeline's ``main()`` is called.
    """
    from run_pipeline import main as pipeline_main

    switches = (
        ('--skip-scrape', args.skip_scrape),
        ('--no-validate', args.no_validate),
        ('--verbose', args.verbose),
    )

    forwarded = [
        'run_pipeline.py',
        '--season', str(args.season),
        '--sport', args.sport,
        '--output', args.output,
    ]
    forwarded.extend(flag for flag, enabled in switches if enabled)

    sys.argv = forwarded
    pipeline_main()
|
|
|
|
|
|
def _run_full_pipeline_stage(position, total, heading, label, description,
                             module_name, argv):
    """Run one full-pipeline stage and report whether it succeeded.

    Args:
        position: 1-based index of the stage.
        total: Number of stages in this run (used in progress messages).
        heading: Upper-case stage title printed in the progress line.
        label: Stage name used in the completed/failed messages.
        description: Short text printed under the heading.
        module_name: Module whose ``main()`` implements the stage.
        argv: Value assigned to ``sys.argv`` before ``main()`` is called.

    Returns:
        ``True`` when the stage finished (including a ``sys.exit`` with
        status 0 or no status), ``False`` when it raised or exited non-zero.
    """
    import importlib

    print(f"{Colors.CYAN}[{position}/{total}] {heading}{Colors.RESET}")
    print(f" {Colors.DIM}{description}...{Colors.RESET}\n")

    failure_reason = None
    try:
        stage_main = importlib.import_module(module_name).main
        sys.argv = argv
        stage_main()
    except SystemExit as exit_request:
        # A stage may end with sys.exit(); only a non-zero status is a failure.
        if exit_request.code not in (0, None):
            failure_reason = f" (exit code {exit_request.code})"
    except Exception as error:
        failure_reason = f": {error}"

    if failure_reason is not None:
        print(f"\n{Colors.RED}✗ {label} FAILED{failure_reason}{Colors.RESET}")
        print(f"{Colors.YELLOW}Pipeline stopped at step {position}/{total}.{Colors.RESET}")
        return False

    print(f"\n{Colors.GREEN}✓ {label} completed{Colors.RESET}\n")
    return True


def cmd_full_pipeline(args):
    """Run complete pipeline: Scrape → Canonicalize → Validate → Upload.

    Stages run in order and the pipeline stops at the first failure. The
    upload stage is omitted when ``args.skip_upload`` is set, and progress
    messages use the real number of stages for this run.

    Args:
        args: Parsed arguments providing ``sport``, ``season``, ``output``
            and ``skip_upload``.

    Raises:
        SystemExit: With status 1 when a stage fails, so that command-line
            callers see a non-zero exit status.
    """
    top_border = "╔═══════════════════════════════════════════════════════════╗"
    bottom_border = "╚═══════════════════════════════════════════════════════════╝"
    inner_width = len(top_border) - 2

    # Centre the title so the box stays aligned for any sport/season length.
    title = f"FULL PIPELINE - {args.sport.upper()} {args.season}"
    print(f"\n{Colors.CYAN}{Colors.BOLD}{top_border}")
    print(f"║{title.center(inner_width)}║")
    print(f"{bottom_border}{Colors.RESET}\n")

    season = str(args.season)
    # (heading, label, description, module, argv) for every stage, in order.
    stages = [
        ("SCRAPE", "Scrape", "Downloading schedules from web sources",
         'scrape_schedules',
         ['scrape_schedules.py', '--sport', args.sport, '--season', season,
          '--output', args.output]),
        ("CANONICALIZE", "Canonicalize", "Generating canonical IDs",
         'run_canonicalization_pipeline',
         ['run_canonicalization_pipeline.py', '--season', season,
          '--output', args.output, '--skip-scrape']),
        ("VALIDATE", "Validate", "Checking data integrity",
         'validate_canonical',
         ['validate_canonical.py', '--data-dir', args.output, '--verbose']),
    ]
    if not args.skip_upload:
        stages.append(
            ("UPLOAD TO CLOUDKIT", "Upload", "Uploading to CloudKit",
             'cloudkit_import',
             ['cloudkit_import.py', '--data-dir', args.output, '--interactive'])
        )

    total = len(stages)
    for position, (heading, label, description, module_name, argv) in enumerate(stages, start=1):
        succeeded = _run_full_pipeline_stage(
            position, total, heading, label, description, module_name, argv
        )
        if not succeeded:
            sys.exit(1)

    # Success!
    print(f"\n{Colors.GREEN}{Colors.BOLD}{top_border}")
    print(f"║{'PIPELINE COMPLETE ✓'.center(inner_width)}║")
    print(f"{bottom_border}{Colors.RESET}\n")
|
|
|
|
|
|
def cmd_canonicalize_stadiums(args):
    """Run ``canonicalize_stadiums.main`` with options taken from *args*.

    ``--input`` is forwarded when truthy, ``--output`` always, ``--verbose``
    when set. ``sys.argv`` is rewritten before ``main()`` is called.
    """
    from canonicalize_stadiums import main as stadium_main

    forwarded = ['canonicalize_stadiums.py']
    if args.input:
        forwarded += ['--input', args.input]
    forwarded += ['--output', args.output]
    if args.verbose:
        forwarded += ['--verbose']

    sys.argv = forwarded
    stadium_main()
|
|
|
|
|
|
def cmd_canonicalize_teams(args):
    """Run ``canonicalize_teams.main`` with options taken from *args*.

    ``--stadiums`` is forwarded when truthy, ``--output`` always,
    ``--verbose`` when set. ``sys.argv`` is rewritten before ``main()`` is
    called.
    """
    from canonicalize_teams import main as team_main

    forwarded = ['canonicalize_teams.py']
    if args.stadiums:
        forwarded += ['--stadiums', args.stadiums]
    forwarded += ['--output', args.output]
    if args.verbose:
        forwarded += ['--verbose']

    sys.argv = forwarded
    team_main()
|
|
|
|
|
|
def cmd_canonicalize_games(args):
    """Run ``canonicalize_games.main`` with options taken from *args*.

    ``--games``, ``--teams`` and ``--aliases`` are forwarded when truthy,
    ``--output`` always, ``--verbose`` when set. ``sys.argv`` is rewritten
    before ``main()`` is called.
    """
    from canonicalize_games import main as game_main

    optional_paths = (
        ('--games', args.games),
        ('--teams', args.teams),
        ('--aliases', args.aliases),
    )

    forwarded = ['canonicalize_games.py']
    for flag, value in optional_paths:
        if value:
            forwarded += [flag, value]
    forwarded += ['--output', args.output]
    if args.verbose:
        forwarded += ['--verbose']

    sys.argv = forwarded
    game_main()
|
|
|
|
|
|
# Sport codes accepted by every sport-aware sub-command.
_SPORT_CHOICES = ['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'cbb', 'all']


def _add_text_option(parser, flag, help_text, default=None):
    """Add a string-valued option *flag* to *parser*."""
    parser.add_argument(flag, type=str, default=default, help=help_text)


def _add_switch(parser, help_text, *flags):
    """Add a boolean ``store_true`` switch with the given option strings."""
    parser.add_argument(*flags, action='store_true', help=help_text)


def _add_season_option(parser, help_text='Season year (default: 2026)'):
    """Add the integer ``--season`` option (default 2026)."""
    parser.add_argument('--season', type=int, default=2026, help=help_text)


def _add_sport_option(parser, help_text):
    """Add the ``--sport`` option restricted to the supported sport codes."""
    parser.add_argument('--sport', choices=_SPORT_CHOICES, default='all', help=help_text)


def _add_output_dir_option(parser):
    """Add the ``--output`` directory option (default ``./data``)."""
    _add_text_option(parser, '--output', 'Output directory (default: ./data)', default='./data')


def _add_verbose_switch(parser):
    """Add the ``--verbose`` / ``-v`` switch."""
    _add_switch(parser, 'Verbose output', '--verbose', '-v')


def _build_parser():
    """Create the top-level parser with every sub-command registered."""
    parser = argparse.ArgumentParser(
        prog='sportstime',
        description='SportsTime CLI - Unified entry point for all data operations',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
%(prog)s scrape --sport mlb --season 2026
%(prog)s scrape --sport all --season 2026
%(prog)s canonicalize --season 2026 --verbose
%(prog)s validate --strict
%(prog)s cloudkit --interactive
%(prog)s cloudkit --env production --stadiums-only
%(prog)s pipeline --sport all --season 2026
"""
    )

    subparsers = parser.add_subparsers(dest='command', help='Available commands')

    # ============================================================
    # SCRAPE command
    # ============================================================
    scrape_parser = subparsers.add_parser(
        'scrape',
        help='Scrape game schedules from sports reference sites',
        # Lists every league accepted by --sport (NFL and CBB were missing).
        description='Scrapes NBA, MLB, NHL, NFL, WNBA, MLS, NWSL, and CBB schedules'
    )
    _add_sport_option(scrape_parser, 'Sport to scrape (default: all)')
    _add_season_option(
        scrape_parser,
        'Season year - ending year for sports that span years (default: 2026)'
    )
    _add_output_dir_option(scrape_parser)
    _add_switch(scrape_parser, 'Only scrape stadium data, not game schedules', '--stadiums-only')
    scrape_parser.set_defaults(func=cmd_scrape)

    # ============================================================
    # STADIUMS-UPDATE command (comprehensive stadium scraping)
    # ============================================================
    stadiums_update_parser = subparsers.add_parser(
        'stadiums-update',
        help='Scrape ALL stadium data for all 8 sports',
        description='Comprehensive stadium scraping for NBA, MLB, NHL, NFL, WNBA, MLS, NWSL, and CBB'
    )
    _add_output_dir_option(stadiums_update_parser)
    stadiums_update_parser.set_defaults(func=cmd_stadiums_update)

    # ============================================================
    # CANONICALIZE command (full pipeline)
    # ============================================================
    canon_parser = subparsers.add_parser(
        'canonicalize',
        help='Run the canonicalization pipeline',
        description='Runs the full canonicalization pipeline: scrape -> canonicalize -> validate'
    )
    _add_season_option(canon_parser)
    _add_output_dir_option(canon_parser)
    _add_switch(canon_parser, 'Skip scraping, use existing data', '--skip-scrape')
    _add_switch(canon_parser, 'Skip validation step', '--no-validate')
    _add_verbose_switch(canon_parser)
    _add_switch(canon_parser, 'Strict validation mode', '--strict')
    canon_parser.set_defaults(func=cmd_canonicalize)

    # ============================================================
    # VALIDATE command
    # ============================================================
    validate_parser = subparsers.add_parser(
        'validate',
        help='Validate canonical data',
        description='Validates canonical data files for consistency and completeness'
    )
    _add_text_option(validate_parser, '--data-dir', 'Data directory (auto-detects if not specified)')
    _add_text_option(validate_parser, '--stadiums', 'Path to stadiums_canonical.json')
    _add_text_option(validate_parser, '--teams', 'Path to teams_canonical.json')
    _add_text_option(validate_parser, '--games', 'Path to games_canonical.json')
    _add_text_option(validate_parser, '--aliases', 'Path to aliases file')
    _add_text_option(validate_parser, '--output', 'Output file for validation report')
    _add_verbose_switch(validate_parser)
    _add_switch(validate_parser, 'Strict validation mode (warnings become errors)', '--strict')
    validate_parser.set_defaults(func=cmd_validate)

    # ============================================================
    # CLOUDKIT command
    # ============================================================
    cloudkit_parser = subparsers.add_parser(
        'cloudkit',
        help='Import/export data to CloudKit',
        description='Import or export data to/from CloudKit'
    )
    _add_text_option(cloudkit_parser, '--key-id', 'CloudKit key ID')
    _add_text_option(cloudkit_parser, '--key-file', 'Path to CloudKit private key file')
    _add_text_option(cloudkit_parser, '--container', 'CloudKit container ID')
    cloudkit_parser.add_argument(
        '--env',
        choices=['development', 'production'],
        default='development',
        help='CloudKit environment (default: development)'
    )
    _add_text_option(cloudkit_parser, '--data-dir', 'Data directory (default: ./data)', default='./data')
    _add_switch(cloudkit_parser, 'Import only stadiums', '--stadiums-only')
    _add_switch(cloudkit_parser, 'Import only games (all files)', '--games-only')
    _add_text_option(
        cloudkit_parser, '--games-files',
        'Import specific game files (e.g., mlb_2025.json,nba_2025.json)'
    )
    _add_switch(cloudkit_parser, 'Import only league structure', '--league-structure-only')
    _add_switch(cloudkit_parser, 'Import only team aliases', '--team-aliases-only')
    _add_switch(cloudkit_parser, 'Import only stadium aliases', '--stadium-aliases-only')
    _add_switch(
        cloudkit_parser,
        'Import only canonical data (league structure + aliases)',
        '--canonical-only'
    )
    _add_switch(cloudkit_parser, 'Delete all records before importing', '--delete-all')
    _add_switch(cloudkit_parser, 'Only delete records, do not import', '--delete-only')
    _add_switch(cloudkit_parser, 'Show what would be done without making changes', '--dry-run')
    _add_verbose_switch(cloudkit_parser)
    _add_switch(cloudkit_parser, 'Show interactive menu', '--interactive', '-i')
    cloudkit_parser.set_defaults(func=cmd_cloudkit)

    # ============================================================
    # GENERATE command
    # ============================================================
    generate_parser = subparsers.add_parser(
        'generate',
        help='Generate canonical data files',
        description='Generate canonical data JSON files from hardcoded data'
    )
    _add_output_dir_option(generate_parser)
    generate_parser.set_defaults(func=cmd_generate)

    # ============================================================
    # PIPELINE command
    # ============================================================
    pipeline_parser = subparsers.add_parser(
        'pipeline',
        help='Run the full data pipeline',
        description='Run the full data pipeline: fetch, validate, and report'
    )
    _add_season_option(pipeline_parser)
    _add_sport_option(pipeline_parser, 'Sport to process (default: all)')
    _add_output_dir_option(pipeline_parser)
    _add_switch(pipeline_parser, 'Skip scraping, use existing data', '--skip-scrape')
    _add_switch(pipeline_parser, 'Skip validation step', '--no-validate')
    _add_verbose_switch(pipeline_parser)
    pipeline_parser.set_defaults(func=cmd_pipeline)

    # full-pipeline: scrape → canonicalize → validate → upload (stops on error)
    full_pipeline_parser = subparsers.add_parser(
        'full-pipeline',
        help='Run complete pipeline: Scrape → Canonicalize → Validate → Upload',
        description='Run all steps in sequence, stopping on any error'
    )
    _add_season_option(full_pipeline_parser)
    _add_sport_option(full_pipeline_parser, 'Sport to process (default: all)')
    _add_output_dir_option(full_pipeline_parser)
    _add_switch(full_pipeline_parser, 'Skip CloudKit upload step', '--skip-upload')
    full_pipeline_parser.set_defaults(func=cmd_full_pipeline)

    # ============================================================
    # Subcommands for individual canonicalization steps
    # ============================================================

    # canonicalize-stadiums
    canon_stadiums_parser = subparsers.add_parser(
        'canonicalize-stadiums',
        help='Canonicalize stadium data only',
        description='Canonicalize raw stadium data into canonical format'
    )
    _add_text_option(
        canon_stadiums_parser, '--input',
        'Input stadiums file (default: ./data/stadiums.json)'
    )
    _add_output_dir_option(canon_stadiums_parser)
    _add_verbose_switch(canon_stadiums_parser)
    canon_stadiums_parser.set_defaults(func=cmd_canonicalize_stadiums)

    # canonicalize-teams
    canon_teams_parser = subparsers.add_parser(
        'canonicalize-teams',
        help='Canonicalize team data only',
        description='Canonicalize raw team data into canonical format'
    )
    _add_text_option(canon_teams_parser, '--stadiums', 'Path to canonical stadiums file')
    _add_output_dir_option(canon_teams_parser)
    _add_verbose_switch(canon_teams_parser)
    canon_teams_parser.set_defaults(func=cmd_canonicalize_teams)

    # canonicalize-games
    canon_games_parser = subparsers.add_parser(
        'canonicalize-games',
        help='Canonicalize game data only',
        description='Canonicalize raw game data into canonical format'
    )
    _add_text_option(canon_games_parser, '--games', 'Path to raw games file')
    _add_text_option(canon_games_parser, '--teams', 'Path to canonical teams file')
    _add_text_option(canon_games_parser, '--aliases', 'Path to stadium aliases file')
    _add_output_dir_option(canon_games_parser)
    _add_verbose_switch(canon_games_parser)
    canon_games_parser.set_defaults(func=cmd_canonicalize_games)

    return parser


def _run_interactive(parser):
    """Serve the interactive menu until it yields no command."""
    while True:
        cmd_args = interactive_menu()
        if not cmd_args:
            break

        sys.argv = ['sportstime'] + cmd_args
        print(f"\n{Colors.DIM}Running: sportstime {' '.join(cmd_args)}{Colors.RESET}\n")

        try:
            # Parsing sits inside the try block so that an argparse error
            # (which raises SystemExit) returns to the menu instead of
            # terminating the whole interactive session.
            args = parser.parse_args()

            if args.command is None:
                continue

            args.func(args)
        except SystemExit:
            pass  # Catch sys.exit() from argparse and from subcommands
        except Exception as e:
            print(f"\n{Colors.RED}Error: {e}{Colors.RESET}")

        # Prompt to continue
        print(f"\n{Colors.GREEN}{'─' * 60}{Colors.RESET}")
        input(f"{Colors.CYAN}Press Enter to continue...{Colors.RESET}")

        # Reset sys.argv for next iteration (subcommands overwrite it)
        sys.argv = ['sportstime']


def main():
    """Entry point: dispatch a sub-command or serve the interactive menu.

    With no command-line arguments the interactive menu is shown in a loop
    and the process exits with status 0 when the loop ends. Otherwise the
    arguments are parsed and the selected sub-command handler is called;
    when no sub-command is given the help text is printed and the process
    exits with status 1.
    """
    parser = _build_parser()

    # Change to Scripts directory for consistent relative paths
    os.chdir(SCRIPT_DIR)

    # If no arguments, show interactive menu in a loop
    if len(sys.argv) == 1:
        _run_interactive(parser)
        sys.exit(0)

    # Parse and execute (command-line mode)
    args = parser.parse_args()

    if args.command is None:
        parser.print_help()
        sys.exit(1)

    # Execute the command
    args.func(args)
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|