Remove CFB/NASCAR/PGA and streamline to 8 supported sports
- Remove College Football, NASCAR, and PGA from scraper and app
- Clean all data files (stadiums, games, pipeline reports)
- Update Sport.swift enum and all UI components
- Add sportstime.py CLI tool for pipeline management
- Add DATA_SCRAPING.md documentation
- Add WNBA/MLS/NWSL implementation documentation
- Scraper now supports: NBA, MLB, NHL, NFL, WNBA, MLS, NWSL, CBB

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -7,24 +7,43 @@ Imports canonical JSON data into CloudKit. Run after canonicalization pipeline.
|
||||
Expected input files (from canonicalization pipeline):
|
||||
- stadiums_canonical.json
|
||||
- teams_canonical.json
|
||||
- games_canonical.json
|
||||
- games_canonical.json OR canonical/games/*.json (new structure)
|
||||
- stadium_aliases.json
|
||||
- league_structure.json
|
||||
- team_aliases.json
|
||||
|
||||
File Structure (Option B - by sport/season):
|
||||
data/
|
||||
games/ # Raw scraped games
|
||||
mlb_2025.json
|
||||
nba_2025.json
|
||||
...
|
||||
canonical/ # Canonicalized data
|
||||
games/
|
||||
mlb_2025.json
|
||||
nba_2025.json
|
||||
...
|
||||
stadiums.json
|
||||
games_canonical.json # Combined (backward compatibility)
|
||||
stadiums_canonical.json
|
||||
teams_canonical.json
|
||||
|
||||
Setup:
|
||||
1. CloudKit Dashboard > Tokens & Keys > Server-to-Server Keys
|
||||
2. Create key with Read/Write access to public database
|
||||
3. Download .p8 file and note Key ID
|
||||
|
||||
Usage:
|
||||
python cloudkit_import.py --dry-run # Preview first
|
||||
python cloudkit_import.py --key-id XX --key-file key.p8 # Import all
|
||||
python cloudkit_import.py --stadiums-only ... # Stadiums first
|
||||
python cloudkit_import.py --games-only ... # Games after
|
||||
python cloudkit_import.py --stadium-aliases-only ... # Stadium aliases only
|
||||
python cloudkit_import.py --delete-all ... # Delete then import
|
||||
python cloudkit_import.py --delete-only ... # Delete only (no import)
|
||||
python cloudkit_import.py # Interactive menu
|
||||
python cloudkit_import.py --dry-run # Preview first
|
||||
python cloudkit_import.py --key-id XX --key-file key.p8 # Import all
|
||||
python cloudkit_import.py --stadiums-only # Stadiums first
|
||||
python cloudkit_import.py --games-only # All games
|
||||
python cloudkit_import.py --games-files mlb_2025.json # Specific game file
|
||||
python cloudkit_import.py --games-files mlb_2025.json,nba_2025.json # Multiple files
|
||||
python cloudkit_import.py --stadium-aliases-only # Stadium aliases only
|
||||
python cloudkit_import.py --delete-all # Delete then import
|
||||
python cloudkit_import.py --delete-only # Delete only (no import)
|
||||
"""
|
||||
|
||||
import argparse, json, time, os, sys, hashlib, base64, requests
|
||||
@@ -48,6 +67,58 @@ DEFAULT_KEY_ID = "152be0715e0276e31aaea5cbfe79dc872f298861a55c70fae14e5fe3e026cf
|
||||
DEFAULT_KEY_FILE = "eckey.pem"
|
||||
|
||||
|
||||
def show_game_files_menu(data_dir: Path) -> list[str]:
    """Prompt the user to choose which canonical game files to import.

    Lists every ``*.json`` file found in ``data_dir / 'canonical' / 'games'``
    (sorted by path) together with the number of entries it contains, then
    reads a selection from standard input.

    Args:
        data_dir: Root data directory that contains ``canonical/games/``.

    Returns:
        The selected file names (names only, not full paths) in the order
        they were first entered, without duplicates. An empty list means
        there is nothing to import: the directory is missing or holds no
        JSON files, or the user cancelled (``0``, EOF or Ctrl-C).
    """
    canonical_games_dir = data_dir / 'canonical' / 'games'

    if not canonical_games_dir.exists():
        print("\n No canonical/games/ directory found.")
        return []

    game_files = sorted(canonical_games_dir.glob('*.json'))
    if not game_files:
        print("\n No game files found in canonical/games/")
        return []

    print("\n" + "="*50)
    print("Select Game Files to Import")
    print("="*50)
    print("\n Available files:")
    for i, f in enumerate(game_files, 1):
        # Count games in file; one bad file must not take down the whole menu.
        try:
            with open(f, encoding='utf-8') as fp:
                summary = f"{len(json.load(fp)):,} games"
        except (OSError, ValueError, TypeError) as err:
            # ValueError covers JSONDecodeError and UnicodeDecodeError;
            # TypeError covers a JSON document that has no length.
            summary = f"unreadable: {err}"
        print(f" {i}. {f.name} ({summary})")

    print("\n a. All files")
    print(" 0. Cancel")
    print()

    while True:
        try:
            choice = input("Enter file numbers (comma-separated), 'a' for all, or 0 to cancel: ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            print("\nCancelled.")
            return []

        if choice == '0':
            return []
        if choice == 'a':
            return [f.name for f in game_files]

        # Parse comma-separated numbers. Empty tokens (blank input, trailing
        # or doubled commas) are ignored, and a typo re-prompts instead of
        # cancelling the whole operation.
        try:
            indices = [int(token) for token in choice.split(',') if token.strip()]
        except ValueError:
            print("Invalid input. Enter numbers separated by commas.")
            continue

        selected = []
        for idx in indices:
            if not 1 <= idx <= len(game_files):
                print(f"Invalid selection: {idx}")
                continue
            name = game_files[idx-1].name
            # Skip repeats so the same file is never loaded (and imported) twice.
            if name not in selected:
                selected.append(name)

        if selected:
            return selected
        print("No valid selections. Try again.")
|
||||
|
||||
|
||||
def show_menu():
|
||||
"""Show interactive menu and return selected action."""
|
||||
print("\n" + "="*50)
|
||||
@@ -55,25 +126,26 @@ def show_menu():
|
||||
print("="*50)
|
||||
print("\n 1. Import all (stadiums, teams, games, league structure, team aliases, stadium aliases)")
|
||||
print(" 2. Stadiums only")
|
||||
print(" 3. Games only")
|
||||
print(" 4. League structure only")
|
||||
print(" 5. Team aliases only")
|
||||
print(" 6. Stadium aliases only")
|
||||
print(" 7. Canonical only (league structure + team aliases + stadium aliases)")
|
||||
print(" 8. Delete all then import")
|
||||
print(" 9. Delete only (no import)")
|
||||
print(" 10. Dry run (preview only)")
|
||||
print(" 3. Games only (all files)")
|
||||
print(" 4. Games - select specific files")
|
||||
print(" 5. League structure only")
|
||||
print(" 6. Team aliases only")
|
||||
print(" 7. Stadium aliases only")
|
||||
print(" 8. Canonical only (league structure + team aliases + stadium aliases)")
|
||||
print(" 9. Delete all then import")
|
||||
print(" 10. Delete only (no import)")
|
||||
print(" 11. Dry run (preview only)")
|
||||
print(" 0. Exit")
|
||||
print()
|
||||
|
||||
while True:
|
||||
try:
|
||||
choice = input("Enter choice [1-10, 0 to exit]: ").strip()
|
||||
choice = input("Enter choice [1-11, 0 to exit]: ").strip()
|
||||
if choice == '0':
|
||||
return None
|
||||
if choice in ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']:
|
||||
if choice in ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11']:
|
||||
return int(choice)
|
||||
print("Invalid choice. Please enter 1-10 or 0.")
|
||||
print("Invalid choice. Please enter 1-11 or 0.")
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
print("\nExiting.")
|
||||
return None
|
||||
@@ -265,6 +337,7 @@ def main():
|
||||
p.add_argument('--data-dir', default='./data')
|
||||
p.add_argument('--stadiums-only', action='store_true')
|
||||
p.add_argument('--games-only', action='store_true')
|
||||
p.add_argument('--games-files', type=str, help='Comma-separated list of game files to import (e.g., mlb_2025.json,nba_2025.json)')
|
||||
p.add_argument('--league-structure-only', action='store_true', help='Import only league structure')
|
||||
p.add_argument('--team-aliases-only', action='store_true', help='Import only team aliases')
|
||||
p.add_argument('--stadium-aliases-only', action='store_true', help='Import only stadium aliases')
|
||||
@@ -278,11 +351,18 @@ def main():
|
||||
|
||||
# Show interactive menu if no action flags provided or --interactive
|
||||
has_action_flag = any([
|
||||
args.stadiums_only, args.games_only, args.league_structure_only,
|
||||
args.stadiums_only, args.games_only, args.games_files, args.league_structure_only,
|
||||
args.team_aliases_only, args.stadium_aliases_only, args.canonical_only,
|
||||
args.delete_all, args.delete_only, args.dry_run
|
||||
])
|
||||
|
||||
# Track selected game files (for option 4 or --games-files)
|
||||
selected_game_files = None
|
||||
if args.games_files:
|
||||
# Parse comma-separated list from command line
|
||||
selected_game_files = [f.strip() for f in args.games_files.split(',')]
|
||||
args.games_only = True # Imply --games-only
|
||||
|
||||
if args.interactive or not has_action_flag:
|
||||
choice = show_menu()
|
||||
if choice is None:
|
||||
@@ -293,21 +373,27 @@ def main():
|
||||
pass # Default behavior
|
||||
elif choice == 2: # Stadiums only
|
||||
args.stadiums_only = True
|
||||
elif choice == 3: # Games only
|
||||
elif choice == 3: # Games only (all files)
|
||||
args.games_only = True
|
||||
elif choice == 4: # League structure only
|
||||
elif choice == 4: # Games - select specific files
|
||||
args.games_only = True
|
||||
selected_game_files = show_game_files_menu(Path(args.data_dir))
|
||||
if not selected_game_files:
|
||||
print("No files selected. Exiting.")
|
||||
return
|
||||
elif choice == 5: # League structure only
|
||||
args.league_structure_only = True
|
||||
elif choice == 5: # Team aliases only
|
||||
elif choice == 6: # Team aliases only
|
||||
args.team_aliases_only = True
|
||||
elif choice == 6: # Stadium aliases only
|
||||
elif choice == 7: # Stadium aliases only
|
||||
args.stadium_aliases_only = True
|
||||
elif choice == 7: # Canonical only
|
||||
elif choice == 8: # Canonical only
|
||||
args.canonical_only = True
|
||||
elif choice == 8: # Delete all then import
|
||||
elif choice == 9: # Delete all then import
|
||||
args.delete_all = True
|
||||
elif choice == 9: # Delete only
|
||||
elif choice == 10: # Delete only
|
||||
args.delete_only = True
|
||||
elif choice == 10: # Dry run
|
||||
elif choice == 11: # Dry run
|
||||
args.dry_run = True
|
||||
|
||||
print(f"\n{'='*50}")
|
||||
@@ -332,12 +418,34 @@ def main():
|
||||
else:
|
||||
teams = [] # Legacy: extracted from stadiums
|
||||
|
||||
if (data_dir / 'games_canonical.json').exists():
|
||||
# Load games: try new structure first (canonical/games/*.json), then fallback
|
||||
canonical_games_dir = data_dir / 'canonical' / 'games'
|
||||
games = []
|
||||
games_source = None
|
||||
|
||||
if selected_game_files:
|
||||
# Load only the selected files
|
||||
for filename in selected_game_files:
|
||||
filepath = canonical_games_dir / filename
|
||||
if filepath.exists():
|
||||
with open(filepath) as f:
|
||||
file_games = json.load(f)
|
||||
games.extend(file_games)
|
||||
print(f" Loading {filename}: {len(file_games):,} games")
|
||||
games_source = f"selected files: {', '.join(selected_game_files)}"
|
||||
elif canonical_games_dir.exists() and any(canonical_games_dir.glob('*.json')):
|
||||
# New structure: load all sport/season files
|
||||
for games_file in sorted(canonical_games_dir.glob('*.json')):
|
||||
with open(games_file) as f:
|
||||
file_games = json.load(f)
|
||||
games.extend(file_games)
|
||||
games_source = "canonical/games/*.json"
|
||||
elif (data_dir / 'games_canonical.json').exists():
|
||||
games = json.load(open(data_dir / 'games_canonical.json'))
|
||||
games_source = "games_canonical.json"
|
||||
elif (data_dir / 'games.json').exists():
|
||||
games = json.load(open(data_dir / 'games.json'))
|
||||
else:
|
||||
games = []
|
||||
games_source = "games.json (legacy)"
|
||||
|
||||
league_structure = json.load(open(data_dir / 'league_structure.json')) if (data_dir / 'league_structure.json').exists() else []
|
||||
team_aliases = json.load(open(data_dir / 'team_aliases.json')) if (data_dir / 'team_aliases.json').exists() else []
|
||||
@@ -345,6 +453,8 @@ def main():
|
||||
|
||||
print(f"Using {'canonical' if use_canonical else 'legacy'} format")
|
||||
print(f"Loaded {len(stadiums)} stadiums, {len(teams)} teams, {len(games)} games")
|
||||
if games_source:
|
||||
print(f" Games loaded from: {games_source}")
|
||||
print(f"Loaded {len(league_structure)} league structures, {len(team_aliases)} team aliases, {len(stadium_aliases)} stadium aliases\n")
|
||||
|
||||
ck = None
|
||||
|
||||
Reference in New Issue
Block a user