feat: add Django web app, CloudKit sync, dashboard, and game_datetime_utc export
Adds the full Django application layer on top of sportstime_parser: - core: Sport, Team, Stadium, Game models with aliases and league structure - scraper: orchestration engine, adapter, job management, Celery tasks - cloudkit: CloudKit sync client, sync state tracking, sync jobs - dashboard: staff dashboard for monitoring scrapers, sync, review queue - notifications: email reports for scrape/sync results - Docker setup for deployment (Dockerfile, docker-compose, entrypoint) Game exports now use game_datetime_utc (ISO 8601 UTC) instead of venue-local date+time strings, matching the canonical format used by the iOS app. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
1
core/management/__init__.py
Normal file
1
core/management/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Management commands package
|
||||
1
core/management/commands/__init__.py
Normal file
1
core/management/commands/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Commands package
|
||||
445
core/management/commands/export_data.py
Normal file
445
core/management/commands/export_data.py
Normal file
@@ -0,0 +1,445 @@
|
||||
"""
|
||||
Management command to export Django database data to JSON bootstrap files for iOS app.
|
||||
"""
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from core.models import Sport, Conference, Division, Team, Stadium, Game, TeamAlias, StadiumAlias
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Export database data to JSON bootstrap files for the iOS app.

    With no dataset flags, every dataset is exported; each ``--sports``,
    ``--teams``, ... flag restricts the run to that dataset only.
    """

    help = 'Export database data to JSON bootstrap files for iOS app'

    def add_arguments(self, parser):
        """Register CLI options on the Django-provided argparse parser."""
        parser.add_argument(
            '--output-dir',
            type=str,
            default='./bootstrap',
            help='Directory to write JSON files to'
        )
        parser.add_argument(
            '--sports',
            action='store_true',
            help='Export sports only'
        )
        parser.add_argument(
            '--league-structure',
            action='store_true',
            help='Export league structure only'
        )
        parser.add_argument(
            '--teams',
            action='store_true',
            help='Export teams only'
        )
        parser.add_argument(
            '--stadiums',
            action='store_true',
            help='Export stadiums only'
        )
        parser.add_argument(
            '--games',
            action='store_true',
            help='Export games only'
        )
        parser.add_argument(
            '--team-aliases',
            action='store_true',
            help='Export team aliases only'
        )
        parser.add_argument(
            '--stadium-aliases',
            action='store_true',
            help='Export stadium aliases only'
        )
        parser.add_argument(
            '--sport',
            type=str,
            help='Filter by sport code (e.g., nba, mlb)'
        )
        parser.add_argument(
            '--year',
            type=int,
            help='Filter games by calendar year (e.g., 2025 returns all games played in 2025)'
        )
        # BUG FIX: '--pretty' was store_true with default=True, so compact
        # output could never be requested.  Keep '--pretty' for backward
        # compatibility and add '--no-pretty' (same dest) to disable it.
        parser.add_argument(
            '--pretty',
            action='store_true',
            default=True,
            help='Pretty print JSON output (default: true)'
        )
        parser.add_argument(
            '--no-pretty',
            dest='pretty',
            action='store_false',
            help='Emit compact JSON output'
        )
||||
|
||||
    def handle(self, *args, **options):
        """Entry point: export each requested dataset to JSON files.

        When no dataset flag is supplied, every dataset is exported.
        The output directory is created if it does not exist.
        """
        output_dir = Path(options['output_dir'])
        output_dir.mkdir(parents=True, exist_ok=True)

        # If no specific flags, export everything
        export_all = not any([
            options['sports'],
            options['league_structure'],
            options['teams'],
            options['stadiums'],
            options['games'],
            options['team_aliases'],
            options['stadium_aliases'],
        ])

        sport_filter = options.get('sport')
        year_filter = options.get('year')
        # json.dump(indent=None) emits compact output when pretty is off.
        indent = 2 if options['pretty'] else None

        if export_all or options['sports']:
            self._export_sports(output_dir, sport_filter, indent)

        if export_all or options['league_structure']:
            self._export_league_structure(output_dir, sport_filter, indent)

        if export_all or options['teams']:
            self._export_teams(output_dir, sport_filter, indent)

        if export_all or options['stadiums']:
            self._export_stadiums(output_dir, sport_filter, indent)

        # Games additionally honor the --year filter.
        if export_all or options['games']:
            self._export_games(output_dir, sport_filter, year_filter, indent)

        if export_all or options['team_aliases']:
            self._export_team_aliases(output_dir, sport_filter, indent)

        if export_all or options['stadium_aliases']:
            self._export_stadium_aliases(output_dir, sport_filter, indent)

        self.stdout.write(self.style.SUCCESS(f'Export completed to {output_dir}'))
|
||||
|
||||
    def _get_conference_id(self, conference):
        """Return the conference's canonical (export) ID stored on the row."""
        return conference.canonical_id

    def _get_division_id(self, division):
        """Return the division's canonical (export) ID stored on the row."""
        return division.canonical_id
|
||||
|
||||
    def _export_sports(self, output_dir, sport_filter, indent):
        """Export active sports to sports.json."""
        self.stdout.write('Exporting sports...')

        sports = Sport.objects.filter(is_active=True)
        if sport_filter:
            # Sport codes are stored lowercase (e.g. 'nba').
            sports = sports.filter(code=sport_filter.lower())

        data = []
        for sport in sports.order_by('code'):
            data.append({
                # sport_id mirrors the abbreviation (short_name), not the DB code.
                'sport_id': sport.short_name,
                'abbreviation': sport.short_name,
                'display_name': sport.name,
                'icon_name': sport.icon_name,
                'color_hex': sport.color_hex,
                'season_start_month': sport.season_start_month,
                'season_end_month': sport.season_end_month,
                'is_active': sport.is_active,
            })

        file_path = output_dir / 'sports.json'
        with open(file_path, 'w') as f:
            json.dump(data, f, indent=indent)

        self.stdout.write(f' Wrote {len(data)} sports to {file_path}')
|
||||
|
||||
    def _export_league_structure(self, output_dir, sport_filter, indent):
        """Export league structure (sports as leagues, conferences, divisions).

        Emits a flat list where each entry references its parent via
        parent_id: league <- conference <- division.
        """
        self.stdout.write('Exporting league structure...')

        data = []
        seen_ids = set()  # Track IDs to prevent duplicates
        display_order = 0

        # Query sports
        sports = Sport.objects.all()
        if sport_filter:
            sports = sports.filter(code=sport_filter.lower())

        for sport in sports.order_by('code'):
            # Create league entry from Sport
            league_id = f"{sport.code}_league"

            # Skip if we've already seen this ID (also skips its children)
            if league_id in seen_ids:
                continue
            seen_ids.add(league_id)

            data.append({
                'id': league_id,
                'sport': sport.short_name,
                'type': 'league',
                'name': sport.name,
                'abbreviation': sport.short_name,
                'parent_id': None,  # leagues are the roots of the hierarchy
                'display_order': display_order,
            })
            display_order += 1

            # Get conferences for this sport
            conferences = Conference.objects.filter(sport=sport).order_by('order', 'name')
            for conf in conferences:
                conf_id = self._get_conference_id(conf)

                # Skip duplicate conference IDs
                if conf_id in seen_ids:
                    continue
                seen_ids.add(conf_id)

                data.append({
                    'id': conf_id,
                    'sport': sport.short_name,
                    'type': 'conference',
                    'name': conf.name,
                    'abbreviation': conf.short_name or None,
                    'parent_id': league_id,
                    'display_order': conf.order,
                })

                # Get divisions for this conference
                divisions = Division.objects.filter(conference=conf).order_by('order', 'name')
                for div in divisions:
                    div_id = self._get_division_id(div)

                    # Skip duplicate division IDs
                    if div_id in seen_ids:
                        continue
                    seen_ids.add(div_id)

                    data.append({
                        'id': div_id,
                        'sport': sport.short_name,
                        'type': 'division',
                        'name': div.name,
                        'abbreviation': div.short_name or None,
                        'parent_id': conf_id,
                        'display_order': div.order,
                    })

        file_path = output_dir / 'league_structure.json'
        with open(file_path, 'w') as f:
            json.dump(data, f, indent=indent)

        self.stdout.write(f' Wrote {len(data)} entries to {file_path}')
|
||||
|
||||
    def _export_teams(self, output_dir, sport_filter, indent):
        """Export teams to teams_canonical.json."""
        self.stdout.write('Exporting teams...')

        # select_related avoids a query per team for the joined rows.
        teams = Team.objects.select_related(
            'sport', 'division', 'division__conference', 'home_stadium'
        ).all()

        if sport_filter:
            teams = teams.filter(sport__code=sport_filter.lower())

        data = []
        for team in teams.order_by('sport__code', 'city', 'name'):
            # Get conference and division IDs; teams without a division
            # export null for both.
            conference_id = None
            division_id = None
            if team.division:
                division_id = self._get_division_id(team.division)
                conference_id = self._get_conference_id(team.division.conference)

            data.append({
                'canonical_id': team.id,
                'name': team.name,
                'abbreviation': team.abbreviation,
                'sport': team.sport.short_name,
                'city': team.city,
                'stadium_canonical_id': team.home_stadium_id,
                'conference_id': conference_id,
                'division_id': division_id,
                # Empty color strings are exported as null.
                'primary_color': team.primary_color or None,
                'secondary_color': team.secondary_color or None,
            })

        file_path = output_dir / 'teams_canonical.json'
        with open(file_path, 'w') as f:
            json.dump(data, f, indent=indent)

        self.stdout.write(f' Wrote {len(data)} teams to {file_path}')
|
||||
|
||||
def _export_stadiums(self, output_dir, sport_filter, indent):
|
||||
"""Export stadiums to stadiums_canonical.json."""
|
||||
self.stdout.write('Exporting stadiums...')
|
||||
|
||||
stadiums = Stadium.objects.select_related('sport').all()
|
||||
|
||||
if sport_filter:
|
||||
stadiums = stadiums.filter(sport__code=sport_filter.lower())
|
||||
|
||||
# Build map of stadium -> team abbreviations
|
||||
stadium_teams = {}
|
||||
teams = Team.objects.filter(home_stadium__isnull=False).select_related('home_stadium')
|
||||
if sport_filter:
|
||||
teams = teams.filter(sport__code=sport_filter.lower())
|
||||
|
||||
for team in teams:
|
||||
if team.home_stadium_id not in stadium_teams:
|
||||
stadium_teams[team.home_stadium_id] = []
|
||||
stadium_teams[team.home_stadium_id].append(team.abbreviation)
|
||||
|
||||
data = []
|
||||
for stadium in stadiums.order_by('sport__code', 'city', 'name'):
|
||||
data.append({
|
||||
'canonical_id': stadium.id,
|
||||
'name': stadium.name,
|
||||
'city': stadium.city,
|
||||
'state': stadium.state or None,
|
||||
'latitude': float(stadium.latitude) if stadium.latitude else None,
|
||||
'longitude': float(stadium.longitude) if stadium.longitude else None,
|
||||
'capacity': stadium.capacity or 0,
|
||||
'sport': stadium.sport.short_name,
|
||||
'primary_team_abbrevs': stadium_teams.get(stadium.id, []),
|
||||
'year_opened': stadium.opened_year,
|
||||
'timezone_identifier': stadium.timezone or None,
|
||||
'image_url': stadium.image_url or None,
|
||||
})
|
||||
|
||||
file_path = output_dir / 'stadiums_canonical.json'
|
||||
with open(file_path, 'w') as f:
|
||||
json.dump(data, f, indent=indent)
|
||||
|
||||
self.stdout.write(f' Wrote {len(data)} stadiums to {file_path}')
|
||||
|
||||
    def _export_games(self, output_dir, sport_filter, year_filter, indent):
        """Export games to games.json.

        Emits 'game_datetime_utc' as an ISO 8601 UTC timestamp
        ('%Y-%m-%dT%H:%M:%SZ'), replacing the old venue-local date + time
        strings (matches the canonical format used by the iOS app).
        """
        self.stdout.write('Exporting games...')

        games = Game.objects.select_related(
            'sport', 'home_team', 'away_team', 'stadium'
        ).all()

        if sport_filter:
            games = games.filter(sport__code=sport_filter.lower())

        if year_filter:
            games = games.filter(game_date__year=year_filter)

        data = []
        for game in games.order_by('game_date', 'sport__code'):
            # Ensure game_date is UTC-aware: naive datetimes are assumed to
            # already be UTC; aware ones are converted to UTC.
            game_dt = game.game_date
            if game_dt.tzinfo is None:
                game_dt = game_dt.replace(tzinfo=timezone.utc)
            utc_dt = game_dt.astimezone(timezone.utc)

            # Extract domain from source_url (e.g. 'espn.com')
            source = None
            if game.source_url:
                source = self._extract_domain(game.source_url)

            data.append({
                'id': game.id,
                'sport': game.sport.short_name,
                # Season is derived from the game's calendar year.
                'season': str(game.game_date.year),
                'game_datetime_utc': utc_dt.strftime('%Y-%m-%dT%H:%M:%SZ'),
                'home_team': game.home_team.full_name,
                'away_team': game.away_team.full_name,
                'home_team_abbrev': game.home_team.abbreviation,
                'away_team_abbrev': game.away_team.abbreviation,
                'home_team_canonical_id': game.home_team_id,
                'away_team_canonical_id': game.away_team_id,
                'venue': game.stadium.name if game.stadium else None,
                'stadium_canonical_id': game.stadium_id,
                'source': source,
                'is_playoff': game.is_playoff,
                'broadcast': None,  # Not tracked in DB currently
            })

        file_path = output_dir / 'games.json'
        with open(file_path, 'w') as f:
            json.dump(data, f, indent=indent)

        self.stdout.write(f' Wrote {len(data)} games to {file_path}')
|
||||
|
||||
def _extract_domain(self, url):
|
||||
"""Extract domain from URL (e.g., 'espn.com' from 'https://www.espn.com/...')."""
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
domain = parsed.netloc
|
||||
# Remove 'www.' prefix if present
|
||||
if domain.startswith('www.'):
|
||||
domain = domain[4:]
|
||||
return domain
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
    def _export_team_aliases(self, output_dir, sport_filter, indent):
        """Export team aliases to team_aliases.json."""
        self.stdout.write('Exporting team aliases...')

        aliases = TeamAlias.objects.select_related('team', 'team__sport').all()

        if sport_filter:
            aliases = aliases.filter(team__sport__code=sport_filter.lower())

        # Map model alias types to export alias types
        alias_type_map = {
            'full_name': 'name',
            'city_name': 'city',
            'abbreviation': 'abbreviation',
            'nickname': 'name',  # Map nickname to name
            'historical': 'name',  # Map historical to name
        }

        data = []
        for alias in aliases.order_by('team__sport__code', 'team__id', 'id'):
            # Format validity dates as ISO 'YYYY-MM-DD' (or null)
            valid_from = alias.valid_from.strftime('%Y-%m-%d') if alias.valid_from else None
            valid_until = alias.valid_until.strftime('%Y-%m-%d') if alias.valid_until else None

            # Map alias type; unknown model types fall back to 'name'
            export_type = alias_type_map.get(alias.alias_type, 'name')

            data.append({
                # Synthetic export ID: stable as long as the row's PK is.
                'id': f"alias_{alias.team.sport.code}_{alias.pk}",
                'team_canonical_id': alias.team_id,
                'alias_type': export_type,
                'alias_value': alias.alias,
                'valid_from': valid_from,
                'valid_until': valid_until,
            })

        file_path = output_dir / 'team_aliases.json'
        with open(file_path, 'w') as f:
            json.dump(data, f, indent=indent)

        self.stdout.write(f' Wrote {len(data)} team aliases to {file_path}')
|
||||
|
||||
    def _export_stadium_aliases(self, output_dir, sport_filter, indent):
        """Export stadium aliases to stadium_aliases.json."""
        self.stdout.write('Exporting stadium aliases...')

        aliases = StadiumAlias.objects.select_related('stadium', 'stadium__sport').all()

        if sport_filter:
            aliases = aliases.filter(stadium__sport__code=sport_filter.lower())

        data = []
        for alias in aliases.order_by('stadium__sport__code', 'stadium__id', 'id'):
            # Format validity dates as ISO 'YYYY-MM-DD' (or null)
            valid_from = alias.valid_from.strftime('%Y-%m-%d') if alias.valid_from else None
            valid_until = alias.valid_until.strftime('%Y-%m-%d') if alias.valid_until else None

            data.append({
                'alias_name': alias.alias,
                'stadium_canonical_id': alias.stadium_id,
                'valid_from': valid_from,
                'valid_until': valid_until,
            })

        file_path = output_dir / 'stadium_aliases.json'
        with open(file_path, 'w') as f:
            json.dump(data, f, indent=indent)

        self.stdout.write(f' Wrote {len(data)} stadium aliases to {file_path}')
|
||||
98
core/management/commands/fix_wnba_stadiums.py
Normal file
98
core/management/commands/fix_wnba_stadiums.py
Normal file
@@ -0,0 +1,98 @@
|
||||
"""
|
||||
Assign home_stadium to WNBA teams and backfill stadium on WNBA games.
|
||||
|
||||
Usage:
|
||||
python manage.py fix_wnba_stadiums
|
||||
python manage.py fix_wnba_stadiums --dry-run
|
||||
"""
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from core.models import Team, Stadium, Game
|
||||
|
||||
# WNBA team abbreviation → stadium canonical ID.
# Used both to assign Team.home_stadium and to backfill Game.stadium from
# the game's home team (see Command.handle below).
WNBA_TEAM_STADIUMS = {
    'ATL': 'stadium_wnba_gateway_center_arena',
    'CHI': 'stadium_wnba_wintrust_arena',
    'CON': 'stadium_wnba_mohegan_sun_arena',
    'DAL': 'stadium_wnba_college_park_center',
    'GSV': 'stadium_wnba_chase_center',
    'IND': 'stadium_wnba_gainbridge_fieldhouse',
    'LA': 'stadium_wnba_cryptocom_arena',
    'LV': 'stadium_wnba_michelob_ultra_arena',
    'MIN': 'stadium_wnba_target_center',
    'NY': 'stadium_wnba_barclays_center',
    'PHX': 'stadium_wnba_footprint_center',
    'SEA': 'stadium_wnba_climate_pledge_arena',
    'WAS': 'stadium_wnba_entertainment_sports_arena',
}
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Assign home_stadium to WNBA teams and backfill stadium on WNBA games."""

    help = "Assign home_stadium to WNBA teams and backfill game stadiums."

    def add_arguments(self, parser):
        """Register the --dry-run flag."""
        parser.add_argument(
            '--dry-run',
            action='store_true',
            help='Show what would change without saving',
        )

    def handle(self, *args, **options):
        """Run three phases: assign team stadiums, backfill games, summarize."""
        dry_run = options['dry_run']

        if dry_run:
            self.stdout.write(self.style.WARNING("DRY RUN — no changes will be saved"))

        # 1. Assign home_stadium to WNBA teams
        self.stdout.write("\n=== Assigning WNBA team stadiums ===")
        teams_updated = 0
        for abbrev, stadium_id in WNBA_TEAM_STADIUMS.items():
            try:
                team = Team.objects.get(sport_id='wnba', abbreviation=abbrev)
            except Team.DoesNotExist:
                self.stderr.write(f" Team not found: WNBA {abbrev}")
                continue

            try:
                stadium = Stadium.objects.get(id=stadium_id)
            except Stadium.DoesNotExist:
                self.stderr.write(f" Stadium not found: {stadium_id}")
                continue

            # Only report/save teams whose assignment actually changes.
            # The counter also includes would-be changes during a dry run.
            if team.home_stadium_id != stadium_id:
                self.stdout.write(f" {abbrev:5} {team.city} {team.name} → {stadium.name}")
                if not dry_run:
                    team.home_stadium = stadium
                    team.save(update_fields=['home_stadium', 'updated_at'])
                teams_updated += 1

        self.stdout.write(f" Teams updated: {teams_updated}")

        # 2. Backfill stadium on WNBA games missing it
        self.stdout.write("\n=== Backfilling WNBA game stadiums ===")
        games_missing = Game.objects.filter(
            sport_id='wnba', stadium__isnull=True
        ).select_related('home_team')

        games_updated = 0
        for game in games_missing:
            # The home team's mapped arena is used as the venue.
            stadium_id = WNBA_TEAM_STADIUMS.get(game.home_team.abbreviation)
            if not stadium_id:
                self.stderr.write(f" No stadium mapping for {game.home_team.abbreviation}: {game.id}")
                continue

            self.stdout.write(f" {game.id} ({game.home_team.abbreviation} home) → {stadium_id}")
            if not dry_run:
                game.stadium_id = stadium_id
                game.save(update_fields=['stadium', 'updated_at'])
            games_updated += 1

        self.stdout.write(f" Games updated: {games_updated}")

        # 3. Summary
        # FIX: was an f-string with no placeholders (f"\n=== Done ===").
        self.stdout.write("\n=== Done ===")
        missing_stadium = Team.objects.filter(sport_id='wnba', home_stadium__isnull=True).count()
        missing_game_stadium = Game.objects.filter(sport_id='wnba', stadium__isnull=True).count()
        self.stdout.write(f" WNBA teams still missing stadium: {missing_stadium}")
        self.stdout.write(f" WNBA games still missing stadium: {missing_game_stadium}")
||||
512
core/management/commands/import_data.py
Normal file
512
core/management/commands/import_data.py
Normal file
@@ -0,0 +1,512 @@
|
||||
"""
|
||||
Management command to import existing JSON data into Django models.
|
||||
"""
|
||||
import json
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
from django.db import transaction
|
||||
|
||||
from core.models import Sport, Conference, Division, Team, Stadium, Game, TeamAlias, StadiumAlias
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Import existing JSON data files into the Django database."""

    help = 'Import existing JSON data files into Django database'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Lookup maps for JSON ID -> Django object, populated while
        # importing the league structure and consumed when resolving
        # division parents.
        self.divisions_by_json_id = {}
        self.conferences_by_json_id = {}
|
||||
|
||||
    def add_arguments(self, parser):
        """Register CLI options on the Django-provided argparse parser."""
        parser.add_argument(
            '--data-dir',
            type=str,
            default='.',
            help='Directory containing the JSON data files'
        )
        parser.add_argument(
            '--output-dir',
            type=str,
            default='./output',
            help='Directory containing scraped output files (teams, stadiums, games)'
        )
        parser.add_argument(
            '--league-structure',
            action='store_true',
            help='Import league structure only'
        )
        parser.add_argument(
            '--team-aliases',
            action='store_true',
            help='Import team aliases only'
        )
        parser.add_argument(
            '--stadium-aliases',
            action='store_true',
            help='Import stadium aliases only'
        )
        parser.add_argument(
            '--scraped-data',
            action='store_true',
            help='Import scraped teams, stadiums, and games from output directory'
        )
        parser.add_argument(
            '--dry-run',
            action='store_true',
            help='Show what would be imported without making changes'
        )
|
||||
|
||||
    def handle(self, *args, **options):
        """Entry point: import the requested datasets inside one transaction.

        Dry runs execute the full import, then abort the transaction by
        raising a CommandError from inside the atomic block so nothing
        commits.
        """
        data_dir = Path(options['data_dir'])
        output_dir = Path(options['output_dir'])
        dry_run = options['dry_run']

        # If no specific flags, import everything
        import_all = not any([
            options['league_structure'],
            options['team_aliases'],
            options['stadium_aliases'],
            options['scraped_data'],
        ])

        if dry_run:
            self.stdout.write(self.style.WARNING('DRY RUN - No changes will be made'))

        try:
            with transaction.atomic():
                # Always ensure sports exist first
                self._ensure_sports()

                if import_all or options['league_structure']:
                    self._import_league_structure(data_dir, dry_run)

                if import_all or options['scraped_data']:
                    self._import_scraped_data(output_dir, dry_run)

                if import_all or options['team_aliases']:
                    self._import_team_aliases(data_dir, dry_run)

                if import_all or options['stadium_aliases']:
                    self._import_stadium_aliases(data_dir, dry_run)

                if dry_run:
                    # Deliberate: raising rolls back the whole transaction.
                    raise CommandError('Dry run complete - rolling back')

        except CommandError as e:
            # NOTE(review): matching on message text is fragile — any other
            # CommandError containing 'Dry run' would also be swallowed.
            if 'Dry run' in str(e):
                self.stdout.write(self.style.SUCCESS('Dry run completed successfully'))
            else:
                raise

        # NOTE(review): this also prints after a dry run.
        self.stdout.write(self.style.SUCCESS('Data import completed successfully'))
|
||||
|
||||
    def _ensure_sports(self):
        """Ensure all sports exist in the database.

        update_or_create keeps names current without duplicating rows.
        """
        # Canonical set of supported leagues.
        sports = [
            {'code': 'mlb', 'name': 'Major League Baseball', 'short_name': 'MLB'},
            {'code': 'nba', 'name': 'National Basketball Association', 'short_name': 'NBA'},
            {'code': 'nfl', 'name': 'National Football League', 'short_name': 'NFL'},
            {'code': 'nhl', 'name': 'National Hockey League', 'short_name': 'NHL'},
            {'code': 'mls', 'name': 'Major League Soccer', 'short_name': 'MLS'},
            {'code': 'wnba', 'name': "Women's National Basketball Association", 'short_name': 'WNBA'},
            {'code': 'nwsl', 'name': "National Women's Soccer League", 'short_name': 'NWSL'},
        ]

        for sport_data in sports:
            sport, created = Sport.objects.update_or_create(
                code=sport_data['code'],
                defaults={
                    'name': sport_data['name'],
                    'short_name': sport_data['short_name'],
                }
            )
            if created:
                self.stdout.write(f' Created sport: {sport.short_name}')
|
||||
|
||||
    def _import_league_structure(self, data_dir, dry_run):
        """Import league structure (conferences, then divisions) from JSON."""
        self.stdout.write(self.style.HTTP_INFO('Importing league structure...'))

        file_path = data_dir / 'league_structure.json'
        if not file_path.exists():
            self.stdout.write(self.style.WARNING(f' File not found: {file_path}'))
            return

        with open(file_path) as f:
            data = json.load(f)

        # First pass: conferences
        conference_count = 0
        for item in data:
            if item['type'] != 'conference':
                continue

            sport_code = item['sport'].lower()
            try:
                sport = Sport.objects.get(code=sport_code)
            except Sport.DoesNotExist:
                self.stdout.write(self.style.WARNING(f' Sport not found: {sport_code}'))
                continue

            if not dry_run:
                conference, created = Conference.objects.update_or_create(
                    sport=sport,
                    name=item['name'],
                    defaults={
                        'canonical_id': item['id'],
                        'short_name': item.get('abbreviation') or '',
                        'order': item.get('display_order', 0),
                    }
                )
                self.conferences_by_json_id[item['id']] = conference
                if created:
                    conference_count += 1
            else:
                # Dry run: store the raw JSON ID as a placeholder so the
                # division pass can still resolve its parent lookup.
                self.conferences_by_json_id[item['id']] = item['id']
                conference_count += 1

        self.stdout.write(f' Conferences: {conference_count} created/updated')

        # Second pass: divisions (each must reference a known conference)
        division_count = 0
        for item in data:
            if item['type'] != 'division':
                continue

            parent_id = item.get('parent_id')
            if not parent_id or parent_id not in self.conferences_by_json_id:
                self.stdout.write(self.style.WARNING(f' Parent conference not found for division: {item["name"]}'))
                continue

            if not dry_run:
                conference = self.conferences_by_json_id[parent_id]
                division, created = Division.objects.update_or_create(
                    conference=conference,
                    name=item['name'],
                    defaults={
                        'canonical_id': item['id'],
                        'short_name': item.get('abbreviation') or '',
                        'order': item.get('display_order', 0),
                    }
                )
                self.divisions_by_json_id[item['id']] = division
                if created:
                    division_count += 1
            else:
                division_count += 1

        self.stdout.write(f' Divisions: {division_count} created/updated')
|
||||
|
||||
def _import_team_aliases(self, data_dir, dry_run):
|
||||
"""Import team aliases from JSON."""
|
||||
self.stdout.write(self.style.HTTP_INFO('Importing team aliases...'))
|
||||
|
||||
file_path = data_dir / 'team_aliases.json'
|
||||
if not file_path.exists():
|
||||
self.stdout.write(self.style.WARNING(f' File not found: {file_path}'))
|
||||
return
|
||||
|
||||
with open(file_path) as f:
|
||||
data = json.load(f)
|
||||
|
||||
# Map JSON alias types to model alias types
|
||||
alias_type_map = {
|
||||
'name': 'full_name',
|
||||
'city': 'city_name',
|
||||
'abbreviation': 'abbreviation',
|
||||
'nickname': 'nickname',
|
||||
'historical': 'historical',
|
||||
}
|
||||
|
||||
alias_count = 0
|
||||
skipped_count = 0
|
||||
|
||||
for item in data:
|
||||
team_id = item['team_canonical_id']
|
||||
|
||||
# Check if team exists
|
||||
try:
|
||||
team = Team.objects.get(id=team_id)
|
||||
except Team.DoesNotExist:
|
||||
skipped_count += 1
|
||||
continue
|
||||
|
||||
valid_from = None
|
||||
valid_until = None
|
||||
|
||||
if item.get('valid_from'):
|
||||
try:
|
||||
valid_from = datetime.strptime(item['valid_from'], '%Y-%m-%d').date()
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
if item.get('valid_until'):
|
||||
try:
|
||||
valid_until = datetime.strptime(item['valid_until'], '%Y-%m-%d').date()
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
# Map alias type
|
||||
json_alias_type = item.get('alias_type', 'full_name')
|
||||
model_alias_type = alias_type_map.get(json_alias_type, 'full_name')
|
||||
|
||||
if not dry_run:
|
||||
# Use team + alias + alias_type as unique key (no explicit ID)
|
||||
alias, created = TeamAlias.objects.update_or_create(
|
||||
team=team,
|
||||
alias=item['alias_value'],
|
||||
alias_type=model_alias_type,
|
||||
defaults={
|
||||
'valid_from': valid_from,
|
||||
'valid_until': valid_until,
|
||||
}
|
||||
)
|
||||
if created:
|
||||
alias_count += 1
|
||||
else:
|
||||
alias_count += 1
|
||||
|
||||
self.stdout.write(f' Team aliases: {alias_count} created/updated, {skipped_count} skipped (team not found)')
|
||||
|
||||
def _import_stadium_aliases(self, data_dir, dry_run):
|
||||
"""Import stadium aliases from JSON."""
|
||||
self.stdout.write(self.style.HTTP_INFO('Importing stadium aliases...'))
|
||||
|
||||
file_path = data_dir / 'stadium_aliases.json'
|
||||
if not file_path.exists():
|
||||
self.stdout.write(self.style.WARNING(f' File not found: {file_path}'))
|
||||
return
|
||||
|
||||
with open(file_path) as f:
|
||||
data = json.load(f)
|
||||
|
||||
alias_count = 0
|
||||
skipped_count = 0
|
||||
|
||||
for item in data:
|
||||
stadium_id = item['stadium_canonical_id']
|
||||
|
||||
# Check if stadium exists
|
||||
try:
|
||||
stadium = Stadium.objects.get(id=stadium_id)
|
||||
except Stadium.DoesNotExist:
|
||||
skipped_count += 1
|
||||
continue
|
||||
|
||||
valid_from = None
|
||||
valid_until = None
|
||||
|
||||
if item.get('valid_from'):
|
||||
try:
|
||||
valid_from = datetime.strptime(item['valid_from'], '%Y-%m-%d').date()
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
if item.get('valid_until'):
|
||||
try:
|
||||
valid_until = datetime.strptime(item['valid_until'], '%Y-%m-%d').date()
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
if not dry_run:
|
||||
# Use stadium + alias as unique key (no explicit ID)
|
||||
alias, created = StadiumAlias.objects.update_or_create(
|
||||
stadium=stadium,
|
||||
alias=item['alias_name'],
|
||||
defaults={
|
||||
'alias_type': 'official',
|
||||
'valid_from': valid_from,
|
||||
'valid_until': valid_until,
|
||||
}
|
||||
)
|
||||
if created:
|
||||
alias_count += 1
|
||||
else:
|
||||
alias_count += 1
|
||||
|
||||
self.stdout.write(f' Stadium aliases: {alias_count} created/updated, {skipped_count} skipped (stadium not found)')
|
||||
|
||||
    def _import_scraped_data(self, output_dir, dry_run):
        """Import scraped teams, stadiums, and games from output directory.

        Order matters: stadiums before teams (teams reference stadiums)
        and teams before games (games reference teams).
        """
        if not output_dir.exists():
            self.stdout.write(self.style.WARNING(f' Output directory not found: {output_dir}'))
            return

        # Import stadiums first (teams reference them)
        self._import_stadiums(output_dir, dry_run)

        # Import teams (games reference them)
        self._import_teams(output_dir, dry_run)

        # Import games
        self._import_games(output_dir, dry_run)
|
||||
|
||||
def _import_stadiums(self, output_dir, dry_run):
|
||||
"""Import stadiums from output files."""
|
||||
self.stdout.write(self.style.HTTP_INFO('Importing stadiums...'))
|
||||
|
||||
total_count = 0
|
||||
sports = ['mlb', 'nba', 'nfl', 'nhl', 'mls', 'wnba', 'nwsl']
|
||||
|
||||
for sport_code in sports:
|
||||
file_path = output_dir / f'stadiums_{sport_code}.json'
|
||||
if not file_path.exists():
|
||||
continue
|
||||
|
||||
try:
|
||||
sport = Sport.objects.get(code=sport_code)
|
||||
except Sport.DoesNotExist:
|
||||
continue
|
||||
|
||||
with open(file_path) as f:
|
||||
data = json.load(f)
|
||||
|
||||
for item in data:
|
||||
if not dry_run:
|
||||
Stadium.objects.update_or_create(
|
||||
id=item['canonical_id'],
|
||||
defaults={
|
||||
'sport': sport,
|
||||
'name': item['name'],
|
||||
'city': item.get('city', ''),
|
||||
'state': item.get('state', ''),
|
||||
'country': 'USA',
|
||||
'latitude': item.get('latitude'),
|
||||
'longitude': item.get('longitude'),
|
||||
'capacity': item.get('capacity') or None,
|
||||
'timezone': item.get('timezone_identifier', ''),
|
||||
'opened_year': item.get('year_opened'),
|
||||
'image_url': item.get('image_url', '') or '',
|
||||
}
|
||||
)
|
||||
total_count += 1
|
||||
|
||||
self.stdout.write(f' Stadiums: {total_count} created/updated')
|
||||
|
||||
def _import_teams(self, output_dir, dry_run):
|
||||
"""Import teams from output files."""
|
||||
self.stdout.write(self.style.HTTP_INFO('Importing teams...'))
|
||||
|
||||
total_count = 0
|
||||
sports = ['mlb', 'nba', 'nfl', 'nhl', 'mls', 'wnba', 'nwsl']
|
||||
|
||||
for sport_code in sports:
|
||||
file_path = output_dir / f'teams_{sport_code}.json'
|
||||
if not file_path.exists():
|
||||
continue
|
||||
|
||||
try:
|
||||
sport = Sport.objects.get(code=sport_code)
|
||||
except Sport.DoesNotExist:
|
||||
continue
|
||||
|
||||
with open(file_path) as f:
|
||||
data = json.load(f)
|
||||
|
||||
for item in data:
|
||||
# Try to find division using JSON ID lookup
|
||||
division = None
|
||||
if item.get('division_id'):
|
||||
division = self.divisions_by_json_id.get(item['division_id'])
|
||||
|
||||
# Try to find home stadium
|
||||
home_stadium = None
|
||||
if item.get('stadium_canonical_id'):
|
||||
try:
|
||||
home_stadium = Stadium.objects.get(id=item['stadium_canonical_id'])
|
||||
except Stadium.DoesNotExist:
|
||||
pass
|
||||
|
||||
if not dry_run:
|
||||
Team.objects.update_or_create(
|
||||
id=item['canonical_id'],
|
||||
defaults={
|
||||
'sport': sport,
|
||||
'division': division,
|
||||
'city': item.get('city', ''),
|
||||
'name': item['name'],
|
||||
'full_name': f"{item.get('city', '')} {item['name']}".strip(),
|
||||
'abbreviation': item.get('abbreviation', ''),
|
||||
'home_stadium': home_stadium,
|
||||
'primary_color': item.get('primary_color', '') or '',
|
||||
'secondary_color': item.get('secondary_color', '') or '',
|
||||
}
|
||||
)
|
||||
total_count += 1
|
||||
|
||||
self.stdout.write(f' Teams: {total_count} created/updated')
|
||||
|
||||
def _import_games(self, output_dir, dry_run):
|
||||
"""Import games from output files."""
|
||||
self.stdout.write(self.style.HTTP_INFO('Importing games...'))
|
||||
|
||||
total_count = 0
|
||||
error_count = 0
|
||||
|
||||
# Find all games files
|
||||
game_files = list(output_dir.glob('games_*.json'))
|
||||
|
||||
for file_path in game_files:
|
||||
# Parse sport code from filename (e.g., games_mlb_2026.json)
|
||||
parts = file_path.stem.split('_')
|
||||
if len(parts) < 2:
|
||||
continue
|
||||
|
||||
sport_code = parts[1]
|
||||
|
||||
try:
|
||||
sport = Sport.objects.get(code=sport_code)
|
||||
except Sport.DoesNotExist:
|
||||
continue
|
||||
|
||||
with open(file_path) as f:
|
||||
data = json.load(f)
|
||||
|
||||
for item in data:
|
||||
try:
|
||||
# Get teams
|
||||
home_team = Team.objects.get(id=item['home_team_canonical_id'])
|
||||
away_team = Team.objects.get(id=item['away_team_canonical_id'])
|
||||
|
||||
# Get stadium (optional)
|
||||
stadium = None
|
||||
if item.get('stadium_canonical_id'):
|
||||
try:
|
||||
stadium = Stadium.objects.get(id=item['stadium_canonical_id'])
|
||||
except Stadium.DoesNotExist:
|
||||
pass
|
||||
|
||||
# Parse datetime
|
||||
game_date = datetime.fromisoformat(
|
||||
item['game_datetime_utc'].replace('Z', '+00:00')
|
||||
)
|
||||
|
||||
# Parse season (may be "2025" or "2025-26")
|
||||
season_str = str(item.get('season', game_date.year))
|
||||
season = int(season_str.split('-')[0])
|
||||
|
||||
if not dry_run:
|
||||
Game.objects.update_or_create(
|
||||
id=item['canonical_id'],
|
||||
defaults={
|
||||
'sport': sport,
|
||||
'season': season,
|
||||
'home_team': home_team,
|
||||
'away_team': away_team,
|
||||
'stadium': stadium,
|
||||
'game_date': game_date,
|
||||
'status': 'scheduled',
|
||||
'is_playoff': item.get('is_playoff', False),
|
||||
}
|
||||
)
|
||||
total_count += 1
|
||||
|
||||
except (Team.DoesNotExist, KeyError) as e:
|
||||
error_count += 1
|
||||
if error_count <= 5:
|
||||
self.stdout.write(self.style.WARNING(f' Error importing game: {e}'))
|
||||
|
||||
self.stdout.write(f' Games: {total_count} created/updated, {error_count} errors')
|
||||
351
core/management/commands/populate_stadium_details.py
Normal file
351
core/management/commands/populate_stadium_details.py
Normal file
@@ -0,0 +1,351 @@
|
||||
"""
|
||||
Scrape stadium capacity and year-opened from Wikipedia and update local DB.
|
||||
|
||||
Wikipedia pages used:
|
||||
- NBA: List_of_NBA_arenas
|
||||
- NFL: List_of_current_NFL_stadiums
|
||||
- MLB: List_of_current_Major_League_Baseball_stadiums
|
||||
- NHL: List_of_NHL_arenas
|
||||
- MLS: List_of_Major_League_Soccer_stadiums
|
||||
- WNBA: Women's_National_Basketball_Association
|
||||
- NWSL: List_of_National_Women's_Soccer_League_stadiums
|
||||
|
||||
Usage:
|
||||
python manage.py populate_stadium_details
|
||||
python manage.py populate_stadium_details --sport nba
|
||||
python manage.py populate_stadium_details --dry-run
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from core.models import Stadium
|
||||
|
||||
# MediaWiki "parse" API endpoint (returns rendered page HTML as JSON).
WIKI_API = "https://en.wikipedia.org/w/api.php"

# Per-sport scrape spec:
# (page_title, table_index, name_col, capacity_col, opened_col)
# table_index selects among the page's "wikitable" tables; an opened_col of
# None means the page has no year-opened column to read.
WIKI_SOURCES = {
    "nba": ("List_of_NBA_arenas", 0, "Arena", "Capacity", "Opened"),
    "nfl": ("List_of_current_NFL_stadiums", 0, "Name", "Capacity", "Opened"),
    "mlb": ("List_of_current_Major_League_Baseball_stadiums", 0, "Name", "Capacity", "Opened"),
    "nhl": ("List_of_NHL_arenas", 0, "Arena", "Capacity", "Opened"),
    "mls": ("List_of_Major_League_Soccer_stadiums", 1, "Stadium", "Capacity", "Opened"),
    "wnba": ("Women's_National_Basketball_Association", 1, "Arena", "Capacity", None),
    "nwsl": ("List_of_National_Women's_Soccer_League_stadiums", 0, "Stadium", "Capacity", None),
}

# Wikipedia name → list of our possible stadium names (for fuzzy matching).
# An empty list means "explicitly skip this venue — known to be absent from
# our DB"; the matcher returns None for those instead of fuzzy-matching.
NAME_OVERRIDES = {
    # NBA
    "Rocket Arena": ["Rocket Mortgage FieldHouse"],
    "Mortgage Matchup Center": [],  # skip — not in our DB
    "Xfinity Mobile Arena": ["Footprint Center"],  # Phoenix — renamed
    # NHL
    "Lenovo Center": ["PNC Arena"],  # Carolina — renamed
    "Benchmark International Arena": ["Amalie Arena"],  # Tampa — renamed
    "Grand Casino Arena": ["Xcel Energy Center"],  # Minnesota — renamed
    # MLS
    "Energizer Park": ["CITYPARK"],  # St. Louis — renamed
    "Saputo Stadium": ["Stade Saputo"],  # Montreal — same stadium, French name
    "ScottsMiracle-Gro Field": ["Lower.com Field"],  # Columbus — renamed
    "Sporting Park": ["Children's Mercy Park"],  # KC — renamed
    "Sports Illustrated Stadium": [],  # skip — may not be in our DB yet
    # NWSL
    "CPKC Stadium": ["Children's Mercy Park"],  # KC shared name
}
|
||||
|
||||
|
||||
class Command(BaseCommand):
    help = "Populate stadium capacity and opened_year from Wikipedia."

    def add_arguments(self, parser):
        # CLI flags: restrict to a single sport, or preview without saving.
        parser.add_argument(
            "--sport",
            type=str,
            choices=list(WIKI_SOURCES.keys()),
            help="Only process a single sport",
        )
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Show what would change without saving",
        )

    def handle(self, *args, **options):
        """Entry point: scrape each requested sport's page, then print coverage stats."""
        sport_filter = options["sport"]
        dry_run = options["dry_run"]

        # No --sport means every league configured in WIKI_SOURCES.
        sports = [sport_filter] if sport_filter else list(WIKI_SOURCES.keys())

        if dry_run:
            self.stdout.write(self.style.WARNING("DRY RUN — no changes will be saved"))

        for sport_code in sports:
            self._process_sport(sport_code, dry_run)

        self._print_summary()

    def _process_sport(self, sport_code, dry_run):
        """Scrape one sport's Wikipedia table and update matching Stadium rows."""
        page, table_idx, name_col, cap_col, opened_col = WIKI_SOURCES[sport_code]

        self.stdout.write(f"\n{'='*60}")
        self.stdout.write(self.style.HTTP_INFO(f"Processing {sport_code.upper()} — Wikipedia: {page}"))
        self.stdout.write(f"{'='*60}")

        # Fetch Wikipedia page
        wiki_data = self._fetch_wiki_table(page, table_idx, name_col, cap_col, opened_col)
        if not wiki_data:
            self.stderr.write(self.style.ERROR("  Failed to parse Wikipedia table"))
            return

        self.stdout.write(f"  Wikipedia returned {len(wiki_data)} venues")

        # Get our stadiums for this sport
        # NOTE(review): filtering by sport_id=sport_code assumes Sport's
        # primary key is the sport code — confirm against the Sport model.
        db_stadiums = Stadium.objects.filter(sport_id=sport_code)
        # Build lookup: normalized name → stadium
        stadium_lookup = {}
        for s in db_stadiums:
            stadium_lookup[self._normalize_name(s.name)] = s

        matched = 0
        updated = 0
        unmatched_wiki = []

        for wiki_name, info in wiki_data.items():
            stadium = self._find_stadium(wiki_name, stadium_lookup)
            if not stadium:
                unmatched_wiki.append(wiki_name)
                continue

            matched += 1
            changes = []  # human-readable field diffs for this stadium

            capacity = info.get("capacity")
            opened = info.get("opened")

            if capacity and (stadium.capacity is None or stadium.capacity != capacity):
                changes.append(f"capacity: {stadium.capacity} → {capacity}")
                if not dry_run:
                    stadium.capacity = capacity

            if opened and (stadium.opened_year is None or stadium.opened_year != opened):
                changes.append(f"opened_year: {stadium.opened_year} → {opened}")
                if not dry_run:
                    stadium.opened_year = opened

            if changes:
                updated += 1
                self.stdout.write(f"  {stadium.name}")
                for c in changes:
                    self.stdout.write(f"    {c}")
                if not dry_run:
                    # Only persist fields that can have been modified above.
                    update_fields = ["updated_at"]
                    if capacity:
                        update_fields.append("capacity")
                    if opened:
                        update_fields.append("opened_year")
                    stadium.save(update_fields=update_fields)

        self.stdout.write(f"\n  Matched: {matched} | Updated: {updated}")

        if unmatched_wiki:
            self.stdout.write(self.style.WARNING(
                f"  Wiki venues with no DB match ({len(unmatched_wiki)}):"
            ))
            for name in sorted(unmatched_wiki):
                self.stdout.write(f"    - {name}")

        # Check for DB stadiums that didn't match
        # NOTE(review): this re-runs _find_stadium over every wiki name a
        # second time; the matches could be collected in the loop above.
        matched_ids = set()
        for wiki_name in wiki_data:
            s = self._find_stadium(wiki_name, stadium_lookup)
            if s:
                matched_ids.add(s.id)

        unmatched_db = [s for s in db_stadiums if s.id not in matched_ids]
        if unmatched_db:
            self.stdout.write(self.style.WARNING(
                f"  DB stadiums with no Wiki match ({len(unmatched_db)}):"
            ))
            for s in sorted(unmatched_db, key=lambda x: x.name):
                self.stdout.write(f"    - {s.name} ({s.id})")

    def _fetch_wiki_table(self, page, table_idx, name_col, cap_col, opened_col):
        """Fetch and parse a Wikipedia table. Returns {name: {capacity, opened}}.

        Returns None on network failure, API error, or an out-of-range
        table index.
        """
        # MediaWiki "parse" action returns the rendered page HTML;
        # "redirects" follows page renames transparently.
        params = {
            "action": "parse",
            "page": page,
            "prop": "text",
            "format": "json",
            "redirects": "true",
        }

        # Wikipedia's API etiquette asks for an identifying User-Agent.
        headers = {
            "User-Agent": "SportsTimeBot/1.0 (stadium metadata; contact@example.com)",
        }

        try:
            resp = requests.get(WIKI_API, params=params, headers=headers, timeout=15)
            resp.raise_for_status()
            data = resp.json()
        except requests.RequestException as e:
            self.stderr.write(f"  Failed to fetch Wikipedia: {e}")
            return None

        if "error" in data:
            self.stderr.write(f"  Wikipedia error: {data['error']['info']}")
            return None

        html = data["parse"]["text"]["*"]
        soup = BeautifulSoup(html, "lxml")
        tables = soup.find_all("table", class_="wikitable")

        if table_idx >= len(tables):
            self.stderr.write(f"  Table index {table_idx} out of range ({len(tables)} tables)")
            return None

        table = tables[table_idx]
        return self._parse_table(table, name_col, cap_col, opened_col)

    def _parse_table(self, table, name_col, cap_col, opened_col):
        """Parse an HTML table into {name: {capacity, opened}}.

        Handles rowspan by detecting column count mismatches and adjusting indices.
        """
        result = {}

        # Get header indices from the actual <th> row
        header_row = table.find("tr")
        if not header_row:
            return result

        headers = [th.get_text(strip=True) for th in header_row.find_all("th")]
        expected_cols = len(headers)

        name_idx = self._find_col_idx(headers, name_col)
        cap_idx = self._find_col_idx(headers, cap_col)
        opened_idx = self._find_col_idx(headers, opened_col) if opened_col else None

        if name_idx is None or cap_idx is None:
            self.stderr.write(f"  Could not find columns: name_col={name_col}({name_idx}), cap_col={cap_col}({cap_idx})")
            self.stderr.write(f"  Available headers: {headers}")
            return result

        rows = table.find_all("tr")[1:]  # Skip header
        for row in rows:
            cells = row.find_all(["td", "th"])
            actual_cols = len(cells)

            # When a row has fewer cells than headers, a rowspan column is
            # spanning from a previous row. Shift indices down by the difference.
            # NOTE(review): this assumes the spanned column(s) precede the
            # name/capacity columns on every source page — TODO confirm.
            offset = expected_cols - actual_cols
            adj_name = name_idx - offset
            adj_cap = cap_idx - offset
            adj_opened = (opened_idx - offset) if opened_idx is not None else None

            # Skip rows whose adjusted indices fall outside the cell list
            # (e.g. footer or heavily-spanned rows).
            if adj_name < 0 or adj_cap < 0 or adj_name >= actual_cols or adj_cap >= actual_cols:
                continue

            name = cells[adj_name].get_text(strip=True)
            # Clean up name — remove citation marks
            name = re.sub(r"\[.*?\]", "", name).strip()
            # Remove daggers and asterisks
            name = re.sub(r"[†‡*♠§#]", "", name).strip()

            if not name:
                continue

            # Parse capacity
            cap_text = cells[adj_cap].get_text(strip=True)
            capacity = self._parse_capacity(cap_text)

            # Parse opened year
            opened = None
            if adj_opened is not None and 0 <= adj_opened < actual_cols:
                opened_text = cells[adj_opened].get_text(strip=True)
                opened = self._parse_year(opened_text)

            result[name] = {"capacity": capacity, "opened": opened}

        return result

    def _find_col_idx(self, headers, col_name):
        """Find column index by name (fuzzy match).

        Case-insensitive substring match; returns the first hit or None.
        """
        if col_name is None:
            return None
        col_lower = col_name.lower()
        for i, h in enumerate(headers):
            if col_lower in h.lower():
                return i
        return None

    def _parse_capacity(self, text):
        """Extract numeric capacity from text like '18,000' or '20,000[1]'.

        Returns an int, or None when no number is found.
        """
        # Remove citations and parenthetical notes
        text = re.sub(r"\[.*?\]", "", text)
        text = re.sub(r"\(.*?\)", "", text)
        # Find first number with commas
        match = re.search(r"[\d,]+", text)
        if match:
            try:
                return int(match.group().replace(",", ""))
            except ValueError:
                pass
        return None

    def _parse_year(self, text):
        """Extract a 4-digit year (1900–2099) from text, or None."""
        text = re.sub(r"\[.*?\]", "", text)
        match = re.search(r"\b((?:19|20)\d{2})\b", text)
        if match:
            return int(match.group(1))
        return None

    def _normalize_name(self, name):
        """Normalize stadium name for matching (lowercase, strip punctuation)."""
        name = name.lower()
        name = re.sub(r"[''`.]", "", name)
        name = re.sub(r"\s+", " ", name).strip()
        return name

    def _find_stadium(self, wiki_name, stadium_lookup):
        """Find a stadium in our DB by Wikipedia name.

        Resolution order: explicit NAME_OVERRIDES, exact normalized match,
        then substring match in either direction. Returns None if no match.
        """
        # Check overrides first (empty list = explicitly skip)
        if wiki_name in NAME_OVERRIDES:
            override_names = NAME_OVERRIDES[wiki_name]
            if not override_names:
                return None  # Explicitly skip
            for alt in override_names:
                alt_norm = self._normalize_name(alt)
                if alt_norm in stadium_lookup:
                    return stadium_lookup[alt_norm]

        # Direct normalized match
        normalized = self._normalize_name(wiki_name)
        if normalized in stadium_lookup:
            return stadium_lookup[normalized]

        # Fuzzy: check if wiki name is a substring of any DB name or vice versa
        for db_norm, stadium in stadium_lookup.items():
            if normalized in db_norm or db_norm in normalized:
                return stadium

        return None

    def _print_summary(self):
        """Print DB-wide coverage counts for capacity, opened_year, image_url."""
        self.stdout.write(f"\n{'='*60}")
        self.stdout.write(self.style.HTTP_INFO("Summary"))
        self.stdout.write(f"{'='*60}")

        total = Stadium.objects.count()
        has_cap = Stadium.objects.exclude(capacity__isnull=True).count()
        has_year = Stadium.objects.exclude(opened_year__isnull=True).count()
        has_img = Stadium.objects.exclude(image_url="").count()

        self.stdout.write(f"  Total stadiums: {total}")
        self.stdout.write(f"  With capacity: {has_cap}")
        self.stdout.write(f"  With opened_year: {has_year}")
        self.stdout.write(f"  With image_url: {has_img}")
||||
147
core/management/commands/populate_stadium_images.py
Normal file
147
core/management/commands/populate_stadium_images.py
Normal file
@@ -0,0 +1,147 @@
|
||||
"""
|
||||
Fetch stadium image URLs from ESPN's per-team API.
|
||||
|
||||
ESPN provides venue images for NBA, NFL, MLB, NHL via each team's
|
||||
franchise.venue.images field. MLS/WNBA/NWSL are not available.
|
||||
|
||||
Usage:
|
||||
python manage.py populate_stadium_images
|
||||
python manage.py populate_stadium_images --sport nba
|
||||
python manage.py populate_stadium_images --dry-run
|
||||
"""
|
||||
|
||||
import time
|
||||
|
||||
import requests
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from core.models import Team, Stadium
|
||||
|
||||
# ESPN sport path segments (only sports with franchise.venue data)
ESPN_SPORT_PATHS = {
    "nba": "basketball/nba",
    "nfl": "football/nfl",
    "mlb": "baseball/mlb",
    "nhl": "hockey/nhl",
}

# ESPN abbreviation → slug overrides (where abbreviation != URL slug)
# NOTE(review): not referenced by this command's code — confirm it is used
# elsewhere (or remove it).
ESPN_SLUG_OVERRIDES = {
    "nba": {"GS": "gs", "NO": "no", "NY": "ny", "SA": "sa", "UTAH": "utah", "WSH": "wsh"},
    "nfl": {"WSH": "wsh"},
    "mlb": {"WSH": "wsh", "ATH": "ath"},
    "nhl": {"WSH": "wsh", "UTAH": "utah"},
}

# Our abbreviation → ESPN abbreviation (reverse of team metadata overrides).
# Used to build the per-team ESPN URL slug; unmapped abbreviations pass
# through unchanged.
OUR_TO_ESPN_ABBREV = {
    "nba": {"GSW": "GS", "NOP": "NO", "NYK": "NY", "SAS": "SA", "UTA": "UTAH", "WAS": "WSH"},
    "nfl": {"WAS": "WSH"},
    "mlb": {"WSN": "WSH", "OAK": "ATH"},
    "nhl": {"WAS": "WSH", "ARI": "UTAH"},
}
|
||||
|
||||
|
||||
class Command(BaseCommand):
    help = "Populate stadium image_url from ESPN venue data (NBA, NFL, MLB, NHL)."

    def add_arguments(self, parser):
        # CLI flags: restrict to a single sport, or preview without saving.
        parser.add_argument(
            "--sport",
            type=str,
            choices=list(ESPN_SPORT_PATHS.keys()),
            help="Only process a single sport",
        )
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Show what would change without saving",
        )

    def handle(self, *args, **options):
        """Entry point: fetch venue images per sport, then print coverage stats."""
        sport_filter = options["sport"]
        dry_run = options["dry_run"]

        sports = [sport_filter] if sport_filter else list(ESPN_SPORT_PATHS.keys())

        if dry_run:
            self.stdout.write(self.style.WARNING("DRY RUN — no changes will be saved"))

        for sport_code in sports:
            self._process_sport(sport_code, dry_run)

        self._print_summary()

    def _process_sport(self, sport_code, dry_run):
        """Fetch each team's ESPN franchise record and copy its venue image.

        Stadiums are deduplicated per run, so shared venues are fetched once.

        Fix: the final tally previously reported every *processed* stadium as
        "updated" — including ones skipped because they already had an image
        and ones ESPN returned no image for. Processed and actually-updated
        stadiums are now counted separately.
        """
        self.stdout.write(f"\n{'='*60}")
        self.stdout.write(self.style.HTTP_INFO(f"Processing {sport_code.upper()} stadiums"))
        self.stdout.write(f"{'='*60}")

        sport_path = ESPN_SPORT_PATHS[sport_code]
        abbrev_map = OUR_TO_ESPN_ABBREV.get(sport_code, {})

        # Teams with a home stadium; select_related avoids a query per team.
        teams = Team.objects.filter(
            sport_id=sport_code,
            home_stadium__isnull=False,
        ).select_related("home_stadium")

        processed = set()  # stadium ids handled this run, whatever the outcome
        updated = 0        # stadiums whose image_url actually changed
        failed = 0

        for team in teams:
            stadium = team.home_stadium
            if stadium.id in processed:
                continue
            # Already has an image — nothing to do. (In dry-run we still
            # fetch, so the would-be change is visible in the output.)
            if stadium.image_url and not dry_run:
                processed.add(stadium.id)
                continue

            # Build ESPN team slug (lowercase abbreviation)
            espn_abbrev = abbrev_map.get(team.abbreviation, team.abbreviation)
            slug = espn_abbrev.lower()

            url = f"https://site.api.espn.com/apis/site/v2/sports/{sport_path}/teams/{slug}"

            try:
                resp = requests.get(url, timeout=10)
                resp.raise_for_status()
                data = resp.json()
            except requests.RequestException as e:
                self.stderr.write(f"  {team.abbreviation:6} FAILED: {e}")
                failed += 1
                time.sleep(0.3)
                # Stadium not marked processed, so another team sharing it
                # may retry later in the run.
                continue

            # Extract venue image (first variant, if any)
            venue = data.get("team", {}).get("franchise", {}).get("venue", {})
            images = venue.get("images", [])
            image_url = images[0]["href"] if images else ""

            if image_url and stadium.image_url != image_url:
                updated += 1
                self.stdout.write(f"  {team.abbreviation:6} {stadium.name}")
                self.stdout.write(f"    image_url → {image_url}")
                if not dry_run:
                    stadium.image_url = image_url
                    stadium.save(update_fields=["image_url", "updated_at"])
            elif not image_url:
                self.stdout.write(self.style.WARNING(
                    f"  {team.abbreviation:6} {stadium.name} — no image from ESPN"
                ))

            processed.add(stadium.id)
            time.sleep(0.2)  # Rate limiting

        self.stdout.write(
            f"\n  Stadiums processed: {len(processed)} | Updated: {updated} | Failed: {failed}"
        )

    def _print_summary(self):
        """Print DB-wide image_url coverage counts."""
        self.stdout.write(f"\n{'='*60}")
        self.stdout.write(self.style.HTTP_INFO("Summary"))
        self.stdout.write(f"{'='*60}")

        total = Stadium.objects.count()
        has_image = Stadium.objects.exclude(image_url="").count()
        self.stdout.write(f"  Total stadiums: {total}")
        self.stdout.write(f"  With image_url: {has_image}")
        self.stdout.write(f"  Missing image_url: {total - has_image}")
||||
268
core/management/commands/populate_team_metadata.py
Normal file
268
core/management/commands/populate_team_metadata.py
Normal file
@@ -0,0 +1,268 @@
|
||||
"""
|
||||
Fetch team logos, colors, and MLS division assignments from ESPN's public API.
|
||||
|
||||
Usage:
|
||||
python manage.py populate_team_metadata # all sports
|
||||
python manage.py populate_team_metadata --sport nba
|
||||
python manage.py populate_team_metadata --dry-run
|
||||
"""
|
||||
|
||||
import requests
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from core.models import Team, Sport, Conference, Division
|
||||
|
||||
# ESPN "teams" list endpoints, one per supported league.
ESPN_ENDPOINTS = {
    "nba": "https://site.api.espn.com/apis/site/v2/sports/basketball/nba/teams",
    "nfl": "https://site.api.espn.com/apis/site/v2/sports/football/nfl/teams",
    "mlb": "https://site.api.espn.com/apis/site/v2/sports/baseball/mlb/teams",
    "nhl": "https://site.api.espn.com/apis/site/v2/sports/hockey/nhl/teams",
    "mls": "https://site.api.espn.com/apis/site/v2/sports/soccer/usa.1/teams",
    "wnba": "https://site.api.espn.com/apis/site/v2/sports/basketball/wnba/teams",
    "nwsl": "https://site.api.espn.com/apis/site/v2/sports/soccer/usa.nwsl/teams",
}

# ESPN abbreviation → our abbreviation (where they differ).
# Abbreviations not listed here pass through unchanged.
ABBREV_OVERRIDES = {
    "nba": {"GS": "GSW", "NO": "NOP", "NY": "NYK", "SA": "SAS", "UTAH": "UTA", "WSH": "WAS"},
    "nfl": {"WSH": "WAS"},
    "mlb": {"WSH": "WSN", "ATH": "OAK"},
    "nhl": {"WSH": "WAS", "UTAH": "ARI"},
    "mls": {"ATX": "AUS", "NY": "RB", "RSL": "SLC", "LA": "LAG"},
    "wnba": {"GS": "GSV", "WSH": "WAS"},
    "nwsl": {
        "LA": "ANG",
        "GFC": "NJY",
        "KC": "KCC",
        "NC": "NCC",
        "LOU": "RGN",
        "SD": "SDW",
        "WAS": "WSH",
    },
}

# MLS conference assignments (from mls.py scrape_teams).
# Keys are conference names; values are our team abbreviations.
MLS_CONFERENCES = {
    "Eastern": [
        "ATL", "CLT", "CHI", "CIN", "CLB", "DC", "MIA", "MTL",
        "NE", "NYC", "RB", "ORL", "PHI", "TOR",
    ],
    "Western": [
        "AUS", "COL", "DAL", "HOU", "LAG", "LAFC", "MIN", "NSH",
        "POR", "SLC", "SD", "SJ", "SEA", "SKC", "STL", "VAN",
    ],
}
|
||||
|
||||
|
||||
class Command(BaseCommand):
    help = "Populate team logo_url, primary_color, secondary_color from ESPN, and assign MLS divisions."

    def add_arguments(self, parser):
        # CLI flags: restrict to a single sport, or preview without saving.
        parser.add_argument(
            "--sport",
            type=str,
            choices=list(ESPN_ENDPOINTS.keys()),
            help="Only process a single sport",
        )
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Show what would change without saving",
        )

    def handle(self, *args, **options):
        """Entry point: update team metadata per sport; MLS also gets divisions."""
        sport_filter = options["sport"]
        dry_run = options["dry_run"]

        sports = [sport_filter] if sport_filter else list(ESPN_ENDPOINTS.keys())

        if dry_run:
            self.stdout.write(self.style.WARNING("DRY RUN — no changes will be saved"))

        for sport_code in sports:
            self._process_sport(sport_code, dry_run)

        # MLS conference/division structure is not in the teams endpoint,
        # so it is assigned from the static MLS_CONFERENCES map afterwards.
        if "mls" in sports:
            self._assign_mls_divisions(dry_run)

        self._print_summary()

    def _process_sport(self, sport_code, dry_run):
        """Fetch the ESPN teams list for one sport and update matching Team rows."""
        self.stdout.write(f"\n{'='*60}")
        self.stdout.write(self.style.HTTP_INFO(f"Processing {sport_code.upper()}"))
        self.stdout.write(f"{'='*60}")

        url = ESPN_ENDPOINTS[sport_code]
        try:
            resp = requests.get(url, timeout=15)
            resp.raise_for_status()
            data = resp.json()
        except requests.RequestException as e:
            self.stderr.write(self.style.ERROR(f"  Failed to fetch {url}: {e}"))
            return

        # Parse ESPN response
        espn_teams = self._parse_espn_teams(data, sport_code)
        if not espn_teams:
            self.stderr.write(self.style.ERROR(f"  No teams found in ESPN response"))
            return

        self.stdout.write(f"  ESPN returned {len(espn_teams)} teams")

        # Get our DB teams for this sport
        # NOTE(review): sport_id=sport_code assumes Sport's primary key is
        # the sport code — confirm against the Sport model.
        db_teams = Team.objects.filter(sport_id=sport_code)
        db_abbrevs = {t.abbreviation: t for t in db_teams}

        overrides = ABBREV_OVERRIDES.get(sport_code, {})

        matched = 0
        updated = 0
        unmatched_espn = []

        for espn_abbrev, meta in espn_teams.items():
            # Remap ESPN abbreviation to ours
            our_abbrev = overrides.get(espn_abbrev, espn_abbrev)

            # pop() so whatever remains in db_abbrevs afterwards is exactly
            # the set of DB teams ESPN did not cover (reported below).
            team = db_abbrevs.pop(our_abbrev, None)
            if not team:
                unmatched_espn.append(f"{espn_abbrev} (mapped→{our_abbrev})" if espn_abbrev != our_abbrev else espn_abbrev)
                continue

            matched += 1
            changes = []  # human-readable field diffs for this team

            if meta["logo_url"] and team.logo_url != meta["logo_url"]:
                changes.append(f"logo_url → {meta['logo_url'][:60]}…")
                if not dry_run:
                    team.logo_url = meta["logo_url"]

            if meta["primary_color"] and team.primary_color != meta["primary_color"]:
                changes.append(f"primary_color → {meta['primary_color']}")
                if not dry_run:
                    team.primary_color = meta["primary_color"]

            if meta["secondary_color"] and team.secondary_color != meta["secondary_color"]:
                changes.append(f"secondary_color → {meta['secondary_color']}")
                if not dry_run:
                    team.secondary_color = meta["secondary_color"]

            if changes:
                updated += 1
                self.stdout.write(f"  {team.abbreviation:6} {team.full_name}")
                for c in changes:
                    self.stdout.write(f"    {c}")
                if not dry_run:
                    # Saves all three metadata fields even when only some
                    # changed (harmless: unchanged fields keep their value).
                    team.save(update_fields=["logo_url", "primary_color", "secondary_color", "updated_at"])

        # Report
        self.stdout.write(f"\n  Matched: {matched} | Updated: {updated}")

        if unmatched_espn:
            self.stdout.write(self.style.WARNING(f"  ESPN teams with no DB match: {', '.join(sorted(unmatched_espn))}"))

        if db_abbrevs:
            missing = ", ".join(sorted(db_abbrevs.keys()))
            self.stdout.write(self.style.WARNING(f"  DB teams with no ESPN match: {missing}"))

    def _parse_espn_teams(self, data, sport_code):
        """Extract abbreviation → {logo_url, primary_color, secondary_color} from ESPN response."""
        result = {}

        try:
            teams_list = data["sports"][0]["leagues"][0]["teams"]
        except (KeyError, IndexError):
            return result

        for entry in teams_list:
            team = entry.get("team", {})
            abbrev = team.get("abbreviation", "")
            if not abbrev:
                continue

            # ESPN returns bare hex colors (no '#') and a list of logo
            # variants; we take the first logo.
            color = team.get("color", "")
            alt_color = team.get("alternateColor", "")
            logos = team.get("logos", [])
            logo_url = logos[0]["href"] if logos else ""

            result[abbrev] = {
                "logo_url": logo_url,
                "primary_color": f"#{color}" if color else "",
                "secondary_color": f"#{alt_color}" if alt_color else "",
            }

        return result

    def _assign_mls_divisions(self, dry_run):
        """Assign each MLS team to its conference's division per MLS_CONFERENCES."""
        self.stdout.write(f"\n{'='*60}")
        self.stdout.write(self.style.HTTP_INFO("Assigning MLS divisions"))
        self.stdout.write(f"{'='*60}")

        try:
            mls_sport = Sport.objects.get(code="mls")
        except Sport.DoesNotExist:
            self.stderr.write(self.style.ERROR("  MLS sport not found in DB"))
            return

        # Build reverse lookup: abbreviation → conference name
        abbrev_to_conf = {}
        for conf_name, abbrevs in MLS_CONFERENCES.items():
            for abbrev in abbrevs:
                abbrev_to_conf[abbrev] = conf_name

        # Pre-create conferences and divisions (skip in dry-run)
        division_cache = {}  # conf_name → Division
        if not dry_run:
            for conf_name in MLS_CONFERENCES:
                conference, conf_created = Conference.objects.get_or_create(
                    sport=mls_sport,
                    name=f"{conf_name} Conference",
                    defaults={"short_name": conf_name[:4], "order": 0 if conf_name == "Eastern" else 1},
                )
                if conf_created:
                    self.stdout.write(f"  Created conference: {conference}")

                # Each MLS conference gets a single same-named division.
                division, div_created = Division.objects.get_or_create(
                    conference=conference,
                    name=conf_name,
                    defaults={"short_name": conf_name[:4], "order": 0},
                )
                if div_created:
                    self.stdout.write(f"  Created division: {division}")

                division_cache[conf_name] = division

        assigned = 0
        for team in Team.objects.filter(sport=mls_sport):
            conf_name = abbrev_to_conf.get(team.abbreviation)
            if not conf_name:
                self.stdout.write(self.style.WARNING(f"  {team.abbreviation} not in conference map — skipping"))
                continue

            if dry_run:
                # division_cache is empty in dry-run (nothing was created),
                # so only teams currently lacking a division are reported.
                if team.division is None:
                    self.stdout.write(f"  {team.abbreviation:6} → {conf_name}")
                    assigned += 1
            else:
                division = division_cache[conf_name]
                if team.division != division:
                    self.stdout.write(f"  {team.abbreviation:6} → {division}")
                    assigned += 1
                    team.division = division
                    team.save(update_fields=["division", "updated_at"])

        self.stdout.write(f"\n  Divisions assigned: {assigned}")

    def _print_summary(self):
        """Print DB-wide metadata coverage counts."""
        self.stdout.write(f"\n{'='*60}")
        self.stdout.write(self.style.HTTP_INFO("Summary"))
        self.stdout.write(f"{'='*60}")

        total = Team.objects.count()
        missing_logo = Team.objects.filter(logo_url="").count()
        missing_color = Team.objects.filter(primary_color="").count()
        missing_div = Team.objects.filter(division__isnull=True).count()

        self.stdout.write(f"  Total teams: {total}")
        self.stdout.write(f"  Missing logo: {missing_logo}")
        self.stdout.write(f"  Missing color: {missing_color}")
        self.stdout.write(f"  Missing division: {missing_div}")
||||
Reference in New Issue
Block a user