From 63acf7accbd4f23d7d3882ec5174240ccb5c1aa9 Mon Sep 17 00:00:00 2001 From: Trey t Date: Thu, 19 Feb 2026 14:04:27 -0600 Subject: [PATCH] feat: add Django web app, CloudKit sync, dashboard, and game_datetime_utc export Adds the full Django application layer on top of sportstime_parser: - core: Sport, Team, Stadium, Game models with aliases and league structure - scraper: orchestration engine, adapter, job management, Celery tasks - cloudkit: CloudKit sync client, sync state tracking, sync jobs - dashboard: staff dashboard for monitoring scrapers, sync, review queue - notifications: email reports for scrape/sync results - Docker setup for deployment (Dockerfile, docker-compose, entrypoint) Game exports now use game_datetime_utc (ISO 8601 UTC) instead of venue-local date+time strings, matching the canonical format used by the iOS app. Co-Authored-By: Claude Opus 4.6 --- .env.example | 45 + .gitignore | 17 + CLAUDE.md | 131 +++ Dockerfile | 44 + README.md | 1037 +++++------------ cloudkit/__init__.py | 1 + cloudkit/admin.py | 213 ++++ cloudkit/apps.py | 7 + cloudkit/client.py | 385 ++++++ cloudkit/migrations/0001_initial.py | 120 ++ .../0002_add_sync_progress_fields.py | 63 + .../0003_alter_cloudkitsyncjob_status.py | 29 + .../0004_cloudkitsyncjob_sport_progress.py | 28 + ...0005_add_conference_division_alias_sync.py | 78 ++ cloudkit/migrations/__init__.py | 0 cloudkit/models.py | 394 +++++++ cloudkit/resources.py | 49 + cloudkit/tasks.py | 701 +++++++++++ core/__init__.py | 1 + core/admin/__init__.py | 6 + core/admin/alias_admin.py | 84 ++ core/admin/game_admin.py | 117 ++ core/admin/league_structure_admin.py | 70 ++ core/admin/sport_admin.py | 54 + core/admin/stadium_admin.py | 89 ++ core/admin/team_admin.py | 96 ++ core/apps.py | 7 + core/management/__init__.py | 1 + core/management/commands/__init__.py | 1 + core/management/commands/export_data.py | 445 +++++++ core/management/commands/fix_wnba_stadiums.py | 98 ++ core/management/commands/import_data.py | 512 
++++++++ .../commands/populate_stadium_details.py | 351 ++++++ .../commands/populate_stadium_images.py | 147 +++ .../commands/populate_team_metadata.py | 268 +++++ core/migrations/0001_initial.py | 438 +++++++ .../0002_conference_division_canonical_id.py | 53 + .../0003_sport_icon_name_color_hex.py | 21 + core/migrations/__init__.py | 0 core/models/__init__.py | 17 + core/models/alias.py | 169 +++ core/models/game.py | 146 +++ core/models/league_structure.py | 92 ++ core/models/sport.py | 78 ++ core/models/stadium.py | 109 ++ core/models/team.py | 88 ++ core/resources.py | 162 +++ dashboard/__init__.py | 1 + dashboard/apps.py | 7 + dashboard/templates/dashboard/base.html | 130 +++ dashboard/templates/dashboard/index.html | 125 ++ .../templates/dashboard/review_queue.html | 74 ++ .../templates/dashboard/scraper_status.html | 100 ++ dashboard/templates/dashboard/stats.html | 85 ++ .../templates/dashboard/sync_status.html | 382 ++++++ dashboard/urls.py | 21 + dashboard/views.py | 644 ++++++++++ docker-compose.unraid.yml | 114 ++ docker-compose.yml | 113 ++ docker-entrypoint.sh | 45 + manage.py | 22 + notifications/__init__.py | 1 + notifications/admin.py | 119 ++ notifications/apps.py | 7 + notifications/migrations/0001_initial.py | 90 ++ notifications/migrations/__init__.py | 0 notifications/models.py | 131 +++ notifications/tasks.py | 240 ++++ .../notifications/emails/scrape_report.html | 119 ++ .../notifications/emails/scrape_report.txt | 43 + .../notifications/emails/sync_report.html | 72 ++ .../notifications/emails/sync_report.txt | 23 + requirements.txt | 34 +- scraper/__init__.py | 1 + scraper/admin.py | 139 +++ scraper/admin/__init__.py | 3 + scraper/admin/config_admin.py | 110 ++ scraper/admin/job_admin.py | 154 +++ scraper/admin/review_admin.py | 157 +++ scraper/apps.py | 7 + scraper/engine/__init__.py | 1 + scraper/engine/adapter.py | 496 ++++++++ scraper/engine/db_alias_loader.py | 144 +++ scraper/migrations/0001_initial.py | 201 ++++ 
scraper/migrations/__init__.py | 0 scraper/models.py | 199 ++++ scraper/models/__init__.py | 10 + scraper/models/config.py | 102 ++ scraper/models/job.py | 159 +++ scraper/models/review.py | 192 +++ scraper/resources.py | 55 + scraper/tasks.py | 182 +++ sportstime/__init__.py | 5 + sportstime/celery.py | 22 + sportstime/settings.py | 236 ++++ sportstime/urls.py | 20 + sportstime/wsgi.py | 16 + .../normalizers/stadium_resolver.py | 214 ++-- sportstime_parser/scrapers/mlb.py | 42 +- sportstime_parser/scrapers/mls.py | 7 +- sportstime_parser/scrapers/nba.py | 28 +- sportstime_parser/scrapers/nfl.py | 30 +- sportstime_parser/scrapers/nhl.py | 26 + sportstime_parser/scrapers/nwsl.py | 5 +- sportstime_parser/scrapers/wnba.py | 5 +- sportstime_parser/uploaders/diff.py | 2 + stadium_aliases.json | 84 ++ templates/admin/index.html | 18 + templates/base.html | 254 ++++ templates/dashboard/export.html | 176 +++ templates/dashboard/index.html | 188 +++ templates/dashboard/review_queue.html | 96 ++ templates/dashboard/scraper_status.html | 256 ++++ templates/dashboard/stats.html | 111 ++ 114 files changed, 13070 insertions(+), 887 deletions(-) create mode 100644 .env.example create mode 100644 CLAUDE.md create mode 100644 Dockerfile create mode 100644 cloudkit/__init__.py create mode 100644 cloudkit/admin.py create mode 100644 cloudkit/apps.py create mode 100644 cloudkit/client.py create mode 100644 cloudkit/migrations/0001_initial.py create mode 100644 cloudkit/migrations/0002_add_sync_progress_fields.py create mode 100644 cloudkit/migrations/0003_alter_cloudkitsyncjob_status.py create mode 100644 cloudkit/migrations/0004_cloudkitsyncjob_sport_progress.py create mode 100644 cloudkit/migrations/0005_add_conference_division_alias_sync.py create mode 100644 cloudkit/migrations/__init__.py create mode 100644 cloudkit/models.py create mode 100644 cloudkit/resources.py create mode 100644 cloudkit/tasks.py create mode 100644 core/__init__.py create mode 100644 core/admin/__init__.py 
create mode 100644 core/admin/alias_admin.py create mode 100644 core/admin/game_admin.py create mode 100644 core/admin/league_structure_admin.py create mode 100644 core/admin/sport_admin.py create mode 100644 core/admin/stadium_admin.py create mode 100644 core/admin/team_admin.py create mode 100644 core/apps.py create mode 100644 core/management/__init__.py create mode 100644 core/management/commands/__init__.py create mode 100644 core/management/commands/export_data.py create mode 100644 core/management/commands/fix_wnba_stadiums.py create mode 100644 core/management/commands/import_data.py create mode 100644 core/management/commands/populate_stadium_details.py create mode 100644 core/management/commands/populate_stadium_images.py create mode 100644 core/management/commands/populate_team_metadata.py create mode 100755 core/migrations/0001_initial.py create mode 100755 core/migrations/0002_conference_division_canonical_id.py create mode 100644 core/migrations/0003_sport_icon_name_color_hex.py create mode 100755 core/migrations/__init__.py create mode 100644 core/models/__init__.py create mode 100644 core/models/alias.py create mode 100644 core/models/game.py create mode 100644 core/models/league_structure.py create mode 100644 core/models/sport.py create mode 100644 core/models/stadium.py create mode 100644 core/models/team.py create mode 100644 core/resources.py create mode 100644 dashboard/__init__.py create mode 100644 dashboard/apps.py create mode 100644 dashboard/templates/dashboard/base.html create mode 100644 dashboard/templates/dashboard/index.html create mode 100644 dashboard/templates/dashboard/review_queue.html create mode 100644 dashboard/templates/dashboard/scraper_status.html create mode 100644 dashboard/templates/dashboard/stats.html create mode 100644 dashboard/templates/dashboard/sync_status.html create mode 100644 dashboard/urls.py create mode 100644 dashboard/views.py create mode 100644 docker-compose.unraid.yml create mode 100644 
docker-compose.yml create mode 100644 docker-entrypoint.sh create mode 100644 manage.py create mode 100644 notifications/__init__.py create mode 100644 notifications/admin.py create mode 100644 notifications/apps.py create mode 100644 notifications/migrations/0001_initial.py create mode 100644 notifications/migrations/__init__.py create mode 100644 notifications/models.py create mode 100644 notifications/tasks.py create mode 100644 notifications/templates/notifications/emails/scrape_report.html create mode 100644 notifications/templates/notifications/emails/scrape_report.txt create mode 100644 notifications/templates/notifications/emails/sync_report.html create mode 100644 notifications/templates/notifications/emails/sync_report.txt create mode 100644 scraper/__init__.py create mode 100644 scraper/admin.py create mode 100644 scraper/admin/__init__.py create mode 100644 scraper/admin/config_admin.py create mode 100644 scraper/admin/job_admin.py create mode 100644 scraper/admin/review_admin.py create mode 100644 scraper/apps.py create mode 100644 scraper/engine/__init__.py create mode 100644 scraper/engine/adapter.py create mode 100644 scraper/engine/db_alias_loader.py create mode 100644 scraper/migrations/0001_initial.py create mode 100644 scraper/migrations/__init__.py create mode 100644 scraper/models.py create mode 100644 scraper/models/__init__.py create mode 100644 scraper/models/config.py create mode 100644 scraper/models/job.py create mode 100644 scraper/models/review.py create mode 100644 scraper/resources.py create mode 100644 scraper/tasks.py create mode 100644 sportstime/__init__.py create mode 100644 sportstime/celery.py create mode 100644 sportstime/settings.py create mode 100644 sportstime/urls.py create mode 100644 sportstime/wsgi.py create mode 100644 templates/admin/index.html create mode 100644 templates/base.html create mode 100644 templates/dashboard/export.html create mode 100644 templates/dashboard/index.html create mode 100644 
templates/dashboard/review_queue.html create mode 100644 templates/dashboard/scraper_status.html create mode 100644 templates/dashboard/stats.html diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..3df6069 --- /dev/null +++ b/.env.example @@ -0,0 +1,45 @@ +# Django Settings +DEBUG=False +SECRET_KEY=generate-a-secure-key-with-python-c-from-django.core.management.utils-import-get_random_secret_key-print-get_random_secret_key +ALLOWED_HOSTS=sportstime.yourdomain.com,localhost,127.0.0.1 + +# Admin User (created on first startup) +ADMIN_USERNAME=admin +ADMIN_PASSWORD=changeme +ADMIN_EMAIL=admin@yourdomain.com + +# Import initial data on first startup (set to true, then false after first run) +IMPORT_INITIAL_DATA=true + +# Database +DB_PASSWORD=your-secure-database-password +DATABASE_URL=postgresql://sportstime:${DB_PASSWORD}@db:5432/sportstime + +# Redis +REDIS_URL=redis://redis:6379/0 + +# CloudKit Configuration +CLOUDKIT_CONTAINER=iCloud.com.sportstime.app +CLOUDKIT_ENVIRONMENT=development +CLOUDKIT_KEY_ID=your-cloudkit-key-id +CLOUDKIT_PRIVATE_KEY_PATH=/app/secrets/cloudkit.pem + +# Email (SMTP) - Example for Gmail +EMAIL_HOST=smtp.gmail.com +EMAIL_PORT=587 +EMAIL_USE_TLS=True +EMAIL_HOST_USER=your-email@gmail.com +EMAIL_HOST_PASSWORD=your-app-specific-password +DEFAULT_FROM_EMAIL=SportsTime +ADMIN_EMAIL=admin@yourdomain.com + +# Security (for production behind HTTPS proxy) +SECURE_SSL_REDIRECT=False +CSRF_TRUSTED_ORIGINS=https://sportstime.yourdomain.com +SESSION_COOKIE_SECURE=True +CSRF_COOKIE_SECURE=True + +# Scraper Settings +SCRAPER_REQUEST_DELAY=3.0 +SCRAPER_MAX_RETRIES=3 +SCRAPER_FUZZY_THRESHOLD=85 diff --git a/.gitignore b/.gitignore index 0d09d75..11afdb7 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,20 @@ logs/ # Claude Code .claude/ + +# Django +staticfiles/ +media/ +*.sqlite3 +db.sqlite3 +celerybeat-schedule +celerybeat.pid + +# Docker +.docker/ + +# Database dumps +*.sql + +# Keep .env.example but ignore actual .env 
files +!.env.example diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..8961164 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,131 @@ +# CLAUDE.md + +This file provides context for Claude Code when working on this project. + +## Project Overview + +SportsTime is a Django-based sports data pipeline that scrapes game schedules from official sources, normalizes the data, stores it in PostgreSQL, and syncs to CloudKit for iOS app consumption. + +## Architecture + +``` +┌─────────────────┐ ┌──────────────┐ ┌─────────────┐ ┌──────────┐ +│ Data Sources │ ──▶ │ Scrapers │ ──▶ │ PostgreSQL │ ──▶ │ CloudKit │ +│ (ESPN, leagues) │ │ (sportstime_ │ │ (Django) │ │ (iOS) │ +└─────────────────┘ │ parser) │ └─────────────┘ └──────────┘ + └──────────────┘ +``` + +## Key Directories + +- `core/` - Django models: Sport, Team, Stadium, Game, Conference, Division, Aliases +- `scraper/` - Scraper orchestration, adapter, job management +- `sportstime_parser/` - Standalone scraper library (ESPN, league APIs) +- `cloudkit/` - CloudKit sync client and job management +- `dashboard/` - Staff dashboard for monitoring and controls +- `templates/` - Django templates for dashboard UI + +## Data Flow + +1. **Scraper runs** (manual or scheduled via Celery Beat) +2. **sportstime_parser** fetches from ESPN/league APIs +3. **Adapter** normalizes data and resolves team/stadium names +4. **Django models** store normalized data with CloudKit sync flags +5. **CloudKit sync** pushes pending records to iCloud + +## Models Hierarchy + +``` +Sport +├── Conference +│ └── Division +│ └── Team (has TeamAliases) +├── Stadium (has StadiumAliases) +└── Game (references Team, Stadium) +``` + +## Name Resolution + +Team and stadium names from scraped data are resolved via: +1. Direct ID match (canonical IDs from scraper) +2. Database aliases (TeamAlias/StadiumAlias with date validity) +3. 
Direct name/abbreviation match + +Aliases support validity dates for historical names (e.g., team relocations, stadium naming rights). + +## Common Tasks + +### Run a scraper +```bash +docker-compose exec web python manage.py shell +>>> from scraper.tasks import run_scraper_task +>>> run_scraper_task.delay(config_id) +``` + +### Check scraper status +Visit `/dashboard/scraper-status/` or check `ScrapeJob` model. + +### Add team/stadium alias +Use Django admin at `/admin/core/teamalias/` or `/admin/core/stadiumalias/`. + +### Export/Import data +All admin models support import/export (JSON, CSV, XLSX) via django-import-export. + +### Sync to CloudKit +```bash +docker-compose exec web python manage.py shell +>>> from cloudkit.tasks import run_cloudkit_sync +>>> run_cloudkit_sync.delay(config_id) +``` + +## Environment + +- **Docker Compose** for local development +- **PostgreSQL** database +- **Redis** for Celery broker +- **Celery** for async tasks and scheduled jobs + +## Key Files + +- `sportstime/settings.py` - Django settings +- `scraper/engine/adapter.py` - Bridges sportstime_parser to Django +- `scraper/engine/db_alias_loader.py` - Database alias resolution +- `core/resources.py` - Import/export resource definitions +- `docker-compose.yml` - Container orchestration + +## Supported Sports + +| Code | Sport | Season Type | +|------|-------|-------------| +| nba | NBA Basketball | split (Oct-Jun) | +| mlb | MLB Baseball | calendar (Mar-Nov) | +| nfl | NFL Football | split (Sep-Feb) | +| nhl | NHL Hockey | split (Oct-Jun) | +| mls | MLS Soccer | calendar (Feb-Nov) | +| wnba | WNBA Basketball | calendar (May-Oct) | +| nwsl | NWSL Soccer | calendar (Mar-Nov) | + +## Testing + +```bash +docker-compose exec web pytest +``` + +## Useful Commands + +```bash +# Restart containers +docker-compose restart + +# Rebuild after requirements change +docker-compose down && docker-compose up -d --build + +# View logs +docker-compose logs -f web + +# Django shell +docker-compose
exec web python manage.py shell + +# Database shell +docker-compose exec db psql -U sportstime -d sportstime +``` diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..275a473 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,44 @@ +FROM python:3.12-slim + +# Set environment variables +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 + +# Set work directory +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + libpq-dev \ + netcat-openbsd \ + && rm -rf /var/lib/apt/lists/* + +# Install Python dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy project +COPY . . + +# Make entrypoint executable +COPY docker-entrypoint.sh /docker-entrypoint.sh +RUN chmod +x /docker-entrypoint.sh + +# Create staticfiles directory before creating non-root user +RUN mkdir -p /app/staticfiles + +# Create non-root user +RUN adduser --disabled-password --gecos '' appuser && \ + chown -R appuser:appuser /app && \ + chown appuser:appuser /docker-entrypoint.sh +USER appuser + +# Expose port +EXPOSE 8000 + +# Set entrypoint +ENTRYPOINT ["/docker-entrypoint.sh"] + +# Default command +CMD ["gunicorn", "sportstime.wsgi:application", "--bind", "0.0.0.0:8000", "--workers", "3"] diff --git a/README.md b/README.md index c8a476e..422e009 100644 --- a/README.md +++ b/README.md @@ -1,833 +1,324 @@ -# SportsTime Parser +# SportsTime Data Pipeline -A Python package for scraping, normalizing, and uploading sports schedule data to CloudKit for the SportsTime iOS app. +A Django-based sports data pipeline that scrapes game schedules from official sources, normalizes data, and syncs to CloudKit for iOS app consumption. 
-## Table of Contents +## Features -- [Overview](#overview) -- [Installation](#installation) -- [Quick Start](#quick-start) -- [Architecture](#architecture) -- [Directory Structure](#directory-structure) -- [Configuration](#configuration) -- [Data Models](#data-models) -- [Normalizers](#normalizers) -- [Scrapers](#scrapers) -- [Uploaders](#uploaders) -- [Utilities](#utilities) -- [Manual Review Workflow](#manual-review-workflow) -- [Adding a New Sport](#adding-a-new-sport) -- [Troubleshooting](#troubleshooting) - -## Overview - -The `sportstime_parser` package provides a complete pipeline for: - -1. **Scraping** game schedules from multiple sources (Basketball-Reference, ESPN, MLB API, etc.) -2. **Normalizing** raw data to canonical identifiers (teams, stadiums, games) -3. **Resolving** team/stadium names using exact matching, historical aliases, and fuzzy matching -4. **Uploading** data to CloudKit with diff-based sync and resumable uploads - -### Supported Sports - -| Sport | Code | Sources | Season Format | -|-------|------|---------|---------------| -| NBA | `nba` | Basketball-Reference, ESPN, CBS | Oct-Jun (split year) | -| MLB | `mlb` | Baseball-Reference, MLB API, ESPN | Mar-Nov (single year) | -| NFL | `nfl` | ESPN, Pro-Football-Reference, CBS | Sep-Feb (split year) | -| NHL | `nhl` | Hockey-Reference, NHL API, ESPN | Oct-Jun (split year) | -| MLS | `mls` | ESPN, FBref | Feb-Nov (single year) | -| WNBA | `wnba` | ESPN | May-Oct (single year) | -| NWSL | `nwsl` | ESPN | Mar-Nov (single year) | - -## Installation - -```bash -cd Scripts -pip install -r requirements.txt -``` - -### Dependencies - -- `requests` - HTTP requests with session management -- `beautifulsoup4` + `lxml` - HTML parsing -- `rapidfuzz` - Fuzzy string matching -- `pyjwt` + `cryptography` - CloudKit JWT authentication -- `rich` - Terminal UI (progress bars, logging) -- `pytz` / `timezonefinder` - Timezone detection +- **Multi-sport support**: NBA, MLB, NFL, NHL, MLS, WNBA, NWSL +- 
**Automated scraping**: Scheduled data collection from ESPN and league APIs +- **Smart name resolution**: Team/stadium aliases with date validity support +- **CloudKit sync**: Push data to iCloud for iOS app consumption +- **Admin dashboard**: Monitor scrapers, review items, manage data +- **Import/Export**: Bulk data management via JSON, CSV, XLSX +- **Audit history**: Track all changes with django-simple-history ## Quick Start -### Scrape a Single Sport +### Prerequisites -```python -from sportstime_parser.scrapers import create_nba_scraper +- Docker and Docker Compose +- (Optional) CloudKit credentials for sync -scraper = create_nba_scraper(season=2025) -result = scraper.scrape_all() +### Setup -print(f"Games: {result.game_count}") -print(f"Teams: {result.team_count}") -print(f"Stadiums: {result.stadium_count}") -print(f"Needs review: {result.review_count}") -``` +1. Clone the repository: + ```bash + git clone + cd SportsTimeScripts + ``` -### Upload to CloudKit +2. Copy environment template: + ```bash + cp .env.example .env + ``` -```python -from sportstime_parser.uploaders import CloudKitClient, RecordDiffer +3. Start the containers: + ```bash + docker-compose up -d + ``` -client = CloudKitClient(environment="development") -differ = RecordDiffer() +4. Run migrations: + ```bash + docker-compose exec web python manage.py migrate + ``` -# Compare local vs remote -diff = differ.diff_games(local_games, remote_records) +5. Create a superuser: + ```bash + docker-compose exec web python manage.py createsuperuser + ``` -# Upload changes -records = diff.get_records_to_upload() -result = await client.save_records(records) -``` +6. Access the admin at http://localhost:8000/admin/ +7. Access the dashboard at http://localhost:8000/dashboard/ ## Architecture ``` -┌─────────────────────────────────────────────────────────────────────────┐ -│ DATA SOURCES │ -│ Basketball-Reference │ ESPN API │ MLB API │ Hockey-Reference │ etc. 
│ -└────────────────────────────────┬────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────────┐ -│ SCRAPERS │ -│ NBAScraper │ MLBScraper │ NFLScraper │ NHLScraper │ MLSScraper │ etc. │ -│ │ -│ Features: │ -│ • Multi-source fallback (try sources in priority order) │ -│ • Automatic rate limiting with exponential backoff │ -│ • Doubleheader detection │ -│ • International game filtering (NFL London, NHL Global Series) │ -└────────────────────────────────┬────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────────┐ -│ NORMALIZERS │ -│ TeamResolver │ StadiumResolver │ CanonicalIdGenerator │ AliasLoader │ -│ │ -│ Resolution Strategy (in order): │ -│ 1. Exact match against canonical mappings │ -│ 2. Date-aware alias lookup (handles renames/relocations) │ -│ 3. Fuzzy matching with confidence threshold (85%) │ -│ 4. Flag for manual review if unresolved or low confidence │ -└────────────────────────────────┬────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────────┐ -│ DATA MODELS │ -│ Game │ Team │ Stadium │ ManualReviewItem │ -│ │ -│ All models use canonical IDs: │ -│ • team_nba_lal (Los Angeles Lakers) │ -│ • stadium_nba_los_angeles_lakers (Crypto.com Arena) │ -│ • game_nba_2025_20251022_bos_lal (specific game) │ -└────────────────────────────────┬────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────────┐ -│ UPLOADERS │ -│ CloudKitClient │ RecordDiffer │ StateManager │ -│ │ -│ Features: │ -│ • JWT authentication with Apple's CloudKit Web Services │ -│ • Batch operations (up to 200 records per request) │ -│ • Diff-based sync (only upload changes) │ -│ • Resumable uploads with persistent state │ -└────────────────────────────────┬────────────────────────────────────────┘ - │ - ▼ 
-┌─────────────────────────────────────────────────────────────────────────┐ -│ CLOUDKIT │ -│ Public Database: Games, Teams, Stadiums, Aliases │ -└─────────────────────────────────────────────────────────────────────────┘ +┌─────────────────┐ ┌──────────────┐ ┌─────────────┐ ┌──────────┐ +│ Data Sources │ ──▶ │ Scrapers │ ──▶ │ PostgreSQL │ ──▶ │ CloudKit │ +│ (ESPN, leagues) │ │ (sportstime_ │ │ (Django) │ │ (iOS) │ +└─────────────────┘ │ parser) │ └─────────────┘ └──────────┘ + └──────────────┘ ``` -## Directory Structure +### Components -``` -Scripts/ -├── README.md # This file -├── requirements.txt # Python dependencies -├── pyproject.toml # Package configuration -├── league_structure.json # League hierarchy (conferences, divisions) -├── team_aliases.json # Historical team name mappings -├── stadium_aliases.json # Historical stadium name mappings -├── logs/ # Runtime logs (auto-created) -├── output/ # Scrape output files (auto-created) -└── sportstime_parser/ # Main package - ├── __init__.py - ├── config.py # Configuration constants - ├── SOURCES.md # Data source documentation - ├── models/ # Data classes - │ ├── game.py # Game model - │ ├── team.py # Team model - │ ├── stadium.py # Stadium model - │ └── aliases.py # Alias and ManualReviewItem models - ├── normalizers/ # Name resolution - │ ├── canonical_id.py # ID generation - │ ├── alias_loader.py # Alias loading and resolution - │ ├── fuzzy.py # Fuzzy string matching - │ ├── timezone.py # Timezone detection - │ ├── team_resolver.py # Team name resolution - │ └── stadium_resolver.py # Stadium name resolution - ├── scrapers/ # Sport-specific scrapers - │ ├── base.py # Abstract base scraper - │ ├── nba.py # NBA scraper - │ ├── mlb.py # MLB scraper - │ ├── nfl.py # NFL scraper - │ ├── nhl.py # NHL scraper - │ ├── mls.py # MLS scraper - │ ├── wnba.py # WNBA scraper - │ └── nwsl.py # NWSL scraper - ├── uploaders/ # CloudKit integration - │ ├── cloudkit.py # CloudKit Web Services client - │ ├── diff.py # Record 
diffing - │ └── state.py # Resumable upload state - └── utils/ # Shared utilities - ├── logging.py # Rich-based logging - ├── http.py # Rate-limited HTTP client - └── progress.py # Progress tracking +| Component | Description | +|-----------|-------------| +| **Django** | Web framework, ORM, admin interface | +| **PostgreSQL** | Primary database | +| **Redis** | Celery message broker | +| **Celery** | Async task queue (scraping, syncing) | +| **Celery Beat** | Scheduled task runner | +| **sportstime_parser** | Standalone scraper library | + +## Usage + +### Dashboard + +Visit http://localhost:8000/dashboard/ (staff login required) to: + +- View scraper status and run scrapers +- Monitor CloudKit sync status +- Review items needing manual attention +- See statistics across all sports + +### Running Scrapers + +**Via Dashboard:** +1. Go to Dashboard → Scraper Status +2. Click "Run Now" for a specific sport or "Run All Enabled" + +**Via Command Line:** +```bash +docker-compose exec web python manage.py shell +>>> from scraper.tasks import run_scraper_task +>>> from scraper.models import ScraperConfig +>>> config = ScraperConfig.objects.get(sport__code='nba', season=2025) +>>> run_scraper_task.delay(config.id) ``` -## Configuration +### Managing Aliases -### config.py +When scrapers encounter unknown team or stadium names: -Key configuration constants: +1. A **Review Item** is created for manual resolution +2. Add an alias via Admin → Team Aliases or Stadium Aliases +3. 
Re-run the scraper to pick up the new mapping -```python -# Directories -SCRIPTS_DIR = Path(__file__).parent.parent # Scripts/ -OUTPUT_DIR = SCRIPTS_DIR / "output" # JSON output -STATE_DIR = SCRIPTS_DIR / ".parser_state" # Upload state +Aliases support **validity dates** - useful for: +- Historical team names (e.g., "Washington Redskins" valid until 2020) +- Stadium naming rights changes (e.g., "Staples Center" valid until 2021) -# CloudKit -CLOUDKIT_CONTAINER = "iCloud.com.sportstime.app" -CLOUDKIT_ENVIRONMENT = "development" # or "production" +### Import/Export -# Rate Limiting -DEFAULT_REQUEST_DELAY = 3.0 # seconds between requests -MAX_RETRIES = 3 # retry attempts -BACKOFF_FACTOR = 2.0 # exponential backoff multiplier -INITIAL_BACKOFF = 5.0 # initial backoff duration +All admin models support bulk import/export: -# Fuzzy Matching -FUZZY_THRESHOLD = 85 # minimum match confidence (0-100) +1. Go to any admin list page (e.g., Teams) +2. Click **Export** → Select format (JSON recommended) → Submit +3. Modify the data as needed (e.g., ask Claude to update it) +4. Click **Import** → Upload file → Preview → Confirm -# Expected game counts (for validation) -EXPECTED_GAME_COUNTS = { - "nba": 1230, # 30 teams × 82 games ÷ 2 - "mlb": 2430, # 30 teams × 162 games ÷ 2 - "nfl": 272, # Regular season only - "nhl": 1312, # 32 teams × 82 games ÷ 2 - "mls": 544, # 29 teams × ~34 games ÷ 2 - "wnba": 228, # 12 teams × 40 games ÷ 2 - "nwsl": 182, # 14 teams × 26 games ÷ 2 -} +Imports will update existing records and create new ones. 
+ +## Project Structure -# Geography (for filtering international games) -ALLOWED_COUNTRIES = {"USA", "Canada"} ``` - -### league_structure.json - -Defines the hierarchical structure of each league: - -```json -{ - "nba": { - "name": "National Basketball Association", - "conferences": { - "Eastern": { - "divisions": { - "Atlantic": ["BOS", "BKN", "NYK", "PHI", "TOR"], - "Central": ["CHI", "CLE", "DET", "IND", "MIL"], - "Southeast": ["ATL", "CHA", "MIA", "ORL", "WAS"] - } - }, - "Western": { ... } - } - }, - "mlb": { ... }, - ... -} -``` - -### team_aliases.json / stadium_aliases.json - -Historical name mappings with validity dates: - -```json -{ - "team_mlb_athletics": [ - { - "alias": "Oakland Athletics", - "alias_type": "full_name", - "valid_from": "1968-01-01", - "valid_until": "2024-12-31" - }, - { - "alias": "Las Vegas Athletics", - "alias_type": "full_name", - "valid_from": "2028-01-01", - "valid_until": null - } - ] -} +SportsTimeScripts/ +├── core/ # Core Django models +│ ├── models/ # Sport, Team, Stadium, Game, Aliases +│ ├── admin/ # Admin configuration with import/export +│ └── resources.py # Import/export resource definitions +├── scraper/ # Scraper orchestration +│ ├── engine/ # Adapter, DB alias loaders +│ │ ├── adapter.py # Bridges sportstime_parser to Django +│ │ └── db_alias_loader.py # Database alias resolution +│ ├── models.py # ScraperConfig, ScrapeJob, ManualReviewItem +│ └── tasks.py # Celery tasks +├── sportstime_parser/ # Standalone scraper library +│ ├── scrapers/ # Per-sport scrapers (NBA, MLB, etc.) 
+│ ├── normalizers/ # Team/stadium name resolution +│ ├── models/ # Data classes +│ └── uploaders/ # CloudKit client (legacy) +├── cloudkit/ # CloudKit sync +│ ├── client.py # CloudKit API client +│ ├── models.py # CloudKitConfiguration, SyncState, SyncJob +│ └── tasks.py # Sync tasks +├── dashboard/ # Staff dashboard +│ ├── views.py # Dashboard views +│ └── urls.py # Dashboard URLs +├── templates/ # Django templates +│ ├── base.html # Base template +│ └── dashboard/ # Dashboard templates +├── sportstime/ # Django project config +│ ├── settings.py # Django settings +│ ├── urls.py # URL routing +│ └── celery.py # Celery configuration +├── docker-compose.yml # Container orchestration +├── Dockerfile # Container image +├── requirements.txt # Python dependencies +├── CLAUDE.md # Claude Code context +└── README.md # This file ``` ## Data Models -### Game +### Model Hierarchy -```python -@dataclass -class Game: - id: str # Canonical ID: game_{sport}_{season}_{date}_{away}_{home} - sport: str # Sport code (nba, mlb, etc.) 
- season: int # Season start year - home_team_id: str # Canonical team ID - away_team_id: str # Canonical team ID - stadium_id: str # Canonical stadium ID - game_date: datetime # UTC datetime - game_number: Optional[int] # 1 or 2 for doubleheaders - home_score: Optional[int] # None if not played - away_score: Optional[int] - status: str # scheduled, final, postponed, cancelled - source_url: Optional[str] # For manual review - raw_home_team: Optional[str] # Original scraped value - raw_away_team: Optional[str] - raw_stadium: Optional[str] +``` +Sport +├── Conference +│ └── Division +│ └── Team (has TeamAliases) +├── Stadium (has StadiumAliases) +└── Game (references Team, Stadium) ``` -### Team +### Key Models -```python -@dataclass -class Team: - id: str # Canonical ID: team_{sport}_{abbrev} - sport: str - city: str # e.g., "Los Angeles" - name: str # e.g., "Lakers" - full_name: str # e.g., "Los Angeles Lakers" - abbreviation: str # e.g., "LAL" - conference: Optional[str] # e.g., "Western" - division: Optional[str] # e.g., "Pacific" - stadium_id: Optional[str] # Home stadium - primary_color: Optional[str] - secondary_color: Optional[str] - logo_url: Optional[str] +| Model | Description | +|-------|-------------| +| **Sport** | Sports with season configuration | +| **Team** | Teams with division, colors, logos | +| **Stadium** | Venues with location, capacity | +| **Game** | Games with scores, status, teams | +| **TeamAlias** | Historical team names with validity dates | +| **StadiumAlias** | Historical stadium names with validity dates | +| **ScraperConfig** | Scraper settings per sport/season | +| **ScrapeJob** | Scrape execution logs | +| **ManualReviewItem** | Items needing human review | +| **CloudKitSyncState** | Per-record sync status | + +## Configuration + +### Environment Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| `DEBUG` | Debug mode | `False` | +| `SECRET_KEY` | Django secret key | (required in prod) | +| 
`DATABASE_URL` | PostgreSQL connection | `postgresql://...` | +| `REDIS_URL` | Redis connection | `redis://localhost:6379/0` | +| `CLOUDKIT_CONTAINER` | CloudKit container ID | - | +| `CLOUDKIT_KEY_ID` | CloudKit key ID | - | +| `CLOUDKIT_PRIVATE_KEY_PATH` | Path to CloudKit private key | - | + +### Scraper Settings + +| Setting | Description | Default | +|---------|-------------|---------| +| `SCRAPER_REQUEST_DELAY` | Delay between requests (seconds) | `3.0` | +| `SCRAPER_MAX_RETRIES` | Max retry attempts | `3` | +| `SCRAPER_FUZZY_THRESHOLD` | Fuzzy match confidence threshold | `85` | + +## Supported Sports + +| Code | League | Season Type | Games/Season | Data Sources | +|------|--------|-------------|--------------|--------------| +| nba | NBA | Oct-Jun (split) | ~1,230 | ESPN, NBA.com | +| mlb | MLB | Mar-Nov (calendar) | ~2,430 | ESPN, MLB.com | +| nfl | NFL | Sep-Feb (split) | ~272 | ESPN, NFL.com | +| nhl | NHL | Oct-Jun (split) | ~1,312 | ESPN, NHL.com | +| mls | MLS | Feb-Nov (calendar) | ~544 | ESPN | +| wnba | WNBA | May-Oct (calendar) | ~228 | ESPN | +| nwsl | NWSL | Mar-Nov (calendar) | ~182 | ESPN | + +## Development + +### Useful Commands + +```bash +# Start containers +docker-compose up -d + +# Stop containers +docker-compose down + +# Restart containers +docker-compose restart + +# Rebuild after requirements change +docker-compose down && docker-compose up -d --build + +# View logs +docker-compose logs -f web +docker-compose logs -f celery-worker + +# Django shell +docker-compose exec web python manage.py shell + +# Database shell +docker-compose exec db psql -U sportstime -d sportstime + +# Run migrations +docker-compose exec web python manage.py migrate + +# Create superuser +docker-compose exec web python manage.py createsuperuser ``` -### Stadium +### Running Tests -```python -@dataclass -class Stadium: - id: str # Canonical ID: stadium_{sport}_{city_team} - sport: str - name: str # Current name (e.g., "Crypto.com Arena") - city: str - state: 
Optional[str] - country: str - latitude: Optional[float] - longitude: Optional[float] - capacity: Optional[int] - surface: Optional[str] # grass, turf, ice, hardwood - roof_type: Optional[str] # dome, retractable, open - opened_year: Optional[int] - image_url: Optional[str] - timezone: Optional[str] +```bash +docker-compose exec web pytest ``` -### ManualReviewItem +### Adding a New Sport + +1. Create scraper in `sportstime_parser/scrapers/{sport}.py` +2. Add team mappings in `sportstime_parser/normalizers/team_resolver.py` +3. Add stadium mappings in `sportstime_parser/normalizers/stadium_resolver.py` +4. Register scraper in `scraper/engine/adapter.py` +5. Add Sport record via Django admin +6. Create ScraperConfig for the sport/season + +## sportstime_parser Library + +The `sportstime_parser` package is a standalone library that handles: + +- **Scraping** from multiple sources (ESPN, league APIs) +- **Normalizing** team/stadium names to canonical IDs +- **Resolving** names using exact match, aliases, and fuzzy matching + +### Resolution Strategy + +1. **Exact match** against canonical mappings +2. **Alias lookup** with date-aware validity +3. **Fuzzy match** with 85% confidence threshold +4. 
**Manual review** if unresolved + +### Canonical ID Format -```python -@dataclass -class ManualReviewItem: - item_type: str # "team" or "stadium" - raw_value: str # Original scraped value - suggested_id: Optional[str] # Best fuzzy match (if any) - confidence: float # 0.0 - 1.0 - reason: str # Why review is needed - source_url: Optional[str] # Where it came from - sport: str - check_date: Optional[date] # For date-aware alias lookup ``` - -## Normalizers - -### Canonical ID Generation - -IDs are deterministic and immutable: - -```python -# Team ID -generate_team_id("nba", "LAL") -# → "team_nba_lal" - -# Stadium ID -generate_stadium_id("nba", "Los Angeles", "Lakers") -# → "stadium_nba_los_angeles_lakers" - -# Game ID -generate_game_id( - sport="nba", - season=2025, - away_abbrev="BOS", - home_abbrev="LAL", - game_date=datetime(2025, 10, 22), - game_number=None -) -# → "game_nba_2025_20251022_bos_lal" - -# Doubleheader Game ID -generate_game_id(..., game_number=2) -# → "game_nba_2025_20251022_bos_lal_2" +team_nba_lal # Team: Los Angeles Lakers +stadium_nba_los_angeles_lakers # Stadium: Crypto.com Arena +game_nba_2025_20251022_bos_lal # Game: BOS @ LAL on Oct 22, 2025 ``` -### Team Resolution - -The `TeamResolver` uses a three-stage strategy: - -```python -resolver = get_team_resolver("nba") -result = resolver.resolve( - "Los Angeles Lakers", - check_date=date(2025, 10, 22), - source_url="https://..." -) - -# Result: -# - canonical_id: "team_nba_lal" -# - confidence: 1.0 (exact match) -# - review_item: None -``` - -**Resolution stages:** - -1. **Exact Match**: Check against canonical team mappings - - Full name: "Los Angeles Lakers" - - City + Name: "Los Angeles" + "Lakers" - - Abbreviation: "LAL" - -2. **Alias Lookup**: Check historical aliases with date awareness - - "Oakland Athletics" → "team_mlb_athletics" (valid until 2024-12-31) - - Handles relocations: "Oakland" → "Las Vegas" transition - -3. 
**Fuzzy Match**: Use rapidfuzz with 85% threshold - - "LA Lakers" → "Los Angeles Lakers" (92% match) - - Low-confidence matches flagged for review - -### Stadium Resolution - -Similar three-stage strategy with additional location awareness: - -```python -resolver = get_stadium_resolver("nba") -result = resolver.resolve( - "Crypto.com Arena", - check_date=date(2025, 10, 22) -) -``` - -**Key features:** -- Handles naming rights changes (Staples Center → Crypto.com Arena) -- Date-aware: "Staples Center" resolves correctly for historical games -- Location-based fallback using latitude/longitude - -## Scrapers - -### Base Scraper - -All scrapers extend `BaseScraper` with these features: - -```python -class BaseScraper(ABC): - def __init__(self, sport: str, season: int): ... - - # Required implementations - def _get_sources(self) -> list[str]: ... - def _scrape_games_from_source(self, source: str) -> list[RawGameData]: ... - def _normalize_games(self, raw_games) -> tuple[list[Game], list[ManualReviewItem]]: ... - def scrape_teams(self) -> list[Team]: ... - def scrape_stadiums(self) -> list[Stadium]: ... - - # Built-in features - def scrape_games(self) -> ScrapeResult: - """Multi-source fallback - tries each source in order.""" - ... - - def scrape_all(self) -> ScrapeResult: - """Scrapes games, teams, and stadiums with progress tracking.""" - ... -``` - -### NBA Scraper - -```python -class NBAScraper(BaseScraper): - """ - Sources (in priority order): - 1. Basketball-Reference - HTML tables, monthly pages - 2. ESPN API - JSON, per-date queries - 3. CBS Sports - Backup (not implemented) - - Season: October to June (split year, e.g., 2025-26) - """ -``` - -**Basketball-Reference parsing:** -- URL: `https://www.basketball-reference.com/leagues/NBA_{year}_games-{month}.html` -- Table columns: date_game, visitor_team_name, home_team_name, visitor_pts, home_pts, arena_name - -### MLB Scraper - -```python -class MLBScraper(BaseScraper): - """ - Sources: - 1. 
Baseball-Reference - Single page per season - 2. MLB Stats API - Official API with date range queries - 3. ESPN API - Backup - - Season: March to November (single year) - Handles: Doubleheaders with game_number - """ -``` - -### NFL Scraper - -```python -class NFLScraper(BaseScraper): - """ - Sources: - 1. ESPN API - Week-based queries - 2. Pro-Football-Reference - Single page per season - - Season: September to February (split year) - Filters: International games (London, Mexico City, Frankfurt) - Scrapes: Preseason (4 weeks), Regular (18 weeks), Postseason (4 rounds) - """ -``` - -### NHL Scraper - -```python -class NHLScraper(BaseScraper): - """ - Sources: - 1. Hockey-Reference - Single page per season - 2. NHL API - New API (api-web.nhle.com) - 3. ESPN API - Backup - - Season: October to June (split year) - Filters: International games (Prague, Stockholm, Helsinki) - """ -``` - -### MLS / WNBA / NWSL Scrapers - -All use ESPN API as primary source with similar structure: -- Single calendar year seasons -- Conference-based organization (MLS) or single table (WNBA, NWSL) - -## Uploaders - -### CloudKit Client - -```python -class CloudKitClient: - """CloudKit Web Services API client with JWT authentication.""" - - def __init__( - self, - container_id: str = CLOUDKIT_CONTAINER, - environment: str = "development", # or "production" - key_id: str = None, # From CloudKit Dashboard - private_key: str = None, # EC P-256 private key - ): ... - - async def fetch_records( - self, - record_type: RecordType, - filter_by: Optional[dict] = None, - sort_by: Optional[str] = None, - ) -> list[dict]: ... - - async def save_records( - self, - records: list[CloudKitRecord], - batch_size: int = 200, - ) -> BatchResult: ... - - async def delete_records( - self, - record_names: list[str], - record_type: RecordType, - ) -> BatchResult: ... 
-``` - -**Authentication:** -- Uses EC P-256 key pair -- JWT tokens signed with private key -- Tokens valid for 30 minutes - -### Record Differ - -```python -class RecordDiffer: - """Compares local records with CloudKit records.""" - - def diff_games(self, local: list[Game], remote: list[dict]) -> DiffResult: ... - def diff_teams(self, local: list[Team], remote: list[dict]) -> DiffResult: ... - def diff_stadiums(self, local: list[Stadium], remote: list[dict]) -> DiffResult: ... -``` - -**DiffResult:** -```python -@dataclass -class DiffResult: - creates: list[RecordDiff] # New records to create - updates: list[RecordDiff] # Changed records to update - deletes: list[RecordDiff] # Remote records to delete - unchanged: list[RecordDiff] # Records with no changes - - def get_records_to_upload(self) -> list[CloudKitRecord]: - """Returns creates + updates ready for upload.""" -``` - -### State Manager - -```python -class StateManager: - """Manages resumable upload state.""" - - def load_session(self, sport, season, environment) -> Optional[UploadSession]: ... - def save_session(self, session: UploadSession) -> None: ... - def get_session_or_create( - self, - sport, season, environment, - record_names: list[tuple[str, str]], - resume: bool = False, - ) -> UploadSession: ... -``` - -**State persistence:** -- Stored in `.parser_state/upload_state_{sport}_{season}_{env}.json` -- Tracks: pending, uploaded, failed records -- Supports retry with backoff - -## Utilities - -### HTTP Client - -```python -class RateLimitedSession: - """HTTP session with rate limiting and exponential backoff.""" - - def __init__( - self, - delay: float = 3.0, # Seconds between requests - max_retries: int = 3, - backoff_factor: float = 2.0, - ): ... - - def get(self, url, **kwargs) -> Response: ... - def get_json(self, url, **kwargs) -> dict: ... - def get_html(self, url, **kwargs) -> str: ... 
-``` - -**Features:** -- User-agent rotation (5 different Chrome/Firefox/Safari agents) -- Per-domain rate limiting -- Automatic 429 handling with exponential backoff + jitter -- Connection pooling - -### Logging - -```python -from sportstime_parser.utils import get_logger, log_success, log_error - -logger = get_logger() # Rich-formatted logger -logger.info("Starting scrape") - -log_success("Scraped 1230 games") # Green checkmark -log_error("Failed to parse") # Red X -``` - -**Log output:** -- Console: Rich-formatted with colors -- File: `logs/parser_{timestamp}.log` - -### Progress Tracking - -```python -from sportstime_parser.utils import ScrapeProgress, track_progress - -# Specialized scrape tracking -progress = ScrapeProgress("nba", 2025) -progress.start() - -with progress.scraping_schedule(total_months=9) as advance: - for month in months: - fetch(month) - advance() - -progress.finish() # Prints summary - -# Generic progress bar -for game in track_progress(games, "Processing games"): - process(game) -``` - -## Manual Review Workflow - -When the system can't confidently resolve a team or stadium: - -1. **Low confidence fuzzy match** (< 85%): - ``` - ManualReviewItem( - item_type="team", - raw_value="LA Lakers", - suggested_id="team_nba_lal", - confidence=0.82, - reason="Fuzzy match below threshold" - ) - ``` - -2. **No match found**: - ``` - ManualReviewItem( - raw_value="Unknown Team FC", - suggested_id=None, - confidence=0.0, - reason="No match found in canonical mappings" - ) - ``` - -3. **Ambiguous match** (multiple candidates): - ``` - ManualReviewItem( - raw_value="LA", - suggested_id="team_nba_lac", - confidence=0.5, - reason="Ambiguous: could be Lakers or Clippers" - ) - ``` - -**Resolution:** -- Review items are exported to JSON -- Manually verify and add to `team_aliases.json` or `stadium_aliases.json` -- Re-run scrape - aliases will be used for resolution - -## Adding a New Sport - -1. 
**Create scraper** in `scrapers/{sport}.py`: - ```python - class NewSportScraper(BaseScraper): - def __init__(self, season: int, **kwargs): - super().__init__("newsport", season, **kwargs) - self._team_resolver = get_team_resolver("newsport") - self._stadium_resolver = get_stadium_resolver("newsport") - - def _get_sources(self) -> list[str]: - return ["primary_source", "backup_source"] - - def _scrape_games_from_source(self, source: str) -> list[RawGameData]: - # Implement source-specific scraping - ... - - def _normalize_games(self, raw_games) -> tuple[list[Game], list[ManualReviewItem]]: - # Use resolvers to normalize - ... - - def scrape_teams(self) -> list[Team]: - # Return canonical team list - ... - - def scrape_stadiums(self) -> list[Stadium]: - # Return canonical stadium list - ... - ``` - -2. **Add team mappings** in `normalizers/team_resolver.py`: - ```python - TEAM_MAPPINGS["newsport"] = { - "ABC": ("team_newsport_abc", "Full Team Name", "City"), - ... - } - ``` - -3. **Add stadium mappings** in `normalizers/stadium_resolver.py`: - ```python - STADIUM_MAPPINGS["newsport"] = { - "stadium_newsport_venue": StadiumInfo( - name="Venue Name", - city="City", - state="State", - country="USA", - latitude=40.0, - longitude=-74.0, - ), - ... - } - ``` - -4. **Add to league_structure.json** (if hierarchical) - -5. **Update config.py**: - ```python - EXPECTED_GAME_COUNTS["newsport"] = 500 - ``` - -6. **Export from `__init__.py`** - ## Troubleshooting -### Rate Limiting (429 errors) +### Scraper fails with rate limiting -The system handles these automatically with exponential backoff. If persistent: -- Increase `DEFAULT_REQUEST_DELAY` in config.py -- Check if source has changed their rate limits +The system handles 429 errors automatically. If persistent, increase `SCRAPER_REQUEST_DELAY`. -### Missing Teams/Stadiums +### Unknown team/stadium names -1. Check scraper logs for raw values -2. Add to `team_aliases.json` or `stadium_aliases.json` -3. 
Or add to canonical mappings if it's a new team/stadium +1. Check ManualReviewItem in admin +2. Add alias via Team Aliases or Stadium Aliases +3. Re-run scraper -### CloudKit Authentication Errors +### CloudKit sync errors -1. Verify key_id matches CloudKit Dashboard -2. Check private key format (EC P-256, PEM) -3. Ensure container identifier is correct +1. Verify credentials in CloudKitConfiguration +2. Check CloudKitSyncState for failed records +3. Use "Retry failed syncs" action in admin -### Incomplete Scrapes +### Docker volume issues -The system discards partial data on errors. Check: -- `logs/` for error details -- Network connectivity -- Source website availability +If template changes don't appear: +```bash +docker-compose down && docker-compose up -d --build +``` -### International Games Appearing +## License -NFL and NHL scrapers filter these automatically. If new locations emerge: -- Add to `INTERNATIONAL_LOCATIONS` in the scraper -- Or add filtering logic for neutral site games - -## Contributing - -1. Follow existing patterns for new scrapers -2. Always use canonical IDs -3. Add aliases for historical names -4. Include source URLs for traceability -5. Test with multiple seasons +Private - All rights reserved. 
diff --git a/cloudkit/__init__.py b/cloudkit/__init__.py new file mode 100644 index 0000000..91343a6 --- /dev/null +++ b/cloudkit/__init__.py @@ -0,0 +1 @@ +default_app_config = 'cloudkit.apps.CloudKitConfig' diff --git a/cloudkit/admin.py b/cloudkit/admin.py new file mode 100644 index 0000000..7ccabb3 --- /dev/null +++ b/cloudkit/admin.py @@ -0,0 +1,213 @@ +from django.contrib import admin +from django.utils.html import format_html +from import_export.admin import ImportExportMixin, ImportExportModelAdmin +from simple_history.admin import SimpleHistoryAdmin + +from .models import CloudKitConfiguration, CloudKitSyncState, CloudKitSyncJob +from .resources import CloudKitConfigurationResource, CloudKitSyncStateResource, CloudKitSyncJobResource + + +@admin.register(CloudKitConfiguration) +class CloudKitConfigurationAdmin(ImportExportMixin, SimpleHistoryAdmin): + resource_class = CloudKitConfigurationResource + list_display = [ + 'name', + 'environment', + 'container_id', + 'is_active_badge', + 'auto_sync_after_scrape', + 'batch_size', + ] + list_filter = ['environment', 'is_active'] + search_fields = ['name', 'container_id'] + readonly_fields = ['created_at', 'updated_at'] + + fieldsets = [ + (None, { + 'fields': ['name', 'environment', 'is_active'] + }), + ('CloudKit Credentials', { + 'fields': ['container_id', 'key_id', 'private_key', 'private_key_path'], + 'description': 'Enter your private key content directly OR provide a file path' + }), + ('Sync Settings', { + 'fields': ['batch_size', 'auto_sync_after_scrape'] + }), + ('Metadata', { + 'fields': ['created_at', 'updated_at'], + 'classes': ['collapse'] + }), + ] + + actions = ['run_sync', 'test_connection'] + + def is_active_badge(self, obj): + if obj.is_active: + return format_html( + '<span style="color: #5cb85c; font-weight: bold;">● ACTIVE</span>' + ) + return format_html('<span style="color: #999;">○ Inactive</span>') + is_active_badge.short_description = 'Status' + + @admin.action(description='Run sync with selected configuration') + def run_sync(self, request, queryset): + from cloudkit.tasks
import run_cloudkit_sync + for config in queryset: + run_cloudkit_sync.delay(config.id) + self.message_user(request, f'Started {queryset.count()} sync jobs.') + + @admin.action(description='Test CloudKit connection') + def test_connection(self, request, queryset): + from django.contrib import messages + for config in queryset: + try: + client = config.get_client() + if client.test_connection(): + self.message_user( + request, + f'✓ {config.name}: Connection successful!', + messages.SUCCESS + ) + else: + self.message_user( + request, + f'✗ {config.name}: Connection failed', + messages.ERROR + ) + except Exception as e: + self.message_user( + request, + f'✗ {config.name}: {str(e)}', + messages.ERROR + ) + + +@admin.register(CloudKitSyncState) +class CloudKitSyncStateAdmin(ImportExportModelAdmin): + resource_class = CloudKitSyncStateResource + list_display = [ + 'record_id', + 'record_type', + 'sync_status_badge', + 'last_synced', + 'retry_count', + ] + list_filter = ['sync_status', 'record_type'] + search_fields = ['record_id', 'cloudkit_record_name'] + ordering = ['-updated_at'] + readonly_fields = [ + 'record_type', + 'record_id', + 'cloudkit_record_name', + 'local_hash', + 'remote_change_tag', + 'last_synced', + 'last_error', + 'retry_count', + 'created_at', + 'updated_at', + ] + + actions = ['mark_pending', 'retry_failed'] + + def has_add_permission(self, request): + return False + + def sync_status_badge(self, obj): + colors = { + 'pending': '#f0ad4e', + 'synced': '#5cb85c', + 'failed': '#d9534f', + 'deleted': '#999', + } + color = colors.get(obj.sync_status, '#999') + return format_html( + '<span style="color: {}; font-weight: bold;">{}</span>', + color, + obj.sync_status.upper() + ) + sync_status_badge.short_description = 'Status' + + @admin.action(description='Mark selected as pending sync') + def mark_pending(self, request, queryset): + updated = queryset.update(sync_status='pending') + self.message_user(request, f'{updated} records marked as pending.') + + @admin.action(description='Retry failed syncs') + 
def retry_failed(self, request, queryset): + updated = queryset.filter(sync_status='failed').update( + sync_status='pending', + retry_count=0 + ) + self.message_user(request, f'{updated} failed records queued for retry.') + + +@admin.register(CloudKitSyncJob) +class CloudKitSyncJobAdmin(ImportExportModelAdmin): + resource_class = CloudKitSyncJobResource + list_display = [ + 'id', + 'configuration', + 'status_badge', + 'triggered_by', + 'started_at', + 'duration_display', + 'records_summary', + ] + list_filter = ['status', 'configuration', 'triggered_by'] + date_hierarchy = 'created_at' + ordering = ['-created_at'] + readonly_fields = [ + 'configuration', + 'status', + 'triggered_by', + 'started_at', + 'finished_at', + 'duration_display', + 'records_synced', + 'records_created', + 'records_updated', + 'records_deleted', + 'records_failed', + 'sport_filter', + 'record_type_filter', + 'error_message', + 'celery_task_id', + 'created_at', + 'updated_at', + ] + + def has_add_permission(self, request): + return False + + def has_change_permission(self, request, obj=None): + return False + + def status_badge(self, obj): + colors = { + 'pending': '#999', + 'running': '#f0ad4e', + 'completed': '#5cb85c', + 'failed': '#d9534f', + 'cancelled': '#777', + } + color = colors.get(obj.status, '#999') + return format_html( + '{}', + color, + obj.status.upper() + ) + status_badge.short_description = 'Status' + + def records_summary(self, obj): + if obj.records_synced == 0 and obj.status != 'completed': + return '-' + return format_html( + '' + '{} synced ({} new)', + obj.records_created, obj.records_updated, obj.records_deleted, obj.records_failed, + obj.records_synced, obj.records_created + ) + records_summary.short_description = 'Records' diff --git a/cloudkit/apps.py b/cloudkit/apps.py new file mode 100644 index 0000000..cc35f81 --- /dev/null +++ b/cloudkit/apps.py @@ -0,0 +1,7 @@ +from django.apps import AppConfig + + +class CloudKitConfig(AppConfig): + default_auto_field = 
class CloudKitClient:
    """Client for the CloudKit Web Services API (server-to-server auth)."""

    BASE_URL = "https://api.apple-cloudkit.com"
    TOKEN_EXPIRY_SECONDS = 3600  # tokens are minted for one hour

    def __init__(
        self,
        container_id: str,
        environment: str = 'development',
        key_id: str = '',
        private_key: str = '',
        private_key_path: str = '',
    ):
        self.container_id = container_id
        self.environment = environment
        self.key_id = key_id
        self.private_key_path = private_key_path

        # Token cache: filled lazily on first request.
        self._token = None
        self._token_expiry = 0

        # Prefer the inline PEM; fall back to reading the file at the given
        # path when no inline key was supplied.
        self._private_key_pem = private_key
        if not self._private_key_pem and private_key_path:
            candidate = Path(private_key_path)
            if candidate.exists():
                self._private_key_pem = candidate.read_text()

        # Parse the PEM once up front so signing is cheap later.
        self._private_key = None
        if self._private_key_pem:
            self._private_key = serialization.load_pem_private_key(
                self._private_key_pem.encode(),
                password=None,
                backend=default_backend(),
            )

    @property
    def is_configured(self) -> bool:
        """True when both a key ID and a parsed private key are available."""
        return bool(self.key_id and self._private_key)

    def _get_api_path(self, operation: str) -> str:
        """Return the versioned public-database API path for *operation*."""
        return f"/database/1/{self.container_id}/{self.environment}/public/{operation}"
def _get_token(self) -> str:
    """Return a valid ES256 JWT for CloudKit, minting a new one when the
    cached token is missing or within 5 minutes of expiry.

    Raises:
        ValueError: if no key ID / private key is configured.
    """
    if not self.is_configured:
        raise ValueError("CloudKit credentials not configured")

    now = time.time()

    # Reuse the cached token while it still has a 5-minute safety buffer.
    if self._token and (self._token_expiry - now) > 300:
        return self._token

    expiry = now + self.TOKEN_EXPIRY_SECONDS
    payload = {
        'iss': self.key_id,
        'iat': int(now),
        'exp': int(expiry),
        'sub': self.container_id,
    }

    self._token = jwt.encode(
        payload,
        self._private_key,
        algorithm='ES256',
    )
    self._token_expiry = expiry
    return self._token

def _sign_request(self, method: str, path: str, body: Optional[bytes] = None) -> dict:
    """Generate request headers with authentication.

    Args:
        method: HTTP method (currently unused by the signing scheme, kept
            for interface stability).
        path: API path being requested.
        body: Request body bytes, or None for body-less requests.

    Returns:
        Dictionary of headers to include in the request.
    """
    # Local import: the module-level import only pulls datetime/timedelta.
    from datetime import timezone

    token = self._get_token()

    # FIX: datetime.utcnow() is deprecated since Python 3.12; build the
    # ISO-8601 date from an aware UTC datetime instead (same string output).
    date_str = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Base64-encoded SHA-256 of the body (empty bytes when there is none) —
    # the two branches of the original collapsed into one expression.
    body_hash = base64.b64encode(hashlib.sha256(body or b"").digest()).decode()

    # CloudKit signs "date:body_hash:path".
    message = f"{date_str}:{body_hash}:{path}"
    signature = self._private_key.sign(
        message.encode(),
        ec.ECDSA(hashes.SHA256()),
    )

    return {
        'Authorization': f'Bearer {token}',
        'X-Apple-CloudKit-Request-KeyID': self.key_id,
        'X-Apple-CloudKit-Request-ISO8601Date': date_str,
        'X-Apple-CloudKit-Request-SignatureV1': base64.b64encode(signature).decode(),
        'Content-Type': 'application/json',
    }
f"{self.BASE_URL}{path}" + + body_bytes = json.dumps(body).encode() if body else None + headers = self._sign_request(method, path, body_bytes) + + response = requests.request( + method=method, + url=url, + headers=headers, + data=body_bytes, + ) + + if response.status_code == 200: + return response.json() + else: + response.raise_for_status() + + def _get_url(self, path: str) -> str: + """Build full API URL.""" + return f"{self.BASE_URL}/database/1/{self.container_id}/{self.environment}/public{path}" + + def fetch_records( + self, + record_type: str, + filter_by: Optional[dict] = None, + sort_by: Optional[str] = None, + limit: int = 200, + ) -> list: + """ + Fetch records from CloudKit. + """ + query = { + 'recordType': record_type, + } + + if filter_by: + query['filterBy'] = filter_by + + if sort_by: + query['sortBy'] = [{'fieldName': sort_by}] + + payload = { + 'query': query, + 'resultsLimit': limit, + } + + data = self._request('POST', 'records/query', payload) + return data.get('records', []) + + def save_records(self, records: list) -> dict: + """ + Save records to CloudKit. + """ + operations = [] + for record in records: + op = { + 'operationType': 'forceReplace', + 'record': record, + } + operations.append(op) + + payload = { + 'operations': operations, + } + + return self._request('POST', 'records/modify', payload) + + def delete_records(self, record_names: list, record_type: str) -> dict: + """ + Delete records from CloudKit. + """ + operations = [] + for name in record_names: + op = { + 'operationType': 'delete', + 'record': { + 'recordName': name, + 'recordType': record_type, + }, + } + operations.append(op) + + payload = { + 'operations': operations, + } + + return self._request('POST', 'records/modify', payload) + + def to_cloudkit_record(self, record_type: str, data: dict) -> dict: + """ + Convert local data dict to CloudKit record format. + Field names must match existing CloudKit schema. 
+ """ + record = { + 'recordType': record_type, + 'recordName': data['id'], + 'fields': {}, + } + + if record_type == 'Sport': + fields = record['fields'] + fields['sportId'] = {'value': data['id'], 'type': 'STRING'} + fields['abbreviation'] = {'value': data['abbreviation'].upper(), 'type': 'STRING'} + fields['displayName'] = {'value': data['displayName'], 'type': 'STRING'} + fields['iconName'] = {'value': data.get('iconName', ''), 'type': 'STRING'} + fields['colorHex'] = {'value': data.get('colorHex', ''), 'type': 'STRING'} + fields['seasonStartMonth'] = {'value': data.get('seasonStartMonth', 1), 'type': 'INT64'} + fields['seasonEndMonth'] = {'value': data.get('seasonEndMonth', 12), 'type': 'INT64'} + fields['isActive'] = {'value': 1 if data.get('isActive') else 0, 'type': 'INT64'} + + elif record_type == 'Game': + # Match existing CloudKit Game schema + fields = record['fields'] + fields['gameId'] = {'value': data['id'], 'type': 'STRING'} + fields['canonicalId'] = {'value': data['id'], 'type': 'STRING'} + fields['sport'] = {'value': data['sport'].upper(), 'type': 'STRING'} + fields['season'] = {'value': str(data['season']), 'type': 'STRING'} + fields['homeTeamCanonicalId'] = {'value': data['homeTeamId'], 'type': 'STRING'} + fields['awayTeamCanonicalId'] = {'value': data['awayTeamId'], 'type': 'STRING'} + if data.get('stadiumId'): + fields['stadiumCanonicalId'] = {'value': data['stadiumId'], 'type': 'STRING'} + if data.get('gameDate'): + dt = datetime.fromisoformat(data['gameDate'].replace('Z', '+00:00')) + fields['dateTime'] = {'value': int(dt.timestamp() * 1000), 'type': 'TIMESTAMP'} + fields['isPlayoff'] = {'value': 1 if data.get('isPlayoff') else 0, 'type': 'INT64'} + + elif record_type == 'Team': + # Match existing CloudKit Team schema + fields = record['fields'] + fields['teamId'] = {'value': data['id'], 'type': 'STRING'} + fields['canonicalId'] = {'value': data['id'], 'type': 'STRING'} + fields['sport'] = {'value': data['sport'].upper(), 'type': 'STRING'} + 
fields['city'] = {'value': data.get('city', ''), 'type': 'STRING'} + fields['name'] = {'value': data.get('name', ''), 'type': 'STRING'} + fields['abbreviation'] = {'value': data.get('abbreviation', ''), 'type': 'STRING'} + if data.get('homeStadiumId'): + fields['stadiumCanonicalId'] = {'value': data['homeStadiumId'], 'type': 'STRING'} + if data.get('primaryColor'): + fields['primaryColor'] = {'value': data['primaryColor'], 'type': 'STRING'} + if data.get('secondaryColor'): + fields['secondaryColor'] = {'value': data['secondaryColor'], 'type': 'STRING'} + if data.get('logoUrl'): + fields['logoUrl'] = {'value': data['logoUrl'], 'type': 'STRING'} + if data.get('divisionId'): + fields['divisionCanonicalId'] = {'value': data['divisionId'], 'type': 'STRING'} + if data.get('conferenceId'): + fields['conferenceCanonicalId'] = {'value': data['conferenceId'], 'type': 'STRING'} + + elif record_type == 'Stadium': + # Match existing CloudKit Stadium schema + fields = record['fields'] + fields['stadiumId'] = {'value': data['id'], 'type': 'STRING'} + fields['canonicalId'] = {'value': data['id'], 'type': 'STRING'} + fields['sport'] = {'value': data['sport'].upper(), 'type': 'STRING'} + fields['name'] = {'value': data.get('name', ''), 'type': 'STRING'} + fields['city'] = {'value': data.get('city', ''), 'type': 'STRING'} + if data.get('state'): + fields['state'] = {'value': data['state'], 'type': 'STRING'} + # Use LOCATION type for coordinates + if data.get('latitude') is not None and data.get('longitude') is not None: + fields['location'] = { + 'value': { + 'latitude': float(data['latitude']), + 'longitude': float(data['longitude']), + }, + 'type': 'LOCATION' + } + if data.get('capacity'): + fields['capacity'] = {'value': data['capacity'], 'type': 'INT64'} + if data.get('yearOpened'): + fields['yearOpened'] = {'value': data['yearOpened'], 'type': 'INT64'} + if data.get('imageUrl'): + fields['imageURL'] = {'value': data['imageUrl'], 'type': 'STRING'} + if data.get('timezone'): + 
fields['timezoneIdentifier'] = {'value': data['timezone'], 'type': 'STRING'} + + elif record_type == 'Conference': + fields = record['fields'] + fields['conferenceId'] = {'value': data['id'], 'type': 'STRING'} + fields['canonicalId'] = {'value': data['id'], 'type': 'STRING'} + fields['sport'] = {'value': data['sport'].upper(), 'type': 'STRING'} + fields['name'] = {'value': data.get('name', ''), 'type': 'STRING'} + fields['shortName'] = {'value': data.get('shortName', ''), 'type': 'STRING'} + fields['order'] = {'value': data.get('order', 0), 'type': 'INT64'} + + elif record_type == 'Division': + fields = record['fields'] + fields['divisionId'] = {'value': data['id'], 'type': 'STRING'} + fields['canonicalId'] = {'value': data['id'], 'type': 'STRING'} + fields['conferenceCanonicalId'] = {'value': data['conferenceId'], 'type': 'STRING'} + fields['sport'] = {'value': data['sport'].upper(), 'type': 'STRING'} + fields['name'] = {'value': data.get('name', ''), 'type': 'STRING'} + fields['shortName'] = {'value': data.get('shortName', ''), 'type': 'STRING'} + fields['order'] = {'value': data.get('order', 0), 'type': 'INT64'} + + elif record_type == 'TeamAlias': + fields = record['fields'] + fields['aliasId'] = {'value': data['id'], 'type': 'STRING'} + fields['teamCanonicalId'] = {'value': data['teamId'], 'type': 'STRING'} + fields['aliasValue'] = {'value': data.get('alias', ''), 'type': 'STRING'} + fields['aliasType'] = {'value': data.get('aliasType', ''), 'type': 'STRING'} + if data.get('validFrom'): + dt = datetime.fromisoformat(data['validFrom']) + fields['validFrom'] = {'value': int(dt.timestamp() * 1000), 'type': 'TIMESTAMP'} + if data.get('validUntil'): + dt = datetime.fromisoformat(data['validUntil']) + fields['validUntil'] = {'value': int(dt.timestamp() * 1000), 'type': 'TIMESTAMP'} + + elif record_type == 'StadiumAlias': + fields = record['fields'] + fields['stadiumCanonicalId'] = {'value': data['stadiumId'], 'type': 'STRING'} + fields['aliasName'] = {'value': 
data.get('alias', ''), 'type': 'STRING'} + if data.get('validFrom'): + dt = datetime.fromisoformat(data['validFrom']) + fields['validFrom'] = {'value': int(dt.timestamp() * 1000), 'type': 'TIMESTAMP'} + if data.get('validUntil'): + dt = datetime.fromisoformat(data['validUntil']) + fields['validUntil'] = {'value': int(dt.timestamp() * 1000), 'type': 'TIMESTAMP'} + + elif record_type == 'LeagueStructure': + fields = record['fields'] + fields['structureId'] = {'value': data['id'], 'type': 'STRING'} + fields['sport'] = {'value': data['sport'].upper(), 'type': 'STRING'} + fields['type'] = {'value': data['type'], 'type': 'STRING'} + fields['name'] = {'value': data.get('name', ''), 'type': 'STRING'} + fields['abbreviation'] = {'value': data.get('abbreviation', ''), 'type': 'STRING'} + fields['parentId'] = {'value': data.get('parentId', ''), 'type': 'STRING'} + fields['displayOrder'] = {'value': data.get('displayOrder', 0), 'type': 'INT64'} + + return record + + def test_connection(self) -> bool: + """ + Test the CloudKit connection. 
+ """ + try: + # Try to fetch a small query + self.fetch_records('Team', limit=1) + return True + except Exception: + return False diff --git a/cloudkit/migrations/0001_initial.py b/cloudkit/migrations/0001_initial.py new file mode 100644 index 0000000..1f69275 --- /dev/null +++ b/cloudkit/migrations/0001_initial.py @@ -0,0 +1,120 @@ +# Generated by Django 5.1.15 on 2026-01-26 08:59 + +import django.db.models.deletion +import simple_history.models +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ('core', '0001_initial'), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name='CloudKitConfiguration', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(help_text='Configuration name (e.g., "Production", "Development")', max_length=100, unique=True)), + ('environment', models.CharField(choices=[('development', 'Development'), ('production', 'Production')], default='development', max_length=20)), + ('container_id', models.CharField(default='iCloud.com.sportstime.app', help_text='CloudKit container ID (e.g., iCloud.com.sportstime.app)', max_length=200)), + ('key_id', models.CharField(blank=True, help_text='CloudKit API key ID', max_length=200)), + ('private_key', models.TextField(blank=True, help_text='EC P-256 private key content (PEM format). 
Paste key here OR use path below.')), + ('private_key_path', models.CharField(blank=True, help_text='Path to EC P-256 private key file (alternative to pasting key above)', max_length=500)), + ('is_active', models.BooleanField(default=False, help_text='Whether this configuration is active for syncing')), + ('batch_size', models.PositiveIntegerField(default=200, help_text='Maximum records per batch upload')), + ('auto_sync_after_scrape', models.BooleanField(default=False, help_text='Automatically sync after scraper jobs complete')), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ], + options={ + 'verbose_name': 'CloudKit Configuration', + 'verbose_name_plural': 'CloudKit Configurations', + }, + ), + migrations.CreateModel( + name='CloudKitSyncJob', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('status', models.CharField(choices=[('pending', 'Pending'), ('running', 'Running'), ('completed', 'Completed'), ('failed', 'Failed'), ('cancelled', 'Cancelled')], default='pending', max_length=20)), + ('triggered_by', models.CharField(default='manual', help_text='How the sync was triggered', max_length=50)), + ('started_at', models.DateTimeField(blank=True, null=True)), + ('finished_at', models.DateTimeField(blank=True, null=True)), + ('records_synced', models.PositiveIntegerField(default=0)), + ('records_created', models.PositiveIntegerField(default=0)), + ('records_updated', models.PositiveIntegerField(default=0)), + ('records_deleted', models.PositiveIntegerField(default=0)), + ('records_failed', models.PositiveIntegerField(default=0)), + ('record_type_filter', models.CharField(blank=True, help_text='Only sync this record type (all if blank)', max_length=20)), + ('error_message', models.TextField(blank=True)), + ('celery_task_id', models.CharField(blank=True, max_length=255)), + ('created_at', models.DateTimeField(auto_now_add=True)), + 
('updated_at', models.DateTimeField(auto_now=True)), + ('configuration', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='sync_jobs', to='cloudkit.cloudkitconfiguration')), + ('sport_filter', models.ForeignKey(blank=True, help_text='Only sync this sport (all if blank)', null=True, on_delete=django.db.models.deletion.SET_NULL, to='core.sport')), + ], + options={ + 'verbose_name': 'CloudKit Sync Job', + 'verbose_name_plural': 'CloudKit Sync Jobs', + 'ordering': ['-created_at'], + }, + ), + migrations.CreateModel( + name='CloudKitSyncState', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('record_type', models.CharField(choices=[('Game', 'Game'), ('Team', 'Team'), ('Stadium', 'Stadium')], max_length=20)), + ('record_id', models.CharField(help_text='Local record ID (canonical ID)', max_length=100)), + ('cloudkit_record_name', models.CharField(blank=True, help_text='CloudKit record name (may differ from local ID)', max_length=200)), + ('local_hash', models.CharField(blank=True, help_text='Hash of local record data for change detection', max_length=64)), + ('remote_change_tag', models.CharField(blank=True, help_text='CloudKit change tag for conflict detection', max_length=200)), + ('sync_status', models.CharField(choices=[('pending', 'Pending Sync'), ('synced', 'Synced'), ('failed', 'Failed'), ('deleted', 'Deleted')], default='pending', max_length=20)), + ('last_synced', models.DateTimeField(blank=True, null=True)), + ('last_error', models.TextField(blank=True, help_text='Last sync error message')), + ('retry_count', models.PositiveSmallIntegerField(default=0)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ], + options={ + 'verbose_name': 'CloudKit Sync State', + 'verbose_name_plural': 'CloudKit Sync States', + 'ordering': ['-updated_at'], + 'indexes': [models.Index(fields=['sync_status', 'record_type'], 
name='cloudkit_cl_sync_st_cc8bf6_idx'), models.Index(fields=['record_type', 'last_synced'], name='cloudkit_cl_record__d82278_idx')], + 'unique_together': {('record_type', 'record_id')}, + }, + ), + migrations.CreateModel( + name='HistoricalCloudKitConfiguration', + fields=[ + ('id', models.BigIntegerField(auto_created=True, blank=True, db_index=True, verbose_name='ID')), + ('name', models.CharField(db_index=True, help_text='Configuration name (e.g., "Production", "Development")', max_length=100)), + ('environment', models.CharField(choices=[('development', 'Development'), ('production', 'Production')], default='development', max_length=20)), + ('container_id', models.CharField(default='iCloud.com.sportstime.app', help_text='CloudKit container ID (e.g., iCloud.com.sportstime.app)', max_length=200)), + ('key_id', models.CharField(blank=True, help_text='CloudKit API key ID', max_length=200)), + ('private_key', models.TextField(blank=True, help_text='EC P-256 private key content (PEM format). 
Paste key here OR use path below.')), + ('private_key_path', models.CharField(blank=True, help_text='Path to EC P-256 private key file (alternative to pasting key above)', max_length=500)), + ('is_active', models.BooleanField(default=False, help_text='Whether this configuration is active for syncing')), + ('batch_size', models.PositiveIntegerField(default=200, help_text='Maximum records per batch upload')), + ('auto_sync_after_scrape', models.BooleanField(default=False, help_text='Automatically sync after scraper jobs complete')), + ('created_at', models.DateTimeField(blank=True, editable=False)), + ('updated_at', models.DateTimeField(blank=True, editable=False)), + ('history_id', models.AutoField(primary_key=True, serialize=False)), + ('history_date', models.DateTimeField(db_index=True)), + ('history_change_reason', models.CharField(max_length=100, null=True)), + ('history_type', models.CharField(choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')], max_length=1)), + ('history_user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)), + ], + options={ + 'verbose_name': 'historical CloudKit Configuration', + 'verbose_name_plural': 'historical CloudKit Configurations', + 'ordering': ('-history_date', '-history_id'), + 'get_latest_by': ('history_date', 'history_id'), + }, + bases=(simple_history.models.HistoricalChanges, models.Model), + ), + ] diff --git a/cloudkit/migrations/0002_add_sync_progress_fields.py b/cloudkit/migrations/0002_add_sync_progress_fields.py new file mode 100644 index 0000000..0133e18 --- /dev/null +++ b/cloudkit/migrations/0002_add_sync_progress_fields.py @@ -0,0 +1,63 @@ +# Generated by Django 5.1.15 on 2026-01-26 13:46 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('cloudkit', '0001_initial'), + ] + + operations = [ + migrations.AddField( + model_name='cloudkitsyncjob', + 
name='current_record_type', + field=models.CharField(blank=True, help_text='Currently syncing record type', max_length=20), + ), + migrations.AddField( + model_name='cloudkitsyncjob', + name='games_failed', + field=models.PositiveIntegerField(default=0), + ), + migrations.AddField( + model_name='cloudkitsyncjob', + name='games_synced', + field=models.PositiveIntegerField(default=0), + ), + migrations.AddField( + model_name='cloudkitsyncjob', + name='games_total', + field=models.PositiveIntegerField(default=0), + ), + migrations.AddField( + model_name='cloudkitsyncjob', + name='stadiums_failed', + field=models.PositiveIntegerField(default=0), + ), + migrations.AddField( + model_name='cloudkitsyncjob', + name='stadiums_synced', + field=models.PositiveIntegerField(default=0), + ), + migrations.AddField( + model_name='cloudkitsyncjob', + name='stadiums_total', + field=models.PositiveIntegerField(default=0), + ), + migrations.AddField( + model_name='cloudkitsyncjob', + name='teams_failed', + field=models.PositiveIntegerField(default=0), + ), + migrations.AddField( + model_name='cloudkitsyncjob', + name='teams_synced', + field=models.PositiveIntegerField(default=0), + ), + migrations.AddField( + model_name='cloudkitsyncjob', + name='teams_total', + field=models.PositiveIntegerField(default=0), + ), + ] diff --git a/cloudkit/migrations/0003_alter_cloudkitsyncjob_status.py b/cloudkit/migrations/0003_alter_cloudkitsyncjob_status.py new file mode 100644 index 0000000..2f8d93e --- /dev/null +++ b/cloudkit/migrations/0003_alter_cloudkitsyncjob_status.py @@ -0,0 +1,29 @@ +# Generated manually + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('cloudkit', '0002_add_sync_progress_fields'), + ] + + operations = [ + migrations.AlterField( + model_name='cloudkitsyncjob', + name='status', + field=models.CharField( + choices=[ + ('pending', 'Pending'), + ('running', 'Running'), + ('completed', 'Completed'), + 
('completed_with_errors', 'Completed with Errors'), + ('failed', 'Failed'), + ('cancelled', 'Cancelled'), + ], + default='pending', + max_length=25, + ), + ), + ] diff --git a/cloudkit/migrations/0004_cloudkitsyncjob_sport_progress.py b/cloudkit/migrations/0004_cloudkitsyncjob_sport_progress.py new file mode 100644 index 0000000..f369dbb --- /dev/null +++ b/cloudkit/migrations/0004_cloudkitsyncjob_sport_progress.py @@ -0,0 +1,28 @@ +# Generated manually + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('cloudkit', '0003_alter_cloudkitsyncjob_status'), + ] + + operations = [ + migrations.AddField( + model_name='cloudkitsyncjob', + name='sports_total', + field=models.PositiveIntegerField(default=0), + ), + migrations.AddField( + model_name='cloudkitsyncjob', + name='sports_synced', + field=models.PositiveIntegerField(default=0), + ), + migrations.AddField( + model_name='cloudkitsyncjob', + name='sports_failed', + field=models.PositiveIntegerField(default=0), + ), + ] diff --git a/cloudkit/migrations/0005_add_conference_division_alias_sync.py b/cloudkit/migrations/0005_add_conference_division_alias_sync.py new file mode 100644 index 0000000..ded9e50 --- /dev/null +++ b/cloudkit/migrations/0005_add_conference_division_alias_sync.py @@ -0,0 +1,78 @@ +# Generated by Django 5.1.4 on 2026-02-06 02:21 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('cloudkit', '0004_cloudkitsyncjob_sport_progress'), + ] + + operations = [ + migrations.AddField( + model_name='cloudkitsyncjob', + name='conferences_failed', + field=models.PositiveIntegerField(default=0), + ), + migrations.AddField( + model_name='cloudkitsyncjob', + name='conferences_synced', + field=models.PositiveIntegerField(default=0), + ), + migrations.AddField( + model_name='cloudkitsyncjob', + name='conferences_total', + field=models.PositiveIntegerField(default=0), + ), + migrations.AddField( + 
class CloudKitConfiguration(models.Model):
    """
    CloudKit configuration for syncing.

    Holds a container/environment pair plus server-to-server credentials.
    At most one configuration may be active at a time — enforced in save().
    """
    ENVIRONMENT_CHOICES = [
        ('development', 'Development'),
        ('production', 'Production'),
    ]

    name = models.CharField(
        max_length=100,
        unique=True,
        help_text='Configuration name (e.g., "Production", "Development")'
    )
    environment = models.CharField(
        max_length=20,
        choices=ENVIRONMENT_CHOICES,
        default='development'
    )

    # Credentials: the key may be pasted inline (private_key) or loaded from
    # disk (private_key_path); the client prefers the inline value.
    container_id = models.CharField(
        max_length=200,
        default=settings.CLOUDKIT_CONTAINER,
        help_text='CloudKit container ID (e.g., iCloud.com.sportstime.app)'
    )
    key_id = models.CharField(
        max_length=200,
        blank=True,
        help_text='CloudKit API key ID'
    )
    private_key = models.TextField(
        blank=True,
        help_text='EC P-256 private key content (PEM format). Paste key here OR use path below.'
    )
    private_key_path = models.CharField(
        max_length=500,
        blank=True,
        help_text='Path to EC P-256 private key file (alternative to pasting key above)'
    )
    is_active = models.BooleanField(
        default=False,
        help_text='Whether this configuration is active for syncing'
    )

    # Sync settings
    batch_size = models.PositiveIntegerField(
        default=200,
        help_text='Maximum records per batch upload'
    )
    auto_sync_after_scrape = models.BooleanField(
        default=False,
        help_text='Automatically sync after scraper jobs complete'
    )

    # Metadata
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    # Audit trail
    history = HistoricalRecords()

    class Meta:
        verbose_name = 'CloudKit Configuration'
        verbose_name_plural = 'CloudKit Configurations'

    def __str__(self):
        return f"{self.name} ({self.environment})"

    def save(self, *args, **kwargs):
        """Persist, demoting any other active configuration first so that at
        most one row carries is_active=True."""
        if self.is_active:
            others = CloudKitConfiguration.objects.filter(is_active=True).exclude(pk=self.pk)
            others.update(is_active=False)
        super().save(*args, **kwargs)

    def get_client(self):
        """Build a CloudKitClient from this configuration's credentials."""
        from cloudkit.client import CloudKitClient
        return CloudKitClient(
            container_id=self.container_id,
            environment=self.environment,
            key_id=self.key_id,
            private_key=self.private_key,
            private_key_path=self.private_key_path,
        )

    @classmethod
    def get_active(cls):
        """Return the single active configuration, or None when none is active."""
        return cls.objects.filter(is_active=True).first()
from cloudkit.client import CloudKitClient + return CloudKitClient( + container_id=self.container_id, + environment=self.environment, + key_id=self.key_id, + private_key=self.private_key, + private_key_path=self.private_key_path, + ) + + @classmethod + def get_active(cls): + """Get the active CloudKit configuration.""" + return cls.objects.filter(is_active=True).first() + + +class CloudKitSyncState(models.Model): + """ + Tracks sync state for individual records. + """ + RECORD_TYPE_CHOICES = [ + ('Sport', 'Sport'), + ('Conference', 'Conference'), + ('Division', 'Division'), + ('Team', 'Team'), + ('Stadium', 'Stadium'), + ('TeamAlias', 'Team Alias'), + ('StadiumAlias', 'Stadium Alias'), + ('Game', 'Game'), + ] + + SYNC_STATUS_CHOICES = [ + ('pending', 'Pending Sync'), + ('synced', 'Synced'), + ('failed', 'Failed'), + ('deleted', 'Deleted'), + ] + + record_type = models.CharField( + max_length=20, + choices=RECORD_TYPE_CHOICES + ) + record_id = models.CharField( + max_length=100, + help_text='Local record ID (canonical ID)' + ) + cloudkit_record_name = models.CharField( + max_length=200, + blank=True, + help_text='CloudKit record name (may differ from local ID)' + ) + local_hash = models.CharField( + max_length=64, + blank=True, + help_text='Hash of local record data for change detection' + ) + remote_change_tag = models.CharField( + max_length=200, + blank=True, + help_text='CloudKit change tag for conflict detection' + ) + sync_status = models.CharField( + max_length=20, + choices=SYNC_STATUS_CHOICES, + default='pending' + ) + last_synced = models.DateTimeField( + null=True, + blank=True + ) + last_error = models.TextField( + blank=True, + help_text='Last sync error message' + ) + retry_count = models.PositiveSmallIntegerField( + default=0 + ) + + # Metadata + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + + class Meta: + ordering = ['-updated_at'] + unique_together = ['record_type', 'record_id'] + 
verbose_name = 'CloudKit Sync State' + verbose_name_plural = 'CloudKit Sync States' + indexes = [ + models.Index(fields=['sync_status', 'record_type']), + models.Index(fields=['record_type', 'last_synced']), + ] + + def __str__(self): + return f"{self.record_type}:{self.record_id} ({self.sync_status})" + + def mark_synced(self, change_tag=''): + """Mark record as successfully synced.""" + from django.utils import timezone + self.sync_status = 'synced' + self.remote_change_tag = change_tag + self.last_synced = timezone.now() + self.last_error = '' + self.retry_count = 0 + self.save() + + def mark_failed(self, error_message): + """Mark record as failed to sync.""" + self.sync_status = 'failed' + self.last_error = error_message + self.retry_count += 1 + self.save() + + def mark_pending(self, new_hash=''): + """Mark record as pending sync (e.g., after local change).""" + self.sync_status = 'pending' + if new_hash: + self.local_hash = new_hash + self.save() + + +class CloudKitSyncJob(models.Model): + """ + Record of a CloudKit sync job execution. 
+ """ + STATUS_CHOICES = [ + ('pending', 'Pending'), + ('running', 'Running'), + ('completed', 'Completed'), + ('completed_with_errors', 'Completed with Errors'), + ('failed', 'Failed'), + ('cancelled', 'Cancelled'), + ] + + configuration = models.ForeignKey( + CloudKitConfiguration, + on_delete=models.CASCADE, + related_name='sync_jobs' + ) + status = models.CharField( + max_length=25, + choices=STATUS_CHOICES, + default='pending' + ) + triggered_by = models.CharField( + max_length=50, + default='manual', + help_text='How the sync was triggered' + ) + + # Timing + started_at = models.DateTimeField(null=True, blank=True) + finished_at = models.DateTimeField(null=True, blank=True) + + # Results + records_synced = models.PositiveIntegerField(default=0) + records_created = models.PositiveIntegerField(default=0) + records_updated = models.PositiveIntegerField(default=0) + records_deleted = models.PositiveIntegerField(default=0) + records_failed = models.PositiveIntegerField(default=0) + + # Filter (optional - sync specific records) + sport_filter = models.ForeignKey( + 'core.Sport', + on_delete=models.SET_NULL, + null=True, + blank=True, + help_text='Only sync this sport (all if blank)' + ) + record_type_filter = models.CharField( + max_length=20, + blank=True, + help_text='Only sync this record type (all if blank)' + ) + + # Error tracking + error_message = models.TextField(blank=True) + + # Progress tracking + current_record_type = models.CharField( + max_length=20, + blank=True, + help_text='Currently syncing record type' + ) + sports_total = models.PositiveIntegerField(default=0) + sports_synced = models.PositiveIntegerField(default=0) + sports_failed = models.PositiveIntegerField(default=0) + teams_total = models.PositiveIntegerField(default=0) + teams_synced = models.PositiveIntegerField(default=0) + teams_failed = models.PositiveIntegerField(default=0) + stadiums_total = models.PositiveIntegerField(default=0) + stadiums_synced = 
models.PositiveIntegerField(default=0) + stadiums_failed = models.PositiveIntegerField(default=0) + conferences_total = models.PositiveIntegerField(default=0) + conferences_synced = models.PositiveIntegerField(default=0) + conferences_failed = models.PositiveIntegerField(default=0) + divisions_total = models.PositiveIntegerField(default=0) + divisions_synced = models.PositiveIntegerField(default=0) + divisions_failed = models.PositiveIntegerField(default=0) + team_aliases_total = models.PositiveIntegerField(default=0) + team_aliases_synced = models.PositiveIntegerField(default=0) + team_aliases_failed = models.PositiveIntegerField(default=0) + stadium_aliases_total = models.PositiveIntegerField(default=0) + stadium_aliases_synced = models.PositiveIntegerField(default=0) + stadium_aliases_failed = models.PositiveIntegerField(default=0) + games_total = models.PositiveIntegerField(default=0) + games_synced = models.PositiveIntegerField(default=0) + games_failed = models.PositiveIntegerField(default=0) + + # Celery task ID + celery_task_id = models.CharField( + max_length=255, + blank=True + ) + + # Metadata + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + + class Meta: + ordering = ['-created_at'] + verbose_name = 'CloudKit Sync Job' + verbose_name_plural = 'CloudKit Sync Jobs' + + def __str__(self): + return f"Sync {self.configuration.name} - {self.created_at.strftime('%Y-%m-%d %H:%M')}" + + @property + def duration(self): + if self.started_at and self.finished_at: + return self.finished_at - self.started_at + return None + + @property + def duration_display(self): + duration = self.duration + if duration: + total_seconds = int(duration.total_seconds()) + minutes, seconds = divmod(total_seconds, 60) + if minutes > 0: + return f"{minutes}m {seconds}s" + return f"{seconds}s" + return '-' + + def get_progress(self): + """Get progress data for API/display.""" + total = (self.sports_total + self.conferences_total 
+ self.divisions_total + + self.teams_total + self.stadiums_total + + self.team_aliases_total + self.stadium_aliases_total + + self.games_total) + synced = (self.sports_synced + self.conferences_synced + self.divisions_synced + + self.teams_synced + self.stadiums_synced + + self.team_aliases_synced + self.stadium_aliases_synced + + self.games_synced) + failed = (self.sports_failed + self.conferences_failed + self.divisions_failed + + self.teams_failed + self.stadiums_failed + + self.team_aliases_failed + self.stadium_aliases_failed + + self.games_failed) + + return { + 'status': self.status, + 'current_type': self.current_record_type, + 'total': total, + 'synced': synced, + 'failed': failed, + 'remaining': total - synced - failed, + 'percent': round((synced + failed) / total * 100) if total > 0 else 0, + 'sports': { + 'total': self.sports_total, + 'synced': self.sports_synced, + 'failed': self.sports_failed, + 'remaining': self.sports_total - self.sports_synced - self.sports_failed, + }, + 'conferences': { + 'total': self.conferences_total, + 'synced': self.conferences_synced, + 'failed': self.conferences_failed, + 'remaining': self.conferences_total - self.conferences_synced - self.conferences_failed, + }, + 'divisions': { + 'total': self.divisions_total, + 'synced': self.divisions_synced, + 'failed': self.divisions_failed, + 'remaining': self.divisions_total - self.divisions_synced - self.divisions_failed, + }, + 'teams': { + 'total': self.teams_total, + 'synced': self.teams_synced, + 'failed': self.teams_failed, + 'remaining': self.teams_total - self.teams_synced - self.teams_failed, + }, + 'stadiums': { + 'total': self.stadiums_total, + 'synced': self.stadiums_synced, + 'failed': self.stadiums_failed, + 'remaining': self.stadiums_total - self.stadiums_synced - self.stadiums_failed, + }, + 'team_aliases': { + 'total': self.team_aliases_total, + 'synced': self.team_aliases_synced, + 'failed': self.team_aliases_failed, + 'remaining': self.team_aliases_total - 
self.team_aliases_synced - self.team_aliases_failed, + }, + 'stadium_aliases': { + 'total': self.stadium_aliases_total, + 'synced': self.stadium_aliases_synced, + 'failed': self.stadium_aliases_failed, + 'remaining': self.stadium_aliases_total - self.stadium_aliases_synced - self.stadium_aliases_failed, + }, + 'games': { + 'total': self.games_total, + 'synced': self.games_synced, + 'failed': self.games_failed, + 'remaining': self.games_total - self.games_synced - self.games_failed, + }, + } diff --git a/cloudkit/resources.py b/cloudkit/resources.py new file mode 100644 index 0000000..c266025 --- /dev/null +++ b/cloudkit/resources.py @@ -0,0 +1,49 @@ +"""Import/Export resources for cloudkit models.""" +from import_export import resources, fields +from import_export.widgets import ForeignKeyWidget + +from .models import CloudKitConfiguration, CloudKitSyncState, CloudKitSyncJob + + +class CloudKitConfigurationResource(resources.ModelResource): + class Meta: + model = CloudKitConfiguration + import_id_fields = ['name'] + fields = [ + 'name', 'environment', 'container_id', 'key_id', + 'is_active', 'batch_size', 'auto_sync_after_scrape', + ] + export_order = fields + # Exclude private_key for security + exclude = ['private_key', 'private_key_path'] + + +class CloudKitSyncStateResource(resources.ModelResource): + class Meta: + model = CloudKitSyncState + import_id_fields = ['record_type', 'record_id'] + fields = [ + 'record_type', 'record_id', 'cloudkit_record_name', + 'sync_status', 'local_hash', 'remote_change_tag', + 'last_synced', 'last_error', 'retry_count', + ] + export_order = fields + + +class CloudKitSyncJobResource(resources.ModelResource): + configuration = fields.Field( + column_name='configuration', + attribute='configuration', + widget=ForeignKeyWidget(CloudKitConfiguration, 'name') + ) + + class Meta: + model = CloudKitSyncJob + fields = [ + 'id', 'configuration', 'status', 'triggered_by', + 'started_at', 'finished_at', + 'records_synced', 
'records_created', 'records_updated', + 'records_deleted', 'records_failed', + 'error_message', 'created_at', + ] + export_order = fields diff --git a/cloudkit/tasks.py b/cloudkit/tasks.py new file mode 100644 index 0000000..951260d --- /dev/null +++ b/cloudkit/tasks.py @@ -0,0 +1,701 @@ +import logging +import traceback + +from celery import shared_task +from django.utils import timezone + +logger = logging.getLogger('cloudkit') + + +@shared_task(bind=True, max_retries=3) +def run_cloudkit_sync(self, config_id: int, triggered_by: str = 'manual', + sport_code: str = None, record_type: str = None): + """ + Run a CloudKit sync job. + """ + from cloudkit.models import CloudKitConfiguration, CloudKitSyncJob, CloudKitSyncState + from notifications.tasks import send_sync_notification + + # Get configuration + try: + config = CloudKitConfiguration.objects.get(id=config_id) + except CloudKitConfiguration.DoesNotExist: + logger.error(f"CloudKitConfiguration {config_id} not found") + return {'error': 'Configuration not found'} + + # Create job record + job = CloudKitSyncJob.objects.create( + configuration=config, + status='running', + triggered_by=triggered_by, + started_at=timezone.now(), + celery_task_id=self.request.id, + sport_filter_id=sport_code, + record_type_filter=record_type or '', + ) + + try: + logger.info(f'Starting CloudKit sync to {config.environment}') + + # Run sync + result = perform_sync(config, job, sport_code, record_type) + + # Update job with results + job.finished_at = timezone.now() + job.records_synced = result.get('synced', 0) + job.records_created = result.get('created', 0) + job.records_updated = result.get('updated', 0) + job.records_deleted = result.get('deleted', 0) + job.records_failed = result.get('failed', 0) + + # Set status based on results + if job.records_failed > 0 and job.records_synced == 0: + job.status = 'failed' + job.error_message = f'All {job.records_failed} records failed to sync' + logger.error(f'Sync failed: 
{job.records_failed} failed, 0 synced') + elif job.records_failed > 0: + job.status = 'completed_with_errors' + logger.warning(f'Sync completed with errors: {job.records_synced} synced, {job.records_failed} failed') + else: + job.status = 'completed' + logger.info(f'Sync completed: {job.records_synced} synced') + job.save() + + # Send notification if configured + send_sync_notification.delay(job.id) + + return { + 'job_id': job.id, + 'status': 'completed', + 'records_synced': job.records_synced, + } + + except Exception as e: + error_msg = str(e) + error_tb = traceback.format_exc() + + job.status = 'failed' + job.finished_at = timezone.now() + job.error_message = error_msg + job.save() + + logger.error(f'Sync failed: {error_msg}') + + # Send failure notification + send_sync_notification.delay(job.id) + + # Retry if applicable + if self.request.retries < self.max_retries: + raise self.retry(exc=e, countdown=60 * (self.request.retries + 1)) + + return { + 'job_id': job.id, + 'status': 'failed', + 'error': error_msg, + } + + +def perform_sync(config, job, sport_code=None, record_type=None): + """ + Perform the actual CloudKit sync. + Syncs ALL local records to CloudKit (creates new, updates existing). 
+ """ + from cloudkit.client import CloudKitClient + from cloudkit.models import CloudKitSyncState + from core.models import Sport, Conference, Division, Game, Team, Stadium, TeamAlias, StadiumAlias + + # Initialize CloudKit client from config + client = config.get_client() + + # Test connection first + try: + client._get_token() + except Exception as e: + logger.error(f'CloudKit authentication failed: {e}') + raise ValueError(f'CloudKit authentication failed: {e}') + + results = { + 'synced': 0, + 'created': 0, + 'updated': 0, + 'deleted': 0, + 'failed': 0, + } + + batch_size = config.batch_size + + # Sync Sports first (no dependencies) + if not record_type or record_type == 'Sport': + sports = Sport.objects.filter(is_active=True) + job.sports_total = sports.count() + job.current_record_type = 'Sport' + job.save(update_fields=['sports_total', 'current_record_type']) + + sport_results = sync_model_records(client, 'Sport', sports, sport_to_dict, batch_size, job) + results['synced'] += sport_results['synced'] + results['failed'] += sport_results['failed'] + + # Sync Conferences (FK to Sport) + if not record_type or record_type == 'Conference': + conferences = Conference.objects.select_related('sport').all() + job.conferences_total = conferences.count() + job.current_record_type = 'Conference' + job.save(update_fields=['conferences_total', 'current_record_type']) + + conf_results = sync_model_records(client, 'Conference', conferences, conference_to_dict, batch_size, job) + results['synced'] += conf_results['synced'] + results['failed'] += conf_results['failed'] + + # Sync Divisions (FK to Conference) + if not record_type or record_type == 'Division': + divisions = Division.objects.select_related('conference', 'conference__sport').all() + job.divisions_total = divisions.count() + job.current_record_type = 'Division' + job.save(update_fields=['divisions_total', 'current_record_type']) + + div_results = sync_model_records(client, 'Division', divisions, division_to_dict, 
batch_size, job) + results['synced'] += div_results['synced'] + results['failed'] += div_results['failed'] + + # Sync Teams (dependencies for Games, TeamAliases) + if not record_type or record_type == 'Team': + teams = Team.objects.select_related('sport', 'home_stadium', 'division', 'division__conference').all() + job.teams_total = teams.count() + job.current_record_type = 'Team' + job.save(update_fields=['teams_total', 'current_record_type']) + + team_results = sync_model_records(client, 'Team', teams, team_to_dict, batch_size, job) + results['synced'] += team_results['synced'] + results['failed'] += team_results['failed'] + + # Sync Stadiums (dependencies for Games, StadiumAliases) + if not record_type or record_type == 'Stadium': + stadiums = Stadium.objects.select_related('sport').all() + job.stadiums_total = stadiums.count() + job.current_record_type = 'Stadium' + job.save(update_fields=['stadiums_total', 'current_record_type']) + + stadium_results = sync_model_records(client, 'Stadium', stadiums, stadium_to_dict, batch_size, job) + results['synced'] += stadium_results['synced'] + results['failed'] += stadium_results['failed'] + + # Sync TeamAliases (FK to Team) + if not record_type or record_type == 'TeamAlias': + team_aliases = TeamAlias.objects.select_related('team').all() + job.team_aliases_total = team_aliases.count() + job.current_record_type = 'TeamAlias' + job.save(update_fields=['team_aliases_total', 'current_record_type']) + + ta_results = sync_model_records(client, 'TeamAlias', team_aliases, team_alias_to_dict, batch_size, job) + results['synced'] += ta_results['synced'] + results['failed'] += ta_results['failed'] + + # Sync StadiumAliases (FK to Stadium) + if not record_type or record_type == 'StadiumAlias': + stadium_aliases = StadiumAlias.objects.select_related('stadium').all() + job.stadium_aliases_total = stadium_aliases.count() + job.current_record_type = 'StadiumAlias' + job.save(update_fields=['stadium_aliases_total', 'current_record_type']) 
+ + sa_results = sync_model_records(client, 'StadiumAlias', stadium_aliases, stadium_alias_to_dict, batch_size, job) + results['synced'] += sa_results['synced'] + results['failed'] += sa_results['failed'] + + # Sync LeagueStructure (flattened hierarchy: league + conference + division) + if not record_type or record_type == 'LeagueStructure': + ls_records = build_league_structure_records() + job.current_record_type = 'LeagueStructure' + job.save(update_fields=['current_record_type']) + + ls_results = sync_dict_records(client, 'LeagueStructure', ls_records, batch_size, job) + results['synced'] += ls_results['synced'] + results['failed'] += ls_results['failed'] + + # Sync Games (depends on Teams, Stadiums) + if not record_type or record_type == 'Game': + games = Game.objects.select_related('home_team', 'away_team', 'stadium', 'sport').all() + job.games_total = games.count() + job.current_record_type = 'Game' + job.save(update_fields=['games_total', 'current_record_type']) + + game_results = sync_model_records(client, 'Game', games, game_to_dict, batch_size, job) + results['synced'] += game_results['synced'] + results['failed'] += game_results['failed'] + + job.current_record_type = '' + job.save(update_fields=['current_record_type']) + return results + + +def sync_model_records(client, record_type, queryset, to_dict_func, batch_size, job=None): + """ + Sync all records from a queryset to CloudKit. + Updates progress frequently for real-time UI feedback. 
+ """ + results = {'synced': 0, 'failed': 0} + + records = list(queryset) + total = len(records) + + logger.info(f'[{record_type}] Starting sync: {total} total records') + + # Field names for job updates + field_map = { + 'Sport': ('sports_synced', 'sports_failed'), + 'Conference': ('conferences_synced', 'conferences_failed'), + 'Division': ('divisions_synced', 'divisions_failed'), + 'Team': ('teams_synced', 'teams_failed'), + 'Stadium': ('stadiums_synced', 'stadiums_failed'), + 'TeamAlias': ('team_aliases_synced', 'team_aliases_failed'), + 'StadiumAlias': ('stadium_aliases_synced', 'stadium_aliases_failed'), + 'Game': ('games_synced', 'games_failed'), + } + synced_field, failed_field = field_map.get(record_type, (None, None)) + + # Use smaller batches for more frequent progress updates + # CloudKit API batch size vs progress update frequency + api_batch_size = min(batch_size, 50) # Max 50 per API call for frequent updates + progress_update_interval = 10 # Update DB every 10 records + records_since_last_update = 0 + + for i in range(0, total, api_batch_size): + batch = records[i:i + api_batch_size] + batch_num = (i // api_batch_size) + 1 + total_batches = (total + api_batch_size - 1) // api_batch_size + + # Convert to CloudKit format + cloudkit_records = [] + for record in batch: + try: + data = to_dict_func(record) + ck_record = client.to_cloudkit_record(record_type, data) + cloudkit_records.append(ck_record) + except Exception as e: + logger.error(f'Failed to convert {record_type}:{record.id}: {e}') + results['failed'] += 1 + records_since_last_update += 1 + + if cloudkit_records: + try: + response = client.save_records(cloudkit_records) + response_records = response.get('records', []) + + batch_synced = 0 + batch_failed = 0 + for rec in response_records: + if 'serverErrorCode' in rec: + logger.error(f'CloudKit error for {rec.get("recordName")}: {rec.get("reason")}') + results['failed'] += 1 + batch_failed += 1 + else: + results['synced'] += 1 + batch_synced += 1 
+ records_since_last_update += 1 + + # Update progress frequently for real-time UI + if job and synced_field and records_since_last_update >= progress_update_interval: + setattr(job, synced_field, results['synced']) + setattr(job, failed_field, results['failed']) + job.save(update_fields=[synced_field, failed_field]) + records_since_last_update = 0 + + # Always update after each batch completes + if job and synced_field: + setattr(job, synced_field, results['synced']) + setattr(job, failed_field, results['failed']) + job.save(update_fields=[synced_field, failed_field]) + records_since_last_update = 0 + + # Log progress after each batch + remaining = total - (results['synced'] + results['failed']) + logger.info( + f'[{record_type}] Batch {batch_num}/{total_batches}: ' + f'+{batch_synced} synced, +{batch_failed} failed | ' + f'Progress: {results["synced"]}/{total} synced, {remaining} remaining' + ) + + except Exception as e: + logger.error(f'Batch save failed: {e}') + results['failed'] += len(cloudkit_records) + + # Update job progress + if job and failed_field: + setattr(job, failed_field, results['failed']) + job.save(update_fields=[failed_field]) + + remaining = total - (results['synced'] + results['failed']) + logger.info( + f'[{record_type}] Batch {batch_num}/{total_batches} FAILED | ' + f'Progress: {results["synced"]}/{total} synced, {remaining} remaining' + ) + + logger.info(f'[{record_type}] Complete: {results["synced"]} synced, {results["failed"]} failed') + return results + + +def build_league_structure_records(): + """Build flat LeagueStructure dicts from Sport, Conference, Division models.""" + from core.models import Sport, Conference, Division + + records = [] + + for sport in Sport.objects.filter(is_active=True).order_by('code'): + league_id = f'ls_{sport.code}_league' + records.append({ + 'id': league_id, + 'structureId': league_id, + 'sport': sport.code, + 'type': 'league', + 'name': sport.name, + 'abbreviation': sport.short_name, + 'parentId': '', + 
'displayOrder': 0, + }) + + for conf in Conference.objects.filter(sport=sport).order_by('order', 'name'): + raw_conf_id = conf.canonical_id or f'conf_{conf.id}' + conf_id = f'ls_{raw_conf_id}' + records.append({ + 'id': conf_id, + 'structureId': conf_id, + 'sport': sport.code, + 'type': 'conference', + 'name': conf.name, + 'abbreviation': conf.short_name or '', + 'parentId': league_id, + 'displayOrder': conf.order, + }) + + for div in Division.objects.filter(conference=conf).order_by('order', 'name'): + raw_div_id = div.canonical_id or f'div_{div.id}' + div_id = f'ls_{raw_div_id}' + records.append({ + 'id': div_id, + 'structureId': div_id, + 'sport': sport.code, + 'type': 'division', + 'name': div.name, + 'abbreviation': div.short_name or '', + 'parentId': conf_id, + 'displayOrder': div.order, + }) + + return records + + +def sync_dict_records(client, record_type, dict_records, batch_size, job=None): + """Sync pre-built dict records to CloudKit (no model/queryset needed).""" + results = {'synced': 0, 'failed': 0} + total = len(dict_records) + + logger.info(f'[{record_type}] Starting sync: {total} total records') + + api_batch_size = min(batch_size, 50) + + for i in range(0, total, api_batch_size): + batch = dict_records[i:i + api_batch_size] + batch_num = (i // api_batch_size) + 1 + total_batches = (total + api_batch_size - 1) // api_batch_size + + cloudkit_records = [] + for data in batch: + try: + ck_record = client.to_cloudkit_record(record_type, data) + cloudkit_records.append(ck_record) + except Exception as e: + logger.error(f'Failed to convert {record_type}:{data.get("id")}: {e}') + results['failed'] += 1 + + if cloudkit_records: + try: + response = client.save_records(cloudkit_records) + batch_synced = 0 + batch_failed = 0 + for rec in response.get('records', []): + if 'serverErrorCode' in rec: + logger.error(f'CloudKit error for {rec.get("recordName")}: {rec.get("reason")}') + results['failed'] += 1 + batch_failed += 1 + else: + results['synced'] += 1 + 
batch_synced += 1 + + remaining = total - (results['synced'] + results['failed']) + logger.info( + f'[{record_type}] Batch {batch_num}/{total_batches}: ' + f'+{batch_synced} synced, +{batch_failed} failed | ' + f'Progress: {results["synced"]}/{total} synced, {remaining} remaining' + ) + + except Exception as e: + logger.error(f'Batch save failed: {e}') + results['failed'] += len(cloudkit_records) + + logger.info(f'[{record_type}] Complete: {results["synced"]} synced, {results["failed"]} failed') + return results + + +def sync_batch(client, states): + """ + Sync a batch of records to CloudKit. + """ + from core.models import Game, Team, Stadium + + result = {'synced': 0, 'created': 0, 'updated': 0, 'failed': 0} + + records_to_save = [] + + for state in states: + try: + # Get the local record + record_data = get_record_data(state.record_type, state.record_id) + if record_data: + records_to_save.append({ + 'state': state, + 'data': record_data, + }) + except Exception as e: + logger.error(f'Failed to get record {state.record_type}:{state.record_id}: {e}') + state.mark_failed(str(e)) + result['failed'] += 1 + + if records_to_save: + # Convert to CloudKit format and upload + cloudkit_records = [ + client.to_cloudkit_record(r['state'].record_type, r['data']) + for r in records_to_save + ] + + try: + response = client.save_records(cloudkit_records) + + for i, r in enumerate(records_to_save): + if i < len(response.get('records', [])): + change_tag = response['records'][i].get('recordChangeTag', '') + r['state'].mark_synced(change_tag) + result['synced'] += 1 + if r['state'].cloudkit_record_name: + result['updated'] += 1 + else: + result['created'] += 1 + else: + r['state'].mark_failed('No response for record') + result['failed'] += 1 + + except Exception as e: + logger.error(f'CloudKit save failed: {e}') + for r in records_to_save: + r['state'].mark_failed(str(e)) + result['failed'] += len(records_to_save) + + return result + + +def get_record_data(record_type, record_id): 
+ """ + Get the local record data for a given type and ID. + """ + from core.models import Sport, Conference, Division, Game, Team, Stadium, TeamAlias, StadiumAlias + + if record_type == 'Sport': + try: + sport = Sport.objects.get(code=record_id) + return sport_to_dict(sport) + except Sport.DoesNotExist: + return None + + elif record_type == 'Conference': + try: + conf = Conference.objects.select_related('sport').get(id=record_id) + return conference_to_dict(conf) + except Conference.DoesNotExist: + return None + + elif record_type == 'Division': + try: + div = Division.objects.select_related('conference', 'conference__sport').get(id=record_id) + return division_to_dict(div) + except Division.DoesNotExist: + return None + + elif record_type == 'Game': + try: + game = Game.objects.select_related( + 'home_team', 'away_team', 'stadium', 'sport' + ).get(id=record_id) + return game_to_dict(game) + except Game.DoesNotExist: + return None + + elif record_type == 'Team': + try: + team = Team.objects.select_related('sport', 'home_stadium').get(id=record_id) + return team_to_dict(team) + except Team.DoesNotExist: + return None + + elif record_type == 'Stadium': + try: + stadium = Stadium.objects.select_related('sport').get(id=record_id) + return stadium_to_dict(stadium) + except Stadium.DoesNotExist: + return None + + elif record_type == 'TeamAlias': + try: + alias = TeamAlias.objects.select_related('team').get(id=record_id) + return team_alias_to_dict(alias) + except TeamAlias.DoesNotExist: + return None + + elif record_type == 'StadiumAlias': + try: + alias = StadiumAlias.objects.select_related('stadium').get(id=record_id) + return stadium_alias_to_dict(alias) + except StadiumAlias.DoesNotExist: + return None + + return None + + +def sport_to_dict(sport): + """Convert Sport model to dict for CloudKit.""" + return { + 'id': sport.code, + 'abbreviation': sport.short_name, + 'displayName': sport.name, + 'iconName': sport.icon_name, + 'colorHex': sport.color_hex, + 
'seasonStartMonth': sport.season_start_month, + 'seasonEndMonth': sport.season_end_month, + 'isActive': sport.is_active, + } + + +def game_to_dict(game): + """Convert Game model to dict for CloudKit.""" + return { + 'id': game.id, + 'sport': game.sport.code, + 'season': game.season, + 'homeTeamId': game.home_team_id, + 'awayTeamId': game.away_team_id, + 'stadiumId': game.stadium_id, + 'gameDate': game.game_date.isoformat(), + 'gameNumber': game.game_number, + 'homeScore': game.home_score, + 'awayScore': game.away_score, + 'status': game.status, + 'isNeutralSite': game.is_neutral_site, + 'isPlayoff': game.is_playoff, + 'playoffRound': game.playoff_round, + } + + +def team_to_dict(team): + """Convert Team model to dict for CloudKit.""" + division_id = None + conference_id = None + if team.division: + division_id = team.division.canonical_id or f'div_{team.division.id}' + conference_id = team.division.conference.canonical_id or f'conf_{team.division.conference.id}' + return { + 'id': team.id, + 'sport': team.sport.code, + 'city': team.city, + 'name': team.name, + 'fullName': team.full_name, + 'abbreviation': team.abbreviation, + 'homeStadiumId': team.home_stadium_id, + 'primaryColor': team.primary_color, + 'secondaryColor': team.secondary_color, + 'logoUrl': team.logo_url, + 'divisionId': division_id, + 'conferenceId': conference_id, + } + + +def stadium_to_dict(stadium): + """Convert Stadium model to dict for CloudKit.""" + return { + 'id': stadium.id, + 'sport': stadium.sport.code, + 'name': stadium.name, + 'city': stadium.city, + 'state': stadium.state, + 'country': stadium.country, + 'latitude': float(stadium.latitude) if stadium.latitude else None, + 'longitude': float(stadium.longitude) if stadium.longitude else None, + 'capacity': stadium.capacity, + 'yearOpened': stadium.opened_year, + 'imageUrl': stadium.image_url, + 'surface': stadium.surface, + 'roofType': stadium.roof_type, + 'timezone': stadium.timezone, + } + + +def conference_to_dict(conf): + """Convert 
Conference model to dict for CloudKit.""" + return { + 'id': conf.canonical_id or f'conf_{conf.id}', + 'sport': conf.sport.code, + 'name': conf.name, + 'shortName': conf.short_name, + 'order': conf.order, + } + + +def division_to_dict(div): + """Convert Division model to dict for CloudKit.""" + return { + 'id': div.canonical_id or f'div_{div.id}', + 'conferenceId': div.conference.canonical_id or f'conf_{div.conference.id}', + 'sport': div.conference.sport.code, + 'name': div.name, + 'shortName': div.short_name, + 'order': div.order, + } + + +def team_alias_to_dict(alias): + """Convert TeamAlias model to dict for CloudKit.""" + return { + 'id': f'team_alias_{alias.id}', + 'teamId': alias.team.id, + 'alias': alias.alias, + 'aliasType': alias.alias_type, + 'validFrom': alias.valid_from.isoformat() if alias.valid_from else None, + 'validUntil': alias.valid_until.isoformat() if alias.valid_until else None, + 'isPrimary': alias.is_primary, + } + + +def stadium_alias_to_dict(alias): + """Convert StadiumAlias model to dict for CloudKit.""" + return { + 'id': f'stadium_alias_{alias.id}', + 'stadiumId': alias.stadium.id, + 'alias': alias.alias, + 'aliasType': alias.alias_type, + 'validFrom': alias.valid_from.isoformat() if alias.valid_from else None, + 'validUntil': alias.valid_until.isoformat() if alias.valid_until else None, + 'isPrimary': alias.is_primary, + } + + +@shared_task +def mark_records_for_sync(record_type: str, record_ids: list): + """ + Mark records as needing sync after local changes. 
+ """ + from cloudkit.models import CloudKitSyncState + + for record_id in record_ids: + state, created = CloudKitSyncState.objects.get_or_create( + record_type=record_type, + record_id=record_id, + ) + state.mark_pending() + + return {'marked': len(record_ids)} diff --git a/core/__init__.py b/core/__init__.py new file mode 100644 index 0000000..17acc4c --- /dev/null +++ b/core/__init__.py @@ -0,0 +1 @@ +default_app_config = 'core.apps.CoreConfig' diff --git a/core/admin/__init__.py b/core/admin/__init__.py new file mode 100644 index 0000000..4508072 --- /dev/null +++ b/core/admin/__init__.py @@ -0,0 +1,6 @@ +from .sport_admin import SportAdmin +from .league_structure_admin import ConferenceAdmin, DivisionAdmin +from .team_admin import TeamAdmin +from .stadium_admin import StadiumAdmin +from .game_admin import GameAdmin +from .alias_admin import TeamAliasAdmin, StadiumAliasAdmin diff --git a/core/admin/alias_admin.py b/core/admin/alias_admin.py new file mode 100644 index 0000000..2dabe1b --- /dev/null +++ b/core/admin/alias_admin.py @@ -0,0 +1,84 @@ +from django.contrib import admin +from import_export.admin import ImportExportMixin +from simple_history.admin import SimpleHistoryAdmin + +from core.models import TeamAlias, StadiumAlias +from core.resources import TeamAliasResource, StadiumAliasResource + + +@admin.register(TeamAlias) +class TeamAliasAdmin(ImportExportMixin, SimpleHistoryAdmin): + resource_class = TeamAliasResource + list_display = [ + 'alias', + 'team', + 'sport_display', + 'alias_type', + 'valid_from', + 'valid_until', + 'is_primary', + ] + list_filter = ['team__sport', 'alias_type', 'is_primary'] + search_fields = ['alias', 'team__full_name', 'team__abbreviation'] + ordering = ['team__sport', 'team', '-valid_from'] + readonly_fields = ['created_at', 'updated_at'] + autocomplete_fields = ['team'] + + fieldsets = [ + (None, { + 'fields': ['team', 'alias', 'alias_type'] + }), + ('Validity Period', { + 'fields': ['valid_from', 'valid_until'] + }), + 
('Options', { + 'fields': ['is_primary', 'source', 'notes'] + }), + ('Metadata', { + 'fields': ['created_at', 'updated_at'], + 'classes': ['collapse'] + }), + ] + + def sport_display(self, obj): + return obj.team.sport.short_name + sport_display.short_description = 'Sport' + + +@admin.register(StadiumAlias) +class StadiumAliasAdmin(ImportExportMixin, SimpleHistoryAdmin): + resource_class = StadiumAliasResource + list_display = [ + 'alias', + 'stadium', + 'sport_display', + 'alias_type', + 'valid_from', + 'valid_until', + 'is_primary', + ] + list_filter = ['stadium__sport', 'alias_type', 'is_primary'] + search_fields = ['alias', 'stadium__name', 'stadium__city'] + ordering = ['stadium__sport', 'stadium', '-valid_from'] + readonly_fields = ['created_at', 'updated_at'] + autocomplete_fields = ['stadium'] + + fieldsets = [ + (None, { + 'fields': ['stadium', 'alias', 'alias_type'] + }), + ('Validity Period', { + 'fields': ['valid_from', 'valid_until'] + }), + ('Options', { + 'fields': ['is_primary', 'source', 'notes'] + }), + ('Metadata', { + 'fields': ['created_at', 'updated_at'], + 'classes': ['collapse'] + }), + ] + + def sport_display(self, obj): + return obj.stadium.sport.short_name + sport_display.short_description = 'Sport' diff --git a/core/admin/game_admin.py b/core/admin/game_admin.py new file mode 100644 index 0000000..126ad21 --- /dev/null +++ b/core/admin/game_admin.py @@ -0,0 +1,117 @@ +from django.contrib import admin +from django.utils.html import format_html +from import_export.admin import ImportExportMixin +from simple_history.admin import SimpleHistoryAdmin + +from core.models import Game +from core.resources import GameResource + + +@admin.register(Game) +class GameAdmin(ImportExportMixin, SimpleHistoryAdmin): + resource_class = GameResource + list_display = [ + 'game_display', + 'sport', + 'season', + 'game_date', + 'score_display', + 'status', + 'stadium_display', + 'is_playoff', + ] + list_filter = [ + 'sport', + 'season', + 'status', + 
'is_playoff', + 'is_neutral_site', + ('game_date', admin.DateFieldListFilter), + ] + search_fields = [ + 'id', + 'home_team__full_name', + 'home_team__abbreviation', + 'away_team__full_name', + 'away_team__abbreviation', + 'stadium__name', + ] + date_hierarchy = 'game_date' + ordering = ['-game_date'] + readonly_fields = ['id', 'created_at', 'updated_at', 'source_link'] + autocomplete_fields = ['home_team', 'away_team', 'stadium'] + + fieldsets = [ + (None, { + 'fields': ['id', 'sport', 'season'] + }), + ('Teams', { + 'fields': ['home_team', 'away_team'] + }), + ('Schedule', { + 'fields': ['game_date', 'game_number', 'stadium', 'is_neutral_site'] + }), + ('Score', { + 'fields': ['status', 'home_score', 'away_score'] + }), + ('Playoff', { + 'fields': ['is_playoff', 'playoff_round'], + 'classes': ['collapse'] + }), + ('Raw Data (Debug)', { + 'fields': ['raw_home_team', 'raw_away_team', 'raw_stadium', 'source_url', 'source_link'], + 'classes': ['collapse'] + }), + ('Metadata', { + 'fields': ['created_at', 'updated_at'], + 'classes': ['collapse'] + }), + ] + + actions = ['mark_as_final', 'mark_as_postponed', 'mark_as_cancelled'] + + @admin.display(description='Game', ordering='home_team__abbreviation') + def game_display(self, obj): + return f"{obj.away_team.abbreviation} @ {obj.home_team.abbreviation}" + + @admin.display(description='Score', ordering='home_score') + def score_display(self, obj): + if obj.home_score is not None and obj.away_score is not None: + winner_style = "font-weight: bold;" + away_style = winner_style if obj.away_score > obj.home_score else "" + home_style = winner_style if obj.home_score > obj.away_score else "" + return format_html( + '{} - {}', + away_style, obj.away_score, home_style, obj.home_score + ) + return '-' + + @admin.display(description='Stadium', ordering='stadium__name') + def stadium_display(self, obj): + if obj.stadium: + return obj.stadium.name[:30] + return '-' + + def source_link(self, obj): + if obj.source_url: + return 
format_html( + '<a href="{}" target="_blank">View Source</a>', + obj.source_url + )
['created_at', 'updated_at'], + 'classes': ['collapse'] + }), + ] + + def division_count(self, obj): + return obj.divisions.count() + division_count.short_description = 'Divisions' + + def team_count(self, obj): + return sum(d.teams.count() for d in obj.divisions.all()) + team_count.short_description = 'Teams' + + +@admin.register(Division) +class DivisionAdmin(ImportExportMixin, SimpleHistoryAdmin): + resource_class = DivisionResource + list_display = ['canonical_id', 'name', 'conference', 'sport_display', 'short_name', 'team_count', 'order'] + list_filter = ['conference__sport', 'conference'] + search_fields = ['name', 'short_name', 'canonical_id', 'conference__name'] + ordering = ['conference__sport', 'conference', 'order', 'name'] + readonly_fields = ['created_at', 'updated_at'] + + fieldsets = [ + (None, { + 'fields': ['conference', 'canonical_id', 'name', 'short_name', 'order'] + }), + ('Metadata', { + 'fields': ['created_at', 'updated_at'], + 'classes': ['collapse'] + }), + ] + + def sport_display(self, obj): + return obj.conference.sport.short_name + sport_display.short_description = 'Sport' + + def team_count(self, obj): + return obj.teams.count() + team_count.short_description = 'Teams' diff --git a/core/admin/sport_admin.py b/core/admin/sport_admin.py new file mode 100644 index 0000000..becbd26 --- /dev/null +++ b/core/admin/sport_admin.py @@ -0,0 +1,54 @@ +from django.contrib import admin +from import_export.admin import ImportExportMixin +from simple_history.admin import SimpleHistoryAdmin + +from core.models import Sport +from core.resources import SportResource + + +@admin.register(Sport) +class SportAdmin(ImportExportMixin, SimpleHistoryAdmin): + resource_class = SportResource + list_display = [ + 'code', + 'short_name', + 'name', + 'season_type', + 'expected_game_count', + 'is_active', + 'team_count', + 'game_count', + ] + list_filter = ['is_active', 'season_type'] + search_fields = ['code', 'name', 'short_name'] + ordering = ['name'] + 
readonly_fields = ['created_at', 'updated_at'] + + fieldsets = [ + (None, { + 'fields': ['code', 'name', 'short_name'] + }), + ('Season Configuration', { + 'fields': [ + 'season_type', + 'season_start_month', + 'season_end_month', + 'expected_game_count', + ] + }), + ('Status', { + 'fields': ['is_active'] + }), + ('Metadata', { + 'fields': ['created_at', 'updated_at'], + 'classes': ['collapse'] + }), + ] + + def team_count(self, obj): + return obj.teams.count() + team_count.short_description = 'Teams' + + def game_count(self, obj): + return obj.games.count() + game_count.short_description = 'Games' diff --git a/core/admin/stadium_admin.py b/core/admin/stadium_admin.py new file mode 100644 index 0000000..ce9177a --- /dev/null +++ b/core/admin/stadium_admin.py @@ -0,0 +1,89 @@ +from django.contrib import admin +from django.utils.html import format_html +from import_export.admin import ImportExportMixin +from simple_history.admin import SimpleHistoryAdmin + +from core.models import Stadium, StadiumAlias +from core.resources import StadiumResource + + +class StadiumAliasInline(admin.TabularInline): + model = StadiumAlias + extra = 0 + fields = ['alias', 'alias_type', 'valid_from', 'valid_until', 'is_primary'] + ordering = ['-valid_from'] + + +@admin.register(Stadium) +class StadiumAdmin(ImportExportMixin, SimpleHistoryAdmin): + resource_class = StadiumResource + list_display = [ + 'name', + 'sport', + 'location_display', + 'capacity_display', + 'surface', + 'roof_type', + 'opened_year', + 'home_team_count', + 'alias_count', + ] + list_filter = ['sport', 'country', 'surface', 'roof_type'] + search_fields = ['id', 'name', 'city', 'state'] + ordering = ['sport', 'city', 'name'] + readonly_fields = ['id', 'created_at', 'updated_at', 'map_link'] + inlines = [StadiumAliasInline] + + fieldsets = [ + (None, { + 'fields': ['id', 'sport', 'name'] + }), + ('Location', { + 'fields': ['city', 'state', 'country', 'timezone'] + }), + ('Coordinates', { + 'fields': ['latitude', 
'longitude', 'map_link'] + }), + ('Venue Details', { + 'fields': ['capacity', 'surface', 'roof_type', 'opened_year'] + }), + ('Media', { + 'fields': ['image_url'] + }), + ('Metadata', { + 'fields': ['created_at', 'updated_at'], + 'classes': ['collapse'] + }), + ] + + def location_display(self, obj): + return obj.location + location_display.short_description = 'Location' + + def capacity_display(self, obj): + if obj.capacity: + return f"{obj.capacity:,}" + return '-' + capacity_display.short_description = 'Capacity' + + def home_team_count(self, obj): + return obj.home_teams.count() + home_team_count.short_description = 'Teams' + + def alias_count(self, obj): + return obj.aliases.count() + alias_count.short_description = 'Aliases' + + def map_link(self, obj): + if obj.latitude and obj.longitude: + return format_html( + 'View on Google Maps', + obj.latitude, obj.longitude + ) + return '-' + map_link.short_description = 'Map' + + def get_search_results(self, request, queryset, search_term): + """Enable autocomplete search.""" + queryset, use_distinct = super().get_search_results(request, queryset, search_term) + return queryset, use_distinct diff --git a/core/admin/team_admin.py b/core/admin/team_admin.py new file mode 100644 index 0000000..e610236 --- /dev/null +++ b/core/admin/team_admin.py @@ -0,0 +1,96 @@ +from django.contrib import admin +from django.utils.html import format_html +from import_export.admin import ImportExportMixin +from simple_history.admin import SimpleHistoryAdmin + +from core.models import Team, TeamAlias +from core.resources import TeamResource + + +class TeamAliasInline(admin.TabularInline): + model = TeamAlias + extra = 0 + fields = ['alias', 'alias_type', 'valid_from', 'valid_until', 'is_primary'] + ordering = ['-valid_from'] + + +@admin.register(Team) +class TeamAdmin(ImportExportMixin, SimpleHistoryAdmin): + resource_class = TeamResource + list_display = [ + 'abbreviation', + 'full_name', + 'sport', + 'division_display', + 'home_stadium', 
+ 'color_preview', + 'is_active', + 'alias_count', + ] + list_filter = ['sport', 'is_active', 'division__conference'] + search_fields = ['id', 'city', 'name', 'full_name', 'abbreviation'] + ordering = ['sport', 'city', 'name'] + readonly_fields = ['id', 'created_at', 'updated_at', 'color_preview_large'] + autocomplete_fields = ['home_stadium', 'division'] + inlines = [TeamAliasInline] + + fieldsets = [ + (None, { + 'fields': ['id', 'sport', 'division'] + }), + ('Team Info', { + 'fields': ['city', 'name', 'full_name', 'abbreviation'] + }), + ('Venue', { + 'fields': ['home_stadium'] + }), + ('Branding', { + 'fields': ['primary_color', 'secondary_color', 'color_preview_large', 'logo_url'] + }), + ('Status', { + 'fields': ['is_active'] + }), + ('Metadata', { + 'fields': ['created_at', 'updated_at'], + 'classes': ['collapse'] + }), + ] + + def division_display(self, obj): + if obj.division: + return f"{obj.division.conference.short_name or obj.division.conference.name} - {obj.division.name}" + return '-' + division_display.short_description = 'Division' + + def color_preview(self, obj): + if obj.primary_color: + return format_html( + ' ', + obj.primary_color + ) + return '-' + color_preview.short_description = 'Color' + + def color_preview_large(self, obj): + html = '' + if obj.primary_color: + html += format_html( + '   ', + obj.primary_color + ) + if obj.secondary_color: + html += format_html( + '   ', + obj.secondary_color + ) + return format_html(html) if html else '-' + color_preview_large.short_description = 'Color Preview' + + def alias_count(self, obj): + return obj.aliases.count() + alias_count.short_description = 'Aliases' + + def get_search_results(self, request, queryset, search_term): + """Enable autocomplete search.""" + queryset, use_distinct = super().get_search_results(request, queryset, search_term) + return queryset, use_distinct diff --git a/core/apps.py b/core/apps.py new file mode 100644 index 0000000..1593dfb --- /dev/null +++ b/core/apps.py @@ 
-0,0 +1,7 @@ +from django.apps import AppConfig + + +class CoreConfig(AppConfig): + default_auto_field = 'django.db.models.BigAutoField' + name = 'core' + verbose_name = 'Core Data' diff --git a/core/management/__init__.py b/core/management/__init__.py new file mode 100644 index 0000000..65cb83a --- /dev/null +++ b/core/management/__init__.py @@ -0,0 +1 @@ +# Management commands package diff --git a/core/management/commands/__init__.py b/core/management/commands/__init__.py new file mode 100644 index 0000000..b5a3a84 --- /dev/null +++ b/core/management/commands/__init__.py @@ -0,0 +1 @@ +# Commands package diff --git a/core/management/commands/export_data.py b/core/management/commands/export_data.py new file mode 100644 index 0000000..3a24577 --- /dev/null +++ b/core/management/commands/export_data.py @@ -0,0 +1,445 @@ +""" +Management command to export Django database data to JSON bootstrap files for iOS app. +""" +import json +from datetime import datetime, timezone +from pathlib import Path +from urllib.parse import urlparse + +from django.core.management.base import BaseCommand + +from core.models import Sport, Conference, Division, Team, Stadium, Game, TeamAlias, StadiumAlias + + +class Command(BaseCommand): + help = 'Export database data to JSON bootstrap files for iOS app' + + def add_arguments(self, parser): + parser.add_argument( + '--output-dir', + type=str, + default='./bootstrap', + help='Directory to write JSON files to' + ) + parser.add_argument( + '--sports', + action='store_true', + help='Export sports only' + ) + parser.add_argument( + '--league-structure', + action='store_true', + help='Export league structure only' + ) + parser.add_argument( + '--teams', + action='store_true', + help='Export teams only' + ) + parser.add_argument( + '--stadiums', + action='store_true', + help='Export stadiums only' + ) + parser.add_argument( + '--games', + action='store_true', + help='Export games only' + ) + parser.add_argument( + '--team-aliases', + 
action='store_true', + help='Export team aliases only' + ) + parser.add_argument( + '--stadium-aliases', + action='store_true', + help='Export stadium aliases only' + ) + parser.add_argument( + '--sport', + type=str, + help='Filter by sport code (e.g., nba, mlb)' + ) + parser.add_argument( + '--year', + type=int, + help='Filter games by calendar year (e.g., 2025 returns all games played in 2025)' + ) + parser.add_argument( + '--pretty', + action='store_true', + default=True, + help='Pretty print JSON output (default: true)' + ) + + def handle(self, *args, **options): + output_dir = Path(options['output_dir']) + output_dir.mkdir(parents=True, exist_ok=True) + + # If no specific flags, export everything + export_all = not any([ + options['sports'], + options['league_structure'], + options['teams'], + options['stadiums'], + options['games'], + options['team_aliases'], + options['stadium_aliases'], + ]) + + sport_filter = options.get('sport') + year_filter = options.get('year') + indent = 2 if options['pretty'] else None + + if export_all or options['sports']: + self._export_sports(output_dir, sport_filter, indent) + + if export_all or options['league_structure']: + self._export_league_structure(output_dir, sport_filter, indent) + + if export_all or options['teams']: + self._export_teams(output_dir, sport_filter, indent) + + if export_all or options['stadiums']: + self._export_stadiums(output_dir, sport_filter, indent) + + if export_all or options['games']: + self._export_games(output_dir, sport_filter, year_filter, indent) + + if export_all or options['team_aliases']: + self._export_team_aliases(output_dir, sport_filter, indent) + + if export_all or options['stadium_aliases']: + self._export_stadium_aliases(output_dir, sport_filter, indent) + + self.stdout.write(self.style.SUCCESS(f'Export completed to {output_dir}')) + + def _get_conference_id(self, conference): + """Get conference canonical ID from DB field.""" + return conference.canonical_id + + def 
_get_division_id(self, division): + """Get division canonical ID from DB field.""" + return division.canonical_id + + def _export_sports(self, output_dir, sport_filter, indent): + """Export sports to sports.json.""" + self.stdout.write('Exporting sports...') + + sports = Sport.objects.filter(is_active=True) + if sport_filter: + sports = sports.filter(code=sport_filter.lower()) + + data = [] + for sport in sports.order_by('code'): + data.append({ + 'sport_id': sport.short_name, + 'abbreviation': sport.short_name, + 'display_name': sport.name, + 'icon_name': sport.icon_name, + 'color_hex': sport.color_hex, + 'season_start_month': sport.season_start_month, + 'season_end_month': sport.season_end_month, + 'is_active': sport.is_active, + }) + + file_path = output_dir / 'sports.json' + with open(file_path, 'w') as f: + json.dump(data, f, indent=indent) + + self.stdout.write(f' Wrote {len(data)} sports to {file_path}') + + def _export_league_structure(self, output_dir, sport_filter, indent): + """Export league structure (sports as leagues, conferences, divisions).""" + self.stdout.write('Exporting league structure...') + + data = [] + seen_ids = set() # Track IDs to prevent duplicates + display_order = 0 + + # Query sports + sports = Sport.objects.all() + if sport_filter: + sports = sports.filter(code=sport_filter.lower()) + + for sport in sports.order_by('code'): + # Create league entry from Sport + league_id = f"{sport.code}_league" + + # Skip if we've already seen this ID + if league_id in seen_ids: + continue + seen_ids.add(league_id) + + data.append({ + 'id': league_id, + 'sport': sport.short_name, + 'type': 'league', + 'name': sport.name, + 'abbreviation': sport.short_name, + 'parent_id': None, + 'display_order': display_order, + }) + display_order += 1 + + # Get conferences for this sport + conferences = Conference.objects.filter(sport=sport).order_by('order', 'name') + for conf in conferences: + conf_id = self._get_conference_id(conf) + + # Skip duplicate 
conference IDs + if conf_id in seen_ids: + continue + seen_ids.add(conf_id) + + data.append({ + 'id': conf_id, + 'sport': sport.short_name, + 'type': 'conference', + 'name': conf.name, + 'abbreviation': conf.short_name or None, + 'parent_id': league_id, + 'display_order': conf.order, + }) + + # Get divisions for this conference + divisions = Division.objects.filter(conference=conf).order_by('order', 'name') + for div in divisions: + div_id = self._get_division_id(div) + + # Skip duplicate division IDs + if div_id in seen_ids: + continue + seen_ids.add(div_id) + + data.append({ + 'id': div_id, + 'sport': sport.short_name, + 'type': 'division', + 'name': div.name, + 'abbreviation': div.short_name or None, + 'parent_id': conf_id, + 'display_order': div.order, + }) + + file_path = output_dir / 'league_structure.json' + with open(file_path, 'w') as f: + json.dump(data, f, indent=indent) + + self.stdout.write(f' Wrote {len(data)} entries to {file_path}') + + def _export_teams(self, output_dir, sport_filter, indent): + """Export teams to teams_canonical.json.""" + self.stdout.write('Exporting teams...') + + teams = Team.objects.select_related( + 'sport', 'division', 'division__conference', 'home_stadium' + ).all() + + if sport_filter: + teams = teams.filter(sport__code=sport_filter.lower()) + + data = [] + for team in teams.order_by('sport__code', 'city', 'name'): + # Get conference and division IDs + conference_id = None + division_id = None + if team.division: + division_id = self._get_division_id(team.division) + conference_id = self._get_conference_id(team.division.conference) + + data.append({ + 'canonical_id': team.id, + 'name': team.name, + 'abbreviation': team.abbreviation, + 'sport': team.sport.short_name, + 'city': team.city, + 'stadium_canonical_id': team.home_stadium_id, + 'conference_id': conference_id, + 'division_id': division_id, + 'primary_color': team.primary_color or None, + 'secondary_color': team.secondary_color or None, + }) + + file_path = 
output_dir / 'teams_canonical.json' + with open(file_path, 'w') as f: + json.dump(data, f, indent=indent) + + self.stdout.write(f' Wrote {len(data)} teams to {file_path}') + + def _export_stadiums(self, output_dir, sport_filter, indent): + """Export stadiums to stadiums_canonical.json.""" + self.stdout.write('Exporting stadiums...') + + stadiums = Stadium.objects.select_related('sport').all() + + if sport_filter: + stadiums = stadiums.filter(sport__code=sport_filter.lower()) + + # Build map of stadium -> team abbreviations + stadium_teams = {} + teams = Team.objects.filter(home_stadium__isnull=False).select_related('home_stadium') + if sport_filter: + teams = teams.filter(sport__code=sport_filter.lower()) + + for team in teams: + if team.home_stadium_id not in stadium_teams: + stadium_teams[team.home_stadium_id] = [] + stadium_teams[team.home_stadium_id].append(team.abbreviation) + + data = [] + for stadium in stadiums.order_by('sport__code', 'city', 'name'): + data.append({ + 'canonical_id': stadium.id, + 'name': stadium.name, + 'city': stadium.city, + 'state': stadium.state or None, + 'latitude': float(stadium.latitude) if stadium.latitude else None, + 'longitude': float(stadium.longitude) if stadium.longitude else None, + 'capacity': stadium.capacity or 0, + 'sport': stadium.sport.short_name, + 'primary_team_abbrevs': stadium_teams.get(stadium.id, []), + 'year_opened': stadium.opened_year, + 'timezone_identifier': stadium.timezone or None, + 'image_url': stadium.image_url or None, + }) + + file_path = output_dir / 'stadiums_canonical.json' + with open(file_path, 'w') as f: + json.dump(data, f, indent=indent) + + self.stdout.write(f' Wrote {len(data)} stadiums to {file_path}') + + def _export_games(self, output_dir, sport_filter, year_filter, indent): + """Export games to games.json.""" + self.stdout.write('Exporting games...') + + games = Game.objects.select_related( + 'sport', 'home_team', 'away_team', 'stadium' + ).all() + + if sport_filter: + games = 
games.filter(sport__code=sport_filter.lower()) + + if year_filter: + games = games.filter(game_date__year=year_filter) + + data = [] + for game in games.order_by('game_date', 'sport__code'): + # Ensure game_date is UTC-aware + game_dt = game.game_date + if game_dt.tzinfo is None: + game_dt = game_dt.replace(tzinfo=timezone.utc) + utc_dt = game_dt.astimezone(timezone.utc) + + # Extract domain from source_url + source = None + if game.source_url: + source = self._extract_domain(game.source_url) + + data.append({ + 'id': game.id, + 'sport': game.sport.short_name, + 'season': str(game.game_date.year), + 'game_datetime_utc': utc_dt.strftime('%Y-%m-%dT%H:%M:%SZ'), + 'home_team': game.home_team.full_name, + 'away_team': game.away_team.full_name, + 'home_team_abbrev': game.home_team.abbreviation, + 'away_team_abbrev': game.away_team.abbreviation, + 'home_team_canonical_id': game.home_team_id, + 'away_team_canonical_id': game.away_team_id, + 'venue': game.stadium.name if game.stadium else None, + 'stadium_canonical_id': game.stadium_id, + 'source': source, + 'is_playoff': game.is_playoff, + 'broadcast': None, # Not tracked in DB currently + }) + + file_path = output_dir / 'games.json' + with open(file_path, 'w') as f: + json.dump(data, f, indent=indent) + + self.stdout.write(f' Wrote {len(data)} games to {file_path}') + + def _extract_domain(self, url): + """Extract domain from URL (e.g., 'espn.com' from 'https://www.espn.com/...').""" + try: + parsed = urlparse(url) + domain = parsed.netloc + # Remove 'www.' 
prefix if present + if domain.startswith('www.'): + domain = domain[4:] + return domain + except Exception: + return None + + def _export_team_aliases(self, output_dir, sport_filter, indent): + """Export team aliases to team_aliases.json.""" + self.stdout.write('Exporting team aliases...') + + aliases = TeamAlias.objects.select_related('team', 'team__sport').all() + + if sport_filter: + aliases = aliases.filter(team__sport__code=sport_filter.lower()) + + # Map model alias types to export alias types + alias_type_map = { + 'full_name': 'name', + 'city_name': 'city', + 'abbreviation': 'abbreviation', + 'nickname': 'name', # Map nickname to name + 'historical': 'name', # Map historical to name + } + + data = [] + for alias in aliases.order_by('team__sport__code', 'team__id', 'id'): + # Format dates + valid_from = alias.valid_from.strftime('%Y-%m-%d') if alias.valid_from else None + valid_until = alias.valid_until.strftime('%Y-%m-%d') if alias.valid_until else None + + # Map alias type + export_type = alias_type_map.get(alias.alias_type, 'name') + + data.append({ + 'id': f"alias_{alias.team.sport.code}_{alias.pk}", + 'team_canonical_id': alias.team_id, + 'alias_type': export_type, + 'alias_value': alias.alias, + 'valid_from': valid_from, + 'valid_until': valid_until, + }) + + file_path = output_dir / 'team_aliases.json' + with open(file_path, 'w') as f: + json.dump(data, f, indent=indent) + + self.stdout.write(f' Wrote {len(data)} team aliases to {file_path}') + + def _export_stadium_aliases(self, output_dir, sport_filter, indent): + """Export stadium aliases to stadium_aliases.json.""" + self.stdout.write('Exporting stadium aliases...') + + aliases = StadiumAlias.objects.select_related('stadium', 'stadium__sport').all() + + if sport_filter: + aliases = aliases.filter(stadium__sport__code=sport_filter.lower()) + + data = [] + for alias in aliases.order_by('stadium__sport__code', 'stadium__id', 'id'): + # Format dates + valid_from = alias.valid_from.strftime('%Y-%m-%d') 
if alias.valid_from else None + valid_until = alias.valid_until.strftime('%Y-%m-%d') if alias.valid_until else None + + data.append({ + 'alias_name': alias.alias, + 'stadium_canonical_id': alias.stadium_id, + 'valid_from': valid_from, + 'valid_until': valid_until, + }) + + file_path = output_dir / 'stadium_aliases.json' + with open(file_path, 'w') as f: + json.dump(data, f, indent=indent) + + self.stdout.write(f' Wrote {len(data)} stadium aliases to {file_path}') diff --git a/core/management/commands/fix_wnba_stadiums.py b/core/management/commands/fix_wnba_stadiums.py new file mode 100644 index 0000000..5d71a15 --- /dev/null +++ b/core/management/commands/fix_wnba_stadiums.py @@ -0,0 +1,98 @@ +""" +Assign home_stadium to WNBA teams and backfill stadium on WNBA games. + +Usage: + python manage.py fix_wnba_stadiums + python manage.py fix_wnba_stadiums --dry-run +""" + +from django.core.management.base import BaseCommand + +from core.models import Team, Stadium, Game + +# WNBA team abbreviation → stadium canonical ID +WNBA_TEAM_STADIUMS = { + 'ATL': 'stadium_wnba_gateway_center_arena', + 'CHI': 'stadium_wnba_wintrust_arena', + 'CON': 'stadium_wnba_mohegan_sun_arena', + 'DAL': 'stadium_wnba_college_park_center', + 'GSV': 'stadium_wnba_chase_center', + 'IND': 'stadium_wnba_gainbridge_fieldhouse', + 'LA': 'stadium_wnba_cryptocom_arena', + 'LV': 'stadium_wnba_michelob_ultra_arena', + 'MIN': 'stadium_wnba_target_center', + 'NY': 'stadium_wnba_barclays_center', + 'PHX': 'stadium_wnba_footprint_center', + 'SEA': 'stadium_wnba_climate_pledge_arena', + 'WAS': 'stadium_wnba_entertainment_sports_arena', +} + + +class Command(BaseCommand): + help = "Assign home_stadium to WNBA teams and backfill game stadiums." 
+ + def add_arguments(self, parser): + parser.add_argument( + '--dry-run', + action='store_true', + help='Show what would change without saving', + ) + + def handle(self, *args, **options): + dry_run = options['dry_run'] + + if dry_run: + self.stdout.write(self.style.WARNING("DRY RUN — no changes will be saved")) + + # 1. Assign home_stadium to WNBA teams + self.stdout.write("\n=== Assigning WNBA team stadiums ===") + teams_updated = 0 + for abbrev, stadium_id in WNBA_TEAM_STADIUMS.items(): + try: + team = Team.objects.get(sport_id='wnba', abbreviation=abbrev) + except Team.DoesNotExist: + self.stderr.write(f" Team not found: WNBA {abbrev}") + continue + + try: + stadium = Stadium.objects.get(id=stadium_id) + except Stadium.DoesNotExist: + self.stderr.write(f" Stadium not found: {stadium_id}") + continue + + if team.home_stadium_id != stadium_id: + self.stdout.write(f" {abbrev:5} {team.city} {team.name} → {stadium.name}") + if not dry_run: + team.home_stadium = stadium + team.save(update_fields=['home_stadium', 'updated_at']) + teams_updated += 1 + + self.stdout.write(f" Teams updated: {teams_updated}") + + # 2. Backfill stadium on WNBA games missing it + self.stdout.write("\n=== Backfilling WNBA game stadiums ===") + games_missing = Game.objects.filter( + sport_id='wnba', stadium__isnull=True + ).select_related('home_team') + + games_updated = 0 + for game in games_missing: + stadium_id = WNBA_TEAM_STADIUMS.get(game.home_team.abbreviation) + if not stadium_id: + self.stderr.write(f" No stadium mapping for {game.home_team.abbreviation}: {game.id}") + continue + + self.stdout.write(f" {game.id} ({game.home_team.abbreviation} home) → {stadium_id}") + if not dry_run: + game.stadium_id = stadium_id + game.save(update_fields=['stadium', 'updated_at']) + games_updated += 1 + + self.stdout.write(f" Games updated: {games_updated}") + + # 3. 
"""
Management command to import existing JSON data into Django models.
"""
import json
from datetime import datetime
from pathlib import Path

from django.core.management.base import BaseCommand, CommandError
from django.db import transaction

from core.models import Sport, Conference, Division, Team, Stadium, Game, TeamAlias, StadiumAlias


class Command(BaseCommand):
    help = 'Import existing JSON data files into Django database'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Lookup maps for JSON ID -> Django object, populated by
        # _import_league_structure and consumed by _import_teams.
        self.divisions_by_json_id = {}
        self.conferences_by_json_id = {}

    def add_arguments(self, parser):
        parser.add_argument(
            '--data-dir',
            type=str,
            default='.',
            help='Directory containing the JSON data files'
        )
        parser.add_argument(
            '--output-dir',
            type=str,
            default='./output',
            help='Directory containing scraped output files (teams, stadiums, games)'
        )
        parser.add_argument(
            '--league-structure',
            action='store_true',
            help='Import league structure only'
        )
        parser.add_argument(
            '--team-aliases',
            action='store_true',
            help='Import team aliases only'
        )
        parser.add_argument(
            '--stadium-aliases',
            action='store_true',
            help='Import stadium aliases only'
        )
        parser.add_argument(
            '--scraped-data',
            action='store_true',
            help='Import scraped teams, stadiums, and games from output directory'
        )
        parser.add_argument(
            '--dry-run',
            action='store_true',
            help='Show what would be imported without making changes'
        )

    def handle(self, *args, **options):
        """Run the selected import steps inside a single transaction.

        With no selection flags, all steps run. --dry-run executes the steps
        and then rolls the transaction back by raising a sentinel
        CommandError from inside the atomic block.
        """
        data_dir = Path(options['data_dir'])
        output_dir = Path(options['output_dir'])
        dry_run = options['dry_run']

        # If no specific flags, import everything
        import_all = not any([
            options['league_structure'],
            options['team_aliases'],
            options['stadium_aliases'],
            options['scraped_data'],
        ])

        if dry_run:
            self.stdout.write(self.style.WARNING('DRY RUN - No changes will be made'))

        try:
            with transaction.atomic():
                # Always ensure sports exist first
                self._ensure_sports()

                if import_all or options['league_structure']:
                    self._import_league_structure(data_dir, dry_run)

                if import_all or options['scraped_data']:
                    self._import_scraped_data(output_dir, dry_run)

                if import_all or options['team_aliases']:
                    self._import_team_aliases(data_dir, dry_run)

                if import_all or options['stadium_aliases']:
                    self._import_stadium_aliases(data_dir, dry_run)

                if dry_run:
                    # Sentinel exception: aborting the atomic block is the
                    # only way to force a rollback of the work above.
                    raise CommandError('Dry run complete - rolling back')

        except CommandError as e:
            if 'Dry run' in str(e):
                self.stdout.write(self.style.SUCCESS('Dry run completed successfully'))
                # Return here so the real-import success message below is not
                # also printed on a dry run.
                return
            raise

        self.stdout.write(self.style.SUCCESS('Data import completed successfully'))

    def _ensure_sports(self):
        """Ensure all sports exist in the database."""
        sports = [
            {'code': 'mlb', 'name': 'Major League Baseball', 'short_name': 'MLB'},
            {'code': 'nba', 'name': 'National Basketball Association', 'short_name': 'NBA'},
            {'code': 'nfl', 'name': 'National Football League', 'short_name': 'NFL'},
            {'code': 'nhl', 'name': 'National Hockey League', 'short_name': 'NHL'},
            {'code': 'mls', 'name': 'Major League Soccer', 'short_name': 'MLS'},
            {'code': 'wnba', 'name': "Women's National Basketball Association", 'short_name': 'WNBA'},
            {'code': 'nwsl', 'name': "National Women's Soccer League", 'short_name': 'NWSL'},
        ]

        for sport_data in sports:
            sport, created = Sport.objects.update_or_create(
                code=sport_data['code'],
                defaults={
                    'name': sport_data['name'],
                    'short_name': sport_data['short_name'],
                }
            )
            if created:
                self.stdout.write(f'  Created sport: {sport.short_name}')

    def _import_league_structure(self, data_dir, dry_run):
        """Import conferences and divisions from league_structure.json."""
        self.stdout.write(self.style.HTTP_INFO('Importing league structure...'))

        file_path = data_dir / 'league_structure.json'
        if not file_path.exists():
            self.stdout.write(self.style.WARNING(f'  File not found: {file_path}'))
            return

        with open(file_path) as f:
            data = json.load(f)

        # First pass: conferences
        conference_count = 0
        for item in data:
            if item['type'] != 'conference':
                continue

            sport_code = item['sport'].lower()
            try:
                sport = Sport.objects.get(code=sport_code)
            except Sport.DoesNotExist:
                self.stdout.write(self.style.WARNING(f'  Sport not found: {sport_code}'))
                continue

            if not dry_run:
                conference, created = Conference.objects.update_or_create(
                    sport=sport,
                    name=item['name'],
                    defaults={
                        'canonical_id': item['id'],
                        'short_name': item.get('abbreviation') or '',
                        'order': item.get('display_order', 0),
                    }
                )
                self.conferences_by_json_id[item['id']] = conference
                if created:
                    conference_count += 1
            else:
                # Dry run: store the JSON ID itself so the division pass's
                # parent-exists check still succeeds.
                self.conferences_by_json_id[item['id']] = item['id']
                conference_count += 1

        self.stdout.write(f'  Conferences: {conference_count} created/updated')

        # Second pass: divisions
        division_count = 0
        for item in data:
            if item['type'] != 'division':
                continue

            parent_id = item.get('parent_id')
            if not parent_id or parent_id not in self.conferences_by_json_id:
                self.stdout.write(self.style.WARNING(f'  Parent conference not found for division: {item["name"]}'))
                continue

            if not dry_run:
                conference = self.conferences_by_json_id[parent_id]
                division, created = Division.objects.update_or_create(
                    conference=conference,
                    name=item['name'],
                    defaults={
                        'canonical_id': item['id'],
                        'short_name': item.get('abbreviation') or '',
                        'order': item.get('display_order', 0),
                    }
                )
                self.divisions_by_json_id[item['id']] = division
                if created:
                    division_count += 1
            else:
                division_count += 1

        self.stdout.write(f'  Divisions: {division_count} created/updated')

    def _import_team_aliases(self, data_dir, dry_run):
        """Import team aliases from team_aliases.json."""
        self.stdout.write(self.style.HTTP_INFO('Importing team aliases...'))

        file_path = data_dir / 'team_aliases.json'
        if not file_path.exists():
            self.stdout.write(self.style.WARNING(f'  File not found: {file_path}'))
            return

        with open(file_path) as f:
            data = json.load(f)

        # Map JSON alias types to model alias types
        alias_type_map = {
            'name': 'full_name',
            'city': 'city_name',
            'abbreviation': 'abbreviation',
            'nickname': 'nickname',
            'historical': 'historical',
        }

        alias_count = 0
        skipped_count = 0

        for item in data:
            team_id = item['team_canonical_id']

            # Check if team exists
            try:
                team = Team.objects.get(id=team_id)
            except Team.DoesNotExist:
                skipped_count += 1
                continue

            valid_from = None
            valid_until = None

            # Validity dates are optional and silently dropped when malformed.
            if item.get('valid_from'):
                try:
                    valid_from = datetime.strptime(item['valid_from'], '%Y-%m-%d').date()
                except ValueError:
                    pass

            if item.get('valid_until'):
                try:
                    valid_until = datetime.strptime(item['valid_until'], '%Y-%m-%d').date()
                except ValueError:
                    pass

            # Map alias type
            json_alias_type = item.get('alias_type', 'full_name')
            model_alias_type = alias_type_map.get(json_alias_type, 'full_name')

            if not dry_run:
                # Use team + alias + alias_type as unique key (no explicit ID)
                alias, created = TeamAlias.objects.update_or_create(
                    team=team,
                    alias=item['alias_value'],
                    alias_type=model_alias_type,
                    defaults={
                        'valid_from': valid_from,
                        'valid_until': valid_until,
                    }
                )
                if created:
                    alias_count += 1
            else:
                alias_count += 1

        self.stdout.write(f'  Team aliases: {alias_count} created/updated, {skipped_count} skipped (team not found)')

    def _import_stadium_aliases(self, data_dir, dry_run):
        """Import stadium aliases from stadium_aliases.json."""
        self.stdout.write(self.style.HTTP_INFO('Importing stadium aliases...'))

        file_path = data_dir / 'stadium_aliases.json'
        if not file_path.exists():
            self.stdout.write(self.style.WARNING(f'  File not found: {file_path}'))
            return

        with open(file_path) as f:
            data = json.load(f)

        alias_count = 0
        skipped_count = 0

        for item in data:
            stadium_id = item['stadium_canonical_id']

            # Check if stadium exists
            try:
                stadium = Stadium.objects.get(id=stadium_id)
            except Stadium.DoesNotExist:
                skipped_count += 1
                continue

            valid_from = None
            valid_until = None

            if item.get('valid_from'):
                try:
                    valid_from = datetime.strptime(item['valid_from'], '%Y-%m-%d').date()
                except ValueError:
                    pass

            if item.get('valid_until'):
                try:
                    valid_until = datetime.strptime(item['valid_until'], '%Y-%m-%d').date()
                except ValueError:
                    pass

            if not dry_run:
                # Use stadium + alias as unique key (no explicit ID)
                alias, created = StadiumAlias.objects.update_or_create(
                    stadium=stadium,
                    alias=item['alias_name'],
                    defaults={
                        'alias_type': 'official',
                        'valid_from': valid_from,
                        'valid_until': valid_until,
                    }
                )
                if created:
                    alias_count += 1
            else:
                alias_count += 1

        self.stdout.write(f'  Stadium aliases: {alias_count} created/updated, {skipped_count} skipped (stadium not found)')

    def _import_scraped_data(self, output_dir, dry_run):
        """Import scraped teams, stadiums, and games from output directory."""
        if not output_dir.exists():
            self.stdout.write(self.style.WARNING(f'  Output directory not found: {output_dir}'))
            return

        # Import stadiums first (teams reference them)
        self._import_stadiums(output_dir, dry_run)

        # Import teams (games reference them)
        self._import_teams(output_dir, dry_run)

        # Import games
        self._import_games(output_dir, dry_run)

    def _import_stadiums(self, output_dir, dry_run):
        """Import stadiums from per-sport stadiums_<sport>.json output files."""
        self.stdout.write(self.style.HTTP_INFO('Importing stadiums...'))

        total_count = 0
        sports = ['mlb', 'nba', 'nfl', 'nhl', 'mls', 'wnba', 'nwsl']

        for sport_code in sports:
            file_path = output_dir / f'stadiums_{sport_code}.json'
            if not file_path.exists():
                continue

            try:
                sport = Sport.objects.get(code=sport_code)
            except Sport.DoesNotExist:
                continue

            with open(file_path) as f:
                data = json.load(f)

            for item in data:
                if not dry_run:
                    Stadium.objects.update_or_create(
                        id=item['canonical_id'],
                        defaults={
                            'sport': sport,
                            'name': item['name'],
                            'city': item.get('city', ''),
                            'state': item.get('state', ''),
                            'country': 'USA',
                            'latitude': item.get('latitude'),
                            'longitude': item.get('longitude'),
                            'capacity': item.get('capacity') or None,
                            'timezone': item.get('timezone_identifier', ''),
                            'opened_year': item.get('year_opened'),
                            'image_url': item.get('image_url', '') or '',
                        }
                    )
                total_count += 1

        self.stdout.write(f'  Stadiums: {total_count} created/updated')

    def _import_teams(self, output_dir, dry_run):
        """Import teams from per-sport teams_<sport>.json output files."""
        self.stdout.write(self.style.HTTP_INFO('Importing teams...'))

        total_count = 0
        sports = ['mlb', 'nba', 'nfl', 'nhl', 'mls', 'wnba', 'nwsl']

        for sport_code in sports:
            file_path = output_dir / f'teams_{sport_code}.json'
            if not file_path.exists():
                continue

            try:
                sport = Sport.objects.get(code=sport_code)
            except Sport.DoesNotExist:
                continue

            with open(file_path) as f:
                data = json.load(f)

            for item in data:
                # Division lookup only works when _import_league_structure ran
                # earlier in this same invocation (it fills the JSON-ID map).
                division = None
                if item.get('division_id'):
                    division = self.divisions_by_json_id.get(item['division_id'])

                # Try to find home stadium
                home_stadium = None
                if item.get('stadium_canonical_id'):
                    try:
                        home_stadium = Stadium.objects.get(id=item['stadium_canonical_id'])
                    except Stadium.DoesNotExist:
                        pass

                if not dry_run:
                    Team.objects.update_or_create(
                        id=item['canonical_id'],
                        defaults={
                            'sport': sport,
                            'division': division,
                            'city': item.get('city', ''),
                            'name': item['name'],
                            'full_name': f"{item.get('city', '')} {item['name']}".strip(),
                            'abbreviation': item.get('abbreviation', ''),
                            'home_stadium': home_stadium,
                            'primary_color': item.get('primary_color', '') or '',
                            'secondary_color': item.get('secondary_color', '') or '',
                        }
                    )
                total_count += 1

        self.stdout.write(f'  Teams: {total_count} created/updated')

    def _import_games(self, output_dir, dry_run):
        """Import games from games_*.json output files."""
        self.stdout.write(self.style.HTTP_INFO('Importing games...'))

        total_count = 0
        error_count = 0

        # Find all games files
        game_files = list(output_dir.glob('games_*.json'))

        for file_path in game_files:
            # Parse sport code from filename (e.g., games_mlb_2026.json)
            parts = file_path.stem.split('_')
            if len(parts) < 2:
                continue

            sport_code = parts[1]

            try:
                sport = Sport.objects.get(code=sport_code)
            except Sport.DoesNotExist:
                continue

            with open(file_path) as f:
                data = json.load(f)

            for item in data:
                try:
                    # Get teams
                    home_team = Team.objects.get(id=item['home_team_canonical_id'])
                    away_team = Team.objects.get(id=item['away_team_canonical_id'])

                    # Get stadium (optional)
                    stadium = None
                    if item.get('stadium_canonical_id'):
                        try:
                            stadium = Stadium.objects.get(id=item['stadium_canonical_id'])
                        except Stadium.DoesNotExist:
                            pass

                    # Parse datetime (ISO 8601 UTC; fromisoformat on older
                    # Pythons does not accept a trailing 'Z')
                    game_date = datetime.fromisoformat(
                        item['game_datetime_utc'].replace('Z', '+00:00')
                    )

                    # Parse season (may be "2025" or "2025-26")
                    season_str = str(item.get('season', game_date.year))
                    season = int(season_str.split('-')[0])

                    if not dry_run:
                        Game.objects.update_or_create(
                            id=item['canonical_id'],
                            defaults={
                                'sport': sport,
                                'season': season,
                                'home_team': home_team,
                                'away_team': away_team,
                                'stadium': stadium,
                                'game_date': game_date,
                                'status': 'scheduled',
                                'is_playoff': item.get('is_playoff', False),
                            }
                        )
                    total_count += 1

                # ValueError covers malformed game_datetime_utc / season
                # strings; without it one bad record aborts the whole
                # atomic import.
                except (Team.DoesNotExist, KeyError, ValueError) as e:
                    error_count += 1
                    if error_count <= 5:
                        self.stdout.write(self.style.WARNING(f'  Error importing game: {e}'))

        self.stdout.write(f'  Games: {total_count} created/updated, {error_count} errors')
"""
Scrape stadium capacity and year-opened from Wikipedia and update local DB.

Wikipedia pages used:
  - NBA: List_of_NBA_arenas
  - NFL: List_of_current_NFL_stadiums
  - MLB: List_of_current_Major_League_Baseball_stadiums
  - NHL: List_of_NHL_arenas
  - MLS: List_of_Major_League_Soccer_stadiums
  - WNBA: Women's_National_Basketball_Association
  - NWSL: List_of_National_Women's_Soccer_League_stadiums

Usage:
    python manage.py populate_stadium_details
    python manage.py populate_stadium_details --sport nba
    python manage.py populate_stadium_details --dry-run
"""

import re

import requests
from bs4 import BeautifulSoup
from django.core.management.base import BaseCommand

from core.models import Stadium

WIKI_API = "https://en.wikipedia.org/w/api.php"

# (page_title, table_index, name_col, capacity_col, opened_col)
WIKI_SOURCES = {
    "nba": ("List_of_NBA_arenas", 0, "Arena", "Capacity", "Opened"),
    "nfl": ("List_of_current_NFL_stadiums", 0, "Name", "Capacity", "Opened"),
    "mlb": ("List_of_current_Major_League_Baseball_stadiums", 0, "Name", "Capacity", "Opened"),
    "nhl": ("List_of_NHL_arenas", 0, "Arena", "Capacity", "Opened"),
    "mls": ("List_of_Major_League_Soccer_stadiums", 1, "Stadium", "Capacity", "Opened"),
    "wnba": ("Women's_National_Basketball_Association", 1, "Arena", "Capacity", None),
    "nwsl": ("List_of_National_Women's_Soccer_League_stadiums", 0, "Stadium", "Capacity", None),
}

# Wikipedia name → list of our possible stadium names (for fuzzy matching)
NAME_OVERRIDES = {
    # NBA
    "Rocket Arena": ["Rocket Mortgage FieldHouse"],
    "Mortgage Matchup Center": [],  # skip — not in our DB
    "Xfinity Mobile Arena": ["Footprint Center"],  # Phoenix — renamed
    # NHL
    "Lenovo Center": ["PNC Arena"],  # Carolina — renamed
    "Benchmark International Arena": ["Amalie Arena"],  # Tampa — renamed
    "Grand Casino Arena": ["Xcel Energy Center"],  # Minnesota — renamed
    # MLS
    "Energizer Park": ["CITYPARK"],  # St. Louis — renamed
    "Saputo Stadium": ["Stade Saputo"],  # Montreal — same stadium, French name
    "ScottsMiracle-Gro Field": ["Lower.com Field"],  # Columbus — renamed
    "Sporting Park": ["Children's Mercy Park"],  # KC — renamed
    "Sports Illustrated Stadium": [],  # skip — may not be in our DB yet
    # NWSL
    "CPKC Stadium": ["Children's Mercy Park"],  # KC shared name
}


class Command(BaseCommand):
    help = "Populate stadium capacity and opened_year from Wikipedia."

    def add_arguments(self, parser):
        parser.add_argument(
            "--sport",
            type=str,
            choices=list(WIKI_SOURCES.keys()),
            help="Only process a single sport",
        )
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Show what would change without saving",
        )

    def handle(self, *args, **options):
        """Process each selected sport's Wikipedia table and print a summary."""
        sport_filter = options["sport"]
        dry_run = options["dry_run"]

        sports = [sport_filter] if sport_filter else list(WIKI_SOURCES.keys())

        if dry_run:
            self.stdout.write(self.style.WARNING("DRY RUN — no changes will be saved"))

        for sport_code in sports:
            self._process_sport(sport_code, dry_run)

        self._print_summary()

    def _process_sport(self, sport_code, dry_run):
        """Fetch one sport's Wikipedia table and update matching DB stadiums."""
        page, table_idx, name_col, cap_col, opened_col = WIKI_SOURCES[sport_code]

        self.stdout.write(f"\n{'='*60}")
        self.stdout.write(self.style.HTTP_INFO(f"Processing {sport_code.upper()} — Wikipedia: {page}"))
        self.stdout.write(f"{'='*60}")

        # Fetch Wikipedia page
        wiki_data = self._fetch_wiki_table(page, table_idx, name_col, cap_col, opened_col)
        if not wiki_data:
            self.stderr.write(self.style.ERROR("  Failed to parse Wikipedia table"))
            return

        self.stdout.write(f"  Wikipedia returned {len(wiki_data)} venues")

        # Get our stadiums for this sport
        db_stadiums = Stadium.objects.filter(sport_id=sport_code)
        # Build lookup: normalized name → stadium
        stadium_lookup = {}
        for s in db_stadiums:
            stadium_lookup[self._normalize_name(s.name)] = s

        matched = 0
        updated = 0
        unmatched_wiki = []
        # Record matched stadium IDs during this pass so we don't have to
        # re-run the fuzzy matcher over every wiki venue a second time.
        matched_ids = set()

        for wiki_name, info in wiki_data.items():
            stadium = self._find_stadium(wiki_name, stadium_lookup)
            if not stadium:
                unmatched_wiki.append(wiki_name)
                continue

            matched += 1
            matched_ids.add(stadium.id)
            changes = []

            capacity = info.get("capacity")
            opened = info.get("opened")

            if capacity and (stadium.capacity is None or stadium.capacity != capacity):
                changes.append(f"capacity: {stadium.capacity} → {capacity}")
                if not dry_run:
                    stadium.capacity = capacity

            if opened and (stadium.opened_year is None or stadium.opened_year != opened):
                changes.append(f"opened_year: {stadium.opened_year} → {opened}")
                if not dry_run:
                    stadium.opened_year = opened

            if changes:
                updated += 1
                self.stdout.write(f"  {stadium.name}")
                for c in changes:
                    self.stdout.write(f"    {c}")
                if not dry_run:
                    update_fields = ["updated_at"]
                    if capacity:
                        update_fields.append("capacity")
                    if opened:
                        update_fields.append("opened_year")
                    stadium.save(update_fields=update_fields)

        self.stdout.write(f"\n  Matched: {matched} | Updated: {updated}")

        if unmatched_wiki:
            self.stdout.write(self.style.WARNING(
                f"  Wiki venues with no DB match ({len(unmatched_wiki)}):"
            ))
            for name in sorted(unmatched_wiki):
                self.stdout.write(f"    - {name}")

        # Check for DB stadiums that didn't match any wiki venue.
        unmatched_db = [s for s in db_stadiums if s.id not in matched_ids]
        if unmatched_db:
            self.stdout.write(self.style.WARNING(
                f"  DB stadiums with no Wiki match ({len(unmatched_db)}):"
            ))
            for s in sorted(unmatched_db, key=lambda x: x.name):
                self.stdout.write(f"    - {s.name} ({s.id})")

    def _fetch_wiki_table(self, page, table_idx, name_col, cap_col, opened_col):
        """Fetch and parse a Wikipedia table. Returns {name: {capacity, opened}}."""
        params = {
            "action": "parse",
            "page": page,
            "prop": "text",
            "format": "json",
            "redirects": "true",
        }

        headers = {
            "User-Agent": "SportsTimeBot/1.0 (stadium metadata; contact@example.com)",
        }

        try:
            resp = requests.get(WIKI_API, params=params, headers=headers, timeout=15)
            resp.raise_for_status()
            data = resp.json()
        except requests.RequestException as e:
            self.stderr.write(f"  Failed to fetch Wikipedia: {e}")
            return None

        if "error" in data:
            self.stderr.write(f"  Wikipedia error: {data['error']['info']}")
            return None

        html = data["parse"]["text"]["*"]
        soup = BeautifulSoup(html, "lxml")
        tables = soup.find_all("table", class_="wikitable")

        if table_idx >= len(tables):
            self.stderr.write(f"  Table index {table_idx} out of range ({len(tables)} tables)")
            return None

        table = tables[table_idx]
        return self._parse_table(table, name_col, cap_col, opened_col)

    def _parse_table(self, table, name_col, cap_col, opened_col):
        """Parse an HTML table into {name: {capacity, opened}}.

        Handles rowspan by detecting column count mismatches and adjusting indices.
        """
        result = {}

        # Get header indices from the actual row
        header_row = table.find("tr")
        if not header_row:
            return result

        headers = [th.get_text(strip=True) for th in header_row.find_all("th")]
        expected_cols = len(headers)

        name_idx = self._find_col_idx(headers, name_col)
        cap_idx = self._find_col_idx(headers, cap_col)
        opened_idx = self._find_col_idx(headers, opened_col) if opened_col else None

        if name_idx is None or cap_idx is None:
            self.stderr.write(f"  Could not find columns: name_col={name_col}({name_idx}), cap_col={cap_col}({cap_idx})")
            self.stderr.write(f"  Available headers: {headers}")
            return result

        rows = table.find_all("tr")[1:]  # Skip header
        for row in rows:
            cells = row.find_all(["td", "th"])
            actual_cols = len(cells)

            # When a row has fewer cells than headers, a rowspan column is
            # spanning from a previous row. Shift indices down by the difference.
            offset = expected_cols - actual_cols
            adj_name = name_idx - offset
            adj_cap = cap_idx - offset
            adj_opened = (opened_idx - offset) if opened_idx is not None else None

            if adj_name < 0 or adj_cap < 0 or adj_name >= actual_cols or adj_cap >= actual_cols:
                continue

            name = cells[adj_name].get_text(strip=True)
            # Clean up name — remove citation marks
            name = re.sub(r"\[.*?\]", "", name).strip()
            # Remove daggers and asterisks
            name = re.sub(r"[†‡*♠§#]", "", name).strip()

            if not name:
                continue

            # Parse capacity
            cap_text = cells[adj_cap].get_text(strip=True)
            capacity = self._parse_capacity(cap_text)

            # Parse opened year
            opened = None
            if adj_opened is not None and 0 <= adj_opened < actual_cols:
                opened_text = cells[adj_opened].get_text(strip=True)
                opened = self._parse_year(opened_text)

            result[name] = {"capacity": capacity, "opened": opened}

        return result

    def _find_col_idx(self, headers, col_name):
        """Find column index by name (fuzzy match)."""
        if col_name is None:
            return None
        col_lower = col_name.lower()
        for i, h in enumerate(headers):
            if col_lower in h.lower():
                return i
        return None

    def _parse_capacity(self, text):
        """Extract numeric capacity from text like '18,000' or '20,000[1]'."""
        # Remove citations and parenthetical notes
        text = re.sub(r"\[.*?\]", "", text)
        text = re.sub(r"\(.*?\)", "", text)
        # Find first number with commas
        match = re.search(r"[\d,]+", text)
        if match:
            try:
                return int(match.group().replace(",", ""))
            except ValueError:
                pass
        return None

    def _parse_year(self, text):
        """Extract a 4-digit year from text."""
        text = re.sub(r"\[.*?\]", "", text)
        match = re.search(r"\b((?:19|20)\d{2})\b", text)
        if match:
            return int(match.group(1))
        return None

    def _normalize_name(self, name):
        """Normalize stadium name for matching."""
        name = name.lower()
        name = re.sub(r"[''`.]", "", name)
        name = re.sub(r"\s+", " ", name).strip()
        return name

    def _find_stadium(self, wiki_name, stadium_lookup):
        """Find a stadium in our DB by Wikipedia name."""
        # Check overrides first (empty list = explicitly skip)
        if wiki_name in NAME_OVERRIDES:
            override_names = NAME_OVERRIDES[wiki_name]
            if not override_names:
                return None  # Explicitly skip
            for alt in override_names:
                alt_norm = self._normalize_name(alt)
                if alt_norm in stadium_lookup:
                    return stadium_lookup[alt_norm]

        # Direct normalized match
        normalized = self._normalize_name(wiki_name)
        if normalized in stadium_lookup:
            return stadium_lookup[normalized]

        # Fuzzy: check if wiki name is a substring of any DB name or vice versa
        for db_norm, stadium in stadium_lookup.items():
            if normalized in db_norm or db_norm in normalized:
                return stadium

        return None

    def _print_summary(self):
        """Print DB-wide coverage counts for capacity, opened_year, image_url."""
        self.stdout.write(f"\n{'='*60}")
        self.stdout.write(self.style.HTTP_INFO("Summary"))
        self.stdout.write(f"{'='*60}")

        total = Stadium.objects.count()
        has_cap = Stadium.objects.exclude(capacity__isnull=True).count()
        has_year = Stadium.objects.exclude(opened_year__isnull=True).count()
        has_img = Stadium.objects.exclude(image_url="").count()

        self.stdout.write(f"  Total stadiums: {total}")
        self.stdout.write(f"  With capacity: {has_cap}")
        self.stdout.write(f"  With opened_year: {has_year}")
        self.stdout.write(f"  With image_url: {has_img}")
"""
Fetch stadium image URLs from ESPN's per-team API.

ESPN provides venue images for NBA, NFL, MLB, NHL via each team's
franchise.venue.images field. MLS/WNBA/NWSL are not available.

Usage:
    python manage.py populate_stadium_images
    python manage.py populate_stadium_images --sport nba
    python manage.py populate_stadium_images --dry-run
"""

import time

import requests
from django.core.management.base import BaseCommand

from core.models import Team, Stadium

# ESPN sport path segments (only sports with franchise.venue data)
ESPN_SPORT_PATHS = {
    "nba": "basketball/nba",
    "nfl": "football/nfl",
    "mlb": "baseball/mlb",
    "nhl": "hockey/nhl",
}

# ESPN abbreviation → slug overrides (where abbreviation != URL slug)
ESPN_SLUG_OVERRIDES = {
    "nba": {"GS": "gs", "NO": "no", "NY": "ny", "SA": "sa", "UTAH": "utah", "WSH": "wsh"},
    "nfl": {"WSH": "wsh"},
    "mlb": {"WSH": "wsh", "ATH": "ath"},
    "nhl": {"WSH": "wsh", "UTAH": "utah"},
}

# Our abbreviation → ESPN abbreviation (reverse of team metadata overrides)
OUR_TO_ESPN_ABBREV = {
    "nba": {"GSW": "GS", "NOP": "NO", "NYK": "NY", "SAS": "SA", "UTA": "UTAH", "WAS": "WSH"},
    "nfl": {"WAS": "WSH"},
    "mlb": {"WSN": "WSH", "OAK": "ATH"},
    "nhl": {"WAS": "WSH", "ARI": "UTAH"},
}


class Command(BaseCommand):
    help = "Populate stadium image_url from ESPN venue data (NBA, NFL, MLB, NHL)."

    def add_arguments(self, parser):
        parser.add_argument(
            "--sport",
            type=str,
            choices=list(ESPN_SPORT_PATHS.keys()),
            help="Only process a single sport",
        )
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Show what would change without saving",
        )

    def handle(self, *args, **options):
        """Fetch venue images for each selected sport, then print a summary."""
        sport_filter = options["sport"]
        dry_run = options["dry_run"]

        sports = [sport_filter] if sport_filter else list(ESPN_SPORT_PATHS.keys())

        if dry_run:
            self.stdout.write(self.style.WARNING("DRY RUN — no changes will be saved"))

        for sport_code in sports:
            self._process_sport(sport_code, dry_run)

        self._print_summary()

    def _process_sport(self, sport_code, dry_run):
        """Fetch each team's ESPN page and copy the venue image to its stadium."""
        self.stdout.write(f"\n{'='*60}")
        self.stdout.write(self.style.HTTP_INFO(f"Processing {sport_code.upper()} stadiums"))
        self.stdout.write(f"{'='*60}")

        sport_path = ESPN_SPORT_PATHS[sport_code]
        abbrev_map = OUR_TO_ESPN_ABBREV.get(sport_code, {})

        # Get teams with home stadiums
        teams = Team.objects.filter(
            sport_id=sport_code,
            home_stadium__isnull=False,
        ).select_related("home_stadium")

        # Stadiums we've already handled this run (shared arenas appear once).
        processed_ids = set()
        updated = 0
        failed = 0

        for team in teams:
            stadium = team.home_stadium
            # Skip if already handled this run
            if stadium.id in processed_ids:
                continue
            # Skip if already has image (dry run still fetches, to show
            # what a real run against a fresh DB would do)
            if stadium.image_url and not dry_run:
                processed_ids.add(stadium.id)
                continue

            # Build ESPN team slug (lowercase abbreviation)
            espn_abbrev = abbrev_map.get(team.abbreviation, team.abbreviation)
            slug = espn_abbrev.lower()

            url = f"https://site.api.espn.com/apis/site/v2/sports/{sport_path}/teams/{slug}"

            try:
                resp = requests.get(url, timeout=10)
                resp.raise_for_status()
                data = resp.json()
            except requests.RequestException as e:
                self.stderr.write(f"  {team.abbreviation:6} FAILED: {e}")
                failed += 1
                time.sleep(0.3)
                continue

            # Extract venue image (first image entry, if any)
            venue = data.get("team", {}).get("franchise", {}).get("venue", {})
            images = venue.get("images", [])
            # .get guards against an image entry without an 'href' key
            image_url = images[0].get("href", "") if images else ""

            if image_url and stadium.image_url != image_url:
                self.stdout.write(f"  {team.abbreviation:6} {stadium.name}")
                self.stdout.write(f"    image_url → {image_url}")
                if not dry_run:
                    stadium.image_url = image_url
                    stadium.save(update_fields=["image_url", "updated_at"])
                # Count actual (or would-be, in dry run) changes only —
                # processed_ids also holds skipped/no-image stadiums.
                updated += 1
            elif not image_url:
                self.stdout.write(self.style.WARNING(
                    f"  {team.abbreviation:6} {stadium.name} — no image from ESPN"
                ))

            processed_ids.add(stadium.id)
            time.sleep(0.2)  # Rate limiting

        self.stdout.write(f"\n  Stadiums processed: {len(processed_ids)} | Updated: {updated} | Failed: {failed}")

    def _print_summary(self):
        """Print DB-wide image coverage counts."""
        self.stdout.write(f"\n{'='*60}")
        self.stdout.write(self.style.HTTP_INFO("Summary"))
        self.stdout.write(f"{'='*60}")

        total = Stadium.objects.count()
        has_image = Stadium.objects.exclude(image_url="").count()
        self.stdout.write(f"  Total stadiums: {total}")
        self.stdout.write(f"  With image_url: {has_image}")
        self.stdout.write(f"  Missing image_url: {total - has_image}")
"""
Fetch team logos, colors, and MLS division assignments from ESPN's public API.

Usage:
    python manage.py populate_team_metadata             # all sports
    python manage.py populate_team_metadata --sport nba
    python manage.py populate_team_metadata --dry-run
"""

import requests
from django.core.management.base import BaseCommand

from core.models import Team, Sport, Conference, Division

# Public ESPN "site API" team-list endpoints, keyed by our sport code.
ESPN_ENDPOINTS = {
    "nba": "https://site.api.espn.com/apis/site/v2/sports/basketball/nba/teams",
    "nfl": "https://site.api.espn.com/apis/site/v2/sports/football/nfl/teams",
    "mlb": "https://site.api.espn.com/apis/site/v2/sports/baseball/mlb/teams",
    "nhl": "https://site.api.espn.com/apis/site/v2/sports/hockey/nhl/teams",
    "mls": "https://site.api.espn.com/apis/site/v2/sports/soccer/usa.1/teams",
    "wnba": "https://site.api.espn.com/apis/site/v2/sports/basketball/wnba/teams",
    "nwsl": "https://site.api.espn.com/apis/site/v2/sports/soccer/usa.nwsl/teams",
}

# ESPN abbreviation → our abbreviation (where they differ)
ABBREV_OVERRIDES = {
    "nba": {"GS": "GSW", "NO": "NOP", "NY": "NYK", "SA": "SAS", "UTAH": "UTA", "WSH": "WAS"},
    "nfl": {"WSH": "WAS"},
    "mlb": {"WSH": "WSN", "ATH": "OAK"},
    # NOTE(review): UTAH→ARI maps the relocated Utah franchise onto the old
    # Arizona ID — confirm this matches the canonical NHL team IDs in the DB.
    "nhl": {"WSH": "WAS", "UTAH": "ARI"},
    "mls": {"ATX": "AUS", "NY": "RB", "RSL": "SLC", "LA": "LAG"},
    "wnba": {"GS": "GSV", "WSH": "WAS"},
    # NOTE(review): some NWSL entries (e.g. LOU→RGN, WAS→WSH) don't follow the
    # pattern of the other leagues — verify against our NWSL abbreviations.
    "nwsl": {
        "LA": "ANG",
        "GFC": "NJY",
        "KC": "KCC",
        "NC": "NCC",
        "LOU": "RGN",
        "SD": "SDW",
        "WAS": "WSH",
    },
}

# MLS conference assignments (from mls.py scrape_teams)
# NOTE(review): NSH appears under Western here, but Nashville SC moved to the
# Eastern Conference in 2023 — confirm against mls.py scrape_teams.
MLS_CONFERENCES = {
    "Eastern": [
        "ATL", "CLT", "CHI", "CIN", "CLB", "DC", "MIA", "MTL",
        "NE", "NYC", "RB", "ORL", "PHI", "TOR",
    ],
    "Western": [
        "AUS", "COL", "DAL", "HOU", "LAG", "LAFC", "MIN", "NSH",
        "POR", "SLC", "SD", "SJ", "SEA", "SKC", "STL", "VAN",
    ],
}


class Command(BaseCommand):
    """Populate Team.logo_url / primary_color / secondary_color from ESPN's
    public API, and assign MLS teams to their Eastern/Western divisions."""

    help = "Populate team logo_url, primary_color, secondary_color from ESPN, and assign MLS divisions."

    def add_arguments(self, parser):
        parser.add_argument(
            "--sport",
            type=str,
            choices=list(ESPN_ENDPOINTS.keys()),
            help="Only process a single sport",
        )
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Show what would change without saving",
        )

    def handle(self, *args, **options):
        sport_filter = options["sport"]
        dry_run = options["dry_run"]

        sports = [sport_filter] if sport_filter else list(ESPN_ENDPOINTS.keys())

        if dry_run:
            self.stdout.write(self.style.WARNING("DRY RUN — no changes will be saved"))

        for sport_code in sports:
            self._process_sport(sport_code, dry_run)

        # MLS division assignment only makes sense when MLS itself was processed.
        if "mls" in sports:
            self._assign_mls_divisions(dry_run)

        self._print_summary()

    def _process_sport(self, sport_code, dry_run):
        """Fetch ESPN metadata for one sport and apply it to matching DB teams.

        Teams are matched by abbreviation after remapping via ABBREV_OVERRIDES;
        unmatched teams on either side are reported but left untouched.
        """
        self.stdout.write(f"\n{'='*60}")
        self.stdout.write(self.style.HTTP_INFO(f"Processing {sport_code.upper()}"))
        self.stdout.write(f"{'='*60}")

        url = ESPN_ENDPOINTS[sport_code]
        try:
            resp = requests.get(url, timeout=15)
            resp.raise_for_status()
            # ValueError (incl. json.JSONDecodeError) covers a 200 response
            # whose body is not valid JSON.
            data = resp.json()
        except (requests.RequestException, ValueError) as e:
            self.stderr.write(self.style.ERROR(f" Failed to fetch {url}: {e}"))
            return

        # Parse ESPN response
        espn_teams = self._parse_espn_teams(data, sport_code)
        if not espn_teams:
            self.stderr.write(self.style.ERROR(" No teams found in ESPN response"))
            return

        self.stdout.write(f" ESPN returned {len(espn_teams)} teams")

        # Our DB teams for this sport; entries are popped as they match so the
        # leftovers can be reported as "no ESPN match".
        db_teams = Team.objects.filter(sport_id=sport_code)
        db_abbrevs = {t.abbreviation: t for t in db_teams}

        overrides = ABBREV_OVERRIDES.get(sport_code, {})

        matched = 0
        updated = 0
        unmatched_espn = []

        for espn_abbrev, meta in espn_teams.items():
            # Remap ESPN abbreviation to ours
            our_abbrev = overrides.get(espn_abbrev, espn_abbrev)

            team = db_abbrevs.pop(our_abbrev, None)
            if not team:
                unmatched_espn.append(
                    f"{espn_abbrev} (mapped→{our_abbrev})" if espn_abbrev != our_abbrev else espn_abbrev
                )
                continue

            matched += 1
            changes = []
            changed_fields = []

            if meta["logo_url"] and team.logo_url != meta["logo_url"]:
                changes.append(f"logo_url → {meta['logo_url'][:60]}…")
                if not dry_run:
                    team.logo_url = meta["logo_url"]
                    changed_fields.append("logo_url")

            if meta["primary_color"] and team.primary_color != meta["primary_color"]:
                changes.append(f"primary_color → {meta['primary_color']}")
                if not dry_run:
                    team.primary_color = meta["primary_color"]
                    changed_fields.append("primary_color")

            if meta["secondary_color"] and team.secondary_color != meta["secondary_color"]:
                changes.append(f"secondary_color → {meta['secondary_color']}")
                if not dry_run:
                    team.secondary_color = meta["secondary_color"]
                    changed_fields.append("secondary_color")

            if changes:
                updated += 1
                self.stdout.write(f" {team.abbreviation:6} {team.full_name}")
                for c in changes:
                    self.stdout.write(f" {c}")
                if not dry_run:
                    # Write only the columns that actually changed (plus
                    # updated_at) so history records stay minimal.
                    team.save(update_fields=changed_fields + ["updated_at"])

        # Report
        self.stdout.write(f"\n Matched: {matched} | Updated: {updated}")

        if unmatched_espn:
            self.stdout.write(self.style.WARNING(
                f" ESPN teams with no DB match: {', '.join(sorted(unmatched_espn))}"
            ))

        if db_abbrevs:
            missing = ", ".join(sorted(db_abbrevs.keys()))
            self.stdout.write(self.style.WARNING(f" DB teams with no ESPN match: {missing}"))

    def _parse_espn_teams(self, data, sport_code):
        """Extract abbreviation → {logo_url, primary_color, secondary_color} from ESPN response."""
        result = {}

        try:
            teams_list = data["sports"][0]["leagues"][0]["teams"]
        except (KeyError, IndexError, TypeError):
            # Unexpected payload shape — treated as "no teams" by the caller.
            # TypeError covers a non-dict/list payload.
            return result

        for entry in teams_list:
            team = entry.get("team", {})
            abbrev = team.get("abbreviation", "")
            if not abbrev:
                continue

            color = team.get("color", "")
            alt_color = team.get("alternateColor", "")
            logos = team.get("logos", [])
            logo_url = logos[0]["href"] if logos else ""

            result[abbrev] = {
                "logo_url": logo_url,
                # ESPN returns bare hex digits; normalize to a leading "#".
                "primary_color": f"#{color}" if color else "",
                "secondary_color": f"#{alt_color}" if alt_color else "",
            }

        return result

    def _assign_mls_divisions(self, dry_run):
        """Ensure the MLS Eastern/Western conferences+divisions exist and assign teams.

        Conference/Division rows are only created on a live run; in dry-run
        mode the intended (re)assignments are printed without touching the DB.
        """
        self.stdout.write(f"\n{'='*60}")
        self.stdout.write(self.style.HTTP_INFO("Assigning MLS divisions"))
        self.stdout.write(f"{'='*60}")

        try:
            mls_sport = Sport.objects.get(code="mls")
        except Sport.DoesNotExist:
            self.stderr.write(self.style.ERROR(" MLS sport not found in DB"))
            return

        # Build reverse lookup: abbreviation → conference name
        abbrev_to_conf = {}
        for conf_name, abbrevs in MLS_CONFERENCES.items():
            for abbrev in abbrevs:
                abbrev_to_conf[abbrev] = conf_name

        # Pre-create conferences and divisions (skip in dry-run)
        division_cache = {}  # conf_name → Division
        if not dry_run:
            for conf_name in MLS_CONFERENCES:
                conference, conf_created = Conference.objects.get_or_create(
                    sport=mls_sport,
                    name=f"{conf_name} Conference",
                    defaults={"short_name": conf_name[:4], "order": 0 if conf_name == "Eastern" else 1},
                )
                if conf_created:
                    self.stdout.write(f" Created conference: {conference}")

                division, div_created = Division.objects.get_or_create(
                    conference=conference,
                    name=conf_name,
                    defaults={"short_name": conf_name[:4], "order": 0},
                )
                if div_created:
                    self.stdout.write(f" Created division: {division}")

                division_cache[conf_name] = division

        assigned = 0
        for team in Team.objects.filter(sport=mls_sport):
            conf_name = abbrev_to_conf.get(team.abbreviation)
            if not conf_name:
                self.stdout.write(self.style.WARNING(f" {team.abbreviation} not in conference map — skipping"))
                continue

            if dry_run:
                # Mirror the live-run condition: a team would be (re)assigned
                # when it has no division OR its division differs from the
                # target. (Previously dry-run only counted teams with NO
                # division, so it under-reported pending reassignments.)
                if team.division is None or team.division.name != conf_name:
                    self.stdout.write(f" {team.abbreviation:6} → {conf_name}")
                    assigned += 1
            else:
                division = division_cache[conf_name]
                if team.division != division:
                    self.stdout.write(f" {team.abbreviation:6} → {division}")
                    assigned += 1
                    team.division = division
                    team.save(update_fields=["division", "updated_at"])

        self.stdout.write(f"\n Divisions assigned: {assigned}")

    def _print_summary(self):
        """Print coverage stats across ALL teams (not only the sports processed this run)."""
        self.stdout.write(f"\n{'='*60}")
        self.stdout.write(self.style.HTTP_INFO("Summary"))
        self.stdout.write(f"{'='*60}")

        total = Team.objects.count()
        missing_logo = Team.objects.filter(logo_url="").count()
        missing_color = Team.objects.filter(primary_color="").count()
        missing_div = Team.objects.filter(division__isnull=True).count()

        self.stdout.write(f" Total teams: {total}")
        self.stdout.write(f" Missing logo: {missing_logo}")
        self.stdout.write(f" Missing color: {missing_color}")
        self.stdout.write(f" Missing division: {missing_div}")
max_length=100)), + ('short_name', models.CharField(help_text='Short name (e.g., NBA)', max_length=20)), + ('season_type', models.CharField(choices=[('split', 'Split Year (e.g., 2024-25)'), ('single', 'Single Year (e.g., 2024)')], help_text='Whether season spans two years or one', max_length=10)), + ('expected_game_count', models.PositiveIntegerField(default=0, help_text='Expected number of regular season games')), + ('season_start_month', models.PositiveSmallIntegerField(default=1, help_text='Month when season typically starts (1-12)')), + ('season_end_month', models.PositiveSmallIntegerField(default=12, help_text='Month when season typically ends (1-12)')), + ('is_active', models.BooleanField(default=True, help_text='Whether this sport is actively being scraped')), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ], + options={ + 'verbose_name': 'Sport', + 'verbose_name_plural': 'Sports', + 'ordering': ['name'], + }, + ), + migrations.CreateModel( + name='Division', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=50)), + ('short_name', models.CharField(blank=True, help_text='Short name', max_length=10)), + ('order', models.PositiveSmallIntegerField(default=0, help_text='Display order')), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('conference', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='divisions', to='core.conference')), + ], + options={ + 'verbose_name': 'Division', + 'verbose_name_plural': 'Divisions', + 'ordering': ['conference', 'order', 'name'], + 'unique_together': {('conference', 'name')}, + }, + ), + migrations.CreateModel( + name='HistoricalDivision', + fields=[ + ('id', models.BigIntegerField(auto_created=True, blank=True, db_index=True, verbose_name='ID')), + ('name', 
models.CharField(max_length=50)), + ('short_name', models.CharField(blank=True, help_text='Short name', max_length=10)), + ('order', models.PositiveSmallIntegerField(default=0, help_text='Display order')), + ('created_at', models.DateTimeField(blank=True, editable=False)), + ('updated_at', models.DateTimeField(blank=True, editable=False)), + ('history_id', models.AutoField(primary_key=True, serialize=False)), + ('history_date', models.DateTimeField(db_index=True)), + ('history_change_reason', models.CharField(max_length=100, null=True)), + ('history_type', models.CharField(choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')], max_length=1)), + ('conference', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='core.conference')), + ('history_user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)), + ], + options={ + 'verbose_name': 'historical Division', + 'verbose_name_plural': 'historical Divisions', + 'ordering': ('-history_date', '-history_id'), + 'get_latest_by': ('history_date', 'history_id'), + }, + bases=(simple_history.models.HistoricalChanges, models.Model), + ), + migrations.CreateModel( + name='HistoricalSport', + fields=[ + ('code', models.CharField(db_index=True, help_text='Sport code (e.g., nba, mlb, nfl)', max_length=10)), + ('name', models.CharField(help_text='Full name (e.g., National Basketball Association)', max_length=100)), + ('short_name', models.CharField(help_text='Short name (e.g., NBA)', max_length=20)), + ('season_type', models.CharField(choices=[('split', 'Split Year (e.g., 2024-25)'), ('single', 'Single Year (e.g., 2024)')], help_text='Whether season spans two years or one', max_length=10)), + ('expected_game_count', models.PositiveIntegerField(default=0, help_text='Expected number of regular season games')), + ('season_start_month', 
models.PositiveSmallIntegerField(default=1, help_text='Month when season typically starts (1-12)')), + ('season_end_month', models.PositiveSmallIntegerField(default=12, help_text='Month when season typically ends (1-12)')), + ('is_active', models.BooleanField(default=True, help_text='Whether this sport is actively being scraped')), + ('created_at', models.DateTimeField(blank=True, editable=False)), + ('updated_at', models.DateTimeField(blank=True, editable=False)), + ('history_id', models.AutoField(primary_key=True, serialize=False)), + ('history_date', models.DateTimeField(db_index=True)), + ('history_change_reason', models.CharField(max_length=100, null=True)), + ('history_type', models.CharField(choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')], max_length=1)), + ('history_user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)), + ], + options={ + 'verbose_name': 'historical Sport', + 'verbose_name_plural': 'historical Sports', + 'ordering': ('-history_date', '-history_id'), + 'get_latest_by': ('history_date', 'history_id'), + }, + bases=(simple_history.models.HistoricalChanges, models.Model), + ), + migrations.CreateModel( + name='HistoricalStadium', + fields=[ + ('id', models.CharField(db_index=True, help_text='Canonical ID (e.g., stadium_nba_los_angeles_lakers)', max_length=100)), + ('name', models.CharField(help_text='Current stadium name', max_length=200)), + ('city', models.CharField(max_length=100)), + ('state', models.CharField(blank=True, help_text='State/Province (blank for international)', max_length=100)), + ('country', models.CharField(default='USA', max_length=100)), + ('latitude', models.DecimalField(blank=True, decimal_places=6, max_digits=9, null=True)), + ('longitude', models.DecimalField(blank=True, decimal_places=6, max_digits=9, null=True)), + ('capacity', models.PositiveIntegerField(blank=True, help_text='Seating capacity', null=True)), + ('surface', 
models.CharField(blank=True, choices=[('grass', 'Natural Grass'), ('turf', 'Artificial Turf'), ('ice', 'Ice'), ('hardwood', 'Hardwood'), ('other', 'Other')], max_length=20)), + ('roof_type', models.CharField(blank=True, choices=[('dome', 'Dome (Closed)'), ('retractable', 'Retractable'), ('open', 'Open Air')], max_length=20)), + ('opened_year', models.PositiveSmallIntegerField(blank=True, help_text='Year stadium opened', null=True)), + ('timezone', models.CharField(blank=True, help_text='IANA timezone (e.g., America/Los_Angeles)', max_length=50)), + ('image_url', models.URLField(blank=True, help_text='URL to stadium image')), + ('created_at', models.DateTimeField(blank=True, editable=False)), + ('updated_at', models.DateTimeField(blank=True, editable=False)), + ('history_id', models.AutoField(primary_key=True, serialize=False)), + ('history_date', models.DateTimeField(db_index=True)), + ('history_change_reason', models.CharField(max_length=100, null=True)), + ('history_type', models.CharField(choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')], max_length=1)), + ('history_user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)), + ('sport', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='core.sport')), + ], + options={ + 'verbose_name': 'historical Stadium', + 'verbose_name_plural': 'historical Stadiums', + 'ordering': ('-history_date', '-history_id'), + 'get_latest_by': ('history_date', 'history_id'), + }, + bases=(simple_history.models.HistoricalChanges, models.Model), + ), + migrations.CreateModel( + name='HistoricalConference', + fields=[ + ('id', models.BigIntegerField(auto_created=True, blank=True, db_index=True, verbose_name='ID')), + ('name', models.CharField(max_length=50)), + ('short_name', models.CharField(blank=True, help_text='Short name (e.g., East, West)', max_length=10)), + ('order', 
models.PositiveSmallIntegerField(default=0, help_text='Display order')), + ('created_at', models.DateTimeField(blank=True, editable=False)), + ('updated_at', models.DateTimeField(blank=True, editable=False)), + ('history_id', models.AutoField(primary_key=True, serialize=False)), + ('history_date', models.DateTimeField(db_index=True)), + ('history_change_reason', models.CharField(max_length=100, null=True)), + ('history_type', models.CharField(choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')], max_length=1)), + ('history_user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)), + ('sport', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='core.sport')), + ], + options={ + 'verbose_name': 'historical Conference', + 'verbose_name_plural': 'historical Conferences', + 'ordering': ('-history_date', '-history_id'), + 'get_latest_by': ('history_date', 'history_id'), + }, + bases=(simple_history.models.HistoricalChanges, models.Model), + ), + migrations.AddField( + model_name='conference', + name='sport', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='conferences', to='core.sport'), + ), + migrations.CreateModel( + name='Stadium', + fields=[ + ('id', models.CharField(help_text='Canonical ID (e.g., stadium_nba_los_angeles_lakers)', max_length=100, primary_key=True, serialize=False)), + ('name', models.CharField(help_text='Current stadium name', max_length=200)), + ('city', models.CharField(max_length=100)), + ('state', models.CharField(blank=True, help_text='State/Province (blank for international)', max_length=100)), + ('country', models.CharField(default='USA', max_length=100)), + ('latitude', models.DecimalField(blank=True, decimal_places=6, max_digits=9, null=True)), + ('longitude', models.DecimalField(blank=True, decimal_places=6, max_digits=9, null=True)), + 
('capacity', models.PositiveIntegerField(blank=True, help_text='Seating capacity', null=True)), + ('surface', models.CharField(blank=True, choices=[('grass', 'Natural Grass'), ('turf', 'Artificial Turf'), ('ice', 'Ice'), ('hardwood', 'Hardwood'), ('other', 'Other')], max_length=20)), + ('roof_type', models.CharField(blank=True, choices=[('dome', 'Dome (Closed)'), ('retractable', 'Retractable'), ('open', 'Open Air')], max_length=20)), + ('opened_year', models.PositiveSmallIntegerField(blank=True, help_text='Year stadium opened', null=True)), + ('timezone', models.CharField(blank=True, help_text='IANA timezone (e.g., America/Los_Angeles)', max_length=50)), + ('image_url', models.URLField(blank=True, help_text='URL to stadium image')), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('sport', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='stadiums', to='core.sport')), + ], + options={ + 'verbose_name': 'Stadium', + 'verbose_name_plural': 'Stadiums', + 'ordering': ['sport', 'city', 'name'], + }, + ), + migrations.CreateModel( + name='HistoricalTeam', + fields=[ + ('id', models.CharField(db_index=True, help_text='Canonical ID (e.g., team_nba_lal)', max_length=50)), + ('city', models.CharField(help_text='Team city (e.g., Los Angeles)', max_length=100)), + ('name', models.CharField(help_text='Team name (e.g., Lakers)', max_length=100)), + ('full_name', models.CharField(help_text='Full team name (e.g., Los Angeles Lakers)', max_length=200)), + ('abbreviation', models.CharField(help_text='Team abbreviation (e.g., LAL)', max_length=10)), + ('primary_color', models.CharField(blank=True, help_text='Primary color hex (e.g., #552583)', max_length=7)), + ('secondary_color', models.CharField(blank=True, help_text='Secondary color hex (e.g., #FDB927)', max_length=7)), + ('logo_url', models.URLField(blank=True, help_text='URL to team logo')), + ('is_active', 
models.BooleanField(default=True, help_text='Whether team is currently active')), + ('created_at', models.DateTimeField(blank=True, editable=False)), + ('updated_at', models.DateTimeField(blank=True, editable=False)), + ('history_id', models.AutoField(primary_key=True, serialize=False)), + ('history_date', models.DateTimeField(db_index=True)), + ('history_change_reason', models.CharField(max_length=100, null=True)), + ('history_type', models.CharField(choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')], max_length=1)), + ('division', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='core.division')), + ('history_user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)), + ('sport', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='core.sport')), + ('home_stadium', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='core.stadium')), + ], + options={ + 'verbose_name': 'historical Team', + 'verbose_name_plural': 'historical Teams', + 'ordering': ('-history_date', '-history_id'), + 'get_latest_by': ('history_date', 'history_id'), + }, + bases=(simple_history.models.HistoricalChanges, models.Model), + ), + migrations.CreateModel( + name='HistoricalStadiumAlias', + fields=[ + ('id', models.BigIntegerField(auto_created=True, blank=True, db_index=True, verbose_name='ID')), + ('alias', models.CharField(help_text='The alias text to match against', max_length=200)), + ('alias_type', models.CharField(choices=[('official', 'Official Name'), ('former', 'Former Name'), ('nickname', 'Nickname'), ('abbreviation', 'Abbreviation')], default='official', max_length=20)), + ('valid_from', models.DateField(blank=True, help_text='Date from which this alias is valid 
(inclusive)', null=True)), + ('valid_until', models.DateField(blank=True, help_text='Date until which this alias is valid (inclusive)', null=True)), + ('is_primary', models.BooleanField(default=False, help_text='Whether this is the current/primary name')), + ('source', models.CharField(blank=True, help_text='Source of this alias', max_length=200)), + ('notes', models.TextField(blank=True, help_text='Notes about this alias (e.g., naming rights deal)')), + ('created_at', models.DateTimeField(blank=True, editable=False)), + ('updated_at', models.DateTimeField(blank=True, editable=False)), + ('history_id', models.AutoField(primary_key=True, serialize=False)), + ('history_date', models.DateTimeField(db_index=True)), + ('history_change_reason', models.CharField(max_length=100, null=True)), + ('history_type', models.CharField(choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')], max_length=1)), + ('history_user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)), + ('stadium', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='core.stadium')), + ], + options={ + 'verbose_name': 'historical Stadium Alias', + 'verbose_name_plural': 'historical Stadium Aliases', + 'ordering': ('-history_date', '-history_id'), + 'get_latest_by': ('history_date', 'history_id'), + }, + bases=(simple_history.models.HistoricalChanges, models.Model), + ), + migrations.CreateModel( + name='Team', + fields=[ + ('id', models.CharField(help_text='Canonical ID (e.g., team_nba_lal)', max_length=50, primary_key=True, serialize=False)), + ('city', models.CharField(help_text='Team city (e.g., Los Angeles)', max_length=100)), + ('name', models.CharField(help_text='Team name (e.g., Lakers)', max_length=100)), + ('full_name', models.CharField(help_text='Full team name (e.g., Los Angeles Lakers)', max_length=200)), + ('abbreviation', 
models.CharField(help_text='Team abbreviation (e.g., LAL)', max_length=10)), + ('primary_color', models.CharField(blank=True, help_text='Primary color hex (e.g., #552583)', max_length=7)), + ('secondary_color', models.CharField(blank=True, help_text='Secondary color hex (e.g., #FDB927)', max_length=7)), + ('logo_url', models.URLField(blank=True, help_text='URL to team logo')), + ('is_active', models.BooleanField(default=True, help_text='Whether team is currently active')), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('division', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='teams', to='core.division')), + ('home_stadium', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='home_teams', to='core.stadium')), + ('sport', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='teams', to='core.sport')), + ], + options={ + 'verbose_name': 'Team', + 'verbose_name_plural': 'Teams', + 'ordering': ['sport', 'city', 'name'], + }, + ), + migrations.CreateModel( + name='HistoricalTeamAlias', + fields=[ + ('id', models.BigIntegerField(auto_created=True, blank=True, db_index=True, verbose_name='ID')), + ('alias', models.CharField(help_text='The alias text to match against', max_length=200)), + ('alias_type', models.CharField(choices=[('full_name', 'Full Name'), ('city_name', 'City + Name'), ('abbreviation', 'Abbreviation'), ('nickname', 'Nickname'), ('historical', 'Historical Name')], default='full_name', max_length=20)), + ('valid_from', models.DateField(blank=True, help_text='Date from which this alias is valid (inclusive)', null=True)), + ('valid_until', models.DateField(blank=True, help_text='Date until which this alias is valid (inclusive)', null=True)), + ('is_primary', models.BooleanField(default=False, help_text='Whether this is a primary/preferred alias')), + ('source', 
models.CharField(blank=True, help_text='Source of this alias (e.g., ESPN, Basketball-Reference)', max_length=200)), + ('notes', models.TextField(blank=True, help_text='Notes about this alias (e.g., relocation details)')), + ('created_at', models.DateTimeField(blank=True, editable=False)), + ('updated_at', models.DateTimeField(blank=True, editable=False)), + ('history_id', models.AutoField(primary_key=True, serialize=False)), + ('history_date', models.DateTimeField(db_index=True)), + ('history_change_reason', models.CharField(max_length=100, null=True)), + ('history_type', models.CharField(choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')], max_length=1)), + ('history_user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)), + ('team', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='core.team')), + ], + options={ + 'verbose_name': 'historical Team Alias', + 'verbose_name_plural': 'historical Team Aliases', + 'ordering': ('-history_date', '-history_id'), + 'get_latest_by': ('history_date', 'history_id'), + }, + bases=(simple_history.models.HistoricalChanges, models.Model), + ), + migrations.CreateModel( + name='HistoricalGame', + fields=[ + ('id', models.CharField(db_index=True, help_text='Canonical ID (e.g., game_nba_2025_20251022_bos_lal)', max_length=100)), + ('season', models.PositiveSmallIntegerField(help_text='Season start year (e.g., 2025 for 2025-26 season)')), + ('game_date', models.DateTimeField(help_text='Game date and time (UTC)')), + ('game_number', models.PositiveSmallIntegerField(blank=True, help_text='Game number for doubleheaders (1 or 2)', null=True)), + ('home_score', models.PositiveSmallIntegerField(blank=True, null=True)), + ('away_score', models.PositiveSmallIntegerField(blank=True, null=True)), + ('status', models.CharField(choices=[('scheduled', 'Scheduled'), ('in_progress', 
'In Progress'), ('final', 'Final'), ('postponed', 'Postponed'), ('cancelled', 'Cancelled'), ('suspended', 'Suspended')], default='scheduled', max_length=20)), + ('is_neutral_site', models.BooleanField(default=False, help_text='Whether game is at neutral site')), + ('is_playoff', models.BooleanField(default=False, help_text='Whether this is a playoff game')), + ('playoff_round', models.CharField(blank=True, help_text='Playoff round (e.g., Finals, Conference Finals)', max_length=50)), + ('raw_home_team', models.CharField(blank=True, help_text='Original scraped home team name', max_length=200)), + ('raw_away_team', models.CharField(blank=True, help_text='Original scraped away team name', max_length=200)), + ('raw_stadium', models.CharField(blank=True, help_text='Original scraped stadium name', max_length=200)), + ('source_url', models.URLField(blank=True, help_text='URL where game was scraped from')), + ('created_at', models.DateTimeField(blank=True, editable=False)), + ('updated_at', models.DateTimeField(blank=True, editable=False)), + ('history_id', models.AutoField(primary_key=True, serialize=False)), + ('history_date', models.DateTimeField(db_index=True)), + ('history_change_reason', models.CharField(max_length=100, null=True)), + ('history_type', models.CharField(choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')], max_length=1)), + ('history_user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)), + ('sport', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='core.sport')), + ('stadium', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='core.stadium')), + ('away_team', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='core.team')), + 
('home_team', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='core.team')), + ], + options={ + 'verbose_name': 'historical Game', + 'verbose_name_plural': 'historical Games', + 'ordering': ('-history_date', '-history_id'), + 'get_latest_by': ('history_date', 'history_id'), + }, + bases=(simple_history.models.HistoricalChanges, models.Model), + ), + migrations.AlterUniqueTogether( + name='conference', + unique_together={('sport', 'name')}, + ), + migrations.CreateModel( + name='StadiumAlias', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('alias', models.CharField(help_text='The alias text to match against', max_length=200)), + ('alias_type', models.CharField(choices=[('official', 'Official Name'), ('former', 'Former Name'), ('nickname', 'Nickname'), ('abbreviation', 'Abbreviation')], default='official', max_length=20)), + ('valid_from', models.DateField(blank=True, help_text='Date from which this alias is valid (inclusive)', null=True)), + ('valid_until', models.DateField(blank=True, help_text='Date until which this alias is valid (inclusive)', null=True)), + ('is_primary', models.BooleanField(default=False, help_text='Whether this is the current/primary name')), + ('source', models.CharField(blank=True, help_text='Source of this alias', max_length=200)), + ('notes', models.TextField(blank=True, help_text='Notes about this alias (e.g., naming rights deal)')), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('stadium', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='aliases', to='core.stadium')), + ], + options={ + 'verbose_name': 'Stadium Alias', + 'verbose_name_plural': 'Stadium Aliases', + 'ordering': ['stadium', '-valid_from'], + 'indexes': [models.Index(fields=['alias'], name='core_stadiu_alias_7984d4_idx'), 
models.Index(fields=['stadium', 'valid_from', 'valid_until'], name='core_stadiu_stadium_d38e1b_idx')], + }, + ), + migrations.CreateModel( + name='Game', + fields=[ + ('id', models.CharField(help_text='Canonical ID (e.g., game_nba_2025_20251022_bos_lal)', max_length=100, primary_key=True, serialize=False)), + ('season', models.PositiveSmallIntegerField(help_text='Season start year (e.g., 2025 for 2025-26 season)')), + ('game_date', models.DateTimeField(help_text='Game date and time (UTC)')), + ('game_number', models.PositiveSmallIntegerField(blank=True, help_text='Game number for doubleheaders (1 or 2)', null=True)), + ('home_score', models.PositiveSmallIntegerField(blank=True, null=True)), + ('away_score', models.PositiveSmallIntegerField(blank=True, null=True)), + ('status', models.CharField(choices=[('scheduled', 'Scheduled'), ('in_progress', 'In Progress'), ('final', 'Final'), ('postponed', 'Postponed'), ('cancelled', 'Cancelled'), ('suspended', 'Suspended')], default='scheduled', max_length=20)), + ('is_neutral_site', models.BooleanField(default=False, help_text='Whether game is at neutral site')), + ('is_playoff', models.BooleanField(default=False, help_text='Whether this is a playoff game')), + ('playoff_round', models.CharField(blank=True, help_text='Playoff round (e.g., Finals, Conference Finals)', max_length=50)), + ('raw_home_team', models.CharField(blank=True, help_text='Original scraped home team name', max_length=200)), + ('raw_away_team', models.CharField(blank=True, help_text='Original scraped away team name', max_length=200)), + ('raw_stadium', models.CharField(blank=True, help_text='Original scraped stadium name', max_length=200)), + ('source_url', models.URLField(blank=True, help_text='URL where game was scraped from')), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('sport', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='games', to='core.sport')), 
+ ('stadium', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='games', to='core.stadium')), + ('away_team', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='away_games', to='core.team')), + ('home_team', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='home_games', to='core.team')), + ], + options={ + 'verbose_name': 'Game', + 'verbose_name_plural': 'Games', + 'ordering': ['-game_date', 'sport'], + 'indexes': [models.Index(fields=['sport', 'season'], name='core_game_sport_i_67c5c8_idx'), models.Index(fields=['sport', 'game_date'], name='core_game_sport_i_db4971_idx'), models.Index(fields=['home_team', 'season'], name='core_game_home_te_9b45c7_idx'), models.Index(fields=['away_team', 'season'], name='core_game_away_te_c8e42f_idx'), models.Index(fields=['status'], name='core_game_status_249a25_idx')], + }, + ), + migrations.CreateModel( + name='TeamAlias', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('alias', models.CharField(help_text='The alias text to match against', max_length=200)), + ('alias_type', models.CharField(choices=[('full_name', 'Full Name'), ('city_name', 'City + Name'), ('abbreviation', 'Abbreviation'), ('nickname', 'Nickname'), ('historical', 'Historical Name')], default='full_name', max_length=20)), + ('valid_from', models.DateField(blank=True, help_text='Date from which this alias is valid (inclusive)', null=True)), + ('valid_until', models.DateField(blank=True, help_text='Date until which this alias is valid (inclusive)', null=True)), + ('is_primary', models.BooleanField(default=False, help_text='Whether this is a primary/preferred alias')), + ('source', models.CharField(blank=True, help_text='Source of this alias (e.g., ESPN, Basketball-Reference)', max_length=200)), + ('notes', models.TextField(blank=True, help_text='Notes about this alias (e.g., relocation 
details)')), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('team', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='aliases', to='core.team')), + ], + options={ + 'verbose_name': 'Team Alias', + 'verbose_name_plural': 'Team Aliases', + 'ordering': ['team', '-valid_from'], + 'indexes': [models.Index(fields=['alias'], name='core_teamal_alias_a89339_idx'), models.Index(fields=['team', 'valid_from', 'valid_until'], name='core_teamal_team_id_e29cea_idx')], + }, + ), + ] diff --git a/core/migrations/0002_conference_division_canonical_id.py b/core/migrations/0002_conference_division_canonical_id.py new file mode 100755 index 0000000..efe7fb4 --- /dev/null +++ b/core/migrations/0002_conference_division_canonical_id.py @@ -0,0 +1,53 @@ +# Generated manually + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0001_initial'), + ] + + operations = [ + migrations.AddField( + model_name='conference', + name='canonical_id', + field=models.CharField( + blank=True, + db_index=True, + help_text='Canonical ID from bootstrap JSON (e.g., nba_eastern)', + max_length=100, + ), + ), + migrations.AddField( + model_name='division', + name='canonical_id', + field=models.CharField( + blank=True, + db_index=True, + help_text='Canonical ID from bootstrap JSON (e.g., nba_southeast)', + max_length=100, + ), + ), + migrations.AddField( + model_name='historicalconference', + name='canonical_id', + field=models.CharField( + blank=True, + db_index=True, + help_text='Canonical ID from bootstrap JSON (e.g., nba_eastern)', + max_length=100, + ), + ), + migrations.AddField( + model_name='historicaldivision', + name='canonical_id', + field=models.CharField( + blank=True, + db_index=True, + help_text='Canonical ID from bootstrap JSON (e.g., nba_southeast)', + max_length=100, + ), + ), + ] diff --git 
# NOTE(review): this span of the patch arrived whitespace-flattened (many diff
# lines fused per physical line). Reconstructed below as properly formatted
# Python, one section per file in the original diff; the per-file `diff --git`
# headers were unrecoverable. Verify against VCS before applying.

# ---- file: core/migrations/0003_sport_icon_name_color_hex.py ----
from django.db import migrations, models


class Migration(migrations.Migration):
    """Add iOS presentation fields (SF Symbol icon name, brand color) to Sport."""

    dependencies = [
        ('core', '0002_conference_division_canonical_id'),
    ]

    operations = [
        migrations.AddField(
            model_name='sport',
            name='icon_name',
            field=models.CharField(blank=True, help_text='SF Symbol name (e.g., baseball.fill, basketball.fill)', max_length=50),
        ),
        migrations.AddField(
            model_name='sport',
            name='color_hex',
            field=models.CharField(blank=True, help_text='Brand color hex (e.g., #CE1141)', max_length=10),
        ),
    ]


# ---- file: core/models/__init__.py ----
# Re-export the canonical model classes so callers can use `core.models.X`.
from .sport import Sport
from .league_structure import Conference, Division
from .team import Team
from .stadium import Stadium
from .game import Game
from .alias import TeamAlias, StadiumAlias

__all__ = [
    'Sport',
    'Conference',
    'Division',
    'Team',
    'Stadium',
    'Game',
    'TeamAlias',
    'StadiumAlias',
]


# ---- file: core/models/alias.py ----
from django.db import models
from simple_history.models import HistoricalRecords


class TeamAlias(models.Model):
    """
    Historical team name aliases for resolution.
    Handles team renames, relocations, and alternate names.
    """
    ALIAS_TYPE_CHOICES = [
        ('full_name', 'Full Name'),
        ('city_name', 'City + Name'),
        ('abbreviation', 'Abbreviation'),
        ('nickname', 'Nickname'),
        ('historical', 'Historical Name'),
    ]

    team = models.ForeignKey(
        'core.Team',
        on_delete=models.CASCADE,
        related_name='aliases'
    )
    alias = models.CharField(
        max_length=200,
        help_text='The alias text to match against'
    )
    alias_type = models.CharField(
        max_length=20,
        choices=ALIAS_TYPE_CHOICES,
        default='full_name'
    )
    valid_from = models.DateField(
        null=True,
        blank=True,
        help_text='Date from which this alias is valid (inclusive)'
    )
    valid_until = models.DateField(
        null=True,
        blank=True,
        help_text='Date until which this alias is valid (inclusive)'
    )
    is_primary = models.BooleanField(
        default=False,
        help_text='Whether this is a primary/preferred alias'
    )
    source = models.CharField(
        max_length=200,
        blank=True,
        help_text='Source of this alias (e.g., ESPN, Basketball-Reference)'
    )
    notes = models.TextField(
        blank=True,
        help_text='Notes about this alias (e.g., relocation details)'
    )

    # Metadata
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    # Audit trail
    history = HistoricalRecords()

    class Meta:
        ordering = ['team', '-valid_from']
        verbose_name = 'Team Alias'
        verbose_name_plural = 'Team Aliases'
        indexes = [
            models.Index(fields=['alias']),
            models.Index(fields=['team', 'valid_from', 'valid_until']),
        ]

    def __str__(self):
        # Render "(YYYY-YYYY)" with '...'/'present' standing in for open ends.
        date_range = ""
        if self.valid_from or self.valid_until:
            start = self.valid_from.strftime('%Y') if self.valid_from else '...'
            end = self.valid_until.strftime('%Y') if self.valid_until else 'present'
            date_range = f" ({start}-{end})"
        return f"{self.alias} → {self.team.abbreviation}{date_range}"

    def is_valid_for_date(self, check_date):
        """Check if this alias is valid for a given date (open ends always pass).

        NOTE(review): comparisons assume check_date is a datetime.date, like the
        DateField values; passing a datetime raises TypeError -- confirm callers.
        """
        if self.valid_from and check_date < self.valid_from:
            return False
        if self.valid_until and check_date > self.valid_until:
            return False
        return True


class StadiumAlias(models.Model):
    """
    Historical stadium name aliases for resolution.
    Handles naming rights changes and alternate names.
    """
    ALIAS_TYPE_CHOICES = [
        ('official', 'Official Name'),
        ('former', 'Former Name'),
        ('nickname', 'Nickname'),
        ('abbreviation', 'Abbreviation'),
    ]

    stadium = models.ForeignKey(
        'core.Stadium',
        on_delete=models.CASCADE,
        related_name='aliases'
    )
    alias = models.CharField(
        max_length=200,
        help_text='The alias text to match against'
    )
    alias_type = models.CharField(
        max_length=20,
        choices=ALIAS_TYPE_CHOICES,
        default='official'
    )
    valid_from = models.DateField(
        null=True,
        blank=True,
        help_text='Date from which this alias is valid (inclusive)'
    )
    valid_until = models.DateField(
        null=True,
        blank=True,
        help_text='Date until which this alias is valid (inclusive)'
    )
    is_primary = models.BooleanField(
        default=False,
        help_text='Whether this is the current/primary name'
    )
    source = models.CharField(
        max_length=200,
        blank=True,
        help_text='Source of this alias'
    )
    notes = models.TextField(
        blank=True,
        help_text='Notes about this alias (e.g., naming rights deal)'
    )

    # Metadata
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    # Audit trail
    history = HistoricalRecords()

    class Meta:
        ordering = ['stadium', '-valid_from']
        verbose_name = 'Stadium Alias'
        verbose_name_plural = 'Stadium Aliases'
        indexes = [
            models.Index(fields=['alias']),
            models.Index(fields=['stadium', 'valid_from', 'valid_until']),
        ]

    def __str__(self):
        # Same open-ended range rendering as TeamAlias.__str__.
        date_range = ""
        if self.valid_from or self.valid_until:
            start = self.valid_from.strftime('%Y') if self.valid_from else '...'
            end = self.valid_until.strftime('%Y') if self.valid_until else 'present'
            date_range = f" ({start}-{end})"
        return f"{self.alias} → {self.stadium.name}{date_range}"

    def is_valid_for_date(self, check_date):
        """Check if this alias is valid for a given date (open ends always pass)."""
        if self.valid_from and check_date < self.valid_from:
            return False
        if self.valid_until and check_date > self.valid_until:
            return False
        return True


# ---- file: core/models/game.py ----
from django.db import models
from simple_history.models import HistoricalRecords


class Game(models.Model):
    """
    Game model representing a single game between two teams.
    """
    STATUS_CHOICES = [
        ('scheduled', 'Scheduled'),
        ('in_progress', 'In Progress'),
        ('final', 'Final'),
        ('postponed', 'Postponed'),
        ('cancelled', 'Cancelled'),
        ('suspended', 'Suspended'),
    ]

    id = models.CharField(
        max_length=100,
        primary_key=True,
        help_text='Canonical ID (e.g., game_nba_2025_20251022_bos_lal)'
    )
    sport = models.ForeignKey(
        'core.Sport',
        on_delete=models.CASCADE,
        related_name='games'
    )
    season = models.PositiveSmallIntegerField(
        help_text='Season start year (e.g., 2025 for 2025-26 season)'
    )
    home_team = models.ForeignKey(
        'core.Team',
        on_delete=models.CASCADE,
        related_name='home_games'
    )
    away_team = models.ForeignKey(
        'core.Team',
        on_delete=models.CASCADE,
        related_name='away_games'
    )
    stadium = models.ForeignKey(
        'core.Stadium',
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name='games'
    )
    game_date = models.DateTimeField(
        help_text='Game date and time (UTC)'
    )
    game_number = models.PositiveSmallIntegerField(
        null=True,
        blank=True,
        help_text='Game number for doubleheaders (1 or 2)'
    )
    home_score = models.PositiveSmallIntegerField(
        null=True,
        blank=True
    )
    away_score = models.PositiveSmallIntegerField(
        null=True,
        blank=True
    )
    status = models.CharField(
        max_length=20,
        choices=STATUS_CHOICES,
        default='scheduled'
    )
    is_neutral_site = models.BooleanField(
        default=False,
        help_text='Whether game is at neutral site'
    )
    is_playoff = models.BooleanField(
        default=False,
        help_text='Whether this is a playoff game'
    )
    playoff_round = models.CharField(
        max_length=50,
        blank=True,
        help_text='Playoff round (e.g., Finals, Conference Finals)'
    )

    # Raw scraped values (for debugging/review)
    raw_home_team = models.CharField(
        max_length=200,
        blank=True,
        help_text='Original scraped home team name'
    )
    raw_away_team = models.CharField(
        max_length=200,
        blank=True,
        help_text='Original scraped away team name'
    )
    raw_stadium = models.CharField(
        max_length=200,
        blank=True,
        help_text='Original scraped stadium name'
    )
    source_url = models.URLField(
        blank=True,
        help_text='URL where game was scraped from'
    )

    # Metadata
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    # Audit trail
    history = HistoricalRecords()

    class Meta:
        ordering = ['-game_date', 'sport']
        verbose_name = 'Game'
        verbose_name_plural = 'Games'
        indexes = [
            models.Index(fields=['sport', 'season']),
            models.Index(fields=['sport', 'game_date']),
            models.Index(fields=['home_team', 'season']),
            models.Index(fields=['away_team', 'season']),
            models.Index(fields=['status']),
        ]

    def __str__(self):
        return f"{self.away_team.abbreviation} @ {self.home_team.abbreviation} - {self.game_date.strftime('%Y-%m-%d')}"

    @property
    def is_final(self):
        # True only when the game has finished (status 'final').
        return self.status == 'final'

    @property
    def winner(self):
        """Return winning team or None if not final."""
        if not self.is_final or self.home_score is None or self.away_score is None:
            return None
        if self.home_score > self.away_score:
            return self.home_team
        elif self.away_score > self.home_score:
            return self.away_team
        return None  # Tie

    @property
    def score_display(self):
        """Return score as 'away_score - home_score' or 'TBD'."""
        if self.home_score is not None and self.away_score is not None:
            return f"{self.away_score} - {self.home_score}"
        return "TBD"


# ---- file: core/models/league_structure.py ----
from django.db import models
from simple_history.models import HistoricalRecords


class Conference(models.Model):
    """
    Conference within a sport (e.g., Eastern, Western for NBA).
    """
    sport = models.ForeignKey(
        'core.Sport',
        on_delete=models.CASCADE,
        related_name='conferences'
    )
    canonical_id = models.CharField(
        max_length=100,
        blank=True,
        db_index=True,
        help_text='Canonical ID from bootstrap JSON (e.g., nba_eastern)'
    )
    name = models.CharField(max_length=50)
    short_name = models.CharField(
        max_length=10,
        blank=True,
        help_text='Short name (e.g., East, West)'
    )
    order = models.PositiveSmallIntegerField(
        default=0,
        help_text='Display order'
    )

    # Metadata
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    # Audit trail
    history = HistoricalRecords()

    class Meta:
        ordering = ['sport', 'order', 'name']
        unique_together = ['sport', 'name']
        verbose_name = 'Conference'
        verbose_name_plural = 'Conferences'

    def __str__(self):
        return f"{self.sport.short_name} - {self.name}"


class Division(models.Model):
    """
    Division within a conference (e.g., Atlantic, Central for NBA East).
    """
    conference = models.ForeignKey(
        Conference,
        on_delete=models.CASCADE,
        related_name='divisions'
    )
    canonical_id = models.CharField(
        max_length=100,
        blank=True,
        db_index=True,
        help_text='Canonical ID from bootstrap JSON (e.g., nba_southeast)'
    )
    name = models.CharField(max_length=50)
    short_name = models.CharField(
        max_length=10,
        blank=True,
        help_text='Short name'
    )
    order = models.PositiveSmallIntegerField(
        default=0,
        help_text='Display order'
    )

    # Metadata
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    # Audit trail
    history = HistoricalRecords()

    class Meta:
        ordering = ['conference', 'order', 'name']
        unique_together = ['conference', 'name']
        verbose_name = 'Division'
        verbose_name_plural = 'Divisions'

    def __str__(self):
        return f"{self.conference.sport.short_name} - {self.conference.name} - {self.name}"

    @property
    def sport(self):
        # Convenience: divisions reach their sport through the conference.
        return self.conference.sport


# ---- file: core/models/sport.py ----
from django.db import models
from simple_history.models import HistoricalRecords


class Sport(models.Model):
    """
    Sport configuration model.
    """
    SEASON_TYPE_CHOICES = [
        ('split', 'Split Year (e.g., 2024-25)'),
        ('single', 'Single Year (e.g., 2024)'),
    ]

    code = models.CharField(
        max_length=10,
        primary_key=True,
        help_text='Sport code (e.g., nba, mlb, nfl)'
    )
    name = models.CharField(
        max_length=100,
        help_text='Full name (e.g., National Basketball Association)'
    )
    short_name = models.CharField(
        max_length=20,
        help_text='Short name (e.g., NBA)'
    )
    season_type = models.CharField(
        max_length=10,
        choices=SEASON_TYPE_CHOICES,
        help_text='Whether season spans two years or one'
    )
    expected_game_count = models.PositiveIntegerField(
        default=0,
        help_text='Expected number of regular season games'
    )
    season_start_month = models.PositiveSmallIntegerField(
        default=1,
        help_text='Month when season typically starts (1-12)'
    )
    season_end_month = models.PositiveSmallIntegerField(
        default=12,
        help_text='Month when season typically ends (1-12)'
    )
    icon_name = models.CharField(
        max_length=50,
        blank=True,
        help_text='SF Symbol name (e.g., baseball.fill, basketball.fill)'
    )
    color_hex = models.CharField(
        max_length=10,
        blank=True,
        help_text='Brand color hex (e.g., #CE1141)'
    )
    is_active = models.BooleanField(
        default=True,
        help_text='Whether this sport is actively being scraped'
    )

    # Metadata
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    # Audit trail
    history = HistoricalRecords()

    class Meta:
        ordering = ['name']
        verbose_name = 'Sport'
        verbose_name_plural = 'Sports'

    def __str__(self):
        return self.short_name

    def get_season_display(self, year: int) -> str:
        """Return display string for a season (e.g., '2024-25' or '2024')."""
        if self.season_type == 'split':
            # Split seasons render as start year plus two-digit end year.
            return f"{year}-{str(year + 1)[-2:]}"
        return str(year)


# ---- file: core/models/stadium.py ----
from django.db import models
from simple_history.models import HistoricalRecords


class Stadium(models.Model):
    """
    Stadium/Arena/Venue model.
    """
    SURFACE_CHOICES = [
        ('grass', 'Natural Grass'),
        ('turf', 'Artificial Turf'),
        ('ice', 'Ice'),
        ('hardwood', 'Hardwood'),
        ('other', 'Other'),
    ]

    ROOF_TYPE_CHOICES = [
        ('dome', 'Dome (Closed)'),
        ('retractable', 'Retractable'),
        ('open', 'Open Air'),
    ]

    id = models.CharField(
        max_length=100,
        primary_key=True,
        help_text='Canonical ID (e.g., stadium_nba_los_angeles_lakers)'
    )
    sport = models.ForeignKey(
        'core.Sport',
        on_delete=models.CASCADE,
        related_name='stadiums'
    )
    name = models.CharField(
        max_length=200,
        help_text='Current stadium name'
    )
    city = models.CharField(max_length=100)
    state = models.CharField(
        max_length=100,
        blank=True,
        help_text='State/Province (blank for international)'
    )
    country = models.CharField(
        max_length=100,
        default='USA'
    )
    latitude = models.DecimalField(
        max_digits=9,
        decimal_places=6,
        null=True,
        blank=True
    )
    longitude = models.DecimalField(
        max_digits=9,
        decimal_places=6,
        null=True,
        blank=True
    )
    capacity = models.PositiveIntegerField(
        null=True,
        blank=True,
        help_text='Seating capacity'
    )
    surface = models.CharField(
        max_length=20,
        choices=SURFACE_CHOICES,
        blank=True
    )
    roof_type = models.CharField(
        max_length=20,
        choices=ROOF_TYPE_CHOICES,
        blank=True
    )
    opened_year = models.PositiveSmallIntegerField(
        null=True,
        blank=True,
        help_text='Year stadium opened'
    )
    timezone = models.CharField(
        max_length=50,
        blank=True,
        help_text='IANA timezone (e.g., America/Los_Angeles)'
    )
    image_url = models.URLField(
        blank=True,
        help_text='URL to stadium image'
    )

    # Metadata
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    # Audit trail
    history = HistoricalRecords()

    class Meta:
        ordering = ['sport', 'city', 'name']
        verbose_name = 'Stadium'
        verbose_name_plural = 'Stadiums'

    def __str__(self):
        return f"{self.name} ({self.city})"

    @property
    def location(self):
        """Return city, state/country string."""
        if self.state:
            return f"{self.city}, {self.state}"
        return f"{self.city}, {self.country}"


# ---- file: core/models/team.py ----
from django.db import models
from simple_history.models import HistoricalRecords


class Team(models.Model):
    """
    Team model with canonical identifiers.
    """
    id = models.CharField(
        max_length=50,
        primary_key=True,
        help_text='Canonical ID (e.g., team_nba_lal)'
    )
    sport = models.ForeignKey(
        'core.Sport',
        on_delete=models.CASCADE,
        related_name='teams'
    )
    division = models.ForeignKey(
        'core.Division',
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name='teams'
    )
    city = models.CharField(
        max_length=100,
        help_text='Team city (e.g., Los Angeles)'
    )
    name = models.CharField(
        max_length=100,
        help_text='Team name (e.g., Lakers)'
    )
    full_name = models.CharField(
        max_length=200,
        help_text='Full team name (e.g., Los Angeles Lakers)'
    )
    abbreviation = models.CharField(
        max_length=10,
        help_text='Team abbreviation (e.g., LAL)'
    )
    home_stadium = models.ForeignKey(
        'core.Stadium',
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name='home_teams'
    )
    primary_color = models.CharField(
        max_length=7,
        blank=True,
        help_text='Primary color hex (e.g., #552583)'
    )
    secondary_color = models.CharField(
        max_length=7,
        blank=True,
        help_text='Secondary color hex (e.g., #FDB927)'
    )
    logo_url = models.URLField(
        blank=True,
        help_text='URL to team logo'
    )
    is_active = models.BooleanField(
        default=True,
        help_text='Whether team is currently active'
    )

    # Metadata
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    # Audit trail
    history = HistoricalRecords()

    class Meta:
        ordering = ['sport', 'city', 'name']
        verbose_name = 'Team'
        verbose_name_plural = 'Teams'

    def __str__(self):
        return self.full_name

    @property
    def conference(self):
        """Return team's conference via division."""
        if self.division:
            return self.division.conference
        return None


# ---- file: core/resources.py ----
"""Import/Export resources for core models.

NOTE(review): several ForeignKeyWidget lookups key on fields that are not
declared unique in the models above (Team.abbreviation, Stadium.name,
Division.name, Conference.name). With multiple sports loaded, an import can
raise MultipleObjectsReturned -- confirm whether imports are always scoped to
one sport, or switch the widgets to the canonical `id` fields.
"""
from import_export import resources, fields
from import_export.widgets import ForeignKeyWidget

from .models import Sport, Conference, Division, Team, Stadium, Game, TeamAlias, StadiumAlias


class SportResource(resources.ModelResource):
    class Meta:
        model = Sport
        import_id_fields = ['code']
        fields = [
            'code', 'name', 'short_name', 'season_type',
            'season_start_month', 'season_end_month',
            'expected_game_count', 'is_active',
        ]
        export_order = fields


class ConferenceResource(resources.ModelResource):
    sport = fields.Field(
        column_name='sport',
        attribute='sport',
        widget=ForeignKeyWidget(Sport, 'code')
    )

    class Meta:
        model = Conference
        import_id_fields = ['sport', 'name']
        fields = ['sport', 'canonical_id', 'name', 'short_name', 'order']
        export_order = fields


class DivisionResource(resources.ModelResource):
    conference = fields.Field(
        column_name='conference',
        attribute='conference',
        widget=ForeignKeyWidget(Conference, 'name')
    )
    # Read-only convenience column so exports show which sport a row belongs to.
    sport = fields.Field(attribute='conference__sport__code', readonly=True)

    class Meta:
        model = Division
        import_id_fields = ['conference', 'name']
        fields = ['sport', 'conference', 'canonical_id', 'name', 'short_name', 'order']
        export_order = fields


class TeamResource(resources.ModelResource):
    sport = fields.Field(
        column_name='sport',
        attribute='sport',
        widget=ForeignKeyWidget(Sport, 'code')
    )
    division = fields.Field(
        column_name='division',
        attribute='division',
        widget=ForeignKeyWidget(Division, 'name')
    )
    home_stadium = fields.Field(
        column_name='home_stadium',
        attribute='home_stadium',
        widget=ForeignKeyWidget(Stadium, 'name')
    )

    class Meta:
        model = Team
        import_id_fields = ['id']
        fields = [
            'id', 'sport', 'division', 'city', 'name', 'full_name',
            'abbreviation', 'primary_color', 'secondary_color',
            'logo_url', 'home_stadium', 'is_active',
        ]
        export_order = fields


class StadiumResource(resources.ModelResource):
    sport = fields.Field(
        column_name='sport',
        attribute='sport',
        widget=ForeignKeyWidget(Sport, 'code')
    )

    class Meta:
        model = Stadium
        import_id_fields = ['id']
        fields = [
            'id', 'sport', 'name', 'city', 'state', 'country',
            'latitude', 'longitude', 'timezone', 'capacity',
            'surface', 'roof_type', 'opened_year', 'image_url',
        ]
        export_order = fields


class GameResource(resources.ModelResource):
    # NOTE(review): the commit message says exports use `game_datetime_utc`,
    # but this resource exposes the model's `game_date` column -- confirm the
    # export command renames it, or the message is stale.
    sport = fields.Field(
        column_name='sport',
        attribute='sport',
        widget=ForeignKeyWidget(Sport, 'code')
    )
    home_team = fields.Field(
        column_name='home_team',
        attribute='home_team',
        widget=ForeignKeyWidget(Team, 'abbreviation')
    )
    away_team = fields.Field(
        column_name='away_team',
        attribute='away_team',
        widget=ForeignKeyWidget(Team, 'abbreviation')
    )
    stadium = fields.Field(
        column_name='stadium',
        attribute='stadium',
        widget=ForeignKeyWidget(Stadium, 'name')
    )

    class Meta:
        model = Game
        import_id_fields = ['id']
        fields = [
            'id', 'sport', 'season', 'home_team', 'away_team',
            'stadium', 'game_date', 'game_number', 'status',
            'home_score', 'away_score', 'is_playoff', 'playoff_round',
            'is_neutral_site', 'source_url',
        ]
        export_order = fields


class TeamAliasResource(resources.ModelResource):
    team = fields.Field(
        column_name='team',
        attribute='team',
        widget=ForeignKeyWidget(Team, 'abbreviation')
    )
    sport = fields.Field(attribute='team__sport__code', readonly=True)

    class Meta:
        model = TeamAlias
        import_id_fields = ['team', 'alias']
        fields = [
            'sport', 'team', 'alias', 'alias_type',
            'valid_from', 'valid_until', 'is_primary', 'source', 'notes',
        ]
        export_order = fields


class StadiumAliasResource(resources.ModelResource):
    stadium = fields.Field(
        column_name='stadium',
        attribute='stadium',
        widget=ForeignKeyWidget(Stadium, 'name')
    )
    sport = fields.Field(attribute='stadium__sport__code', readonly=True)

    class Meta:
        model = StadiumAlias
        import_id_fields = ['stadium', 'alias']
        fields = [
            'sport', 'stadium', 'alias', 'alias_type',
            'valid_from', 'valid_until', 'is_primary', 'source', 'notes',
        ]
        export_order = fields


# ---- file: dashboard/__init__.py ----
# NOTE(review): default_app_config has been unnecessary since Django 3.2 and
# was removed in Django 4.1; if the project targets >= 4.1 this line is a dead
# assignment and can be deleted (AppConfig discovery is automatic).
default_app_config = 'dashboard.apps.DashboardConfig'


# ---- file: dashboard/apps.py ----
from django.apps import AppConfig


class DashboardConfig(AppConfig):
    """App configuration for the staff monitoring dashboard."""
    default_auto_field = 'django.db.models.BigAutoField'
    name = 'dashboard'
    verbose_name = 'Dashboard'


# ---- file: dashboard/templates/dashboard/base.html ----
# NOTE(review): the template's HTML markup was stripped during extraction;
# only the Django template tags survive in this view. Preserved verbatim below
# as a comment -- restore the real 130-line template from VCS.
# {% extends "admin/base_site.html" %}
# {% load static %}
# {% block extrahead %}
# {{ block.super }}
# {% endblock %}
# {% block content %}
# {% endblock %}
"dashboard/base.html" %} + +{% block dashboard_content %} +
+

Overview

+
+
+
{{ sports_count }}
+
Sports
+
+
+
{{ teams_count }}
+
Teams
+
+
+
{{ stadiums_count }}
+
Stadiums
+
+
+
{{ games_count }}
+
Games
+
+
+
{{ pending_reviews }}
+
Pending Reviews
+
+
+
+ +
+
+

Recent Scraper Jobs

+ {% if recent_jobs %} +
+ + + + + + + + + + + {% for job in recent_jobs %} + + + + + + + {% endfor %} + +
SportStatusGamesTime
{{ job.config.sport.short_name }} {{ job.config.season }}{{ job.status|upper }}{{ job.games_found }}{{ job.created_at|timesince }} ago
+
+ {% else %} +

No recent scraper jobs.

+ {% endif %} +

+ View All Jobs +

+
+ +
+

Recent CloudKit Syncs

+ {% if recent_syncs %} +
+ + + + + + + + + + + {% for sync in recent_syncs %} + + + + + + + {% endfor %} + +
ConfigStatusRecordsTime
{{ sync.configuration.name }}{{ sync.status|upper }}{{ sync.records_synced }}{{ sync.created_at|timesince }} ago
+
+ {% else %} +

No recent sync jobs.

+ {% endif %} +

+ View Sync Status +

+
+
+ +
+

Sport Summary

+
+ + + + + + + + + + + + {% for stat in sport_stats %} + + + + + + + + {% endfor %} + +
SportTeamsStadiumsGamesPending Reviews
{{ stat.sport.short_name }} - {{ stat.sport.name }}{{ stat.teams }}{{ stat.stadiums }}{{ stat.games }}{% if stat.pending_reviews %}{{ stat.pending_reviews }}{% else %}0{% endif %}
+
+
+{% endblock %} diff --git a/dashboard/templates/dashboard/review_queue.html b/dashboard/templates/dashboard/review_queue.html new file mode 100644 index 0000000..301d071 --- /dev/null +++ b/dashboard/templates/dashboard/review_queue.html @@ -0,0 +1,74 @@ +{% extends "dashboard/base.html" %} + +{% block dashboard_content %} +
+

Review Queue Summary

+
+
+
{{ total_pending }}
+
Total Pending
+
+ {% for item in review_summary %} +
+
{{ item.count }}
+
{{ item.sport__short_name }} {{ item.item_type }}s
+
+ {% endfor %} +
+
+ +
+

Pending Review Items

+ {% if pending_items %} +
+ + + + + + + + + + + + + + {% for item in pending_items %} + + + + + + + + + + {% endfor %} + +
TypeSportRaw ValueSuggested MatchConfidenceReasonActions
{{ item.item_type }}{{ item.sport.short_name }}{{ item.raw_value }} + {% if item.suggested_id %} + {{ item.suggested_id }} + {% else %} + None + {% endif %} + + {% if item.confidence > 0 %} + + {{ item.confidence_display }} + + {% else %}-{% endif %} + {{ item.get_reason_display }} + Review +
+
+ {% if total_pending > 50 %} +

+ Showing 50 of {{ total_pending }} items. View all in admin. +

+ {% endif %} + {% else %} +

No pending review items! 🎉

+ {% endif %} +
+{% endblock %} diff --git a/dashboard/templates/dashboard/scraper_status.html b/dashboard/templates/dashboard/scraper_status.html new file mode 100644 index 0000000..38825e9 --- /dev/null +++ b/dashboard/templates/dashboard/scraper_status.html @@ -0,0 +1,100 @@ +{% extends "dashboard/base.html" %} + +{% block dashboard_content %} +
+

Scraper Status

+
+
+
{{ running_jobs }}
+
Running
+
+
+
{{ pending_jobs }}
+
Pending
+
+
+
{{ configs.count }}
+
Configurations
+
+
+
+ +
+

Scraper Configurations

+
+ + + + + + + + + + + + + + {% for config in configs %} + + + + + + + + + + {% endfor %} + +
SportSeasonEnabledLast RunStatusGamesActions
{{ config.sport.short_name }}{{ config.sport.get_season_display }}{% if config.is_enabled %}{% else %}{% endif %}{% if config.last_run %}{{ config.last_run|timesince }} ago{% else %}-{% endif %} + {% if config.last_run_status %} + {{ config.last_run_status|upper }} + {% else %}-{% endif %} + {{ config.last_run_games }} +
+ {% csrf_token %} + +
+
+
+
+ +
+

Recent Jobs

+
+ + + + + + + + + + + + + + + {% for job in recent_jobs %} + + + + + + + + + + + {% endfor %} + +
IDSportStatusTriggerStartedDurationGamesReviews
{{ job.id }}{{ job.config.sport.short_name }} {{ job.config.season }}{{ job.status|upper }}{{ job.triggered_by }}{% if job.started_at %}{{ job.started_at|timesince }} ago{% else %}-{% endif %}{{ job.duration_display }} + {% if job.games_found %} + {{ job.games_found }} ({{ job.games_new }} new, {{ job.games_updated }} upd) + {% else %}-{% endif %} + {% if job.review_items_created %}{{ job.review_items_created }}{% else %}-{% endif %}
+
+
+{% endblock %} diff --git a/dashboard/templates/dashboard/stats.html b/dashboard/templates/dashboard/stats.html new file mode 100644 index 0000000..4704cf1 --- /dev/null +++ b/dashboard/templates/dashboard/stats.html @@ -0,0 +1,85 @@ +{% extends "dashboard/base.html" %} + +{% block dashboard_content %} +
+

Game Statistics

+
+
+
{{ game_stats.total }}
+
Total Games
+
+
+
{{ game_stats.scheduled }}
+
Scheduled
+
+
+
{{ game_stats.final }}
+
Final
+
+
+
{{ game_stats.today }}
+
Today
+
+
+
{{ game_stats.this_week }}
+
This Week
+
+
+
+ +
+

CloudKit Sync Statistics

+
+
+
{{ sync_stats.total }}
+
Total Records
+
+
+
{{ sync_stats.synced }}
+
Synced
+
+
+
{{ sync_stats.pending }}
+
Pending
+
+
+
{{ sync_stats.failed }}
+
Failed
+
+
+
+ +
+

Data by Sport

+
+ + + + + + + + + + + + {% for stat in sport_stats %} + + + + + + + + {% endfor %} + +
SportTeamsStadiumsGamesPending Reviews
{{ stat.sport.short_name }} - {{ stat.sport.name }}{{ stat.teams }}{{ stat.stadiums }}{{ stat.games }} + {% if stat.pending_reviews %} + {{ stat.pending_reviews }} + {% else %} + 0 + {% endif %} +
+
+
+{% endblock %} diff --git a/dashboard/templates/dashboard/sync_status.html b/dashboard/templates/dashboard/sync_status.html new file mode 100644 index 0000000..9ece181 --- /dev/null +++ b/dashboard/templates/dashboard/sync_status.html @@ -0,0 +1,382 @@ +{% extends 'base.html' %} + +{% block content %} +

CloudKit Sync

+ + +
+
+
{{ running_syncs|default:0 }}
+
Running Syncs
+
+
+
{{ total_records }}
+
Total Records
+
+
+ + +
+
+

CloudKit Configurations

+
+ {% if all_configs %} + + + + + + + + + + + + + {% for c in all_configs %} + + + + + + + + + {% endfor %} + +
ConfigEnvironmentContainerStatusProgressActions
{{ c.name }}{% if c.is_active %} ★{% endif %} + {% if c.environment == 'production' %} + Production + {% else %} + Development + {% endif %} + {{ c.container_id }} + {% if c.is_active %} + Active + {% else %} + Inactive + {% endif %} + + - + + + Edit +
+ {% else %} +
+

No CloudKit configuration found. Create one.

+
+ {% endif %} +
+ + + + + + +
+
+

Recent Sync Jobs

+
+ + + + + + + + + + + + + + + {% for sync in recent_syncs %} + + + + + + + + + + + {% empty %} + + + + {% endfor %} + +
IDConfigStatusTypeTriggerStartedDurationRecords
{{ sync.id }} + {{ sync.configuration.name }} + {% if sync.configuration.environment == 'production' %} + Prod + {% else %} + Dev + {% endif %} + + {% if sync.status == 'completed' %} + Completed + {% elif sync.status == 'running' %} + Running + {% elif sync.status == 'failed' %} + Failed + {% elif sync.status == 'cancelled' %} + Cancelled + {% else %} + {{ sync.status|title }} + {% endif %} + + {% if sync.record_type_filter %} + {{ sync.record_type_filter }} + {% else %} + All + {% endif %} + {{ sync.triggered_by }}{% if sync.started_at %}{{ sync.started_at|timesince }} ago{% else %}-{% endif %}{{ sync.duration_display }} + {% if sync.records_synced or sync.records_failed %} + {{ sync.records_synced }} synced{% if sync.records_failed %}, {{ sync.records_failed }} failed{% endif %} +
+ {% if sync.sports_synced or sync.sports_failed %} + Sport: {{ sync.sports_synced }}{% if sync.sports_failed %}/{{ sync.sports_failed }}f{% endif %}
+ {% endif %} + {% if sync.conferences_synced or sync.conferences_failed %} + Conf: {{ sync.conferences_synced }}{% if sync.conferences_failed %}/{{ sync.conferences_failed }}f{% endif %}
+ {% endif %} + {% if sync.divisions_synced or sync.divisions_failed %} + Div: {{ sync.divisions_synced }}{% if sync.divisions_failed %}/{{ sync.divisions_failed }}f{% endif %}
+ {% endif %} + {% if sync.teams_synced or sync.teams_failed %} + Team: {{ sync.teams_synced }}{% if sync.teams_failed %}/{{ sync.teams_failed }}f{% endif %}
+ {% endif %} + {% if sync.stadiums_synced or sync.stadiums_failed %} + Stadium: {{ sync.stadiums_synced }}{% if sync.stadiums_failed %}/{{ sync.stadiums_failed }}f{% endif %}
+ {% endif %} + {% if sync.games_synced or sync.games_failed %} + Game: {{ sync.games_synced }}{% if sync.games_failed %}/{{ sync.games_failed }}f{% endif %}
+ {% endif %} + {% if sync.team_aliases_synced or sync.team_aliases_failed %} + TeamAlias: {{ sync.team_aliases_synced }}{% if sync.team_aliases_failed %}/{{ sync.team_aliases_failed }}f{% endif %}
+ {% endif %} + {% if sync.stadium_aliases_synced or sync.stadium_aliases_failed %} + StadiumAlias: {{ sync.stadium_aliases_synced }}{% if sync.stadium_aliases_failed %}/{{ sync.stadium_aliases_failed }}f{% endif %}
+ {% endif %} +
+ {% else %}-{% endif %} +
No sync jobs yet.
+
+ + +{% endblock %} diff --git a/dashboard/urls.py b/dashboard/urls.py new file mode 100644 index 0000000..d831feb --- /dev/null +++ b/dashboard/urls.py @@ -0,0 +1,21 @@ +from django.urls import path +from . import views + +app_name = 'dashboard' + +urlpatterns = [ + path('', views.index, name='index'), + path('stats/', views.stats, name='stats'), + path('scraper-status/', views.scraper_status, name='scraper_status'), + path('sync-status/', views.sync_status, name='sync_status'), + path('review-queue/', views.review_queue, name='review_queue'), + path('export/', views.export_data, name='export'), + # Actions + path('run-scraper///', views.run_scraper, name='run_scraper'), + path('run-all-scrapers/', views.run_all_scrapers, name='run_all_scrapers'), + path('run-sync/', views.run_sync, name='run_sync'), + path('export/download/', views.export_download, name='export_download'), + # API + path('api/sync-progress//', views.sync_progress_api, name='sync_progress_api'), + path('api/running-syncs/', views.running_syncs_api, name='running_syncs_api'), +] diff --git a/dashboard/views.py b/dashboard/views.py new file mode 100644 index 0000000..58d3cd3 --- /dev/null +++ b/dashboard/views.py @@ -0,0 +1,644 @@ +import io +import json +import zipfile +from datetime import timedelta, timezone as dt_timezone +from urllib.parse import urlparse + +from django.shortcuts import render, redirect, get_object_or_404 +from django.contrib.admin.views.decorators import staff_member_required +from django.contrib import messages +from django.db.models import Count, Q +from django.http import JsonResponse, HttpResponse +from django.utils import timezone + +from core.models import Sport, Team, Stadium, Game, Conference, Division, TeamAlias, StadiumAlias +from scraper.models import ScraperConfig, ScrapeJob, ManualReviewItem +from cloudkit.models import CloudKitConfiguration, CloudKitSyncState, CloudKitSyncJob + + +@staff_member_required +def index(request): + """Main dashboard overview.""" + # 
Get counts + context = { + 'title': 'Dashboard', + 'sports_count': Sport.objects.filter(is_active=True).count(), + 'teams_count': Team.objects.count(), + 'stadiums_count': Stadium.objects.count(), + 'games_count': Game.objects.count(), + # Recent activity + 'recent_jobs': ScrapeJob.objects.select_related('config__sport')[:5], + 'recent_syncs': CloudKitSyncJob.objects.select_related('configuration')[:5], + 'pending_reviews': ManualReviewItem.objects.filter(status='pending').count(), + # Sport summaries + 'sport_stats': get_sport_stats(), + } + return render(request, 'dashboard/index.html', context) + + +@staff_member_required +def stats(request): + """Detailed statistics view.""" + context = { + 'title': 'Statistics', + 'sport_stats': get_sport_stats(), + 'game_stats': get_game_stats(), + 'sync_stats': get_sync_stats(), + } + return render(request, 'dashboard/stats.html', context) + + +@staff_member_required +def scraper_status(request): + """Scraper status and controls.""" + configs = ScraperConfig.objects.select_related('sport').order_by('-season', 'sport') + recent_jobs = ScrapeJob.objects.select_related('config__sport').order_by('-created_at')[:20] + + context = { + 'title': 'Scraper Status', + 'configs': configs, + 'recent_jobs': recent_jobs, + 'running_jobs': ScrapeJob.objects.filter(status='running').count(), + 'pending_jobs': ScrapeJob.objects.filter(status='pending').count(), + } + return render(request, 'dashboard/scraper_status.html', context) + + +@staff_member_required +def sync_status(request): + """CloudKit sync status.""" + from core.models import Game, Team, Stadium + + # Get all configs for the dropdown + all_configs = CloudKitConfiguration.objects.all() + + # Get selected config from query param, or default to active + selected_config_id = request.GET.get('config') + if selected_config_id: + config = CloudKitConfiguration.objects.filter(id=selected_config_id).first() + else: + config = CloudKitConfiguration.objects.filter(is_active=True).first() + 
+ # Recent sync jobs (filtered by selected config if any) + recent_syncs = CloudKitSyncJob.objects.select_related('configuration').order_by('-created_at') + if config: + recent_syncs = recent_syncs.filter(configuration=config) + running_syncs = recent_syncs.filter(status='running').count() + recent_syncs = recent_syncs[:10] + + # Record counts + teams_count = Team.objects.count() + stadiums_count = Stadium.objects.count() + games_count = Game.objects.count() + total_records = teams_count + stadiums_count + games_count + + context = { + 'title': 'Sync Status', + 'config': config, + 'all_configs': all_configs, + 'recent_syncs': recent_syncs, + 'running_syncs': running_syncs, + 'total_records': total_records, + } + return render(request, 'dashboard/sync_status.html', context) + + +@staff_member_required +def review_queue(request): + """Manual review queue.""" + pending = ManualReviewItem.objects.filter( + status='pending' + ).select_related('sport', 'job').order_by('-confidence', '-created_at') + + # Group by sport and type + review_summary = ManualReviewItem.objects.filter( + status='pending' + ).values('sport__short_name', 'item_type').annotate(count=Count('id')) + + context = { + 'title': 'Review Queue', + 'pending_items': pending[:50], + 'review_summary': review_summary, + 'total_pending': pending.count(), + } + return render(request, 'dashboard/review_queue.html', context) + + +@staff_member_required +def run_scraper(request, sport_code, season): + """Trigger a scraper job.""" + if request.method == 'POST': + from scraper.tasks import run_scraper_task + + config = get_object_or_404(ScraperConfig, sport__code=sport_code, season=season) + run_scraper_task.delay(config.id) + messages.success(request, f'Started scraper for {config}') + + return redirect('dashboard:scraper_status') + + +@staff_member_required +def run_all_scrapers(request): + """Trigger all enabled scraper jobs.""" + if request.method == 'POST': + from scraper.tasks import run_scraper_task + + configs 
= ScraperConfig.objects.filter(is_enabled=True) + count = 0 + for config in configs: + run_scraper_task.delay(config.id) + count += 1 + + if count > 0: + messages.success(request, f'Started {count} scraper jobs') + else: + messages.warning(request, 'No enabled scraper configurations') + + return redirect('dashboard:scraper_status') + + +@staff_member_required +def run_sync(request): + """Trigger a CloudKit sync.""" + if request.method == 'POST': + from cloudkit.tasks import run_cloudkit_sync + + # Get config from form or fall back to active config + config_id = request.POST.get('config_id') + if config_id: + config = CloudKitConfiguration.objects.filter(id=config_id).first() + else: + config = CloudKitConfiguration.objects.filter(is_active=True).first() + + if config: + # Get selected record types + record_types = request.POST.getlist('record_types') + + if not record_types or 'all' in record_types: + # Sync all — no record_type filter + run_cloudkit_sync.delay(config.id) + messages.success(request, f'Started full CloudKit sync to {config.name} ({config.environment})') + else: + # Queue a sync job per selected record type + for rt in record_types: + run_cloudkit_sync.delay(config.id, record_type=rt) + type_list = ', '.join(record_types) + messages.success(request, f'Started CloudKit sync for {type_list} to {config.name} ({config.environment})') + + return redirect(f"{request.path.replace('/run-sync/', '/sync-status/')}?config={config.id}") + else: + messages.error(request, 'No CloudKit configuration found') + + return redirect('dashboard:sync_status') + + +@staff_member_required +def sync_progress_api(request, job_id): + """API endpoint for sync job progress.""" + try: + job = CloudKitSyncJob.objects.get(id=job_id) + return JsonResponse(job.get_progress()) + except CloudKitSyncJob.DoesNotExist: + return JsonResponse({'error': 'Job not found'}, status=404) + + +@staff_member_required +def running_syncs_api(request): + """API endpoint to check for running sync 
jobs.""" + running_jobs = CloudKitSyncJob.objects.filter(status='running').values( + 'id', 'configuration_id' + ) + return JsonResponse({'running': list(running_jobs)}) + + +def get_sport_stats(): + """Get stats per sport.""" + stats = [] + for sport in Sport.objects.filter(is_active=True): + stats.append({ + 'sport': sport, + 'teams': sport.teams.count(), + 'stadiums': sport.stadiums.count(), + 'games': sport.games.count(), + 'pending_reviews': sport.review_items.filter(status='pending').count(), + }) + return stats + + +def get_game_stats(): + """Get game statistics.""" + now = timezone.now() + return { + 'total': Game.objects.count(), + 'scheduled': Game.objects.filter(status='scheduled').count(), + 'final': Game.objects.filter(status='final').count(), + 'today': Game.objects.filter( + game_date__date=now.date() + ).count(), + 'this_week': Game.objects.filter( + game_date__gte=now, + game_date__lt=now + timedelta(days=7) + ).count(), + } + + +def get_sync_stats(): + """Get CloudKit sync statistics.""" + return { + 'total': CloudKitSyncState.objects.count(), + 'synced': CloudKitSyncState.objects.filter(sync_status='synced').count(), + 'pending': CloudKitSyncState.objects.filter(sync_status='pending').count(), + 'failed': CloudKitSyncState.objects.filter(sync_status='failed').count(), + } + + +# ============================================================================= +# Export Views +# ============================================================================= + +@staff_member_required +def export_data(request): + """Export data page with options.""" + sports = Sport.objects.filter(is_active=True).order_by('code') + + # Get available years from game dates + from django.db.models.functions import ExtractYear + years = Game.objects.annotate( + game_year=ExtractYear('game_date') + ).values_list('game_year', flat=True).distinct().order_by('-game_year') + + # Get record counts for display + context = { + 'title': 'Export Data', + 'sports': sports, + 'years': 
@staff_member_required
def export_download(request):
    """Generate and download export files.

    Query params:
        type: repeated export-type names; defaults to every canonical export.
        sport: optional sport code filter applied to all exports.
        year: optional game-year filter (games export only).

    Returns a single JSON attachment when exactly one export type is
    selected, otherwise a ZIP archive with one JSON file per type.
    """
    # Get export options from request
    export_types = request.GET.getlist('type')
    sport_filter = request.GET.get('sport', '')
    year_filter = request.GET.get('year', '')

    if not export_types:
        export_types = ['sports', 'league_structure', 'teams', 'stadiums',
                        'games', 'team_aliases', 'stadium_aliases']

    # Convert year to int if provided
    year_int = int(year_filter) if year_filter else None

    # Map each requested type to its output filename and generated payload.
    files = {}

    if 'sports' in export_types:
        files['sports_canonical.json'] = export_sports(sport_filter)

    if 'league_structure' in export_types:
        files['league_structure.json'] = export_league_structure(sport_filter)

    if 'teams' in export_types:
        files['teams_canonical.json'] = export_teams(sport_filter)

    if 'stadiums' in export_types:
        files['stadiums_canonical.json'] = export_stadiums(sport_filter)

    if 'games' in export_types:
        files['games_canonical.json'] = export_games(sport_filter, year_int)

    if 'team_aliases' in export_types:
        files['team_aliases.json'] = export_team_aliases(sport_filter)

    if 'stadium_aliases' in export_types:
        files['stadium_aliases.json'] = export_stadium_aliases(sport_filter)

    # If single file, return JSON directly under its own filename.
    if len(files) == 1:
        filename, data = next(iter(files.items()))
        response = HttpResponse(
            json.dumps(data, indent=2),
            content_type='application/json'
        )
        # Bug fix: the attachment name must be the computed export filename,
        # not a hard-coded placeholder string.
        response['Content-Disposition'] = f'attachment; filename="{filename}"'
        return response

    # Multiple files - return as ZIP
    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zf:
        for filename, data in files.items():
            zf.writestr(filename, json.dumps(data, indent=2))

    zip_buffer.seek(0)

    # Build a descriptive archive name, e.g. sportstime_export_nba_2025.zip.
    parts = ['sportstime_export']
    if sport_filter:
        parts.append(sport_filter)
    if year_filter:
        parts.append(str(year_filter))
    zip_filename = '_'.join(parts) + '.zip'

    response = HttpResponse(zip_buffer.read(), content_type='application/zip')
    response['Content-Disposition'] = f'attachment; filename="{zip_filename}"'
    return response
set() # Track IDs to prevent duplicates + display_order = 0 + + sports = Sport.objects.all() + if sport_filter: + sports = sports.filter(code=sport_filter.lower()) + + for sport in sports.order_by('code'): + league_id = f"{sport.code}_league" + + # Skip if we've already seen this ID + if league_id in seen_ids: + continue + seen_ids.add(league_id) + + data.append({ + 'id': league_id, + 'sport': sport.short_name, + 'type': 'league', + 'name': sport.name, + 'abbreviation': sport.short_name, + 'parent_id': None, + 'display_order': display_order, + }) + display_order += 1 + + conferences = Conference.objects.filter(sport=sport).order_by('order', 'name') + for conf in conferences: + conf_id = _get_conference_id(conf) + + # Skip duplicate conference IDs + if conf_id in seen_ids: + continue + seen_ids.add(conf_id) + + data.append({ + 'id': conf_id, + 'sport': sport.short_name, + 'type': 'conference', + 'name': conf.name, + 'abbreviation': conf.short_name or None, + 'parent_id': league_id, + 'display_order': conf.order, + }) + + divisions = Division.objects.filter(conference=conf).order_by('order', 'name') + for div in divisions: + div_id = _get_division_id(div) + + # Skip duplicate division IDs + if div_id in seen_ids: + continue + seen_ids.add(div_id) + + data.append({ + 'id': div_id, + 'sport': sport.short_name, + 'type': 'division', + 'name': div.name, + 'abbreviation': div.short_name or None, + 'parent_id': conf_id, + 'display_order': div.order, + }) + + return data + + +def export_teams(sport_filter=None): + """Export teams data.""" + teams = Team.objects.select_related( + 'sport', 'division', 'division__conference', 'home_stadium' + ).all() + + if sport_filter: + teams = teams.filter(sport__code=sport_filter.lower()) + + data = [] + for team in teams.order_by('sport__code', 'city', 'name'): + conference_id = None + division_id = None + if team.division: + division_id = _get_division_id(team.division) + conference_id = _get_conference_id(team.division.conference) + + 
data.append({ + 'canonical_id': team.id, + 'name': team.name, + 'abbreviation': team.abbreviation, + 'sport': team.sport.short_name, + 'city': team.city, + 'stadium_canonical_id': team.home_stadium_id, + 'conference_id': conference_id, + 'division_id': division_id, + 'primary_color': team.primary_color or None, + 'secondary_color': team.secondary_color or None, + }) + + return data + + +def export_stadiums(sport_filter=None): + """Export stadiums data.""" + stadiums = Stadium.objects.select_related('sport').all() + + if sport_filter: + stadiums = stadiums.filter(sport__code=sport_filter.lower()) + + # Build map of stadium -> team abbreviations + stadium_teams = {} + teams = Team.objects.filter(home_stadium__isnull=False).select_related('home_stadium') + if sport_filter: + teams = teams.filter(sport__code=sport_filter.lower()) + + for team in teams: + if team.home_stadium_id not in stadium_teams: + stadium_teams[team.home_stadium_id] = [] + stadium_teams[team.home_stadium_id].append(team.abbreviation) + + data = [] + for stadium in stadiums.order_by('sport__code', 'city', 'name'): + data.append({ + 'canonical_id': stadium.id, + 'name': stadium.name, + 'city': stadium.city, + 'state': stadium.state or None, + 'latitude': float(stadium.latitude) if stadium.latitude else None, + 'longitude': float(stadium.longitude) if stadium.longitude else None, + 'capacity': stadium.capacity or 0, + 'sport': stadium.sport.short_name, + 'primary_team_abbrevs': stadium_teams.get(stadium.id, []), + 'year_opened': stadium.opened_year, + 'timezone_identifier': stadium.timezone or None, + 'image_url': stadium.image_url or None, + }) + + return data + + +def export_games(sport_filter=None, year_filter=None): + """Export games data.""" + games = Game.objects.select_related( + 'sport', 'home_team', 'away_team', 'stadium' + ).all() + + if sport_filter: + games = games.filter(sport__code=sport_filter.lower()) + + if year_filter: + games = games.filter(game_date__year=year_filter) + + data = [] 
+ for game in games.order_by('game_date', 'sport__code'): + # Ensure game_date is UTC-aware + game_dt = game.game_date + if game_dt.tzinfo is None: + game_dt = game_dt.replace(tzinfo=dt_timezone.utc) + utc_dt = game_dt.astimezone(dt_timezone.utc) + + source = None + if game.source_url: + source = _extract_domain(game.source_url) + + data.append({ + 'canonical_id': game.id, + 'sport': game.sport.short_name, + 'season': str(game.game_date.year), + 'game_datetime_utc': utc_dt.strftime('%Y-%m-%dT%H:%M:%SZ'), + 'home_team': game.home_team.full_name, + 'away_team': game.away_team.full_name, + 'home_team_abbrev': game.home_team.abbreviation, + 'away_team_abbrev': game.away_team.abbreviation, + 'home_team_canonical_id': game.home_team_id, + 'away_team_canonical_id': game.away_team_id, + 'venue': game.stadium.name if game.stadium else None, + 'stadium_canonical_id': game.stadium_id, + 'source': source, + 'is_playoff': game.is_playoff, + 'broadcast_info': None, + }) + + return data + + +def export_team_aliases(sport_filter=None): + """Export team aliases data.""" + aliases = TeamAlias.objects.select_related('team', 'team__sport').all() + + if sport_filter: + aliases = aliases.filter(team__sport__code=sport_filter.lower()) + + alias_type_map = { + 'full_name': 'name', + 'city_name': 'city', + 'abbreviation': 'abbreviation', + 'nickname': 'name', + 'historical': 'name', + } + + data = [] + for alias in aliases.order_by('team__sport__code', 'team__id', 'id'): + valid_from = alias.valid_from.strftime('%Y-%m-%d') if alias.valid_from else None + valid_until = alias.valid_until.strftime('%Y-%m-%d') if alias.valid_until else None + export_type = alias_type_map.get(alias.alias_type, 'name') + + data.append({ + 'id': f"alias_{alias.team.sport.code}_{alias.pk}", + 'team_canonical_id': alias.team_id, + 'alias_type': export_type, + 'alias_value': alias.alias, + 'valid_from': valid_from, + 'valid_until': valid_until, + }) + + return data + + +def export_stadium_aliases(sport_filter=None): 
+ """Export stadium aliases data.""" + aliases = StadiumAlias.objects.select_related('stadium', 'stadium__sport').all() + + if sport_filter: + aliases = aliases.filter(stadium__sport__code=sport_filter.lower()) + + data = [] + for alias in aliases.order_by('stadium__sport__code', 'stadium__id', 'id'): + valid_from = alias.valid_from.strftime('%Y-%m-%d') if alias.valid_from else None + valid_until = alias.valid_until.strftime('%Y-%m-%d') if alias.valid_until else None + + data.append({ + 'alias_name': alias.alias, + 'stadium_canonical_id': alias.stadium_id, + 'valid_from': valid_from, + 'valid_until': valid_until, + }) + + return data diff --git a/docker-compose.unraid.yml b/docker-compose.unraid.yml new file mode 100644 index 0000000..0cce50c --- /dev/null +++ b/docker-compose.unraid.yml @@ -0,0 +1,114 @@ +services: + db: + image: postgres:15-alpine + container_name: sportstime-db + restart: unless-stopped + volumes: + - /mnt/user/appdata/SportsTimeScraper/postgres:/var/lib/postgresql/data + environment: + POSTGRES_DB: sportstime + POSTGRES_USER: sportstime + POSTGRES_PASSWORD: ${DB_PASSWORD:-changeme} + healthcheck: + test: ["CMD-SHELL", "pg_isready -U sportstime -d sportstime"] + interval: 10s + timeout: 5s + retries: 5 + networks: + - sportstime + + redis: + image: redis:7-alpine + container_name: sportstime-redis + restart: unless-stopped + volumes: + - /mnt/user/appdata/SportsTimeScraper/redis:/data + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 5s + retries: 5 + networks: + - sportstime + + web: + build: . 
+ container_name: sportstime-web + restart: unless-stopped + volumes: + - /mnt/user/appdata/SportsTimeScraper/static:/app/staticfiles + - /mnt/user/appdata/SportsTimeScraper/media:/app/media + - /mnt/user/appdata/SportsTimeScraper/logs:/app/logs + - /mnt/user/appdata/SportsTimeScraper/secrets:/app/secrets + - /mnt/user/downloads/SportsTimeData:/app/output + ports: + - "8842:8000" + env_file: + - .env + environment: + - POSTGRES_HOST=db + - POSTGRES_PORT=5432 + - ALLOWED_HOSTS=localhost,127.0.0.1,10.3.3.11 + - SESSION_COOKIE_SECURE=False + - CSRF_COOKIE_SECURE=False + - DJANGO_SUPERUSER_USERNAME=${ADMIN_USERNAME:-admin} + - DJANGO_SUPERUSER_PASSWORD=${ADMIN_PASSWORD:-changeme} + - DJANGO_SUPERUSER_EMAIL=${ADMIN_EMAIL:-admin@localhost} + - IMPORT_INITIAL_DATA=${IMPORT_INITIAL_DATA:-false} + depends_on: + db: + condition: service_healthy + redis: + condition: service_healthy + networks: + - sportstime + command: gunicorn sportstime.wsgi:application --bind 0.0.0.0:8000 --workers 3 --timeout 120 + + celery-worker: + build: . + container_name: sportstime-celery-worker + restart: unless-stopped + volumes: + - /mnt/user/appdata/SportsTimeScraper/logs:/app/logs + - /mnt/user/appdata/SportsTimeScraper/secrets:/app/secrets + - /mnt/user/downloads/SportsTimeData:/app/output + env_file: + - .env + environment: + - POSTGRES_HOST=db + - POSTGRES_PORT=5432 + entrypoint: [] + depends_on: + db: + condition: service_healthy + redis: + condition: service_healthy + networks: + - sportstime + command: celery -A sportstime worker -l INFO --concurrency=2 + + celery-beat: + build: . 
+ container_name: sportstime-celery-beat + restart: unless-stopped + volumes: + - /mnt/user/appdata/SportsTimeScraper/celerybeat:/app/celerybeat + - /mnt/user/appdata/SportsTimeScraper/secrets:/app/secrets + env_file: + - .env + environment: + - POSTGRES_HOST=db + - POSTGRES_PORT=5432 + entrypoint: [] + depends_on: + db: + condition: service_healthy + redis: + condition: service_healthy + networks: + - sportstime + command: celery -A sportstime beat -l INFO --scheduler django_celery_beat.schedulers:DatabaseScheduler + +networks: + sportstime: + driver: bridge diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..5523b0c --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,113 @@ +services: + db: + image: postgres:15-alpine + container_name: sportstime-db + restart: unless-stopped + volumes: + - postgres_data:/var/lib/postgresql/data + environment: + POSTGRES_DB: sportstime + POSTGRES_USER: sportstime + POSTGRES_PASSWORD: ${DB_PASSWORD:-devpassword} + healthcheck: + test: ["CMD-SHELL", "pg_isready -U sportstime -d sportstime"] + interval: 10s + timeout: 5s + retries: 5 + networks: + - sportstime + + redis: + image: redis:7-alpine + container_name: sportstime-redis + restart: unless-stopped + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 5s + retries: 5 + networks: + - sportstime + + web: + build: . 
+ container_name: sportstime-web + restart: unless-stopped + volumes: + - .:/app + - ./output:/app/output:ro + ports: + - "8842:8000" + environment: + - DEBUG=True + - SECRET_KEY=dev-secret-key-not-for-production + - ALLOWED_HOSTS=localhost,127.0.0.1,10.3.3.11 + - SESSION_COOKIE_SECURE=False + - CSRF_COOKIE_SECURE=False + - DATABASE_URL=postgresql://sportstime:${DB_PASSWORD:-devpassword}@db:5432/sportstime + - REDIS_URL=redis://redis:6379/0 + - POSTGRES_HOST=db + - POSTGRES_PORT=5432 + - DJANGO_SUPERUSER_USERNAME=admin + - DJANGO_SUPERUSER_PASSWORD=admin + - DJANGO_SUPERUSER_EMAIL=admin@localhost + - IMPORT_INITIAL_DATA=true + depends_on: + db: + condition: service_healthy + redis: + condition: service_healthy + networks: + - sportstime + + celery-worker: + build: . + container_name: sportstime-celery-worker + restart: unless-stopped + volumes: + - .:/app + environment: + - DEBUG=True + - SECRET_KEY=dev-secret-key-not-for-production + - DATABASE_URL=postgresql://sportstime:${DB_PASSWORD:-devpassword}@db:5432/sportstime + - REDIS_URL=redis://redis:6379/0 + - POSTGRES_HOST=db + - POSTGRES_PORT=5432 + entrypoint: [] + depends_on: + db: + condition: service_healthy + redis: + condition: service_healthy + networks: + - sportstime + command: celery -A sportstime worker -l INFO --concurrency=2 + + celery-beat: + build: . 
+ container_name: sportstime-celery-beat + restart: unless-stopped + environment: + - DEBUG=True + - SECRET_KEY=dev-secret-key-not-for-production + - DATABASE_URL=postgresql://sportstime:${DB_PASSWORD:-devpassword}@db:5432/sportstime + - REDIS_URL=redis://redis:6379/0 + - POSTGRES_HOST=db + - POSTGRES_PORT=5432 + entrypoint: [] + depends_on: + db: + condition: service_healthy + redis: + condition: service_healthy + networks: + - sportstime + command: celery -A sportstime beat -l INFO --scheduler django_celery_beat.schedulers:DatabaseScheduler + +volumes: + postgres_data: + +networks: + sportstime: + driver: bridge + diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh new file mode 100644 index 0000000..0e11e45 --- /dev/null +++ b/docker-entrypoint.sh @@ -0,0 +1,45 @@ +#!/bin/bash +set -e + +# Wait for database to be ready +echo "Waiting for PostgreSQL..." +while ! nc -z $POSTGRES_HOST ${POSTGRES_PORT:-5432}; do + sleep 1 +done +echo "PostgreSQL is ready!" + +# Run migrations +echo "Running migrations..." +python manage.py migrate --noinput + +# Collect static files (skip in DEBUG mode - Django serves them directly) +if [ "$DEBUG" != "True" ]; then + echo "Collecting static files..." + python manage.py collectstatic --noinput +else + echo "DEBUG mode - skipping collectstatic" +fi + +# Create superuser if not exists +if [ -n "$DJANGO_SUPERUSER_USERNAME" ] && [ -n "$DJANGO_SUPERUSER_PASSWORD" ] && [ -n "$DJANGO_SUPERUSER_EMAIL" ]; then + echo "Creating superuser..." 
+    python manage.py shell << 'EOF'
+import os; from django.contrib.auth import get_user_model
+User = get_user_model()
+if not User.objects.filter(username=os.environ['DJANGO_SUPERUSER_USERNAME']).exists():
+    User.objects.create_superuser(os.environ['DJANGO_SUPERUSER_USERNAME'], os.environ['DJANGO_SUPERUSER_EMAIL'], os.environ['DJANGO_SUPERUSER_PASSWORD'])
+    print('Superuser created successfully')
+else:
+    print('Superuser already exists')
+EOF
+fi
+
+# Import initial data if flag is set
+if [ "$IMPORT_INITIAL_DATA" = "true" ]; then
+    echo "Importing initial data..."
+    python manage.py import_data --data-dir=/app --output-dir=/app/output || true
+fi
+
+# Start the server
+echo "Starting server..."
+exec "$@"
diff --git a/manage.py b/manage.py
new file mode 100644
index 0000000..1453299
--- /dev/null
+++ b/manage.py
@@ -0,0 +1,22 @@
+#!/usr/bin/env python
+"""Django's command-line utility for administrative tasks."""
+import os
+import sys
+
+
+def main():
+    """Run administrative tasks."""
+    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'sportstime.settings')
+    try:
+        from django.core.management import execute_from_command_line
+    except ImportError as exc:
+        raise ImportError(
+            "Couldn't import Django. Are you sure it's installed and "
+            "available on your PYTHONPATH environment variable? Did you "
+            "forget to activate a virtual environment?"
+ ) from exc + execute_from_command_line(sys.argv) + + +if __name__ == '__main__': + main() diff --git a/notifications/__init__.py b/notifications/__init__.py new file mode 100644 index 0000000..d51321c --- /dev/null +++ b/notifications/__init__.py @@ -0,0 +1 @@ +default_app_config = 'notifications.apps.NotificationsConfig' diff --git a/notifications/admin.py b/notifications/admin.py new file mode 100644 index 0000000..cfc950f --- /dev/null +++ b/notifications/admin.py @@ -0,0 +1,119 @@ +from django.contrib import admin +from django.utils.html import format_html +from simple_history.admin import SimpleHistoryAdmin + +from .models import EmailConfiguration, EmailLog + + +@admin.register(EmailConfiguration) +class EmailConfigurationAdmin(SimpleHistoryAdmin): + list_display = [ + 'name', + 'is_enabled_badge', + 'recipient_count', + 'notify_on_scrape_complete', + 'notify_on_scrape_failure', + 'notify_on_sync_failure', + ] + list_filter = ['is_enabled'] + readonly_fields = ['created_at', 'updated_at'] + + fieldsets = [ + (None, { + 'fields': ['name', 'is_enabled'] + }), + ('Recipients', { + 'fields': ['recipient_emails'] + }), + ('Scraper Notifications', { + 'fields': [ + 'notify_on_scrape_complete', + 'notify_on_scrape_failure', + 'notify_on_new_reviews', + ] + }), + ('CloudKit Sync Notifications', { + 'fields': [ + 'notify_on_sync_complete', + 'notify_on_sync_failure', + ] + }), + ('Thresholds', { + 'fields': ['min_games_for_notification'] + }), + ('Metadata', { + 'fields': ['created_at', 'updated_at'], + 'classes': ['collapse'] + }), + ] + + actions = ['send_test_email'] + + def is_enabled_badge(self, obj): + if obj.is_enabled: + return format_html('● Enabled') + return format_html('○ Disabled') + is_enabled_badge.short_description = 'Status' + + def recipient_count(self, obj): + return len(obj.get_recipients()) + recipient_count.short_description = 'Recipients' + + @admin.action(description='Send test email') + def send_test_email(self, request, queryset): + from 
notifications.tasks import send_test_notification + for config in queryset: + send_test_notification.delay(config.id) + self.message_user(request, f'Test emails queued for {queryset.count()} configurations.') + + +@admin.register(EmailLog) +class EmailLogAdmin(admin.ModelAdmin): + list_display = [ + 'subject', + 'status_badge', + 'recipients_display', + 'created_at', + ] + list_filter = ['status', 'created_at'] + search_fields = ['subject', 'recipients'] + date_hierarchy = 'created_at' + ordering = ['-created_at'] + readonly_fields = [ + 'configuration', + 'subject', + 'recipients', + 'body_preview', + 'status', + 'error_message', + 'scrape_job', + 'sync_job', + 'created_at', + ] + + def has_add_permission(self, request): + return False + + def has_change_permission(self, request, obj=None): + return False + + def status_badge(self, obj): + colors = { + 'sent': '#5cb85c', + 'failed': '#d9534f', + } + color = colors.get(obj.status, '#999') + return format_html( + '{}', + color, + obj.status.upper() + ) + status_badge.short_description = 'Status' + + def recipients_display(self, obj): + recipients = obj.recipients.split(',') + if len(recipients) > 2: + return f"{recipients[0]}, +{len(recipients)-1} more" + return obj.recipients + recipients_display.short_description = 'Recipients' diff --git a/notifications/apps.py b/notifications/apps.py new file mode 100644 index 0000000..ea7bfa3 --- /dev/null +++ b/notifications/apps.py @@ -0,0 +1,7 @@ +from django.apps import AppConfig + + +class NotificationsConfig(AppConfig): + default_auto_field = 'django.db.models.BigAutoField' + name = 'notifications' + verbose_name = 'Notifications' diff --git a/notifications/migrations/0001_initial.py b/notifications/migrations/0001_initial.py new file mode 100644 index 0000000..eea2cf6 --- /dev/null +++ b/notifications/migrations/0001_initial.py @@ -0,0 +1,90 @@ +# Generated by Django 5.1.15 on 2026-01-26 08:59 + +import django.db.models.deletion +import simple_history.models +from 
django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ('cloudkit', '0001_initial'), + ('scraper', '0001_initial'), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name='EmailConfiguration', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(default='Default', help_text='Configuration name', max_length=100)), + ('is_enabled', models.BooleanField(default=True, help_text='Whether email notifications are enabled')), + ('recipient_emails', models.TextField(help_text='Comma-separated list of recipient email addresses')), + ('notify_on_scrape_complete', models.BooleanField(default=True, help_text='Send email after each scraper job completes')), + ('notify_on_scrape_failure', models.BooleanField(default=True, help_text='Send email when scraper job fails')), + ('notify_on_sync_complete', models.BooleanField(default=False, help_text='Send email after CloudKit sync completes')), + ('notify_on_sync_failure', models.BooleanField(default=True, help_text='Send email when CloudKit sync fails')), + ('notify_on_new_reviews', models.BooleanField(default=True, help_text='Include review items in scrape notifications')), + ('min_games_for_notification', models.PositiveIntegerField(default=0, help_text='Minimum games changed to trigger notification (0 = always)')), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ], + options={ + 'verbose_name': 'Email Configuration', + 'verbose_name_plural': 'Email Configurations', + }, + ), + migrations.CreateModel( + name='EmailLog', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('subject', models.CharField(max_length=255)), + ('recipients', 
models.TextField(help_text='Comma-separated list of recipients')), + ('body_preview', models.TextField(blank=True, help_text='First 500 chars of email body')), + ('status', models.CharField(choices=[('sent', 'Sent'), ('failed', 'Failed')], max_length=10)), + ('error_message', models.TextField(blank=True)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('configuration', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='logs', to='notifications.emailconfiguration')), + ('scrape_job', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='email_logs', to='scraper.scrapejob')), + ('sync_job', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='email_logs', to='cloudkit.cloudkitsyncjob')), + ], + options={ + 'verbose_name': 'Email Log', + 'verbose_name_plural': 'Email Logs', + 'ordering': ['-created_at'], + }, + ), + migrations.CreateModel( + name='HistoricalEmailConfiguration', + fields=[ + ('id', models.BigIntegerField(auto_created=True, blank=True, db_index=True, verbose_name='ID')), + ('name', models.CharField(default='Default', help_text='Configuration name', max_length=100)), + ('is_enabled', models.BooleanField(default=True, help_text='Whether email notifications are enabled')), + ('recipient_emails', models.TextField(help_text='Comma-separated list of recipient email addresses')), + ('notify_on_scrape_complete', models.BooleanField(default=True, help_text='Send email after each scraper job completes')), + ('notify_on_scrape_failure', models.BooleanField(default=True, help_text='Send email when scraper job fails')), + ('notify_on_sync_complete', models.BooleanField(default=False, help_text='Send email after CloudKit sync completes')), + ('notify_on_sync_failure', models.BooleanField(default=True, help_text='Send email when CloudKit sync fails')), + ('notify_on_new_reviews', 
models.BooleanField(default=True, help_text='Include review items in scrape notifications')), + ('min_games_for_notification', models.PositiveIntegerField(default=0, help_text='Minimum games changed to trigger notification (0 = always)')), + ('created_at', models.DateTimeField(blank=True, editable=False)), + ('updated_at', models.DateTimeField(blank=True, editable=False)), + ('history_id', models.AutoField(primary_key=True, serialize=False)), + ('history_date', models.DateTimeField(db_index=True)), + ('history_change_reason', models.CharField(max_length=100, null=True)), + ('history_type', models.CharField(choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')], max_length=1)), + ('history_user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)), + ], + options={ + 'verbose_name': 'historical Email Configuration', + 'verbose_name_plural': 'historical Email Configurations', + 'ordering': ('-history_date', '-history_id'), + 'get_latest_by': ('history_date', 'history_id'), + }, + bases=(simple_history.models.HistoricalChanges, models.Model), + ), + ] diff --git a/notifications/migrations/__init__.py b/notifications/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/notifications/models.py b/notifications/models.py new file mode 100644 index 0000000..193083b --- /dev/null +++ b/notifications/models.py @@ -0,0 +1,131 @@ +from django.db import models +from django.conf import settings +from simple_history.models import HistoricalRecords + + +class EmailConfiguration(models.Model): + """ + Email notification configuration. 
+ """ + name = models.CharField( + max_length=100, + default='Default', + help_text='Configuration name' + ) + is_enabled = models.BooleanField( + default=True, + help_text='Whether email notifications are enabled' + ) + + # Recipients + recipient_emails = models.TextField( + help_text='Comma-separated list of recipient email addresses' + ) + + # What to notify about + notify_on_scrape_complete = models.BooleanField( + default=True, + help_text='Send email after each scraper job completes' + ) + notify_on_scrape_failure = models.BooleanField( + default=True, + help_text='Send email when scraper job fails' + ) + notify_on_sync_complete = models.BooleanField( + default=False, + help_text='Send email after CloudKit sync completes' + ) + notify_on_sync_failure = models.BooleanField( + default=True, + help_text='Send email when CloudKit sync fails' + ) + notify_on_new_reviews = models.BooleanField( + default=True, + help_text='Include review items in scrape notifications' + ) + + # Thresholds + min_games_for_notification = models.PositiveIntegerField( + default=0, + help_text='Minimum games changed to trigger notification (0 = always)' + ) + + # Metadata + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + + # Audit trail + history = HistoricalRecords() + + class Meta: + verbose_name = 'Email Configuration' + verbose_name_plural = 'Email Configurations' + + def __str__(self): + return self.name + + def get_recipients(self): + """Return list of recipient emails.""" + return [ + email.strip() + for email in self.recipient_emails.split(',') + if email.strip() + ] + + +class EmailLog(models.Model): + """ + Log of sent email notifications. 
+ """ + STATUS_CHOICES = [ + ('sent', 'Sent'), + ('failed', 'Failed'), + ] + + configuration = models.ForeignKey( + EmailConfiguration, + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name='logs' + ) + subject = models.CharField(max_length=255) + recipients = models.TextField( + help_text='Comma-separated list of recipients' + ) + body_preview = models.TextField( + blank=True, + help_text='First 500 chars of email body' + ) + status = models.CharField( + max_length=10, + choices=STATUS_CHOICES + ) + error_message = models.TextField(blank=True) + + # Related objects + scrape_job = models.ForeignKey( + 'scraper.ScrapeJob', + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name='email_logs' + ) + sync_job = models.ForeignKey( + 'cloudkit.CloudKitSyncJob', + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name='email_logs' + ) + + # Metadata + created_at = models.DateTimeField(auto_now_add=True) + + class Meta: + ordering = ['-created_at'] + verbose_name = 'Email Log' + verbose_name_plural = 'Email Logs' + + def __str__(self): + return f"{self.subject} ({self.status})" diff --git a/notifications/tasks.py b/notifications/tasks.py new file mode 100644 index 0000000..09cb888 --- /dev/null +++ b/notifications/tasks.py @@ -0,0 +1,240 @@ +import logging + +from celery import shared_task +from django.core.mail import send_mail +from django.template.loader import render_to_string +from django.conf import settings + +logger = logging.getLogger('notifications') + + +@shared_task +def send_scrape_notification(job_id: int): + """ + Send email notification after scraper job. 
+ """ + from scraper.models import ScrapeJob, ManualReviewItem + from notifications.models import EmailConfiguration, EmailLog + + try: + job = ScrapeJob.objects.select_related('config__sport').get(id=job_id) + except ScrapeJob.DoesNotExist: + logger.error(f"ScrapeJob {job_id} not found") + return + + # Get email configuration + config = EmailConfiguration.objects.filter(is_enabled=True).first() + if not config: + logger.info("No email configuration enabled") + return + + # Check if we should send based on configuration + if job.status == 'completed' and not config.notify_on_scrape_complete: + return + if job.status == 'failed' and not config.notify_on_scrape_failure: + return + + # Check minimum games threshold + total_changes = job.games_new + job.games_updated + if job.status == 'completed' and total_changes < config.min_games_for_notification: + logger.info(f"Skipping notification: {total_changes} changes below threshold {config.min_games_for_notification}") + return + + # Get review items if configured + review_items = [] + if config.notify_on_new_reviews and job.review_items_created > 0: + review_items = list( + ManualReviewItem.objects.filter(job=job) + .values('raw_value', 'item_type', 'suggested_id', 'confidence', 'reason')[:10] + ) + + # Build context + context = { + 'job': job, + 'sport': job.config.sport, + 'season_display': job.config.sport.get_season_display(job.config.season), + 'review_items': review_items, + 'suggested_actions': get_suggested_actions(job), + } + + # Render email + subject = f"[SportsTime] {job.config.sport.short_name} Scraper: {job.status.upper()}" + if job.status == 'completed': + subject = f"[SportsTime] {job.config.sport.short_name}: {job.games_new} new, {job.games_updated} updated" + + html_body = render_to_string('notifications/emails/scrape_report.html', context) + text_body = render_to_string('notifications/emails/scrape_report.txt', context) + + # Send email + recipients = config.get_recipients() + try: + send_mail( + 
subject=subject, + message=text_body, + from_email=settings.DEFAULT_FROM_EMAIL, + recipient_list=recipients, + html_message=html_body, + fail_silently=False, + ) + + # Log success + EmailLog.objects.create( + configuration=config, + subject=subject, + recipients=','.join(recipients), + body_preview=text_body[:500], + status='sent', + scrape_job=job, + ) + logger.info(f"Sent scrape notification for job {job_id}") + + except Exception as e: + # Log failure + EmailLog.objects.create( + configuration=config, + subject=subject, + recipients=','.join(recipients), + body_preview=text_body[:500], + status='failed', + error_message=str(e), + scrape_job=job, + ) + logger.error(f"Failed to send scrape notification: {e}") + + +@shared_task +def send_sync_notification(job_id: int): + """ + Send email notification after CloudKit sync. + """ + from cloudkit.models import CloudKitSyncJob + from notifications.models import EmailConfiguration, EmailLog + + try: + job = CloudKitSyncJob.objects.select_related('configuration').get(id=job_id) + except CloudKitSyncJob.DoesNotExist: + logger.error(f"CloudKitSyncJob {job_id} not found") + return + + # Get email configuration + config = EmailConfiguration.objects.filter(is_enabled=True).first() + if not config: + return + + # Check if we should send + if job.status == 'completed' and not config.notify_on_sync_complete: + return + if job.status == 'failed' and not config.notify_on_sync_failure: + return + + # Build email + subject = f"[SportsTime] CloudKit Sync: {job.status.upper()}" + if job.status == 'completed': + subject = f"[SportsTime] CloudKit Sync: {job.records_synced} records" + + context = { + 'job': job, + } + + html_body = render_to_string('notifications/emails/sync_report.html', context) + text_body = render_to_string('notifications/emails/sync_report.txt', context) + + recipients = config.get_recipients() + try: + send_mail( + subject=subject, + message=text_body, + from_email=settings.DEFAULT_FROM_EMAIL, + 
recipient_list=recipients, + html_message=html_body, + fail_silently=False, + ) + + EmailLog.objects.create( + configuration=config, + subject=subject, + recipients=','.join(recipients), + body_preview=text_body[:500], + status='sent', + sync_job=job, + ) + + except Exception as e: + EmailLog.objects.create( + configuration=config, + subject=subject, + recipients=','.join(recipients), + body_preview=text_body[:500], + status='failed', + error_message=str(e), + sync_job=job, + ) + logger.error(f"Failed to send sync notification: {e}") + + +@shared_task +def send_test_notification(config_id: int): + """ + Send a test notification email. + """ + from notifications.models import EmailConfiguration, EmailLog + + try: + config = EmailConfiguration.objects.get(id=config_id) + except EmailConfiguration.DoesNotExist: + return + + subject = "[SportsTime] Test Notification" + body = "This is a test notification from SportsTime.\n\nIf you received this, email notifications are working correctly." + + recipients = config.get_recipients() + try: + send_mail( + subject=subject, + message=body, + from_email=settings.DEFAULT_FROM_EMAIL, + recipient_list=recipients, + fail_silently=False, + ) + + EmailLog.objects.create( + configuration=config, + subject=subject, + recipients=','.join(recipients), + body_preview=body, + status='sent', + ) + logger.info(f"Sent test notification to {recipients}") + + except Exception as e: + EmailLog.objects.create( + configuration=config, + subject=subject, + recipients=','.join(recipients), + body_preview=body, + status='failed', + error_message=str(e), + ) + logger.error(f"Failed to send test notification: {e}") + + +def get_suggested_actions(job): + """ + Generate suggested actions based on job results. 
+ """ + actions = [] + + if job.review_items_created > 0: + actions.append(f"Review {job.review_items_created} items in the review queue") + + if job.games_errors > 0: + actions.append(f"Investigate {job.games_errors} game processing errors") + + if job.status == 'failed': + actions.append("Check scraper logs for error details") + actions.append("Verify data source availability") + + if job.games_found == 0 and job.status == 'completed': + actions.append("Verify scraper configuration and season dates") + + return actions diff --git a/notifications/templates/notifications/emails/scrape_report.html b/notifications/templates/notifications/emails/scrape_report.html new file mode 100644 index 0000000..a661357 --- /dev/null +++ b/notifications/templates/notifications/emails/scrape_report.html @@ -0,0 +1,119 @@ + + + + + + + +
+

{{ sport.short_name }} Scraper Report

+
+ {{ season_display }} • + {% if job.status == 'completed' %}Completed + {% elif job.status == 'failed' %}Failed + {% else %}{{ job.status|title }}{% endif %} + • {{ job.duration_display }} +
+
+ +
+ {% if job.status == 'completed' %} +
+
+
{{ job.games_found }}
+
Games Found
+
+
+
{{ job.games_new }}
+
New
+
+
+
{{ job.games_updated }}
+
Updated
+
+
+
{{ job.games_unchanged }}
+
Unchanged
+
+ {% if job.games_errors %} +
+
{{ job.games_errors }}
+
Errors
+
+ {% endif %} +
+ + {% if job.review_items_created > 0 %} +
+

⚠️ Review Queue ({{ job.review_items_created }} items)

+ {% if review_items %} + + + + + + + + + + + {% for item in review_items %} + + + + + + + {% endfor %} + +
TypeRaw ValueSuggestedConfidence
{{ item.item_type }}{{ item.raw_value }}{% if item.suggested_id %}{{ item.suggested_id }}{% else %}-{% endif %}{% if item.confidence %}{{ item.confidence|floatformat:0 }}%{% else %}-{% endif %}
+ {% if job.review_items_created > 10 %} +

Showing 10 of {{ job.review_items_created }} items

+ {% endif %} + {% endif %} +
+ {% endif %} + + {% else %} +
+

❌ Scraper Failed

+

Error: {{ job.error_message }}

+
+ {% endif %} + + {% if suggested_actions %} +
+

📋 Suggested Actions

+ {% for action in suggested_actions %} +
• {{ action }}
+ {% endfor %} +
+ {% endif %} +
+ + + + diff --git a/notifications/templates/notifications/emails/scrape_report.txt b/notifications/templates/notifications/emails/scrape_report.txt new file mode 100644 index 0000000..d1712e4 --- /dev/null +++ b/notifications/templates/notifications/emails/scrape_report.txt @@ -0,0 +1,43 @@ +{{ sport.short_name }} SCRAPER REPORT +================================ + +Season: {{ season_display }} +Status: {{ job.status|upper }} +Duration: {{ job.duration_display }} + +{% if job.status == 'completed' %} +SUMMARY +------- +Games Found: {{ job.games_found }} +New: {{ job.games_new }} +Updated: {{ job.games_updated }} +Unchanged: {{ job.games_unchanged }} +{% if job.games_errors %}Errors: {{ job.games_errors }}{% endif %} + +{% if job.review_items_created > 0 %} +REVIEW QUEUE ({{ job.review_items_created }} items) +------------------------------------------------- +{% for item in review_items %} +- {{ item.item_type }}: "{{ item.raw_value }}" -> {{ item.suggested_id|default:"None" }} ({{ item.confidence|floatformat:0 }}%) +{% endfor %} +{% if job.review_items_created > 10 %} +... and {{ job.review_items_created|add:"-10" }} more items +{% endif %} +{% endif %} + +{% else %} +ERROR +----- +{{ job.error_message }} +{% endif %} + +{% if suggested_actions %} +SUGGESTED ACTIONS +----------------- +{% for action in suggested_actions %} +- {{ action }} +{% endfor %} +{% endif %} + +--- +SportsTime Scraper | Job #{{ job.id }} | {{ job.finished_at|date:"Y-m-d H:i" }} UTC diff --git a/notifications/templates/notifications/emails/sync_report.html b/notifications/templates/notifications/emails/sync_report.html new file mode 100644 index 0000000..c8801f3 --- /dev/null +++ b/notifications/templates/notifications/emails/sync_report.html @@ -0,0 +1,72 @@ + + + + + + + +
+

CloudKit Sync Report

+
+ {{ job.configuration.name }} ({{ job.configuration.environment }}) • + {% if job.status == 'completed' %}Completed + {% elif job.status == 'failed' %}Failed + {% else %}{{ job.status|title }}{% endif %} + • {{ job.duration_display }} +
+
+ +
+ {% if job.status == 'completed' %} +
+
+
{{ job.records_synced }}
+
Records Synced
+
+
+
{{ job.records_created }}
+
Created
+
+
+
{{ job.records_updated }}
+
Updated
+
+
+
{{ job.records_deleted }}
+
Deleted
+
+ {% if job.records_failed %} +
+
{{ job.records_failed }}
+
Failed
+
+ {% endif %} +
+ {% else %} +
+

❌ Sync Failed

+

Error: {{ job.error_message }}

+
+ {% endif %} +
+ + + + diff --git a/notifications/templates/notifications/emails/sync_report.txt b/notifications/templates/notifications/emails/sync_report.txt new file mode 100644 index 0000000..f4f1740 --- /dev/null +++ b/notifications/templates/notifications/emails/sync_report.txt @@ -0,0 +1,23 @@ +CLOUDKIT SYNC REPORT +==================== + +Configuration: {{ job.configuration.name }} ({{ job.configuration.environment }}) +Status: {{ job.status|upper }} +Duration: {{ job.duration_display }} + +{% if job.status == 'completed' %} +SUMMARY +------- +Records Synced: {{ job.records_synced }} +Created: {{ job.records_created }} +Updated: {{ job.records_updated }} +Deleted: {{ job.records_deleted }} +{% if job.records_failed %}Failed: {{ job.records_failed }}{% endif %} +{% else %} +ERROR +----- +{{ job.error_message }} +{% endif %} + +--- +SportsTime CloudKit Sync | Job #{{ job.id }} | {{ job.finished_at|date:"Y-m-d H:i" }} UTC diff --git a/requirements.txt b/requirements.txt index cb771b8..b2d34d0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,43 @@ -# Core dependencies +# Django +Django>=5.1,<5.2 +gunicorn>=21.2.0 +whitenoise>=6.6.0 + +# Database +psycopg2-binary>=2.9.9 +dj-database-url>=2.1.0 + +# Celery +celery>=5.3.6 +redis>=5.0.1 +django-celery-beat>=2.6.0 +django-celery-results>=2.5.1 + +# Audit Trail +django-simple-history>=3.4.0 + +# Import/Export +django-import-export>=4.0.0 + +# Scraping (existing dependencies) requests>=2.31.0 beautifulsoup4>=4.12.0 lxml>=5.0.0 rapidfuzz>=3.5.0 python-dateutil>=2.8.0 pytz>=2024.1 -rich>=13.7.0 +timezonefinder>=6.2.0 + +# CloudKit Authentication pyjwt>=2.8.0 cryptography>=42.0.0 -# Development dependencies +# Utilities +python-dotenv>=1.0.0 +rich>=13.7.0 + +# Development pytest>=8.0.0 pytest-cov>=4.1.0 +pytest-django>=4.7.0 responses>=0.25.0 diff --git a/scraper/__init__.py b/scraper/__init__.py new file mode 100644 index 0000000..2e77a19 --- /dev/null +++ b/scraper/__init__.py @@ -0,0 +1 @@ +default_app_config = 
'scraper.apps.ScraperConfig' diff --git a/scraper/admin.py b/scraper/admin.py new file mode 100644 index 0000000..8f6ceec --- /dev/null +++ b/scraper/admin.py @@ -0,0 +1,139 @@ +""" +Admin configuration for scraper models. +""" +from django.contrib import admin +from django.utils.html import format_html +from import_export.admin import ImportExportMixin, ImportExportModelAdmin +from simple_history.admin import SimpleHistoryAdmin + +from .models import ScraperConfig, ScrapeJob, ManualReviewItem +from .resources import ScraperConfigResource, ScrapeJobResource, ManualReviewItemResource + + +@admin.register(ScraperConfig) +class ScraperConfigAdmin(ImportExportMixin, SimpleHistoryAdmin): + resource_class = ScraperConfigResource + list_display = [ + '__str__', + 'sport', + 'season', + 'is_active', + 'last_scrape_at', + 'next_scrape_at', + 'scrape_interval_hours', + ] + list_filter = ['sport', 'is_active', 'season'] + search_fields = ['sport__name', 'sport__short_name'] + ordering = ['-season', 'sport'] + readonly_fields = ['created_at', 'updated_at'] + + +@admin.register(ScrapeJob) +class ScrapeJobAdmin(ImportExportModelAdmin): + resource_class = ScrapeJobResource + list_display = [ + '__str__', + 'status_badge', + 'games_found', + 'games_created', + 'games_updated', + 'duration_display', + 'created_at', + ] + list_filter = ['status', 'config__sport', ('created_at', admin.DateFieldListFilter)] + search_fields = ['config__sport__name', 'errors'] + ordering = ['-created_at'] + readonly_fields = ['created_at', 'updated_at', 'duration_display'] + + @admin.display(description='Status') + def status_badge(self, obj): + colors = { + 'pending': '#ffc107', + 'running': '#17a2b8', + 'completed': '#28a745', + 'failed': '#dc3545', + 'cancelled': '#6c757d', + } + color = colors.get(obj.status, '#6c757d') + return format_html( + '{}', + color, obj.get_status_display() + ) + + @admin.display(description='Duration') + def duration_display(self, obj): + duration = obj.duration + if 
duration is not None: + if duration < 60: + return f"{duration:.1f}s" + elif duration < 3600: + return f"{duration/60:.1f}m" + else: + return f"{duration/3600:.1f}h" + return '-' + + +@admin.register(ManualReviewItem) +class ManualReviewItemAdmin(ImportExportModelAdmin): + resource_class = ManualReviewItemResource + list_display = [ + 'raw_value', + 'item_type', + 'sport', + 'status_badge', + 'confidence_bar', + 'matched_value', + 'created_at', + ] + list_filter = ['status', 'item_type', 'sport'] + search_fields = ['raw_value', 'matched_value'] + ordering = ['-confidence', '-created_at'] + readonly_fields = ['created_at', 'updated_at', 'resolved_at', 'resolved_by'] + actions = ['approve_items', 'reject_items'] + + @admin.display(description='Status') + def status_badge(self, obj): + colors = { + 'pending': '#ffc107', + 'approved': '#28a745', + 'rejected': '#dc3545', + 'resolved': '#17a2b8', + } + color = colors.get(obj.status, '#6c757d') + return format_html( + '{}', + color, obj.get_status_display() + ) + + @admin.display(description='Confidence') + def confidence_bar(self, obj): + color = '#28a745' if obj.confidence >= 85 else '#ffc107' if obj.confidence >= 70 else '#dc3545' + return format_html( + '
' + '
' + '{}%
', + obj.confidence, color, obj.confidence + ) + + @admin.action(description='Approve selected items') + def approve_items(self, request, queryset): + from django.utils import timezone + updated = queryset.update( + status='approved', + resolved_at=timezone.now(), + resolved_by=request.user + ) + self.message_user(request, f'{updated} items approved.') + + @admin.action(description='Reject selected items') + def reject_items(self, request, queryset): + from django.utils import timezone + updated = queryset.update( + status='rejected', + resolved_at=timezone.now(), + resolved_by=request.user + ) + self.message_user(request, f'{updated} items rejected.') diff --git a/scraper/admin/__init__.py b/scraper/admin/__init__.py new file mode 100644 index 0000000..aa6eb7f --- /dev/null +++ b/scraper/admin/__init__.py @@ -0,0 +1,3 @@ +from .config_admin import ScraperConfigAdmin +from .job_admin import ScrapeJobAdmin +from .review_admin import ManualReviewItemAdmin diff --git a/scraper/admin/config_admin.py b/scraper/admin/config_admin.py new file mode 100644 index 0000000..c385560 --- /dev/null +++ b/scraper/admin/config_admin.py @@ -0,0 +1,110 @@ +from django.contrib import admin +from django.utils.html import format_html +from django.urls import reverse +from simple_history.admin import SimpleHistoryAdmin + +from scraper.models import ScraperConfig + + +@admin.register(ScraperConfig) +class ScraperConfigAdmin(SimpleHistoryAdmin): + list_display = [ + 'sport', + 'season_display', + 'is_enabled', + 'primary_source', + 'last_run_display', + 'last_run_status_badge', + 'last_run_games', + 'job_count', + ] + list_filter = ['sport', 'is_enabled', 'last_run_status'] + search_fields = ['sport__name', 'sport__short_name'] + ordering = ['-season', 'sport'] + readonly_fields = [ + 'created_at', + 'updated_at', + 'last_run', + 'last_run_status', + 'last_run_games', + ] + + fieldsets = [ + (None, { + 'fields': ['sport', 'season', 'is_enabled'] + }), + ('Source Configuration', { + 
'fields': ['sources', 'primary_source'] + }), + ('Rate Limiting', { + 'fields': ['request_delay', 'max_retries'] + }), + ('Matching', { + 'fields': ['fuzzy_threshold'] + }), + ('Last Run', { + 'fields': ['last_run', 'last_run_status', 'last_run_games'], + 'classes': ['collapse'] + }), + ('Notes', { + 'fields': ['notes'], + 'classes': ['collapse'] + }), + ('Metadata', { + 'fields': ['created_at', 'updated_at'], + 'classes': ['collapse'] + }), + ] + + actions = ['run_scraper', 'enable_scrapers', 'disable_scrapers'] + + def season_display(self, obj): + return obj.sport.get_season_display(obj.season) + season_display.short_description = 'Season' + + def last_run_display(self, obj): + if obj.last_run: + return obj.last_run.strftime('%Y-%m-%d %H:%M') + return '-' + last_run_display.short_description = 'Last Run' + + def last_run_status_badge(self, obj): + if not obj.last_run_status: + return '-' + colors = { + 'completed': 'green', + 'failed': 'red', + 'running': 'orange', + } + color = colors.get(obj.last_run_status, 'gray') + return format_html( + '{}', + color, + obj.last_run_status.upper() + ) + last_run_status_badge.short_description = 'Status' + + def job_count(self, obj): + count = obj.jobs.count() + if count > 0: + url = reverse('admin:scraper_scrapejob_changelist') + f'?config__id__exact={obj.id}' + return format_html('{} jobs', url, count) + return '0 jobs' + job_count.short_description = 'Jobs' + + @admin.action(description='Run scraper for selected configurations') + def run_scraper(self, request, queryset): + from scraper.tasks import run_scraper_task + for config in queryset: + run_scraper_task.delay(config.id) + self.message_user(request, f'Started {queryset.count()} scraper jobs.') + + @admin.action(description='Enable selected scrapers') + def enable_scrapers(self, request, queryset): + updated = queryset.update(is_enabled=True) + self.message_user(request, f'{updated} scrapers enabled.') + + @admin.action(description='Disable selected scrapers') + def 
disable_scrapers(self, request, queryset): + updated = queryset.update(is_enabled=False) + self.message_user(request, f'{updated} scrapers disabled.') diff --git a/scraper/admin/job_admin.py b/scraper/admin/job_admin.py new file mode 100644 index 0000000..2be5666 --- /dev/null +++ b/scraper/admin/job_admin.py @@ -0,0 +1,154 @@ +from django.contrib import admin +from django.utils.html import format_html +from django.urls import reverse + +from scraper.models import ScrapeJob, ScrapeJobLog + + +class ScrapeJobLogInline(admin.TabularInline): + model = ScrapeJobLog + extra = 0 + readonly_fields = ['created_at', 'level', 'source', 'message'] + fields = ['created_at', 'level', 'source', 'message'] + ordering = ['created_at'] + can_delete = False + + def has_add_permission(self, request, obj=None): + return False + + +@admin.register(ScrapeJob) +class ScrapeJobAdmin(admin.ModelAdmin): + list_display = [ + 'id', + 'config', + 'status_badge', + 'triggered_by', + 'started_at', + 'duration_display', + 'games_summary', + 'review_items_link', + ] + list_filter = ['status', 'config__sport', 'triggered_by', 'config__season'] + search_fields = ['config__sport__name', 'celery_task_id'] + date_hierarchy = 'created_at' + ordering = ['-created_at'] + readonly_fields = [ + 'id', + 'config', + 'status', + 'triggered_by', + 'started_at', + 'finished_at', + 'duration_display', + 'games_found', + 'games_new', + 'games_updated', + 'games_unchanged', + 'games_errors', + 'teams_found', + 'stadiums_found', + 'review_items_created', + 'error_message', + 'error_traceback', + 'celery_task_id', + 'created_at', + 'updated_at', + ] + inlines = [ScrapeJobLogInline] + + fieldsets = [ + (None, { + 'fields': ['id', 'config', 'status', 'triggered_by', 'celery_task_id'] + }), + ('Timing', { + 'fields': ['started_at', 'finished_at', 'duration_display'] + }), + ('Results - Games', { + 'fields': [ + 'games_found', + 'games_new', + 'games_updated', + 'games_unchanged', + 'games_errors', + ] + }), + ('Results 
- Other', { + 'fields': ['teams_found', 'stadiums_found', 'review_items_created'] + }), + ('Errors', { + 'fields': ['error_message', 'error_traceback'], + 'classes': ['collapse'] + }), + ('Metadata', { + 'fields': ['created_at', 'updated_at'], + 'classes': ['collapse'] + }), + ] + + actions = ['cancel_jobs', 'retry_jobs'] + + def has_add_permission(self, request): + return False + + def has_change_permission(self, request, obj=None): + return False + + def status_badge(self, obj): + colors = { + 'pending': '#999', + 'running': '#f0ad4e', + 'completed': '#5cb85c', + 'failed': '#d9534f', + 'cancelled': '#777', + } + color = colors.get(obj.status, '#999') + return format_html( + '{}', + color, + obj.status.upper() + ) + status_badge.short_description = 'Status' + + def games_summary(self, obj): + if obj.games_found == 0: + return '-' + return format_html( + '' + '{} found ({} new, {} upd)', + obj.games_new, obj.games_updated, obj.games_unchanged, obj.games_errors, + obj.games_found, obj.games_new, obj.games_updated + ) + games_summary.short_description = 'Games' + + def review_items_link(self, obj): + if obj.review_items_created > 0: + url = reverse('admin:scraper_manualreviewitem_changelist') + f'?job__id__exact={obj.id}' + return format_html( + '{} items', + url, obj.review_items_created + ) + return '-' + review_items_link.short_description = 'Review' + + @admin.action(description='Cancel selected jobs') + def cancel_jobs(self, request, queryset): + from celery.result import AsyncResult + cancelled = 0 + for job in queryset.filter(status__in=['pending', 'running']): + if job.celery_task_id: + AsyncResult(job.celery_task_id).revoke(terminate=True) + job.status = 'cancelled' + job.save() + cancelled += 1 + self.message_user(request, f'{cancelled} jobs cancelled.') + + @admin.action(description='Retry failed jobs') + def retry_jobs(self, request, queryset): + from scraper.tasks import run_scraper_task + retried = 0 + for job in queryset.filter(status='failed'): + 
run_scraper_task.delay(job.config.id) + retried += 1 + self.message_user(request, f'{retried} jobs requeued.') diff --git a/scraper/admin/review_admin.py b/scraper/admin/review_admin.py new file mode 100644 index 0000000..557dbb0 --- /dev/null +++ b/scraper/admin/review_admin.py @@ -0,0 +1,157 @@ +from django.contrib import admin +from django.utils.html import format_html +from django.utils import timezone +from simple_history.admin import SimpleHistoryAdmin + +from scraper.models import ManualReviewItem + + +@admin.register(ManualReviewItem) +class ManualReviewItemAdmin(SimpleHistoryAdmin): + list_display = [ + 'raw_value', + 'item_type', + 'sport', + 'status_badge', + 'suggested_match', + 'confidence_badge', + 'reason', + 'created_at', + ] + list_filter = ['status', 'item_type', 'sport', 'reason'] + search_fields = ['raw_value', 'suggested_id', 'resolved_to'] + ordering = ['-created_at'] + readonly_fields = [ + 'job', + 'item_type', + 'sport', + 'raw_value', + 'suggested_id', + 'confidence', + 'reason', + 'source_url', + 'check_date', + 'context', + 'resolved_by', + 'resolved_at', + 'created_at', + 'updated_at', + ] + autocomplete_fields = [] + + fieldsets = [ + (None, { + 'fields': ['job', 'item_type', 'sport', 'raw_value'] + }), + ('Suggested Match', { + 'fields': ['suggested_id', 'confidence', 'reason'] + }), + ('Context', { + 'fields': ['source_url', 'check_date', 'context'], + 'classes': ['collapse'] + }), + ('Resolution', { + 'fields': [ + 'status', + 'resolved_to', + 'create_alias', + 'resolution_notes', + 'resolved_by', + 'resolved_at', + ] + }), + ('Metadata', { + 'fields': ['created_at', 'updated_at'], + 'classes': ['collapse'] + }), + ] + + actions = [ + 'accept_suggested', + 'mark_ignored', + 'accept_and_create_alias', + ] + + def status_badge(self, obj): + colors = { + 'pending': '#f0ad4e', + 'resolved': '#5cb85c', + 'ignored': '#999', + 'new_entity': '#5bc0de', + } + color = colors.get(obj.status, '#999') + return format_html( + '{}', + color, + 
obj.get_status_display().upper() + ) + status_badge.short_description = 'Status' + + def suggested_match(self, obj): + if obj.suggested_id: + return format_html( + '{}', + obj.suggested_id + ) + return '-' + suggested_match.short_description = 'Suggested' + + def confidence_badge(self, obj): + if obj.confidence == 0: + return '-' + pct = obj.confidence * 100 + if pct >= 85: + color = '#5cb85c' + elif pct >= 70: + color = '#f0ad4e' + else: + color = '#d9534f' + return format_html( + '{:.0f}%', + color, pct + ) + confidence_badge.short_description = 'Conf.' + + @admin.action(description='Accept suggested match') + def accept_suggested(self, request, queryset): + resolved = 0 + for item in queryset.filter(status='pending', suggested_id__isnull=False): + item.resolve( + canonical_id=item.suggested_id, + user=request.user, + notes='Accepted suggested match via admin action' + ) + resolved += 1 + self.message_user(request, f'{resolved} items resolved.') + + @admin.action(description='Accept suggested and create alias') + def accept_and_create_alias(self, request, queryset): + resolved = 0 + for item in queryset.filter(status='pending', suggested_id__isnull=False): + item.resolve( + canonical_id=item.suggested_id, + user=request.user, + notes='Accepted and created alias via admin action', + create_alias=True + ) + resolved += 1 + self.message_user(request, f'{resolved} items resolved with aliases created.') + + @admin.action(description='Mark as ignored') + def mark_ignored(self, request, queryset): + ignored = 0 + for item in queryset.filter(status='pending'): + item.ignore( + user=request.user, + notes='Ignored via admin action' + ) + ignored += 1 + self.message_user(request, f'{ignored} items ignored.') + + def save_model(self, request, obj, form, change): + # Auto-set resolved_by and resolved_at when status changes to resolved + if change and obj.status in ['resolved', 'ignored'] and not obj.resolved_by: + obj.resolved_by = request.user + obj.resolved_at = 
timezone.now() + super().save_model(request, obj, form, change) diff --git a/scraper/apps.py b/scraper/apps.py new file mode 100644 index 0000000..2805118 --- /dev/null +++ b/scraper/apps.py @@ -0,0 +1,7 @@ +from django.apps import AppConfig + + +class ScraperConfig(AppConfig): + default_auto_field = 'django.db.models.BigAutoField' + name = 'scraper' + verbose_name = 'Scraper Management' diff --git a/scraper/engine/__init__.py b/scraper/engine/__init__.py new file mode 100644 index 0000000..20f6afd --- /dev/null +++ b/scraper/engine/__init__.py @@ -0,0 +1 @@ +# Scraper engine package diff --git a/scraper/engine/adapter.py b/scraper/engine/adapter.py new file mode 100644 index 0000000..fb44ad3 --- /dev/null +++ b/scraper/engine/adapter.py @@ -0,0 +1,496 @@ +""" +Adapter to bridge existing sportstime_parser scrapers with Django models. +""" +import hashlib +from datetime import datetime +from typing import Callable, Optional + +from django.db import transaction +from django.utils import timezone + + +class ScraperAdapter: + """ + Adapts the existing sportstime_parser scrapers to work with Django models. + """ + + def __init__( + self, + sport_code: str, + season: int, + config, + log_func: Optional[Callable] = None, + ): + self.sport_code = sport_code + self.season = season + self.config = config + self.log = log_func or (lambda level, msg, **kw: None) + + def run(self) -> dict: + """ + Run the scraper and return results. 
+ """ + from core.models import Sport, Team, Stadium, Game + from scraper.models import ManualReviewItem + from cloudkit.models import CloudKitSyncState + + result = { + 'games_found': 0, + 'games_new': 0, + 'games_updated': 0, + 'games_unchanged': 0, + 'games_errors': 0, + 'teams_found': 0, + 'stadiums_found': 0, + 'review_items': 0, + } + + # Get sport + try: + sport = Sport.objects.get(code=self.sport_code) + except Sport.DoesNotExist: + raise ValueError(f"Sport {self.sport_code} not found in database") + + self.log('info', f'Starting scraper for {sport.short_name} {self.season}', source='adapter') + + # Import and create the appropriate scraper + scraper = self._create_scraper() + + # Run the scrape + self.log('info', 'Scraping games...', source='adapter') + raw_result = scraper.scrape_all() + + # Process stadiums first (teams reference stadiums via home_stadium FK) + self.log('info', f'Processing {len(raw_result.stadiums)} stadiums...', source='adapter') + result['stadiums_found'] = len(raw_result.stadiums) + self._process_stadiums(sport, raw_result.stadiums) + + # Process teams + self.log('info', f'Processing {len(raw_result.teams)} teams...', source='adapter') + result['teams_found'] = len(raw_result.teams) + self._process_teams(sport, raw_result.teams) + + # Process games + self.log('info', f'Processing {len(raw_result.games)} games...', source='adapter') + game_result = self._process_games(sport, raw_result.games) + result.update(game_result) + + # Process review items + if raw_result.review_items: + self.log('info', f'Creating {len(raw_result.review_items)} review items...', source='adapter') + result['review_items'] = self._process_review_items(sport, raw_result.review_items) + + self.log('info', f'Scrape complete: {result}', source='adapter') + return result + + def _create_scraper(self): + """Create the appropriate scraper instance.""" + # Import from existing sportstime_parser + from sportstime_parser.scrapers import ( + create_nba_scraper, + 
create_mlb_scraper, + create_nfl_scraper, + create_nhl_scraper, + create_mls_scraper, + create_wnba_scraper, + create_nwsl_scraper, + ) + + scrapers = { + 'nba': create_nba_scraper, + 'mlb': create_mlb_scraper, + 'nfl': create_nfl_scraper, + 'nhl': create_nhl_scraper, + 'mls': create_mls_scraper, + 'wnba': create_wnba_scraper, + 'nwsl': create_nwsl_scraper, + } + + creator = scrapers.get(self.sport_code) + if not creator: + raise ValueError(f"No scraper for sport: {self.sport_code}") + + # Create scraper (config overrides handled via session/resolver settings if needed) + return creator(season=self.season) + + def _process_teams(self, sport, teams): + """Process and upsert teams.""" + from core.models import Team, Stadium, Division, Conference + from cloudkit.models import CloudKitSyncState + + for team_data in teams: + team_id = team_data.id + + # Find division if available + division = None + if team_data.division: + division = Division.objects.filter( + conference__sport=sport, + name__iexact=team_data.division + ).first() + # Fallback to partial match + if not division: + division = Division.objects.filter( + conference__sport=sport, + name__icontains=team_data.division + ).first() + + # Resolve home stadium if available + home_stadium = None + stadium_id = getattr(team_data, 'stadium_id', None) + if stadium_id: + home_stadium = Stadium.objects.filter(id=stadium_id).first() + + team, created = Team.objects.update_or_create( + id=team_id, + defaults={ + 'sport': sport, + 'division': division, + 'city': team_data.city, + 'name': team_data.name, + 'full_name': team_data.full_name, + 'abbreviation': team_data.abbreviation, + 'home_stadium': home_stadium, + 'primary_color': getattr(team_data, 'primary_color', '') or '', + 'secondary_color': getattr(team_data, 'secondary_color', '') or '', + 'logo_url': getattr(team_data, 'logo_url', '') or '', + } + ) + + # Mark for sync + if created: + CloudKitSyncState.objects.get_or_create( + record_type='Team', + 
record_id=team_id, + defaults={'sync_status': 'pending'} + ) + + def _process_stadiums(self, sport, stadiums): + """Process and upsert stadiums.""" + from core.models import Stadium + from cloudkit.models import CloudKitSyncState + + for stadium_data in stadiums: + stadium_id = stadium_data.id + + stadium, created = Stadium.objects.update_or_create( + id=stadium_id, + defaults={ + 'sport': sport, + 'name': stadium_data.name, + 'city': stadium_data.city, + 'state': getattr(stadium_data, 'state', '') or '', + 'country': getattr(stadium_data, 'country', 'USA'), + 'latitude': getattr(stadium_data, 'latitude', None), + 'longitude': getattr(stadium_data, 'longitude', None), + 'capacity': getattr(stadium_data, 'capacity', None), + 'surface': getattr(stadium_data, 'surface', '') or '', + 'roof_type': getattr(stadium_data, 'roof_type', '') or '', + 'opened_year': getattr(stadium_data, 'opened_year', None), + 'timezone': getattr(stadium_data, 'timezone', '') or '', + 'image_url': getattr(stadium_data, 'image_url', '') or '', + } + ) + + if created: + CloudKitSyncState.objects.get_or_create( + record_type='Stadium', + record_id=stadium_id, + defaults={'sync_status': 'pending'} + ) + + def _resolve_team_via_db_alias(self, sport, raw_name, check_date=None): + """Try to resolve a team name using database aliases. 
+ + Args: + sport: Sport model instance + raw_name: Raw team name from scraper + check_date: Date for alias validity check + + Returns: + Team instance if found, None otherwise + """ + from core.models import Team, TeamAlias + from datetime import date + + if not raw_name: + return None + + check_date = check_date or date.today() + + # Check TeamAlias model + aliases = TeamAlias.objects.filter( + alias__iexact=raw_name.strip(), + team__sport=sport, + ).select_related('team') + + for alias in aliases: + if alias.is_valid_for_date(check_date): + return alias.team + + # Also try partial matching on team full_name and city + team = Team.objects.filter( + sport=sport, + full_name__iexact=raw_name.strip() + ).first() + if team: + return team + + team = Team.objects.filter( + sport=sport, + city__iexact=raw_name.strip() + ).first() + if team: + return team + + return None + + def _resolve_stadium_via_db_alias(self, sport, raw_name, check_date=None): + """Try to resolve a stadium name using database aliases. 
+ + Args: + sport: Sport model instance + raw_name: Raw stadium name from scraper + check_date: Date for alias validity check + + Returns: + Stadium instance if found, None otherwise + """ + from core.models import Stadium, StadiumAlias + from datetime import date + + if not raw_name: + return None + + check_date = check_date or date.today() + + # Check StadiumAlias model + aliases = StadiumAlias.objects.filter( + alias__iexact=raw_name.strip(), + stadium__sport=sport, + ).select_related('stadium') + + for alias in aliases: + if alias.is_valid_for_date(check_date): + return alias.stadium + + # Also try direct matching on stadium name + stadium = Stadium.objects.filter( + sport=sport, + name__iexact=raw_name.strip() + ).first() + if stadium: + return stadium + + return None + + def _process_games(self, sport, games): + """Process and upsert games.""" + from core.models import Game, Team, Stadium + from cloudkit.models import CloudKitSyncState + + result = { + 'games_found': len(games), + 'games_new': 0, + 'games_updated': 0, + 'games_unchanged': 0, + 'games_errors': 0, + } + + for game_data in games: + try: + game_id = game_data.id + check_date = game_data.game_date.date() if hasattr(game_data.game_date, 'date') else game_data.game_date + + # Get related objects - try by ID first, then by DB alias + home_team = None + away_team = None + + try: + home_team = Team.objects.get(id=game_data.home_team_id) + except Team.DoesNotExist: + # Try resolving via database alias using raw name + raw_home = getattr(game_data, 'raw_home_team', None) + if raw_home: + home_team = self._resolve_team_via_db_alias(sport, raw_home, check_date) + if home_team: + self.log('info', f'Resolved home team via DB alias: {raw_home} -> {home_team.abbreviation}', source='adapter') + + try: + away_team = Team.objects.get(id=game_data.away_team_id) + except Team.DoesNotExist: + # Try resolving via database alias using raw name + raw_away = getattr(game_data, 'raw_away_team', None) + if raw_away: + 
away_team = self._resolve_team_via_db_alias(sport, raw_away, check_date) + if away_team: + self.log('info', f'Resolved away team via DB alias: {raw_away} -> {away_team.abbreviation}', source='adapter') + + if not home_team or not away_team: + missing = [] + if not home_team: + missing.append(f'home={game_data.home_team_id}') + if not away_team: + missing.append(f'away={game_data.away_team_id}') + self.log('warning', f'Team not found for game {game_id}: {", ".join(missing)}', source='adapter') + result['games_errors'] += 1 + continue + + stadium = None + if game_data.stadium_id: + try: + stadium = Stadium.objects.get(id=game_data.stadium_id) + except Stadium.DoesNotExist: + # Try resolving via database alias using raw name + raw_stadium = getattr(game_data, 'raw_stadium', None) + if raw_stadium: + stadium = self._resolve_stadium_via_db_alias(sport, raw_stadium, check_date) + if stadium: + self.log('info', f'Resolved stadium via DB alias: {raw_stadium} -> {stadium.name}', source='adapter') + + # Build game dict + game_defaults = { + 'sport': sport, + 'season': game_data.season, + 'home_team': home_team, + 'away_team': away_team, + 'stadium': stadium, + 'game_date': game_data.game_date, + 'game_number': getattr(game_data, 'game_number', None), + 'home_score': game_data.home_score, + 'away_score': game_data.away_score, + 'status': game_data.status, + 'raw_home_team': getattr(game_data, 'raw_home_team', '') or '', + 'raw_away_team': getattr(game_data, 'raw_away_team', '') or '', + 'raw_stadium': getattr(game_data, 'raw_stadium', '') or '', + 'source_url': getattr(game_data, 'source_url', '') or '', + } + + # Check if game exists + try: + existing = Game.objects.get(id=game_id) + # Check if changed + changed = False + for key, value in game_defaults.items(): + if getattr(existing, key if not hasattr(existing, f'{key}_id') else f'{key}_id') != (value.id if hasattr(value, 'id') else value): + changed = True + break + + if changed: + for key, value in game_defaults.items(): 
+ setattr(existing, key, value) + existing.save() + result['games_updated'] += 1 + + # Mark for sync + CloudKitSyncState.objects.update_or_create( + record_type='Game', + record_id=game_id, + defaults={'sync_status': 'pending'} + ) + else: + result['games_unchanged'] += 1 + + except Game.DoesNotExist: + # Create new game + Game.objects.create(id=game_id, **game_defaults) + result['games_new'] += 1 + + # Mark for sync + CloudKitSyncState.objects.get_or_create( + record_type='Game', + record_id=game_id, + defaults={'sync_status': 'pending'} + ) + + except Exception as e: + self.log('error', f'Error processing game: {e}', source='adapter') + result['games_errors'] += 1 + + return result + + def _process_review_items(self, sport, review_items): + """Create manual review items.""" + from scraper.models import ManualReviewItem, ScrapeJob + from sportstime_parser.models.aliases import ReviewReason + + # Get current job + job = ScrapeJob.objects.filter( + config=self.config, + status='running' + ).order_by('-created_at').first() + + count = 0 + for item in review_items: + # Derive item_type from reason + item_type = self._get_item_type_from_reason(item.reason) + + # Get suggested match info (parser uses suggested_matches list) + suggested_id = '' + confidence = 0.0 + if item.suggested_matches: + best_match = item.suggested_matches[0] + suggested_id = best_match.canonical_id + confidence = best_match.confidence / 100.0 # Convert to 0-1 range + + ManualReviewItem.objects.create( + job=job, + item_type=item_type, + sport=sport, + raw_value=item.raw_value, + suggested_id=suggested_id, + confidence=confidence, + reason=self._map_reason(item.reason), + source_url=item.source_url or '', + check_date=item.game_date, + context=item.context if item.context else None, + ) + count += 1 + + return count + + def _get_item_type_from_reason(self, reason) -> str: + """Derive item type (team/stadium) from ReviewReason enum.""" + from sportstime_parser.models.aliases import ReviewReason + + 
# Map reason to item type + if isinstance(reason, ReviewReason): + reason_value = reason.value + else: + reason_value = str(reason).lower() + + if 'team' in reason_value: + return 'team' + elif 'stadium' in reason_value: + return 'stadium' + else: + # Default to team for other reasons + return 'team' + + def _map_reason(self, reason) -> str: + """Map scraper ReviewReason to model choice.""" + from sportstime_parser.models.aliases import ReviewReason + + # Handle ReviewReason enum + if isinstance(reason, ReviewReason): + reason_value = reason.value + else: + reason_value = str(reason).lower() + + reason_map = { + 'unresolved_team': 'no_match', + 'unresolved_stadium': 'no_match', + 'low_confidence_match': 'low_confidence', + 'missing_data': 'no_match', + 'duplicate_game': 'ambiguous', + 'timezone_unknown': 'no_match', + 'geographic_filter': 'no_match', + # Legacy mappings + 'no_match': 'no_match', + 'no match found': 'no_match', + 'low_confidence': 'low_confidence', + 'fuzzy match below threshold': 'low_confidence', + 'ambiguous': 'ambiguous', + 'new_entity': 'new_entity', + } + return reason_map.get(reason_value.lower(), 'no_match') diff --git a/scraper/engine/db_alias_loader.py b/scraper/engine/db_alias_loader.py new file mode 100644 index 0000000..211fe84 --- /dev/null +++ b/scraper/engine/db_alias_loader.py @@ -0,0 +1,144 @@ +"""Database-aware alias loaders for team and stadium resolution. + +These loaders check the Django TeamAlias and StadiumAlias models +in addition to the hardcoded mappings, allowing aliases to be +managed via the admin interface. +""" + +from datetime import date +from typing import Optional + + +class DatabaseTeamAliasLoader: + """Load team aliases from the Django database. + + Checks the core.TeamAlias model for alias mappings, + supporting date-aware lookups for historical names. 
+ """ + + def resolve( + self, + value: str, + sport_code: str, + check_date: Optional[date] = None, + ) -> Optional[str]: + """Resolve an alias value to a canonical team ID. + + Args: + value: Alias value to look up (case-insensitive) + sport_code: Sport code to filter by + check_date: Date to check validity (None = current date) + + Returns: + Canonical team ID if found, None otherwise + """ + from core.models import TeamAlias + from django.db.models import Q + + if check_date is None: + check_date = date.today() + + value_lower = value.lower().strip() + + # Query aliases matching the value and sport + aliases = TeamAlias.objects.filter( + alias__iexact=value_lower, + team__sport__code=sport_code, + ).select_related('team') + + for alias in aliases: + if alias.is_valid_for_date(check_date): + return alias.team.id + + return None + + def get_aliases_for_team( + self, + team_id: str, + check_date: Optional[date] = None, + ) -> list: + """Get all aliases for a team. + + Args: + team_id: Team ID + check_date: Date to filter by (None = all aliases) + + Returns: + List of TeamAlias objects + """ + from core.models import TeamAlias + + aliases = TeamAlias.objects.filter(team_id=team_id) + + if check_date: + result = [] + for alias in aliases: + if alias.is_valid_for_date(check_date): + result.append(alias) + return result + + return list(aliases) + + +class DatabaseStadiumAliasLoader: + """Load stadium aliases from the Django database. + + Checks the core.StadiumAlias model for alias mappings, + supporting date-aware lookups for naming rights changes. + """ + + def resolve( + self, + name: str, + sport_code: str, + check_date: Optional[date] = None, + ) -> Optional[str]: + """Resolve a stadium name to a canonical stadium ID. 
+ + Args: + name: Stadium name to look up (case-insensitive) + sport_code: Sport code to filter by + check_date: Date to check validity (None = current date) + + Returns: + Canonical stadium ID if found, None otherwise + """ + from core.models import StadiumAlias + + if check_date is None: + check_date = date.today() + + name_lower = name.lower().strip() + + # Query aliases matching the name and sport + aliases = StadiumAlias.objects.filter( + alias__iexact=name_lower, + stadium__sport__code=sport_code, + ).select_related('stadium') + + for alias in aliases: + if alias.is_valid_for_date(check_date): + return alias.stadium.id + + return None + + +# Global instances +_db_team_loader: Optional[DatabaseTeamAliasLoader] = None +_db_stadium_loader: Optional[DatabaseStadiumAliasLoader] = None + + +def get_db_team_alias_loader() -> DatabaseTeamAliasLoader: + """Get the database team alias loader.""" + global _db_team_loader + if _db_team_loader is None: + _db_team_loader = DatabaseTeamAliasLoader() + return _db_team_loader + + +def get_db_stadium_alias_loader() -> DatabaseStadiumAliasLoader: + """Get the database stadium alias loader.""" + global _db_stadium_loader + if _db_stadium_loader is None: + _db_stadium_loader = DatabaseStadiumAliasLoader() + return _db_stadium_loader diff --git a/scraper/migrations/0001_initial.py b/scraper/migrations/0001_initial.py new file mode 100644 index 0000000..9e3a714 --- /dev/null +++ b/scraper/migrations/0001_initial.py @@ -0,0 +1,201 @@ +# Generated by Django 5.1.15 on 2026-01-26 08:59 + +import django.db.models.deletion +import simple_history.models +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ('core', '0001_initial'), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name='ScrapeJob', + fields=[ + ('id', models.BigAutoField(auto_created=True, 
primary_key=True, serialize=False, verbose_name='ID')), + ('status', models.CharField(choices=[('pending', 'Pending'), ('running', 'Running'), ('completed', 'Completed'), ('failed', 'Failed'), ('cancelled', 'Cancelled')], default='pending', max_length=20)), + ('triggered_by', models.CharField(default='manual', help_text='How the job was triggered (manual, scheduled, api)', max_length=50)), + ('started_at', models.DateTimeField(blank=True, null=True)), + ('finished_at', models.DateTimeField(blank=True, null=True)), + ('games_found', models.PositiveIntegerField(default=0)), + ('games_new', models.PositiveIntegerField(default=0)), + ('games_updated', models.PositiveIntegerField(default=0)), + ('games_unchanged', models.PositiveIntegerField(default=0)), + ('games_errors', models.PositiveIntegerField(default=0)), + ('teams_found', models.PositiveIntegerField(default=0)), + ('stadiums_found', models.PositiveIntegerField(default=0)), + ('review_items_created', models.PositiveIntegerField(default=0)), + ('error_message', models.TextField(blank=True)), + ('error_traceback', models.TextField(blank=True)), + ('celery_task_id', models.CharField(blank=True, help_text='Celery task ID for this job', max_length=255)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ], + options={ + 'verbose_name': 'Scrape Job', + 'verbose_name_plural': 'Scrape Jobs', + 'ordering': ['-created_at'], + }, + ), + migrations.CreateModel( + name='HistoricalScraperConfig', + fields=[ + ('id', models.BigIntegerField(auto_created=True, blank=True, db_index=True, verbose_name='ID')), + ('season', models.PositiveSmallIntegerField(help_text='Season to scrape (e.g., 2025 for 2025-26 season)')), + ('is_enabled', models.BooleanField(default=True, help_text='Whether this scraper is enabled for scheduling')), + ('sources', models.JSONField(default=list, help_text='Ordered list of sources to try (e.g., ["basketball_reference", "espn"])')), + 
('primary_source', models.CharField(blank=True, help_text='Primary source for this scraper', max_length=100)), + ('request_delay', models.FloatField(default=3.0, help_text='Seconds between requests')), + ('max_retries', models.PositiveSmallIntegerField(default=3, help_text='Maximum retry attempts')), + ('fuzzy_threshold', models.PositiveSmallIntegerField(default=85, help_text='Minimum fuzzy match confidence (0-100)')), + ('last_run', models.DateTimeField(blank=True, help_text='Last successful run timestamp', null=True)), + ('last_run_status', models.CharField(blank=True, help_text='Status of last run', max_length=20)), + ('last_run_games', models.PositiveIntegerField(default=0, help_text='Games found in last run')), + ('notes', models.TextField(blank=True, help_text='Configuration notes')), + ('created_at', models.DateTimeField(blank=True, editable=False)), + ('updated_at', models.DateTimeField(blank=True, editable=False)), + ('history_id', models.AutoField(primary_key=True, serialize=False)), + ('history_date', models.DateTimeField(db_index=True)), + ('history_change_reason', models.CharField(max_length=100, null=True)), + ('history_type', models.CharField(choices=[('+', 'Created'), ('~', 'Changed'), ('-', 'Deleted')], max_length=1)), + ('history_user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)), + ('sport', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='core.sport')), + ], + options={ + 'verbose_name': 'historical Scraper Configuration', + 'verbose_name_plural': 'historical Scraper Configurations', + 'ordering': ('-history_date', '-history_id'), + 'get_latest_by': ('history_date', 'history_id'), + }, + bases=(simple_history.models.HistoricalChanges, models.Model), + ), + migrations.CreateModel( + name='HistoricalManualReviewItem', + fields=[ + ('id', models.BigIntegerField(auto_created=True, 
blank=True, db_index=True, verbose_name='ID')), + ('item_type', models.CharField(choices=[('team', 'Team'), ('stadium', 'Stadium')], max_length=20)), + ('raw_value', models.CharField(help_text='Original scraped value', max_length=300)), + ('suggested_id', models.CharField(blank=True, help_text='Suggested canonical ID (if any match found)', max_length=100)), + ('confidence', models.FloatField(default=0.0, help_text='Match confidence (0.0 - 1.0)')), + ('reason', models.CharField(choices=[('no_match', 'No Match Found'), ('low_confidence', 'Low Confidence Match'), ('ambiguous', 'Ambiguous Match'), ('new_entity', 'Potentially New Entity')], help_text='Why manual review is needed', max_length=20)), + ('source_url', models.URLField(blank=True, help_text='URL where this value was found')), + ('check_date', models.DateField(blank=True, help_text='Date context for alias resolution', null=True)), + ('context', models.JSONField(blank=True, help_text='Additional context (e.g., game info)', null=True)), + ('status', models.CharField(choices=[('pending', 'Pending Review'), ('resolved', 'Resolved'), ('ignored', 'Ignored'), ('new_entity', 'Created New Entity')], default='pending', max_length=20)), + ('resolved_to', models.CharField(blank=True, help_text='Final resolved canonical ID', max_length=100)), + ('resolved_at', models.DateTimeField(blank=True, null=True)), + ('resolution_notes', models.TextField(blank=True, help_text='Notes about the resolution')), + ('create_alias', models.BooleanField(default=False, help_text='Whether to create an alias from this resolution')), + ('created_at', models.DateTimeField(blank=True, editable=False)), + ('updated_at', models.DateTimeField(blank=True, editable=False)), + ('history_id', models.AutoField(primary_key=True, serialize=False)), + ('history_date', models.DateTimeField(db_index=True)), + ('history_change_reason', models.CharField(max_length=100, null=True)), + ('history_type', models.CharField(choices=[('+', 'Created'), ('~', 'Changed'), 
('-', 'Deleted')], max_length=1)), + ('history_user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)), + ('resolved_by', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to=settings.AUTH_USER_MODEL)), + ('sport', models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='core.sport')), + ('job', models.ForeignKey(blank=True, db_constraint=False, help_text='Job that created this review item', null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='scraper.scrapejob')), + ], + options={ + 'verbose_name': 'historical Manual Review Item', + 'verbose_name_plural': 'historical Manual Review Items', + 'ordering': ('-history_date', '-history_id'), + 'get_latest_by': ('history_date', 'history_id'), + }, + bases=(simple_history.models.HistoricalChanges, models.Model), + ), + migrations.CreateModel( + name='ScrapeJobLog', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('level', models.CharField(choices=[('debug', 'Debug'), ('info', 'Info'), ('warning', 'Warning'), ('error', 'Error')], default='info', max_length=10)), + ('message', models.TextField()), + ('source', models.CharField(blank=True, help_text='Source/component that generated this log', max_length=100)), + ('extra_data', models.JSONField(blank=True, help_text='Additional structured data', null=True)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('job', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='logs', to='scraper.scrapejob')), + ], + options={ + 'verbose_name': 'Scrape Job Log', + 'verbose_name_plural': 'Scrape Job Logs', + 'ordering': ['created_at'], + }, + ), + migrations.CreateModel( + name='ScraperConfig', + fields=[ + ('id', 
models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('season', models.PositiveSmallIntegerField(help_text='Season to scrape (e.g., 2025 for 2025-26 season)')), + ('is_enabled', models.BooleanField(default=True, help_text='Whether this scraper is enabled for scheduling')), + ('sources', models.JSONField(default=list, help_text='Ordered list of sources to try (e.g., ["basketball_reference", "espn"])')), + ('primary_source', models.CharField(blank=True, help_text='Primary source for this scraper', max_length=100)), + ('request_delay', models.FloatField(default=3.0, help_text='Seconds between requests')), + ('max_retries', models.PositiveSmallIntegerField(default=3, help_text='Maximum retry attempts')), + ('fuzzy_threshold', models.PositiveSmallIntegerField(default=85, help_text='Minimum fuzzy match confidence (0-100)')), + ('last_run', models.DateTimeField(blank=True, help_text='Last successful run timestamp', null=True)), + ('last_run_status', models.CharField(blank=True, help_text='Status of last run', max_length=20)), + ('last_run_games', models.PositiveIntegerField(default=0, help_text='Games found in last run')), + ('notes', models.TextField(blank=True, help_text='Configuration notes')), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('sport', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='scraper_configs', to='core.sport')), + ], + options={ + 'verbose_name': 'Scraper Configuration', + 'verbose_name_plural': 'Scraper Configurations', + 'ordering': ['sport', 'season'], + 'unique_together': {('sport', 'season')}, + }, + ), + migrations.AddField( + model_name='scrapejob', + name='config', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='jobs', to='scraper.scraperconfig'), + ), + migrations.CreateModel( + name='ManualReviewItem', + fields=[ + ('id', models.BigAutoField(auto_created=True, 
primary_key=True, serialize=False, verbose_name='ID')), + ('item_type', models.CharField(choices=[('team', 'Team'), ('stadium', 'Stadium')], max_length=20)), + ('raw_value', models.CharField(help_text='Original scraped value', max_length=300)), + ('suggested_id', models.CharField(blank=True, help_text='Suggested canonical ID (if any match found)', max_length=100)), + ('confidence', models.FloatField(default=0.0, help_text='Match confidence (0.0 - 1.0)')), + ('reason', models.CharField(choices=[('no_match', 'No Match Found'), ('low_confidence', 'Low Confidence Match'), ('ambiguous', 'Ambiguous Match'), ('new_entity', 'Potentially New Entity')], help_text='Why manual review is needed', max_length=20)), + ('source_url', models.URLField(blank=True, help_text='URL where this value was found')), + ('check_date', models.DateField(blank=True, help_text='Date context for alias resolution', null=True)), + ('context', models.JSONField(blank=True, help_text='Additional context (e.g., game info)', null=True)), + ('status', models.CharField(choices=[('pending', 'Pending Review'), ('resolved', 'Resolved'), ('ignored', 'Ignored'), ('new_entity', 'Created New Entity')], default='pending', max_length=20)), + ('resolved_to', models.CharField(blank=True, help_text='Final resolved canonical ID', max_length=100)), + ('resolved_at', models.DateTimeField(blank=True, null=True)), + ('resolution_notes', models.TextField(blank=True, help_text='Notes about the resolution')), + ('create_alias', models.BooleanField(default=False, help_text='Whether to create an alias from this resolution')), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('resolved_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='resolved_review_items', to=settings.AUTH_USER_MODEL)), + ('sport', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='review_items', to='core.sport')), 
+ ('job', models.ForeignKey(blank=True, help_text='Job that created this review item', null=True, on_delete=django.db.models.deletion.CASCADE, related_name='review_items', to='scraper.scrapejob')), + ], + options={ + 'verbose_name': 'Manual Review Item', + 'verbose_name_plural': 'Manual Review Items', + 'ordering': ['-created_at'], + 'indexes': [models.Index(fields=['status', 'item_type'], name='scraper_man_status_5d06e2_idx'), models.Index(fields=['sport', 'status'], name='scraper_man_sport_i_7af37b_idx'), models.Index(fields=['raw_value'], name='scraper_man_raw_val_abdd0a_idx')], + }, + ), + migrations.AddIndex( + model_name='scrapejob', + index=models.Index(fields=['config', 'status'], name='scraper_scr_config__4c4058_idx'), + ), + migrations.AddIndex( + model_name='scrapejob', + index=models.Index(fields=['status', 'created_at'], name='scraper_scr_status_f3978d_idx'), + ), + ] diff --git a/scraper/migrations/__init__.py b/scraper/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scraper/models.py b/scraper/models.py new file mode 100644 index 0000000..330faa6 --- /dev/null +++ b/scraper/models.py @@ -0,0 +1,199 @@ +""" +Scraper models for tracking scraping jobs and manual reviews. +""" +from django.db import models +from simple_history.models import HistoricalRecords + + +class ScraperConfig(models.Model): + """ + Configuration for a sport scraper per season. 
+ """ + sport = models.ForeignKey( + 'core.Sport', + on_delete=models.CASCADE, + related_name='scraper_configs' + ) + season = models.PositiveSmallIntegerField( + help_text='Season year (start year for split seasons)' + ) + is_active = models.BooleanField( + default=True, + help_text='Whether this config is actively scraping' + ) + schedule_url = models.URLField( + blank=True, + help_text='Base URL for schedule scraping' + ) + scrape_interval_hours = models.PositiveSmallIntegerField( + default=24, + help_text='How often to run the scraper (hours)' + ) + last_scrape_at = models.DateTimeField( + null=True, + blank=True, + help_text='When the last scrape completed' + ) + next_scrape_at = models.DateTimeField( + null=True, + blank=True, + help_text='When the next scrape is scheduled' + ) + + # Metadata + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + + # Audit trail + history = HistoricalRecords() + + class Meta: + ordering = ['-season', 'sport'] + unique_together = ['sport', 'season'] + verbose_name = 'Scraper Config' + verbose_name_plural = 'Scraper Configs' + + def __str__(self): + return f"{self.sport.short_name} {self.sport.get_season_display(self.season)}" + + +class ScrapeJob(models.Model): + """ + Record of a scraping job execution. 
+ """ + STATUS_CHOICES = [ + ('pending', 'Pending'), + ('running', 'Running'), + ('completed', 'Completed'), + ('failed', 'Failed'), + ('cancelled', 'Cancelled'), + ] + + config = models.ForeignKey( + ScraperConfig, + on_delete=models.CASCADE, + related_name='jobs' + ) + status = models.CharField( + max_length=20, + choices=STATUS_CHOICES, + default='pending' + ) + started_at = models.DateTimeField( + null=True, + blank=True + ) + completed_at = models.DateTimeField( + null=True, + blank=True + ) + games_found = models.PositiveIntegerField(default=0) + games_created = models.PositiveIntegerField(default=0) + games_updated = models.PositiveIntegerField(default=0) + errors = models.TextField(blank=True) + log_output = models.TextField(blank=True) + + # Metadata + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + + class Meta: + ordering = ['-created_at'] + verbose_name = 'Scrape Job' + verbose_name_plural = 'Scrape Jobs' + + def __str__(self): + return f"{self.config} - {self.status} ({self.created_at.strftime('%Y-%m-%d %H:%M')})" + + @property + def duration(self): + """Return job duration in seconds.""" + if self.started_at and self.completed_at: + return (self.completed_at - self.started_at).total_seconds() + return None + + +class ManualReviewItem(models.Model): + """ + Items flagged for manual review (fuzzy matches, conflicts, etc). 
+ """ + STATUS_CHOICES = [ + ('pending', 'Pending Review'), + ('approved', 'Approved'), + ('rejected', 'Rejected'), + ('resolved', 'Resolved'), + ] + + ITEM_TYPE_CHOICES = [ + ('team', 'Team Match'), + ('stadium', 'Stadium Match'), + ('game', 'Game Conflict'), + ('alias', 'New Alias'), + ] + + sport = models.ForeignKey( + 'core.Sport', + on_delete=models.CASCADE, + related_name='review_items' + ) + job = models.ForeignKey( + ScrapeJob, + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name='review_items' + ) + item_type = models.CharField( + max_length=20, + choices=ITEM_TYPE_CHOICES + ) + status = models.CharField( + max_length=20, + choices=STATUS_CHOICES, + default='pending' + ) + raw_value = models.CharField( + max_length=500, + help_text='The raw scraped value' + ) + matched_value = models.CharField( + max_length=500, + blank=True, + help_text='The matched canonical value (if any)' + ) + confidence = models.PositiveSmallIntegerField( + default=0, + help_text='Match confidence score (0-100)' + ) + context = models.JSONField( + default=dict, + blank=True, + help_text='Additional context (game date, opposing team, etc)' + ) + resolution_notes = models.TextField( + blank=True, + help_text='Notes about the resolution' + ) + resolved_at = models.DateTimeField( + null=True, + blank=True + ) + resolved_by = models.ForeignKey( + 'auth.User', + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name='resolved_reviews' + ) + + # Metadata + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + + class Meta: + ordering = ['-confidence', '-created_at'] + verbose_name = 'Manual Review Item' + verbose_name_plural = 'Manual Review Items' + + def __str__(self): + return f"{self.item_type}: {self.raw_value} ({self.confidence}%)" diff --git a/scraper/models/__init__.py b/scraper/models/__init__.py new file mode 100644 index 0000000..d066dfd --- /dev/null +++ b/scraper/models/__init__.py @@ -0,0 +1,10 
@@ +from .config import ScraperConfig +from .job import ScrapeJob, ScrapeJobLog +from .review import ManualReviewItem + +__all__ = [ + 'ScraperConfig', + 'ScrapeJob', + 'ScrapeJobLog', + 'ManualReviewItem', +] diff --git a/scraper/models/config.py b/scraper/models/config.py new file mode 100644 index 0000000..fc1db76 --- /dev/null +++ b/scraper/models/config.py @@ -0,0 +1,102 @@ +from django.db import models +from django.conf import settings +from simple_history.models import HistoricalRecords + + +class ScraperConfig(models.Model): + """ + Configuration for a sport's scraper. + """ + sport = models.ForeignKey( + 'core.Sport', + on_delete=models.CASCADE, + related_name='scraper_configs' + ) + season = models.PositiveSmallIntegerField( + help_text='Season to scrape (e.g., 2025 for 2025-26 season)' + ) + is_enabled = models.BooleanField( + default=True, + help_text='Whether this scraper is enabled for scheduling' + ) + + # Source configuration + sources = models.JSONField( + default=list, + help_text='Ordered list of sources to try (e.g., ["basketball_reference", "espn"])' + ) + primary_source = models.CharField( + max_length=100, + blank=True, + help_text='Primary source for this scraper' + ) + + # Rate limiting + request_delay = models.FloatField( + default=settings.SCRAPER_REQUEST_DELAY, + help_text='Seconds between requests' + ) + max_retries = models.PositiveSmallIntegerField( + default=settings.SCRAPER_MAX_RETRIES, + help_text='Maximum retry attempts' + ) + + # Fuzzy matching + fuzzy_threshold = models.PositiveSmallIntegerField( + default=settings.SCRAPER_FUZZY_THRESHOLD, + help_text='Minimum fuzzy match confidence (0-100)' + ) + + # Scheduling + last_run = models.DateTimeField( + null=True, + blank=True, + help_text='Last successful run timestamp' + ) + last_run_status = models.CharField( + max_length=20, + blank=True, + help_text='Status of last run' + ) + last_run_games = models.PositiveIntegerField( + default=0, + help_text='Games found in last run' + ) + + 
# Notes + notes = models.TextField( + blank=True, + help_text='Configuration notes' + ) + + # Metadata + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + + # Audit trail + history = HistoricalRecords() + + class Meta: + ordering = ['sport', 'season'] + unique_together = ['sport', 'season'] + verbose_name = 'Scraper Configuration' + verbose_name_plural = 'Scraper Configurations' + + def __str__(self): + return f"{self.sport.short_name} {self.sport.get_season_display(self.season)}" + + def get_sources_list(self): + """Return sources as list, using defaults if empty.""" + if self.sources: + return self.sources + # Default sources per sport + defaults = { + 'nba': ['basketball_reference', 'espn'], + 'mlb': ['baseball_reference', 'mlb_api', 'espn'], + 'nfl': ['espn', 'pro_football_reference'], + 'nhl': ['hockey_reference', 'nhl_api', 'espn'], + 'mls': ['espn'], + 'wnba': ['espn'], + 'nwsl': ['espn'], + } + return defaults.get(self.sport.code, ['espn']) diff --git a/scraper/models/job.py b/scraper/models/job.py new file mode 100644 index 0000000..2d696fd --- /dev/null +++ b/scraper/models/job.py @@ -0,0 +1,159 @@ +from django.db import models +from simple_history.models import HistoricalRecords + + +class ScrapeJob(models.Model): + """ + Record of a scraping job execution. 
+ """ + STATUS_CHOICES = [ + ('pending', 'Pending'), + ('running', 'Running'), + ('completed', 'Completed'), + ('failed', 'Failed'), + ('cancelled', 'Cancelled'), + ] + + config = models.ForeignKey( + 'scraper.ScraperConfig', + on_delete=models.CASCADE, + related_name='jobs' + ) + status = models.CharField( + max_length=20, + choices=STATUS_CHOICES, + default='pending' + ) + triggered_by = models.CharField( + max_length=50, + default='manual', + help_text='How the job was triggered (manual, scheduled, api)' + ) + + # Timing + started_at = models.DateTimeField(null=True, blank=True) + finished_at = models.DateTimeField(null=True, blank=True) + + # Results + games_found = models.PositiveIntegerField(default=0) + games_new = models.PositiveIntegerField(default=0) + games_updated = models.PositiveIntegerField(default=0) + games_unchanged = models.PositiveIntegerField(default=0) + games_errors = models.PositiveIntegerField(default=0) + + teams_found = models.PositiveIntegerField(default=0) + stadiums_found = models.PositiveIntegerField(default=0) + review_items_created = models.PositiveIntegerField(default=0) + + # Error tracking + error_message = models.TextField(blank=True) + error_traceback = models.TextField(blank=True) + + # Celery task ID for tracking + celery_task_id = models.CharField( + max_length=255, + blank=True, + help_text='Celery task ID for this job' + ) + + # Metadata + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + + class Meta: + ordering = ['-created_at'] + verbose_name = 'Scrape Job' + verbose_name_plural = 'Scrape Jobs' + indexes = [ + models.Index(fields=['config', 'status']), + models.Index(fields=['status', 'created_at']), + ] + + def __str__(self): + return f"{self.config} - {self.created_at.strftime('%Y-%m-%d %H:%M')}" + + @property + def duration(self): + """Return job duration as timedelta or None.""" + if self.started_at and self.finished_at: + return self.finished_at - 
self.started_at + return None + + @property + def duration_display(self): + """Return formatted duration string.""" + duration = self.duration + if duration: + total_seconds = int(duration.total_seconds()) + minutes, seconds = divmod(total_seconds, 60) + if minutes > 0: + return f"{minutes}m {seconds}s" + return f"{seconds}s" + return '-' + + @property + def sport(self): + return self.config.sport + + @property + def season(self): + return self.config.season + + def get_summary(self): + """Return summary dict for notifications.""" + return { + 'sport': self.config.sport.short_name, + 'season': self.config.sport.get_season_display(self.config.season), + 'status': self.status, + 'duration': self.duration_display, + 'games_found': self.games_found, + 'games_new': self.games_new, + 'games_updated': self.games_updated, + 'games_unchanged': self.games_unchanged, + 'games_errors': self.games_errors, + 'review_items': self.review_items_created, + 'error_message': self.error_message, + } + + +class ScrapeJobLog(models.Model): + """ + Log entries for a scrape job. 
+ """ + LEVEL_CHOICES = [ + ('debug', 'Debug'), + ('info', 'Info'), + ('warning', 'Warning'), + ('error', 'Error'), + ] + + job = models.ForeignKey( + ScrapeJob, + on_delete=models.CASCADE, + related_name='logs' + ) + level = models.CharField( + max_length=10, + choices=LEVEL_CHOICES, + default='info' + ) + message = models.TextField() + source = models.CharField( + max_length=100, + blank=True, + help_text='Source/component that generated this log' + ) + extra_data = models.JSONField( + null=True, + blank=True, + help_text='Additional structured data' + ) + created_at = models.DateTimeField(auto_now_add=True) + + class Meta: + ordering = ['created_at'] + verbose_name = 'Scrape Job Log' + verbose_name_plural = 'Scrape Job Logs' + + def __str__(self): + return f"[{self.level.upper()}] {self.message[:50]}" diff --git a/scraper/models/review.py b/scraper/models/review.py new file mode 100644 index 0000000..66cecab --- /dev/null +++ b/scraper/models/review.py @@ -0,0 +1,192 @@ +from django.db import models +from simple_history.models import HistoricalRecords + + +class ManualReviewItem(models.Model): + """ + Items that require manual review before resolution. 
+ """ + ITEM_TYPE_CHOICES = [ + ('team', 'Team'), + ('stadium', 'Stadium'), + ] + + STATUS_CHOICES = [ + ('pending', 'Pending Review'), + ('resolved', 'Resolved'), + ('ignored', 'Ignored'), + ('new_entity', 'Created New Entity'), + ] + + REASON_CHOICES = [ + ('no_match', 'No Match Found'), + ('low_confidence', 'Low Confidence Match'), + ('ambiguous', 'Ambiguous Match'), + ('new_entity', 'Potentially New Entity'), + ] + + job = models.ForeignKey( + 'scraper.ScrapeJob', + on_delete=models.CASCADE, + related_name='review_items', + null=True, + blank=True, + help_text='Job that created this review item' + ) + item_type = models.CharField( + max_length=20, + choices=ITEM_TYPE_CHOICES + ) + sport = models.ForeignKey( + 'core.Sport', + on_delete=models.CASCADE, + related_name='review_items' + ) + + # Raw value from scraping + raw_value = models.CharField( + max_length=300, + help_text='Original scraped value' + ) + + # Suggested resolution + suggested_id = models.CharField( + max_length=100, + blank=True, + help_text='Suggested canonical ID (if any match found)' + ) + confidence = models.FloatField( + default=0.0, + help_text='Match confidence (0.0 - 1.0)' + ) + reason = models.CharField( + max_length=20, + choices=REASON_CHOICES, + help_text='Why manual review is needed' + ) + + # Context + source_url = models.URLField( + blank=True, + help_text='URL where this value was found' + ) + check_date = models.DateField( + null=True, + blank=True, + help_text='Date context for alias resolution' + ) + context = models.JSONField( + null=True, + blank=True, + help_text='Additional context (e.g., game info)' + ) + + # Resolution + status = models.CharField( + max_length=20, + choices=STATUS_CHOICES, + default='pending' + ) + resolved_to = models.CharField( + max_length=100, + blank=True, + help_text='Final resolved canonical ID' + ) + resolved_by = models.ForeignKey( + 'auth.User', + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name='resolved_review_items' + ) + 
resolved_at = models.DateTimeField(null=True, blank=True) + resolution_notes = models.TextField( + blank=True, + help_text='Notes about the resolution' + ) + create_alias = models.BooleanField( + default=False, + help_text='Whether to create an alias from this resolution' + ) + + # Metadata + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + + # Audit trail + history = HistoricalRecords() + + class Meta: + ordering = ['-created_at'] + verbose_name = 'Manual Review Item' + verbose_name_plural = 'Manual Review Items' + indexes = [ + models.Index(fields=['status', 'item_type']), + models.Index(fields=['sport', 'status']), + models.Index(fields=['raw_value']), + ] + + def __str__(self): + return f"{self.item_type}: {self.raw_value} ({self.get_status_display()})" + + @property + def confidence_display(self): + """Return confidence as percentage string.""" + return f"{self.confidence * 100:.0f}%" + + def resolve(self, canonical_id, user=None, notes='', create_alias=False): + """Resolve this review item.""" + from django.utils import timezone + self.status = 'resolved' + self.resolved_to = canonical_id + self.resolved_by = user + self.resolved_at = timezone.now() + self.resolution_notes = notes + self.create_alias = create_alias + self.save() + + # Optionally create alias + if create_alias and canonical_id: + self._create_alias(canonical_id) + + def _create_alias(self, canonical_id): + """Create an alias from this resolution.""" + from core.models import TeamAlias, StadiumAlias, Team, Stadium + + if self.item_type == 'team': + try: + team = Team.objects.get(id=canonical_id) + TeamAlias.objects.get_or_create( + team=team, + alias=self.raw_value, + defaults={ + 'alias_type': 'historical', + 'source': 'manual_review', + 'notes': f'Created from review item #{self.id}', + } + ) + except Team.DoesNotExist: + pass + elif self.item_type == 'stadium': + try: + stadium = Stadium.objects.get(id=canonical_id) + 
StadiumAlias.objects.get_or_create( + stadium=stadium, + alias=self.raw_value, + defaults={ + 'alias_type': 'former', + 'source': 'manual_review', + 'notes': f'Created from review item #{self.id}', + } + ) + except Stadium.DoesNotExist: + pass + + def ignore(self, user=None, notes=''): + """Mark this review item as ignored.""" + from django.utils import timezone + self.status = 'ignored' + self.resolved_by = user + self.resolved_at = timezone.now() + self.resolution_notes = notes + self.save() diff --git a/scraper/resources.py b/scraper/resources.py new file mode 100644 index 0000000..d2f669a --- /dev/null +++ b/scraper/resources.py @@ -0,0 +1,55 @@ +"""Import/Export resources for scraper models.""" +from import_export import resources, fields +from import_export.widgets import ForeignKeyWidget + +from core.models import Sport +from .models import ScraperConfig, ScrapeJob, ManualReviewItem + + +class ScraperConfigResource(resources.ModelResource): + sport = fields.Field( + column_name='sport', + attribute='sport', + widget=ForeignKeyWidget(Sport, 'code') + ) + + class Meta: + model = ScraperConfig + import_id_fields = ['sport', 'season'] + fields = [ + 'sport', 'season', 'is_enabled', 'sources', + 'request_delay', 'max_retries', 'fuzzy_threshold', 'primary_source', + ] + export_order = fields + + +class ScrapeJobResource(resources.ModelResource): + sport = fields.Field(attribute='config__sport__code', readonly=True) + season = fields.Field(attribute='config__season', readonly=True) + + class Meta: + model = ScrapeJob + fields = [ + 'id', 'sport', 'season', 'status', + 'games_found', 'games_new', 'games_updated', 'games_unchanged', + 'started_at', 'finished_at', 'games_errors', 'created_at', + ] + export_order = fields + + +class ManualReviewItemResource(resources.ModelResource): + sport = fields.Field( + column_name='sport', + attribute='sport', + widget=ForeignKeyWidget(Sport, 'code') + ) + + class Meta: + model = ManualReviewItem + import_id_fields = ['id'] + fields = [ + 'id', 'sport', 
'item_type', 'raw_value', 'suggested_id', + 'status', 'confidence', 'reason', 'source_url', + 'check_date', 'created_at', + ] + export_order = fields diff --git a/scraper/tasks.py b/scraper/tasks.py new file mode 100644 index 0000000..d9415d3 --- /dev/null +++ b/scraper/tasks.py @@ -0,0 +1,182 @@ +import logging +import traceback +from datetime import datetime + +from celery import shared_task +from django.utils import timezone + +logger = logging.getLogger('scraper') + + +@shared_task(bind=True, max_retries=3) +def run_scraper_task(self, config_id: int, triggered_by: str = 'manual'): + """ + Run a scraper job for the given configuration. + """ + from scraper.models import ScraperConfig, ScrapeJob, ScrapeJobLog + from notifications.tasks import send_scrape_notification + + # Get configuration + try: + config = ScraperConfig.objects.select_related('sport').get(id=config_id) + except ScraperConfig.DoesNotExist: + logger.error(f"ScraperConfig {config_id} not found") + return {'error': 'Configuration not found'} + + # Create job record + job = ScrapeJob.objects.create( + config=config, + status='running', + triggered_by=triggered_by, + started_at=timezone.now(), + celery_task_id=self.request.id, + ) + + def log(level, message, source='', extra_data=None): + ScrapeJobLog.objects.create( + job=job, + level=level, + message=message, + source=source, + extra_data=extra_data, + ) + getattr(logger, level)(f"[{config.sport.code}] {message}") + + try: + log('info', f'Starting scraper for {config.sport.short_name} {config.season}') + + # Import and run the appropriate scraper + result = run_sport_scraper(config, log) + + # Update job with results + job.status = 'completed' + job.finished_at = timezone.now() + job.games_found = result.get('games_found', 0) + job.games_new = result.get('games_new', 0) + job.games_updated = result.get('games_updated', 0) + job.games_unchanged = result.get('games_unchanged', 0) + job.games_errors = result.get('games_errors', 0) + job.teams_found = 
result.get('teams_found', 0) + job.stadiums_found = result.get('stadiums_found', 0) + job.review_items_created = result.get('review_items', 0) + job.save() + + # Update config + config.last_run = timezone.now() + config.last_run_status = 'completed' + config.last_run_games = result.get('games_found', 0) + config.save() + + log('info', f'Scraper completed: {job.games_found} games, {job.games_new} new, {job.review_items_created} reviews') + + # Send notification + send_scrape_notification.delay(job.id) + + return { + 'job_id': job.id, + 'status': 'completed', + 'games_found': job.games_found, + 'games_new': job.games_new, + 'review_items': job.review_items_created, + } + + except Exception as e: + error_msg = str(e) + error_tb = traceback.format_exc() + + job.status = 'failed' + job.finished_at = timezone.now() + job.error_message = error_msg + job.error_traceback = error_tb + job.save() + + config.last_run = timezone.now() + config.last_run_status = 'failed' + config.save() + + log('error', f'Scraper failed: {error_msg}', extra_data={'traceback': error_tb}) + + # Send failure notification + send_scrape_notification.delay(job.id) + + # Retry if applicable + if self.request.retries < self.max_retries: + raise self.retry(exc=e, countdown=60 * (self.request.retries + 1)) + + return { + 'job_id': job.id, + 'status': 'failed', + 'error': error_msg, + } + + +def run_sport_scraper(config, log_func): + """ + Run the appropriate scraper for the sport. + Returns dict with results. 
+ """ + from core.models import Game, Team, Stadium + from scraper.models import ManualReviewItem + + sport_code = config.sport.code + season = config.season + + log_func('info', f'Loading scraper for {sport_code}', source='engine') + + # Import the scraper engine from sportstime_parser + # This adapts the existing scrapers to work with Django models + from scraper.engine.adapter import ScraperAdapter + + adapter = ScraperAdapter( + sport_code=sport_code, + season=season, + config=config, + log_func=log_func, + ) + + # Run the scraper + result = adapter.run() + + return result + + +@shared_task +def run_all_enabled_scrapers(): + """ + Run all enabled scraper configurations. + Called by celery-beat on schedule. + """ + from scraper.models import ScraperConfig + + configs = ScraperConfig.objects.filter(is_enabled=True) + for config in configs: + run_scraper_task.delay(config.id, triggered_by='scheduled') + + return {'configs_queued': configs.count()} + + +@shared_task +def cleanup_old_jobs(days: int = 30): + """ + Clean up old scrape job records. + """ + from scraper.models import ScrapeJob, ScrapeJobLog + from django.utils import timezone + from datetime import timedelta + + cutoff = timezone.now() - timedelta(days=days) + + # Delete old logs first (foreign key) + logs_deleted, _ = ScrapeJobLog.objects.filter( + job__created_at__lt=cutoff + ).delete() + + # Then delete old jobs + jobs_deleted, _ = ScrapeJob.objects.filter( + created_at__lt=cutoff + ).delete() + + return { + 'jobs_deleted': jobs_deleted, + 'logs_deleted': logs_deleted, + } diff --git a/sportstime/__init__.py b/sportstime/__init__.py new file mode 100644 index 0000000..15d7c50 --- /dev/null +++ b/sportstime/__init__.py @@ -0,0 +1,5 @@ +# This will make sure the app is always imported when +# Django starts so that shared_task will use this app. 
+from .celery import app as celery_app + +__all__ = ('celery_app',) diff --git a/sportstime/celery.py b/sportstime/celery.py new file mode 100644 index 0000000..4faeb22 --- /dev/null +++ b/sportstime/celery.py @@ -0,0 +1,22 @@ +import os + +from celery import Celery + +# Set the default Django settings module for the 'celery' program. +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'sportstime.settings') + +app = Celery('sportstime') + +# Using a string here means the worker doesn't have to serialize +# the configuration object to child processes. +# - namespace='CELERY' means all celery-related configuration keys +# should have a `CELERY_` prefix. +app.config_from_object('django.conf:settings', namespace='CELERY') + +# Load task modules from all registered Django apps. +app.autodiscover_tasks() + + +@app.task(bind=True, ignore_result=True) +def debug_task(self): + print(f'Request: {self.request!r}') diff --git a/sportstime/settings.py b/sportstime/settings.py new file mode 100644 index 0000000..bd902b6 --- /dev/null +++ b/sportstime/settings.py @@ -0,0 +1,236 @@ +""" +Django settings for sportstime project. +""" +import os +from pathlib import Path + +import dj_database_url +from dotenv import load_dotenv + +# Load environment variables from .env file +load_dotenv() + +# Build paths inside the project like this: BASE_DIR / 'subdir'. +BASE_DIR = Path(__file__).resolve().parent.parent + +# SECURITY WARNING: keep the secret key used in production secret! +SECRET_KEY = os.environ.get('SECRET_KEY', 'django-insecure-dev-key-change-in-production') + +# SECURITY WARNING: don't run with debug turned on in production! 
+DEBUG = os.environ.get('DEBUG', 'False').lower() in ('true', '1', 'yes') + +ALLOWED_HOSTS = [ + host.strip() + for host in os.environ.get('ALLOWED_HOSTS', 'localhost,127.0.0.1').split(',') + if host.strip() +] + +# Application definition +INSTALLED_APPS = [ + 'django.contrib.admin', + 'django.contrib.auth', + 'django.contrib.contenttypes', + 'django.contrib.sessions', + 'django.contrib.messages', + 'django.contrib.staticfiles', + # Third party + 'django_celery_beat', + 'django_celery_results', + 'simple_history', + 'import_export', + # Local apps + 'core', + 'scraper', + 'cloudkit', + 'dashboard', + 'notifications', +] + +MIDDLEWARE = [ + 'django.middleware.security.SecurityMiddleware', + 'whitenoise.middleware.WhiteNoiseMiddleware', + 'django.contrib.sessions.middleware.SessionMiddleware', + 'django.middleware.common.CommonMiddleware', + 'django.middleware.csrf.CsrfViewMiddleware', + 'django.contrib.auth.middleware.AuthenticationMiddleware', + 'django.contrib.messages.middleware.MessageMiddleware', + 'django.middleware.clickjacking.XFrameOptionsMiddleware', + 'simple_history.middleware.HistoryRequestMiddleware', +] + +ROOT_URLCONF = 'sportstime.urls' + +TEMPLATES = [ + { + 'BACKEND': 'django.template.backends.django.DjangoTemplates', + 'DIRS': [BASE_DIR / 'templates'], + 'APP_DIRS': True, + 'OPTIONS': { + 'context_processors': [ + 'django.template.context_processors.debug', + 'django.template.context_processors.request', + 'django.contrib.auth.context_processors.auth', + 'django.contrib.messages.context_processors.messages', + ], + }, + }, +] + +WSGI_APPLICATION = 'sportstime.wsgi.application' + +# Database +DATABASES = { + 'default': dj_database_url.config( + default='postgresql://sportstime:changeme@localhost:5432/sportstime', + conn_max_age=600, + conn_health_checks=True, + ) +} + +# Password validation +AUTH_PASSWORD_VALIDATORS = [ + {'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator'}, + {'NAME': 
'django.contrib.auth.password_validation.MinimumLengthValidator'}, + {'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator'}, + {'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator'}, +] + +# Internationalization +LANGUAGE_CODE = 'en-us' +TIME_ZONE = 'UTC' +USE_I18N = True +USE_TZ = True + +# Static files (CSS, JavaScript, Images) +STATIC_URL = 'static/' +STATIC_ROOT = BASE_DIR / 'staticfiles' +STATICFILES_DIRS = [BASE_DIR / 'static'] if (BASE_DIR / 'static').exists() else [] +STATICFILES_STORAGE = 'whitenoise.storage.CompressedManifestStaticFilesStorage'  # NOTE(review): STATICFILES_STORAGE was removed in Django 5.1; if running 5.1+, move this into STORAGES['staticfiles'] — confirm Django version in requirements.txt + +# Media files +MEDIA_URL = 'media/' +MEDIA_ROOT = BASE_DIR / 'media' + +# Default primary key field type +DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' + +# Celery Configuration +CELERY_BROKER_URL = os.environ.get('REDIS_URL', 'redis://localhost:6379/0') +CELERY_RESULT_BACKEND = 'django-db' +CELERY_CACHE_BACKEND = 'django-cache' +CELERY_ACCEPT_CONTENT = ['json'] +CELERY_TASK_SERIALIZER = 'json' +CELERY_RESULT_SERIALIZER = 'json' +CELERY_TIMEZONE = TIME_ZONE +CELERY_TASK_TRACK_STARTED = True +CELERY_TASK_TIME_LIMIT = 30 * 60  # 30 minutes +CELERY_BEAT_SCHEDULER = 'django_celery_beat.schedulers:DatabaseScheduler' + +# Email Configuration +EMAIL_BACKEND = 'django.core.mail.backends.smtp.EmailBackend' +EMAIL_HOST = os.environ.get('EMAIL_HOST', 'smtp.gmail.com') +EMAIL_PORT = int(os.environ.get('EMAIL_PORT', 587)) +EMAIL_USE_TLS = os.environ.get('EMAIL_USE_TLS', 'True').lower() in ('true', '1', 'yes') +EMAIL_HOST_USER = os.environ.get('EMAIL_HOST_USER', '') +EMAIL_HOST_PASSWORD = os.environ.get('EMAIL_HOST_PASSWORD', '') +DEFAULT_FROM_EMAIL = os.environ.get('DEFAULT_FROM_EMAIL', 'SportsTime <noreply@localhost>') +ADMIN_EMAIL = os.environ.get('ADMIN_EMAIL', '') + +# Security settings (for production behind proxy) +if not DEBUG: + SECURE_SSL_REDIRECT = os.environ.get('SECURE_SSL_REDIRECT', 'False').lower() in ('true', '1', 'yes') + SECURE_PROXY_SSL_HEADER = ('HTTP_X_FORWARDED_PROTO', 
'https') + SESSION_COOKIE_SECURE = os.environ.get('SESSION_COOKIE_SECURE', 'True').lower() in ('true', '1', 'yes') + CSRF_COOKIE_SECURE = os.environ.get('CSRF_COOKIE_SECURE', 'True').lower() in ('true', '1', 'yes') + CSRF_TRUSTED_ORIGINS = [ + origin.strip() + for origin in os.environ.get('CSRF_TRUSTED_ORIGINS', '').split(',') + if origin.strip() + ] + +# CloudKit Configuration +CLOUDKIT_CONTAINER = os.environ.get('CLOUDKIT_CONTAINER', 'iCloud.com.sportstime.app') +CLOUDKIT_ENVIRONMENT = os.environ.get('CLOUDKIT_ENVIRONMENT', 'development') +CLOUDKIT_KEY_ID = os.environ.get('CLOUDKIT_KEY_ID', '') +CLOUDKIT_PRIVATE_KEY_PATH = os.environ.get('CLOUDKIT_PRIVATE_KEY_PATH', '') + +# Scraper Configuration +SCRAPER_REQUEST_DELAY = float(os.environ.get('SCRAPER_REQUEST_DELAY', 3.0)) +SCRAPER_MAX_RETRIES = int(os.environ.get('SCRAPER_MAX_RETRIES', 3)) +SCRAPER_FUZZY_THRESHOLD = int(os.environ.get('SCRAPER_FUZZY_THRESHOLD', 85)) +SCRAPER_BACKOFF_FACTOR = 2.0 +SCRAPER_INITIAL_BACKOFF = 5.0 + +# Expected game counts per sport (for validation) +EXPECTED_GAME_COUNTS = { + 'nba': 1230, + 'mlb': 2430, + 'nfl': 272, + 'nhl': 1312, + 'mls': 544, + 'wnba': 228, + 'nwsl': 182, +} + +# Logging Configuration +LOGGING = { + 'version': 1, + 'disable_existing_loggers': False, + 'formatters': { + 'verbose': { + 'format': '{levelname} {asctime} {module} {process:d} {thread:d} {message}', + 'style': '{', + }, + 'simple': { + 'format': '{levelname} {asctime} {message}', + 'style': '{', + }, + }, + 'handlers': { + 'console': { + 'class': 'logging.StreamHandler', + 'formatter': 'simple', + }, + 'file': { + 'class': 'logging.handlers.RotatingFileHandler', + 'filename': BASE_DIR / 'logs' / 'sportstime.log', + 'maxBytes': 10 * 1024 * 1024, # 10 MB + 'backupCount': 5, + 'formatter': 'verbose', + }, + }, + 'root': { + 'handlers': ['console'], + 'level': 'INFO', + }, + 'loggers': { + 'django': { + 'handlers': ['console'], + 'level': os.environ.get('DJANGO_LOG_LEVEL', 'INFO'), + 'propagate': False, + }, 
+ 'scraper': { + 'handlers': ['console', 'file'] if not DEBUG else ['console'], + 'level': 'INFO', + 'propagate': False, + }, + 'cloudkit': { + 'handlers': ['console', 'file'] if not DEBUG else ['console'], + 'level': 'INFO', + 'propagate': False, + }, + 'celery': { + 'handlers': ['console'], + 'level': 'INFO', + 'propagate': False, + }, + }, +} + +# Create logs directory if it doesn't exist +(BASE_DIR / 'logs').mkdir(exist_ok=True) + +# Admin site customization +ADMIN_SITE_HEADER = 'SportsTime Administration' +ADMIN_SITE_TITLE = 'SportsTime Admin' +ADMIN_INDEX_TITLE = 'Dashboard' diff --git a/sportstime/urls.py b/sportstime/urls.py new file mode 100644 index 0000000..f814091 --- /dev/null +++ b/sportstime/urls.py @@ -0,0 +1,20 @@ +""" +URL configuration for sportstime project. +""" +from django.contrib import admin +from django.urls import path, include +from django.conf import settings +from django.conf.urls.static import static + +# Customize admin site +admin.site.site_header = 'SportsTime Admin' +admin.site.site_title = 'SportsTime' +admin.site.index_title = 'Sports Data Management' + +urlpatterns = [ + path('admin/', admin.site.urls), + path('dashboard/', include('dashboard.urls')), +] + +if settings.DEBUG: + urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT) diff --git a/sportstime/wsgi.py b/sportstime/wsgi.py new file mode 100644 index 0000000..caf6f5f --- /dev/null +++ b/sportstime/wsgi.py @@ -0,0 +1,16 @@ +""" +WSGI config for sportstime project. + +It exposes the WSGI callable as a module-level variable named ``application``. 
+ +For more information on this file, see +https://docs.djangoproject.com/en/5.1/howto/deployment/wsgi/ +""" + +import os + +from django.core.wsgi import get_wsgi_application + +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'sportstime.settings') + +application = get_wsgi_application() diff --git a/sportstime_parser/normalizers/stadium_resolver.py b/sportstime_parser/normalizers/stadium_resolver.py index 40a66de..9ba3784 100644 --- a/sportstime_parser/normalizers/stadium_resolver.py +++ b/sportstime_parser/normalizers/stadium_resolver.py @@ -53,29 +53,29 @@ STADIUM_MAPPINGS: dict[str, dict[str, StadiumInfo]] = { "stadium_nba_td_garden": StadiumInfo("stadium_nba_td_garden", "TD Garden", "Boston", "MA", "USA", "nba", 42.3662, -71.0621), "stadium_nba_barclays_center": StadiumInfo("stadium_nba_barclays_center", "Barclays Center", "Brooklyn", "NY", "USA", "nba", 40.6826, -73.9754), "stadium_nba_spectrum_center": StadiumInfo("stadium_nba_spectrum_center", "Spectrum Center", "Charlotte", "NC", "USA", "nba", 35.2251, -80.8392), - "stadium_nba_united_center": StadiumInfo("stadium_nba_united_center", "United Center", "Chicago", "IL", "USA", "nba", 41.8807, -87.6742), + "stadium_nba_united_center": StadiumInfo("stadium_nba_united_center", "United Center", "Chicago", "IL", "USA", "nba", 41.8807, -87.6742, "America/Chicago"), "stadium_nba_rocket_mortgage_fieldhouse": StadiumInfo("stadium_nba_rocket_mortgage_fieldhouse", "Rocket Mortgage FieldHouse", "Cleveland", "OH", "USA", "nba", 41.4965, -81.6882), - "stadium_nba_american_airlines_center": StadiumInfo("stadium_nba_american_airlines_center", "American Airlines Center", "Dallas", "TX", "USA", "nba", 32.7905, -96.8103), + "stadium_nba_american_airlines_center": StadiumInfo("stadium_nba_american_airlines_center", "American Airlines Center", "Dallas", "TX", "USA", "nba", 32.7905, -96.8103, "America/Chicago"), "stadium_nba_ball_arena": StadiumInfo("stadium_nba_ball_arena", "Ball Arena", "Denver", "CO", "USA", "nba", 39.7487, 
-105.0077, "America/Denver"), "stadium_nba_little_caesars_arena": StadiumInfo("stadium_nba_little_caesars_arena", "Little Caesars Arena", "Detroit", "MI", "USA", "nba", 42.3411, -83.0553), "stadium_nba_chase_center": StadiumInfo("stadium_nba_chase_center", "Chase Center", "San Francisco", "CA", "USA", "nba", 37.7680, -122.3877, "America/Los_Angeles"), "stadium_nba_toyota_center": StadiumInfo("stadium_nba_toyota_center", "Toyota Center", "Houston", "TX", "USA", "nba", 29.7508, -95.3621, "America/Chicago"), "stadium_nba_gainbridge_fieldhouse": StadiumInfo("stadium_nba_gainbridge_fieldhouse", "Gainbridge Fieldhouse", "Indianapolis", "IN", "USA", "nba", 39.7640, -86.1555), - "stadium_nba_intuit_dome": StadiumInfo("stadium_nba_intuit_dome", "Intuit Dome", "Inglewood", "CA", "USA", "nba", 33.9425, -118.3417), - "stadium_nba_cryptocom_arena": StadiumInfo("stadium_nba_cryptocom_arena", "Crypto.com Arena", "Los Angeles", "CA", "USA", "nba", 34.0430, -118.2673), - "stadium_nba_fedexforum": StadiumInfo("stadium_nba_fedexforum", "FedExForum", "Memphis", "TN", "USA", "nba", 35.1383, -90.0505), + "stadium_nba_intuit_dome": StadiumInfo("stadium_nba_intuit_dome", "Intuit Dome", "Inglewood", "CA", "USA", "nba", 33.9425, -118.3417, "America/Los_Angeles"), + "stadium_nba_cryptocom_arena": StadiumInfo("stadium_nba_cryptocom_arena", "Crypto.com Arena", "Los Angeles", "CA", "USA", "nba", 34.0430, -118.2673, "America/Los_Angeles"), + "stadium_nba_fedexforum": StadiumInfo("stadium_nba_fedexforum", "FedExForum", "Memphis", "TN", "USA", "nba", 35.1383, -90.0505, "America/Chicago"), "stadium_nba_kaseya_center": StadiumInfo("stadium_nba_kaseya_center", "Kaseya Center", "Miami", "FL", "USA", "nba", 25.7814, -80.1870), - "stadium_nba_fiserv_forum": StadiumInfo("stadium_nba_fiserv_forum", "Fiserv Forum", "Milwaukee", "WI", "USA", "nba", 43.0451, -87.9172), - "stadium_nba_target_center": StadiumInfo("stadium_nba_target_center", "Target Center", "Minneapolis", "MN", "USA", "nba", 44.9795, 
-93.2761), - "stadium_nba_smoothie_king_center": StadiumInfo("stadium_nba_smoothie_king_center", "Smoothie King Center", "New Orleans", "LA", "USA", "nba", 29.9490, -90.0821), + "stadium_nba_fiserv_forum": StadiumInfo("stadium_nba_fiserv_forum", "Fiserv Forum", "Milwaukee", "WI", "USA", "nba", 43.0451, -87.9172, "America/Chicago"), + "stadium_nba_target_center": StadiumInfo("stadium_nba_target_center", "Target Center", "Minneapolis", "MN", "USA", "nba", 44.9795, -93.2761, "America/Chicago"), + "stadium_nba_smoothie_king_center": StadiumInfo("stadium_nba_smoothie_king_center", "Smoothie King Center", "New Orleans", "LA", "USA", "nba", 29.9490, -90.0821, "America/Chicago"), "stadium_nba_madison_square_garden": StadiumInfo("stadium_nba_madison_square_garden", "Madison Square Garden", "New York", "NY", "USA", "nba", 40.7505, -73.9934), - "stadium_nba_paycom_center": StadiumInfo("stadium_nba_paycom_center", "Paycom Center", "Oklahoma City", "OK", "USA", "nba", 35.4634, -97.5151), + "stadium_nba_paycom_center": StadiumInfo("stadium_nba_paycom_center", "Paycom Center", "Oklahoma City", "OK", "USA", "nba", 35.4634, -97.5151, "America/Chicago"), "stadium_nba_kia_center": StadiumInfo("stadium_nba_kia_center", "Kia Center", "Orlando", "FL", "USA", "nba", 28.5392, -81.3839), "stadium_nba_wells_fargo_center": StadiumInfo("stadium_nba_wells_fargo_center", "Wells Fargo Center", "Philadelphia", "PA", "USA", "nba", 39.9012, -75.1720), - "stadium_nba_footprint_center": StadiumInfo("stadium_nba_footprint_center", "Footprint Center", "Phoenix", "AZ", "USA", "nba", 33.4457, -112.0712), - "stadium_nba_moda_center": StadiumInfo("stadium_nba_moda_center", "Moda Center", "Portland", "OR", "USA", "nba", 45.5316, -122.6668), - "stadium_nba_golden_1_center": StadiumInfo("stadium_nba_golden_1_center", "Golden 1 Center", "Sacramento", "CA", "USA", "nba", 38.5802, -121.4997), - "stadium_nba_frost_bank_center": StadiumInfo("stadium_nba_frost_bank_center", "Frost Bank Center", "San Antonio", "TX", 
"USA", "nba", 29.4270, -98.4375), + "stadium_nba_footprint_center": StadiumInfo("stadium_nba_footprint_center", "Footprint Center", "Phoenix", "AZ", "USA", "nba", 33.4457, -112.0712, "America/Phoenix"), + "stadium_nba_moda_center": StadiumInfo("stadium_nba_moda_center", "Moda Center", "Portland", "OR", "USA", "nba", 45.5316, -122.6668, "America/Los_Angeles"), + "stadium_nba_golden_1_center": StadiumInfo("stadium_nba_golden_1_center", "Golden 1 Center", "Sacramento", "CA", "USA", "nba", 38.5802, -121.4997, "America/Los_Angeles"), + "stadium_nba_frost_bank_center": StadiumInfo("stadium_nba_frost_bank_center", "Frost Bank Center", "San Antonio", "TX", "USA", "nba", 29.4270, -98.4375, "America/Chicago"), "stadium_nba_scotiabank_arena": StadiumInfo("stadium_nba_scotiabank_arena", "Scotiabank Arena", "Toronto", "ON", "Canada", "nba", 43.6435, -79.3791, "America/Toronto"), "stadium_nba_delta_center": StadiumInfo("stadium_nba_delta_center", "Delta Center", "Salt Lake City", "UT", "USA", "nba", 40.7683, -111.9011, "America/Denver"), "stadium_nba_capital_one_arena": StadiumInfo("stadium_nba_capital_one_arena", "Capital One Arena", "Washington", "DC", "USA", "nba", 38.8981, -77.0209), @@ -83,35 +83,35 @@ STADIUM_MAPPINGS: dict[str, dict[str, StadiumInfo]] = { "stadium_nba_mexico_city_arena": StadiumInfo("stadium_nba_mexico_city_arena", "Mexico City Arena", "Mexico City", "CDMX", "Mexico", "nba", 19.4042, -99.0970, "America/Mexico_City"), }, "mlb": { - "stadium_mlb_chase_field": StadiumInfo("stadium_mlb_chase_field", "Chase Field", "Phoenix", "AZ", "USA", "mlb", 33.4455, -112.0667), + "stadium_mlb_chase_field": StadiumInfo("stadium_mlb_chase_field", "Chase Field", "Phoenix", "AZ", "USA", "mlb", 33.4455, -112.0667, "America/Phoenix"), "stadium_mlb_truist_park": StadiumInfo("stadium_mlb_truist_park", "Truist Park", "Atlanta", "GA", "USA", "mlb", 33.8908, -84.4678), "stadium_mlb_oriole_park_at_camden_yards": StadiumInfo("stadium_mlb_oriole_park_at_camden_yards", "Oriole Park at 
Camden Yards", "Baltimore", "MD", "USA", "mlb", 39.2839, -76.6217), "stadium_mlb_fenway_park": StadiumInfo("stadium_mlb_fenway_park", "Fenway Park", "Boston", "MA", "USA", "mlb", 42.3467, -71.0972), - "stadium_mlb_wrigley_field": StadiumInfo("stadium_mlb_wrigley_field", "Wrigley Field", "Chicago", "IL", "USA", "mlb", 41.9484, -87.6553), - "stadium_mlb_guaranteed_rate_field": StadiumInfo("stadium_mlb_guaranteed_rate_field", "Guaranteed Rate Field", "Chicago", "IL", "USA", "mlb", 41.8299, -87.6338), + "stadium_mlb_wrigley_field": StadiumInfo("stadium_mlb_wrigley_field", "Wrigley Field", "Chicago", "IL", "USA", "mlb", 41.9484, -87.6553, "America/Chicago"), + "stadium_mlb_guaranteed_rate_field": StadiumInfo("stadium_mlb_guaranteed_rate_field", "Guaranteed Rate Field", "Chicago", "IL", "USA", "mlb", 41.8299, -87.6338, "America/Chicago"), "stadium_mlb_great_american_ball_park": StadiumInfo("stadium_mlb_great_american_ball_park", "Great American Ball Park", "Cincinnati", "OH", "USA", "mlb", 39.0974, -84.5082), "stadium_mlb_progressive_field": StadiumInfo("stadium_mlb_progressive_field", "Progressive Field", "Cleveland", "OH", "USA", "mlb", 41.4962, -81.6852), - "stadium_mlb_coors_field": StadiumInfo("stadium_mlb_coors_field", "Coors Field", "Denver", "CO", "USA", "mlb", 39.7559, -104.9942), + "stadium_mlb_coors_field": StadiumInfo("stadium_mlb_coors_field", "Coors Field", "Denver", "CO", "USA", "mlb", 39.7559, -104.9942, "America/Denver"), "stadium_mlb_comerica_park": StadiumInfo("stadium_mlb_comerica_park", "Comerica Park", "Detroit", "MI", "USA", "mlb", 42.3390, -83.0485), - "stadium_mlb_minute_maid_park": StadiumInfo("stadium_mlb_minute_maid_park", "Minute Maid Park", "Houston", "TX", "USA", "mlb", 29.7573, -95.3555), - "stadium_mlb_kauffman_stadium": StadiumInfo("stadium_mlb_kauffman_stadium", "Kauffman Stadium", "Kansas City", "MO", "USA", "mlb", 39.0517, -94.4803), - "stadium_mlb_angel_stadium": StadiumInfo("stadium_mlb_angel_stadium", "Angel Stadium", "Anaheim", 
"CA", "USA", "mlb", 33.8003, -117.8827), - "stadium_mlb_dodger_stadium": StadiumInfo("stadium_mlb_dodger_stadium", "Dodger Stadium", "Los Angeles", "CA", "USA", "mlb", 34.0739, -118.2400), + "stadium_mlb_minute_maid_park": StadiumInfo("stadium_mlb_minute_maid_park", "Minute Maid Park", "Houston", "TX", "USA", "mlb", 29.7573, -95.3555, "America/Chicago"), + "stadium_mlb_kauffman_stadium": StadiumInfo("stadium_mlb_kauffman_stadium", "Kauffman Stadium", "Kansas City", "MO", "USA", "mlb", 39.0517, -94.4803, "America/Chicago"), + "stadium_mlb_angel_stadium": StadiumInfo("stadium_mlb_angel_stadium", "Angel Stadium", "Anaheim", "CA", "USA", "mlb", 33.8003, -117.8827, "America/Los_Angeles"), + "stadium_mlb_dodger_stadium": StadiumInfo("stadium_mlb_dodger_stadium", "Dodger Stadium", "Los Angeles", "CA", "USA", "mlb", 34.0739, -118.2400, "America/Los_Angeles"), "stadium_mlb_loandepot_park": StadiumInfo("stadium_mlb_loandepot_park", "loanDepot park", "Miami", "FL", "USA", "mlb", 25.7781, -80.2195), - "stadium_mlb_american_family_field": StadiumInfo("stadium_mlb_american_family_field", "American Family Field", "Milwaukee", "WI", "USA", "mlb", 43.0280, -87.9712), - "stadium_mlb_target_field": StadiumInfo("stadium_mlb_target_field", "Target Field", "Minneapolis", "MN", "USA", "mlb", 44.9818, -93.2775), + "stadium_mlb_american_family_field": StadiumInfo("stadium_mlb_american_family_field", "American Family Field", "Milwaukee", "WI", "USA", "mlb", 43.0280, -87.9712, "America/Chicago"), + "stadium_mlb_target_field": StadiumInfo("stadium_mlb_target_field", "Target Field", "Minneapolis", "MN", "USA", "mlb", 44.9818, -93.2775, "America/Chicago"), "stadium_mlb_citi_field": StadiumInfo("stadium_mlb_citi_field", "Citi Field", "New York", "NY", "USA", "mlb", 40.7571, -73.8458), "stadium_mlb_yankee_stadium": StadiumInfo("stadium_mlb_yankee_stadium", "Yankee Stadium", "Bronx", "NY", "USA", "mlb", 40.8296, -73.9262), - "stadium_mlb_sutter_health_park": 
StadiumInfo("stadium_mlb_sutter_health_park", "Sutter Health Park", "Sacramento", "CA", "USA", "mlb", 38.5803, -121.5005), + "stadium_mlb_sutter_health_park": StadiumInfo("stadium_mlb_sutter_health_park", "Sutter Health Park", "Sacramento", "CA", "USA", "mlb", 38.5803, -121.5005, "America/Los_Angeles"), "stadium_mlb_citizens_bank_park": StadiumInfo("stadium_mlb_citizens_bank_park", "Citizens Bank Park", "Philadelphia", "PA", "USA", "mlb", 39.9061, -75.1665), "stadium_mlb_pnc_park": StadiumInfo("stadium_mlb_pnc_park", "PNC Park", "Pittsburgh", "PA", "USA", "mlb", 40.4469, -80.0057), - "stadium_mlb_petco_park": StadiumInfo("stadium_mlb_petco_park", "Petco Park", "San Diego", "CA", "USA", "mlb", 32.7076, -117.1570), - "stadium_mlb_oracle_park": StadiumInfo("stadium_mlb_oracle_park", "Oracle Park", "San Francisco", "CA", "USA", "mlb", 37.7786, -122.3893), - "stadium_mlb_tmobile_park": StadiumInfo("stadium_mlb_tmobile_park", "T-Mobile Park", "Seattle", "WA", "USA", "mlb", 47.5914, -122.3325), - "stadium_mlb_busch_stadium": StadiumInfo("stadium_mlb_busch_stadium", "Busch Stadium", "St. Louis", "MO", "USA", "mlb", 38.6226, -90.1928), + "stadium_mlb_petco_park": StadiumInfo("stadium_mlb_petco_park", "Petco Park", "San Diego", "CA", "USA", "mlb", 32.7076, -117.1570, "America/Los_Angeles"), + "stadium_mlb_oracle_park": StadiumInfo("stadium_mlb_oracle_park", "Oracle Park", "San Francisco", "CA", "USA", "mlb", 37.7786, -122.3893, "America/Los_Angeles"), + "stadium_mlb_tmobile_park": StadiumInfo("stadium_mlb_tmobile_park", "T-Mobile Park", "Seattle", "WA", "USA", "mlb", 47.5914, -122.3325, "America/Los_Angeles"), + "stadium_mlb_busch_stadium": StadiumInfo("stadium_mlb_busch_stadium", "Busch Stadium", "St. Louis", "MO", "USA", "mlb", 38.6226, -90.1928, "America/Chicago"), "stadium_mlb_tropicana_field": StadiumInfo("stadium_mlb_tropicana_field", "Tropicana Field", "St. 
Petersburg", "FL", "USA", "mlb", 27.7682, -82.6534), - "stadium_mlb_globe_life_field": StadiumInfo("stadium_mlb_globe_life_field", "Globe Life Field", "Arlington", "TX", "USA", "mlb", 32.7473, -97.0845), - "stadium_mlb_rogers_centre": StadiumInfo("stadium_mlb_rogers_centre", "Rogers Centre", "Toronto", "ON", "Canada", "mlb", 43.6414, -79.3894), + "stadium_mlb_globe_life_field": StadiumInfo("stadium_mlb_globe_life_field", "Globe Life Field", "Arlington", "TX", "USA", "mlb", 32.7473, -97.0845, "America/Chicago"), + "stadium_mlb_rogers_centre": StadiumInfo("stadium_mlb_rogers_centre", "Rogers Centre", "Toronto", "ON", "Canada", "mlb", 43.6414, -79.3894, "America/Toronto"), "stadium_mlb_nationals_park": StadiumInfo("stadium_mlb_nationals_park", "Nationals Park", "Washington", "DC", "USA", "mlb", 38.8730, -77.0074), # Spring Training - Cactus League (Arizona) "stadium_mlb_spring_salt_river_fields": StadiumInfo("stadium_mlb_spring_salt_river_fields", "Salt River Fields at Talking Stick", "Scottsdale", "AZ", "USA", "mlb", 33.5412, -111.8847, "America/Phoenix"), @@ -145,116 +145,127 @@ STADIUM_MAPPINGS: dict[str, dict[str, StadiumInfo]] = { "stadium_mlb_journey_bank_ballpark": StadiumInfo("stadium_mlb_journey_bank_ballpark", "Journey Bank Ballpark", "Williamsport", "PA", "USA", "mlb", 41.2415, -77.0011), }, "nfl": { - "stadium_nfl_state_farm_stadium": StadiumInfo("stadium_nfl_state_farm_stadium", "State Farm Stadium", "Glendale", "AZ", "USA", "nfl", 33.5276, -112.2626), + "stadium_nfl_state_farm_stadium": StadiumInfo("stadium_nfl_state_farm_stadium", "State Farm Stadium", "Glendale", "AZ", "USA", "nfl", 33.5276, -112.2626, "America/Phoenix"), "stadium_nfl_mercedes_benz_stadium": StadiumInfo("stadium_nfl_mercedes_benz_stadium", "Mercedes-Benz Stadium", "Atlanta", "GA", "USA", "nfl", 33.7553, -84.4006), "stadium_nfl_mandt_bank_stadium": StadiumInfo("stadium_nfl_mandt_bank_stadium", "M&T Bank Stadium", "Baltimore", "MD", "USA", "nfl", 39.2780, -76.6227), 
"stadium_nfl_highmark_stadium": StadiumInfo("stadium_nfl_highmark_stadium", "Highmark Stadium", "Orchard Park", "NY", "USA", "nfl", 42.7738, -78.7870), "stadium_nfl_bank_of_america_stadium": StadiumInfo("stadium_nfl_bank_of_america_stadium", "Bank of America Stadium", "Charlotte", "NC", "USA", "nfl", 35.2258, -80.8528), - "stadium_nfl_soldier_field": StadiumInfo("stadium_nfl_soldier_field", "Soldier Field", "Chicago", "IL", "USA", "nfl", 41.8623, -87.6167), + "stadium_nfl_soldier_field": StadiumInfo("stadium_nfl_soldier_field", "Soldier Field", "Chicago", "IL", "USA", "nfl", 41.8623, -87.6167, "America/Chicago"), "stadium_nfl_paycor_stadium": StadiumInfo("stadium_nfl_paycor_stadium", "Paycor Stadium", "Cincinnati", "OH", "USA", "nfl", 39.0955, -84.5161), "stadium_nfl_huntington_bank_field": StadiumInfo("stadium_nfl_huntington_bank_field", "Huntington Bank Field", "Cleveland", "OH", "USA", "nfl", 41.5061, -81.6995), - "stadium_nfl_att_stadium": StadiumInfo("stadium_nfl_att_stadium", "AT&T Stadium", "Arlington", "TX", "USA", "nfl", 32.7473, -97.0945), - "stadium_nfl_empower_field": StadiumInfo("stadium_nfl_empower_field", "Empower Field at Mile High", "Denver", "CO", "USA", "nfl", 39.7439, -105.0201), + "stadium_nfl_att_stadium": StadiumInfo("stadium_nfl_att_stadium", "AT&T Stadium", "Arlington", "TX", "USA", "nfl", 32.7473, -97.0945, "America/Chicago"), + "stadium_nfl_empower_field": StadiumInfo("stadium_nfl_empower_field", "Empower Field at Mile High", "Denver", "CO", "USA", "nfl", 39.7439, -105.0201, "America/Denver"), "stadium_nfl_ford_field": StadiumInfo("stadium_nfl_ford_field", "Ford Field", "Detroit", "MI", "USA", "nfl", 42.3400, -83.0456), - "stadium_nfl_lambeau_field": StadiumInfo("stadium_nfl_lambeau_field", "Lambeau Field", "Green Bay", "WI", "USA", "nfl", 44.5013, -88.0622), - "stadium_nfl_nrg_stadium": StadiumInfo("stadium_nfl_nrg_stadium", "NRG Stadium", "Houston", "TX", "USA", "nfl", 29.6847, -95.4107), + "stadium_nfl_lambeau_field": 
StadiumInfo("stadium_nfl_lambeau_field", "Lambeau Field", "Green Bay", "WI", "USA", "nfl", 44.5013, -88.0622, "America/Chicago"), + "stadium_nfl_nrg_stadium": StadiumInfo("stadium_nfl_nrg_stadium", "NRG Stadium", "Houston", "TX", "USA", "nfl", 29.6847, -95.4107, "America/Chicago"), "stadium_nfl_lucas_oil_stadium": StadiumInfo("stadium_nfl_lucas_oil_stadium", "Lucas Oil Stadium", "Indianapolis", "IN", "USA", "nfl", 39.7601, -86.1639), "stadium_nfl_everbank_stadium": StadiumInfo("stadium_nfl_everbank_stadium", "EverBank Stadium", "Jacksonville", "FL", "USA", "nfl", 30.3239, -81.6373), - "stadium_nfl_arrowhead_stadium": StadiumInfo("stadium_nfl_arrowhead_stadium", "Arrowhead Stadium", "Kansas City", "MO", "USA", "nfl", 39.0489, -94.4839), - "stadium_nfl_allegiant_stadium": StadiumInfo("stadium_nfl_allegiant_stadium", "Allegiant Stadium", "Las Vegas", "NV", "USA", "nfl", 36.0909, -115.1833), - "stadium_nfl_sofi_stadium": StadiumInfo("stadium_nfl_sofi_stadium", "SoFi Stadium", "Inglewood", "CA", "USA", "nfl", 33.9534, -118.3386), + "stadium_nfl_arrowhead_stadium": StadiumInfo("stadium_nfl_arrowhead_stadium", "Arrowhead Stadium", "Kansas City", "MO", "USA", "nfl", 39.0489, -94.4839, "America/Chicago"), + "stadium_nfl_allegiant_stadium": StadiumInfo("stadium_nfl_allegiant_stadium", "Allegiant Stadium", "Las Vegas", "NV", "USA", "nfl", 36.0909, -115.1833, "America/Los_Angeles"), + "stadium_nfl_sofi_stadium": StadiumInfo("stadium_nfl_sofi_stadium", "SoFi Stadium", "Inglewood", "CA", "USA", "nfl", 33.9534, -118.3386, "America/Los_Angeles"), "stadium_nfl_hard_rock_stadium": StadiumInfo("stadium_nfl_hard_rock_stadium", "Hard Rock Stadium", "Miami Gardens", "FL", "USA", "nfl", 25.9580, -80.2389), - "stadium_nfl_us_bank_stadium": StadiumInfo("stadium_nfl_us_bank_stadium", "U.S. Bank Stadium", "Minneapolis", "MN", "USA", "nfl", 44.9737, -93.2575), + "stadium_nfl_us_bank_stadium": StadiumInfo("stadium_nfl_us_bank_stadium", "U.S. 
Bank Stadium", "Minneapolis", "MN", "USA", "nfl", 44.9737, -93.2575, "America/Chicago"), "stadium_nfl_gillette_stadium": StadiumInfo("stadium_nfl_gillette_stadium", "Gillette Stadium", "Foxborough", "MA", "USA", "nfl", 42.0909, -71.2643), - "stadium_nfl_caesars_superdome": StadiumInfo("stadium_nfl_caesars_superdome", "Caesars Superdome", "New Orleans", "LA", "USA", "nfl", 29.9511, -90.0812), + "stadium_nfl_caesars_superdome": StadiumInfo("stadium_nfl_caesars_superdome", "Caesars Superdome", "New Orleans", "LA", "USA", "nfl", 29.9511, -90.0812, "America/Chicago"), "stadium_nfl_metlife_stadium": StadiumInfo("stadium_nfl_metlife_stadium", "MetLife Stadium", "East Rutherford", "NJ", "USA", "nfl", 40.8128, -74.0742), "stadium_nfl_lincoln_financial_field": StadiumInfo("stadium_nfl_lincoln_financial_field", "Lincoln Financial Field", "Philadelphia", "PA", "USA", "nfl", 39.9008, -75.1675), "stadium_nfl_acrisure_stadium": StadiumInfo("stadium_nfl_acrisure_stadium", "Acrisure Stadium", "Pittsburgh", "PA", "USA", "nfl", 40.4468, -80.0158), - "stadium_nfl_levis_stadium": StadiumInfo("stadium_nfl_levis_stadium", "Levi's Stadium", "Santa Clara", "CA", "USA", "nfl", 37.4033, -121.9695), - "stadium_nfl_lumen_field": StadiumInfo("stadium_nfl_lumen_field", "Lumen Field", "Seattle", "WA", "USA", "nfl", 47.5952, -122.3316), + "stadium_nfl_levis_stadium": StadiumInfo("stadium_nfl_levis_stadium", "Levi's Stadium", "Santa Clara", "CA", "USA", "nfl", 37.4033, -121.9695, "America/Los_Angeles"), + "stadium_nfl_lumen_field": StadiumInfo("stadium_nfl_lumen_field", "Lumen Field", "Seattle", "WA", "USA", "nfl", 47.5952, -122.3316, "America/Los_Angeles"), "stadium_nfl_raymond_james_stadium": StadiumInfo("stadium_nfl_raymond_james_stadium", "Raymond James Stadium", "Tampa", "FL", "USA", "nfl", 27.9759, -82.5033), - "stadium_nfl_nissan_stadium": StadiumInfo("stadium_nfl_nissan_stadium", "Nissan Stadium", "Nashville", "TN", "USA", "nfl", 36.1665, -86.7713), + "stadium_nfl_nissan_stadium": 
StadiumInfo("stadium_nfl_nissan_stadium", "Nissan Stadium", "Nashville", "TN", "USA", "nfl", 36.1665, -86.7713, "America/Chicago"), "stadium_nfl_northwest_stadium": StadiumInfo("stadium_nfl_northwest_stadium", "Northwest Stadium", "Landover", "MD", "USA", "nfl", 38.9076, -76.8645), + # Special and international venues + "stadium_nfl_tom_benson_hall_of_fame_stadium": StadiumInfo("stadium_nfl_tom_benson_hall_of_fame_stadium", "Tom Benson Hall of Fame Stadium", "Canton", "OH", "USA", "nfl", 40.8209, -81.3985), + "stadium_nfl_corinthians_arena": StadiumInfo("stadium_nfl_corinthians_arena", "Corinthians Arena", "São Paulo", "SP", "Brazil", "nfl", -23.5453, -46.4742, "America/Sao_Paulo"), + "stadium_nfl_croke_park": StadiumInfo("stadium_nfl_croke_park", "Croke Park", "Dublin", "", "Ireland", "nfl", 53.3609, -6.2514, "Europe/Dublin"), + "stadium_nfl_olympic_stadium_berlin": StadiumInfo("stadium_nfl_olympic_stadium_berlin", "Olympic Stadium Berlin", "Berlin", "", "Germany", "nfl", 52.5147, 13.2395, "Europe/Berlin"), + "stadium_nfl_santiago_bernabeu": StadiumInfo("stadium_nfl_santiago_bernabeu", "Santiago Bernabéu", "Madrid", "", "Spain", "nfl", 40.4531, -3.6883, "Europe/Madrid"), }, "nhl": { - "stadium_nhl_honda_center": StadiumInfo("stadium_nhl_honda_center", "Honda Center", "Anaheim", "CA", "USA", "nhl", 33.8078, -117.8765), - "stadium_nhl_delta_center": StadiumInfo("stadium_nhl_delta_center", "Delta Center", "Salt Lake City", "UT", "USA", "nhl", 40.7683, -111.9011), + "stadium_nhl_honda_center": StadiumInfo("stadium_nhl_honda_center", "Honda Center", "Anaheim", "CA", "USA", "nhl", 33.8078, -117.8765, "America/Los_Angeles"), + "stadium_nhl_delta_center": StadiumInfo("stadium_nhl_delta_center", "Delta Center", "Salt Lake City", "UT", "USA", "nhl", 40.7683, -111.9011, "America/Denver"), "stadium_nhl_td_garden": StadiumInfo("stadium_nhl_td_garden", "TD Garden", "Boston", "MA", "USA", "nhl", 42.3662, -71.0621), "stadium_nhl_keybank_center": 
StadiumInfo("stadium_nhl_keybank_center", "KeyBank Center", "Buffalo", "NY", "USA", "nhl", 42.8750, -78.8764), - "stadium_nhl_scotiabank_saddledome": StadiumInfo("stadium_nhl_scotiabank_saddledome", "Scotiabank Saddledome", "Calgary", "AB", "Canada", "nhl", 51.0374, -114.0519), + "stadium_nhl_scotiabank_saddledome": StadiumInfo("stadium_nhl_scotiabank_saddledome", "Scotiabank Saddledome", "Calgary", "AB", "Canada", "nhl", 51.0374, -114.0519, "America/Edmonton"), "stadium_nhl_pnc_arena": StadiumInfo("stadium_nhl_pnc_arena", "PNC Arena", "Raleigh", "NC", "USA", "nhl", 35.8033, -78.7220), - "stadium_nhl_united_center": StadiumInfo("stadium_nhl_united_center", "United Center", "Chicago", "IL", "USA", "nhl", 41.8807, -87.6742), - "stadium_nhl_ball_arena": StadiumInfo("stadium_nhl_ball_arena", "Ball Arena", "Denver", "CO", "USA", "nhl", 39.7487, -105.0077), + "stadium_nhl_united_center": StadiumInfo("stadium_nhl_united_center", "United Center", "Chicago", "IL", "USA", "nhl", 41.8807, -87.6742, "America/Chicago"), + "stadium_nhl_ball_arena": StadiumInfo("stadium_nhl_ball_arena", "Ball Arena", "Denver", "CO", "USA", "nhl", 39.7487, -105.0077, "America/Denver"), "stadium_nhl_nationwide_arena": StadiumInfo("stadium_nhl_nationwide_arena", "Nationwide Arena", "Columbus", "OH", "USA", "nhl", 39.9692, -83.0061), - "stadium_nhl_american_airlines_center": StadiumInfo("stadium_nhl_american_airlines_center", "American Airlines Center", "Dallas", "TX", "USA", "nhl", 32.7905, -96.8103), + "stadium_nhl_american_airlines_center": StadiumInfo("stadium_nhl_american_airlines_center", "American Airlines Center", "Dallas", "TX", "USA", "nhl", 32.7905, -96.8103, "America/Chicago"), "stadium_nhl_little_caesars_arena": StadiumInfo("stadium_nhl_little_caesars_arena", "Little Caesars Arena", "Detroit", "MI", "USA", "nhl", 42.3411, -83.0553), - "stadium_nhl_rogers_place": StadiumInfo("stadium_nhl_rogers_place", "Rogers Place", "Edmonton", "AB", "Canada", "nhl", 53.5469, -113.4979), + 
"stadium_nhl_rogers_place": StadiumInfo("stadium_nhl_rogers_place", "Rogers Place", "Edmonton", "AB", "Canada", "nhl", 53.5469, -113.4979, "America/Edmonton"), "stadium_nhl_amerant_bank_arena": StadiumInfo("stadium_nhl_amerant_bank_arena", "Amerant Bank Arena", "Sunrise", "FL", "USA", "nhl", 26.1584, -80.3256), - "stadium_nhl_cryptocom_arena": StadiumInfo("stadium_nhl_cryptocom_arena", "Crypto.com Arena", "Los Angeles", "CA", "USA", "nhl", 34.0430, -118.2673), - "stadium_nhl_xcel_energy_center": StadiumInfo("stadium_nhl_xcel_energy_center", "Xcel Energy Center", "St. Paul", "MN", "USA", "nhl", 44.9448, -93.1010), - "stadium_nhl_bell_centre": StadiumInfo("stadium_nhl_bell_centre", "Bell Centre", "Montreal", "QC", "Canada", "nhl", 45.4961, -73.5693), - "stadium_nhl_bridgestone_arena": StadiumInfo("stadium_nhl_bridgestone_arena", "Bridgestone Arena", "Nashville", "TN", "USA", "nhl", 36.1592, -86.7785), + "stadium_nhl_cryptocom_arena": StadiumInfo("stadium_nhl_cryptocom_arena", "Crypto.com Arena", "Los Angeles", "CA", "USA", "nhl", 34.0430, -118.2673, "America/Los_Angeles"), + "stadium_nhl_xcel_energy_center": StadiumInfo("stadium_nhl_xcel_energy_center", "Xcel Energy Center", "St. 
Paul", "MN", "USA", "nhl", 44.9448, -93.1010, "America/Chicago"), + "stadium_nhl_bell_centre": StadiumInfo("stadium_nhl_bell_centre", "Bell Centre", "Montreal", "QC", "Canada", "nhl", 45.4961, -73.5693, "America/Toronto"), + "stadium_nhl_bridgestone_arena": StadiumInfo("stadium_nhl_bridgestone_arena", "Bridgestone Arena", "Nashville", "TN", "USA", "nhl", 36.1592, -86.7785, "America/Chicago"), "stadium_nhl_prudential_center": StadiumInfo("stadium_nhl_prudential_center", "Prudential Center", "Newark", "NJ", "USA", "nhl", 40.7334, -74.1712), "stadium_nhl_ubs_arena": StadiumInfo("stadium_nhl_ubs_arena", "UBS Arena", "Elmont", "NY", "USA", "nhl", 40.7170, -73.7255), "stadium_nhl_madison_square_garden": StadiumInfo("stadium_nhl_madison_square_garden", "Madison Square Garden", "New York", "NY", "USA", "nhl", 40.7505, -73.9934), - "stadium_nhl_canadian_tire_centre": StadiumInfo("stadium_nhl_canadian_tire_centre", "Canadian Tire Centre", "Ottawa", "ON", "Canada", "nhl", 45.2969, -75.9272), + "stadium_nhl_canadian_tire_centre": StadiumInfo("stadium_nhl_canadian_tire_centre", "Canadian Tire Centre", "Ottawa", "ON", "Canada", "nhl", 45.2969, -75.9272, "America/Toronto"), "stadium_nhl_wells_fargo_center": StadiumInfo("stadium_nhl_wells_fargo_center", "Wells Fargo Center", "Philadelphia", "PA", "USA", "nhl", 39.9012, -75.1720), "stadium_nhl_ppg_paints_arena": StadiumInfo("stadium_nhl_ppg_paints_arena", "PPG Paints Arena", "Pittsburgh", "PA", "USA", "nhl", 40.4395, -79.9890), - "stadium_nhl_sap_center": StadiumInfo("stadium_nhl_sap_center", "SAP Center", "San Jose", "CA", "USA", "nhl", 37.3327, -121.9011), - "stadium_nhl_climate_pledge_arena": StadiumInfo("stadium_nhl_climate_pledge_arena", "Climate Pledge Arena", "Seattle", "WA", "USA", "nhl", 47.6221, -122.3540), - "stadium_nhl_enterprise_center": StadiumInfo("stadium_nhl_enterprise_center", "Enterprise Center", "St. 
Louis", "MO", "USA", "nhl", 38.6268, -90.2025), + "stadium_nhl_sap_center": StadiumInfo("stadium_nhl_sap_center", "SAP Center", "San Jose", "CA", "USA", "nhl", 37.3327, -121.9011, "America/Los_Angeles"), + "stadium_nhl_climate_pledge_arena": StadiumInfo("stadium_nhl_climate_pledge_arena", "Climate Pledge Arena", "Seattle", "WA", "USA", "nhl", 47.6221, -122.3540, "America/Los_Angeles"), + "stadium_nhl_enterprise_center": StadiumInfo("stadium_nhl_enterprise_center", "Enterprise Center", "St. Louis", "MO", "USA", "nhl", 38.6268, -90.2025, "America/Chicago"), "stadium_nhl_amalie_arena": StadiumInfo("stadium_nhl_amalie_arena", "Amalie Arena", "Tampa", "FL", "USA", "nhl", 27.9428, -82.4519), - "stadium_nhl_scotiabank_arena": StadiumInfo("stadium_nhl_scotiabank_arena", "Scotiabank Arena", "Toronto", "ON", "Canada", "nhl", 43.6435, -79.3791), - "stadium_nhl_rogers_arena": StadiumInfo("stadium_nhl_rogers_arena", "Rogers Arena", "Vancouver", "BC", "Canada", "nhl", 49.2778, -123.1088), - "stadium_nhl_tmobile_arena": StadiumInfo("stadium_nhl_tmobile_arena", "T-Mobile Arena", "Las Vegas", "NV", "USA", "nhl", 36.1028, -115.1783), + "stadium_nhl_scotiabank_arena": StadiumInfo("stadium_nhl_scotiabank_arena", "Scotiabank Arena", "Toronto", "ON", "Canada", "nhl", 43.6435, -79.3791, "America/Toronto"), + "stadium_nhl_rogers_arena": StadiumInfo("stadium_nhl_rogers_arena", "Rogers Arena", "Vancouver", "BC", "Canada", "nhl", 49.2778, -123.1088, "America/Vancouver"), + "stadium_nhl_tmobile_arena": StadiumInfo("stadium_nhl_tmobile_arena", "T-Mobile Arena", "Las Vegas", "NV", "USA", "nhl", 36.1028, -115.1783, "America/Los_Angeles"), "stadium_nhl_capital_one_arena": StadiumInfo("stadium_nhl_capital_one_arena", "Capital One Arena", "Washington", "DC", "USA", "nhl", 38.8981, -77.0209), - "stadium_nhl_canada_life_centre": StadiumInfo("stadium_nhl_canada_life_centre", "Canada Life Centre", "Winnipeg", "MB", "Canada", "nhl", 49.8928, -97.1433), + "stadium_nhl_canada_life_centre": 
StadiumInfo("stadium_nhl_canada_life_centre", "Canada Life Centre", "Winnipeg", "MB", "Canada", "nhl", 49.8928, -97.1433, "America/Winnipeg"), }, "mls": { "stadium_mls_mercedes_benz_stadium": StadiumInfo("stadium_mls_mercedes_benz_stadium", "Mercedes-Benz Stadium", "Atlanta", "GA", "USA", "mls", 33.7553, -84.4006), - "stadium_mls_q2_stadium": StadiumInfo("stadium_mls_q2_stadium", "Q2 Stadium", "Austin", "TX", "USA", "mls", 30.3875, -97.7186), + "stadium_mls_q2_stadium": StadiumInfo("stadium_mls_q2_stadium", "Q2 Stadium", "Austin", "TX", "USA", "mls", 30.3875, -97.7186, "America/Chicago"), "stadium_mls_bank_of_america_stadium": StadiumInfo("stadium_mls_bank_of_america_stadium", "Bank of America Stadium", "Charlotte", "NC", "USA", "mls", 35.2258, -80.8528), - "stadium_mls_soldier_field": StadiumInfo("stadium_mls_soldier_field", "Soldier Field", "Chicago", "IL", "USA", "mls", 41.8623, -87.6167), + "stadium_mls_soldier_field": StadiumInfo("stadium_mls_soldier_field", "Soldier Field", "Chicago", "IL", "USA", "mls", 41.8623, -87.6167, "America/Chicago"), "stadium_mls_tql_stadium": StadiumInfo("stadium_mls_tql_stadium", "TQL Stadium", "Cincinnati", "OH", "USA", "mls", 39.1112, -84.5225), - "stadium_mls_dicks_sporting_goods_park": StadiumInfo("stadium_mls_dicks_sporting_goods_park", "Dick's Sporting Goods Park", "Commerce City", "CO", "USA", "mls", 39.8056, -104.8922), + "stadium_mls_dicks_sporting_goods_park": StadiumInfo("stadium_mls_dicks_sporting_goods_park", "Dick's Sporting Goods Park", "Commerce City", "CO", "USA", "mls", 39.8056, -104.8922, "America/Denver"), "stadium_mls_lowercom_field": StadiumInfo("stadium_mls_lowercom_field", "Lower.com Field", "Columbus", "OH", "USA", "mls", 39.9689, -83.0173), - "stadium_mls_toyota_stadium": StadiumInfo("stadium_mls_toyota_stadium", "Toyota Stadium", "Frisco", "TX", "USA", "mls", 33.1545, -96.8353), + "stadium_mls_toyota_stadium": StadiumInfo("stadium_mls_toyota_stadium", "Toyota Stadium", "Frisco", "TX", "USA", "mls", 
33.1545, -96.8353, "America/Chicago"), "stadium_mls_audi_field": StadiumInfo("stadium_mls_audi_field", "Audi Field", "Washington", "DC", "USA", "mls", 38.8687, -77.0128), - "stadium_mls_shell_energy_stadium": StadiumInfo("stadium_mls_shell_energy_stadium", "Shell Energy Stadium", "Houston", "TX", "USA", "mls", 29.7522, -95.3527), - "stadium_mls_dignity_health_sports_park": StadiumInfo("stadium_mls_dignity_health_sports_park", "Dignity Health Sports Park", "Carson", "CA", "USA", "mls", 33.8644, -118.2611), - "stadium_mls_bmo_stadium": StadiumInfo("stadium_mls_bmo_stadium", "BMO Stadium", "Los Angeles", "CA", "USA", "mls", 34.0128, -118.2841), + "stadium_mls_shell_energy_stadium": StadiumInfo("stadium_mls_shell_energy_stadium", "Shell Energy Stadium", "Houston", "TX", "USA", "mls", 29.7522, -95.3527, "America/Chicago"), + "stadium_mls_dignity_health_sports_park": StadiumInfo("stadium_mls_dignity_health_sports_park", "Dignity Health Sports Park", "Carson", "CA", "USA", "mls", 33.8644, -118.2611, "America/Los_Angeles"), + "stadium_mls_bmo_stadium": StadiumInfo("stadium_mls_bmo_stadium", "BMO Stadium", "Los Angeles", "CA", "USA", "mls", 34.0128, -118.2841, "America/Los_Angeles"), "stadium_mls_chase_stadium": StadiumInfo("stadium_mls_chase_stadium", "Chase Stadium", "Fort Lauderdale", "FL", "USA", "mls", 26.1930, -80.1611), - "stadium_mls_allianz_field": StadiumInfo("stadium_mls_allianz_field", "Allianz Field", "St. Paul", "MN", "USA", "mls", 44.9528, -93.1650), - "stadium_mls_stade_saputo": StadiumInfo("stadium_mls_stade_saputo", "Stade Saputo", "Montreal", "QC", "Canada", "mls", 45.5622, -73.5528), - "stadium_mls_geodis_park": StadiumInfo("stadium_mls_geodis_park", "GEODIS Park", "Nashville", "TN", "USA", "mls", 36.1304, -86.7651), + "stadium_mls_allianz_field": StadiumInfo("stadium_mls_allianz_field", "Allianz Field", "St. 
Paul", "MN", "USA", "mls", 44.9528, -93.1650, "America/Chicago"), + "stadium_mls_stade_saputo": StadiumInfo("stadium_mls_stade_saputo", "Stade Saputo", "Montreal", "QC", "Canada", "mls", 45.5622, -73.5528, "America/Toronto"), + "stadium_mls_geodis_park": StadiumInfo("stadium_mls_geodis_park", "GEODIS Park", "Nashville", "TN", "USA", "mls", 36.1304, -86.7651, "America/Chicago"), "stadium_mls_gillette_stadium": StadiumInfo("stadium_mls_gillette_stadium", "Gillette Stadium", "Foxborough", "MA", "USA", "mls", 42.0909, -71.2643), "stadium_mls_yankee_stadium": StadiumInfo("stadium_mls_yankee_stadium", "Yankee Stadium", "Bronx", "NY", "USA", "mls", 40.8296, -73.9262), "stadium_mls_red_bull_arena": StadiumInfo("stadium_mls_red_bull_arena", "Red Bull Arena", "Harrison", "NJ", "USA", "mls", 40.7369, -74.1503), "stadium_mls_interco_stadium": StadiumInfo("stadium_mls_interco_stadium", "Inter&Co Stadium", "Orlando", "FL", "USA", "mls", 28.5411, -81.3895), "stadium_mls_subaru_park": StadiumInfo("stadium_mls_subaru_park", "Subaru Park", "Chester", "PA", "USA", "mls", 39.8328, -75.3789), - "stadium_mls_providence_park": StadiumInfo("stadium_mls_providence_park", "Providence Park", "Portland", "OR", "USA", "mls", 45.5216, -122.6917), - "stadium_mls_america_first_field": StadiumInfo("stadium_mls_america_first_field", "America First Field", "Sandy", "UT", "USA", "mls", 40.5830, -111.8933), - "stadium_mls_paypal_park": StadiumInfo("stadium_mls_paypal_park", "PayPal Park", "San Jose", "CA", "USA", "mls", 37.3511, -121.9250), - "stadium_mls_snapdragon_stadium": StadiumInfo("stadium_mls_snapdragon_stadium", "Snapdragon Stadium", "San Diego", "CA", "USA", "mls", 32.7837, -117.1225), - "stadium_mls_lumen_field": StadiumInfo("stadium_mls_lumen_field", "Lumen Field", "Seattle", "WA", "USA", "mls", 47.5952, -122.3316), - "stadium_mls_childrens_mercy_park": StadiumInfo("stadium_mls_childrens_mercy_park", "Children's Mercy Park", "Kansas City", "KS", "USA", "mls", 39.1217, -94.8231), - 
"stadium_mls_citypark": StadiumInfo("stadium_mls_citypark", "CITYPARK", "St. Louis", "MO", "USA", "mls", 38.6316, -90.2106), - "stadium_mls_bmo_field": StadiumInfo("stadium_mls_bmo_field", "BMO Field", "Toronto", "ON", "Canada", "mls", 43.6332, -79.4186), - "stadium_mls_bc_place": StadiumInfo("stadium_mls_bc_place", "BC Place", "Vancouver", "BC", "Canada", "mls", 49.2768, -123.1118), + "stadium_mls_providence_park": StadiumInfo("stadium_mls_providence_park", "Providence Park", "Portland", "OR", "USA", "mls", 45.5216, -122.6917, "America/Los_Angeles"), + "stadium_mls_america_first_field": StadiumInfo("stadium_mls_america_first_field", "America First Field", "Sandy", "UT", "USA", "mls", 40.5830, -111.8933, "America/Denver"), + "stadium_mls_paypal_park": StadiumInfo("stadium_mls_paypal_park", "PayPal Park", "San Jose", "CA", "USA", "mls", 37.3511, -121.9250, "America/Los_Angeles"), + "stadium_mls_snapdragon_stadium": StadiumInfo("stadium_mls_snapdragon_stadium", "Snapdragon Stadium", "San Diego", "CA", "USA", "mls", 32.7837, -117.1225, "America/Los_Angeles"), + "stadium_mls_lumen_field": StadiumInfo("stadium_mls_lumen_field", "Lumen Field", "Seattle", "WA", "USA", "mls", 47.5952, -122.3316, "America/Los_Angeles"), + "stadium_mls_childrens_mercy_park": StadiumInfo("stadium_mls_childrens_mercy_park", "Children's Mercy Park", "Kansas City", "KS", "USA", "mls", 39.1217, -94.8231, "America/Chicago"), + "stadium_mls_citypark": StadiumInfo("stadium_mls_citypark", "CITYPARK", "St. 
Louis", "MO", "USA", "mls", 38.6316, -90.2106, "America/Chicago"), + "stadium_mls_bmo_field": StadiumInfo("stadium_mls_bmo_field", "BMO Field", "Toronto", "ON", "Canada", "mls", 43.6332, -79.4186, "America/Toronto"), + "stadium_mls_bc_place": StadiumInfo("stadium_mls_bc_place", "BC Place", "Vancouver", "BC", "Canada", "mls", 49.2768, -123.1118, "America/Vancouver"), + # Alternate and special venues + "stadium_mls_miami_freedom_park": StadiumInfo("stadium_mls_miami_freedom_park", "Miami Freedom Park", "Miami", "FL", "USA", "mls", 25.789, -80.237), + "stadium_mls_citi_field": StadiumInfo("stadium_mls_citi_field", "Citi Field", "New York", "NY", "USA", "mls", 40.7571, -73.8458), + "stadium_mls_los_angeles_memorial_coliseum": StadiumInfo("stadium_mls_los_angeles_memorial_coliseum", "Los Angeles Memorial Coliseum", "Los Angeles", "CA", "USA", "mls", 34.0141, -118.2879, "America/Los_Angeles"), + "stadium_mls_mandt_bank_stadium": StadiumInfo("stadium_mls_mandt_bank_stadium", "M&T Bank Stadium", "Baltimore", "MD", "USA", "mls", 39.2780, -76.6227), }, "wnba": { "stadium_wnba_gateway_center_arena": StadiumInfo("stadium_wnba_gateway_center_arena", "Gateway Center Arena", "College Park", "GA", "USA", "wnba", 33.6510, -84.4474), - "stadium_wnba_wintrust_arena": StadiumInfo("stadium_wnba_wintrust_arena", "Wintrust Arena", "Chicago", "IL", "USA", "wnba", 41.8658, -87.6169), + "stadium_wnba_wintrust_arena": StadiumInfo("stadium_wnba_wintrust_arena", "Wintrust Arena", "Chicago", "IL", "USA", "wnba", 41.8658, -87.6169, "America/Chicago"), "stadium_wnba_mohegan_sun_arena": StadiumInfo("stadium_wnba_mohegan_sun_arena", "Mohegan Sun Arena", "Uncasville", "CT", "USA", "wnba", 41.4931, -72.0912), - "stadium_wnba_college_park_center": StadiumInfo("stadium_wnba_college_park_center", "College Park Center", "Arlington", "TX", "USA", "wnba", 32.7304, -97.1077), - "stadium_wnba_chase_center": StadiumInfo("stadium_wnba_chase_center", "Chase Center", "San Francisco", "CA", "USA", "wnba", 
37.7680, -122.3877), + "stadium_wnba_college_park_center": StadiumInfo("stadium_wnba_college_park_center", "College Park Center", "Arlington", "TX", "USA", "wnba", 32.7304, -97.1077, "America/Chicago"), + "stadium_wnba_chase_center": StadiumInfo("stadium_wnba_chase_center", "Chase Center", "San Francisco", "CA", "USA", "wnba", 37.7680, -122.3877, "America/Los_Angeles"), "stadium_wnba_gainbridge_fieldhouse": StadiumInfo("stadium_wnba_gainbridge_fieldhouse", "Gainbridge Fieldhouse", "Indianapolis", "IN", "USA", "wnba", 39.7640, -86.1555), - "stadium_wnba_michelob_ultra_arena": StadiumInfo("stadium_wnba_michelob_ultra_arena", "Michelob Ultra Arena", "Las Vegas", "NV", "USA", "wnba", 36.0902, -115.1756), - "stadium_wnba_cryptocom_arena": StadiumInfo("stadium_wnba_cryptocom_arena", "Crypto.com Arena", "Los Angeles", "CA", "USA", "wnba", 34.0430, -118.2673), - "stadium_wnba_target_center": StadiumInfo("stadium_wnba_target_center", "Target Center", "Minneapolis", "MN", "USA", "wnba", 44.9795, -93.2761), + "stadium_wnba_michelob_ultra_arena": StadiumInfo("stadium_wnba_michelob_ultra_arena", "Michelob Ultra Arena", "Las Vegas", "NV", "USA", "wnba", 36.0902, -115.1756, "America/Los_Angeles"), + "stadium_wnba_cryptocom_arena": StadiumInfo("stadium_wnba_cryptocom_arena", "Crypto.com Arena", "Los Angeles", "CA", "USA", "wnba", 34.0430, -118.2673, "America/Los_Angeles"), + "stadium_wnba_target_center": StadiumInfo("stadium_wnba_target_center", "Target Center", "Minneapolis", "MN", "USA", "wnba", 44.9795, -93.2761, "America/Chicago"), "stadium_wnba_barclays_center": StadiumInfo("stadium_wnba_barclays_center", "Barclays Center", "Brooklyn", "NY", "USA", "wnba", 40.6826, -73.9754), - "stadium_wnba_footprint_center": StadiumInfo("stadium_wnba_footprint_center", "Footprint Center", "Phoenix", "AZ", "USA", "wnba", 33.4457, -112.0712), - "stadium_wnba_climate_pledge_arena": StadiumInfo("stadium_wnba_climate_pledge_arena", "Climate Pledge Arena", "Seattle", "WA", "USA", "wnba", 47.6221, 
-122.3540), + "stadium_wnba_footprint_center": StadiumInfo("stadium_wnba_footprint_center", "Footprint Center", "Phoenix", "AZ", "USA", "wnba", 33.4457, -112.0712, "America/Phoenix"), + "stadium_wnba_climate_pledge_arena": StadiumInfo("stadium_wnba_climate_pledge_arena", "Climate Pledge Arena", "Seattle", "WA", "USA", "wnba", 47.6221, -122.3540, "America/Los_Angeles"), "stadium_wnba_entertainment_sports_arena": StadiumInfo("stadium_wnba_entertainment_sports_arena", "Entertainment & Sports Arena", "Washington", "DC", "USA", "wnba", 38.8690, -76.9745), "stadium_wnba_state_farm_arena": StadiumInfo("stadium_wnba_state_farm_arena", "State Farm Arena", "Atlanta", "GA", "USA", "wnba", 33.7573, -84.3963), "stadium_wnba_rocket_mortgage_fieldhouse": StadiumInfo("stadium_wnba_rocket_mortgage_fieldhouse", "Rocket Mortgage FieldHouse", "Cleveland", "OH", "USA", "wnba", 41.4965, -81.6882), @@ -262,20 +273,20 @@ STADIUM_MAPPINGS: dict[str, dict[str, StadiumInfo]] = { "stadium_wnba_purcell_pavilion": StadiumInfo("stadium_wnba_purcell_pavilion", "Purcell Pavilion", "Notre Dame", "IN", "USA", "wnba", 41.6987, -86.2340), }, "nwsl": { - "stadium_nwsl_bmo_stadium": StadiumInfo("stadium_nwsl_bmo_stadium", "BMO Stadium", "Los Angeles", "CA", "USA", "nwsl", 34.0128, -118.2841), - "stadium_nwsl_seatgeek_stadium": StadiumInfo("stadium_nwsl_seatgeek_stadium", "SeatGeek Stadium", "Bridgeview", "IL", "USA", "nwsl", 41.7500, -87.8028), - "stadium_nwsl_shell_energy_stadium": StadiumInfo("stadium_nwsl_shell_energy_stadium", "Shell Energy Stadium", "Houston", "TX", "USA", "nwsl", 29.7522, -95.3527), - "stadium_nwsl_cpkc_stadium": StadiumInfo("stadium_nwsl_cpkc_stadium", "CPKC Stadium", "Kansas City", "MO", "USA", "nwsl", 39.1050, -94.5580), + "stadium_nwsl_bmo_stadium": StadiumInfo("stadium_nwsl_bmo_stadium", "BMO Stadium", "Los Angeles", "CA", "USA", "nwsl", 34.0128, -118.2841, "America/Los_Angeles"), + "stadium_nwsl_seatgeek_stadium": StadiumInfo("stadium_nwsl_seatgeek_stadium", "SeatGeek 
Stadium", "Bridgeview", "IL", "USA", "nwsl", 41.7500, -87.8028, "America/Chicago"), + "stadium_nwsl_shell_energy_stadium": StadiumInfo("stadium_nwsl_shell_energy_stadium", "Shell Energy Stadium", "Houston", "TX", "USA", "nwsl", 29.7522, -95.3527, "America/Chicago"), + "stadium_nwsl_cpkc_stadium": StadiumInfo("stadium_nwsl_cpkc_stadium", "CPKC Stadium", "Kansas City", "MO", "USA", "nwsl", 39.1050, -94.5580, "America/Chicago"), "stadium_nwsl_red_bull_arena": StadiumInfo("stadium_nwsl_red_bull_arena", "Red Bull Arena", "Harrison", "NJ", "USA", "nwsl", 40.7369, -74.1503), "stadium_nwsl_wakemed_soccer_park": StadiumInfo("stadium_nwsl_wakemed_soccer_park", "WakeMed Soccer Park", "Cary", "NC", "USA", "nwsl", 35.7879, -78.7806), "stadium_nwsl_interco_stadium": StadiumInfo("stadium_nwsl_interco_stadium", "Inter&Co Stadium", "Orlando", "FL", "USA", "nwsl", 28.5411, -81.3895), - "stadium_nwsl_providence_park": StadiumInfo("stadium_nwsl_providence_park", "Providence Park", "Portland", "OR", "USA", "nwsl", 45.5216, -122.6917), + "stadium_nwsl_providence_park": StadiumInfo("stadium_nwsl_providence_park", "Providence Park", "Portland", "OR", "USA", "nwsl", 45.5216, -122.6917, "America/Los_Angeles"), "stadium_nwsl_lynn_family_stadium": StadiumInfo("stadium_nwsl_lynn_family_stadium", "Lynn Family Stadium", "Louisville", "KY", "USA", "nwsl", 38.2219, -85.7381), - "stadium_nwsl_snapdragon_stadium": StadiumInfo("stadium_nwsl_snapdragon_stadium", "Snapdragon Stadium", "San Diego", "CA", "USA", "nwsl", 32.7837, -117.1225), - "stadium_nwsl_lumen_field": StadiumInfo("stadium_nwsl_lumen_field", "Lumen Field", "Seattle", "WA", "USA", "nwsl", 47.5952, -122.3316), - "stadium_nwsl_america_first_field": StadiumInfo("stadium_nwsl_america_first_field", "America First Field", "Sandy", "UT", "USA", "nwsl", 40.5830, -111.8933), + "stadium_nwsl_snapdragon_stadium": StadiumInfo("stadium_nwsl_snapdragon_stadium", "Snapdragon Stadium", "San Diego", "CA", "USA", "nwsl", 32.7837, -117.1225, 
"America/Los_Angeles"), + "stadium_nwsl_lumen_field": StadiumInfo("stadium_nwsl_lumen_field", "Lumen Field", "Seattle", "WA", "USA", "nwsl", 47.5952, -122.3316, "America/Los_Angeles"), + "stadium_nwsl_america_first_field": StadiumInfo("stadium_nwsl_america_first_field", "America First Field", "Sandy", "UT", "USA", "nwsl", 40.5830, -111.8933, "America/Denver"), "stadium_nwsl_audi_field": StadiumInfo("stadium_nwsl_audi_field", "Audi Field", "Washington", "DC", "USA", "nwsl", 38.8687, -77.0128), - "stadium_nwsl_paypal_park": StadiumInfo("stadium_nwsl_paypal_park", "PayPal Park", "San Jose", "CA", "USA", "nwsl", 37.3511, -121.9250), + "stadium_nwsl_paypal_park": StadiumInfo("stadium_nwsl_paypal_park", "PayPal Park", "San Jose", "CA", "USA", "nwsl", 37.3511, -121.9250, "America/Los_Angeles"), # Boston Legacy FC venues "stadium_nwsl_gillette_stadium": StadiumInfo("stadium_nwsl_gillette_stadium", "Gillette Stadium", "Foxborough", "MA", "USA", "nwsl", 42.0909, -71.2643), "stadium_nwsl_centreville_bank_stadium": StadiumInfo("stadium_nwsl_centreville_bank_stadium", "Centreville Bank Stadium", "Pawtucket", "RI", "USA", "nwsl", 41.8770, -71.3910), @@ -284,8 +295,11 @@ STADIUM_MAPPINGS: dict[str, dict[str, StadiumInfo]] = { "stadium_nwsl_dicks_sporting_goods_park": StadiumInfo("stadium_nwsl_dicks_sporting_goods_park", "Dick's Sporting Goods Park", "Commerce City", "CO", "USA", "nwsl", 39.8056, -104.8922, "America/Denver"), "stadium_nwsl_centennial_stadium": StadiumInfo("stadium_nwsl_centennial_stadium", "Centennial Stadium", "Centennial", "CO", "USA", "nwsl", 39.6000, -104.8800, "America/Denver"), # Shared NFL/MLB venues - "stadium_nwsl_soldier_field": StadiumInfo("stadium_nwsl_soldier_field", "Soldier Field", "Chicago", "IL", "USA", "nwsl", 41.8623, -87.6167), + "stadium_nwsl_soldier_field": StadiumInfo("stadium_nwsl_soldier_field", "Soldier Field", "Chicago", "IL", "USA", "nwsl", 41.8623, -87.6167, "America/Chicago"), "stadium_nwsl_oracle_park": 
StadiumInfo("stadium_nwsl_oracle_park", "Oracle Park", "San Francisco", "CA", "USA", "nwsl", 37.7786, -122.3893, "America/Los_Angeles"), + # Additional alternate venues + "stadium_nwsl_northwestern_medicine_field": StadiumInfo("stadium_nwsl_northwestern_medicine_field", "Northwestern Medicine Field at Martin Stadium", "Evanston", "IL", "USA", "nwsl", 42.0598, -87.6743, "America/Chicago"), + "stadium_nwsl_one_spokane_stadium": StadiumInfo("stadium_nwsl_one_spokane_stadium", "ONE Spokane Stadium", "Spokane", "WA", "USA", "nwsl", 47.6588, -117.4101, "America/Los_Angeles"), }, } diff --git a/sportstime_parser/scrapers/mlb.py b/sportstime_parser/scrapers/mlb.py index 1a54162..7592fb7 100644 --- a/sportstime_parser/scrapers/mlb.py +++ b/sportstime_parser/scrapers/mlb.py @@ -2,6 +2,7 @@ from datetime import datetime, date, timedelta from typing import Optional +from zoneinfo import ZoneInfo from bs4 import BeautifulSoup from .base import BaseScraper, RawGameData, ScrapeResult @@ -28,9 +29,14 @@ class MLBScraper(BaseScraper): """MLB schedule scraper with multi-source fallback. Sources (in priority order): - 1. Baseball-Reference - Most reliable, complete historical data - 2. MLB Stats API - Official MLB data - 3. ESPN API - Backup option + 1. MLB Stats API - Official MLB data (primary) + 2. ESPN API - Backup option + 3. 
Baseball-Reference - Complete historical data + + Source Timezones: + - mlb_api: UTC - ISO 8601 format with "Z" suffix (gameDate field) + - espn: UTC - ISO 8601 format with "Z" suffix + - baseball_reference: Eastern Time (ET) - times displayed as "7:05 PM ET" """ def __init__(self, season: int, **kwargs): @@ -143,7 +149,22 @@ class MLBScraper(BaseScraper): continue try: - game = self._parse_br_game(elem, current_date, source_url) + # Extract game time from the element if present + # Baseball-Reference may have time in a span or in the text + game_time_for_row = None + time_elem = elem.find("span", class_="game_time") + if time_elem: + time_text = time_elem.get_text(strip=True) + if time_text: + try: + # Parse time like "7:05 PM ET" or "1:10 PM" + # Remove timezone suffix if present + time_clean = time_text.replace(" ET", "").replace(" PT", "").replace(" CT", "").replace(" MT", "").strip() + game_time_for_row = datetime.strptime(time_clean, "%I:%M %p") + except ValueError: + pass + + game = self._parse_br_game(elem, current_date, source_url, game_time_for_row) if game: games.append(game) except Exception as e: @@ -157,6 +178,7 @@ class MLBScraper(BaseScraper): elem, game_date: datetime, source_url: str, + game_time: Optional[datetime] = None, ) -> Optional[RawGameData]: """Parse a single Baseball-Reference game element.""" text = elem.get_text(" ", strip=True) @@ -206,8 +228,17 @@ class MLBScraper(BaseScraper): # Third link might be stadium stadium = links[2].get_text(strip=True) + # Combine date and time if time was provided, with ET timezone (Baseball-Reference uses ET) + final_game_date = game_date + if game_time: + final_game_date = game_date.replace( + hour=game_time.hour, + minute=game_time.minute, + tzinfo=ZoneInfo("America/New_York"), + ) + return RawGameData( - game_date=game_date, + game_date=final_game_date, home_team_raw=home_team, away_team_raw=away_team, stadium_raw=stadium, @@ -672,6 +703,7 @@ class MLBScraper(BaseScraper): country=info.country, 
latitude=info.latitude, longitude=info.longitude, + timezone=info.timezone, surface="grass", # Most MLB stadiums roof_type="open", # Most MLB stadiums ) diff --git a/sportstime_parser/scrapers/mls.py b/sportstime_parser/scrapers/mls.py index d3bea38..82f5bab 100644 --- a/sportstime_parser/scrapers/mls.py +++ b/sportstime_parser/scrapers/mls.py @@ -27,7 +27,11 @@ class MLSScraper(BaseScraper): Sources (in priority order): 1. ESPN API - Most reliable for MLS - 2. FBref - Backup option + 2. FBref - Backup option (not implemented) + + Source Timezones: + - espn: UTC - ISO 8601 format with "Z" suffix + - fbref: Not implemented """ def __init__(self, season: int, **kwargs): @@ -387,6 +391,7 @@ class MLSScraper(BaseScraper): country=info.country, latitude=info.latitude, longitude=info.longitude, + timezone=info.timezone, surface="grass", roof_type="open", ) diff --git a/sportstime_parser/scrapers/nba.py b/sportstime_parser/scrapers/nba.py index c5fbfcb..3ed7dfc 100644 --- a/sportstime_parser/scrapers/nba.py +++ b/sportstime_parser/scrapers/nba.py @@ -2,6 +2,7 @@ from datetime import datetime, date, timezone from typing import Optional +from zoneinfo import ZoneInfo from bs4 import BeautifulSoup import re @@ -45,7 +46,12 @@ class NBAScraper(BaseScraper): Sources (in priority order): 1. Basketball-Reference - Most reliable, complete historical data 2. ESPN API - Good for current/future seasons - 3. CBS Sports - Backup option + 3. 
CBS Sports - Backup option (not implemented) + + Source Timezones: + - basketball_reference: Eastern Time (ET) - times displayed as "7:30p" + - espn: UTC - ISO 8601 format with "Z" suffix + - cbs: Not implemented """ def __init__(self, season: int, **kwargs): @@ -196,6 +202,25 @@ class NBAScraper(BaseScraper): self._logger.debug(f"Could not parse date: {date_text}") return None + # Get game start time (format: "7:30p" or "10:00p") - times are in ET + time_cell = row.find("td", {"data-stat": "game_start_time"}) + if time_cell: + time_text = time_cell.get_text(strip=True) + if time_text: + try: + # Parse time like "7:30p" or "10:00p" + # Normalize: "7:30p" -> "7:30 PM", "10:00p" -> "10:00 PM" + time_normalized = time_text.replace("p", " PM").replace("a", " AM") + game_time = datetime.strptime(time_normalized, "%I:%M %p") + # Combine date and time with ET timezone (Basketball-Reference uses ET) + game_date = game_date.replace( + hour=game_time.hour, + minute=game_time.minute, + tzinfo=ZoneInfo("America/New_York"), + ) + except ValueError: + self._logger.debug(f"Could not parse time: {time_text}, using midnight") + # Get teams away_cell = row.find("td", {"data-stat": "visitor_team_name"}) home_cell = row.find("td", {"data-stat": "home_team_name"}) @@ -648,6 +673,7 @@ class NBAScraper(BaseScraper): country=info.country, latitude=info.latitude, longitude=info.longitude, + timezone=info.timezone, surface="hardwood", roof_type="dome", ) diff --git a/sportstime_parser/scrapers/nfl.py b/sportstime_parser/scrapers/nfl.py index 03c9163..5e5a8f7 100644 --- a/sportstime_parser/scrapers/nfl.py +++ b/sportstime_parser/scrapers/nfl.py @@ -2,6 +2,7 @@ from datetime import datetime, date from typing import Optional +from zoneinfo import ZoneInfo from bs4 import BeautifulSoup from .base import BaseScraper, RawGameData, ScrapeResult @@ -33,7 +34,12 @@ class NFLScraper(BaseScraper): Sources (in priority order): 1. ESPN API - Most reliable for NFL 2. 
Pro-Football-Reference - Complete historical data - 3. CBS Sports - Backup option + 3. CBS Sports - Backup option (not implemented) + + Source Timezones: + - espn: UTC - ISO 8601 format with "Z" suffix + - pro_football_reference: Eastern Time (ET) - times displayed as "8:20PM" + - cbs: Not implemented """ def __init__(self, season: int, **kwargs): @@ -56,7 +62,7 @@ class NFLScraper(BaseScraper): if source == "espn": week = kwargs.get("week", 1) season_type = kwargs.get("season_type", 2) # 1=preseason, 2=regular, 3=postseason - return f"https://site.api.espn.com/apis/site/v2/sports/football/nfl/scoreboard?seasontype={season_type}&week={week}" + return f"https://site.api.espn.com/apis/site/v2/sports/football/nfl/scoreboard?season={self.season}&seasontype={season_type}&week={week}" elif source == "pro_football_reference": return f"https://www.pro-football-reference.com/years/{self.season}/games.htm" @@ -323,6 +329,25 @@ class NFLScraper(BaseScraper): except ValueError: return None + # Get game start time (format: "8:20PM" or "1:00PM") - times are in ET + time_cell = row.find("td", {"data-stat": "gametime"}) + if time_cell: + time_text = time_cell.get_text(strip=True) + if time_text: + try: + # Parse time like "8:20PM" or "1:00PM" + # Normalize: "8:20PM" -> "8:20 PM" + time_normalized = time_text.replace("PM", " PM").replace("AM", " AM") + game_time = datetime.strptime(time_normalized, "%I:%M %p") + # Combine date and time with ET timezone (Pro-Football-Reference uses ET) + game_date = game_date.replace( + hour=game_time.hour, + minute=game_time.minute, + tzinfo=ZoneInfo("America/New_York"), + ) + except ValueError: + self._logger.debug(f"Could not parse time: {time_text}, using midnight") + # Get teams winner_cell = row.find("td", {"data-stat": "winner"}) loser_cell = row.find("td", {"data-stat": "loser"}) @@ -566,6 +591,7 @@ class NFLScraper(BaseScraper): country=info.country, latitude=info.latitude, longitude=info.longitude, + timezone=info.timezone, surface="turf", 
# Many NFL stadiums roof_type="open", # Most outdoor ) diff --git a/sportstime_parser/scrapers/nhl.py b/sportstime_parser/scrapers/nhl.py index b9c815f..2388515 100644 --- a/sportstime_parser/scrapers/nhl.py +++ b/sportstime_parser/scrapers/nhl.py @@ -2,6 +2,7 @@ from datetime import datetime, date from typing import Optional +from zoneinfo import ZoneInfo from bs4 import BeautifulSoup from .base import BaseScraper, RawGameData, ScrapeResult @@ -40,6 +41,11 @@ class NHLScraper(BaseScraper): 1. Hockey-Reference - Most reliable for NHL 2. NHL API - Official NHL data 3. ESPN API - Backup option + + Source Timezones: + - hockey_reference: Eastern Time (ET) - times displayed as "7:00p" + - nhl_api: UTC - ISO 8601 format with "Z" suffix (startTimeUTC field) + - espn: UTC - ISO 8601 format with "Z" suffix """ def __init__(self, season: int, **kwargs): @@ -158,6 +164,25 @@ class NHLScraper(BaseScraper): except ValueError: return None + # Get game start time (format: "7:00p" or "10:30p") - times are in ET + time_cell = row.find("td", {"data-stat": "time_game"}) + if time_cell: + time_text = time_cell.get_text(strip=True) + if time_text: + try: + # Parse time like "7:00p" or "10:30p" + # Normalize: "7:00p" -> "7:00 PM", "10:30p" -> "10:30 PM" + time_normalized = time_text.replace("p", " PM").replace("a", " AM") + game_time = datetime.strptime(time_normalized, "%I:%M %p") + # Combine date and time with ET timezone (Hockey-Reference uses ET) + game_date = game_date.replace( + hour=game_time.hour, + minute=game_time.minute, + tzinfo=ZoneInfo("America/New_York"), + ) + except ValueError: + self._logger.debug(f"Could not parse time: {time_text}, using midnight") + # Get teams visitor_cell = row.find("td", {"data-stat": "visitor_team_name"}) home_cell = row.find("td", {"data-stat": "home_team_name"}) @@ -644,6 +669,7 @@ class NHLScraper(BaseScraper): country=info.country, latitude=info.latitude, longitude=info.longitude, + timezone=info.timezone, surface="ice", roof_type="dome", ) 
diff --git a/sportstime_parser/scrapers/nwsl.py b/sportstime_parser/scrapers/nwsl.py index 5b55e98..3405fdb 100644 --- a/sportstime_parser/scrapers/nwsl.py +++ b/sportstime_parser/scrapers/nwsl.py @@ -27,7 +27,9 @@ class NWSLScraper(BaseScraper): Sources (in priority order): 1. ESPN API - Most reliable for NWSL - 2. NWSL official (via ESPN) - Backup option + + Source Timezones: + - espn: UTC - ISO 8601 format with "Z" suffix """ def __init__(self, season: int, **kwargs): @@ -361,6 +363,7 @@ class NWSLScraper(BaseScraper): country=info.country, latitude=info.latitude, longitude=info.longitude, + timezone=info.timezone, surface="grass", roof_type="open", ) diff --git a/sportstime_parser/scrapers/wnba.py b/sportstime_parser/scrapers/wnba.py index 7b4b1f5..ad32099 100644 --- a/sportstime_parser/scrapers/wnba.py +++ b/sportstime_parser/scrapers/wnba.py @@ -27,7 +27,9 @@ class WNBAScraper(BaseScraper): Sources (in priority order): 1. ESPN API - Most reliable for WNBA - 2. WNBA official (via ESPN) - Backup option + + Source Timezones: + - espn: UTC - ISO 8601 format with "Z" suffix """ def __init__(self, season: int, **kwargs): @@ -362,6 +364,7 @@ class WNBAScraper(BaseScraper): country=info.country, latitude=info.latitude, longitude=info.longitude, + timezone=info.timezone, surface="hardwood", roof_type="dome", ) diff --git a/sportstime_parser/uploaders/diff.py b/sportstime_parser/uploaders/diff.py index 3bec2c1..472696e 100644 --- a/sportstime_parser/uploaders/diff.py +++ b/sportstime_parser/uploaders/diff.py @@ -545,6 +545,7 @@ class RecordDiffer: - yearOpened: Year opened as int - imageURL: URL string for stadium image - sport: Sport code uppercase (e.g., "MLB") + - timezoneIdentifier: IANA timezone (e.g., "America/New_York") """ return CloudKitRecord( record_name=stadium.id, @@ -564,6 +565,7 @@ class RecordDiffer: "capacity": stadium.capacity, "yearOpened": stadium.opened_year, "imageURL": stadium.image_url, + "timezoneIdentifier": stadium.timezone, }, ) diff --git 
a/stadium_aliases.json b/stadium_aliases.json index 1401b37..2355caa 100644 --- a/stadium_aliases.json +++ b/stadium_aliases.json @@ -2032,5 +2032,89 @@ "stadium_canonical_id": "stadium_wnba_purcell_pavilion", "valid_from": null, "valid_until": null + }, + { + "alias_name": "inter miami", + "stadium_canonical_id": "stadium_mls_chase_stadium", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "inter miami cf", + "stadium_canonical_id": "stadium_mls_chase_stadium", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "miami", + "stadium_canonical_id": "stadium_mls_chase_stadium", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "mia", + "stadium_canonical_id": "stadium_mls_chase_stadium", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "nycfc", + "stadium_canonical_id": "stadium_mls_yankee_stadium", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "nyc", + "stadium_canonical_id": "stadium_mls_yankee_stadium", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "new york city fc", + "stadium_canonical_id": "stadium_mls_yankee_stadium", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "lafc", + "stadium_canonical_id": "stadium_mls_bmo_stadium", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "los angeles fc", + "stadium_canonical_id": "stadium_mls_bmo_stadium", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "dc united", + "stadium_canonical_id": "stadium_mls_audi_field", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "d.c. 
united", + "stadium_canonical_id": "stadium_mls_audi_field", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "chicago red stars", + "stadium_canonical_id": "stadium_nwsl_seatgeek_stadium", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "chi", + "stadium_canonical_id": "stadium_nwsl_seatgeek_stadium", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "chicago", + "stadium_canonical_id": "stadium_nwsl_seatgeek_stadium", + "valid_from": null, + "valid_until": null } ] \ No newline at end of file diff --git a/templates/admin/index.html b/templates/admin/index.html new file mode 100644 index 0000000..2a5a656 --- /dev/null +++ b/templates/admin/index.html @@ -0,0 +1,18 @@ +{% extends "admin/index.html" %} +{% load i18n %} + +{% block content %} +
+
+

+ Dashboard +

+

View statistics, scraper status, sync status, and review queue

+
+ + Open Dashboard + +
+ +{{ block.super }} +{% endblock %} diff --git a/templates/base.html b/templates/base.html new file mode 100644 index 0000000..84e292c --- /dev/null +++ b/templates/base.html @@ -0,0 +1,254 @@ + + + + + + {% block title %}{{ title|default:'SportsTime' }}{% endblock %} - SportsTime + + {% block extra_css %}{% endblock %} + + + + +
+ {% if messages %} + {% for message in messages %} +
{{ message }}
+ {% endfor %} + {% endif %} + + {% block content %}{% endblock %} +
+ + {% block extra_js %}{% endblock %} + + diff --git a/templates/dashboard/export.html b/templates/dashboard/export.html new file mode 100644 index 0000000..37f67c6 --- /dev/null +++ b/templates/dashboard/export.html @@ -0,0 +1,176 @@ +{% extends 'base.html' %} + +{% block content %} + + + +
+
+
{{ counts.teams }}
+
Teams
+
+
+
{{ counts.stadiums }}
+
Stadiums
+
+
+
{{ counts.games|floatformat:0 }}
+
Games
+
+
+
{{ counts.team_aliases }}
+
Team Aliases
+
+
+
{{ counts.stadium_aliases }}
+
Stadium Aliases
+
+
+ +
+ +
+
+

Export Options

+
+
+
+ +
+ + + + + + + +
+
+ +
+ + +
+ +
+ + + All games played in this calendar year, regardless of season +
+ + +
+
+ + +
+
+

Output Files

+
+
+
+ sports_canonical.json +

+ Sport metadata: display names, SF Symbol icons, brand colors, and season month ranges. +

+
+
+ league_structure.json +

+ League hierarchy: sports as leagues, conferences, and divisions with parent_id references. +

+
+
+ teams_canonical.json +

+ Teams with canonical IDs, abbreviations, stadium references, and conference/division IDs. +

+
+
+ stadiums_canonical.json +

+ Stadiums with coordinates, capacity, timezone, and primary team abbreviations. +

+
+
+ games_canonical.json +

+ Games with local date/time (converted from UTC), team names, abbreviations, and canonical IDs. +

+
+
+ team_aliases.json +

+ Historical team names and abbreviations with validity date ranges. +

+
+
+ stadium_aliases.json +

+ Stadium name aliases (e.g., former naming rights) with validity dates. +

+
+
+
+
+ + + +{% endblock %} diff --git a/templates/dashboard/index.html b/templates/dashboard/index.html new file mode 100644 index 0000000..5c16aa6 --- /dev/null +++ b/templates/dashboard/index.html @@ -0,0 +1,188 @@ +{% extends 'base.html' %} + +{% block content %} + + + +
+
+
{{ sports_count }}
+
Active Sports
+
+
+
{{ teams_count }}
+
Teams
+
+
+
{{ stadiums_count }}
+
Stadiums
+
+
+
{{ games_count|floatformat:0 }}
+
Games
+
+
+
{{ pending_reviews }}
+
Pending Reviews
+
+
+ +
+ +
+
+

Stats by Sport

+
+ + + + + + + + + + + + {% for stat in sport_stats %} + + + + + + + + {% empty %} + + + + {% endfor %} + +
SportTeamsStadiumsGamesReviews
{{ stat.sport.short_name }}{{ stat.teams }}{{ stat.stadiums }}{{ stat.games }} + {% if stat.pending_reviews > 0 %} + {{ stat.pending_reviews }} + {% else %} + 0 + {% endif %} +
No sports configured
+
+ + +
+
+

Recent Scrape Jobs

+ View All +
+ + + + + + + + + + + {% for job in recent_jobs %} + + + + + + + {% empty %} + + + + {% endfor %} + +
SportStatusGamesTime
{{ job.config.sport.short_name }} + {% if job.status == 'completed' %} + Completed + {% elif job.status == 'running' %} + Running + {% elif job.status == 'failed' %} + Failed + {% else %} + {{ job.status }} + {% endif %} + {{ job.games_found }}{{ job.created_at|timesince }} ago
No recent jobs
+
+
+ +
+ +
+
+

Recent CloudKit Syncs

+ View All +
+ + + + + + + + + + + {% for sync in recent_syncs %} + + + + + + + {% empty %} + + + + {% endfor %} + +
TypeStatusRecordsTime
{{ sync.sync_type }} + {% if sync.status == 'completed' %} + Completed + {% elif sync.status == 'running' %} + Running + {% elif sync.status == 'failed' %} + Failed + {% else %} + {{ sync.status }} + {% endif %} + {{ sync.records_synced }}{{ sync.created_at|timesince }} ago
No recent syncs
+
+ + + +
+{% endblock %} diff --git a/templates/dashboard/review_queue.html b/templates/dashboard/review_queue.html new file mode 100644 index 0000000..92b6212 --- /dev/null +++ b/templates/dashboard/review_queue.html @@ -0,0 +1,96 @@ +{% extends 'base.html' %} + +{% block content %} + + +{% if total_pending == 0 %} +
+ No items pending review. All data has been validated. +
+{% endif %} + + +{% if review_summary %} +
+
+

Review Summary

+
+
+ {% for item in review_summary %} +
+
{{ item.count }}
+
{{ item.sport__short_name }} - {{ item.item_type|title }}
+
+ {% endfor %} +
+
+{% endif %} + + +
+
+

Pending Reviews

+ View in Admin +
+ + + + + + + + + + + + + + {% for item in pending_items %} + + + + + + + + + + {% empty %} + + + + {% endfor %} + +
TypeSportRaw ValueMatched ToConfidenceCreatedActions
+ {% if item.item_type == 'team' %} + Team + {% elif item.item_type == 'stadium' %} + Stadium + {% elif item.item_type == 'game' %} + Game + {% else %} + {{ item.item_type }} + {% endif %} + {{ item.sport.short_name }}{{ item.raw_value }} + {% if item.matched_value %} + {{ item.matched_value }} + {% else %} + No match + {% endif %} + +
+
+
+ {{ item.confidence }}% +
{{ item.created_at|timesince }} ago + + Review + +
No pending reviews
+
+{% endblock %} diff --git a/templates/dashboard/scraper_status.html b/templates/dashboard/scraper_status.html new file mode 100644 index 0000000..4111f11 --- /dev/null +++ b/templates/dashboard/scraper_status.html @@ -0,0 +1,256 @@ +{% extends 'base.html' %} + +{% block content %} + + + +
+
+
{{ running_jobs }}
+
Running Jobs
+
+
+
{{ pending_jobs }}
+
Pending Jobs
+
+
+ + +
+
+

Scraper Configurations

+
+ {% csrf_token %} + +
+
+ + + + + + + + + + + + + + {% for config in configs %} + + + + + + + + + + {% empty %} + + + + {% endfor %} + +
SportSeasonStatusLast RunGames FoundSourceActions
{{ config.sport.short_name }}{{ config.season }} + {% if config.is_enabled %} + Active + {% else %} + Inactive + {% endif %} + + {% if config.last_run %} + {{ config.last_run|timesince }} ago + {% if config.last_run_status == 'completed' %} + + {% elif config.last_run_status == 'failed' %} + + {% endif %} + {% else %} + Never + {% endif %} + + {% if config.last_run_games %} + {{ config.last_run_games }} games + {% else %} + - + {% endif %} + {{ config.primary_source|default:"auto" }} +
+ {% csrf_token %} + +
+
No scraper configurations
+
+ + +
+
+

Recent Jobs

+
+ + + + + + + + + + + + + + {% for job in recent_jobs %} + + + + + + + + + + {% empty %} + + + + {% endfor %} + +
SportSeasonStatusGamesStartedDurationError
{{ job.config.sport.short_name }}{{ job.config.season }} + {% if job.status == 'completed' %} + Completed + {% elif job.status == 'running' %} + Running + {% elif job.status == 'failed' %} + Failed + {% elif job.status == 'pending' %} + Pending + {% else %} + {{ job.status }} + {% endif %} + + {% if job.status == 'completed' %} + {{ job.games_found }} found, +{{ job.games_new }} new + {% else %} + - + {% endif %} + {{ job.created_at|timesince }} ago{{ job.duration_display }} + {% if job.error_message %} + {{ job.error_message|truncatechars:50 }} + {% else %} + - + {% endif %} +
No recent jobs
+
+ + +
+
+

How Scrapers Work

+
+
+

What Gets Updated Automatically

+

When a scraper runs, it fetches schedule data from official sources and updates the following:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Data TypeBehavior
GamesCreates new games, updates scores/status for existing games. Uses canonical IDs to match.
TeamsAuto-created from scraper's built-in team mappings. New teams are added automatically.
StadiumsAuto-created from scraper's built-in stadium mappings. New venues are added automatically.
ConferencesAuto-created based on team data (e.g., Eastern, Western).
DivisionsAuto-created based on team data (e.g., Atlantic, Pacific).
+ +

New Team Scenario

+

If a league adds a new team (e.g., expansion team):

+
    +
  1. Add the team via Admin → Teams
  2. +
  3. Add Team Aliases for any names/abbreviations used by data sources
  4. +
  5. Add the stadium via Admin → Stadiums (if it's a new venue)
  6. +
  7. Add Stadium Aliases for any alternate names used by data sources
  8. +
  9. Run the scraper - it will automatically import all the new team's games
  10. +
+

If the scraper encounters an unknown team or stadium name, it creates a Review Item for manual resolution.

+ +

What Requires Manual Action

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SituationAction Required
Unknown team name in scheduleAdd a Team Alias in the admin, or resolve in Review Queue
Unknown stadium nameAdd a Stadium Alias in the admin, or resolve in Review Queue
New expansion teamAdd a new Team in the admin, then add aliases for any alternate names
Team relocation/renameAdd a Team Alias with validity dates for the old name
Stadium rename (naming rights)Add a Stadium Alias with validity dates (e.g., "Staples Center" valid until 2021)
+ +

Managing Aliases via Admin

+

Team and stadium name mappings can be managed directly in the admin interface:

+
    +
  • Team Aliases - Map alternate team names, abbreviations, historical names
  • +
  • Stadium Aliases - Map alternate stadium names, former names (naming rights changes)
  • +
+

Aliases support validity dates - useful for historical names like "Washington Redskins" (valid until 2020) or stadium naming rights changes.

+ +

Data Flow

+

+ Scraper runs → Fetches from source (ESPN, league API, etc.) → Normalizes team/stadium names → Creates/updates records → Marks changed records for CloudKit sync → Creates review items for unresolved names +

+ +

Tips

+
    +
  • Run scrapers regularly to keep scores and game statuses current
  • +
  • Check the Review Queue after scrapes for items needing attention
  • +
  • Scrapers are idempotent - running multiple times is safe and won't duplicate data
  • +
  • Each sport uses multiple data sources with automatic fallback if one fails
  • +
+
+
+{% endblock %} diff --git a/templates/dashboard/stats.html b/templates/dashboard/stats.html new file mode 100644 index 0000000..0438602 --- /dev/null +++ b/templates/dashboard/stats.html @@ -0,0 +1,111 @@ +{% extends 'base.html' %} + +{% block content %} + + + +
+
+

Game Statistics

+
+
+
+
{{ game_stats.total|floatformat:0 }}
+
Total Games
+
+
+
{{ game_stats.final|floatformat:0 }}
+
Completed
+
+
+
{{ game_stats.scheduled|floatformat:0 }}
+
Scheduled
+
+
+
{{ game_stats.today }}
+
Today
+
+
+
{{ game_stats.this_week }}
+
This Week
+
+
+
+ + +
+
+

CloudKit Sync Statistics

+
+
+
+
{{ sync_stats.total|floatformat:0 }}
+
Total Records
+
+
+
{{ sync_stats.synced|floatformat:0 }}
+
Synced
+
+
+
{{ sync_stats.pending|floatformat:0 }}
+
Pending
+
+
+
{{ sync_stats.failed|floatformat:0 }}
+
Failed
+
+
+ + {% if sync_stats.total > 0 %} +
+
+ Sync Progress + {{ sync_stats.synced }} / {{ sync_stats.total }} ({% widthratio sync_stats.synced sync_stats.total 100 %}%) +
+
+
+
+
+ {% endif %} +
+ + +
+
+

Statistics by Sport

+
+ + + + + + + + + + + + {% for stat in sport_stats %} + + + + + + + + {% endfor %} + +
SportTeamsStadiumsGamesPending Reviews
+ {{ stat.sport.short_name }} +
{{ stat.sport.name }}
+
{{ stat.teams }}{{ stat.stadiums }}{{ stat.games }} + {% if stat.pending_reviews > 0 %} + {{ stat.pending_reviews }} + {% else %} + 0 + {% endif %} +
+
+{% endblock %}