#!/usr/bin/env python3
"""
Sports Schedule Scraper Orchestrator

This script coordinates scraping across sport-specific modules:
- core.py: Shared utilities, data classes, fallback system
- mlb.py: MLB scrapers
- nba.py: NBA scrapers
- nhl.py: NHL scrapers
- nfl.py: NFL scrapers
- mls.py: MLS stadiums
- wnba.py: WNBA stadiums
- nwsl.py: NWSL stadiums

Usage:
    python scrape_schedules.py --sport nba --season 2026
    python scrape_schedules.py --sport all --season 2026
    python scrape_schedules.py --stadiums-only
"""

import argparse
import csv
import json
import time
from collections import defaultdict
from dataclasses import asdict
from datetime import datetime
from io import StringIO
from pathlib import Path
from typing import Optional

import requests

# Import from core module
from core import (
    Game,
    Stadium,
    ScraperSource,
    StadiumScraperSource,
    fetch_page,
    scrape_with_fallback,
    scrape_stadiums_with_fallback,
    assign_stable_ids,
    export_to_json,
)

# Import from sport modules (core 4 sports)
from mlb import (
    scrape_mlb_games,
    scrape_mlb_stadiums,
    MLB_TEAMS,
)
from nba import (
    scrape_nba_games,
    scrape_nba_stadiums,
    get_nba_season_string,
    NBA_TEAMS,
)
from nhl import (
    scrape_nhl_games,
    scrape_nhl_stadiums,
    get_nhl_season_string,
    NHL_TEAMS,
)
from nfl import (
    scrape_nfl_games,
    scrape_nfl_stadiums,
    get_nfl_season_string,
    NFL_TEAMS,
)
from mls import (
    MLS_TEAMS,
    get_mls_team_abbrev,
    scrape_mls_stadiums,
    MLS_STADIUM_SOURCES,
)
from wnba import (
    WNBA_TEAMS,
    get_wnba_team_abbrev,
    scrape_wnba_stadiums,
    WNBA_STADIUM_SOURCES,
)
from nwsl import (
    NWSL_TEAMS,
    get_nwsl_team_abbrev,
    scrape_nwsl_stadiums,
    NWSL_STADIUM_SOURCES,
)


# =============================================================================
# NON-CORE SPORT SCRAPERS
# NOTE: MLS, WNBA, NWSL stadiums are now imported from their respective modules
# =============================================================================

def _scrape_espn_schedule(sport: str, league: str, season: int,
                          date_range: tuple[str, str]) -> list[Game]:
    """
    Fetch schedule from ESPN API.
    Shared helper for non-core sports that use ESPN API.

    Args:
        sport: ESPN sport path segment (e.g. 'basketball', 'soccer').
        league: ESPN league path segment (e.g. 'wnba', 'usa.1', 'usa.nwsl').
        season: Season year, stored on each Game as a string.
        date_range: (start, end) dates in YYYYMMDD form for the 'dates' param.

    Returns:
        List of Game objects; empty on any fetch/parse failure (best-effort).
    """
    games: list[Game] = []
    # Map ESPN league codes to our canonical sport labels.
    sport_upper = {
        'wnba': 'WNBA',
        'usa.1': 'MLS',
        'usa.nwsl': 'NWSL',
    }.get(league, league.upper())
    print(f"Fetching {sport_upper} {season} from ESPN API...")
    url = f"https://site.api.espn.com/apis/site/v2/sports/{sport}/{league}/scoreboard"
    params = {
        'dates': f"{date_range[0]}-{date_range[1]}",
        'limit': 1000
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
    }
    try:
        response = requests.get(url, params=params, headers=headers, timeout=30)
        response.raise_for_status()
        data = response.json()
        events = data.get('events', [])
        for event in events:
            try:
                # ESPN dates look like '2026-05-01T23:00Z'; slice out the
                # date and (if present) the HH:MM portion.
                date_str = event.get('date', '')[:10]
                time_str = (event.get('date', '')[11:16]
                            if len(event.get('date', '')) > 11 else None)
                competitions = event.get('competitions', [{}])
                if not competitions:
                    continue
                comp = competitions[0]
                competitors = comp.get('competitors', [])
                if len(competitors) < 2:
                    continue
                home_team = None
                away_team = None
                home_abbrev = None
                away_abbrev = None
                for team in competitors:
                    team_data = team.get('team', {})
                    team_name = team_data.get('displayName', team_data.get('name', ''))
                    team_abbrev = team_data.get('abbreviation', '')
                    if team.get('homeAway') == 'home':
                        home_team = team_name
                        home_abbrev = team_abbrev
                    else:
                        away_team = team_name
                        away_abbrev = team_abbrev
                if not home_team or not away_team:
                    continue
                venue = comp.get('venue', {}).get('fullName', '')
                # Resolve abbreviations BEFORE building the id.  The original
                # code used the raw (possibly empty) ESPN abbreviations in the
                # id and only fell back to get_team_abbrev for the Game fields,
                # which produced ids with empty segments like 'mls_2026-03-01__'.
                home_abbrev = home_abbrev or get_team_abbrev(home_team, sport_upper)
                away_abbrev = away_abbrev or get_team_abbrev(away_team, sport_upper)
                game_id = f"{sport_upper.lower()}_{date_str}_{away_abbrev}_{home_abbrev}".lower()
                game = Game(
                    id=game_id,
                    sport=sport_upper,
                    season=str(season),
                    date=date_str,
                    time=time_str,
                    home_team=home_team,
                    away_team=away_team,
                    home_team_abbrev=home_abbrev,
                    away_team_abbrev=away_abbrev,
                    venue=venue,
                    source='espn.com'
                )
                games.append(game)
            except Exception:
                # Best-effort: skip malformed events rather than abort the feed.
                continue
        print(f" Found {len(games)} games from ESPN")
    except Exception as e:
        print(f"Error fetching ESPN {sport_upper}: {e}")
    return games


def scrape_wnba_espn(season: int) -> list[Game]:
    """Fetch WNBA schedule from ESPN API."""
    # WNBA regular season runs roughly May-October.
    start = f"{season}0501"
    end = f"{season}1031"
    return _scrape_espn_schedule('basketball', 'wnba', season, (start, end))


def scrape_mls_espn(season: int) -> list[Game]:
    """Fetch MLS schedule from ESPN API."""
    start = f"{season}0201"
    end = f"{season}1231"
    return _scrape_espn_schedule('soccer', 'usa.1', season, (start, end))


def scrape_nwsl_espn(season: int) -> list[Game]:
    """Fetch NWSL schedule from ESPN API."""
    start = f"{season}0301"
    end = f"{season}1130"
    return _scrape_espn_schedule('soccer', 'usa.nwsl', season, (start, end))


def scrape_wnba_basketball_reference(season: int) -> list[Game]:
    """Scrape WNBA schedule from Basketball-Reference.

    Returns an empty list when the page or schedule table is unavailable.
    """
    games: list[Game] = []
    url = f"https://www.basketball-reference.com/wnba/years/{season}_games.html"
    print(f"Scraping WNBA {season} from Basketball-Reference...")
    soup = fetch_page(url, 'basketball-reference.com')
    if not soup:
        return games
    table = soup.find('table', {'id': 'schedule'})
    if not table:
        return games
    tbody = table.find('tbody')
    if not tbody:
        return games
    for row in tbody.find_all('tr'):
        # Skip repeated header rows embedded in the body.
        if row.get('class') and 'thead' in row.get('class'):
            continue
        try:
            date_cell = row.find('th', {'data-stat': 'date_game'})
            if not date_cell:
                continue
            date_link = date_cell.find('a')
            date_str = date_link.text if date_link else date_cell.text
            visitor_cell = row.find('td', {'data-stat': 'visitor_team_name'})
            home_cell = row.find('td', {'data-stat': 'home_team_name'})
            if not visitor_cell or not home_cell:
                continue
            visitor_link = visitor_cell.find('a')
            home_link = home_cell.find('a')
            away_team = visitor_link.text if visitor_link else visitor_cell.text
            home_team = home_link.text if home_link else home_cell.text
            try:
                parsed_date = datetime.strptime(date_str.strip(), '%a, %b %d, %Y')
                date_formatted = parsed_date.strftime('%Y-%m-%d')
            except ValueError:
                # Was a bare 'except:' — narrowed to the only error strptime
                # raises for a bad date string.
                continue
            away_abbrev = get_team_abbrev(away_team, 'WNBA')
            home_abbrev = get_team_abbrev(home_team, 'WNBA')
            game_id = f"wnba_{date_formatted}_{away_abbrev}_{home_abbrev}".lower().replace(' ', '')
            game = Game(
                id=game_id,
                sport='WNBA',
                season=str(season),
                date=date_formatted,
                time=None,
                home_team=home_team,
                away_team=away_team,
                home_team_abbrev=home_abbrev,
                away_team_abbrev=away_abbrev,
                venue='',
                source='basketball-reference.com'
            )
            games.append(game)
        except Exception:
            # Best-effort: skip malformed rows rather than abort the table.
            continue
    print(f" Found {len(games)} games from Basketball-Reference")
    return games


def scrape_wnba_cbssports(season: int) -> list[Game]:
    """Fetch WNBA schedule from CBS Sports."""
    games: list[Game] = []
    print(f"Fetching WNBA {season} from CBS Sports...")
    # Placeholder - CBS Sports scraping would go here
    print(f" Found {len(games)} games from CBS Sports")
    return games


def scrape_mls_fbref(season: int) -> list[Game]:
    """Scrape MLS schedule from FBref."""
    games: list[Game] = []
    print(f"Scraping MLS {season} from FBref...")
    # Placeholder - FBref scraping would go here
    print(f" Found {len(games)} games from FBref")
    return games


def scrape_mls_mlssoccer(season: int) -> list[Game]:
    """Scrape MLS schedule from MLSSoccer.com."""
    games: list[Game] = []
    print(f"Scraping MLS {season} from MLSSoccer.com...")
    # Placeholder - MLSSoccer.com scraping would go here
    print(f" Found {len(games)} games from MLSSoccer.com")
    return games


def scrape_nwsl_fbref(season: int) -> list[Game]:
    """Scrape NWSL schedule from FBref."""
    games: list[Game] = []
    print(f"Scraping NWSL {season} from FBref...")
    # Placeholder - FBref scraping would go here
    print(f" Found {len(games)} games from FBref")
    return games


def scrape_nwsl_nwslsoccer(season: int) -> list[Game]:
    """Scrape NWSL schedule from NWSL.com."""
    games: list[Game] = []
    print(f"Scraping NWSL {season} from NWSL.com...")
    # Placeholder - NWSL.com scraping would go here
    print(f" Found {len(games)} games from NWSL.com")
    return games


# =============================================================================
# LEGACY STADIUM FUNCTIONS
# =============================================================================

def scrape_stadiums_hifld() -> list[Stadium]:
    """Legacy: Scrape from HIFLD open data."""
    # Placeholder for legacy HIFLD scraping
    return []


def generate_stadiums_from_teams() -> list[Stadium]:
    """Generate stadium entries from team data with hardcoded coordinates."""
    stadiums: list[Stadium] = []
    # This function would generate stadiums from all team dictionaries
    # Keeping as placeholder since sport modules have their own stadium scrapers
    return stadiums


def scrape_all_stadiums() -> list[Stadium]:
    """Comprehensive stadium scraping for all sports.

    Aggregates the per-sport stadium scrapers (4 core + 3 non-core leagues).
    """
    all_stadiums: list[Stadium] = []
    # Core sports (from modules)
    all_stadiums.extend(scrape_mlb_stadiums())
    all_stadiums.extend(scrape_nba_stadiums())
    all_stadiums.extend(scrape_nhl_stadiums())
    all_stadiums.extend(scrape_nfl_stadiums())
    # Non-core sports
    all_stadiums.extend(scrape_mls_stadiums())
    all_stadiums.extend(scrape_wnba_stadiums())
    all_stadiums.extend(scrape_nwsl_stadiums())
    return all_stadiums


# =============================================================================
# HELPERS
# =============================================================================

def get_team_abbrev(team_name: str, sport: str) -> str:
    """Get team abbreviation from full name.

    Tries an exact (case-insensitive) name match first, then a substring
    match, and finally falls back to the first three letters uppercased.
    """
    teams = {
        'NBA': NBA_TEAMS,
        'MLB': MLB_TEAMS,
        'NHL': NHL_TEAMS,
        'NFL': NFL_TEAMS,
        'WNBA': WNBA_TEAMS,
        'MLS': MLS_TEAMS,
        'NWSL': NWSL_TEAMS,
    }.get(sport, {})
    needle = team_name.lower()
    # Pass 1: exact match always wins.  (The original checked exact and
    # substring in the same loop iteration, so a substring hit on an earlier
    # dict entry could shadow an exact match on a later one.)
    for abbrev, info in teams.items():
        if info['name'].lower() == needle:
            return abbrev
    # Pass 2: substring fallback.
    for abbrev, info in teams.items():
        if needle in info['name'].lower():
            return abbrev
    # Return first 3 letters as fallback
    return team_name[:3].upper()


# =============================================================================
# MAIN ORCHESTRATOR
# =============================================================================

def _banner(title: str) -> None:
    """Print a 60-char section banner around *title*."""
    print("\n" + "=" * 60)
    print(title)
    print("=" * 60)


def _scrape_stadium_data(stadiums_update: bool) -> list[Stadium]:
    """Scrape stadium data.

    Uses the comprehensive per-sport scrapers when *stadiums_update* is set,
    otherwise the legacy HIFLD + generated-from-teams path.
    """
    _banner("SCRAPING STADIUMS")
    stadiums: list[Stadium] = []
    if stadiums_update:
        print("Using comprehensive stadium scrapers for all sports...")
        stadiums.extend(scrape_all_stadiums())
        print(f" Total stadiums scraped: {len(stadiums)}")
    else:
        stadiums.extend(scrape_stadiums_hifld())
        stadiums.extend(generate_stadiums_from_teams())
    return stadiums


def main():
    """CLI entry point: scrape stadiums and schedules, then export JSON."""
    parser = argparse.ArgumentParser(description='Scrape sports schedules')
    parser.add_argument('--sport',
                        choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'all'],
                        default='all')
    parser.add_argument('--season', type=int, default=2026,
                        help='Season year (ending year)')
    parser.add_argument('--stadiums-only', action='store_true',
                        help='Only scrape stadium data (legacy method)')
    parser.add_argument('--stadiums-update', action='store_true',
                        help='Scrape ALL stadium data for all 8 sports (comprehensive)')
    parser.add_argument('--output', type=str, default='./data',
                        help='Output directory')
    args = parser.parse_args()

    output_dir = Path(args.output)
    season = args.season
    all_games: list[Game] = []

    all_stadiums = _scrape_stadium_data(args.stadiums_update)

    # If stadiums-only mode, export and exit
    if args.stadiums_only:
        export_to_json([], all_stadiums, output_dir)
        return

    # --- Core sports: single scraper + league-specific season string -------
    if args.sport in ('nba', 'all'):
        _banner(f"SCRAPING NBA {season}")
        games = scrape_nba_games(season)
        all_games.extend(assign_stable_ids(games, 'NBA', get_nba_season_string(season)))

    if args.sport in ('mlb', 'all'):
        _banner(f"SCRAPING MLB {season}")
        games = scrape_mlb_games(season)
        all_games.extend(assign_stable_ids(games, 'MLB', str(season)))

    if args.sport in ('nhl', 'all'):
        _banner(f"SCRAPING NHL {season}")
        games = scrape_nhl_games(season)
        all_games.extend(assign_stable_ids(games, 'NHL', get_nhl_season_string(season)))

    if args.sport in ('nfl', 'all'):
        _banner(f"SCRAPING NFL {season}")
        games = scrape_nfl_games(season)
        all_games.extend(assign_stable_ids(games, 'NFL', get_nfl_season_string(season)))

    # --- Non-core sports: prioritized multi-source fallback chains ---------
    # (TODO: Extract to modules)
    if args.sport in ('wnba', 'all'):
        _banner(f"SCRAPING WNBA {season}")
        wnba_sources = [
            ScraperSource('ESPN', scrape_wnba_espn, priority=1, min_games=100),
            ScraperSource('Basketball-Reference', scrape_wnba_basketball_reference,
                          priority=2, min_games=100),
            ScraperSource('CBS Sports', scrape_wnba_cbssports, priority=3, min_games=50),
        ]
        games = scrape_with_fallback('WNBA', season, wnba_sources)
        all_games.extend(assign_stable_ids(games, 'WNBA', str(season)))

    if args.sport in ('mls', 'all'):
        _banner(f"SCRAPING MLS {season}")
        mls_sources = [
            ScraperSource('ESPN', scrape_mls_espn, priority=1, min_games=200),
            ScraperSource('FBref', scrape_mls_fbref, priority=2, min_games=100),
            ScraperSource('MLSSoccer.com', scrape_mls_mlssoccer, priority=3, min_games=100),
        ]
        games = scrape_with_fallback('MLS', season, mls_sources)
        all_games.extend(assign_stable_ids(games, 'MLS', str(season)))

    if args.sport in ('nwsl', 'all'):
        _banner(f"SCRAPING NWSL {season}")
        nwsl_sources = [
            ScraperSource('ESPN', scrape_nwsl_espn, priority=1, min_games=100),
            ScraperSource('FBref', scrape_nwsl_fbref, priority=2, min_games=50),
            ScraperSource('NWSL.com', scrape_nwsl_nwslsoccer, priority=3, min_games=50),
        ]
        games = scrape_with_fallback('NWSL', season, nwsl_sources)
        all_games.extend(assign_stable_ids(games, 'NWSL', str(season)))

    # --- Export -------------------------------------------------------------
    _banner("EXPORTING DATA")
    export_to_json(all_games, all_stadiums, output_dir)

    # --- Summary ------------------------------------------------------------
    _banner("SUMMARY")
    print(f"Total games scraped: {len(all_games)}")
    print(f"Total stadiums: {len(all_stadiums)}")
    by_sport: dict[str, int] = defaultdict(int)
    for g in all_games:
        by_sport[g.sport] += 1
    for sport, count in by_sport.items():
        print(f" {sport}: {count} games")


if __name__ == '__main__':
    main()