Remove CBB (college basketball) scraping support

This commit is contained in:
Trey t
2026-01-10 11:16:15 -06:00
parent ca9fa535f1
commit 9ef4b1a770
5 changed files with 11 additions and 77 deletions

View File

@@ -12,8 +12,6 @@ This script coordinates scraping across sport-specific modules:
- wnba.py: WNBA stadiums
- nwsl.py: NWSL stadiums
CBB (College Basketball) remains inline pending extraction (350+ D1 teams).
Usage:
python scrape_schedules.py --sport nba --season 2026
python scrape_schedules.py --sport all --season 2026
@@ -93,7 +91,6 @@ from nwsl import (
# =============================================================================
# NON-CORE SPORT SCRAPERS
# NOTE: MLS, WNBA, NWSL stadiums are now imported from their respective modules
# TODO: Extract CBB to separate module (350+ D1 teams requires separate scoped phase)
# =============================================================================
def _scrape_espn_schedule(sport: str, league: str, season: int, date_range: tuple[str, str]) -> list[Game]:
@@ -106,7 +103,6 @@ def _scrape_espn_schedule(sport: str, league: str, season: int, date_range: tupl
'wnba': 'WNBA',
'usa.1': 'MLS',
'usa.nwsl': 'NWSL',
'mens-college-basketball': 'CBB'
}.get(league, league.upper())
print(f"Fetching {sport_upper} {season} from ESPN API...")
@@ -213,13 +209,6 @@ def scrape_nwsl_espn(season: int) -> list[Game]:
return _scrape_espn_schedule('soccer', 'usa.nwsl', season, (start, end))
def scrape_cbb_espn(season: int) -> list[Game]:
"""Fetch College Basketball schedule from ESPN API (D1 only)."""
start = f"{season-1}1101"
end = f"{season}0415"
return _scrape_espn_schedule('basketball', 'mens-college-basketball', season, (start, end))
def scrape_wnba_basketball_reference(season: int) -> list[Game]:
"""Scrape WNBA schedule from Basketball-Reference."""
games = []
@@ -339,42 +328,6 @@ def scrape_nwsl_nwslsoccer(season: int) -> list[Game]:
return games
def scrape_cbb_sports_reference(season: int) -> list[Game]:
"""Scrape College Basketball schedule from Sports-Reference."""
games = []
print(f"Scraping CBB {season} from Sports-Reference...")
# Placeholder - Sports-Reference scraping would go here
print(f" Found {len(games)} games from Sports-Reference")
return games
def scrape_cbb_cbssports(season: int) -> list[Game]:
"""Fetch College Basketball schedule from CBS Sports."""
games = []
print(f"Fetching CBB {season} from CBS Sports...")
# Placeholder - CBS Sports scraping would go here
print(f" Found {len(games)} games from CBS Sports")
return games
# =============================================================================
# NON-CORE STADIUM SCRAPERS
# NOTE: scrape_mls_stadiums() is now imported from mls.py
# NOTE: scrape_wnba_stadiums() is now imported from wnba.py
# NOTE: scrape_nwsl_stadiums() is now imported from nwsl.py
# TODO: Extract CBB to separate module (350+ D1 teams requires separate scoped phase)
# =============================================================================
def scrape_cbb_stadiums() -> list[Stadium]:
"""Fetch College Basketball arena data."""
print("\nCBB STADIUMS")
print("-" * 40)
stadiums = []
# Would include CBB arena data here
print(f" Found {len(stadiums)} CBB arenas")
return stadiums
# =============================================================================
# LEGACY STADIUM FUNCTIONS
# =============================================================================
@@ -407,7 +360,6 @@ def scrape_all_stadiums() -> list[Stadium]:
all_stadiums.extend(scrape_mls_stadiums())
all_stadiums.extend(scrape_wnba_stadiums())
all_stadiums.extend(scrape_nwsl_stadiums())
all_stadiums.extend(scrape_cbb_stadiums())
return all_stadiums
@@ -444,7 +396,7 @@ def get_team_abbrev(team_name: str, sport: str) -> str:
def main():
parser = argparse.ArgumentParser(description='Scrape sports schedules')
parser.add_argument('--sport', choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'cbb', 'all'], default='all')
parser.add_argument('--sport', choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'all'], default='all')
parser.add_argument('--season', type=int, default=2026, help='Season year (ending year)')
parser.add_argument('--stadiums-only', action='store_true', help='Only scrape stadium data (legacy method)')
parser.add_argument('--stadiums-update', action='store_true', help='Scrape ALL stadium data for all 8 sports (comprehensive)')
@@ -550,20 +502,6 @@ def main():
nwsl_games = assign_stable_ids(nwsl_games, 'NWSL', str(args.season))
all_games.extend(nwsl_games)
if args.sport in ['cbb', 'all']:
print("\n" + "="*60)
print(f"SCRAPING CBB {args.season}")
print("="*60)
cbb_sources = [
ScraperSource('ESPN', scrape_cbb_espn, priority=1, min_games=1000),
ScraperSource('Sports-Reference', scrape_cbb_sports_reference, priority=2, min_games=500),
ScraperSource('CBS Sports', scrape_cbb_cbssports, priority=3, min_games=300),
]
cbb_games = scrape_with_fallback('CBB', args.season, cbb_sources)
cbb_season = f"{args.season-1}-{str(args.season)[2:]}"
cbb_games = assign_stable_ids(cbb_games, 'CBB', cbb_season)
all_games.extend(cbb_games)
# Export
print("\n" + "="*60)
print("EXPORTING DATA")