remove cbb
This commit is contained in:
@@ -12,8 +12,6 @@ This script coordinates scraping across sport-specific modules:
|
||||
- wnba.py: WNBA stadiums
|
||||
- nwsl.py: NWSL stadiums
|
||||
|
||||
CBB (College Basketball) remains inline pending extraction (350+ D1 teams).
|
||||
|
||||
Usage:
|
||||
python scrape_schedules.py --sport nba --season 2026
|
||||
python scrape_schedules.py --sport all --season 2026
|
||||
@@ -93,7 +91,6 @@ from nwsl import (
|
||||
# =============================================================================
|
||||
# NON-CORE SPORT SCRAPERS
|
||||
# NOTE: MLS, WNBA, NWSL stadiums are now imported from their respective modules
|
||||
# TODO: Extract CBB to separate module (350+ D1 teams requires separate scoped phase)
|
||||
# =============================================================================
|
||||
|
||||
def _scrape_espn_schedule(sport: str, league: str, season: int, date_range: tuple[str, str]) -> list[Game]:
|
||||
@@ -106,7 +103,6 @@ def _scrape_espn_schedule(sport: str, league: str, season: int, date_range: tupl
|
||||
'wnba': 'WNBA',
|
||||
'usa.1': 'MLS',
|
||||
'usa.nwsl': 'NWSL',
|
||||
'mens-college-basketball': 'CBB'
|
||||
}.get(league, league.upper())
|
||||
|
||||
print(f"Fetching {sport_upper} {season} from ESPN API...")
|
||||
@@ -213,13 +209,6 @@ def scrape_nwsl_espn(season: int) -> list[Game]:
|
||||
return _scrape_espn_schedule('soccer', 'usa.nwsl', season, (start, end))
|
||||
|
||||
|
||||
def scrape_cbb_espn(season: int) -> list[Game]:
    """Fetch the men's college basketball schedule via the shared ESPN helper.

    The query window runs from November 1 of the year before ``season``
    through April 15 of ``season``; each bound is the year followed by a
    four-digit month/day suffix.

    Args:
        season: Ending year of the season (e.g. 2026 for 2025-26).

    Returns:
        Whatever ``_scrape_espn_schedule`` returns for the
        ``'mens-college-basketball'`` league.

    Note:
        The original docstring describes this as "D1 only"; no filtering
        is performed in this function itself.
    """
    date_range = (f"{season-1}1101", f"{season}0415")
    return _scrape_espn_schedule(
        'basketball',
        'mens-college-basketball',
        season,
        date_range,
    )
|
||||
|
||||
|
||||
def scrape_wnba_basketball_reference(season: int) -> list[Game]:
|
||||
"""Scrape WNBA schedule from Basketball-Reference."""
|
||||
games = []
|
||||
@@ -339,42 +328,6 @@ def scrape_nwsl_nwslsoccer(season: int) -> list[Game]:
|
||||
return games
|
||||
|
||||
|
||||
def scrape_cbb_sports_reference(season: int) -> list[Game]:
    """Placeholder scraper for College Basketball on Sports-Reference.

    No scraping is implemented yet: the function prints a start message
    and a result count, then returns an empty list.

    Args:
        season: Season year, used only in the progress message.

    Returns:
        An empty list (no games are collected by this stub).
    """
    print(f"Scraping CBB {season} from Sports-Reference...")

    # Placeholder - Sports-Reference scraping would go here
    collected = []

    print(f" Found {len(collected)} games from Sports-Reference")
    return collected
|
||||
|
||||
|
||||
def scrape_cbb_cbssports(season: int) -> list[Game]:
    """Placeholder scraper for College Basketball on CBS Sports.

    No fetching is implemented yet: the function prints a start message
    and a result count, then returns an empty list.

    Args:
        season: Season year, used only in the progress message.

    Returns:
        An empty list (no games are collected by this stub).
    """
    print(f"Fetching CBB {season} from CBS Sports...")

    # Placeholder - CBS Sports scraping would go here
    collected = []

    print(f" Found {len(collected)} games from CBS Sports")
    return collected
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# NON-CORE STADIUM SCRAPERS
|
||||
# NOTE: scrape_mls_stadiums() is now imported from mls.py
|
||||
# NOTE: scrape_wnba_stadiums() is now imported from wnba.py
|
||||
# NOTE: scrape_nwsl_stadiums() is now imported from nwsl.py
|
||||
# TODO: Extract CBB to separate module (350+ D1 teams requires separate scoped phase)
|
||||
# =============================================================================
|
||||
|
||||
def scrape_cbb_stadiums() -> list[Stadium]:
    """Placeholder for College Basketball arena data.

    Prints a section banner and a result count, then returns an empty
    list; no arena data is gathered by this stub.

    Returns:
        An empty list.
    """
    banner_rule = "-" * 40
    print("\nCBB STADIUMS")
    print(banner_rule)

    # Would include CBB arena data here
    arenas = []

    print(f" Found {len(arenas)} CBB arenas")
    return arenas
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# LEGACY STADIUM FUNCTIONS
|
||||
# =============================================================================
|
||||
@@ -407,7 +360,6 @@ def scrape_all_stadiums() -> list[Stadium]:
|
||||
all_stadiums.extend(scrape_mls_stadiums())
|
||||
all_stadiums.extend(scrape_wnba_stadiums())
|
||||
all_stadiums.extend(scrape_nwsl_stadiums())
|
||||
all_stadiums.extend(scrape_cbb_stadiums())
|
||||
|
||||
return all_stadiums
|
||||
|
||||
@@ -444,7 +396,7 @@ def get_team_abbrev(team_name: str, sport: str) -> str:
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Scrape sports schedules')
|
||||
parser.add_argument('--sport', choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'cbb', 'all'], default='all')
|
||||
parser.add_argument('--sport', choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'all'], default='all')
|
||||
parser.add_argument('--season', type=int, default=2026, help='Season year (ending year)')
|
||||
parser.add_argument('--stadiums-only', action='store_true', help='Only scrape stadium data (legacy method)')
|
||||
parser.add_argument('--stadiums-update', action='store_true', help='Scrape ALL stadium data for all 8 sports (comprehensive)')
|
||||
@@ -550,20 +502,6 @@ def main():
|
||||
nwsl_games = assign_stable_ids(nwsl_games, 'NWSL', str(args.season))
|
||||
all_games.extend(nwsl_games)
|
||||
|
||||
if args.sport in ['cbb', 'all']:
|
||||
print("\n" + "="*60)
|
||||
print(f"SCRAPING CBB {args.season}")
|
||||
print("="*60)
|
||||
cbb_sources = [
|
||||
ScraperSource('ESPN', scrape_cbb_espn, priority=1, min_games=1000),
|
||||
ScraperSource('Sports-Reference', scrape_cbb_sports_reference, priority=2, min_games=500),
|
||||
ScraperSource('CBS Sports', scrape_cbb_cbssports, priority=3, min_games=300),
|
||||
]
|
||||
cbb_games = scrape_with_fallback('CBB', args.season, cbb_sources)
|
||||
cbb_season = f"{args.season-1}-{str(args.season)[2:]}"
|
||||
cbb_games = assign_stable_ids(cbb_games, 'CBB', cbb_season)
|
||||
all_games.extend(cbb_games)
|
||||
|
||||
# Export
|
||||
print("\n" + "="*60)
|
||||
print("EXPORTING DATA")
|
||||
|
||||
Reference in New Issue
Block a user