remove cbb

This commit is contained in:
Trey t
2026-01-10 11:16:15 -06:00
parent ca9fa535f1
commit 9ef4b1a770
5 changed files with 11 additions and 77 deletions

View File

@@ -42,7 +42,7 @@ from scrape_schedules import (
# NWSL sources
scrape_nwsl_espn, scrape_nwsl_fbref, scrape_nwsl_nwslsoccer,
# Utilities
generate_stadiums_from_teams,
scrape_all_stadiums,
)
from validate_data import (
validate_games,
@@ -129,7 +129,7 @@ def run_pipeline(
# Scrape stadiums
print_section("Stadiums")
all_stadiums = generate_stadiums_from_teams()
all_stadiums = scrape_all_stadiums()
print(f"  Scraped {len(all_stadiums)} stadiums")
# Scrape by sport with multi-source fallback

View File

@@ -12,8 +12,6 @@ This script coordinates scraping across sport-specific modules:
- wnba.py: WNBA stadiums
- nwsl.py: NWSL stadiums
CBB (College Basketball) remains inline pending extraction (350+ D1 teams).
Usage:
python scrape_schedules.py --sport nba --season 2026
python scrape_schedules.py --sport all --season 2026
@@ -93,7 +91,6 @@ from nwsl import (
# =============================================================================
# NON-CORE SPORT SCRAPERS
# NOTE: MLS, WNBA, NWSL stadiums are now imported from their respective modules
# TODO: Extract CBB to separate module (350+ D1 teams requires separate scoped phase)
# =============================================================================
def _scrape_espn_schedule(sport: str, league: str, season: int, date_range: tuple[str, str]) -> list[Game]:
@@ -106,7 +103,6 @@ def _scrape_espn_schedule(sport: str, league: str, season: int, date_range: tupl
'wnba': 'WNBA',
'usa.1': 'MLS',
'usa.nwsl': 'NWSL',
'mens-college-basketball': 'CBB'
}.get(league, league.upper())
print(f"Fetching {sport_upper} {season} from ESPN API...")
@@ -213,13 +209,6 @@ def scrape_nwsl_espn(season: int) -> list[Game]:
return _scrape_espn_schedule('soccer', 'usa.nwsl', season, (start, end))
def scrape_cbb_espn(season: int) -> list[Game]:
"""Fetch College Basketball schedule from ESPN API (D1 only)."""
start = f"{season-1}1101"
end = f"{season}0415"
return _scrape_espn_schedule('basketball', 'mens-college-basketball', season, (start, end))
def scrape_wnba_basketball_reference(season: int) -> list[Game]:
"""Scrape WNBA schedule from Basketball-Reference."""
games = []
@@ -339,42 +328,6 @@ def scrape_nwsl_nwslsoccer(season: int) -> list[Game]:
return games
def scrape_cbb_sports_reference(season: int) -> list[Game]:
"""Scrape College Basketball schedule from Sports-Reference."""
games = []
print(f"Scraping CBB {season} from Sports-Reference...")
# Placeholder - Sports-Reference scraping would go here
print(f" Found {len(games)} games from Sports-Reference")
return games
def scrape_cbb_cbssports(season: int) -> list[Game]:
"""Fetch College Basketball schedule from CBS Sports."""
games = []
print(f"Fetching CBB {season} from CBS Sports...")
# Placeholder - CBS Sports scraping would go here
print(f" Found {len(games)} games from CBS Sports")
return games
# =============================================================================
# NON-CORE STADIUM SCRAPERS
# NOTE: scrape_mls_stadiums() is now imported from mls.py
# NOTE: scrape_wnba_stadiums() is now imported from wnba.py
# NOTE: scrape_nwsl_stadiums() is now imported from nwsl.py
# TODO: Extract CBB to separate module (350+ D1 teams requires separate scoped phase)
# =============================================================================
def scrape_cbb_stadiums() -> list[Stadium]:
"""Fetch College Basketball arena data."""
print("\nCBB STADIUMS")
print("-" * 40)
stadiums = []
# Would include CBB arena data here
print(f" Found {len(stadiums)} CBB arenas")
return stadiums
# =============================================================================
# LEGACY STADIUM FUNCTIONS
# =============================================================================
@@ -407,7 +360,6 @@ def scrape_all_stadiums() -> list[Stadium]:
all_stadiums.extend(scrape_mls_stadiums())
all_stadiums.extend(scrape_wnba_stadiums())
all_stadiums.extend(scrape_nwsl_stadiums())
all_stadiums.extend(scrape_cbb_stadiums())
return all_stadiums
@@ -444,7 +396,7 @@ def get_team_abbrev(team_name: str, sport: str) -> str:
def main():
parser = argparse.ArgumentParser(description='Scrape sports schedules')
parser.add_argument('--sport', choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'cbb', 'all'], default='all')
parser.add_argument('--sport', choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'all'], default='all')
parser.add_argument('--season', type=int, default=2026, help='Season year (ending year)')
parser.add_argument('--stadiums-only', action='store_true', help='Only scrape stadium data (legacy method)')
parser.add_argument('--stadiums-update', action='store_true', help='Scrape ALL stadium data for all 7 sports (comprehensive)')
@@ -550,20 +502,6 @@ def main():
nwsl_games = assign_stable_ids(nwsl_games, 'NWSL', str(args.season))
all_games.extend(nwsl_games)
if args.sport in ['cbb', 'all']:
print("\n" + "="*60)
print(f"SCRAPING CBB {args.season}")
print("="*60)
cbb_sources = [
ScraperSource('ESPN', scrape_cbb_espn, priority=1, min_games=1000),
ScraperSource('Sports-Reference', scrape_cbb_sports_reference, priority=2, min_games=500),
ScraperSource('CBS Sports', scrape_cbb_cbssports, priority=3, min_games=300),
]
cbb_games = scrape_with_fallback('CBB', args.season, cbb_sources)
cbb_season = f"{args.season-1}-{str(args.season)[2:]}"
cbb_games = assign_stable_ids(cbb_games, 'CBB', cbb_season)
all_games.extend(cbb_games)
# Export
print("\n" + "="*60)
print("EXPORTING DATA")

View File

@@ -123,11 +123,11 @@ def get_season_and_sport(mode: str):
print(f"\n{Colors.BOLD}Select sport:{Colors.RESET}")
print(f" {Colors.GREEN}[1]{Colors.RESET} All Sports")
print(f" {Colors.GREEN}[2]{Colors.RESET} MLB {Colors.GREEN}[3]{Colors.RESET} NBA {Colors.GREEN}[4]{Colors.RESET} NHL {Colors.GREEN}[5]{Colors.RESET} NFL")
print(f" {Colors.GREEN}[6]{Colors.RESET} MLS {Colors.GREEN}[7]{Colors.RESET} WNBA {Colors.GREEN}[8]{Colors.RESET} NWSL {Colors.GREEN}[9]{Colors.RESET} CBB")
print(f" {Colors.GREEN}[6]{Colors.RESET} MLS {Colors.GREEN}[7]{Colors.RESET} WNBA {Colors.GREEN}[8]{Colors.RESET} NWSL")
sport_map = {
'1': 'all', '2': 'mlb', '3': 'nba', '4': 'nhl', '5': 'nfl',
'6': 'mls', '7': 'wnba', '8': 'nwsl', '9': 'cbb'
'6': 'mls', '7': 'wnba', '8': 'nwsl'
}
sport_choice = input(f"{Colors.CYAN}Enter choice [1]:{Colors.RESET} ").strip()
@@ -156,7 +156,6 @@ def scrape_submenu():
('6', 'mls', 'MLS - Major League Soccer'),
('7', 'wnba', 'WNBA - Women\'s National Basketball Association'),
('8', 'nwsl', 'NWSL - National Women\'s Soccer League'),
('9', 'cbb', 'CBB - College Basketball'),
('b', 'back', 'Back to main menu'),
]
@@ -235,7 +234,6 @@ def pipeline_submenu():
('6', 'mls', 'MLS only'),
('7', 'wnba', 'WNBA only'),
('8', 'nwsl', 'NWSL only'),
('9', 'cbb', 'CBB only'),
('b', 'back', 'Back to main menu'),
]
@@ -624,7 +622,7 @@ Examples:
)
scrape_parser.add_argument(
'--sport',
choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'cbb', 'all'],
choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'all'],
default='all',
help='Sport to scrape (default: all)'
)
@@ -653,7 +651,7 @@ Examples:
stadiums_update_parser = subparsers.add_parser(
'stadiums-update',
help='Scrape ALL stadium data for all 7 sports',
description='Comprehensive stadium scraping for NBA, MLB, NHL, NFL, WNBA, MLS, NWSL, and CBB'
description='Comprehensive stadium scraping for NBA, MLB, NHL, NFL, WNBA, MLS, and NWSL'
)
stadiums_update_parser.add_argument(
'--output',
@@ -893,7 +891,7 @@ Examples:
)
pipeline_parser.add_argument(
'--sport',
choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'cbb', 'all'],
choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'all'],
default='all',
help='Sport to process (default: all)'
)
@@ -934,7 +932,7 @@ Examples:
)
full_pipeline_parser.add_argument(
'--sport',
choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'cbb', 'all'],
choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'all'],
default='all',
help='Sport to process (default: all)'
)

View File

@@ -91,8 +91,6 @@ EXPECTED_GAMES = {
'max': 30,
'description': 'NWSL regular season (26 games)'
},
# Note: CBB doesn't have fixed game counts per "team"
# CBB teams vary widely (30+ games)
}

View File

@@ -27,7 +27,7 @@ from nfl import scrape_nfl_espn, NFL_TEAMS
# Import secondary sports from scrape_schedules (stubs)
from scrape_schedules import (
scrape_wnba_espn, scrape_mls_espn, scrape_nwsl_espn, scrape_cbb_espn,
scrape_wnba_espn, scrape_mls_espn, scrape_nwsl_espn,
WNBA_TEAMS, MLS_TEAMS, NWSL_TEAMS,
)
@@ -474,7 +474,7 @@ def main():
parser.add_argument('--data-dir', type=str, default='./data', help='Data directory')
parser.add_argument('--scrape-and-validate', action='store_true', help='Scrape fresh and validate')
parser.add_argument('--season', type=int, default=2025, help='Season year')
parser.add_argument('--sport', choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'cbb', 'all'], default='all')
parser.add_argument('--sport', choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'all'], default='all')
parser.add_argument('--output', type=str, default='./data/validation_report.json')
args = parser.parse_args()